Add check runtime metric
continuous-integration/drone/push Build is failing Details

This commit is contained in:
IamTheFij 2021-05-11 10:41:22 -07:00
parent 30c2c7d6b2
commit befea7375f
3 changed files with 29 additions and 8 deletions

View File

@ -74,7 +74,7 @@ func checkMonitors(config *Config) error {
// Track status metrics // Track status metrics
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp()) Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
Metrics.CountCheck(monitor.Name, success, hasAlert) Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
if alertNotice != nil { if alertNotice != nil {
return sendAlerts(config, monitor, alertNotice) return sendAlerts(config, monitor, alertNotice)
@ -108,7 +108,7 @@ func main() {
// Serve metrics exporter, if specified // Serve metrics exporter, if specified
if ExportMetrics { if ExportMetrics {
slog.Infof("Exporting metrics to Prometheus") slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
go ServeMetrics() go ServeMetrics()
} }

View File

@ -19,6 +19,7 @@ import (
type MinitorMetrics struct { type MinitorMetrics struct {
alertCount *prometheus.CounterVec alertCount *prometheus.CounterVec
checkCount *prometheus.CounterVec checkCount *prometheus.CounterVec
checkTime *prometheus.GaugeVec
monitorStatus *prometheus.GaugeVec monitorStatus *prometheus.GaugeVec
} }
@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
}, },
[]string{"monitor", "status", "is_alert"}, []string{"monitor", "status", "is_alert"},
), ),
checkTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_check_milliseconds",
Help: "Time in miliseconds that a check ran for",
},
[]string{"monitor", "status"},
),
monitorStatus: prometheus.NewGaugeVec( monitorStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Name: "minitor_monitor_up_count", Name: "minitor_monitor_up_count",
@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
// Register newly created metrics // Register newly created metrics
prometheus.MustRegister(metrics.alertCount) prometheus.MustRegister(metrics.alertCount)
prometheus.MustRegister(metrics.checkCount) prometheus.MustRegister(metrics.checkCount)
prometheus.MustRegister(metrics.checkTime)
prometheus.MustRegister(metrics.monitorStatus) prometheus.MustRegister(metrics.monitorStatus)
return metrics return metrics
@ -68,7 +77,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
} }
// CountCheck counts the result of a particular Monitor check // CountCheck counts the result of a particular Monitor check
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) { func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
status := "failure" status := "failure"
if isSuccess { if isSuccess {
status = "success" status = "success"
@ -82,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
metrics.checkCount.With( metrics.checkCount.With(
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal}, prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
).Inc() ).Inc()
metrics.checkTime.With(
prometheus.Labels{"monitor": monitor, "status": status},
).Set(float64(ms))
} }
// CountAlert counts an alert // CountAlert counts an alert

View File

@ -20,11 +20,12 @@ type Monitor struct { //nolint:maligned
Command CommandOrShell Command CommandOrShell
// Other values // Other values
alertCount int16 alertCount int16
failureCount int16 failureCount int16
lastCheck time.Time lastCheck time.Time
lastSuccess time.Time lastSuccess time.Time
lastOutput string lastOutput string
lastCheckDuration time.Duration
} }
// IsValid returns a boolean indicating if the Monitor has been correctly // IsValid returns a boolean indicating if the Monitor has been correctly
@ -57,9 +58,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
cmd = ShellCommand(monitor.Command.ShellCommand) cmd = ShellCommand(monitor.Command.ShellCommand)
} }
checkStartTime := time.Now()
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
monitor.lastCheck = time.Now() monitor.lastCheck = time.Now()
monitor.lastOutput = string(output) monitor.lastOutput = string(output)
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
var alertNotice *AlertNotice var alertNotice *AlertNotice
@ -88,6 +91,11 @@ func (monitor Monitor) IsUp() bool {
return monitor.alertCount == 0 return monitor.alertCount == 0
} }
// LastCheckMilliseconds reports the stored duration of the last check
// (lastCheckDuration) as a whole number of milliseconds.
func (monitor Monitor) LastCheckMilliseconds() int64 {
	elapsed := monitor.lastCheckDuration
	return elapsed.Milliseconds()
}
func (monitor *Monitor) success() (notice *AlertNotice) { func (monitor *Monitor) success() (notice *AlertNotice) {
if !monitor.IsUp() { if !monitor.IsUp() {
// Alert that we have recovered // Alert that we have recovered