This commit is contained in:
parent
30c2c7d6b2
commit
befea7375f
4
main.go
4
main.go
@ -74,7 +74,7 @@ func checkMonitors(config *Config) error {
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
||||
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
|
||||
|
||||
if alertNotice != nil {
|
||||
return sendAlerts(config, monitor, alertNotice)
|
||||
@ -108,7 +108,7 @@ func main() {
|
||||
|
||||
// Serve metrics exporter, if specified
|
||||
if ExportMetrics {
|
||||
slog.Infof("Exporting metrics to Prometheus")
|
||||
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
|
||||
|
||||
go ServeMetrics()
|
||||
}
|
||||
|
15
metrics.go
15
metrics.go
@ -19,6 +19,7 @@ import (
|
||||
type MinitorMetrics struct {
|
||||
alertCount *prometheus.CounterVec
|
||||
checkCount *prometheus.CounterVec
|
||||
checkTime *prometheus.GaugeVec
|
||||
monitorStatus *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
|
||||
},
|
||||
[]string{"monitor", "status", "is_alert"},
|
||||
),
|
||||
checkTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_check_milliseconds",
|
||||
Help: "Time in miliseconds that a check ran for",
|
||||
},
|
||||
[]string{"monitor", "status"},
|
||||
),
|
||||
monitorStatus: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_monitor_up_count",
|
||||
@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
|
||||
// Register newly created metrics
|
||||
prometheus.MustRegister(metrics.alertCount)
|
||||
prometheus.MustRegister(metrics.checkCount)
|
||||
prometheus.MustRegister(metrics.checkTime)
|
||||
prometheus.MustRegister(metrics.monitorStatus)
|
||||
|
||||
return metrics
|
||||
@ -68,7 +77,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
||||
}
|
||||
|
||||
// CountCheck counts the result of a particular Monitor check
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
|
||||
status := "failure"
|
||||
if isSuccess {
|
||||
status = "success"
|
||||
@ -82,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
|
||||
metrics.checkCount.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||
).Inc()
|
||||
|
||||
metrics.checkTime.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status},
|
||||
).Set(float64(ms))
|
||||
}
|
||||
|
||||
// CountAlert counts an alert
|
||||
|
18
monitor.go
18
monitor.go
@ -20,11 +20,12 @@ type Monitor struct { //nolint:maligned
|
||||
Command CommandOrShell
|
||||
|
||||
// Other values
|
||||
alertCount int16
|
||||
failureCount int16
|
||||
lastCheck time.Time
|
||||
lastSuccess time.Time
|
||||
lastOutput string
|
||||
alertCount int16
|
||||
failureCount int16
|
||||
lastCheck time.Time
|
||||
lastSuccess time.Time
|
||||
lastOutput string
|
||||
lastCheckDuration time.Duration
|
||||
}
|
||||
|
||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||
@ -57,9 +58,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
cmd = ShellCommand(monitor.Command.ShellCommand)
|
||||
}
|
||||
|
||||
checkStartTime := time.Now()
|
||||
output, err := cmd.CombinedOutput()
|
||||
monitor.lastCheck = time.Now()
|
||||
monitor.lastOutput = string(output)
|
||||
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
|
||||
|
||||
var alertNotice *AlertNotice
|
||||
|
||||
@ -88,6 +91,11 @@ func (monitor Monitor) IsUp() bool {
|
||||
return monitor.alertCount == 0
|
||||
}
|
||||
|
||||
// LastCheckMilliseconds gives number of milliseconds the last check ran for
|
||||
func (monitor Monitor) LastCheckMilliseconds() int64 {
|
||||
return monitor.lastCheckDuration.Milliseconds()
|
||||
}
|
||||
|
||||
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||
if !monitor.IsUp() {
|
||||
// Alert that we have recovered
|
||||
|
Loading…
Reference in New Issue
Block a user