This commit is contained in:
parent
30c2c7d6b2
commit
befea7375f
4
main.go
4
main.go
@ -74,7 +74,7 @@ func checkMonitors(config *Config) error {
|
|||||||
|
|
||||||
// Track status metrics
|
// Track status metrics
|
||||||
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
|
||||||
|
|
||||||
if alertNotice != nil {
|
if alertNotice != nil {
|
||||||
return sendAlerts(config, monitor, alertNotice)
|
return sendAlerts(config, monitor, alertNotice)
|
||||||
@ -108,7 +108,7 @@ func main() {
|
|||||||
|
|
||||||
// Serve metrics exporter, if specified
|
// Serve metrics exporter, if specified
|
||||||
if ExportMetrics {
|
if ExportMetrics {
|
||||||
slog.Infof("Exporting metrics to Prometheus")
|
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
|
||||||
|
|
||||||
go ServeMetrics()
|
go ServeMetrics()
|
||||||
}
|
}
|
||||||
|
15
metrics.go
15
metrics.go
@ -19,6 +19,7 @@ import (
|
|||||||
type MinitorMetrics struct {
|
type MinitorMetrics struct {
|
||||||
alertCount *prometheus.CounterVec
|
alertCount *prometheus.CounterVec
|
||||||
checkCount *prometheus.CounterVec
|
checkCount *prometheus.CounterVec
|
||||||
|
checkTime *prometheus.GaugeVec
|
||||||
monitorStatus *prometheus.GaugeVec
|
monitorStatus *prometheus.GaugeVec
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
|
|||||||
},
|
},
|
||||||
[]string{"monitor", "status", "is_alert"},
|
[]string{"monitor", "status", "is_alert"},
|
||||||
),
|
),
|
||||||
|
checkTime: prometheus.NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "minitor_check_milliseconds",
|
||||||
|
Help: "Time in miliseconds that a check ran for",
|
||||||
|
},
|
||||||
|
[]string{"monitor", "status"},
|
||||||
|
),
|
||||||
monitorStatus: prometheus.NewGaugeVec(
|
monitorStatus: prometheus.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Name: "minitor_monitor_up_count",
|
Name: "minitor_monitor_up_count",
|
||||||
@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
|
|||||||
// Register newly created metrics
|
// Register newly created metrics
|
||||||
prometheus.MustRegister(metrics.alertCount)
|
prometheus.MustRegister(metrics.alertCount)
|
||||||
prometheus.MustRegister(metrics.checkCount)
|
prometheus.MustRegister(metrics.checkCount)
|
||||||
|
prometheus.MustRegister(metrics.checkTime)
|
||||||
prometheus.MustRegister(metrics.monitorStatus)
|
prometheus.MustRegister(metrics.monitorStatus)
|
||||||
|
|
||||||
return metrics
|
return metrics
|
||||||
@ -68,7 +77,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// CountCheck counts the result of a particular Monitor check
|
// CountCheck counts the result of a particular Monitor check
|
||||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
|
||||||
status := "failure"
|
status := "failure"
|
||||||
if isSuccess {
|
if isSuccess {
|
||||||
status = "success"
|
status = "success"
|
||||||
@ -82,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
|
|||||||
metrics.checkCount.With(
|
metrics.checkCount.With(
|
||||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||||
).Inc()
|
).Inc()
|
||||||
|
|
||||||
|
metrics.checkTime.With(
|
||||||
|
prometheus.Labels{"monitor": monitor, "status": status},
|
||||||
|
).Set(float64(ms))
|
||||||
}
|
}
|
||||||
|
|
||||||
// CountAlert counts an alert
|
// CountAlert counts an alert
|
||||||
|
@ -25,6 +25,7 @@ type Monitor struct { //nolint:maligned
|
|||||||
lastCheck time.Time
|
lastCheck time.Time
|
||||||
lastSuccess time.Time
|
lastSuccess time.Time
|
||||||
lastOutput string
|
lastOutput string
|
||||||
|
lastCheckDuration time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||||
@ -57,9 +58,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
|||||||
cmd = ShellCommand(monitor.Command.ShellCommand)
|
cmd = ShellCommand(monitor.Command.ShellCommand)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checkStartTime := time.Now()
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
monitor.lastCheck = time.Now()
|
monitor.lastCheck = time.Now()
|
||||||
monitor.lastOutput = string(output)
|
monitor.lastOutput = string(output)
|
||||||
|
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
|
||||||
|
|
||||||
var alertNotice *AlertNotice
|
var alertNotice *AlertNotice
|
||||||
|
|
||||||
@ -88,6 +91,11 @@ func (monitor Monitor) IsUp() bool {
|
|||||||
return monitor.alertCount == 0
|
return monitor.alertCount == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LastCheckMilliseconds returns the number of milliseconds the last check ran for
|
||||||
|
func (monitor Monitor) LastCheckMilliseconds() int64 {
|
||||||
|
return monitor.lastCheckDuration.Milliseconds()
|
||||||
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) success() (notice *AlertNotice) {
|
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||||
if !monitor.IsUp() {
|
if !monitor.IsUp() {
|
||||||
// Alert that we have recovered
|
// Alert that we have recovered
|
||||||
|
Loading…
Reference in New Issue
Block a user