2019-09-21 22:03:26 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
2019-10-04 23:05:25 +00:00
|
|
|
"flag"
|
2019-10-04 22:46:49 +00:00
|
|
|
"fmt"
|
2019-10-02 16:37:29 +00:00
|
|
|
"log"
|
2019-09-21 22:03:26 +00:00
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
2019-10-04 23:17:20 +00:00
|
|
|
var (
|
|
|
|
// LogDebug will control whether debug messsages should be logged
|
|
|
|
LogDebug = false
|
|
|
|
|
2019-11-15 19:25:21 +00:00
|
|
|
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
|
|
|
ExportMetrics = false
|
|
|
|
// MetricsPort is the port to expose metrics on
|
|
|
|
MetricsPort = 8080
|
|
|
|
// Metrics contains all active metrics
|
|
|
|
Metrics = NewMetrics()
|
|
|
|
|
2019-10-04 23:17:20 +00:00
|
|
|
// version of minitor being run
|
|
|
|
version = "dev"
|
|
|
|
)
|
2019-10-04 23:05:25 +00:00
|
|
|
|
2019-10-04 23:17:36 +00:00
|
|
|
func checkMonitors(config *Config) error {
|
2019-10-04 21:47:38 +00:00
|
|
|
for _, monitor := range config.Monitors {
|
|
|
|
if monitor.ShouldCheck() {
|
2019-11-15 19:25:21 +00:00
|
|
|
success, alertNotice := monitor.Check()
|
|
|
|
|
|
|
|
hasAlert := alertNotice != nil
|
|
|
|
|
|
|
|
// Track status metrics
|
|
|
|
Metrics.SetMonitorStatus(monitor.Name, success)
|
|
|
|
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
2019-10-04 21:47:38 +00:00
|
|
|
|
|
|
|
// Should probably consider refactoring everything below here
|
|
|
|
if alertNotice != nil {
|
2019-10-04 23:05:25 +00:00
|
|
|
if LogDebug {
|
|
|
|
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
|
|
|
|
}
|
2019-10-04 21:47:38 +00:00
|
|
|
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
|
|
|
if alertNames == nil {
|
2019-10-07 17:48:19 +00:00
|
|
|
// This should only happen for a recovery alert. AlertDown is validated not empty
|
2019-10-04 22:46:49 +00:00
|
|
|
log.Printf(
|
|
|
|
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
|
|
|
alertNotice.MonitorName, alertNotice.IsUp,
|
|
|
|
)
|
2019-10-04 21:47:38 +00:00
|
|
|
}
|
|
|
|
for _, alertName := range alertNames {
|
|
|
|
if alert, ok := config.Alerts[alertName]; ok {
|
|
|
|
output, err := alert.Send(*alertNotice)
|
|
|
|
if err != nil {
|
|
|
|
log.Printf(
|
|
|
|
"ERROR: Alert '%s' failed. result=%v: output=%s",
|
|
|
|
alert.Name,
|
|
|
|
err,
|
|
|
|
output,
|
|
|
|
)
|
2019-10-04 23:17:36 +00:00
|
|
|
return fmt.Errorf(
|
2019-10-07 17:48:19 +00:00
|
|
|
"Unsuccessfully triggered alert '%s'. "+
|
2019-10-04 21:47:38 +00:00
|
|
|
"Crashing to avoid false negatives: %v",
|
|
|
|
alert.Name,
|
|
|
|
err,
|
2019-10-04 23:17:36 +00:00
|
|
|
)
|
2019-10-04 21:47:38 +00:00
|
|
|
}
|
2019-11-15 19:25:21 +00:00
|
|
|
|
|
|
|
// Count alert metrics
|
|
|
|
Metrics.CountAlert(monitor.Name, alert.Name)
|
2019-10-04 21:47:38 +00:00
|
|
|
} else {
|
2019-10-07 17:48:19 +00:00
|
|
|
// This case should never actually happen since we validate against it
|
|
|
|
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
|
|
|
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
2019-10-04 21:47:38 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2019-10-04 23:17:36 +00:00
|
|
|
|
|
|
|
return nil
|
2019-10-04 21:47:38 +00:00
|
|
|
}
|
|
|
|
|
2019-09-21 22:03:26 +00:00
|
|
|
func main() {
|
2019-10-04 23:05:25 +00:00
|
|
|
// Get debug flag
|
|
|
|
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
|
2019-11-15 19:25:21 +00:00
|
|
|
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
2019-10-04 23:17:20 +00:00
|
|
|
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
|
2019-10-04 23:05:25 +00:00
|
|
|
flag.Parse()
|
|
|
|
|
2019-10-04 23:17:20 +00:00
|
|
|
// Print version if flag is provided
|
|
|
|
if *showVersion {
|
2019-11-15 19:25:21 +00:00
|
|
|
log.Println("Minitor version:", version)
|
2019-10-04 23:17:20 +00:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load configuration
|
2019-10-04 01:16:03 +00:00
|
|
|
config, err := LoadConfig("config.yml")
|
|
|
|
if err != nil {
|
|
|
|
log.Fatalf("Error loading config: %v", err)
|
|
|
|
}
|
2019-09-21 22:03:26 +00:00
|
|
|
|
2019-11-15 19:25:21 +00:00
|
|
|
// Serve metrics exporter, if specified
|
|
|
|
if ExportMetrics {
|
|
|
|
log.Println("INFO: Exporting metrics to Prometheus")
|
|
|
|
go ServeMetrics()
|
|
|
|
}
|
|
|
|
|
2019-10-04 21:47:38 +00:00
|
|
|
// Start main loop
|
2019-09-21 22:03:26 +00:00
|
|
|
for {
|
2019-10-04 23:17:36 +00:00
|
|
|
err = checkMonitors(&config)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2019-09-21 22:03:26 +00:00
|
|
|
|
|
|
|
sleepTime := time.Duration(config.CheckInterval) * time.Second
|
|
|
|
time.Sleep(sleepTime)
|
|
|
|
}
|
|
|
|
}
|