From f58b4c149512a9137172b57bdbe91b66b37f1253 Mon Sep 17 00:00:00 2001
From: Ian Fijolek
Date: Wed, 3 Apr 2024 12:03:17 -0700
Subject: [PATCH] Adds ability to run specified alerts on startup

This is helpful to determine if your alerts are valid before an actual failure
---
 README.md    | 10 +++++++++
 main.go      | 38 ++++++++++++++++++++++++++++++++
 main_test.go | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 109 insertions(+)

diff --git a/README.md b/README.md
index f24ac86..5b9c232 100644
--- a/README.md
+++ b/README.md
@@ -118,6 +118,16 @@ To provide flexible formatting, the following non-standard functions are availab
 
 For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
 
+#### Running alerts on startup
+
+It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma-separated list of alert names and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This gives you time to correct broken alerts when initially launching, and allows schedulers to more easily detect a failed deployment of Minitor.
+
+E.g.
+
+```bash
+minitor -startup-alerts=log_down,log_up -config ./config.yml
+```
+
 ### Metrics
 
 Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
diff --git a/main.go b/main.go
index 97249e2..1a0d22a 100644
--- a/main.go
+++ b/main.go
@@ -4,6 +4,7 @@ import (
 	"errors"
 	"flag"
 	"fmt"
+	"strings"
 	"time"
 
 	"git.iamthefij.com/iamthefij/slog"
@@ -91,9 +92,38 @@ func checkMonitors(config *Config) error {
 	return nil
 }
 
+// sendStartupAlerts sends a test notice through each named alert so broken
+// alerts surface at launch. It returns the first unknown-name or send error.
+func sendStartupAlerts(config *Config, alertNames []string) error {
+	for _, alertName := range alertNames {
+		// Tolerate spaces around the commas, e.g. -startup-alerts="a, b".
+		alertName = strings.TrimSpace(alertName)
+
+		alert, ok := config.Alerts[alertName]
+		if !ok || alert == nil {
+			return fmt.Errorf("unknown alert %s: %w", alertName, errUnknownAlert)
+		}
+
+		_, err := alert.Send(AlertNotice{
+			AlertCount:      0,
+			FailureCount:    0,
+			IsUp:            true,
+			LastSuccess:     time.Now(),
+			MonitorName:     fmt.Sprintf("First Run Alert Test: %s", alert.Name),
+			LastCheckOutput: "",
+		})
+		if err != nil {
+			return fmt.Errorf("startup alert %s: %w", alertName, err)
+		}
+	}
+
+	return nil
+}
+
 func main() {
 	showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
 	configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
+	startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
 	flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
 	flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
 
@@ -119,6 +149,14 @@ func main() {
 		go ServeMetrics()
 	}
 
+	if *startupAlerts != "" {
+		alertNames := strings.Split(*startupAlerts, ",")
+
+		err = sendStartupAlerts(&config, alertNames)
+
+		slog.OnErrPanicf(err, "Error running startup alerts")
+	}
+
 	// Start main loop
 	for {
 		err = checkMonitors(&config)
diff --git a/main_test.go b/main_test.go
index 9bc11d7..947f1ae 100644
--- a/main_test.go
+++ b/main_test.go
@@ -134,3 +134,64 @@ func TestCheckMonitors(t *testing.T) {
 		}
 	}
 }
+
+func TestFirstRunAlerts(t *testing.T) {
+	cases := []struct {
+		config        Config
+		expectErr     bool
+		startupAlerts []string
+		name          string
+	}{
+		{
+			config:        Config{},
+			expectErr:     false,
+			startupAlerts: []string{},
+			name:          "Empty",
+		},
+		{
+			config:        Config{},
+			expectErr:     true,
+			startupAlerts: []string{"missing"},
+			name:          "Unknown",
+		},
+		{
+			config: Config{
+				Alerts: map[string]*Alert{
+					"good": {
+						Command: CommandOrShell{Command: []string{"true"}},
+					},
+				},
+			},
+			expectErr:     false,
+			startupAlerts: []string{"good"},
+			name:          "Successful alert",
+		},
+		{
+			config: Config{
+				Alerts: map[string]*Alert{
+					"bad": {
+						Name:    "bad",
+						Command: CommandOrShell{Command: []string{"false"}},
+					},
+				},
+			},
+			expectErr:     true,
+			startupAlerts: []string{"bad"},
+			name:          "Failed alert",
+		},
+	}
+
+	for _, c := range cases {
+		err := c.config.Init()
+		if err != nil {
+			t.Errorf("sendStartupAlerts(%s): unexpected error reading config: %v", c.name, err)
+		}
+
+		err = sendStartupAlerts(&c.config, c.startupAlerts)
+		if err == nil && c.expectErr {
+			t.Errorf("sendStartupAlerts(%s): Expected error, the code did not error", c.name)
+		} else if err != nil && !c.expectErr {
+			t.Errorf("sendStartupAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
+		}
+	}
+}