Adds ability to run specified alerts on startup

This is helpful to determine if your alerts are valid before an actual failure
2024-04-03 12:03:17 -07:00 · 2024-04-03 12:03:17 -07:00 · f58b4c1495
commit f58b4c1495
parent 6a2b44673e
3 changed files with 109 additions and 0 deletions
--- a/README.md
+++ b/README.md
@ -118,6 +118,16 @@ To provide flexible formatting, the following non-standard functions are availab

 For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).

+#### Running alerts on startup
+
+It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma-separated list of alert names (with no spaces around the commas, since names are matched exactly) and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This gives you time to correct broken alerts when initially launching, and allows schedulers to more easily detect a failed deployment of Minitor.
+
+For example:
+
+```bash
+minitor -startup-alerts=log_down,log_up -config ./config.yml
+```
+
 ### Metrics

 Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
--- a/main.go
+++ b/main.go
@ -4,6 +4,7 @@ import (
 	"errors"
 	"flag"
 	"fmt"
+	"strings"
 	"time"

 	"git.iamthefij.com/iamthefij/slog"
@ -91,9 +92,38 @@ func checkMonitors(config *Config) error {
 	return nil
 }

+func sendStartupAlerts(config *Config, alertNames []string) error {
+	for _, alertName := range alertNames {
+		var err error
+
+		alert, ok := config.Alerts[alertName]
+		if !ok {
+			err = fmt.Errorf("unknown alert %s: %w", alertName, errUnknownAlert)
+		}
+
+		if err == nil {
+			_, err = alert.Send(AlertNotice{
+				AlertCount:      0,
+				FailureCount:    0,
+				IsUp:            true,
+				LastSuccess:     time.Now(),
+				MonitorName:     fmt.Sprintf("First Run Alert Test: %s", alert.Name),
+				LastCheckOutput: "",
+			})
+		}
+
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
 func main() {
 	showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
 	configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
+	startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")

 	flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
 	flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
@ -119,6 +149,14 @@ func main() {
 		go ServeMetrics()
 	}

+	if *startupAlerts != "" {
+		alertNames := strings.Split(*startupAlerts, ",")
+
+		err = sendStartupAlerts(&config, alertNames)
+
+		slog.OnErrPanicf(err, "Error running startup alerts")
+	}
+
 	// Start main loop
 	for {
 		err = checkMonitors(&config)
--- a/main_test.go
+++ b/main_test.go
@ -134,3 +134,64 @@ func TestCheckMonitors(t *testing.T) {
 		}
 	}
 }
+
+// TestFirstRunAlerts checks sendStartupAlerts for empty, unknown, succeeding and failing alerts.
+func TestFirstRunAlerts(t *testing.T) {
+	cases := []struct {
+		config        Config
+		expectErr     bool
+		startupAlerts []string
+		name          string
+	}{
+		{
+			config:        Config{},
+			expectErr:     false,
+			startupAlerts: []string{},
+			name:          "Empty",
+		},
+		{
+			config:        Config{},
+			expectErr:     true,
+			startupAlerts: []string{"missing"},
+			name:          "Unknown",
+		},
+		{
+			config: Config{
+				Alerts: map[string]*Alert{
+					"good": {
+						Command: CommandOrShell{Command: []string{"true"}},
+					},
+				},
+			},
+			expectErr:     false,
+			startupAlerts: []string{"good"},
+			name:          "Successful alert",
+		},
+		{
+			config: Config{
+				Alerts: map[string]*Alert{
+					"bad": {
+						Name:    "bad",
+						Command: CommandOrShell{Command: []string{"false"}},
+					},
+				},
+			},
+			expectErr:     true,
+			startupAlerts: []string{"bad"},
+			name:          "Failed alert",
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			if err := c.config.Init(); err != nil {
+				t.Fatalf("sendStartupAlerts(%s): unexpected error reading config: %v", c.name, err)
+			}
+			// Only whether an error occurred is asserted, not which one.
+			err := sendStartupAlerts(&c.config, c.startupAlerts)
+			if gotErr := err != nil; gotErr != c.expectErr {
+				t.Errorf("sendStartupAlerts(%s): expectErr=%t, got err=%v", c.name, c.expectErr, err)
+			}
+		})
+	}
+}