Adds ability to run specified alerts on startup
Some checks failed
continuous-integration/drone/push Build is failing

This is helpful to determine if your alerts are valid before an actual failure
This commit is contained in:
IamTheFij 2024-04-03 12:03:17 -07:00
parent 6a2b44673e
commit f58b4c1495
3 changed files with 109 additions and 0 deletions

View File

@ -118,6 +118,16 @@ To provide flexible formatting, the following non-standard functions are availab
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
#### Running alerts on startup
It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma-separated list of alert names and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This gives you time to correct broken alerts when initially launching, and allows schedulers to more easily detect a failed deployment of Minitor.
For example:
```bash
minitor -startup-alerts=log_down,log_up -config ./config.yml
```
### Metrics
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.

38
main.go
View File

@ -4,6 +4,7 @@ import (
"errors"
"flag"
"fmt"
"strings"
"time"
"git.iamthefij.com/iamthefij/slog"
@ -91,9 +92,38 @@ func checkMonitors(config *Config) error {
return nil
}
// sendStartupAlerts sends a test notice through each alert named in
// alertNames, in order, looking each one up in config.Alerts.
//
// It stops at the first problem and returns it: either an error wrapping
// errUnknownAlert when a name is not present in config.Alerts, or whatever
// error the alert's Send call returned. It returns nil when every named alert
// was sent without error, including when alertNames is empty.
func sendStartupAlerts(config *Config, alertNames []string) error {
	for _, name := range alertNames {
		target, found := config.Alerts[name]
		if !found {
			return fmt.Errorf("unknown alert %s: %w", name, errUnknownAlert)
		}

		// The test notice reports an "up" state with zeroed counters and no
		// check output; only the monitor name identifies it as a startup test.
		notice := AlertNotice{
			AlertCount:      0,
			FailureCount:    0,
			IsUp:            true,
			LastSuccess:     time.Now(),
			MonitorName:     fmt.Sprintf("First Run Alert Test: %s", target.Name),
			LastCheckOutput: "",
		}

		if _, err := target.Send(notice); err != nil {
			return err
		}
	}

	return nil
}
func main() {
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
@ -119,6 +149,14 @@ func main() {
go ServeMetrics()
}
if *startupAlerts != "" {
alertNames := strings.Split(*startupAlerts, ",")
err = sendStartupAlerts(&config, alertNames)
slog.OnErrPanicf(err, "Error running startup alerts")
}
// Start main loop
for {
err = checkMonitors(&config)

View File

@ -134,3 +134,64 @@ func TestCheckMonitors(t *testing.T) {
}
}
}
// TestFirstRunAlerts is a table-driven test of sendStartupAlerts. Each case
// initializes its own Config and then checks only whether sendStartupAlerts
// returned an error, covering: no startup alerts, an alert name missing from
// the config, an alert whose command is "true", and an alert whose command is
// "false".
func TestFirstRunAlerts(t *testing.T) {
	cases := []struct {
		config        Config
		expectErr     bool
		startupAlerts []string
		name          string
	}{
		{
			config:        Config{},
			expectErr:     false,
			startupAlerts: []string{},
			name:          "Empty",
		},
		{
			config:        Config{},
			expectErr:     true,
			startupAlerts: []string{"missing"},
			name:          "Unknown",
		},
		{
			config: Config{
				Alerts: map[string]*Alert{
					"good": {
						Command: CommandOrShell{Command: []string{"true"}},
					},
				},
			},
			expectErr:     false,
			startupAlerts: []string{"good"},
			name:          "Successful alert",
		},
		{
			config: Config{
				Alerts: map[string]*Alert{
					"bad": {
						Name:    "bad",
						Command: CommandOrShell{Command: []string{"false"}},
					},
				},
			},
			expectErr:     true,
			startupAlerts: []string{"bad"},
			name:          "Failed alert",
		},
	}

	for _, c := range cases {
		if err := c.config.Init(); err != nil {
			t.Errorf("sendStartupAlerts(%s): unexpected error reading config: %v", c.name, err)

			// The config is unusable, so running the alerts would only add a
			// second, misleading failure for the same case.
			continue
		}

		err := sendStartupAlerts(&c.config, c.startupAlerts)
		if err == nil && c.expectErr {
			t.Errorf("sendStartupAlerts(%s): Expected error, the code did not error", c.name)
		} else if err != nil && !c.expectErr {
			t.Errorf("sendStartupAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
		}
	}
}