Adds ability to run specified alerts on startup
Some checks failed
continuous-integration/drone/push Build is failing
Some checks failed
continuous-integration/drone/push Build is failing
This is helpful to determine if your alerts are valid before an actual failure
This commit is contained in:
parent
6a2b44673e
commit
f58b4c1495
10
README.md
10
README.md
@ -118,6 +118,16 @@ To provide flexible formatting, the following non-standard functions are availab
|
||||
|
||||
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
|
||||
|
||||
#### Running alerts on startup
|
||||
|
||||
It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma separated list of strings and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This can be used to allow you time to correct when initially launching, and to allow schedulers to more easily detect a failed deployment of Minitor.
|
||||
|
||||
Eg.
|
||||
|
||||
```bash
|
||||
minitor -startup-alerts=log_down,log_up -config ./config.yml
|
||||
```
|
||||
|
||||
### Metrics
|
||||
|
||||
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
|
||||
|
38
main.go
38
main.go
@ -4,6 +4,7 @@ import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
@ -91,9 +92,38 @@ func checkMonitors(config *Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func sendStartupAlerts(config *Config, alertNames []string) error {
|
||||
for _, alertName := range alertNames {
|
||||
var err error
|
||||
|
||||
alert, ok := config.Alerts[alertName]
|
||||
if !ok {
|
||||
err = fmt.Errorf("unknown alert %s: %w", alertName, errUnknownAlert)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
_, err = alert.Send(AlertNotice{
|
||||
AlertCount: 0,
|
||||
FailureCount: 0,
|
||||
IsUp: true,
|
||||
LastSuccess: time.Now(),
|
||||
MonitorName: fmt.Sprintf("First Run Alert Test: %s", alert.Name),
|
||||
LastCheckOutput: "",
|
||||
})
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
||||
startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
|
||||
|
||||
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
|
||||
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
||||
@ -119,6 +149,14 @@ func main() {
|
||||
go ServeMetrics()
|
||||
}
|
||||
|
||||
if *startupAlerts != "" {
|
||||
alertNames := strings.Split(*startupAlerts, ",")
|
||||
|
||||
err = sendStartupAlerts(&config, alertNames)
|
||||
|
||||
slog.OnErrPanicf(err, "Error running startup alerts")
|
||||
}
|
||||
|
||||
// Start main loop
|
||||
for {
|
||||
err = checkMonitors(&config)
|
||||
|
61
main_test.go
61
main_test.go
@ -134,3 +134,64 @@ func TestCheckMonitors(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFirstRunAlerts(t *testing.T) {
|
||||
cases := []struct {
|
||||
config Config
|
||||
expectErr bool
|
||||
startupAlerts []string
|
||||
name string
|
||||
}{
|
||||
{
|
||||
config: Config{},
|
||||
expectErr: false,
|
||||
startupAlerts: []string{},
|
||||
name: "Empty",
|
||||
},
|
||||
{
|
||||
config: Config{},
|
||||
expectErr: true,
|
||||
startupAlerts: []string{"missing"},
|
||||
name: "Unknown",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Alerts: map[string]*Alert{
|
||||
"good": {
|
||||
Command: CommandOrShell{Command: []string{"true"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
startupAlerts: []string{"good"},
|
||||
name: "Successful alert",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Alerts: map[string]*Alert{
|
||||
"bad": {
|
||||
Name: "bad",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
startupAlerts: []string{"bad"},
|
||||
name: "Failed alert",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
err := c.config.Init()
|
||||
if err != nil {
|
||||
t.Errorf("sendFirstRunAlerts(%s): unexpected error reading config: %v", c.name, err)
|
||||
}
|
||||
|
||||
err = sendStartupAlerts(&c.config, c.startupAlerts)
|
||||
if err == nil && c.expectErr {
|
||||
t.Errorf("sendFirstRunAlerts(%s): Expected error, the code did not error", c.name)
|
||||
} else if err != nil && !c.expectErr {
|
||||
t.Errorf("sendFirstRunAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user