Add more liniting and update to pass
This commit is contained in:
parent
fda9e1bfc3
commit
5f250f17a8
1
.gitignore
vendored
1
.gitignore
vendored
@ -17,4 +17,5 @@ config.yml
|
|||||||
|
|
||||||
# Output binary
|
# Output binary
|
||||||
minitor
|
minitor
|
||||||
|
minitor-go
|
||||||
dist/
|
dist/
|
||||||
|
48
.golangci.yml
Normal file
48
.golangci.yml
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
---
|
||||||
|
linters:
|
||||||
|
enable:
|
||||||
|
- asciicheck
|
||||||
|
- bodyclose
|
||||||
|
- dogsled
|
||||||
|
- dupl
|
||||||
|
- exhaustive
|
||||||
|
- gochecknoinits
|
||||||
|
- gocognit
|
||||||
|
- gocritic
|
||||||
|
- gocyclo
|
||||||
|
- goerr113
|
||||||
|
- gofumpt
|
||||||
|
- goimports
|
||||||
|
- gomnd
|
||||||
|
- goprintffuncname
|
||||||
|
# - gosec
|
||||||
|
# - ifshort
|
||||||
|
- interfacer
|
||||||
|
- maligned
|
||||||
|
- misspell
|
||||||
|
- nakedret
|
||||||
|
- nestif
|
||||||
|
- nlreturn
|
||||||
|
- noctx
|
||||||
|
- unparam
|
||||||
|
- wsl
|
||||||
|
# - errorlint
|
||||||
|
disable:
|
||||||
|
- gochecknoglobals
|
||||||
|
|
||||||
|
linters-settings:
|
||||||
|
gosec:
|
||||||
|
excludes:
|
||||||
|
- G204
|
||||||
|
# gomnd:
|
||||||
|
# settings:
|
||||||
|
# mnd:
|
||||||
|
# ignored-functions: math.*
|
||||||
|
|
||||||
|
issues:
|
||||||
|
exclude-rules:
|
||||||
|
- path: _test\.go
|
||||||
|
linters:
|
||||||
|
- errcheck
|
||||||
|
- gosec
|
||||||
|
- maligned
|
@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v2.4.0
|
rev: v3.4.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: check-added-large-files
|
- id: check-added-large-files
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
@ -11,12 +11,11 @@ repos:
|
|||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
- id: check-merge-conflict
|
- id: check-merge-conflict
|
||||||
- repo: git://github.com/dnephin/pre-commit-golang
|
- repo: git://github.com/dnephin/pre-commit-golang
|
||||||
rev: v0.3.5
|
rev: v0.4.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: go-fmt
|
- id: go-fmt
|
||||||
- id: go-imports
|
- id: go-imports
|
||||||
# - id: gometalinter
|
- id: golangci-lint
|
||||||
# - id: golangci-lint
|
|
||||||
# - repo: https://github.com/IamTheFij/docker-pre-commit
|
# - repo: https://github.com/IamTheFij/docker-pre-commit
|
||||||
# rev: v2.0.0
|
# rev: v2.0.0
|
||||||
# hooks:
|
# hooks:
|
||||||
|
52
alert.go
52
alert.go
@ -2,6 +2,7 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
@ -11,6 +12,13 @@ import (
|
|||||||
"git.iamthefij.com/iamthefij/slog"
|
"git.iamthefij.com/iamthefij/slog"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errNoTemplate = errors.New("no template")
|
||||||
|
|
||||||
|
// ErrAlertFailed indicates that an alert failed to send
|
||||||
|
ErrAlertFailed = errors.New("alert failed")
|
||||||
|
)
|
||||||
|
|
||||||
// Alert is a config driven mechanism for sending a notice
|
// Alert is a config driven mechanism for sending a notice
|
||||||
type Alert struct {
|
type Alert struct {
|
||||||
Name string
|
Name string
|
||||||
@ -21,12 +29,12 @@ type Alert struct {
|
|||||||
|
|
||||||
// AlertNotice captures the context for an alert to be sent
|
// AlertNotice captures the context for an alert to be sent
|
||||||
type AlertNotice struct {
|
type AlertNotice struct {
|
||||||
MonitorName string
|
|
||||||
AlertCount int16
|
AlertCount int16
|
||||||
FailureCount int16
|
FailureCount int16
|
||||||
LastCheckOutput string
|
|
||||||
LastSuccess time.Time
|
|
||||||
IsUp bool
|
IsUp bool
|
||||||
|
LastSuccess time.Time
|
||||||
|
MonitorName string
|
||||||
|
LastCheckOutput string
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid returns a boolean indicating if the Alert has been correctly
|
// IsValid returns a boolean indicating if the Alert has been correctly
|
||||||
@ -49,26 +57,30 @@ func (alert *Alert) BuildTemplates() error {
|
|||||||
|
|
||||||
slog.Debugf("Building template for alert %s", alert.Name)
|
slog.Debugf("Building template for alert %s", alert.Name)
|
||||||
|
|
||||||
if alert.commandTemplate == nil && alert.Command.Command != nil {
|
switch {
|
||||||
|
case alert.commandTemplate == nil && alert.Command.Command != nil:
|
||||||
alert.commandTemplate = []*template.Template{}
|
alert.commandTemplate = []*template.Template{}
|
||||||
for i, cmdPart := range alert.Command.Command {
|
for i, cmdPart := range alert.Command.Command {
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
cmdPart = legacy.Replace(cmdPart)
|
cmdPart = legacy.Replace(cmdPart)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
||||||
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
|
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" {
|
case alert.commandShellTemplate == nil && alert.Command.ShellCommand != "":
|
||||||
shellCmd := alert.Command.ShellCommand
|
shellCmd := alert.Command.ShellCommand
|
||||||
|
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
shellCmd = legacy.Replace(shellCmd)
|
shellCmd = legacy.Replace(shellCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert.commandShellTemplate = template.Must(
|
alert.commandShellTemplate = template.Must(
|
||||||
template.New(alert.Name).Parse(shellCmd),
|
template.New(alert.Name).Parse(shellCmd),
|
||||||
)
|
)
|
||||||
} else {
|
default:
|
||||||
return fmt.Errorf("No template provided for alert %s", alert.Name)
|
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@ -79,28 +91,37 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
|||||||
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||||
|
|
||||||
var cmd *exec.Cmd
|
var cmd *exec.Cmd
|
||||||
if alert.commandTemplate != nil {
|
|
||||||
|
switch {
|
||||||
|
case alert.commandTemplate != nil:
|
||||||
command := []string{}
|
command := []string{}
|
||||||
|
|
||||||
for _, cmdTmp := range alert.commandTemplate {
|
for _, cmdTmp := range alert.commandTemplate {
|
||||||
var commandBuffer bytes.Buffer
|
var commandBuffer bytes.Buffer
|
||||||
|
|
||||||
err = cmdTmp.Execute(&commandBuffer, notice)
|
err = cmdTmp.Execute(&commandBuffer, notice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
command = append(command, commandBuffer.String())
|
command = append(command, commandBuffer.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd = exec.Command(command[0], command[1:]...)
|
cmd = exec.Command(command[0], command[1:]...)
|
||||||
} else if alert.commandShellTemplate != nil {
|
case alert.commandShellTemplate != nil:
|
||||||
var commandBuffer bytes.Buffer
|
var commandBuffer bytes.Buffer
|
||||||
|
|
||||||
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
shellCommand := commandBuffer.String()
|
shellCommand := commandBuffer.String()
|
||||||
|
|
||||||
cmd = ShellCommand(shellCommand)
|
cmd = ShellCommand(shellCommand)
|
||||||
} else {
|
default:
|
||||||
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
|
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -114,6 +135,15 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
|||||||
outputStr = string(output)
|
outputStr = string(output)
|
||||||
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf(
|
||||||
|
"Alert '%s' failed to send. Returned %v: %w",
|
||||||
|
alert.Name,
|
||||||
|
err,
|
||||||
|
ErrAlertFailed,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
return outputStr, err
|
return outputStr, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -123,6 +123,7 @@ func TestAlertSend(t *testing.T) {
|
|||||||
|
|
||||||
// Set PyCompat back to default value
|
// Set PyCompat back to default value
|
||||||
PyCompat = false
|
PyCompat = false
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
32
config.go
32
config.go
@ -8,6 +8,8 @@ import (
|
|||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var errInvalidConfig = errors.New("Invalid configuration")
|
||||||
|
|
||||||
// Config type is contains all provided user configuration
|
// Config type is contains all provided user configuration
|
||||||
type Config struct {
|
type Config struct {
|
||||||
CheckInterval int64 `yaml:"check_interval"`
|
CheckInterval int64 `yaml:"check_interval"`
|
||||||
@ -35,14 +37,17 @@ func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) erro
|
|||||||
// Error indicates this is shell command
|
// Error indicates this is shell command
|
||||||
if err != nil {
|
if err != nil {
|
||||||
var shellCmd string
|
var shellCmd string
|
||||||
|
|
||||||
err := unmarshal(&shellCmd)
|
err := unmarshal(&shellCmd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cos.ShellCommand = shellCmd
|
cos.ShellCommand = shellCmd
|
||||||
} else {
|
} else {
|
||||||
cos.Command = cmd
|
cos.Command = cmd
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -57,11 +62,14 @@ func (config Config) IsValid() (isValid bool) {
|
|||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, alert := range config.Alerts {
|
for _, alert := range config.Alerts {
|
||||||
if !alert.IsValid() {
|
if !alert.IsValid() {
|
||||||
slog.Errorf("Invalid alert configuration: %s", alert.Name)
|
slog.Errorf("Invalid alert configuration: %+v", alert.Name)
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
|
} else {
|
||||||
|
slog.Debugf("Loaded alert %s", alert.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -71,6 +79,7 @@ func (config Config) IsValid() (isValid bool) {
|
|||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, monitor := range config.Monitors {
|
for _, monitor := range config.Monitors {
|
||||||
if !monitor.IsValid() {
|
if !monitor.IsValid() {
|
||||||
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
||||||
@ -85,13 +94,14 @@ func (config Config) IsValid() (isValid bool) {
|
|||||||
"Invalid monitor configuration: %s. Unknown alert %s",
|
"Invalid monitor configuration: %s. Unknown alert %s",
|
||||||
monitor.Name, alertName,
|
monitor.Name, alertName,
|
||||||
)
|
)
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return isValid
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init performs extra initialization on top of loading the config from file
|
// Init performs extra initialization on top of loading the config from file
|
||||||
@ -122,22 +132,26 @@ func LoadConfig(filePath string) (config Config, err error) {
|
|||||||
|
|
||||||
// Add log alert if not present
|
// Add log alert if not present
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
// Intialize alerts list if not present
|
// Initialize alerts list if not present
|
||||||
if config.Alerts == nil {
|
if config.Alerts == nil {
|
||||||
config.Alerts = map[string]*Alert{}
|
config.Alerts = map[string]*Alert{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := config.Alerts["log"]; !ok {
|
if _, ok := config.Alerts["log"]; !ok {
|
||||||
config.Alerts["log"] = NewLogAlert()
|
config.Alerts["log"] = NewLogAlert()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !config.IsValid() {
|
|
||||||
err = errors.New("Invalid configuration")
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Finish initializing configuration
|
// Finish initializing configuration
|
||||||
err = config.Init()
|
if err = config.Init(); err != nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !config.IsValid() {
|
||||||
|
err = errInvalidConfig
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return config, err
|
||||||
|
}
|
||||||
|
@ -27,12 +27,15 @@ func TestLoadConfig(t *testing.T) {
|
|||||||
PyCompat = c.pyCompat
|
PyCompat = c.pyCompat
|
||||||
_, err := LoadConfig(c.configPath)
|
_, err := LoadConfig(c.configPath)
|
||||||
hasErr := (err != nil)
|
hasErr := (err != nil)
|
||||||
|
|
||||||
if hasErr != c.expectErr {
|
if hasErr != c.expectErr {
|
||||||
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set PyCompat to default value
|
// Set PyCompat to default value
|
||||||
PyCompat = false
|
PyCompat = false
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -41,6 +44,7 @@ func TestLoadConfig(t *testing.T) {
|
|||||||
// and execution of mutli-line strings presented in YAML
|
// and execution of mutli-line strings presented in YAML
|
||||||
func TestMultiLineConfig(t *testing.T) {
|
func TestMultiLineConfig(t *testing.T) {
|
||||||
log.Println("Testing multi-line string config")
|
log.Println("Testing multi-line string config")
|
||||||
|
|
||||||
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
||||||
@ -48,8 +52,10 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(parse > string)")
|
log.Println("TestMultiLineConfig(parse > string)")
|
||||||
|
|
||||||
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
||||||
actual := config.Monitors[0].Command.ShellCommand
|
actual := config.Monitors[0].Command.ShellCommand
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(>) failed")
|
t.Errorf("TestMultiLineConfig(>) failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@ -60,12 +66,15 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(execute > string)")
|
log.Println("TestMultiLineConfig(execute > string)")
|
||||||
|
|
||||||
_, notice := config.Monitors[0].Check()
|
_, notice := config.Monitors[0].Check()
|
||||||
if notice == nil {
|
if notice == nil {
|
||||||
t.Fatalf("Did not receive an alert notice")
|
t.Fatalf("Did not receive an alert notice")
|
||||||
}
|
}
|
||||||
|
|
||||||
expected = "Some string with stuff\n<angle brackets>\n"
|
expected = "Some string with stuff\n<angle brackets>\n"
|
||||||
actual = notice.LastCheckOutput
|
actual = notice.LastCheckOutput
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@ -76,8 +85,10 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(parse | string)")
|
log.Println("TestMultiLineConfig(parse | string)")
|
||||||
|
|
||||||
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
||||||
actual = config.Alerts["log_shell"].Command.ShellCommand
|
actual = config.Alerts["log_shell"].Command.ShellCommand
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(|) failed")
|
t.Errorf("TestMultiLineConfig(|) failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@ -88,10 +99,12 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(execute | string)")
|
log.Println("TestMultiLineConfig(execute | string)")
|
||||||
|
|
||||||
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Execution of alert failed")
|
t.Errorf("Execution of alert failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
expected = "Some string with stuff\n<angle brackets>\n"
|
expected = "Some string with stuff\n<angle brackets>\n"
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
||||||
|
80
main.go
80
main.go
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"time"
|
"time"
|
||||||
@ -21,8 +22,49 @@ var (
|
|||||||
|
|
||||||
// version of minitor being run
|
// version of minitor being run
|
||||||
version = "dev"
|
version = "dev"
|
||||||
|
|
||||||
|
errUnknownAlert = errors.New("unknown alert")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
|
||||||
|
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
|
||||||
|
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
||||||
|
|
||||||
|
if alertNames == nil {
|
||||||
|
// This should only happen for a recovery alert. AlertDown is validated not empty
|
||||||
|
slog.Warningf(
|
||||||
|
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||||
|
alertNotice.MonitorName, alertNotice.IsUp,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, alertName := range alertNames {
|
||||||
|
if alert, ok := config.Alerts[alertName]; ok {
|
||||||
|
output, err := alert.Send(*alertNotice)
|
||||||
|
if err != nil {
|
||||||
|
slog.Errorf(
|
||||||
|
"Alert '%s' failed. result=%v: output=%s",
|
||||||
|
alert.Name,
|
||||||
|
err,
|
||||||
|
output,
|
||||||
|
)
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count alert metrics
|
||||||
|
Metrics.CountAlert(monitor.Name, alert.Name)
|
||||||
|
} else {
|
||||||
|
// This case should never actually happen since we validate against it
|
||||||
|
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||||
|
|
||||||
|
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func checkMonitors(config *Config) error {
|
func checkMonitors(config *Config) error {
|
||||||
for _, monitor := range config.Monitors {
|
for _, monitor := range config.Monitors {
|
||||||
if monitor.ShouldCheck() {
|
if monitor.ShouldCheck() {
|
||||||
@ -34,44 +76,8 @@ func checkMonitors(config *Config) error {
|
|||||||
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
||||||
|
|
||||||
// Should probably consider refactoring everything below here
|
|
||||||
if alertNotice != nil {
|
if alertNotice != nil {
|
||||||
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
|
return sendAlerts(config, monitor, alertNotice)
|
||||||
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
|
||||||
if alertNames == nil {
|
|
||||||
// This should only happen for a recovery alert. AlertDown is validated not empty
|
|
||||||
slog.Warningf(
|
|
||||||
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
|
||||||
alertNotice.MonitorName, alertNotice.IsUp,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
for _, alertName := range alertNames {
|
|
||||||
if alert, ok := config.Alerts[alertName]; ok {
|
|
||||||
output, err := alert.Send(*alertNotice)
|
|
||||||
if err != nil {
|
|
||||||
slog.Errorf(
|
|
||||||
"Alert '%s' failed. result=%v: output=%s",
|
|
||||||
alert.Name,
|
|
||||||
err,
|
|
||||||
output,
|
|
||||||
)
|
|
||||||
return fmt.Errorf(
|
|
||||||
"Unsuccessfully triggered alert '%s'. "+
|
|
||||||
"Crashing to avoid false negatives: %v",
|
|
||||||
alert.Name,
|
|
||||||
err,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count alert metrics
|
|
||||||
Metrics.CountAlert(monitor.Name, alert.Name)
|
|
||||||
} else {
|
|
||||||
// This case should never actually happen since we validate against it
|
|
||||||
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
|
||||||
|
|
||||||
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,6 +63,7 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
|||||||
if isUp {
|
if isUp {
|
||||||
val = 1.0
|
val = 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,6 +97,8 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
|
|||||||
// ServeMetrics starts an http server with a Prometheus metrics handler
|
// ServeMetrics starts an http server with a Prometheus metrics handler
|
||||||
func ServeMetrics() {
|
func ServeMetrics() {
|
||||||
http.Handle("/metrics", promhttp.Handler())
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
host := fmt.Sprintf(":%d", MetricsPort)
|
host := fmt.Sprintf(":%d", MetricsPort)
|
||||||
|
|
||||||
_ = http.ListenAndServe(host, nil)
|
_ = http.ListenAndServe(host, nil)
|
||||||
}
|
}
|
||||||
|
33
monitor.go
33
monitor.go
@ -9,21 +9,22 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
// Monitor represents a particular periodic check of a command
|
// Monitor represents a particular periodic check of a command
|
||||||
type Monitor struct {
|
type Monitor struct { //nolint:maligned
|
||||||
// Config values
|
// Config values
|
||||||
Name string
|
|
||||||
Command CommandOrShell
|
|
||||||
AlertDown []string `yaml:"alert_down"`
|
|
||||||
AlertUp []string `yaml:"alert_up"`
|
|
||||||
CheckInterval float64 `yaml:"check_interval"`
|
|
||||||
AlertAfter int16 `yaml:"alert_after"`
|
AlertAfter int16 `yaml:"alert_after"`
|
||||||
AlertEvery int16 `yaml:"alert_every"`
|
AlertEvery int16 `yaml:"alert_every"`
|
||||||
|
CheckInterval float64 `yaml:"check_interval"`
|
||||||
|
Name string
|
||||||
|
AlertDown []string `yaml:"alert_down"`
|
||||||
|
AlertUp []string `yaml:"alert_up"`
|
||||||
|
Command CommandOrShell
|
||||||
|
|
||||||
// Other values
|
// Other values
|
||||||
lastCheck time.Time
|
|
||||||
lastOutput string
|
|
||||||
alertCount int16
|
alertCount int16
|
||||||
failureCount int16
|
failureCount int16
|
||||||
|
lastCheck time.Time
|
||||||
lastSuccess time.Time
|
lastSuccess time.Time
|
||||||
|
lastOutput string
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||||
@ -42,6 +43,7 @@ func (monitor Monitor) ShouldCheck() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
sinceLastCheck := time.Since(monitor.lastCheck).Seconds()
|
sinceLastCheck := time.Since(monitor.lastCheck).Seconds()
|
||||||
|
|
||||||
return sinceLastCheck >= monitor.CheckInterval
|
return sinceLastCheck >= monitor.CheckInterval
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -60,6 +62,7 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
|||||||
monitor.lastOutput = string(output)
|
monitor.lastOutput = string(output)
|
||||||
|
|
||||||
var alertNotice *AlertNotice
|
var alertNotice *AlertNotice
|
||||||
|
|
||||||
isSuccess := (err == nil)
|
isSuccess := (err == nil)
|
||||||
if isSuccess {
|
if isSuccess {
|
||||||
alertNotice = monitor.success()
|
alertNotice = monitor.success()
|
||||||
@ -90,6 +93,7 @@ func (monitor *Monitor) success() (notice *AlertNotice) {
|
|||||||
// Alert that we have recovered
|
// Alert that we have recovered
|
||||||
notice = monitor.createAlertNotice(true)
|
notice = monitor.createAlertNotice(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
monitor.failureCount = 0
|
monitor.failureCount = 0
|
||||||
monitor.alertCount = 0
|
monitor.alertCount = 0
|
||||||
monitor.lastSuccess = time.Now()
|
monitor.lastSuccess = time.Now()
|
||||||
@ -116,19 +120,20 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
|||||||
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
||||||
|
|
||||||
// Use alert cadence to determine if we should alert
|
// Use alert cadence to determine if we should alert
|
||||||
if monitor.AlertEvery > 0 {
|
switch {
|
||||||
|
case monitor.AlertEvery > 0:
|
||||||
// Handle integer number of failures before alerting
|
// Handle integer number of failures before alerting
|
||||||
if failureCount%monitor.AlertEvery == 0 {
|
if failureCount%monitor.AlertEvery == 0 {
|
||||||
notice = monitor.createAlertNotice(false)
|
notice = monitor.createAlertNotice(false)
|
||||||
}
|
}
|
||||||
} else if monitor.AlertEvery == 0 {
|
case monitor.AlertEvery == 0:
|
||||||
// Handle alerting on first failure only
|
// Handle alerting on first failure only
|
||||||
if failureCount == 0 {
|
if failureCount == 0 {
|
||||||
notice = monitor.createAlertNotice(false)
|
notice = monitor.createAlertNotice(false)
|
||||||
}
|
}
|
||||||
} else {
|
default:
|
||||||
// Handle negative numbers indicating an exponential backoff
|
// Handle negative numbers indicating an exponential backoff
|
||||||
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
|
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
|
||||||
notice = monitor.createAlertNotice(false)
|
notice = monitor.createAlertNotice(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -138,7 +143,7 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
|||||||
monitor.alertCount++
|
monitor.alertCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return notice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor Monitor) getAlertAfter() int16 {
|
func (monitor Monitor) getAlertAfter() int16 {
|
||||||
@ -147,6 +152,7 @@ func (monitor Monitor) getAlertAfter() int16 {
|
|||||||
if monitor.AlertAfter == 0 {
|
if monitor.AlertAfter == 0 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
return monitor.AlertAfter
|
return monitor.AlertAfter
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,6 +161,7 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
|
|||||||
if up {
|
if up {
|
||||||
return monitor.AlertUp
|
return monitor.AlertUp
|
||||||
}
|
}
|
||||||
|
|
||||||
return monitor.AlertDown
|
return monitor.AlertDown
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,11 +22,13 @@ func TestMonitorIsValid(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.monitor.IsValid()
|
actual := c.monitor.IsValid()
|
||||||
if actual != c.expected {
|
if actual != c.expected {
|
||||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -71,11 +73,13 @@ func TestMonitorIsUp(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.monitor.IsUp()
|
actual := c.monitor.IsUp()
|
||||||
if actual != c.expected {
|
if actual != c.expected {
|
||||||
t.Errorf("IsUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
t.Errorf("IsUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -96,11 +100,13 @@ func TestMonitorGetAlertNames(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.monitor.GetAlertNames(c.up)
|
actual := c.monitor.GetAlertNames(c.up)
|
||||||
if !EqualSliceString(actual, c.expected) {
|
if !EqualSliceString(actual, c.expected) {
|
||||||
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -119,12 +125,15 @@ func TestMonitorSuccess(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := c.monitor.success()
|
notice := c.monitor.success()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -147,12 +156,15 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := c.monitor.failure()
|
notice := c.monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -195,10 +207,12 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
|
|||||||
|
|
||||||
notice := c.monitor.failure()
|
notice := c.monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -223,15 +237,18 @@ func TestMonitorFailureExponential(t *testing.T) {
|
|||||||
// Unlike previous tests, this one requires a static Monitor with repeated
|
// Unlike previous tests, this one requires a static Monitor with repeated
|
||||||
// calls to the failure method
|
// calls to the failure method
|
||||||
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
|
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := monitor.failure()
|
notice := monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -243,6 +260,7 @@ func TestMonitorCheck(t *testing.T) {
|
|||||||
hasNotice bool
|
hasNotice bool
|
||||||
lastOutput string
|
lastOutput string
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
monitor Monitor
|
monitor Monitor
|
||||||
expect expected
|
expect expected
|
||||||
@ -290,6 +308,7 @@ func TestMonitorCheck(t *testing.T) {
|
|||||||
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,14 +3,14 @@ check_interval: 5
|
|||||||
|
|
||||||
monitors:
|
monitors:
|
||||||
- name: Fake Website
|
- name: Fake Website
|
||||||
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
|
command: ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
|
||||||
alert_down: [log_down, mailgun_down, sms_down]
|
alert_down: [log_down, mailgun_down, sms_down]
|
||||||
alert_up: [log_up, email_up]
|
alert_up: [log_up, email_up]
|
||||||
check_interval: 10 # Must be at minimum the global `check_interval`
|
check_interval: 10 # Must be at minimum the global `check_interval`
|
||||||
alert_after: 3
|
alert_after: 3
|
||||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||||
- name: Real Website
|
- name: Real Website
|
||||||
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
|
command: ["curl", "-s", "-o", "/dev/null", "https://google.com"]
|
||||||
alert_down: [log_down, mailgun_down, sms_down]
|
alert_down: [log_down, mailgun_down, sms_down]
|
||||||
alert_up: [log_up, email_up]
|
alert_up: [log_up, email_up]
|
||||||
check_interval: 5
|
check_interval: 5
|
||||||
@ -23,7 +23,8 @@ alerts:
|
|||||||
log_up:
|
log_up:
|
||||||
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||||
email_up:
|
email_up:
|
||||||
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
command:
|
||||||
|
[sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||||
mailgun_down:
|
mailgun_down:
|
||||||
command: >
|
command: >
|
||||||
curl -s -X POST
|
curl -s -X POST
|
||||||
|
2
util.go
2
util.go
@ -17,10 +17,12 @@ func EqualSliceString(a, b []string) bool {
|
|||||||
if len(a) != len(b) {
|
if len(a) != len(b) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, val := range a {
|
for i, val := range a {
|
||||||
if val != b[i] {
|
if val != b[i] {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user