diff --git a/README.md b/README.md index 2c56fd2..cdceeba 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,96 @@ # minitor-go -A reimplementation of minitor in Go \ No newline at end of file +A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go + +Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus Go feels like a better fit for the project, longer term. + +The initial target is meant to be roughly compatible, requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go-specific features. + +## Differences from Python version + +There are a few key differences between the Python version and the v0.x Go version. + +First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a Shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. E.g.: + +minitor-py: +```yaml +monitors: + - name: Exec command + command: ['echo', 'test'] + - name: Shell command + command: echo 'test' +``` + +minitor-go: +```yaml +monitors: + - name: Exec command + command: ['echo', 'test'] + - name: Shell command + command_shell: echo 'test' +``` + +Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built-in Go templating format. E.g.: + +minitor-py: +```yaml +alerts: + log_command: + command: ['echo', '{monitor_name}'] + log_shell: + command_shell: "echo {monitor_name}" +``` + +minitor-go: +```yaml +alerts: + log_command: + command: ['echo', '{{.MonitorName}}'] + log_shell: + command_shell: "echo {{.MonitorName}}" +``` + +Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used, and line continuations should not be. 
+ +minitor-py: +```yaml +alerts: + log_shell: + command_shell: > + echo "line 1" + echo "line 2" + echo "continued" \ + "line" +``` + +minitor-go: +```yaml +alerts: + log_shell: + command_shell: > + echo "line 1"; + echo "line 2"; + echo "continued" + "line" +``` + +## To do +There are two task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework. + +Parity: + + - [x] Run monitor commands + - [x] Run monitor commands in a shell + - [x] Run alert commands + - [x] Run alert commands in a shell + - [x] Allow templating of alert commands + - [ ] Implement Prometheus client to export metrics + - [ ] Test coverage + +Improvement: + + - [ ] Implement leveled logging (maybe glog or logrus) + - [ ] Consider switching from YAML to TOML + - [ ] Consider value of templating vs injecting values into Env variables + - [ ] Async checking + - [ ] Use durations rather than seconds checked in event loop \ No newline at end of file diff --git a/alert.go b/alert.go index 04c63a0..2a27ed1 100644 --- a/alert.go +++ b/alert.go @@ -2,7 +2,6 @@ package main import ( "bytes" - "fmt" "log" "os/exec" "text/template" @@ -13,7 +12,7 @@ type Alert struct { Name string Command []string CommandShell string `yaml:"command_shell"` - commandTemplate []template.Template + commandTemplate []*template.Template commandShellTemplate *template.Template } @@ -26,42 +25,73 @@ func (alert Alert) IsValid() bool { func (alert *Alert) BuildTemplates() { if alert.commandTemplate == nil && alert.Command != nil { // build template - fmt.Println("Building template for command...") + log.Println("Building template for command...") + alert.commandTemplate = []*template.Template{} + for i, cmdPart := range alert.Command { + alert.commandTemplate = append(alert.commandTemplate, template.Must( + template.New(alert.Name+string(i)).Parse(cmdPart), + )) + } + log.Printf("Template built: %v", alert.commandTemplate) } else if 
alert.commandShellTemplate == nil && alert.CommandShell != "" { + log.Println("Building template for shell command...") alert.commandShellTemplate = template.Must( template.New(alert.Name).Parse(alert.CommandShell), ) + log.Printf("Template built: %v", alert.commandShellTemplate) } else { - panic("No template?") + panic("No template provided?") } } -func (alert Alert) Send(notice AlertNotice) { +func (alert *Alert) Send(notice AlertNotice) { + // TODO: Validate and build templates in a better place and make this immutable + if !alert.IsValid() { + log.Fatalf("Alert is invalid: %v", alert) + } + alert.BuildTemplates() + var cmd *exec.Cmd if alert.commandTemplate != nil { // build template - fmt.Println("Send command thing...") + log.Println("Send command thing...") + command := []string{} + for _, cmdTmp := range alert.commandTemplate { + var commandBuffer bytes.Buffer + err := cmdTmp.Execute(&commandBuffer, notice) + if err != nil { + panic(err) + } + command = append(command, commandBuffer.String()) + } + cmd = exec.Command(command[0], command[1:]...) 
} else if alert.commandShellTemplate != nil { var commandBuffer bytes.Buffer err := alert.commandShellTemplate.Execute(&commandBuffer, notice) if err != nil { panic(err) } - cmd = exec.Command(commandBuffer.String()) - - output, err := cmd.CombinedOutput() - log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output)) + shellCommand := commandBuffer.String() + log.Printf("About to run alert command: %s", shellCommand) + cmd = ShellCommand(shellCommand) } else { - panic("No template?") + panic("No template compiled?") + } + + output, err := cmd.CombinedOutput() + log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output)) + if err != nil { + panic(err) } } type AlertNotice struct { MonitorName string - AlertCount int64 - FailureCount int64 + AlertCount int16 + FailureCount int16 LastCheckOutput string LastSuccess time.Time + IsUp bool } diff --git a/config.go b/config.go index 9356a63..9726d7a 100644 --- a/config.go +++ b/config.go @@ -9,8 +9,8 @@ import ( type Config struct { CheckInterval int64 `yaml:"check_interval"` - Monitors []Monitor - Alerts map[string]Alert + Monitors []*Monitor + Alerts map[string]*Alert } func LoadConfig(filePath string) (config Config) { @@ -24,7 +24,7 @@ func LoadConfig(filePath string) (config Config) { err = yaml.Unmarshal([]byte(env_expanded), &config) if err != nil { - log.Fatalf("error: %v", err) + log.Fatalf("ERROR: %v", err) panic(err) } diff --git a/main.go b/main.go index 9770a44..dca25a5 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "log" "time" ) @@ -10,7 +11,28 @@ func main() { for { for _, monitor := range config.Monitors { if monitor.ShouldCheck() { - monitor.Check() + _, alertNotice := monitor.Check() + if alertNotice != nil { + //log.Printf("Recieved an alert notice: %v", alertNotice) + var alerts []string + if alertNotice.IsUp { + alerts = monitor.AlertUp + log.Printf("Alert up: %v", monitor.AlertUp) + } else { + alerts = monitor.AlertDown + log.Printf("Alert down: %v", 
monitor.AlertDown) + } + if alerts == nil { + log.Printf("WARNING: Found alert, but no alert mechanism: %v", alertNotice) + } + for _, alertName := range alerts { + if alert, ok := config.Alerts[alertName]; ok { + alert.Send(*alertNotice) + } else { + log.Printf("WARNING: Could not find alert for %s", alertName) + } + } + } } } diff --git a/monitor.go b/monitor.go index cfa2546..975606f 100644 --- a/monitor.go +++ b/monitor.go @@ -2,10 +2,12 @@ package main import ( "log" + "math" "os/exec" "time" ) +// Monitor represents a particular periodic check of a command type Monitor struct { // Config values Name string @@ -15,65 +17,138 @@ type Monitor struct { AlertUp []string `yaml:"alert_up"` CheckInterval float64 `yaml:"check_interval"` AlertAfter int16 `yaml:"alert_after"` - AlertEvey int16 `yaml:"alert_every"` + AlertEvery int16 `yaml:"alert_every"` // Other values - LastCheck time.Time - LastOutput string + lastCheck time.Time + lastOutput string + alertCount int16 + failureCount int16 + lastSuccess time.Time } +// IsValid returns a boolean indicating if the Monitor has been correctly +// configured func (monitor Monitor) IsValid() bool { atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil) atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil) return atLeastOneCommand && atMostOneCommand } +// ShouldCheck returns a boolean indicating if the Monitor is ready to be +// be checked again func (monitor Monitor) ShouldCheck() bool { - if monitor.LastCheck.IsZero() { + if monitor.lastCheck.IsZero() { return true } - sinceLastCheck := time.Now().Sub(monitor.LastCheck).Seconds() + sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds() return sinceLastCheck >= monitor.CheckInterval } -func (monitor *Monitor) Check() bool { - // TODO: This should probably return a list of alerts since the `raise` - // pattern doesn't carry over from Python +// Check will run the command configured by the Monitor and return a status +// and a 
possible AlertNotice +func (monitor *Monitor) Check() (bool, *AlertNotice) { var cmd *exec.Cmd - if monitor.Command != nil { cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...) } else { - // TODO: Handle a command shell as well. This is untested - - //cmd = exec.Command("sh", "-c", "echo \"This is a test of the command system\"") cmd = ShellCommand(monitor.CommandShell) } output, err := cmd.CombinedOutput() - log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output)) + //log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output)) - is_success := (err == nil) + isSuccess := (err == nil) if err != nil { - log.Printf("error: %v", err) + log.Printf("ERROR: %v", err) } - monitor.LastCheck = time.Now() - monitor.LastOutput = string(output) + monitor.lastCheck = time.Now() + monitor.lastOutput = string(output) - if is_success { - monitor.success() + var alertNotice *AlertNotice + if isSuccess { + alertNotice = monitor.success() } else { - monitor.failure() + alertNotice = monitor.failure() } - return is_success + log.Printf( + "Check result for %s: %v, %v at %v", + monitor.Name, + isSuccess, + alertNotice, + monitor.lastCheck, + ) + + return isSuccess, alertNotice } -func (monitor Monitor) success() { +func (monitor Monitor) isUp() bool { + return monitor.alertCount == 0 +} + +func (monitor *Monitor) success() (notice *AlertNotice) { log.Printf("Great success!") + if !monitor.isUp() { + // Alert that we have recovered + notice = monitor.createAlertNotice(true) + } + monitor.failureCount = 0 + monitor.alertCount = 0 + monitor.lastSuccess = time.Now() + + return } -func (monitor *Monitor) failure() { +func (monitor *Monitor) failure() (notice *AlertNotice) { log.Printf("Devastating failure. :(") + monitor.failureCount++ + // If we haven't hit the minimum failures, we can exit + if monitor.failureCount < monitor.AlertAfter { + // TODO: Turn into a debug + log.Printf( + "Have not hit minimum failures. 
failures: %v alert after: %v", + monitor.failureCount, + monitor.AlertAfter, + ) + return + } + + failureCount := (monitor.failureCount - monitor.AlertAfter) + + if monitor.AlertEvery > 0 { + // Handle integer number of failures before alerting + if failureCount%monitor.AlertEvery == 0 { + notice = monitor.createAlertNotice(false) + } + } else if monitor.AlertEvery == 0 { + // Handle alerting on first failure only + if failureCount == 1 { + notice = monitor.createAlertNotice(false) + } + } else { + // Handle negative numbers indicating an exponential backoff + if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { + notice = monitor.createAlertNotice(false) + } + } + + if notice != nil { + monitor.alertCount++ + } + + return +} + +func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice { + // TODO: Maybe add something about recovery status here + return &AlertNotice{ + MonitorName: monitor.Name, + AlertCount: monitor.alertCount, + FailureCount: monitor.failureCount, + LastCheckOutput: monitor.lastOutput, + LastSuccess: monitor.lastSuccess, + IsUp: isUp, + } } diff --git a/util.go b/util.go index 787ac43..36eef38 100644 --- a/util.go +++ b/util.go @@ -1,7 +1,6 @@ package main import ( - "log" "os/exec" "strings" ) @@ -19,6 +18,6 @@ func escapeCommandShell(command string) string { /// ShellCommand takes a string and executes it as a command using `sh` func ShellCommand(command string) *exec.Cmd { shellCommand := []string{"sh", "-c", escapeCommandShell(command)} - log.Printf("Command: %v", shellCommand) + //log.Printf("Shell command: %v", shellCommand) return exec.Command(shellCommand[0], shellCommand[1:]...) }