Commands all running

2019-10-02 09:37:29 -07:00 · 2019-10-02 09:37:29 -07:00 · 7b746ed62a
commit 7b746ed62a
parent dd0b8e3f38
6 changed files with 262 additions and 43 deletions
--- a/README.md
+++ b/README.md
@ -1,3 +1,96 @@
 # minitor-go

-A reimplementation of minitor in Go
+A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
+
+Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus, Go feels like a better fit for the project in the longer term.
+
+Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
+
+## Differences from Python version
+
+There are a few key differences between the Python version and the v0.x Go version. 
+
+First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. E.g.:
+
+minitor-py:
+```yaml
+monitors:
+  - name: Exec command
+    command: ['echo', 'test']
+  - name: Shell command
+    command: echo 'test'
+```
+
+minitor-go:
+```yaml
+monitors:
+  - name: Exec command
+    command: ['echo', 'test']
+  - name: Shell command
+    command_shell: echo 'test'
+```
+
+Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built-in Go templating format. E.g.:
+
+minitor-py:
+```yaml
+alerts:
+  log_command:
+    command: ['echo', '{monitor_name}']
+  log_shell:
+    command_shell: "echo {monitor_name}"
+```
+
+minitor-go:
+```yaml
+alerts:
+  log_command:
+    command: ['echo', '{{.MonitorName}}']
+  log_shell:
+    command_shell: "echo {{.MonitorName}}"
+```
+
+Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used to separate commands, and backslash line continuations should not be used.
+
+minitor-py:
+```yaml
+alerts:
+  log_shell:
+    command_shell: >
+      echo "line 1"
+      echo "line 2"
+      echo "continued" \
+        "line"
+```
+
+minitor-go:
+```yaml
+alerts:
+  log_shell:
+    command_shell: >
+      echo "line 1";
+      echo "line 2";
+      echo "continued"
+        "line"
+```
+
+## To do
+There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
+
+Parity:
+
+  - [x] Run monitor commands
+  - [x] Run monitor commands in a shell
+  - [x] Run alert commands
+  - [x] Run alert commands in a shell
+  - [x] Allow templating of alert commands
+  - [ ] Implement Prometheus client to export metrics
+  - [ ] Test coverage
+
+Improvement:
+
+  - [ ] Implement leveled logging (maybe glog or logrus)
+  - [ ] Consider switching from YAML to TOML
+  - [ ] Consider value of templating vs injecting values into Env variables
+  - [ ] Async checking
+  - [ ] Use durations rather than seconds checked in event loop
--- a/alert.go
+++ b/alert.go
@ -2,7 +2,6 @@ package main

 import (
 	"bytes"
-	"fmt"
 	"log"
 	"os/exec"
 	"text/template"
@ -13,7 +12,7 @@ type Alert struct {
 	Name                 string
 	Command              []string
 	CommandShell         string `yaml:"command_shell"`
-	commandTemplate      []template.Template
+	commandTemplate      []*template.Template
 	commandShellTemplate *template.Template
 }

@ -26,42 +25,73 @@ func (alert Alert) IsValid() bool {
 func (alert *Alert) BuildTemplates() {
 	if alert.commandTemplate == nil && alert.Command != nil {
 		// build template
-		fmt.Println("Building template for command...")
+		log.Println("Building template for command...")
+		alert.commandTemplate = []*template.Template{}
+		for i, cmdPart := range alert.Command {
+			alert.commandTemplate = append(alert.commandTemplate, template.Must(
+				template.New(alert.Name+string(i)).Parse(cmdPart),
+			))
+		}
+		log.Printf("Template built: %v", alert.commandTemplate)
 	} else if alert.commandShellTemplate == nil && alert.CommandShell != "" {
+		log.Println("Building template for shell command...")
 		alert.commandShellTemplate = template.Must(
 			template.New(alert.Name).Parse(alert.CommandShell),
 		)
+		log.Printf("Template built: %v", alert.commandShellTemplate)
 	} else {
-		panic("No template?")
+		panic("No template provided?")
 	}
 }

-func (alert Alert) Send(notice AlertNotice) {
+func (alert *Alert) Send(notice AlertNotice) {
+	// TODO: Validate and build templates in a better place and make this immutable
+	if !alert.IsValid() {
+		log.Fatalf("Alert is invalid: %v", alert)
+	}
+	alert.BuildTemplates()
+
 	var cmd *exec.Cmd

 	if alert.commandTemplate != nil {
 		// build template
-		fmt.Println("Send command thing...")
+		log.Println("Send command thing...")
+		command := []string{}
+		for _, cmdTmp := range alert.commandTemplate {
+			var commandBuffer bytes.Buffer
+			err := cmdTmp.Execute(&commandBuffer, notice)
+			if err != nil {
+				panic(err)
+			}
+			command = append(command, commandBuffer.String())
+		}
+		cmd = exec.Command(command[0], command[1:]...)
 	} else if alert.commandShellTemplate != nil {
 		var commandBuffer bytes.Buffer
 		err := alert.commandShellTemplate.Execute(&commandBuffer, notice)
 		if err != nil {
 			panic(err)
 		}
-		cmd = exec.Command(commandBuffer.String())
-
-		output, err := cmd.CombinedOutput()
-		log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
+		shellCommand := commandBuffer.String()

+		log.Printf("About to run alert command: %s", shellCommand)
+		cmd = ShellCommand(shellCommand)
 	} else {
-		panic("No template?")
+		panic("No template compiled?")
+	}
+
+	output, err := cmd.CombinedOutput()
+	log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
+	if err != nil {
+		panic(err)
 	}
 }

 type AlertNotice struct {
 	MonitorName     string
-	AlertCount      int64
-	FailureCount    int64
+	AlertCount      int16
+	FailureCount    int16
 	LastCheckOutput string
 	LastSuccess     time.Time
+	IsUp            bool
 }
--- a/config.go
+++ b/config.go
@ -9,8 +9,8 @@ import (

 type Config struct {
 	CheckInterval int64 `yaml:"check_interval"`
-	Monitors      []Monitor
-	Alerts        map[string]Alert
+	Monitors      []*Monitor
+	Alerts        map[string]*Alert
 }

 func LoadConfig(filePath string) (config Config) {
@ -24,7 +24,7 @@ func LoadConfig(filePath string) (config Config) {

 	err = yaml.Unmarshal([]byte(env_expanded), &config)
 	if err != nil {
-		log.Fatalf("error: %v", err)
+		log.Fatalf("ERROR: %v", err)
 		panic(err)
 	}

--- a/main.go
+++ b/main.go
@ -1,6 +1,7 @@
 package main

 import (
+	"log"
 	"time"
 )

@ -10,7 +11,28 @@ func main() {
 	for {
 		for _, monitor := range config.Monitors {
 			if monitor.ShouldCheck() {
-				monitor.Check()
+				_, alertNotice := monitor.Check()
+				if alertNotice != nil {
+					//log.Printf("Recieved an alert notice: %v", alertNotice)
+					var alerts []string
+					if alertNotice.IsUp {
+						alerts = monitor.AlertUp
+						log.Printf("Alert up: %v", monitor.AlertUp)
+					} else {
+						alerts = monitor.AlertDown
+						log.Printf("Alert down: %v", monitor.AlertDown)
+					}
+					if alerts == nil {
+						log.Printf("WARNING: Found alert, but no alert mechanism: %v", alertNotice)
+					}
+					for _, alertName := range alerts {
+						if alert, ok := config.Alerts[alertName]; ok {
+							alert.Send(*alertNotice)
+						} else {
+							log.Printf("WARNING: Could not find alert for %s", alertName)
+						}
+					}
+				}
 			}
 		}

--- a/monitor.go
+++ b/monitor.go
@ -2,10 +2,12 @@ package main

 import (
 	"log"
+	"math"
 	"os/exec"
 	"time"
 )

+// Monitor represents a particular periodic check of a command
 type Monitor struct {
 	// Config values
 	Name          string
@ -15,65 +17,138 @@ type Monitor struct {
 	AlertUp       []string `yaml:"alert_up"`
 	CheckInterval float64  `yaml:"check_interval"`
 	AlertAfter    int16    `yaml:"alert_after"`
-	AlertEvey     int16    `yaml:"alert_every"`
+	AlertEvery    int16    `yaml:"alert_every"`
 	// Other values
-	LastCheck  time.Time
-	LastOutput string
+	lastCheck    time.Time
+	lastOutput   string
+	alertCount   int16
+	failureCount int16
+	lastSuccess  time.Time
 }

+// IsValid returns a boolean indicating if the Monitor has been correctly
+// configured
 func (monitor Monitor) IsValid() bool {
 	atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil)
 	atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil)
 	return atLeastOneCommand && atMostOneCommand
 }

+// ShouldCheck returns a boolean indicating if the Monitor is ready to be
+// checked again
 func (monitor Monitor) ShouldCheck() bool {
-	if monitor.LastCheck.IsZero() {
+	if monitor.lastCheck.IsZero() {
 		return true
 	}

-	sinceLastCheck := time.Now().Sub(monitor.LastCheck).Seconds()
+	sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
 	return sinceLastCheck >= monitor.CheckInterval
 }

-func (monitor *Monitor) Check() bool {
-	// TODO: This should probably return a list of alerts since the `raise`
-	// pattern doesn't carry over from Python
+// Check will run the command configured by the Monitor and return a status
+// and a possible AlertNotice
+func (monitor *Monitor) Check() (bool, *AlertNotice) {
 	var cmd *exec.Cmd
-
 	if monitor.Command != nil {
 		cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
 	} else {
-		// TODO: Handle a command shell as well. This is untested
-
-		//cmd = exec.Command("sh", "-c", "echo \"This is a test of the command system\"")
 		cmd = ShellCommand(monitor.CommandShell)
 	}

 	output, err := cmd.CombinedOutput()
-	log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
+	//log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))

-	is_success := (err == nil)
+	isSuccess := (err == nil)
 	if err != nil {
-		log.Printf("error: %v", err)
+		log.Printf("ERROR: %v", err)
 	}

-	monitor.LastCheck = time.Now()
-	monitor.LastOutput = string(output)
+	monitor.lastCheck = time.Now()
+	monitor.lastOutput = string(output)

-	if is_success {
-		monitor.success()
+	var alertNotice *AlertNotice
+	if isSuccess {
+		alertNotice = monitor.success()
 	} else {
-		monitor.failure()
+		alertNotice = monitor.failure()
 	}

-	return is_success
+	log.Printf(
+		"Check result for %s: %v, %v at %v",
+		monitor.Name,
+		isSuccess,
+		alertNotice,
+		monitor.lastCheck,
+	)
+
+	return isSuccess, alertNotice
 }

-func (monitor Monitor) success() {
+func (monitor Monitor) isUp() bool {
+	return monitor.alertCount == 0
+}
+
+func (monitor *Monitor) success() (notice *AlertNotice) {
 	log.Printf("Great success!")
+	if !monitor.isUp() {
+		// Alert that we have recovered
+		notice = monitor.createAlertNotice(true)
+	}
+	monitor.failureCount = 0
+	monitor.alertCount = 0
+	monitor.lastSuccess = time.Now()
+
+	return
 }

-func (monitor *Monitor) failure() {
+func (monitor *Monitor) failure() (notice *AlertNotice) {
 	log.Printf("Devastating failure. :(")
+	monitor.failureCount++
+	// If we haven't hit the minimum failures, we can exit
+	if monitor.failureCount < monitor.AlertAfter {
+		// TODO: Turn into a debug
+		log.Printf(
+			"Have not hit minimum failures. failures: %v alert after: %v",
+			monitor.failureCount,
+			monitor.AlertAfter,
+		)
+		return
+	}
+
+	failureCount := (monitor.failureCount - monitor.AlertAfter)
+
+	if monitor.AlertEvery > 0 {
+		// Handle integer number of failures before alerting
+		if failureCount%monitor.AlertEvery == 0 {
+			notice = monitor.createAlertNotice(false)
+		}
+	} else if monitor.AlertEvery == 0 {
+		// Handle alerting on first failure only
+		if failureCount == 1 {
+			notice = monitor.createAlertNotice(false)
+		}
+	} else {
+		// Handle negative numbers indicating an exponential backoff
+		if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
+			notice = monitor.createAlertNotice(false)
+		}
+	}
+
+	if notice != nil {
+		monitor.alertCount++
+	}
+
+	return
+}
+
+func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
+	// TODO: Maybe add something about recovery status here
+	return &AlertNotice{
+		MonitorName:     monitor.Name,
+		AlertCount:      monitor.alertCount,
+		FailureCount:    monitor.failureCount,
+		LastCheckOutput: monitor.lastOutput,
+		LastSuccess:     monitor.lastSuccess,
+		IsUp:            isUp,
+	}
 }
--- a/util.go
+++ b/util.go
@ -1,7 +1,6 @@
 package main

 import (
-	"log"
 	"os/exec"
 	"strings"
 )
@ -19,6 +18,6 @@ func escapeCommandShell(command string) string {
 /// ShellCommand takes a string and executes it as a command using `sh`
 func ShellCommand(command string) *exec.Cmd {
 	shellCommand := []string{"sh", "-c", escapeCommandShell(command)}
-	log.Printf("Command: %v", shellCommand)
+	//log.Printf("Shell command: %v", shellCommand)
 	return exec.Command(shellCommand[0], shellCommand[1:]...)
 }