Commands all running

This commit is contained in:
IamTheFij 2019-10-02 09:37:29 -07:00
parent dd0b8e3f38
commit 7b746ed62a
6 changed files with 262 additions and 43 deletions

View File

@ -1,3 +1,96 @@
# minitor-go # minitor-go
A reimplementation of minitor in Go A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus Go feels like a better fit for the project, longer term.
The initial target is meant to be roughly compatible, requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go-specific features.
## Differences from Python version
There are a few key differences between the Python version and the v0.x Go version.
First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. E.g.:
minitor-py:
```yaml
monitors:
- name: Exec command
command: ['echo', 'test']
- name: Shell command
command: echo 'test'
```
minitor-go:
```yaml
monitors:
- name: Exec command
command: ['echo', 'test']
- name: Shell command
command_shell: echo 'test'
```
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built-in Go templating format. E.g.:
minitor-py:
```yaml
alerts:
log_command:
command: ['echo', '{monitor_name}']
log_shell:
command: "echo {monitor_name}"
```
minitor-go:
```yaml
alerts:
log_command:
command: ['echo', '{{.MonitorName}}']
log_shell:
command_shell: "echo {{.MonitorName}}"
```
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used to separate commands, and line continuations (a trailing `\`) should not be used.
minitor-py:
```yaml
alerts:
log_shell:
command: >
echo "line 1"
echo "line 2"
echo "continued" \
"line"
```
minitor-go:
```yaml
alerts:
log_shell:
command_shell: >
echo "line 1";
echo "line 2";
echo "continued"
"line"
```
## To do
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
Parity:
- [x] Run monitor commands
- [x] Run monitor commands in a shell
- [x] Run alert commands
- [x] Run alert commands in a shell
- [x] Allow templating of alert commands
- [ ] Implement Prometheus client to export metrics
- [ ] Test coverage
Improvement:
- [ ] Implement leveled logging (maybe glog or logrus)
- [ ] Consider switching from YAML to TOML
- [ ] Consider value of templating vs injecting values into Env variables
- [ ] Async checking
- [ ] Use durations rather than seconds checked in event loop

View File

@ -2,7 +2,6 @@ package main
import ( import (
"bytes" "bytes"
"fmt"
"log" "log"
"os/exec" "os/exec"
"text/template" "text/template"
@ -13,7 +12,7 @@ type Alert struct {
Name string Name string
Command []string Command []string
CommandShell string `yaml:"command_shell"` CommandShell string `yaml:"command_shell"`
commandTemplate []template.Template commandTemplate []*template.Template
commandShellTemplate *template.Template commandShellTemplate *template.Template
} }
@ -26,42 +25,73 @@ func (alert Alert) IsValid() bool {
func (alert *Alert) BuildTemplates() { func (alert *Alert) BuildTemplates() {
if alert.commandTemplate == nil && alert.Command != nil { if alert.commandTemplate == nil && alert.Command != nil {
// build template // build template
fmt.Println("Building template for command...") log.Println("Building template for command...")
alert.commandTemplate = []*template.Template{}
for i, cmdPart := range alert.Command {
alert.commandTemplate = append(alert.commandTemplate, template.Must(
template.New(alert.Name+string(i)).Parse(cmdPart),
))
}
log.Printf("Template built: %v", alert.commandTemplate)
} else if alert.commandShellTemplate == nil && alert.CommandShell != "" { } else if alert.commandShellTemplate == nil && alert.CommandShell != "" {
log.Println("Building template for shell command...")
alert.commandShellTemplate = template.Must( alert.commandShellTemplate = template.Must(
template.New(alert.Name).Parse(alert.CommandShell), template.New(alert.Name).Parse(alert.CommandShell),
) )
log.Printf("Template built: %v", alert.commandShellTemplate)
} else { } else {
panic("No template?") panic("No template provided?")
} }
} }
func (alert Alert) Send(notice AlertNotice) { func (alert *Alert) Send(notice AlertNotice) {
// TODO: Validate and build templates in a better place and make this immutable
if !alert.IsValid() {
log.Fatalf("Alert is invalid: %v", alert)
}
alert.BuildTemplates()
var cmd *exec.Cmd var cmd *exec.Cmd
if alert.commandTemplate != nil { if alert.commandTemplate != nil {
// build template // build template
fmt.Println("Send command thing...") log.Println("Send command thing...")
command := []string{}
for _, cmdTmp := range alert.commandTemplate {
var commandBuffer bytes.Buffer
err := cmdTmp.Execute(&commandBuffer, notice)
if err != nil {
panic(err)
}
command = append(command, commandBuffer.String())
}
cmd = exec.Command(command[0], command[1:]...)
} else if alert.commandShellTemplate != nil { } else if alert.commandShellTemplate != nil {
var commandBuffer bytes.Buffer var commandBuffer bytes.Buffer
err := alert.commandShellTemplate.Execute(&commandBuffer, notice) err := alert.commandShellTemplate.Execute(&commandBuffer, notice)
if err != nil { if err != nil {
panic(err) panic(err)
} }
cmd = exec.Command(commandBuffer.String()) shellCommand := commandBuffer.String()
output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
log.Printf("About to run alert command: %s", shellCommand)
cmd = ShellCommand(shellCommand)
} else { } else {
panic("No template?") panic("No template compiled?")
}
output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
if err != nil {
panic(err)
} }
} }
type AlertNotice struct { type AlertNotice struct {
MonitorName string MonitorName string
AlertCount int64 AlertCount int16
FailureCount int64 FailureCount int16
LastCheckOutput string LastCheckOutput string
LastSuccess time.Time LastSuccess time.Time
IsUp bool
} }

View File

@ -9,8 +9,8 @@ import (
type Config struct { type Config struct {
CheckInterval int64 `yaml:"check_interval"` CheckInterval int64 `yaml:"check_interval"`
Monitors []Monitor Monitors []*Monitor
Alerts map[string]Alert Alerts map[string]*Alert
} }
func LoadConfig(filePath string) (config Config) { func LoadConfig(filePath string) (config Config) {
@ -24,7 +24,7 @@ func LoadConfig(filePath string) (config Config) {
err = yaml.Unmarshal([]byte(env_expanded), &config) err = yaml.Unmarshal([]byte(env_expanded), &config)
if err != nil { if err != nil {
log.Fatalf("error: %v", err) log.Fatalf("ERROR: %v", err)
panic(err) panic(err)
} }

24
main.go
View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"log"
"time" "time"
) )
@ -10,7 +11,28 @@ func main() {
for { for {
for _, monitor := range config.Monitors { for _, monitor := range config.Monitors {
if monitor.ShouldCheck() { if monitor.ShouldCheck() {
monitor.Check() _, alertNotice := monitor.Check()
if alertNotice != nil {
//log.Printf("Received an alert notice: %v", alertNotice)
var alerts []string
if alertNotice.IsUp {
alerts = monitor.AlertUp
log.Printf("Alert up: %v", monitor.AlertUp)
} else {
alerts = monitor.AlertDown
log.Printf("Alert down: %v", monitor.AlertDown)
}
if alerts == nil {
log.Printf("WARNING: Found alert, but no alert mechanism: %v", alertNotice)
}
for _, alertName := range alerts {
if alert, ok := config.Alerts[alertName]; ok {
alert.Send(*alertNotice)
} else {
log.Printf("WARNING: Could not find alert for %s", alertName)
}
}
}
} }
} }

View File

@ -2,10 +2,12 @@ package main
import ( import (
"log" "log"
"math"
"os/exec" "os/exec"
"time" "time"
) )
// Monitor represents a particular periodic check of a command
type Monitor struct { type Monitor struct {
// Config values // Config values
Name string Name string
@ -15,65 +17,138 @@ type Monitor struct {
AlertUp []string `yaml:"alert_up"` AlertUp []string `yaml:"alert_up"`
CheckInterval float64 `yaml:"check_interval"` CheckInterval float64 `yaml:"check_interval"`
AlertAfter int16 `yaml:"alert_after"` AlertAfter int16 `yaml:"alert_after"`
AlertEvey int16 `yaml:"alert_every"` AlertEvery int16 `yaml:"alert_every"`
// Other values // Other values
LastCheck time.Time lastCheck time.Time
LastOutput string lastOutput string
alertCount int16
failureCount int16
lastSuccess time.Time
} }
// IsValid returns a boolean indicating if the Monitor has been correctly
// configured
func (monitor Monitor) IsValid() bool { func (monitor Monitor) IsValid() bool {
atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil) atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil)
atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil) atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil)
return atLeastOneCommand && atMostOneCommand return atLeastOneCommand && atMostOneCommand
} }
// ShouldCheck returns a boolean indicating if the Monitor is ready to be
// checked again
func (monitor Monitor) ShouldCheck() bool { func (monitor Monitor) ShouldCheck() bool {
if monitor.LastCheck.IsZero() { if monitor.lastCheck.IsZero() {
return true return true
} }
sinceLastCheck := time.Now().Sub(monitor.LastCheck).Seconds() sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
return sinceLastCheck >= monitor.CheckInterval return sinceLastCheck >= monitor.CheckInterval
} }
func (monitor *Monitor) Check() bool { // Check will run the command configured by the Monitor and return a status
// TODO: This should probably return a list of alerts since the `raise` // and a possible AlertNotice
// pattern doesn't carry over from Python func (monitor *Monitor) Check() (bool, *AlertNotice) {
var cmd *exec.Cmd var cmd *exec.Cmd
if monitor.Command != nil { if monitor.Command != nil {
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...) cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
} else { } else {
// TODO: Handle a command shell as well. This is untested
//cmd = exec.Command("sh", "-c", "echo \"This is a test of the command system\"")
cmd = ShellCommand(monitor.CommandShell) cmd = ShellCommand(monitor.CommandShell)
} }
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output)) //log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
is_success := (err == nil) isSuccess := (err == nil)
if err != nil { if err != nil {
log.Printf("error: %v", err) log.Printf("ERROR: %v", err)
} }
monitor.LastCheck = time.Now() monitor.lastCheck = time.Now()
monitor.LastOutput = string(output) monitor.lastOutput = string(output)
if is_success { var alertNotice *AlertNotice
monitor.success() if isSuccess {
alertNotice = monitor.success()
} else { } else {
monitor.failure() alertNotice = monitor.failure()
} }
return is_success log.Printf(
"Check result for %s: %v, %v at %v",
monitor.Name,
isSuccess,
alertNotice,
monitor.lastCheck,
)
return isSuccess, alertNotice
} }
func (monitor Monitor) success() { func (monitor Monitor) isUp() bool {
return monitor.alertCount == 0
}
func (monitor *Monitor) success() (notice *AlertNotice) {
log.Printf("Great success!") log.Printf("Great success!")
if !monitor.isUp() {
// Alert that we have recovered
notice = monitor.createAlertNotice(true)
}
monitor.failureCount = 0
monitor.alertCount = 0
monitor.lastSuccess = time.Now()
return
} }
func (monitor *Monitor) failure() { func (monitor *Monitor) failure() (notice *AlertNotice) {
log.Printf("Devastating failure. :(") log.Printf("Devastating failure. :(")
monitor.failureCount++
// If we haven't hit the minimum failures, we can exit
if monitor.failureCount < monitor.AlertAfter {
// TODO: Turn into a debug
log.Printf(
"Have not hit minimum failures. failures: %v alert after: %v",
monitor.failureCount,
monitor.AlertAfter,
)
return
}
failureCount := (monitor.failureCount - monitor.AlertAfter)
if monitor.AlertEvery > 0 {
// Handle integer number of failures before alerting
if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
} else if monitor.AlertEvery == 0 {
// Handle alerting on first failure only
if failureCount == 1 {
notice = monitor.createAlertNotice(false)
}
} else {
// Handle negative numbers indicating an exponential backoff
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
notice = monitor.createAlertNotice(false)
}
}
if notice != nil {
monitor.alertCount++
}
return
}
func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
// TODO: Maybe add something about recovery status here
return &AlertNotice{
MonitorName: monitor.Name,
AlertCount: monitor.alertCount,
FailureCount: monitor.failureCount,
LastCheckOutput: monitor.lastOutput,
LastSuccess: monitor.lastSuccess,
IsUp: isUp,
}
} }

View File

@ -1,7 +1,6 @@
package main package main
import ( import (
"log"
"os/exec" "os/exec"
"strings" "strings"
) )
@ -19,6 +18,6 @@ func escapeCommandShell(command string) string {
/// ShellCommand takes a string and executes it as a command using `sh` /// ShellCommand takes a string and executes it as a command using `sh`
func ShellCommand(command string) *exec.Cmd { func ShellCommand(command string) *exec.Cmd {
shellCommand := []string{"sh", "-c", escapeCommandShell(command)} shellCommand := []string{"sh", "-c", escapeCommandShell(command)}
log.Printf("Command: %v", shellCommand) //log.Printf("Shell command: %v", shellCommand)
return exec.Command(shellCommand[0], shellCommand[1:]...) return exec.Command(shellCommand[0], shellCommand[1:]...)
} }