Commands all running

This commit is contained in:
IamTheFij 2019-10-02 09:37:29 -07:00
parent dd0b8e3f38
commit 7b746ed62a
6 changed files with 262 additions and 43 deletions

View File

@ -1,3 +1,96 @@
# minitor-go
A reimplementation of minitor in Go
A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus Go feels like a better fit for the project in the longer term.
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
## Differences from Python version
There are a few key differences between the Python version and the v0.x Go version.
First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. E.g.:
minitor-py:
```yaml
monitors:
- name: Exec command
command: ['echo', 'test']
- name: Shell command
command: echo 'test'
```
minitor-go:
```yaml
monitors:
- name: Exec command
command: ['echo', 'test']
- name: Shell command
command_shell: echo 'test'
```
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built-in Go templating format. E.g.:
minitor-py:
```yaml
alerts:
log_command:
command: ['echo', '{monitor_name}']
log_shell:
command_shell: "echo {monitor_name}"
```
minitor-go:
```yaml
alerts:
log_command:
command: ['echo', '{{.MonitorName}}']
log_shell:
command_shell: "echo {{.MonitorName}}"
```
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used to separate commands, and backslash line continuations should not be used.
minitor-py:
```yaml
alerts:
log_shell:
command_shell: >
echo "line 1"
echo "line 2"
echo "continued" \
"line"
```
minitor-go:
```yaml
alerts:
log_shell:
command_shell: >
echo "line 1";
echo "line 2";
echo "continued"
"line"
```
## To do
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
Parity:
- [x] Run monitor commands
- [x] Run monitor commands in a shell
- [x] Run alert commands
- [x] Run alert commands in a shell
- [x] Allow templating of alert commands
- [ ] Implement Prometheus client to export metrics
- [ ] Test coverage
Improvement:
- [ ] Implement leveled logging (maybe glog or logrus)
- [ ] Consider switching from YAML to TOML
- [ ] Consider value of templating vs injecting values into Env variables
- [ ] Async checking
- [ ] Use durations rather than seconds checked in event loop

View File

@ -2,7 +2,6 @@ package main
import (
"bytes"
"fmt"
"log"
"os/exec"
"strconv"
"text/template"
@ -13,7 +12,7 @@ type Alert struct {
Name string
Command []string
CommandShell string `yaml:"command_shell"`
commandTemplate []template.Template
commandTemplate []*template.Template
commandShellTemplate *template.Template
}
@ -26,42 +25,73 @@ func (alert Alert) IsValid() bool {
// BuildTemplates compiles the alert's configured command into text/template
// templates: one template per argument when Command is set, or a single
// template when CommandShell is set.
//
// The call is idempotent. Send invokes it for every notice, so once either
// template has been compiled it returns immediately instead of falling
// through to the "no template" panic.
//
// It panics if a template fails to parse (via template.Must) or if neither
// Command nor CommandShell is configured.
func (alert *Alert) BuildTemplates() {
	// Already compiled by an earlier call; nothing left to do.
	if alert.commandTemplate != nil || alert.commandShellTemplate != nil {
		return
	}

	switch {
	case alert.Command != nil:
		log.Println("Building template for command...")
		// Keep the slice non-nil even for an empty Command so that Send can
		// tell "compiled" apart from "not compiled".
		alert.commandTemplate = make([]*template.Template, 0, len(alert.Command))
		for i, cmdPart := range alert.Command {
			// strconv.Itoa gives the decimal index; string(i) would produce
			// the rune with code point i instead.
			name := alert.Name + strconv.Itoa(i)
			alert.commandTemplate = append(
				alert.commandTemplate,
				template.Must(template.New(name).Parse(cmdPart)),
			)
		}
		log.Printf("Template built: %v", alert.commandTemplate)
	case alert.CommandShell != "":
		log.Println("Building template for shell command...")
		alert.commandShellTemplate = template.Must(
			template.New(alert.Name).Parse(alert.CommandShell),
		)
		log.Printf("Template built: %v", alert.commandShellTemplate)
	default:
		panic("No template provided?")
	}
}
func (alert Alert) Send(notice AlertNotice) {
// Send renders this alert's command from the given notice, runs it, and logs
// the combined output.
//
// The alert is validated and BuildTemplates is called on every invocation.
// An invalid alert ends the process through log.Fatalf; a template execution
// error or a non-nil error from running the command results in a panic.
func (alert *Alert) Send(notice AlertNotice) {
// TODO: Validate and build templates in a better place and make this immutable
if !alert.IsValid() {
log.Fatalf("Alert is invalid: %v", alert)
}
alert.BuildTemplates()
var cmd *exec.Cmd
if alert.commandTemplate != nil {
// Exec form: render every argument template with the notice to build argv.
// NOTE(review): the fmt.Println and log.Println below emit the same message;
// this looks like both sides of a diff line — confirm which one should stay.
fmt.Println("Send command thing...")
log.Println("Send command thing...")
command := []string{}
for _, cmdTmp := range alert.commandTemplate {
var commandBuffer bytes.Buffer
err := cmdTmp.Execute(&commandBuffer, notice)
if err != nil {
panic(err)
}
command = append(command, commandBuffer.String())
}
// First rendered element is the executable, the rest are its arguments.
cmd = exec.Command(command[0], command[1:]...)
} else if alert.commandShellTemplate != nil {
// Shell form: render the single template into one command string.
var commandBuffer bytes.Buffer
err := alert.commandShellTemplate.Execute(&commandBuffer, notice)
if err != nil {
panic(err)
}
// NOTE(review): the next three statements run the rendered string directly
// with exec.Command and log its output, and the same string is then run
// again through ShellCommand below. This looks like the pre-change side of
// the diff — confirm and remove.
cmd = exec.Command(commandBuffer.String())
output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
shellCommand := commandBuffer.String()
log.Printf("About to run alert command: %s", shellCommand)
cmd = ShellCommand(shellCommand)
} else {
// NOTE(review): the second panic can never be reached; likely diff residue.
panic("No template?")
panic("No template compiled?")
}
// Run the prepared command and log stdout and stderr together.
output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
if err != nil {
panic(err)
}
}
type AlertNotice struct {
MonitorName string
AlertCount int64
FailureCount int64
AlertCount int16
FailureCount int16
LastCheckOutput string
LastSuccess time.Time
IsUp bool
}

View File

@ -9,8 +9,8 @@ import (
type Config struct {
CheckInterval int64 `yaml:"check_interval"`
Monitors []Monitor
Alerts map[string]Alert
Monitors []*Monitor
Alerts map[string]*Alert
}
func LoadConfig(filePath string) (config Config) {
@ -24,7 +24,7 @@ func LoadConfig(filePath string) (config Config) {
err = yaml.Unmarshal([]byte(env_expanded), &config)
if err != nil {
log.Fatalf("error: %v", err)
log.Fatalf("ERROR: %v", err)
panic(err)
}

24
main.go
View File

@ -1,6 +1,7 @@
package main
import (
"log"
"time"
)
@ -10,7 +11,28 @@ func main() {
for {
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
monitor.Check()
_, alertNotice := monitor.Check()
if alertNotice != nil {
//log.Printf("Recieved an alert notice: %v", alertNotice)
var alerts []string
if alertNotice.IsUp {
alerts = monitor.AlertUp
log.Printf("Alert up: %v", monitor.AlertUp)
} else {
alerts = monitor.AlertDown
log.Printf("Alert down: %v", monitor.AlertDown)
}
if alerts == nil {
log.Printf("WARNING: Found alert, but no alert mechanism: %v", alertNotice)
}
for _, alertName := range alerts {
if alert, ok := config.Alerts[alertName]; ok {
alert.Send(*alertNotice)
} else {
log.Printf("WARNING: Could not find alert for %s", alertName)
}
}
}
}
}

View File

@ -2,10 +2,12 @@ package main
import (
"log"
"math"
"os/exec"
"time"
)
// Monitor represents a particular periodic check of a command
type Monitor struct {
// Config values
Name string
@ -15,65 +17,138 @@ type Monitor struct {
AlertUp []string `yaml:"alert_up"`
CheckInterval float64 `yaml:"check_interval"`
AlertAfter int16 `yaml:"alert_after"`
AlertEvey int16 `yaml:"alert_every"`
AlertEvery int16 `yaml:"alert_every"`
// Other values
LastCheck time.Time
LastOutput string
lastCheck time.Time
lastOutput string
alertCount int16
failureCount int16
lastSuccess time.Time
}
// IsValid reports whether the Monitor is configured with exactly one of
// Command or CommandShell.
func (monitor Monitor) IsValid() bool {
	hasShell := monitor.CommandShell != ""
	hasExec := monitor.Command != nil
	// Valid only when precisely one form is present: having neither or both
	// is rejected.
	return hasShell != hasExec
}
// ShouldCheck returns a boolean indicating if the Monitor is ready to be
// checked again
func (monitor Monitor) ShouldCheck() bool {
if monitor.LastCheck.IsZero() {
if monitor.lastCheck.IsZero() {
return true
}
sinceLastCheck := time.Now().Sub(monitor.LastCheck).Seconds()
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
return sinceLastCheck >= monitor.CheckInterval
}
func (monitor *Monitor) Check() bool {
// TODO: This should probably return a list of alerts since the `raise`
// pattern doesn't carry over from Python
// Check will run the command configured by the Monitor and return a status
// and a possible AlertNotice
func (monitor *Monitor) Check() (bool, *AlertNotice) {
var cmd *exec.Cmd
if monitor.Command != nil {
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
} else {
// TODO: Handle a command shell as well. This is untested
//cmd = exec.Command("sh", "-c", "echo \"This is a test of the command system\"")
cmd = ShellCommand(monitor.CommandShell)
}
output, err := cmd.CombinedOutput()
log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
//log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
is_success := (err == nil)
isSuccess := (err == nil)
if err != nil {
log.Printf("error: %v", err)
log.Printf("ERROR: %v", err)
}
monitor.LastCheck = time.Now()
monitor.LastOutput = string(output)
monitor.lastCheck = time.Now()
monitor.lastOutput = string(output)
if is_success {
monitor.success()
var alertNotice *AlertNotice
if isSuccess {
alertNotice = monitor.success()
} else {
monitor.failure()
alertNotice = monitor.failure()
}
return is_success
log.Printf(
"Check result for %s: %v, %v at %v",
monitor.Name,
isSuccess,
alertNotice,
monitor.lastCheck,
)
return isSuccess, alertNotice
}
func (monitor Monitor) success() {
// isUp reports whether the monitor has no alerts counted against it, i.e.
// its alertCount is zero.
func (monitor Monitor) isUp() bool {
	alertsSent := monitor.alertCount
	return alertsSent == 0
}
// success records a passing check. When the monitor was not up beforehand it
// returns a recovery AlertNotice (IsUp set to true); otherwise it returns nil.
// The failure and alert counters are reset and lastSuccess is set to now.
func (monitor *Monitor) success() (notice *AlertNotice) {
	log.Printf("Great success!")

	// Create the recovery notice before touching any state so that it still
	// carries the counters and last-success time from before this check.
	if recovered := !monitor.isUp(); recovered {
		notice = monitor.createAlertNotice(true)
	}

	monitor.failureCount, monitor.alertCount = 0, 0
	monitor.lastSuccess = time.Now()

	return notice
}
func (monitor *Monitor) failure() {
// failure records a failing check and decides whether an alert is due.
//
// It increments failureCount and returns nil while fewer than AlertAfter
// failures have been seen. After that the cadence depends on AlertEvery:
//
//   - AlertEvery > 0: alert when the number of failures since the threshold
//     is a multiple of AlertEvery (including the check that reaches it).
//   - AlertEvery == 0: alert once per outage, on the first eligible failure.
//   - AlertEvery < 0: exponential backoff; alert when the failures since the
//     threshold reach 2^alertCount - 1.
//
// When an alert is due it returns a "down" AlertNotice built before alertCount
// is incremented; otherwise it returns nil.
func (monitor *Monitor) failure() (notice *AlertNotice) {
	log.Printf("Devastating failure. :(")
	monitor.failureCount++

	// If we haven't hit the minimum failures, we can exit
	if monitor.failureCount < monitor.AlertAfter {
		// TODO: Turn into a debug
		log.Printf(
			"Have not hit minimum failures. failures: %v alert after: %v",
			monitor.failureCount,
			monitor.AlertAfter,
		)
		return nil
	}

	// Failures counted from the check that reached the AlertAfter threshold.
	failureCount := monitor.failureCount - monitor.AlertAfter

	var shouldAlert bool
	switch {
	case monitor.AlertEvery > 0:
		// Handle integer number of failures before alerting
		shouldAlert = failureCount%monitor.AlertEvery == 0
	case monitor.AlertEvery == 0:
		// Handle alerting on first failure only. alertCount is reset on
		// success, so zero means nothing has been sent for this outage yet.
		// Comparing failureCount against a fixed number would fire one check
		// late (or, depending on AlertAfter, not at all).
		shouldAlert = monitor.alertCount == 0
	default:
		// Handle negative numbers indicating an exponential backoff. The
		// comparison is done in float64 so a large power of two is never
		// converted to an int16 it cannot fit in.
		backoff := math.Pow(2, float64(monitor.alertCount)) - 1
		shouldAlert = float64(failureCount) >= backoff
	}

	if shouldAlert {
		// Build the notice first so it reports the alert count prior to
		// this alert.
		notice = monitor.createAlertNotice(false)
		monitor.alertCount++
	}

	return notice
}
// createAlertNotice snapshots the monitor's name, counters, last output and
// last success time into a new AlertNotice. isUp is stored as the notice's
// IsUp flag.
func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
	// TODO: Maybe add something about recovery status here
	notice := new(AlertNotice)
	notice.MonitorName = monitor.Name
	notice.AlertCount = monitor.alertCount
	notice.FailureCount = monitor.failureCount
	notice.LastCheckOutput = monitor.lastOutput
	notice.LastSuccess = monitor.lastSuccess
	notice.IsUp = isUp
	return notice
}

View File

@ -1,7 +1,6 @@
package main
import (
"log"
"os/exec"
"strings"
)
@ -19,6 +18,6 @@ func escapeCommandShell(command string) string {
// ShellCommand builds an *exec.Cmd that runs the given string through
// `sh -c`, after passing it through escapeCommandShell. The command is only
// constructed here; starting it is left to the caller.
func ShellCommand(command string) *exec.Cmd {
// argv for the shell invocation: sh -c <escaped command>
shellCommand := []string{"sh", "-c", escapeCommandShell(command)}
log.Printf("Command: %v", shellCommand)
//log.Printf("Shell command: %v", shellCommand)
return exec.Command(shellCommand[0], shellCommand[1:]...)
}