Commands all running
This commit is contained in:
parent
dd0b8e3f38
commit
7b746ed62a
95
README.md
95
README.md
@ -1,3 +1,96 @@
|
|||||||
# minitor-go
|
# minitor-go
|
||||||
|
|
||||||
A reimplementation of minitor in Go
|
A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
|
||||||
|
|
||||||
|
Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus, Go feels like a better fit for the project in the longer term.
|
||||||
|
|
||||||
|
The initial target is meant to be roughly compatible, requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go-specific features.
|
||||||
|
|
||||||
|
## Differences from Python version
|
||||||
|
|
||||||
|
There are a few key differences between the Python version and the v0.x Go version.
|
||||||
|
|
||||||
|
First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. E.g.:
|
||||||
|
|
||||||
|
minitor-py:
|
||||||
|
```yaml
|
||||||
|
monitors:
|
||||||
|
- name: Exec command
|
||||||
|
command: ['echo', 'test']
|
||||||
|
- name: Shell command
|
||||||
|
command: echo 'test'
|
||||||
|
```
|
||||||
|
|
||||||
|
minitor-go:
|
||||||
|
```yaml
|
||||||
|
monitors:
|
||||||
|
- name: Exec command
|
||||||
|
command: ['echo', 'test']
|
||||||
|
- name: Shell command
|
||||||
|
command_shell: echo 'test'
|
||||||
|
```
|
||||||
|
|
||||||
|
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built-in Go templating format. E.g.:
|
||||||
|
|
||||||
|
minitor-py:
|
||||||
|
```yaml
|
||||||
|
alerts:
|
||||||
|
log_command:
|
||||||
|
command: ['echo', '{monitor_name}']
|
||||||
|
log_shell:
|
||||||
|
command_shell: "echo {monitor_name}"
|
||||||
|
```
|
||||||
|
|
||||||
|
minitor-go:
|
||||||
|
```yaml
|
||||||
|
alerts:
|
||||||
|
log_command:
|
||||||
|
command: ['echo', '{{.MonitorName}}']
|
||||||
|
log_shell:
|
||||||
|
command_shell: "echo {{.MonitorName}}"
|
||||||
|
```
|
||||||
|
|
||||||
|
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used to separate commands, and line continuations (a trailing `\`) should not be used.
|
||||||
|
|
||||||
|
minitor-py:
|
||||||
|
```yaml
|
||||||
|
alerts:
|
||||||
|
log_shell:
|
||||||
|
command_shell: >
|
||||||
|
echo "line 1"
|
||||||
|
echo "line 2"
|
||||||
|
echo "continued" \
|
||||||
|
"line"
|
||||||
|
```
|
||||||
|
|
||||||
|
minitor-go:
|
||||||
|
```yaml
|
||||||
|
alerts:
|
||||||
|
log_shell:
|
||||||
|
command_shell: >
|
||||||
|
echo "line 1";
|
||||||
|
echo "line 2";
|
||||||
|
echo "continued"
|
||||||
|
"line"
|
||||||
|
```
|
||||||
|
|
||||||
|
## To do
|
||||||
|
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
|
||||||
|
|
||||||
|
Parity:
|
||||||
|
|
||||||
|
- [x] Run monitor commands
|
||||||
|
- [x] Run monitor commands in a shell
|
||||||
|
- [x] Run alert commands
|
||||||
|
- [x] Run alert commands in a shell
|
||||||
|
- [x] Allow templating of alert commands
|
||||||
|
- [ ] Implement Prometheus client to export metrics
|
||||||
|
- [ ] Test coverage
|
||||||
|
|
||||||
|
Improvement:
|
||||||
|
|
||||||
|
- [ ] Implement leveled logging (maybe glog or logrus)
|
||||||
|
- [ ] Consider switching from YAML to TOML
|
||||||
|
- [ ] Consider value of templating vs injecting values into Env variables
|
||||||
|
- [ ] Async checking
|
||||||
|
- [ ] Use durations rather than seconds checked in event loop
|
54
alert.go
54
alert.go
@ -2,7 +2,6 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
|
||||||
"log"
|
"log"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"text/template"
|
"text/template"
|
||||||
@ -13,7 +12,7 @@ type Alert struct {
|
|||||||
Name string
|
Name string
|
||||||
Command []string
|
Command []string
|
||||||
CommandShell string `yaml:"command_shell"`
|
CommandShell string `yaml:"command_shell"`
|
||||||
commandTemplate []template.Template
|
commandTemplate []*template.Template
|
||||||
commandShellTemplate *template.Template
|
commandShellTemplate *template.Template
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -26,42 +25,73 @@ func (alert Alert) IsValid() bool {
|
|||||||
func (alert *Alert) BuildTemplates() {
|
func (alert *Alert) BuildTemplates() {
|
||||||
if alert.commandTemplate == nil && alert.Command != nil {
|
if alert.commandTemplate == nil && alert.Command != nil {
|
||||||
// build template
|
// build template
|
||||||
fmt.Println("Building template for command...")
|
log.Println("Building template for command...")
|
||||||
|
alert.commandTemplate = []*template.Template{}
|
||||||
|
for i, cmdPart := range alert.Command {
|
||||||
|
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
||||||
|
template.New(alert.Name+string(i)).Parse(cmdPart),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
log.Printf("Template built: %v", alert.commandTemplate)
|
||||||
} else if alert.commandShellTemplate == nil && alert.CommandShell != "" {
|
} else if alert.commandShellTemplate == nil && alert.CommandShell != "" {
|
||||||
|
log.Println("Building template for shell command...")
|
||||||
alert.commandShellTemplate = template.Must(
|
alert.commandShellTemplate = template.Must(
|
||||||
template.New(alert.Name).Parse(alert.CommandShell),
|
template.New(alert.Name).Parse(alert.CommandShell),
|
||||||
)
|
)
|
||||||
|
log.Printf("Template built: %v", alert.commandShellTemplate)
|
||||||
} else {
|
} else {
|
||||||
panic("No template?")
|
panic("No template provided?")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (alert Alert) Send(notice AlertNotice) {
|
func (alert *Alert) Send(notice AlertNotice) {
|
||||||
|
// TODO: Validate and build templates in a better place and make this immutable
|
||||||
|
if !alert.IsValid() {
|
||||||
|
log.Fatalf("Alert is invalid: %v", alert)
|
||||||
|
}
|
||||||
|
alert.BuildTemplates()
|
||||||
|
|
||||||
var cmd *exec.Cmd
|
var cmd *exec.Cmd
|
||||||
|
|
||||||
if alert.commandTemplate != nil {
|
if alert.commandTemplate != nil {
|
||||||
// build template
|
// build template
|
||||||
fmt.Println("Send command thing...")
|
log.Println("Send command thing...")
|
||||||
|
command := []string{}
|
||||||
|
for _, cmdTmp := range alert.commandTemplate {
|
||||||
|
var commandBuffer bytes.Buffer
|
||||||
|
err := cmdTmp.Execute(&commandBuffer, notice)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
command = append(command, commandBuffer.String())
|
||||||
|
}
|
||||||
|
cmd = exec.Command(command[0], command[1:]...)
|
||||||
} else if alert.commandShellTemplate != nil {
|
} else if alert.commandShellTemplate != nil {
|
||||||
var commandBuffer bytes.Buffer
|
var commandBuffer bytes.Buffer
|
||||||
err := alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
err := alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
cmd = exec.Command(commandBuffer.String())
|
shellCommand := commandBuffer.String()
|
||||||
|
|
||||||
|
log.Printf("About to run alert command: %s", shellCommand)
|
||||||
|
cmd = ShellCommand(shellCommand)
|
||||||
|
} else {
|
||||||
|
panic("No template compiled?")
|
||||||
|
}
|
||||||
|
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
|
log.Printf("Check %s\n---\n%s\n---", alert.Name, string(output))
|
||||||
|
if err != nil {
|
||||||
} else {
|
panic(err)
|
||||||
panic("No template?")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type AlertNotice struct {
|
type AlertNotice struct {
|
||||||
MonitorName string
|
MonitorName string
|
||||||
AlertCount int64
|
AlertCount int16
|
||||||
FailureCount int64
|
FailureCount int16
|
||||||
LastCheckOutput string
|
LastCheckOutput string
|
||||||
LastSuccess time.Time
|
LastSuccess time.Time
|
||||||
|
IsUp bool
|
||||||
}
|
}
|
||||||
|
@ -9,8 +9,8 @@ import (
|
|||||||
|
|
||||||
type Config struct {
|
type Config struct {
|
||||||
CheckInterval int64 `yaml:"check_interval"`
|
CheckInterval int64 `yaml:"check_interval"`
|
||||||
Monitors []Monitor
|
Monitors []*Monitor
|
||||||
Alerts map[string]Alert
|
Alerts map[string]*Alert
|
||||||
}
|
}
|
||||||
|
|
||||||
func LoadConfig(filePath string) (config Config) {
|
func LoadConfig(filePath string) (config Config) {
|
||||||
@ -24,7 +24,7 @@ func LoadConfig(filePath string) (config Config) {
|
|||||||
|
|
||||||
err = yaml.Unmarshal([]byte(env_expanded), &config)
|
err = yaml.Unmarshal([]byte(env_expanded), &config)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatalf("error: %v", err)
|
log.Fatalf("ERROR: %v", err)
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
24
main.go
24
main.go
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"log"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -10,7 +11,28 @@ func main() {
|
|||||||
for {
|
for {
|
||||||
for _, monitor := range config.Monitors {
|
for _, monitor := range config.Monitors {
|
||||||
if monitor.ShouldCheck() {
|
if monitor.ShouldCheck() {
|
||||||
monitor.Check()
|
_, alertNotice := monitor.Check()
|
||||||
|
if alertNotice != nil {
|
||||||
|
//log.Printf("Recieved an alert notice: %v", alertNotice)
|
||||||
|
var alerts []string
|
||||||
|
if alertNotice.IsUp {
|
||||||
|
alerts = monitor.AlertUp
|
||||||
|
log.Printf("Alert up: %v", monitor.AlertUp)
|
||||||
|
} else {
|
||||||
|
alerts = monitor.AlertDown
|
||||||
|
log.Printf("Alert down: %v", monitor.AlertDown)
|
||||||
|
}
|
||||||
|
if alerts == nil {
|
||||||
|
log.Printf("WARNING: Found alert, but no alert mechanism: %v", alertNotice)
|
||||||
|
}
|
||||||
|
for _, alertName := range alerts {
|
||||||
|
if alert, ok := config.Alerts[alertName]; ok {
|
||||||
|
alert.Send(*alertNotice)
|
||||||
|
} else {
|
||||||
|
log.Printf("WARNING: Could not find alert for %s", alertName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
121
monitor.go
121
monitor.go
@ -2,10 +2,12 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
|
"math"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Monitor represents a particular periodic check of a command
|
||||||
type Monitor struct {
|
type Monitor struct {
|
||||||
// Config values
|
// Config values
|
||||||
Name string
|
Name string
|
||||||
@ -15,65 +17,138 @@ type Monitor struct {
|
|||||||
AlertUp []string `yaml:"alert_up"`
|
AlertUp []string `yaml:"alert_up"`
|
||||||
CheckInterval float64 `yaml:"check_interval"`
|
CheckInterval float64 `yaml:"check_interval"`
|
||||||
AlertAfter int16 `yaml:"alert_after"`
|
AlertAfter int16 `yaml:"alert_after"`
|
||||||
AlertEvey int16 `yaml:"alert_every"`
|
AlertEvery int16 `yaml:"alert_every"`
|
||||||
// Other values
|
// Other values
|
||||||
LastCheck time.Time
|
lastCheck time.Time
|
||||||
LastOutput string
|
lastOutput string
|
||||||
|
alertCount int16
|
||||||
|
failureCount int16
|
||||||
|
lastSuccess time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||||
|
// configured
|
||||||
func (monitor Monitor) IsValid() bool {
|
func (monitor Monitor) IsValid() bool {
|
||||||
atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil)
|
atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil)
|
||||||
atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil)
|
atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil)
|
||||||
return atLeastOneCommand && atMostOneCommand
|
return atLeastOneCommand && atMostOneCommand
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ShouldCheck returns a boolean indicating if the Monitor is ready to be
|
||||||
|
// checked again
|
||||||
func (monitor Monitor) ShouldCheck() bool {
|
func (monitor Monitor) ShouldCheck() bool {
|
||||||
if monitor.LastCheck.IsZero() {
|
if monitor.lastCheck.IsZero() {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
sinceLastCheck := time.Now().Sub(monitor.LastCheck).Seconds()
|
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
|
||||||
return sinceLastCheck >= monitor.CheckInterval
|
return sinceLastCheck >= monitor.CheckInterval
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) Check() bool {
|
// Check will run the command configured by the Monitor and return a status
|
||||||
// TODO: This should probably return a list of alerts since the `raise`
|
// and a possible AlertNotice
|
||||||
// pattern doesn't carry over from Python
|
func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||||
var cmd *exec.Cmd
|
var cmd *exec.Cmd
|
||||||
|
|
||||||
if monitor.Command != nil {
|
if monitor.Command != nil {
|
||||||
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
|
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
|
||||||
} else {
|
} else {
|
||||||
// TODO: Handle a command shell as well. This is untested
|
|
||||||
|
|
||||||
//cmd = exec.Command("sh", "-c", "echo \"This is a test of the command system\"")
|
|
||||||
cmd = ShellCommand(monitor.CommandShell)
|
cmd = ShellCommand(monitor.CommandShell)
|
||||||
}
|
}
|
||||||
|
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
|
//log.Printf("Check %s\n---\n%s\n---", monitor.Name, string(output))
|
||||||
|
|
||||||
is_success := (err == nil)
|
isSuccess := (err == nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Printf("error: %v", err)
|
log.Printf("ERROR: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
monitor.LastCheck = time.Now()
|
monitor.lastCheck = time.Now()
|
||||||
monitor.LastOutput = string(output)
|
monitor.lastOutput = string(output)
|
||||||
|
|
||||||
if is_success {
|
var alertNotice *AlertNotice
|
||||||
monitor.success()
|
if isSuccess {
|
||||||
|
alertNotice = monitor.success()
|
||||||
} else {
|
} else {
|
||||||
monitor.failure()
|
alertNotice = monitor.failure()
|
||||||
}
|
}
|
||||||
|
|
||||||
return is_success
|
log.Printf(
|
||||||
|
"Check result for %s: %v, %v at %v",
|
||||||
|
monitor.Name,
|
||||||
|
isSuccess,
|
||||||
|
alertNotice,
|
||||||
|
monitor.lastCheck,
|
||||||
|
)
|
||||||
|
|
||||||
|
return isSuccess, alertNotice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor Monitor) success() {
|
func (monitor Monitor) isUp() bool {
|
||||||
|
return monitor.alertCount == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||||
log.Printf("Great success!")
|
log.Printf("Great success!")
|
||||||
|
if !monitor.isUp() {
|
||||||
|
// Alert that we have recovered
|
||||||
|
notice = monitor.createAlertNotice(true)
|
||||||
|
}
|
||||||
|
monitor.failureCount = 0
|
||||||
|
monitor.alertCount = 0
|
||||||
|
monitor.lastSuccess = time.Now()
|
||||||
|
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) failure() {
|
func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||||
log.Printf("Devastating failure. :(")
|
log.Printf("Devastating failure. :(")
|
||||||
|
monitor.failureCount++
|
||||||
|
// If we haven't hit the minimum failures, we can exit
|
||||||
|
if monitor.failureCount < monitor.AlertAfter {
|
||||||
|
// TODO: Turn into a debug
|
||||||
|
log.Printf(
|
||||||
|
"Have not hit minimum failures. failures: %v alert after: %v",
|
||||||
|
monitor.failureCount,
|
||||||
|
monitor.AlertAfter,
|
||||||
|
)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
failureCount := (monitor.failureCount - monitor.AlertAfter)
|
||||||
|
|
||||||
|
if monitor.AlertEvery > 0 {
|
||||||
|
// Handle integer number of failures before alerting
|
||||||
|
if failureCount%monitor.AlertEvery == 0 {
|
||||||
|
notice = monitor.createAlertNotice(false)
|
||||||
|
}
|
||||||
|
} else if monitor.AlertEvery == 0 {
|
||||||
|
// Handle alerting on first failure only
|
||||||
|
if failureCount == 1 {
|
||||||
|
notice = monitor.createAlertNotice(false)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Handle negative numbers indicating an exponential backoff
|
||||||
|
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
|
||||||
|
notice = monitor.createAlertNotice(false)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if notice != nil {
|
||||||
|
monitor.alertCount++
|
||||||
|
}
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
|
||||||
|
// TODO: Maybe add something about recovery status here
|
||||||
|
return &AlertNotice{
|
||||||
|
MonitorName: monitor.Name,
|
||||||
|
AlertCount: monitor.alertCount,
|
||||||
|
FailureCount: monitor.failureCount,
|
||||||
|
LastCheckOutput: monitor.lastOutput,
|
||||||
|
LastSuccess: monitor.lastSuccess,
|
||||||
|
IsUp: isUp,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
3
util.go
3
util.go
@ -1,7 +1,6 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
@ -19,6 +18,6 @@ func escapeCommandShell(command string) string {
|
|||||||
/// ShellCommand takes a string and executes it as a command using `sh`
|
/// ShellCommand takes a string and executes it as a command using `sh`
|
||||||
func ShellCommand(command string) *exec.Cmd {
|
func ShellCommand(command string) *exec.Cmd {
|
||||||
shellCommand := []string{"sh", "-c", escapeCommandShell(command)}
|
shellCommand := []string{"sh", "-c", escapeCommandShell(command)}
|
||||||
log.Printf("Command: %v", shellCommand)
|
//log.Printf("Shell command: %v", shellCommand)
|
||||||
return exec.Command(shellCommand[0], shellCommand[1:]...)
|
return exec.Command(shellCommand[0], shellCommand[1:]...)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user