"""Minitor: minimal monitoring driven by a YAML configuration file.

This is the state of ``minitor/main.py`` after the "MVP!" patch, restored to
conventional formatting.  Each configured monitor runs a command on an
interval; once the command has failed often enough, the alerts attached to
that monitor are executed.

Sample configuration (``sample-config.yml``).  Alert commands are passed
through ``str.format`` with ``monitor_name`` and ``alert_name``; ``${VAR}``
references are left for yamlenv to interpolate from the environment::

    check_interval: 30

    monitors:
      - name: My Website
        command: [ 'curl', 'https://minitor.mon' ]
        alerts: [ log, email, sms ]
        check_interval: 30
        alert_after: 3
        alert_every: 4

    alerts:
      email:
        command: [ sendmail, "me@minitor.mon", "Failure: {monitor_name}",
                   "This thing failed!" ]
      mailgun:
        command: >
          curl -s -X POST
          -F subject="Alert! {alert_name} failed"
          -F from="Minitor "
          -F to=me@minitor.mon
          -F text="Our monitor failed"
          https://api.mailgun.net/v3/minitor.mon/messages
          -u "api:${TWILIO_API_KEY}"
      sms:
        command: >
          curl -s -X POST -F "Body=Failure: {monitor_name} has failed"
          -F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
          "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
          -u "${ACCOUNT_SID}:${AUTH_TOKEN}"
"""
from argparse import ArgumentParser
from datetime import datetime
from os import environ
from subprocess import CalledProcessError
from subprocess import call
from subprocess import check_call
from time import sleep


# TODO: validate on start
def read_yaml(path):
    """Loads config from a YAML file with env interpolation

    :param path: path of the YAML file to read
    :returns: whatever ``yamlenv.load`` produces for the file contents
    """
    # Imported lazily so the rest of the module (validation and alert logic)
    # can be imported without the third-party dependency installed.
    import yamlenv

    with open(path, 'r') as config_file:
        contents = config_file.read()
    return yamlenv.load(contents)


class InvalidAlertException(Exception):
    """Raised when an alert definition has no usable command"""


class InvalidMonitorException(Exception):
    """Raised when a monitor definition fails validation"""


class MinitorAlert(Exception):
    """Raised by a Monitor to signal that its alerts should be issued"""


class Monitor(object):
    """Primary configuration item for Minitor"""

    def __init__(self, config):
        """Accepts a dictionary of configuration items to override defaults

        :raises InvalidMonitorException: if the merged settings are invalid
        """
        settings = {
            'alerts': ['log'],
            'check_interval': 30,
            'alert_after': 3,
            'alert_every': 4,
        }
        settings.update(config)
        self.validate_settings(settings)

        self.name = settings['name']
        self.command = settings['command']
        self.alerts = settings.get('alerts', [])
        self.check_interval = settings.get('check_interval')
        self.alert_after = settings.get('alert_after')
        self.alert_every = settings.get('alert_every')

        self.last_check = None
        self.failure_count = 0

    def validate_settings(self, settings):
        """Validates that settings for this Monitor are valid

        Note: Cannot yet validate the Alerts exist from within this class.
        That will be done by Minitor later

        :raises InvalidMonitorException: on a missing name or command, or on
            an interval/count that is not a positive integer
        """
        name = settings.get('name')
        if not name:
            raise InvalidMonitorException('Invalid name for monitor')
        if not settings.get('command'):
            raise InvalidMonitorException(
                'Invalid command for monitor {}'.format(name)
            )

        type_assertions = (
            ('check_interval', int),
            ('alert_after', int),
            ('alert_every', int),
        )

        for key, val_type in type_assertions:
            val = settings.get(key)
            # bool is a subclass of int; ``alert_every: true`` is a mistake
            if isinstance(val, bool) or not isinstance(val, val_type):
                raise InvalidMonitorException(
                    'Invalid type on {} {}. Expected {} and found {}'.format(
                        name, key, val_type.__name__, type(val).__name__
                    )
                )
            # A zero alert_every would divide by zero in failure(); reject
            # non-positive values here instead of crashing mid-run.
            if val < 1:
                raise InvalidMonitorException(
                    'Invalid value on {} {}. Expected a positive {} and '
                    'found {}'.format(name, key, val_type.__name__, val)
                )

    def should_check(self):
        """Determines if this Monitor should run its check command"""
        if not self.last_check:
            return True
        since_last_check = (datetime.now() - self.last_check).total_seconds()
        return since_last_check >= self.check_interval

    def check(self):
        """Returns None if skipped, False if failed, and True if successful

        Will raise an exception if should alert
        """
        if not self.should_check():
            return None
        result = call(self.command, shell=isinstance(self.command, str))
        self.last_check = datetime.now()
        if result == 0:
            self.success()
            return True
        self.failure()
        return False

    def success(self):
        """Handles success tasks"""
        self.failure_count = 0

    def failure(self):
        """Handles failure tasks and possibly raises MinitorAlert

        The first alert is raised on failure number ``alert_after`` and then
        again on every ``alert_every``-th consecutive failure after that.
        """
        self.failure_count += 1
        if self.failure_count < self.alert_after:
            return
        # Count the cadence from the first alert, not from zero; otherwise
        # alert_after=3 / alert_every=4 would first alert on failure 4.
        failures_since_first_alert = self.failure_count - self.alert_after
        if failures_since_first_alert % self.alert_every == 0:
            raise MinitorAlert('{} check has failed {} times'.format(
                self.name, self.failure_count
            ))


class Alert(object):
    """A named command that is executed when a monitor alerts"""

    def __init__(self, name, config):
        """An alert must be named and have a config dict

        :raises InvalidAlertException: if the config has no command
        """
        self.name = name
        self.command = (config or {}).get('command')
        if not self.command:
            raise InvalidAlertException('Invalid alert {}'.format(self.name))

    def _formated_command(self, **kwargs):
        """Formats command array or string with kwargs from Monitor"""
        if isinstance(self.command, str):
            return self.command.format(**kwargs)
        return [arg.format(**kwargs) for arg in self.command]

    def alert(self, monitor):
        """Calls the alert command for the provided monitor

        The command may reference ``{monitor_name}`` and ``{alert_name}``.

        :raises CalledProcessError: if the command exits non-zero
        """
        check_call(
            self._formated_command(
                monitor_name=monitor.name,
                alert_name=self.name,
            ),
            shell=isinstance(self.command, str),
        )


class Minitor(object):
    """Loads the configuration and runs every monitor in a loop"""

    monitors = None
    alerts = None
    state = None
    check_interval = None

    def setup(self, config_path):
        """Load all setup from YAML file at provided path"""
        # An empty YAML document loads as None; treat it as an empty config
        config = read_yaml(config_path) or {}
        self.check_interval = config.get('check_interval', 30)
        self.monitors = [
            Monitor(mon) for mon in config.get('monitors') or []
        ]
        # Add default alert for logging
        self.alerts = {
            'log': Alert(
                'log',
                {'command': ['echo', '{monitor_name} has failed!']}
            )
        }
        self.alerts.update({
            alert_name: Alert(alert_name, alert)
            for alert_name, alert in (config.get('alerts') or {}).items()
        })

    def validate_monitors(self):
        """Validates monitors are valid against other config values

        :raises InvalidMonitorException: if a monitor checks more often than
            the main loop runs, or references an alert that is not defined
        """
        for monitor in self.monitors:
            # Validate that the interval is valid
            if monitor.check_interval < self.check_interval:
                raise InvalidMonitorException(
                    'Monitor {} check interval is lower than global value {}'
                    .format(monitor.name, self.check_interval)
                )
            # Validate that the alerts for the monitor exist
            for alert in monitor.alerts:
                if alert not in self.alerts:
                    raise InvalidMonitorException(
                        'Monitor {} contains an unknown alert: {}'.format(
                            monitor.name, alert
                        )
                    )

    def alert_for_monitor(self, monitor):
        """Issues all alerts for a provided monitor

        A failing alert command is reported and skipped so that one broken
        alert neither blocks the remaining alerts nor stops the main loop.
        """
        for alert in monitor.alerts:
            try:
                self.alerts[alert].alert(monitor)
            except (CalledProcessError, OSError) as error:
                print('Alert {} failed for monitor {}: {}'.format(
                    alert, monitor.name, error
                ))

    def parse_args(self):
        """Parses command line arguments and returns them"""
        parser = ArgumentParser(description='Minimal monitoring')
        parser.add_argument(
            '--config', '-c',
            dest='config_path',
            default='config.yml',
            help='Path to the config YAML file to use'
        )
        return parser.parse_args()

    def run(self):
        """Runs Minitor in a loop"""
        args = self.parse_args()
        self.setup(args.config_path)
        self.validate_monitors()

        while True:
            for monitor in self.monitors:
                try:
                    result = monitor.check()
                except MinitorAlert as minitor_alert:
                    print(minitor_alert)
                    self.alert_for_monitor(monitor)
                else:
                    # None means the monitor was not due for a check
                    if result is not None:
                        print('{}: {}'.format(
                            monitor.name,
                            'SUCCESS' if result else 'FAILURE'
                        ))

            sleep(self.check_interval)


if __name__ == '__main__':
    Minitor().run()