import logging import subprocess import sys from argparse import ArgumentParser from datetime import datetime from itertools import chain from subprocess import CalledProcessError from subprocess import check_output from time import sleep import yamlenv logging.basicConfig( level=logging.INFO, format='%(asctime)s %(levelname)s %(name)s %(message)s' ) logging.getLogger(__name__).addHandler(logging.NullHandler()) def read_yaml(path): """Loads config from a YAML file with env interpolation""" with open(path, 'r') as yaml: contents = yaml.read() return yamlenv.load(contents) def validate_monitor_settings(settings): """Validates that settings for a Monitor are valid Note: Cannot yet validate the Alerts exist from within this class. That will be done by Minitor later """ name = settings.get('name') if not name: raise InvalidMonitorException('Invalid name for monitor') if not settings.get('command'): raise InvalidMonitorException( 'Invalid command for monitor {}'.format(name) ) type_assertions = ( ('check_interval', int), ('alert_after', int), ('alert_every', int), ) for key, val_type in type_assertions: val = settings.get(key) if not isinstance(val, val_type): raise InvalidMonitorException( 'Invalid type on {}: {}. Expected {} and found {}'.format( name, key, val_type.__name__, type(val).__name__ ) ) non_zero = ( 'check_interval', 'alert_after', 'alert_every', ) for key in non_zero: if settings.get(key) == 0: raise InvalidMonitorException( 'Invalid value for {}: {}. Value cannot be 0'.format( name, key ) ) def maybe_decode(bstr, encoding='utf-8'): try: return bstr.decode(encoding) except TypeError: return bstr def call_output(*popenargs, **kwargs): """Similar to check_output, but instead returns output and exception""" # So we can capture complete output, redirect sderr to stdout kwargs.setdefault('stderr', subprocess.STDOUT) output, ex = None, None try: output = check_output(*popenargs, **kwargs) except CalledProcessError as e: output, ex = e.output, e output = output.rstrip(b'\n') return output, ex class InvalidAlertException(Exception): pass class InvalidMonitorException(Exception): pass class MinitorAlert(Exception): def __init__(self, message, monitor): super().__init__(message) self.monitor = monitor class Monitor(object): """Primary configuration item for Minitor""" def __init__(self, config): """Accepts a dictionary of configuration items to override defaults""" settings = { 'alerts': ['log'], 'check_interval': 30, 'alert_after': 4, 'alert_every': -1, } settings.update(config) validate_monitor_settings(settings) self.name = settings['name'] self.command = settings['command'] self.alert_down = settings.get('alert_down', []) if not self.alert_down: self.alert_down = settings.get('alerts', []) self.alert_up = settings.get('alert_up', []) self.check_interval = settings.get('check_interval') self.alert_after = settings.get('alert_after') self.alert_every = settings.get('alert_every') self.alert_count = 0 self.last_check = None self.last_output = None self.last_success = None self.total_failure_count = 0 self.logger = logging.getLogger( '{}({})'.format(self.__class__.__name__, self.name) ) def should_check(self): """Determines if this Monitor should run it's check command""" if not self.last_check: return True since_last_check = (datetime.now() - self.last_check).total_seconds() return since_last_check >= self.check_interval def check(self): """Returns None if skipped, False if failed, and True if successful Will raise an exception if should alert """ if not self.should_check(): return None output, ex = call_output( self.command, shell=isinstance(self.command, str), ) output = maybe_decode(output) self.logger.debug(output) self.last_check = datetime.now() self.last_output = output if ex is None: self.success() return True else: self.failure() return False def success(self): """Handles success tasks""" back_up = None if not self.is_up(): back_up = MinitorAlert( '{} check is up again!'.format(self.name), self, ) self.total_failure_count = 0 self.alert_count = 0 self.last_success = datetime.now() if back_up: raise back_up def failure(self): """Handles failure tasks and possibly raises MinitorAlert""" self.total_failure_count += 1 # Ensure we've hit the minimum number of failures to alert if self.total_failure_count < self.alert_after: return failure_count = (self.total_failure_count - self.alert_after) if self.alert_every > 0: # Otherwise, we should check against our alert_every should_alert = (failure_count % self.alert_every) == 0 else: should_alert = (failure_count >= (2 ** self.alert_count) - 1) if should_alert: self.alert_count += 1 raise MinitorAlert( '{} check has failed {} times'.format( self.name, self.total_failure_count ), self ) def is_up(self): """Indicates if the monitor is already alerting failures""" return self.alert_count == 0 class Alert(object): def __init__(self, name, config): """An alert must be named and have a config dict""" self.name = name self.command = config.get('command') if not self.command: raise InvalidAlertException('Invalid alert {}'.format(self.name)) self.logger = logging.getLogger( '{}({})'.format(self.__class__.__name__, self.name) ) def _formated_command(self, **kwargs): """Formats command array or string with kwargs from Monitor""" if isinstance(self.command, str): return self.command.format(**kwargs) args = [] for arg in self.command: args.append(arg.format(**kwargs)) return args def _format_datetime(self, dt): """Formats a datetime for an alert""" if dt is None: return 'Never' return dt.isoformat() def alert(self, message, monitor): """Calls the alert command for the provided monitor""" output, ex = call_output( self._formated_command( alert_count=monitor.alert_count, alert_message=message, failure_count=monitor.total_failure_count, last_output=monitor.last_output, last_success=self._format_datetime(monitor.last_success), monitor_name=monitor.name, ), shell=isinstance(self.command, str), ) self.logger.error(maybe_decode(output)) if ex is not None: raise ex class Minitor(object): monitors = None alerts = None state = None check_interval = None def __init__(self): self.logger = logging.getLogger(self.__class__.__name__) def setup(self, config_path): """Load all setup from YAML file at provided path""" config = read_yaml(config_path) self.check_interval = config.get('check_interval', 30) self.monitors = [Monitor(mon) for mon in config.get('monitors', [])] # Add default alert for logging self.alerts = { 'log': Alert( 'log', {'command': ['echo', '{alert_message}!']} ) } self.alerts.update({ alert_name: Alert(alert_name, alert) for alert_name, alert in config.get('alerts', {}).items() }) def validate_monitors(self): """Validates monitors are valid against other config values""" for monitor in self.monitors: # Validate that the interval is valid if monitor.check_interval < self.check_interval: raise InvalidMonitorException( 'Monitor {} check interval is lower global value {}'.format( monitor.name, self.check_interval ) ) # Validate that the the alerts for the monitor exist for alert in chain(monitor.alert_down, monitor.alert_up): if alert not in self.alerts: raise InvalidMonitorException( 'Monitor {} contains an unknown alert: {}'.format( monitor.name, alert ) ) def handle_minitor_alert(self, minitor_alert): """Issues all alerts for a provided monitor""" monitor = minitor_alert.monitor alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down for alert in alerts: self.alerts[alert].alert(str(minitor_alert), monitor) def parse_args(self): """Parses command line arguments and returns them""" parser = ArgumentParser(description='Minimal monitoring') parser.add_argument( '--config', '-c', dest='config_path', default='config.yml', help='Path to the config YAML file to use', ) return parser.parse_args() def run(self): """Runs Minitor in a loop""" args = self.parse_args() self.setup(args.config_path) self.validate_monitors() while True: for monitor in self.monitors: try: result = monitor.check() if result is not None: self.logger.info( '%s: %s', monitor.name, 'SUCCESS' if result else 'FAILURE' ) except MinitorAlert as minitor_alert: self.logger.warn(minitor_alert) self.handle_minitor_alert(minitor_alert) sleep(self.check_interval) def main(): try: Minitor().run() except KeyboardInterrupt: pass return 0 if __name__ == '__main__': sys.exit(main())