From aad28976e26d10324dbf41132c2c925235693ae9 Mon Sep 17 00:00:00 2001 From: Ian Fijolek Date: Sat, 14 Apr 2018 17:27:37 -0700 Subject: [PATCH] Add alerting for recovered monitors Based on the idea from SeaLife, adds alerts for when a monitor comes out of an alerting down state. Also includes a bunch of unit tests to cover the new code. --- minitor/main.py | 45 +++++++++++++++++++++++++++++++---------- tests/alert_test.py | 16 ++++++++++----- tests/monitor_test.py | 47 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+), 16 deletions(-) diff --git a/minitor/main.py b/minitor/main.py index ef457c3..c0539a6 100644 --- a/minitor/main.py +++ b/minitor/main.py @@ -98,7 +98,9 @@ class InvalidMonitorException(Exception): class MinitorAlert(Exception): - pass + def __init__(self, message, monitor): + super().__init__(message) + self.monitor = monitor class Monitor(object): @@ -117,7 +119,10 @@ class Monitor(object): self.name = settings['name'] self.command = settings['command'] - self.alerts = settings.get('alerts', []) + self.alert_down = settings.get('alert_down', []) + if not self.alert_down: + self.alert_down = settings.get('alerts', []) + self.alert_up = settings.get('alert_up', []) self.check_interval = settings.get('check_interval') self.alert_after = settings.get('alert_after') self.alert_every = settings.get('alert_every') @@ -162,9 +167,17 @@ class Monitor(object): def success(self): """Handles success tasks""" + back_up = None + if not self.is_up(): + back_up = MinitorAlert( + '{} check is up again!'.format(self.name), + self, + ) self.total_failure_count = 0 self.alert_count = 0 self.last_success = datetime.now() + if back_up: + raise back_up def failure(self): """Handles failure tasks and possibly raises MinitorAlert""" @@ -182,9 +195,16 @@ class Monitor(object): if should_alert: self.alert_count += 1 - raise MinitorAlert('{} check has failed {} times'.format( - self.name, self.total_failure_count - )) + raise MinitorAlert( + '{} check has failed {} times'.format( + self.name, self.total_failure_count + ), + self + ) + + def is_up(self): + """Indicates if the monitor is already alerting failures""" + return self.alert_count == 0 class Alert(object): @@ -214,11 +234,12 @@ class Alert(object): return 'Never' return dt.isoformat() - def alert(self, monitor): + def alert(self, message, monitor): """Calls the alert command for the provided monitor""" output, ex = call_output( self._formated_command( alert_count=monitor.alert_count, + alert_message=message, monitor_name=monitor.name, failure_count=monitor.total_failure_count, last_success=self._format_datetime(monitor.last_success), @@ -248,7 +269,7 @@ class Minitor(object): self.alerts = { 'log': Alert( 'log', - {'command': ['echo', '{monitor_name} has failed!']} + {'command': ['echo', '{alert_message}!']} ) } self.alerts.update({ @@ -275,10 +296,12 @@ class Minitor(object): ) ) - def alert_for_monitor(self, monitor): + def handle_minitor_alert(self, minitor_alert): """Issues all alerts for a provided monitor""" - for alert in monitor.alerts: - self.alerts[alert].alert(monitor) + monitor = minitor_alert.monitor + alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down + for alert in alerts: + self.alerts[alert].alert(str(minitor_alert), monitor) def parse_args(self): """Parses command line arguments and returns them""" @@ -309,7 +332,7 @@ class Minitor(object): ) except MinitorAlert as minitor_alert: self.logger.warn(minitor_alert) - self.alert_for_monitor(monitor) + self.handle_minitor_alert(minitor_alert) sleep(self.check_interval) diff --git a/tests/alert_test.py b/tests/alert_test.py index 294633a..d19452d 100644 --- a/tests/alert_test.py +++ b/tests/alert_test.py @@ -32,20 +32,26 @@ class TestAlert(object): ) @pytest.mark.parametrize( - 'last_success', + 'last_success,expected_success', [ (None, 'Never'), (datetime(2018, 4, 10), '2018-04-10T00:00:00') ] ) - def test_simple_alert(self, monitor, echo_alert, last_success): + def test_simple_alert( + self, + monitor, + echo_alert, + last_success, + expected_success + ): monitor.total_failure_count = 1 monitor.alert_count = 1 - monitor.last_success = last_success[0] + monitor.last_success = last_success with patch.object(echo_alert.logger, 'error') as mock_error: - echo_alert.alert(monitor) + echo_alert.alert('Exception message', monitor) mock_error.assert_called_once_with( 'Dummy Monitor has failed 1 time(s)!\n' 'We have alerted 1 time(s)\n' - 'Last success was ' + last_success[1] + 'Last success was ' + expected_success ) diff --git a/tests/monitor_test.py b/tests/monitor_test.py index 7089bc1..32959d1 100644 --- a/tests/monitor_test.py +++ b/tests/monitor_test.py @@ -1,3 +1,6 @@ +from datetime import datetime +from unittest.mock import patch + import pytest from minitor.main import InvalidMonitorException @@ -13,6 +16,11 @@ class TestMonitor(object): return Monitor({ 'name': 'Sample Monitor', 'command': ['echo', 'foo'], + 'alert_down': ['log'], + 'alert_up': ['log'], + 'check_interval': 1, + 'alert_after': 1, + 'alert_every': 1, }) @pytest.mark.parametrize('settings', [ @@ -92,3 +100,42 @@ class TestMonitor(object): monitor.failure() else: monitor.failure() + + @pytest.mark.parametrize('last_check', [None, datetime(2018, 4, 10)]) + def test_monitor_should_check(self, monitor, last_check): + monitor.last_check = last_check + assert monitor.should_check() + + def test_monitor_check_fail(self, monitor): + with patch.object(monitor, 'failure') as mock_failure: + monitor.command = ['ls', '--not-real'] + assert not monitor.check() + mock_failure.assert_called_once() + + def test_monitor_check_success(self, monitor): + with patch.object(monitor, 'success') as mock_success: + assert monitor.check() + mock_success.assert_called_once() + + @pytest.mark.parametrize('failure_count', [0, 1]) + def test_monitor_success(self, monitor, failure_count): + monitor.alert_count = 0 + monitor.total_failure_count = failure_count + assert monitor.last_success is None + + monitor.success() + + assert monitor.alert_count == 0 + assert monitor.last_success is not None + assert monitor.total_failure_count == 0 + + def test_monitor_success_back_up(self, monitor): + monitor.total_failure_count = 1 + monitor.alert_count = 1 + + with pytest.raises(MinitorAlert): + monitor.success() + + assert monitor.alert_count == 0 + assert monitor.last_success is not None + assert monitor.total_failure_count == 0