Add alerting for recovered monitors

Based on the idea from SeaLife, adds alerts for when a monitor comes out
of an alerting down state. Also includes a bunch of unit tests to cover
the new code.
This commit is contained in:
IamTheFij 2018-04-14 17:27:37 -07:00
parent 4fe8020a77
commit aad28976e2
3 changed files with 92 additions and 16 deletions

View File

@ -98,7 +98,9 @@ class InvalidMonitorException(Exception):
class MinitorAlert(Exception): class MinitorAlert(Exception):
pass def __init__(self, message, monitor):
super().__init__(message)
self.monitor = monitor
class Monitor(object): class Monitor(object):
@ -117,7 +119,10 @@ class Monitor(object):
self.name = settings['name'] self.name = settings['name']
self.command = settings['command'] self.command = settings['command']
self.alerts = settings.get('alerts', []) self.alert_down = settings.get('alert_down', [])
if not self.alert_down:
self.alert_down = settings.get('alerts', [])
self.alert_up = settings.get('alert_up', [])
self.check_interval = settings.get('check_interval') self.check_interval = settings.get('check_interval')
self.alert_after = settings.get('alert_after') self.alert_after = settings.get('alert_after')
self.alert_every = settings.get('alert_every') self.alert_every = settings.get('alert_every')
@ -162,9 +167,17 @@ class Monitor(object):
def success(self): def success(self):
"""Handles success tasks""" """Handles success tasks"""
back_up = None
if not self.is_up():
back_up = MinitorAlert(
'{} check is up again!'.format(self.name),
self,
)
self.total_failure_count = 0 self.total_failure_count = 0
self.alert_count = 0 self.alert_count = 0
self.last_success = datetime.now() self.last_success = datetime.now()
if back_up:
raise back_up
def failure(self): def failure(self):
"""Handles failure tasks and possibly raises MinitorAlert""" """Handles failure tasks and possibly raises MinitorAlert"""
@ -182,9 +195,16 @@ class Monitor(object):
if should_alert: if should_alert:
self.alert_count += 1 self.alert_count += 1
raise MinitorAlert('{} check has failed {} times'.format( raise MinitorAlert(
'{} check has failed {} times'.format(
self.name, self.total_failure_count self.name, self.total_failure_count
)) ),
self
)
def is_up(self):
    """Return True while the monitor is not in an alerting (down) state.

    The monitor counts as "up" when ``alert_count`` is zero, meaning no
    failure alert has been raised since the counter was last reset.
    """
    return self.alert_count == 0
class Alert(object): class Alert(object):
@ -214,11 +234,12 @@ class Alert(object):
return 'Never' return 'Never'
return dt.isoformat() return dt.isoformat()
def alert(self, monitor): def alert(self, message, monitor):
"""Calls the alert command for the provided monitor""" """Calls the alert command for the provided monitor"""
output, ex = call_output( output, ex = call_output(
self._formated_command( self._formated_command(
alert_count=monitor.alert_count, alert_count=monitor.alert_count,
alert_message=message,
monitor_name=monitor.name, monitor_name=monitor.name,
failure_count=monitor.total_failure_count, failure_count=monitor.total_failure_count,
last_success=self._format_datetime(monitor.last_success), last_success=self._format_datetime(monitor.last_success),
@ -248,7 +269,7 @@ class Minitor(object):
self.alerts = { self.alerts = {
'log': Alert( 'log': Alert(
'log', 'log',
{'command': ['echo', '{monitor_name} has failed!']} {'command': ['echo', '{alert_message}!']}
) )
} }
self.alerts.update({ self.alerts.update({
@ -275,10 +296,12 @@ class Minitor(object):
) )
) )
def alert_for_monitor(self, monitor): def handle_minitor_alert(self, minitor_alert):
"""Issues all alerts for a provided monitor""" """Issues all alerts for a provided monitor"""
for alert in monitor.alerts: monitor = minitor_alert.monitor
self.alerts[alert].alert(monitor) alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down
for alert in alerts:
self.alerts[alert].alert(str(minitor_alert), monitor)
def parse_args(self): def parse_args(self):
"""Parses command line arguments and returns them""" """Parses command line arguments and returns them"""
@ -309,7 +332,7 @@ class Minitor(object):
) )
except MinitorAlert as minitor_alert: except MinitorAlert as minitor_alert:
self.logger.warn(minitor_alert) self.logger.warn(minitor_alert)
self.alert_for_monitor(monitor) self.handle_minitor_alert(minitor_alert)
sleep(self.check_interval) sleep(self.check_interval)

View File

@ -32,20 +32,26 @@ class TestAlert(object):
) )
@pytest.mark.parametrize( @pytest.mark.parametrize(
'last_success', 'last_success,expected_success',
[ [
(None, 'Never'), (None, 'Never'),
(datetime(2018, 4, 10), '2018-04-10T00:00:00') (datetime(2018, 4, 10), '2018-04-10T00:00:00')
] ]
) )
def test_simple_alert(self, monitor, echo_alert, last_success): def test_simple_alert(
self,
monitor,
echo_alert,
last_success,
expected_success
):
monitor.total_failure_count = 1 monitor.total_failure_count = 1
monitor.alert_count = 1 monitor.alert_count = 1
monitor.last_success = last_success[0] monitor.last_success = last_success
with patch.object(echo_alert.logger, 'error') as mock_error: with patch.object(echo_alert.logger, 'error') as mock_error:
echo_alert.alert(monitor) echo_alert.alert('Exception message', monitor)
mock_error.assert_called_once_with( mock_error.assert_called_once_with(
'Dummy Monitor has failed 1 time(s)!\n' 'Dummy Monitor has failed 1 time(s)!\n'
'We have alerted 1 time(s)\n' 'We have alerted 1 time(s)\n'
'Last success was ' + last_success[1] 'Last success was ' + expected_success
) )

View File

@ -1,3 +1,6 @@
from datetime import datetime
from unittest.mock import patch
import pytest import pytest
from minitor.main import InvalidMonitorException from minitor.main import InvalidMonitorException
@ -13,6 +16,11 @@ class TestMonitor(object):
return Monitor({ return Monitor({
'name': 'Sample Monitor', 'name': 'Sample Monitor',
'command': ['echo', 'foo'], 'command': ['echo', 'foo'],
'alert_down': ['log'],
'alert_up': ['log'],
'check_interval': 1,
'alert_after': 1,
'alert_every': 1,
}) })
@pytest.mark.parametrize('settings', [ @pytest.mark.parametrize('settings', [
@ -92,3 +100,42 @@ class TestMonitor(object):
monitor.failure() monitor.failure()
else: else:
monitor.failure() monitor.failure()
@pytest.mark.parametrize('last_check', [None, datetime(2018, 4, 10)])
def test_monitor_should_check(self, monitor, last_check):
    """should_check() is truthy with no last check or one from 2018-04-10.

    NOTE(review): the dated case presumably relies on the fixture's
    ``check_interval`` of 1 having long since elapsed -- confirm.
    """
    monitor.last_check = last_check
    assert monitor.should_check()
def test_monitor_check_fail(self, monitor):
    """check() is falsy and calls failure() once for a failing command."""
    # failure() is patched so only the dispatch from check() is verified.
    with patch.object(monitor, 'failure') as mock_failure:
        # Swap in a command that is expected to fail (invalid ls flag).
        monitor.command = ['ls', '--not-real']
        assert not monitor.check()
        mock_failure.assert_called_once()
def test_monitor_check_success(self, monitor):
    """check() is truthy and calls success() once for the fixture command."""
    # success() is patched so only the dispatch from check() is verified.
    with patch.object(monitor, 'success') as mock_success:
        assert monitor.check()
        mock_success.assert_called_once()
@pytest.mark.parametrize('failure_count', [0, 1])
def test_monitor_success(self, monitor, failure_count):
    """success() resets counters and sets last_success when not alerting.

    With ``alert_count`` at zero the call is expected to return without
    raising, whether or not a failure was counted beforehand.
    """
    monitor.alert_count = 0
    monitor.total_failure_count = failure_count
    assert monitor.last_success is None
    monitor.success()
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
    assert monitor.total_failure_count == 0
def test_monitor_success_back_up(self, monitor):
    """success() raises MinitorAlert after alerting, and still resets state."""
    monitor.total_failure_count = 1
    monitor.alert_count = 1
    with pytest.raises(MinitorAlert):
        monitor.success()
    # Checked after the raises block: the state must already be reset even
    # though success() exited by raising.
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
    assert monitor.total_failure_count == 0