Add alerting for recovered monitors

Based on an idea from SeaLife, this adds alerts that fire when a monitor
recovers from an alerting (down) state. Also includes a bunch of unit
tests to cover the new code.
This commit is contained in:
IamTheFij 2018-04-14 17:27:37 -07:00
parent 4fe8020a77
commit aad28976e2
3 changed files with 92 additions and 16 deletions

View File

@ -98,7 +98,9 @@ class InvalidMonitorException(Exception):
class MinitorAlert(Exception):
    """Exception signalling that alerts should be issued for a monitor.

    The monitor that raised the alert travels with the exception so that
    the handler can read that monitor's alert lists and counters.
    """

    def __init__(self, message, monitor):
        """Store the alert text and the originating monitor.

        :param message: text used as the exception message
        :param monitor: object kept unchanged on ``self.monitor``
        """
        super().__init__(message)
        self.monitor = monitor
class Monitor(object):
@ -117,7 +119,10 @@ class Monitor(object):
self.name = settings['name']
self.command = settings['command']
self.alerts = settings.get('alerts', [])
self.alert_down = settings.get('alert_down', [])
if not self.alert_down:
self.alert_down = settings.get('alerts', [])
self.alert_up = settings.get('alert_up', [])
self.check_interval = settings.get('check_interval')
self.alert_after = settings.get('alert_after')
self.alert_every = settings.get('alert_every')
@ -162,9 +167,17 @@ class Monitor(object):
def success(self):
    """Reset failure tracking after a passing check.

    Zeroes ``total_failure_count`` and ``alert_count`` and stamps
    ``last_success`` with the current time. Raises MinitorAlert carrying
    this monitor when ``is_up()`` was false before the reset.
    """
    # Capture the state first: zeroing alert_count below makes is_up()
    # true again, which would hide the recovery.
    recovered = not self.is_up()

    self.total_failure_count = 0
    self.alert_count = 0
    self.last_success = datetime.now()

    if recovered:
        raise MinitorAlert(
            '{} check is up again!'.format(self.name),
            self,
        )
def failure(self):
"""Handles failure tasks and possibly raises MinitorAlert"""
@ -182,9 +195,16 @@ class Monitor(object):
if should_alert:
self.alert_count += 1
raise MinitorAlert('{} check has failed {} times'.format(
self.name, self.total_failure_count
))
raise MinitorAlert(
'{} check has failed {} times'.format(
self.name, self.total_failure_count
),
self
)
def is_up(self):
    """Return True when no alerts are counted for this monitor.

    ``alert_count`` is incremented when a failure alert is raised and is
    reset to 0 on success, so a zero count means the monitor is not
    currently alerting failures.
    """
    return self.alert_count == 0
class Alert(object):
@ -214,11 +234,12 @@ class Alert(object):
return 'Never'
return dt.isoformat()
def alert(self, monitor):
def alert(self, message, monitor):
"""Calls the alert command for the provided monitor"""
output, ex = call_output(
self._formated_command(
alert_count=monitor.alert_count,
alert_message=message,
monitor_name=monitor.name,
failure_count=monitor.total_failure_count,
last_success=self._format_datetime(monitor.last_success),
@ -248,7 +269,7 @@ class Minitor(object):
self.alerts = {
'log': Alert(
'log',
{'command': ['echo', '{monitor_name} has failed!']}
{'command': ['echo', '{alert_message}!']}
)
}
self.alerts.update({
@ -275,10 +296,12 @@ class Minitor(object):
)
)
def handle_minitor_alert(self, minitor_alert):
    """Issue the alerts configured for the monitor attached to an alert.

    Uses the monitor's ``alert_up`` list when ``monitor.is_up()`` is true
    and its ``alert_down`` list otherwise, then calls each named alert
    from ``self.alerts`` with the exception's message text and the monitor.

    :param minitor_alert: MinitorAlert whose ``monitor`` attribute is the
        monitor that raised it
    """
    monitor = minitor_alert.monitor
    alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down
    # The message is the same for every alert, so render it once.
    message = str(minitor_alert)
    for alert in alerts:
        self.alerts[alert].alert(message, monitor)
def parse_args(self):
"""Parses command line arguments and returns them"""
@ -309,7 +332,7 @@ class Minitor(object):
)
except MinitorAlert as minitor_alert:
self.logger.warn(minitor_alert)
self.alert_for_monitor(monitor)
self.handle_minitor_alert(minitor_alert)
sleep(self.check_interval)

View File

@ -32,20 +32,26 @@ class TestAlert(object):
)
@pytest.mark.parametrize(
    'last_success,expected_success',
    [
        (None, 'Never'),
        (datetime(2018, 4, 10), '2018-04-10T00:00:00')
    ]
)
def test_simple_alert(
    self,
    monitor,
    echo_alert,
    last_success,
    expected_success
):
    """Check the text the echo alert logs for each last-success value.

    Each case pairs a ``last_success`` value with the string expected at
    the end of the logged message.
    """
    monitor.total_failure_count = 1
    monitor.alert_count = 1
    monitor.last_success = last_success
    with patch.object(echo_alert.logger, 'error') as mock_error:
        echo_alert.alert('Exception message', monitor)
    # The mock keeps its call record after the patch is undone.
    mock_error.assert_called_once_with(
        'Dummy Monitor has failed 1 time(s)!\n'
        'We have alerted 1 time(s)\n'
        'Last success was ' + expected_success
    )

View File

@ -1,3 +1,6 @@
from datetime import datetime
from unittest.mock import patch
import pytest
from minitor.main import InvalidMonitorException
@ -13,6 +16,11 @@ class TestMonitor(object):
return Monitor({
'name': 'Sample Monitor',
'command': ['echo', 'foo'],
'alert_down': ['log'],
'alert_up': ['log'],
'check_interval': 1,
'alert_after': 1,
'alert_every': 1,
})
@pytest.mark.parametrize('settings', [
@ -92,3 +100,42 @@ class TestMonitor(object):
monitor.failure()
else:
monitor.failure()
@pytest.mark.parametrize('last_check', [None, datetime(2018, 4, 10)])
def test_monitor_should_check(self, monitor, last_check):
    """should_check() is truthy for both parametrized last_check values."""
    monitor.last_check = last_check

    is_due = monitor.should_check()

    assert is_due
def test_monitor_check_fail(self, monitor):
    """check() is falsy and calls failure() once for a bad command."""
    with patch.object(monitor, 'failure') as failure_hook:
        # An ls call with a bogus flag stands in for a failing command.
        monitor.command = ['ls', '--not-real']
        outcome = monitor.check()
        assert not outcome
        failure_hook.assert_called_once()
def test_monitor_check_success(self, monitor):
    """check() is truthy and calls success() once for the fixture command."""
    with patch.object(monitor, 'success') as success_hook:
        outcome = monitor.check()
        assert outcome
        success_hook.assert_called_once()
@pytest.mark.parametrize('failure_count', [0, 1])
def test_monitor_success(self, monitor, failure_count):
    """success() on a non-alerting monitor resets counters without raising."""
    # Arrange: alert_count stays 0, so no alert is expected.
    monitor.alert_count = 0
    monitor.total_failure_count = failure_count
    assert monitor.last_success is None

    # Act
    monitor.success()

    # Assert: counters are cleared and a success time is recorded.
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
    assert monitor.total_failure_count == 0
def test_monitor_success_back_up(self, monitor):
    """success() on an alerting monitor raises MinitorAlert and resets state."""
    monitor.total_failure_count = 1
    monitor.alert_count = 1

    with pytest.raises(MinitorAlert):
        monitor.success()

    # These checks stay outside the raises block: statements placed after
    # the raising call inside it would never run.
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
    assert monitor.total_failure_count == 0