Add alerting for recovered monitors
Based on the idea from SeaLife, adds alerts for when a monitor comes out of an alerting down state. Also includes a bunch of unit tests to cover the new code.
This commit is contained in:
parent
4fe8020a77
commit
aad28976e2
@ -98,7 +98,9 @@ class InvalidMonitorException(Exception):
|
||||
|
||||
|
||||
class MinitorAlert(Exception):
|
||||
pass
|
||||
def __init__(self, message, monitor):
|
||||
super().__init__(message)
|
||||
self.monitor = monitor
|
||||
|
||||
|
||||
class Monitor(object):
|
||||
@ -117,7 +119,10 @@ class Monitor(object):
|
||||
|
||||
self.name = settings['name']
|
||||
self.command = settings['command']
|
||||
self.alerts = settings.get('alerts', [])
|
||||
self.alert_down = settings.get('alert_down', [])
|
||||
if not self.alert_down:
|
||||
self.alert_down = settings.get('alerts', [])
|
||||
self.alert_up = settings.get('alert_up', [])
|
||||
self.check_interval = settings.get('check_interval')
|
||||
self.alert_after = settings.get('alert_after')
|
||||
self.alert_every = settings.get('alert_every')
|
||||
@ -162,9 +167,17 @@ class Monitor(object):
|
||||
|
||||
def success(self):
    """Handles a passing check and raises MinitorAlert on recovery

    Resets ``total_failure_count`` and ``alert_count`` to zero and stamps
    ``last_success`` with the current time. If the monitor was not up
    before the reset, a MinitorAlert carrying this monitor is raised after
    the state has been updated.
    """
    # Capture the prior state first: the resets below make is_up() true.
    was_down = not self.is_up()

    self.total_failure_count = 0
    self.alert_count = 0
    self.last_success = datetime.now()

    if was_down:
        raise MinitorAlert(
            '{} check is up again!'.format(self.name),
            self,
        )
|
||||
|
||||
def failure(self):
|
||||
"""Handles failure tasks and possibly raises MinitorAlert"""
|
||||
@ -182,9 +195,16 @@ class Monitor(object):
|
||||
|
||||
if should_alert:
|
||||
self.alert_count += 1
|
||||
raise MinitorAlert('{} check has failed {} times'.format(
|
||||
raise MinitorAlert(
|
||||
'{} check has failed {} times'.format(
|
||||
self.name, self.total_failure_count
|
||||
))
|
||||
),
|
||||
self
|
||||
)
|
||||
|
||||
def is_up(self):
    """Indicates if the monitor is up, i.e. has no counted alerts

    Returns True while ``alert_count`` is zero and False once it is
    non-zero.
    """
    has_alerted = self.alert_count != 0
    return not has_alerted
|
||||
|
||||
|
||||
class Alert(object):
|
||||
@ -214,11 +234,12 @@ class Alert(object):
|
||||
return 'Never'
|
||||
return dt.isoformat()
|
||||
|
||||
def alert(self, monitor):
|
||||
def alert(self, message, monitor):
|
||||
"""Calls the alert command for the provided monitor"""
|
||||
output, ex = call_output(
|
||||
self._formated_command(
|
||||
alert_count=monitor.alert_count,
|
||||
alert_message=message,
|
||||
monitor_name=monitor.name,
|
||||
failure_count=monitor.total_failure_count,
|
||||
last_success=self._format_datetime(monitor.last_success),
|
||||
@ -248,7 +269,7 @@ class Minitor(object):
|
||||
self.alerts = {
|
||||
'log': Alert(
|
||||
'log',
|
||||
{'command': ['echo', '{monitor_name} has failed!']}
|
||||
{'command': ['echo', '{alert_message}!']}
|
||||
)
|
||||
}
|
||||
self.alerts.update({
|
||||
@ -275,10 +296,12 @@ class Minitor(object):
|
||||
)
|
||||
)
|
||||
|
||||
def alert_for_monitor(self, monitor):
|
||||
def handle_minitor_alert(self, minitor_alert):
|
||||
"""Issues all alerts for a provided monitor"""
|
||||
for alert in monitor.alerts:
|
||||
self.alerts[alert].alert(monitor)
|
||||
monitor = minitor_alert.monitor
|
||||
alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down
|
||||
for alert in alerts:
|
||||
self.alerts[alert].alert(str(minitor_alert), monitor)
|
||||
|
||||
def parse_args(self):
|
||||
"""Parses command line arguments and returns them"""
|
||||
@ -309,7 +332,7 @@ class Minitor(object):
|
||||
)
|
||||
except MinitorAlert as minitor_alert:
|
||||
self.logger.warn(minitor_alert)
|
||||
self.alert_for_monitor(monitor)
|
||||
self.handle_minitor_alert(minitor_alert)
|
||||
|
||||
sleep(self.check_interval)
|
||||
|
||||
|
@ -32,20 +32,26 @@ class TestAlert(object):
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
'last_success',
|
||||
'last_success,expected_success',
|
||||
[
|
||||
(None, 'Never'),
|
||||
(datetime(2018, 4, 10), '2018-04-10T00:00:00')
|
||||
]
|
||||
)
|
||||
def test_simple_alert(self, monitor, echo_alert, last_success):
|
||||
def test_simple_alert(
|
||||
self,
|
||||
monitor,
|
||||
echo_alert,
|
||||
last_success,
|
||||
expected_success
|
||||
):
|
||||
monitor.total_failure_count = 1
|
||||
monitor.alert_count = 1
|
||||
monitor.last_success = last_success[0]
|
||||
monitor.last_success = last_success
|
||||
with patch.object(echo_alert.logger, 'error') as mock_error:
|
||||
echo_alert.alert(monitor)
|
||||
echo_alert.alert('Exception message', monitor)
|
||||
mock_error.assert_called_once_with(
|
||||
'Dummy Monitor has failed 1 time(s)!\n'
|
||||
'We have alerted 1 time(s)\n'
|
||||
'Last success was ' + last_success[1]
|
||||
'Last success was ' + expected_success
|
||||
)
|
||||
|
@ -1,3 +1,6 @@
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from minitor.main import InvalidMonitorException
|
||||
@ -13,6 +16,11 @@ class TestMonitor(object):
|
||||
return Monitor({
|
||||
'name': 'Sample Monitor',
|
||||
'command': ['echo', 'foo'],
|
||||
'alert_down': ['log'],
|
||||
'alert_up': ['log'],
|
||||
'check_interval': 1,
|
||||
'alert_after': 1,
|
||||
'alert_every': 1,
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize('settings', [
|
||||
@ -92,3 +100,42 @@ class TestMonitor(object):
|
||||
monitor.failure()
|
||||
else:
|
||||
monitor.failure()
|
||||
|
||||
@pytest.mark.parametrize(
    'last_check',
    [None, datetime(2018, 4, 10)],
)
def test_monitor_should_check(self, monitor, last_check):
    """should_check() is truthy for a missing or 2018-dated last_check"""
    monitor.last_check = last_check
    is_due = monitor.should_check()
    assert is_due
|
||||
|
||||
def test_monitor_check_fail(self, monitor):
    """check() is falsy and calls failure() once for the bad command"""
    # The command is expected to exit non-zero because of the bogus flag.
    monitor.command = ['ls', '--not-real']
    with patch.object(monitor, 'failure') as mock_failure:
        result = monitor.check()
    assert not result
    mock_failure.assert_called_once()
|
||||
|
||||
def test_monitor_check_success(self, monitor):
    """check() is truthy and calls success() once for the fixture command"""
    with patch.object(monitor, 'success') as mock_success:
        result = monitor.check()
    assert result
    mock_success.assert_called_once()
|
||||
|
||||
@pytest.mark.parametrize(
    'failure_count',
    [0, 1],
)
def test_monitor_success(self, monitor, failure_count):
    """success() resets counters and sets last_success without alerting"""
    # alert_count stays at zero, so the monitor counts as up beforehand.
    monitor.alert_count = 0
    monitor.total_failure_count = failure_count
    assert monitor.last_success is None

    monitor.success()

    assert monitor.total_failure_count == 0
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
|
||||
|
||||
def test_monitor_success_back_up(self, monitor):
    """success() raises MinitorAlert after an alert and still resets state"""
    # A non-zero alert_count means the monitor is not up beforehand.
    monitor.alert_count = 1
    monitor.total_failure_count = 1

    with pytest.raises(MinitorAlert):
        monitor.success()

    # Checked outside the context manager so the asserts actually execute.
    assert monitor.total_failure_count == 0
    assert monitor.alert_count == 0
    assert monitor.last_success is not None
|
||||
|
Loading…
Reference in New Issue
Block a user