Fix issue with alerting on first alert
An issue with the failure function caused the first alert after reaching the minimum `alert_after` threshold to not notify unless using exponential backoff. New tests were added to reproduce the error and then it was fixed. Fixes #2
This commit is contained in:
parent
fce47ba2c7
commit
3ccd76dd75
101
minitor/main.py
101
minitor/main.py
@ -20,6 +20,50 @@ def read_yaml(path):
|
|||||||
return yamlenv.load(contents)
|
return yamlenv.load(contents)
|
||||||
|
|
||||||
|
|
||||||
|
def validate_monitor_settings(settings):
|
||||||
|
"""Validates that settings for a Monitor are valid
|
||||||
|
|
||||||
|
Note: Cannot yet validate the Alerts exist from within this class.
|
||||||
|
That will be done by Minitor later
|
||||||
|
"""
|
||||||
|
name = settings.get('name')
|
||||||
|
if not name:
|
||||||
|
raise InvalidMonitorException('Invalid name for monitor')
|
||||||
|
if not settings.get('command'):
|
||||||
|
raise InvalidMonitorException(
|
||||||
|
'Invalid command for monitor {}'.format(name)
|
||||||
|
)
|
||||||
|
|
||||||
|
type_assertions = (
|
||||||
|
('check_interval', int),
|
||||||
|
('alert_after', int),
|
||||||
|
('alert_every', int),
|
||||||
|
)
|
||||||
|
|
||||||
|
for key, val_type in type_assertions:
|
||||||
|
val = settings.get(key)
|
||||||
|
if not isinstance(val, val_type):
|
||||||
|
raise InvalidMonitorException(
|
||||||
|
'Invalid type on {}: {}. Expected {} and found {}'.format(
|
||||||
|
name, key, val_type.__name__, type(val).__name__
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
non_zero = (
|
||||||
|
'check_interval',
|
||||||
|
'alert_after',
|
||||||
|
'alert_every',
|
||||||
|
)
|
||||||
|
|
||||||
|
for key in non_zero:
|
||||||
|
if settings.get(key) == 0:
|
||||||
|
raise InvalidMonitorException(
|
||||||
|
'Invalid value for {}: {}. Value cannot be 0'.format(
|
||||||
|
name, key
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class InvalidAlertException(Exception):
|
class InvalidAlertException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -44,7 +88,7 @@ class Monitor(object):
|
|||||||
'alert_every': -1,
|
'alert_every': -1,
|
||||||
}
|
}
|
||||||
settings.update(config)
|
settings.update(config)
|
||||||
self.validate_settings(settings)
|
validate_monitor_settings(settings)
|
||||||
|
|
||||||
self.name = settings['name']
|
self.name = settings['name']
|
||||||
self.command = settings['command']
|
self.command = settings['command']
|
||||||
@ -54,38 +98,9 @@ class Monitor(object):
|
|||||||
self.alert_every = settings.get('alert_every')
|
self.alert_every = settings.get('alert_every')
|
||||||
|
|
||||||
self.last_check = None
|
self.last_check = None
|
||||||
self.failure_count = 0
|
self.total_failure_count = 0
|
||||||
self.alert_count = 0
|
self.alert_count = 0
|
||||||
|
|
||||||
def validate_settings(self, settings):
|
|
||||||
"""Validates that settings for this Monitor are valid
|
|
||||||
|
|
||||||
Note: Cannot yet validate the Alerts exist from within this class.
|
|
||||||
That will be done by Minitor later
|
|
||||||
"""
|
|
||||||
name = settings.get('name')
|
|
||||||
if not name:
|
|
||||||
raise InvalidMonitorException('Invalid name for monitor')
|
|
||||||
if not settings.get('command'):
|
|
||||||
raise InvalidMonitorException(
|
|
||||||
'Invalid command for monitor {}'.format(name)
|
|
||||||
)
|
|
||||||
|
|
||||||
type_assertions = (
|
|
||||||
('check_interval', int),
|
|
||||||
('alert_after', int),
|
|
||||||
('alert_every', int),
|
|
||||||
)
|
|
||||||
|
|
||||||
for key, val_type in type_assertions:
|
|
||||||
val = settings.get(key)
|
|
||||||
if not isinstance(val, val_type):
|
|
||||||
raise InvalidMonitorException(
|
|
||||||
'Invalid type on {} {}. Expected {} and found {}'.format(
|
|
||||||
name, key, val_type.__name__, type(val).__name__
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def should_check(self):
|
def should_check(self):
|
||||||
"""Determines if this Monitor should run it's check command"""
|
"""Determines if this Monitor should run it's check command"""
|
||||||
if not self.last_check:
|
if not self.last_check:
|
||||||
@ -111,25 +126,27 @@ class Monitor(object):
|
|||||||
|
|
||||||
def success(self):
|
def success(self):
|
||||||
"""Handles success tasks"""
|
"""Handles success tasks"""
|
||||||
self.failure_count = 0
|
self.total_failure_count = 0
|
||||||
self.alert_count = 0
|
self.alert_count = 0
|
||||||
|
|
||||||
def failure(self):
|
def failure(self):
|
||||||
"""Handles failure tasks and possibly raises MinitorAlert"""
|
"""Handles failure tasks and possibly raises MinitorAlert"""
|
||||||
self.failure_count += 1
|
self.total_failure_count += 1
|
||||||
if self.failure_count < self.alert_after:
|
# Ensure we've hit the minimum number of failures to alert
|
||||||
|
if self.total_failure_count < self.alert_after:
|
||||||
return
|
return
|
||||||
if self.alert_every >= 0:
|
|
||||||
failure_interval = (self.failure_count % self.alert_every) == 0
|
failure_count = (self.total_failure_count - self.alert_after)
|
||||||
|
if self.alert_every > 0:
|
||||||
|
# Otherwise, we should check against our alert_every
|
||||||
|
should_alert = (failure_count % self.alert_every) == 0
|
||||||
else:
|
else:
|
||||||
failure_interval = (
|
should_alert = (failure_count >= (2 ** self.alert_count) - 1)
|
||||||
(self.failure_count - self.alert_after) >=
|
|
||||||
(2 ** self.alert_count)
|
if should_alert:
|
||||||
)
|
|
||||||
if failure_interval:
|
|
||||||
self.alert_count += 1
|
self.alert_count += 1
|
||||||
raise MinitorAlert('{} check has failed {} times'.format(
|
raise MinitorAlert('{} check has failed {} times'.format(
|
||||||
self.name, self.failure_count
|
self.name, self.total_failure_count
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
class TestMain(object):
|
|
||||||
def test_something(self):
|
|
||||||
pass
|
|
||||||
|
|
||||||
def test_something_else(self):
|
|
||||||
assert False
|
|
94
tests/monitor_test.py
Normal file
94
tests/monitor_test.py
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
import pytest
|
||||||
|
|
||||||
|
from minitor.main import InvalidMonitorException
|
||||||
|
from minitor.main import MinitorAlert
|
||||||
|
from minitor.main import Monitor
|
||||||
|
from minitor.main import validate_monitor_settings
|
||||||
|
|
||||||
|
|
||||||
|
class TestMonitor(object):
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def monitor(self):
|
||||||
|
return Monitor({
|
||||||
|
'name': 'SampleMonitor',
|
||||||
|
'command': ['echo', 'foo'],
|
||||||
|
})
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('settings', [
|
||||||
|
{'alert_after': 0},
|
||||||
|
{'alert_every': 0},
|
||||||
|
{'check_interval': 0},
|
||||||
|
{'alert_after': 'invalid'},
|
||||||
|
{'alert_every': 'invalid'},
|
||||||
|
{'check_interval': 'invalid'},
|
||||||
|
])
|
||||||
|
def test_monitor_invalid_configuration(self, settings):
|
||||||
|
with pytest.raises(InvalidMonitorException):
|
||||||
|
validate_monitor_settings(settings)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'alert_after',
|
||||||
|
[1, 20],
|
||||||
|
ids=lambda arg: 'alert_after({})'.format(arg),
|
||||||
|
)
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'alert_every',
|
||||||
|
[-1, 1, 2, 1440],
|
||||||
|
ids=lambda arg: 'alert_every({})'.format(arg),
|
||||||
|
)
|
||||||
|
def test_monitor_alert_after(self, monitor, alert_after, alert_every):
|
||||||
|
monitor.alert_after = alert_after
|
||||||
|
monitor.alert_every = alert_every
|
||||||
|
|
||||||
|
# fail a bunch of times before the final failure
|
||||||
|
for _ in range(alert_after - 1):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
# this time should raise an alert
|
||||||
|
with pytest.raises(MinitorAlert):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'alert_after',
|
||||||
|
[1, 20],
|
||||||
|
ids=lambda arg: 'alert_after({})'.format(arg),
|
||||||
|
)
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
'alert_every',
|
||||||
|
[1, 2, 1440],
|
||||||
|
ids=lambda arg: 'alert_every({})'.format(arg),
|
||||||
|
)
|
||||||
|
def test_monitor_alert_every(self, monitor, alert_after, alert_every):
|
||||||
|
monitor.alert_after = alert_after
|
||||||
|
monitor.alert_every = alert_every
|
||||||
|
|
||||||
|
# fail a bunch of times before the final failure
|
||||||
|
for _ in range(alert_after - 1):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
# this time should raise an alert
|
||||||
|
with pytest.raises(MinitorAlert):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
# fail a bunch more times until the next alert
|
||||||
|
for _ in range(alert_every - 1):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
# this failure should alert now
|
||||||
|
with pytest.raises(MinitorAlert):
|
||||||
|
monitor.failure()
|
||||||
|
|
||||||
|
def test_monitor_alert_every_exponential(self, monitor):
|
||||||
|
monitor.alert_after = 1
|
||||||
|
monitor.alert_every = -1
|
||||||
|
|
||||||
|
failure_count = 16
|
||||||
|
expect_failures_on = {1, 2, 4, 8, 16}
|
||||||
|
|
||||||
|
for i in range(failure_count):
|
||||||
|
if i + 1 in expect_failures_on:
|
||||||
|
with pytest.raises(MinitorAlert):
|
||||||
|
monitor.failure()
|
||||||
|
else:
|
||||||
|
monitor.failure()
|
Loading…
Reference in New Issue
Block a user