Compare commits
1 Commits
master
...
venv-virtu
Author | SHA1 | Date | |
---|---|---|---|
82bbf75e3b |
@ -1,9 +0,0 @@
|
||||
*$py.class
|
||||
*.egg
|
||||
*.egg-info/
|
||||
*.py[cod]
|
||||
*.so
|
||||
**/__pycache__/
|
||||
scripts/README.md
|
||||
examples/
|
||||
env/
|
201
.drone.star
201
.drone.star
@ -1,201 +0,0 @@
|
||||
# Build pipelines
|
||||
def main(ctx):
|
||||
pipelines = []
|
||||
|
||||
# Run tests
|
||||
pipelines += run_tests()
|
||||
|
||||
# Add pypi push pipeline
|
||||
pipelines += push_to_pypi(ctx)
|
||||
|
||||
# Add docker push pipelines
|
||||
pipelines += push_to_docker(ctx)
|
||||
|
||||
return pipelines
|
||||
|
||||
|
||||
# Return workspace in the container
|
||||
def get_workspace():
|
||||
return {
|
||||
"base": "/app",
|
||||
"path": ".",
|
||||
}
|
||||
|
||||
|
||||
# Builds a list of all test pipelines to be executed
|
||||
def run_tests():
|
||||
return [{
|
||||
"kind": "pipeline",
|
||||
"name": "tests",
|
||||
"workspace": get_workspace(),
|
||||
"steps": [
|
||||
tox_step("python:3.7"),
|
||||
tox_step("python:3.8"),
|
||||
tox_step("python:3.9"),
|
||||
tox_step("python:3.10"),
|
||||
tox_step("python:3"),
|
||||
# tox_step("pypy:3.9", "pypy3", "pypy3"),
|
||||
# tox_step("pypy:3", "pypy3", "pypy3"),
|
||||
notify_step(),
|
||||
],
|
||||
}]
|
||||
|
||||
|
||||
# Builds a single python test step
|
||||
def tox_step(docker_tag, python_cmd="python", tox_env="py3"):
|
||||
return {
|
||||
"name": "test {}".format(docker_tag.replace(":", "")),
|
||||
"image": docker_tag,
|
||||
"environment": {
|
||||
"TOXENV": tox_env,
|
||||
},
|
||||
"commands": [
|
||||
"{} -V".format(python_cmd),
|
||||
"pip install tox",
|
||||
"tox",
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
# Builds a notify step that will notify when the previous step changes
|
||||
def notify_step():
|
||||
return {
|
||||
"name": "notify",
|
||||
"image": "drillster/drone-email",
|
||||
"settings": {
|
||||
"host": {
|
||||
"from_secret": "SMTP_HOST",
|
||||
},
|
||||
"username": {
|
||||
"from_secret": "SMTP_USER",
|
||||
},
|
||||
"password": {
|
||||
"from_secret": "SMTP_PASS",
|
||||
},
|
||||
"from": "drone@iamthefij.com",
|
||||
},
|
||||
"when": {
|
||||
"status": [
|
||||
"changed",
|
||||
"failure",
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Push package to pypi
|
||||
def push_to_pypi(ctx):
|
||||
return [{
|
||||
"kind": "pipeline",
|
||||
"name": "deploy to pypi",
|
||||
"depends_on": ["tests"],
|
||||
"workspace": get_workspace(),
|
||||
"trigger": {
|
||||
"event": ["tag"],
|
||||
"ref": [
|
||||
"refs/heads/master",
|
||||
"refs/tags/v*",
|
||||
],
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"name": "push to test pypi",
|
||||
"image": "python:3",
|
||||
"environment": {
|
||||
"TWINE_USERNAME": {
|
||||
"from_secret": "PYPI_USERNAME",
|
||||
},
|
||||
"TWINE_PASSWORD": {
|
||||
"from_secret": "TEST_PYPI_PASSWORD",
|
||||
},
|
||||
},
|
||||
"commands": ["make upload-test"],
|
||||
},
|
||||
{
|
||||
"name": "push to pypi",
|
||||
"image": "python:3",
|
||||
"environment": {
|
||||
"TWINE_USERNAME": {
|
||||
"from_secret": "PYPI_USERNAME",
|
||||
},
|
||||
"TWINE_PASSWORD": {
|
||||
"from_secret": "PYPI_PASSWORD",
|
||||
},
|
||||
},
|
||||
"commands": ["make upload"],
|
||||
"when": {
|
||||
"event": ["tag"],
|
||||
},
|
||||
},
|
||||
notify_step(),
|
||||
]
|
||||
}]
|
||||
|
||||
|
||||
# Build and push docker image
|
||||
def push_docker_step(tag_suffix, arch, repo):
|
||||
return {
|
||||
"name": "push {}".format(tag_suffix),
|
||||
"image": "plugins/docker",
|
||||
"settings": {
|
||||
"repo": "iamthefij/minitor",
|
||||
"auto_tag": True,
|
||||
"auto_tag_suffix": tag_suffix,
|
||||
"username": {
|
||||
"from_secret": "docker_username",
|
||||
},
|
||||
"password": {
|
||||
"from_secret": "docker_password",
|
||||
},
|
||||
"build_args": [
|
||||
"ARCH={}".format(arch),
|
||||
"REPO={}".format(repo),
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
# Builds a pipeline to push to docker
|
||||
def push_to_docker(ctx):
|
||||
return [{
|
||||
"kind": "pipeline",
|
||||
"name": "push to docker",
|
||||
"depends_on": ["tests"],
|
||||
"workspace": get_workspace(),
|
||||
"trigger": {
|
||||
"event": ["tag", "push"],
|
||||
"ref": [
|
||||
"refs/heads/master",
|
||||
"refs/tags/v*",
|
||||
],
|
||||
},
|
||||
"steps": [
|
||||
{
|
||||
"name": "get qemu",
|
||||
"image": "busybox",
|
||||
"commands": ["sh ./get_qemu.sh x86_64 arm aarch64"],
|
||||
},
|
||||
push_docker_step("linux-amd64", "x86_64", "library"),
|
||||
push_docker_step("linux-arm", "arm", "arm32v6"),
|
||||
push_docker_step("linux-arm64", "aarch64", "arm64v8"),
|
||||
{
|
||||
"name": "publish manifest",
|
||||
"image": "plugins/manifest",
|
||||
"settings": {
|
||||
"spec": "manifest.tmpl",
|
||||
"auto_tag": True,
|
||||
"ignore_missing": True,
|
||||
"username": {
|
||||
"from_secret": "docker_username",
|
||||
},
|
||||
"password": {
|
||||
"from_secret": "docker_password",
|
||||
},
|
||||
}
|
||||
},
|
||||
notify_step(),
|
||||
],
|
||||
}]
|
||||
|
||||
|
||||
# vim: ft=python
|
10
.drone.yml
Normal file
10
.drone.yml
Normal file
@ -0,0 +1,10 @@
|
||||
workspace:
|
||||
base: /app
|
||||
path: .
|
||||
|
||||
pipeline:
|
||||
build:
|
||||
image: python:3
|
||||
commands:
|
||||
- make test
|
||||
- make build
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -60,4 +60,3 @@ docs/_build/
|
||||
target/
|
||||
|
||||
config.yml
|
||||
.mypy_cache/
|
||||
|
@ -1,23 +1,23 @@
|
||||
repos:
|
||||
- repo: https://github.com/psf/black
|
||||
rev: 22.3.0
|
||||
hooks:
|
||||
- id: black
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v1.2.3
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: autopep8-wrapper
|
||||
args:
|
||||
- -i
|
||||
- --ignore=E265,E309,E501
|
||||
- id: debug-statements
|
||||
language_version: python3
|
||||
- id: flake8
|
||||
language_version: python3
|
||||
- id: check-yaml
|
||||
args:
|
||||
- --allow-multiple-documents
|
||||
- id: check-merge-conflict
|
||||
- id: name-tests-test
|
||||
exclude: tests/(common.py|util.py|(helpers|integration/factories)/(.+).py)
|
||||
- repo: https://github.com/asottile/reorder_python_imports
|
||||
rev: v1.0.1
|
||||
sha: v1.0.1
|
||||
hooks:
|
||||
- id: reorder-python-imports
|
||||
args:
|
||||
|
18
.travis.yml
18
.travis.yml
@ -1,18 +0,0 @@
|
||||
---
|
||||
dist: xenial
|
||||
language: python
|
||||
matrix:
|
||||
include:
|
||||
- python: "3.5"
|
||||
env: TOXENV=py3
|
||||
- python: "3.6"
|
||||
env: TOXENV=py3
|
||||
- python: "3.7"
|
||||
env: TOXENV=py3
|
||||
- python: "pypy3.5"
|
||||
env: TOXENV=pypy3
|
||||
|
||||
install:
|
||||
- pip install tox
|
||||
script:
|
||||
- tox
|
38
Dockerfile
38
Dockerfile
@ -1,38 +0,0 @@
|
||||
ARG REPO=library
|
||||
FROM ${REPO}/python:3-alpine
|
||||
LABEL maintainer="ian@iamthefij.com"
|
||||
# Minitor: https://git.iamthefij.com/iamthefij/minitor
|
||||
|
||||
# This should be the target qemu arch
|
||||
ARG ARCH=x86_64
|
||||
COPY ./build/qemu-${ARCH}-static /usr/bin/
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.1 curl=~7.80 jq=~1.6
|
||||
WORKDIR /app
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
|
||||
# Expose default metrics port
|
||||
EXPOSE 8080
|
||||
|
||||
# Copy default sample config
|
||||
COPY ./sample-config.yml /app/config.yml
|
||||
|
||||
# Copy Python package to container
|
||||
COPY ./README.md /app/
|
||||
COPY ./setup.py /app/
|
||||
COPY ./minitor /app/minitor
|
||||
RUN pip install --no-cache-dir -e .
|
||||
|
||||
# Copy scripts
|
||||
COPY ./scripts /app/scripts
|
||||
|
||||
# Allow all users to execute minitor and scripts
|
||||
RUN chmod -R 755 /app
|
||||
|
||||
# Drop to non-root user
|
||||
USER minitor
|
||||
|
||||
ENTRYPOINT [ "python3", "-m", "minitor.main" ]
|
142
Makefile
142
Makefile
@ -1,149 +1,51 @@
|
||||
DOCKER_TAG := minitor-dev
|
||||
OPEN_CMD := $(shell type xdg-open &> /dev/null && echo 'xdg-open' || echo 'open')
|
||||
ENV := env
|
||||
# Try to use python3 -m venv, but fallback to virtualenv just in case
|
||||
VENV := $(shell python3 -m venv --help &> /dev/null && echo 'python3 -m venv' || echo 'virtualenv -p python3')
|
||||
|
||||
.PHONY: default
|
||||
default: test
|
||||
env:
|
||||
$(VENV) env
|
||||
./env/bin/pip install -r requirements-dev.txt
|
||||
|
||||
# Create sample config
|
||||
config.yml:
|
||||
cp sample-config.yml config.yml
|
||||
|
||||
# Creates virtualenv
|
||||
$(ENV):
|
||||
python3 -m venv $(ENV)
|
||||
|
||||
# Install minitor and dependencies in virtualenv
|
||||
$(ENV)/bin/minitor: $(ENV)
|
||||
$(ENV)/bin/pip install -r requirements-dev.txt
|
||||
|
||||
# Install tox into virtualenv for running tests
|
||||
$(ENV)/bin/tox: $(ENV)
|
||||
$(ENV)/bin/pip install tox
|
||||
|
||||
# Install wheel for building packages
|
||||
$(ENV)/bin/wheel: $(ENV)
|
||||
$(ENV)/bin/pip install wheel
|
||||
|
||||
# Install twine for uploading packages
|
||||
$(ENV)/bin/twine: $(ENV)
|
||||
$(ENV)/bin/pip install twine
|
||||
|
||||
# Installs dev requirements to virtualenv
|
||||
.PHONY: devenv
|
||||
devenv: $(ENV)/bin/minitor
|
||||
|
||||
# Generates a smaller env for running tox, which builds its own env
|
||||
.PHONY: test-env
|
||||
test-env: $(ENV)/bin/tox
|
||||
|
||||
# Generates a small build env for building and uploading dists
|
||||
.PHONY: build-env
|
||||
build-env: $(ENV)/bin/twine $(ENV)/bin/wheel
|
||||
|
||||
# Runs Minitor
|
||||
.PHONY: run
|
||||
run: $(ENV)/bin/minitor config.yml
|
||||
$(ENV)/bin/minitor -vvv
|
||||
run: env
|
||||
./env/bin/python -m minitor.main
|
||||
|
||||
# Runs Minitor with metrics
|
||||
.PHONY: run-metrics
|
||||
run-metrics: $(ENV)/bin/minitor config.yml
|
||||
$(ENV)/bin/minitor -vvv --metrics
|
||||
|
||||
# Runs tests with tox
|
||||
.PHONY: test
|
||||
test: $(ENV)/bin/tox
|
||||
$(ENV)/bin/tox -e py3
|
||||
test: env
|
||||
./env/bin/tox
|
||||
|
||||
# Builds wheel for package to upload
|
||||
.PHONY: build
|
||||
build: $(ENV)/bin/wheel
|
||||
$(ENV)/bin/python setup.py sdist
|
||||
$(ENV)/bin/python setup.py bdist_wheel
|
||||
build: env
|
||||
./env/bin/python setup.py sdist
|
||||
./env/bin/python setup.py bdist_wheel
|
||||
|
||||
# Verify that the package version reported by setup.py matches the git tag so we don't push bad shas
|
||||
.PHONY: verify-tag-version
|
||||
verify-tag-version:
|
||||
$(eval TAG_NAME = $(shell [ -n "$(DRONE_TAG)" ] && echo $(DRONE_TAG) || git describe --tags --exact-match))
|
||||
test "v$(shell python setup.py -V)" = "$(TAG_NAME)"
|
||||
|
||||
# Uses twine to upload to pypi
|
||||
.PHONY: upload
|
||||
upload: verify-tag-version build $(ENV)/bin/twine
|
||||
$(ENV)/bin/twine upload dist/*
|
||||
upload: env
|
||||
./env/bin/twine upload dist/*
|
||||
|
||||
# Uses twine to upload to test pypi
|
||||
.PHONY: upload-test
|
||||
upload-test: verify-tag-version build $(ENV)/bin/twine
|
||||
$(ENV)/bin/twine upload --repository-url https://test.pypi.org/legacy/ dist/*
|
||||
upload-test: env
|
||||
./env/bin/twine upload --repository-url https://test.pypi.org/legacy/ dist/*
|
||||
|
||||
# Cleans all build, runtime, and test artifacts
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -fr ./build ./minitor.egg-info ./htmlcov ./.coverage ./.pytest_cache ./.tox
|
||||
find . -name '*.pyc' -delete
|
||||
find . -name '__pycache__' -delete
|
||||
|
||||
# Cleans dist and env
|
||||
.PHONY: dist-clean
|
||||
dist-clean: clean
|
||||
rm -fr ./dist $(ENV)
|
||||
rm -fr ./dist ./env
|
||||
|
||||
# Install pre-commit hooks
|
||||
.PHONY: install-hooks
|
||||
install-hooks: $(ENV)
|
||||
$(ENV)/bin/tox -e pre-commit -- install -f --install-hooks
|
||||
install-hooks:
|
||||
./env/bin/tox -e pre-commit -- install -f --install-hooks
|
||||
|
||||
# Generates test coverage
|
||||
.coverage:
|
||||
$(ENV)/bin/tox
|
||||
./env/bin/tox
|
||||
|
||||
# Builds coverage html
|
||||
htmlcov/index.html: .coverage
|
||||
$(ENV)/bin/coverage html
|
||||
./env/bin/coverage html
|
||||
|
||||
# Opens coverage html in browser (on macOS and some Linux systems)
|
||||
.PHONY: open-coverage
|
||||
open-coverage: htmlcov/index.html
|
||||
$(OPEN_CMD) htmlcov/index.html
|
||||
|
||||
# Docker targets
|
||||
|
||||
# Targets to download required qemu binaries for running on an amd64 machine
|
||||
build/qemu-x86_64-static:
|
||||
./get_qemu.sh x86_64
|
||||
|
||||
build/qemu-arm-static:
|
||||
./get_qemu.sh arm
|
||||
|
||||
build/qemu-aarch64-static:
|
||||
./get_qemu.sh aarch64
|
||||
|
||||
# Build Docker image for host architecture (amd64)
|
||||
.PHONY: docker-build
|
||||
docker-build: build/qemu-x86_64-static
|
||||
docker build . -t ${DOCKER_TAG}-linux-amd64
|
||||
|
||||
# Cross build for arm architectures
|
||||
.PHONY: docker-cross-build-arm
|
||||
docker-cross-build-arm: build/qemu-arm-static
|
||||
docker build --build-arg REPO=arm32v6 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
|
||||
|
||||
.PHONY: docker-cross-build-arm64
|
||||
docker-cross-build-arm64: build/qemu-aarch64-static
|
||||
docker build --build-arg REPO=arm64v8 --build-arg ARCH=aarch64 . -t ${DOCKER_TAG}-linux-arm64
|
||||
|
||||
# Run on host architecture
|
||||
.PHONY: docker-run
|
||||
docker-run: docker-build config.yml
|
||||
docker run --rm -v $(shell pwd)/config.yml:/app/config.yml ${DOCKER_TAG}-linux-amd64
|
||||
|
||||
# Cross run arm images on host architecture
|
||||
.PHONY: docker-cross-run-arm
|
||||
docker-cross-run-arm: docker-cross-build-arm config.yml
|
||||
docker run --rm -v $(shell pwd)/config.yml:/app/config.yml ${DOCKER_TAG}-linux-arm
|
||||
|
||||
.PHONY: docker-cross-run-arm64
|
||||
docker-cross-run-arm64: docker-cross-build-arm64 config.yml
|
||||
docker run --rm -v $(shell pwd)/config.yml:/app/config.yml ${DOCKER_TAG}-linux-arm64
|
||||
open htmlcov/index.html
|
||||
|
89
README.md
89
README.md
@ -2,10 +2,6 @@
|
||||
|
||||
A minimal monitoring system
|
||||
|
||||
## Important
|
||||
|
||||
*This has been more or less replaced by a version written in Go. Check out [minitor-go](/iamthefij/minitor-go)*. There are no known issues with this version, but it is not really maintained anymore as I've migrated to the Go version since it uses fewer system resources.
|
||||
|
||||
## What does it do?
|
||||
|
||||
Minitor accepts a YAML configuration file with a set of commands to run and a set of alerts to execute when those commands fail. It is designed to be as simple as possible and relies on other command line tools to do checks and issue alerts.
|
||||
@ -20,103 +16,26 @@ I'm running a few small services and found Sensu, Consul, Nagios, etc. to all be
|
||||
|
||||
Install and execute with:
|
||||
|
||||
```bash
|
||||
```
|
||||
pip install minitor
|
||||
minitor
|
||||
```
|
||||
|
||||
If locally developing you can use:
|
||||
|
||||
```bash
|
||||
```
|
||||
make run
|
||||
```
|
||||
|
||||
It will read the contents of `config.yml` and begin its loop. You could also run it directly and provide a new config file via the `--config` argument.
|
||||
|
||||
### Configuring
|
||||
|
||||
#### Docker
|
||||
|
||||
You can pull this repository directly from Docker:
|
||||
|
||||
```bash
|
||||
docker pull iamthefij/minitor
|
||||
```
|
||||
|
||||
The Docker image uses a default `config.yml` that is copied from `sample-config.yml`. This won't really do anything for you, so when you run the Docker image, you should supply your own `config.yml` file:
|
||||
|
||||
```bash
|
||||
docker run -v $PWD/config.yml:/app/config.yml iamthefij/minitor
|
||||
```
|
||||
|
||||
Images are provided for `amd64`, `arm`, and `arm64` architectures, but the Python package should be compatible with anything that supports Python.
|
||||
|
||||
## Configuring
|
||||
|
||||
In this repo, you can explore the `sample-config.yml` file for an example, but the general structure is as follows. It should be noted that environment variable interpolation happens on load of the YAML file.
|
||||
|
||||
The global configurations are:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`check_interval`|Maximum frequency to run checks for each monitor|
|
||||
|`monitors`|List of all monitors. Detailed description below|
|
||||
|`alerts`|List of all alerts. Detailed description below|
|
||||
|
||||
### Monitors
|
||||
|
||||
All monitors should be listed under `monitors`.
|
||||
|
||||
Each monitor allows the following configuration:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`name`|Name of the monitor running. This will show up in messages and logs.|
|
||||
|`command`|Specifies the command that should be executed, either in exec or shell form. This command's exit value will determine whether the check is successful|
|
||||
|`alert_down`|A list of Alerts to be triggered when the monitor is in a "down" state|
|
||||
|`alert_up`|A list of Alerts to be triggered when the monitor moves to an "up" state|
|
||||
|`check_interval`|The interval at which this monitor should be checked. This must be greater than the global `check_interval` value|
|
||||
|`alert_after`|Allows specifying the number of failed checks before an alert should be triggered|
|
||||
|`alert_every`|Allows specifying how often an alert should be retriggered. There are a few magic numbers here. Defaults to `-1` for an exponential backoff. Setting to `0` disables re-alerting. Positive values will allow retriggering after the specified number of checks|
|
||||
|
||||
### Alerts
|
||||
|
||||
Alerts exist as objects keyed under `alerts`. Their key should be the name of the Alert. This is used in your monitor setup in `alert_down` and `alert_up`.
|
||||
|
||||
Each alert allows the following configuration:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`command`|Specifies the command that should be executed, either in exec or shell form. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below|
|
||||
|
||||
Also, when alerts are executed, they will be passed through Python's format function with arguments for some attributes of the Monitor. The following monitor specific variables can be referenced using Python formatting syntax:
|
||||
|
||||
|token|value|
|
||||
|---|---|
|
||||
|`{alert_count}`|Number of times this monitor has alerted|
|
||||
|`{alert_message}`|The exception message that was raised|
|
||||
|`{failure_count}`|The total number of sequential failed checks for this monitor|
|
||||
|`{last_output}`|The last returned value from the check command to either stderr or stdout|
|
||||
|`{last_success}`|The ISO datetime of the last successful check|
|
||||
|`{monitor_name}`|The name of the monitor that failed and triggered the alert|
|
||||
|
||||
### Metrics
|
||||
|
||||
As of v0.3.0, Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
|
||||
|
||||
It is also possible to use the metrics endpoint for monitoring Minitor itself! This allows setting up multiple instances of Minitor on different servers and having them monitor each other so that you can detect a Minitor outage.
|
||||
|
||||
To run Minitor with metrics, use the `--metrics` (or `-m`) flag. The metrics will be served on port `8080` by default, though it can be overridden using `--metrics-port` (or `-p`).
|
||||
|
||||
```bash
|
||||
minitor --metrics
|
||||
# or
|
||||
minitor --metrics --metrics-port 3000
|
||||
```
|
||||
In this repo, you can explore the `sample-config.yml` file for an example, but the general structure is as follows. It should be noted that environment variable interpolation happens on load of the YAML file. Also, when alerts are executed, they will be passed through Python's format function with arguments for some attributes of the Monitor. Currently this is limited to `{monitor_name}`.
|
||||
|
||||
## Contributing
|
||||
|
||||
Whether you're looking to submit a patch or just tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
||||
|
||||
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
|
||||
|
||||
Github Mirror: https://github.com/IamTheFij/minitor.git
|
||||
|
@ -1,7 +0,0 @@
|
||||
# Minitor Examples
|
||||
|
||||
A few examples of advanced Minitor configuration using Docker Compose.
|
||||
|
||||
These are all built against the local version and not Docker Hub. Before you try anything here, run `make docker-build` from the root `minitor` directory so that the `minitor-dev` image is built on your local host.
|
||||
|
||||
If you are trying to deploy these configs and you would like to use the public image instead, change `minitor-dev` to `iamthefij/minitor`.
|
@ -1,7 +0,0 @@
|
||||
# Docker Checks
|
||||
|
||||
A sample docker-compose example that uses the bundled shell scripts to monitor the health of other Docker containers.
|
||||
|
||||
## Security note
|
||||
|
||||
Exposing `/var/run/docker.sock` comes at a risk. Please be careful when doing this. If someone is able to take over your Minitor container, they will then essentially have root access to your whole host. To minimize risk, be wary of exposing Minitor to the public internet when using a configuration like this.
|
@ -1,17 +0,0 @@
|
||||
version: '2'
|
||||
services:
|
||||
minitor:
|
||||
build: ../..
|
||||
volumes:
|
||||
- ./config.yml:/app/config.yml
|
||||
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||
|
||||
failure:
|
||||
container_name: failure
|
||||
image: busybox
|
||||
command: "sh -c 'exit 1'"
|
||||
|
||||
success:
|
||||
container_name: success
|
||||
image: busybox
|
||||
command: "sh -c 'exit 0'"
|
@ -1,3 +0,0 @@
|
||||
# Example Prometheus Exporter
|
||||
|
||||
An example configuration exporting Minitor stats to Prometheus
|
@ -1,33 +0,0 @@
|
||||
version: '2'
|
||||
services:
|
||||
prom:
|
||||
image: prom/prometheus
|
||||
ports:
|
||||
- "9090:9090"
|
||||
volumes:
|
||||
- ./prometheus.yml:/etc/prometheus/prometheus.yml
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana
|
||||
ports:
|
||||
- "8000:3000"
|
||||
volumes:
|
||||
- /var/lib/grafana
|
||||
|
||||
cadvisor:
|
||||
image: google/cadvisor
|
||||
ports:
|
||||
- "8088:8080"
|
||||
volumes:
|
||||
- /:/rootfs:ro
|
||||
- /var/run:/var/run:rw
|
||||
- /sys:/sys:ro
|
||||
- /var/lib/docker:/var/lib/docker:ro
|
||||
|
||||
minitor:
|
||||
build: ../../
|
||||
ports:
|
||||
- "8080:8080"
|
||||
command: [ "--metrics", "-v" ]
|
||||
volumes:
|
||||
- ../../config.yml:/app/config.yml
|
@ -1,16 +0,0 @@
|
||||
---
|
||||
global:
|
||||
scrape_interval: 30s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: prometheus
|
||||
scrape_interval: 5s
|
||||
static_configs:
|
||||
- targets: ['localhost:9090']
|
||||
- job_name: cadvisor
|
||||
static_configs:
|
||||
- targets: ['cadvisor:8080']
|
||||
- job_name: minitor
|
||||
metrics_path: /
|
||||
static_configs:
|
||||
- targets: ['minitor:8080']
|
14
get_qemu.sh
14
get_qemu.sh
@ -1,14 +0,0 @@
|
||||
#! /bin/bash
|
||||
|
||||
HOST_ARCH=x86_64
|
||||
VERSION=v2.9.1-1
|
||||
|
||||
mkdir -p build
|
||||
cd build
|
||||
|
||||
# Multiple args can be passed in: the Makefile targets pass one arch at a time, while the Drone pipeline passes all of them at once
|
||||
for target_arch in $*; do
|
||||
wget https://github.com/multiarch/qemu-user-static/releases/download/$VERSION/${HOST_ARCH}_qemu-${target_arch}-static.tar.gz
|
||||
tar -xvf ${HOST_ARCH}_qemu-${target_arch}-static.tar.gz
|
||||
rm ${HOST_ARCH}_qemu-${target_arch}-static.tar.gz
|
||||
done
|
@ -1,25 +0,0 @@
|
||||
image: iamthefij/minitor:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}}
|
||||
{{#if build.tags}}
|
||||
tags:
|
||||
{{#each build.tags}}
|
||||
- {{this}}
|
||||
{{/each}}
|
||||
{{/if}}
|
||||
manifests:
|
||||
-
|
||||
image: iamthefij/minitor:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
-
|
||||
image: iamthefij/minitor:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64
|
||||
platform:
|
||||
architecture: arm64
|
||||
os: linux
|
||||
variant: v8
|
||||
-
|
||||
image: iamthefij/minitor:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm
|
||||
platform:
|
||||
architecture: arm
|
||||
os: linux
|
||||
variant: v7
|
385
minitor/main.py
385
minitor/main.py
@ -3,27 +3,23 @@ import subprocess
|
||||
import sys
|
||||
from argparse import ArgumentParser
|
||||
from datetime import datetime
|
||||
from itertools import chain
|
||||
from subprocess import CalledProcessError
|
||||
from subprocess import check_output
|
||||
from time import sleep
|
||||
|
||||
import yamlenv
|
||||
from prometheus_client import Counter
|
||||
from prometheus_client import Gauge
|
||||
from prometheus_client import start_http_server
|
||||
|
||||
|
||||
DEFAULT_METRICS_PORT = 8080
|
||||
logging.basicConfig(
|
||||
level=logging.ERROR, format="%(asctime)s %(levelname)s %(name)s %(message)s"
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s %(levelname)s %(name)s %(message)s'
|
||||
)
|
||||
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
||||
|
||||
|
||||
def read_yaml(path):
|
||||
"""Loads config from a YAML file with env interpolation"""
|
||||
with open(path, "r") as yaml:
|
||||
with open(path, 'r') as yaml:
|
||||
contents = yaml.read()
|
||||
return yamlenv.load(contents)
|
||||
|
||||
@ -34,40 +30,45 @@ def validate_monitor_settings(settings):
|
||||
Note: Cannot yet validate the Alerts exist from within this class.
|
||||
That will be done by Minitor later
|
||||
"""
|
||||
name = settings.get("name")
|
||||
name = settings.get('name')
|
||||
if not name:
|
||||
raise InvalidMonitorException("Invalid name for monitor")
|
||||
if not settings.get("command"):
|
||||
raise InvalidMonitorException("Invalid command for monitor {}".format(name))
|
||||
raise InvalidMonitorException('Invalid name for monitor')
|
||||
if not settings.get('command'):
|
||||
raise InvalidMonitorException(
|
||||
'Invalid command for monitor {}'.format(name)
|
||||
)
|
||||
|
||||
type_assertions = (
|
||||
("check_interval", int),
|
||||
("alert_after", int),
|
||||
("alert_every", int),
|
||||
('check_interval', int),
|
||||
('alert_after', int),
|
||||
('alert_every', int),
|
||||
)
|
||||
|
||||
for key, val_type in type_assertions:
|
||||
val = settings.get(key)
|
||||
if not isinstance(val, val_type):
|
||||
raise InvalidMonitorException(
|
||||
"Invalid type on {}: {}. Expected {} and found {}".format(
|
||||
'Invalid type on {}: {}. Expected {} and found {}'.format(
|
||||
name, key, val_type.__name__, type(val).__name__
|
||||
)
|
||||
)
|
||||
|
||||
non_zero = (
|
||||
"check_interval",
|
||||
"alert_after",
|
||||
'check_interval',
|
||||
'alert_after',
|
||||
'alert_every',
|
||||
)
|
||||
|
||||
for key in non_zero:
|
||||
if settings.get(key) == 0:
|
||||
raise InvalidMonitorException(
|
||||
"Invalid value for {}: {}. Value cannot be 0".format(name, key)
|
||||
'Invalid value for {}: {}. Value cannot be 0'.format(
|
||||
name, key
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
def maybe_decode(bstr, encoding="utf-8"):
|
||||
def maybe_decode(bstr, encoding='utf-8'):
|
||||
try:
|
||||
return bstr.decode(encoding)
|
||||
except TypeError:
|
||||
@ -77,14 +78,14 @@ def maybe_decode(bstr, encoding="utf-8"):
|
||||
def call_output(*popenargs, **kwargs):
|
||||
"""Similar to check_output, but instead returns output and exception"""
|
||||
# So we can capture complete output, redirect stderr to stdout
|
||||
kwargs.setdefault("stderr", subprocess.STDOUT)
|
||||
kwargs.setdefault('stderr', subprocess.STDOUT)
|
||||
output, ex = None, None
|
||||
try:
|
||||
output = check_output(*popenargs, **kwargs)
|
||||
except CalledProcessError as e:
|
||||
output, ex = e.output, e
|
||||
|
||||
output = output.rstrip(b"\n")
|
||||
output = output.rstrip(b'\n')
|
||||
return output, ex
|
||||
|
||||
|
||||
@ -97,58 +98,37 @@ class InvalidMonitorException(Exception):
|
||||
|
||||
|
||||
class MinitorAlert(Exception):
|
||||
def __init__(self, message, monitor):
|
||||
super().__init__(message)
|
||||
self.monitor = monitor
|
||||
pass
|
||||
|
||||
|
||||
class Monitor(object):
|
||||
"""Primary configuration item for Minitor"""
|
||||
|
||||
def __init__(self, config, counter=None, logger=None):
|
||||
def __init__(self, config):
|
||||
"""Accepts a dictionary of configuration items to override defaults"""
|
||||
settings = {
|
||||
"alerts": ["log"],
|
||||
"check_interval": 30,
|
||||
"alert_after": 4,
|
||||
"alert_every": -1,
|
||||
'alerts': ['log'],
|
||||
'check_interval': 30,
|
||||
'alert_after': 4,
|
||||
'alert_every': -1,
|
||||
}
|
||||
settings.update(config)
|
||||
validate_monitor_settings(settings)
|
||||
|
||||
self.name = settings["name"]
|
||||
self.command = settings["command"]
|
||||
self.alert_down = settings.get("alert_down", [])
|
||||
if not self.alert_down:
|
||||
self.alert_down = settings.get("alerts", [])
|
||||
self.alert_up = settings.get("alert_up", [])
|
||||
self.check_interval = settings.get("check_interval")
|
||||
self.alert_after = settings.get("alert_after")
|
||||
self.alert_every = settings.get("alert_every")
|
||||
self.name = settings['name']
|
||||
self.command = settings['command']
|
||||
self.alerts = settings.get('alerts', [])
|
||||
self.check_interval = settings.get('check_interval')
|
||||
self.alert_after = settings.get('alert_after')
|
||||
self.alert_every = settings.get('alert_every')
|
||||
|
||||
self.alert_count = 0
|
||||
self.last_check = None
|
||||
self.last_output = None
|
||||
self.last_success = None
|
||||
self.total_failure_count = 0
|
||||
self.alert_count = 0
|
||||
|
||||
self._counter = counter
|
||||
if logger is None:
|
||||
self._logger = logging.getLogger(
|
||||
"{}({})".format(self.__class__.__name__, self.name)
|
||||
)
|
||||
else:
|
||||
self._logger = logger.getChild(
|
||||
"{}({})".format(self.__class__.__name__, self.name)
|
||||
)
|
||||
|
||||
def _count_check(self, is_success=True, is_alert=False):
|
||||
if self._counter is not None:
|
||||
self._counter.labels(
|
||||
monitor=self.name,
|
||||
status=("success" if is_success else "failure"),
|
||||
is_alert=is_alert,
|
||||
).inc()
|
||||
self.logger = logging.getLogger(
|
||||
'{}({})'.format(self.__class__.__name__, self.name)
|
||||
)
|
||||
|
||||
def should_check(self):
|
||||
"""Determines if this Monitor should run it's check command"""
|
||||
@ -169,39 +149,20 @@ class Monitor(object):
|
||||
self.command,
|
||||
shell=isinstance(self.command, str),
|
||||
)
|
||||
output = maybe_decode(output)
|
||||
self._logger.debug(output)
|
||||
self.logger.debug(output)
|
||||
self.last_check = datetime.now()
|
||||
self.last_output = output
|
||||
|
||||
is_success = None
|
||||
try:
|
||||
if ex is None:
|
||||
is_success = True
|
||||
self.success()
|
||||
else:
|
||||
is_success = False
|
||||
self.failure()
|
||||
except MinitorAlert:
|
||||
self._count_check(is_success=is_success, is_alert=True)
|
||||
raise
|
||||
|
||||
self._count_check(is_success=is_success)
|
||||
return is_success
|
||||
if ex is None:
|
||||
self.success()
|
||||
return True
|
||||
else:
|
||||
self.failure()
|
||||
return False
|
||||
|
||||
def success(self):
|
||||
"""Handles success tasks"""
|
||||
back_up = None
|
||||
if not self.is_up():
|
||||
back_up = MinitorAlert(
|
||||
"{} check is up again!".format(self.name),
|
||||
self,
|
||||
)
|
||||
self.total_failure_count = 0
|
||||
self.alert_count = 0
|
||||
self.last_success = datetime.now()
|
||||
if back_up:
|
||||
raise back_up
|
||||
|
||||
def failure(self):
|
||||
"""Handles failure tasks and possibly raises MinitorAlert"""
|
||||
@ -210,55 +171,31 @@ class Monitor(object):
|
||||
if self.total_failure_count < self.alert_after:
|
||||
return
|
||||
|
||||
failure_count = self.total_failure_count - self.alert_after
|
||||
failure_count = (self.total_failure_count - self.alert_after)
|
||||
if self.alert_every > 0:
|
||||
# Otherwise, we should check against our alert_every
|
||||
should_alert = (failure_count % self.alert_every) == 0
|
||||
elif self.alert_every == 0:
|
||||
# Only alert on the first failure
|
||||
should_alert = failure_count == 1
|
||||
else:
|
||||
should_alert = failure_count >= (2**self.alert_count) - 1
|
||||
should_alert = (failure_count >= (2 ** self.alert_count) - 1)
|
||||
|
||||
if should_alert:
|
||||
self.alert_count += 1
|
||||
raise MinitorAlert(
|
||||
"{} check has failed {} times".format(
|
||||
self.name, self.total_failure_count
|
||||
),
|
||||
self,
|
||||
)
|
||||
|
||||
def is_up(self):
|
||||
"""Indicates if the monitor is already alerting failures"""
|
||||
return self.alert_count == 0
|
||||
raise MinitorAlert('{} check has failed {} times'.format(
|
||||
self.name, self.total_failure_count
|
||||
))
|
||||
|
||||
|
||||
class Alert(object):
|
||||
def __init__(self, name, config, counter=None, logger=None):
|
||||
def __init__(self, name, config):
|
||||
"""An alert must be named and have a config dict"""
|
||||
self.name = name
|
||||
self.command = config.get("command")
|
||||
self.command = config.get('command')
|
||||
if not self.command:
|
||||
raise InvalidAlertException("Invalid alert {}".format(self.name))
|
||||
raise InvalidAlertException('Invalid alert {}'.format(self.name))
|
||||
|
||||
self._counter = counter
|
||||
if logger is None:
|
||||
self._logger = logging.getLogger(
|
||||
"{}({})".format(self.__class__.__name__, self.name)
|
||||
)
|
||||
else:
|
||||
self._logger = logger.getChild(
|
||||
"{}({})".format(self.__class__.__name__, self.name)
|
||||
)
|
||||
|
||||
def _count_alert(self, monitor):
|
||||
"""Increments the alert counter"""
|
||||
if self._counter is not None:
|
||||
self._counter.labels(
|
||||
alert=self.name,
|
||||
monitor=monitor,
|
||||
).inc()
|
||||
self.logger = logging.getLogger(
|
||||
'{}({})'.format(self.__class__.__name__, self.name)
|
||||
)
|
||||
|
||||
def _formated_command(self, **kwargs):
|
||||
"""Formats command array or string with kwargs from Monitor"""
|
||||
@ -269,27 +206,17 @@ class Alert(object):
|
||||
args.append(arg.format(**kwargs))
|
||||
return args
|
||||
|
||||
def _format_datetime(self, dt):
|
||||
"""Formats a datetime for an alert"""
|
||||
if dt is None:
|
||||
return "Never"
|
||||
return dt.isoformat()
|
||||
|
||||
def alert(self, message, monitor):
|
||||
def alert(self, monitor):
|
||||
"""Calls the alert command for the provided monitor"""
|
||||
self._count_alert(monitor.name)
|
||||
output, ex = call_output(
|
||||
self._formated_command(
|
||||
alert_count=monitor.alert_count,
|
||||
alert_message=message,
|
||||
failure_count=monitor.total_failure_count,
|
||||
last_output=monitor.last_output,
|
||||
last_success=self._format_datetime(monitor.last_success),
|
||||
monitor_name=monitor.name,
|
||||
failure_count=monitor.total_failure_count,
|
||||
),
|
||||
shell=isinstance(self.command, str),
|
||||
)
|
||||
self._logger.error(maybe_decode(output))
|
||||
self.logger.error(maybe_decode(output))
|
||||
if ex is not None:
|
||||
raise ex
|
||||
|
||||
@ -301,180 +228,90 @@ class Minitor(object):
|
||||
check_interval = None
|
||||
|
||||
def __init__(self):
|
||||
self._logger = logging.getLogger(self.__class__.__name__)
|
||||
self._alert_counter = None
|
||||
self._monitor_counter = None
|
||||
self._monitor_status_gauge = None
|
||||
self.logger = logging.getLogger(self.__class__.__name__)
|
||||
|
||||
def _parse_args(self, args=None):
|
||||
"""Parses command line arguments and returns them"""
|
||||
parser = ArgumentParser(description="Minimal monitoring")
|
||||
parser.add_argument(
|
||||
"--config",
|
||||
"-c",
|
||||
dest="config_path",
|
||||
default="config.yml",
|
||||
help="Path to the config YAML file to use",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metrics",
|
||||
"-m",
|
||||
dest="metrics",
|
||||
action="store_true",
|
||||
help="Start webserver with metrics",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--metrics-port",
|
||||
"-p",
|
||||
dest="metrics_port",
|
||||
type=int,
|
||||
default=DEFAULT_METRICS_PORT,
|
||||
help="Port to use when serving metrics",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--verbose",
|
||||
"-v",
|
||||
action="count",
|
||||
help=(
|
||||
"Adjust log verbosity by increasing arg count. Default log",
|
||||
"level is ERROR. Level increases with each `v`",
|
||||
),
|
||||
)
|
||||
return parser.parse_args(args)
|
||||
|
||||
def _setup(self, config_path):
|
||||
def setup(self, config_path):
|
||||
"""Load all setup from YAML file at provided path"""
|
||||
config = read_yaml(config_path)
|
||||
self.check_interval = config.get("check_interval", 30)
|
||||
self.monitors = [
|
||||
Monitor(
|
||||
mon,
|
||||
counter=self._monitor_counter,
|
||||
logger=self._logger,
|
||||
)
|
||||
for mon in config.get("monitors", [])
|
||||
]
|
||||
self.check_interval = config.get('check_interval', 30)
|
||||
self.monitors = [Monitor(mon) for mon in config.get('monitors', [])]
|
||||
# Add default alert for logging
|
||||
self.alerts = {
|
||||
"log": Alert(
|
||||
"log",
|
||||
{"command": ["echo", "{alert_message}!"]},
|
||||
counter=self._alert_counter,
|
||||
logger=self._logger,
|
||||
'log': Alert(
|
||||
'log',
|
||||
{'command': ['echo', '{monitor_name} has failed!']}
|
||||
)
|
||||
}
|
||||
self.alerts.update(
|
||||
{
|
||||
alert_name: Alert(
|
||||
alert_name,
|
||||
alert,
|
||||
counter=self._alert_counter,
|
||||
logger=self._logger,
|
||||
)
|
||||
for alert_name, alert in config.get("alerts", {}).items()
|
||||
}
|
||||
)
|
||||
self.alerts.update({
|
||||
alert_name: Alert(alert_name, alert)
|
||||
for alert_name, alert in config.get('alerts', {}).items()
|
||||
})
|
||||
|
||||
def _validate_monitors(self):
|
||||
def validate_monitors(self):
|
||||
"""Validates monitors are valid against other config values"""
|
||||
for monitor in self.monitors:
|
||||
# Validate that the interval is valid
|
||||
if monitor.check_interval < self.check_interval:
|
||||
raise InvalidMonitorException(
|
||||
"Monitor {} check interval is lower global value {}".format(
|
||||
'Monitor {} check interval is lower global value {}'.format(
|
||||
monitor.name, self.check_interval
|
||||
)
|
||||
)
|
||||
# Validate that the alerts for the monitor exist
|
||||
for alert in chain(monitor.alert_down, monitor.alert_up):
|
||||
for alert in monitor.alerts:
|
||||
if alert not in self.alerts:
|
||||
raise InvalidMonitorException(
|
||||
"Monitor {} contains an unknown alert: {}".format(
|
||||
'Monitor {} contains an unknown alert: {}'.format(
|
||||
monitor.name, alert
|
||||
)
|
||||
)
|
||||
|
||||
def _init_metrics(self):
|
||||
self._alert_counter = Counter(
|
||||
"minitor_alert_total",
|
||||
"Number of Minitor alerts",
|
||||
["alert", "monitor"],
|
||||
)
|
||||
self._monitor_counter = Counter(
|
||||
"minitor_check_total",
|
||||
"Number of Minitor checks",
|
||||
["monitor", "status", "is_alert"],
|
||||
)
|
||||
self._monitor_status_gauge = Gauge(
|
||||
"minitor_monitor_up_count",
|
||||
"Currently responsive monitors",
|
||||
["monitor"],
|
||||
)
|
||||
def alert_for_monitor(self, monitor):
|
||||
"""Issues all alerts for a provided monitor"""
|
||||
for alert in monitor.alerts:
|
||||
self.alerts[alert].alert(monitor)
|
||||
|
||||
def parse_args(self):
|
||||
"""Parses command line arguments and returns them"""
|
||||
parser = ArgumentParser(description='Minimal monitoring')
|
||||
parser.add_argument(
|
||||
'--config', '-c',
|
||||
dest='config_path',
|
||||
default='config.yml',
|
||||
help='Path to the config YAML file to use',
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
def run(self):
|
||||
"""Runs Minitor in a loop"""
|
||||
args = self.parse_args()
|
||||
self.setup(args.config_path)
|
||||
self.validate_monitors()
|
||||
|
||||
def _loop(self):
|
||||
while True:
|
||||
self._check()
|
||||
for monitor in self.monitors:
|
||||
try:
|
||||
result = monitor.check()
|
||||
if result is not None:
|
||||
self.logger.info(
|
||||
'%s: %s',
|
||||
monitor.name,
|
||||
'SUCCESS' if result else 'FAILURE'
|
||||
)
|
||||
except MinitorAlert as minitor_alert:
|
||||
self.logger.warn(minitor_alert)
|
||||
self.alert_for_monitor(monitor)
|
||||
|
||||
sleep(self.check_interval)
|
||||
|
||||
def _check(self):
|
||||
"""The main run loop"""
|
||||
for monitor in self.monitors:
|
||||
try:
|
||||
result = monitor.check()
|
||||
if result is not None:
|
||||
self._logger.info(
|
||||
"%s: %s", monitor.name, "SUCCESS" if result else "FAILURE"
|
||||
)
|
||||
except MinitorAlert as minitor_alert:
|
||||
self._logger.warning(minitor_alert)
|
||||
self._handle_minitor_alert(minitor_alert)
|
||||
|
||||
# Track the status of the Monitor
|
||||
if self._monitor_status_gauge:
|
||||
self._monitor_status_gauge.labels(
|
||||
monitor=monitor.name,
|
||||
).set(int(monitor.is_up()))
|
||||
|
||||
def _handle_minitor_alert(self, minitor_alert):
|
||||
"""Issues all alerts for a provided monitor"""
|
||||
monitor = minitor_alert.monitor
|
||||
alerts = monitor.alert_up if monitor.is_up() else monitor.alert_down
|
||||
for alert in alerts:
|
||||
self.alerts[alert].alert(str(minitor_alert), monitor)
|
||||
|
||||
def _set_log_level(self, verbose):
|
||||
"""Sets the log level for the class using the provided verbose count"""
|
||||
if verbose == 1:
|
||||
self._logger.setLevel(logging.WARNING)
|
||||
elif verbose == 2:
|
||||
self._logger.setLevel(logging.INFO)
|
||||
elif verbose >= 3:
|
||||
self._logger.setLevel(logging.DEBUG)
|
||||
|
||||
def run(self, args=None):
|
||||
"""Runs Minitor in a loop"""
|
||||
args = self._parse_args(args)
|
||||
|
||||
if args.verbose:
|
||||
self._set_log_level(args.verbose)
|
||||
|
||||
if args.metrics:
|
||||
self._init_metrics()
|
||||
start_http_server(args.metrics_port)
|
||||
|
||||
self._setup(args.config_path)
|
||||
self._validate_monitors()
|
||||
|
||||
self._loop()
|
||||
|
||||
|
||||
def main(args=None):
|
||||
def main():
|
||||
try:
|
||||
Minitor().run(args)
|
||||
Minitor().run()
|
||||
except KeyboardInterrupt:
|
||||
pass
|
||||
return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@ -2,17 +2,16 @@ check_interval: 30
|
||||
|
||||
monitors:
|
||||
- name: My Website
|
||||
command: [ 'curl', '-s', '-o', '/dev/null', 'https://minitor.mon' ]
|
||||
alert_down: [ log, mailgun_down, sms_down ]
|
||||
alert_up: [ log, email_up ]
|
||||
check_interval: 30 # Must be at minimum the global `check_interval`
|
||||
command: [ 'curl', 'https://minitor.mon' ]
|
||||
alerts: [ log, email, sms ]
|
||||
check_interval: 30
|
||||
alert_after: 3
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff
|
||||
|
||||
alerts:
|
||||
email_up:
|
||||
command: [ sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!" ]
|
||||
mailgun_down:
|
||||
email:
|
||||
command: [ sendmail, "me@minitor.mon", "Failure: {monitor_name}", "This thing failed!" ]
|
||||
mailgun:
|
||||
command: >
|
||||
curl -s -X POST
|
||||
-F subject="Alert! {monitor_name} failed"
|
||||
@ -21,9 +20,15 @@ alerts:
|
||||
-F text="Our monitor failed"
|
||||
https://api.mailgun.net/v3/minitor.mon/messages
|
||||
-u "api:${MAILGUN_API_KEY}"
|
||||
sms_down:
|
||||
sms:
|
||||
command: >
|
||||
curl -s -X POST -F "Body=Failure! {monitor_name} has failed"
|
||||
curl -s -X POST -F "Body=Failure: {monitor_name} has failed"
|
||||
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
|
||||
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
|
||||
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
|
||||
|
||||
# federation:
|
||||
# - location: https://host1.com
|
||||
# client_key: keyfromhost1
|
||||
# server_key: keyhost1uses
|
||||
# alerts: [ sms ]
|
||||
|
@ -1,5 +0,0 @@
|
||||
# Minitor Scripts
|
||||
|
||||
A collection of some handy scripts to use with Minitor
|
||||
|
||||
These are not included with the Python package, but they are included in the Docker image in `/app/scripts`.
|
@ -1,51 +0,0 @@
|
||||
#! /bin/bash
|
||||
set -e
|
||||
|
||||
#################
|
||||
# docker_check.sh
|
||||
#
|
||||
# Checks the most recent state exit code of a Docker container
|
||||
#################
|
||||
|
||||
# Docker host will default to a socket
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
local path="$1"
|
||||
if [ "$DOCKER_HOST" == "socket" ]; then
|
||||
curl --unix-socket /var/run/docker.sock "http://localhost/$path" 2>/dev/null
|
||||
else
|
||||
curl "http://${DOCKER_HOST}/$path" 2>/dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
# Returns container ID for a given container name
|
||||
function get_container_id {
|
||||
local container_name="$1"
|
||||
curl_docker 'containers/json?all=1' \
|
||||
| jq -r ".[] | {Id, Name: .Names[]} | select(.Name == \"/${container_name}\") | .Id"
|
||||
}
|
||||
|
||||
# Returns container JSON
|
||||
function inspect_container {
|
||||
local container_id=$1
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Will exit with the last status code of continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
container_id=$(get_container_id $container_name)
|
||||
if [ -z "$container_id" ]; then
|
||||
echo "ERROR: Could not find container with name: $container_name"
|
||||
exit 1
|
||||
fi
|
||||
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
|
||||
|
||||
exit "$exit_code"
|
@ -1,61 +0,0 @@
|
||||
#! /bin/bash
|
||||
set -e
|
||||
|
||||
#################
|
||||
# docker_healthcheck.sh
|
||||
#
|
||||
# Returns the results of a Docker Healthcheck for a container
|
||||
#################
|
||||
|
||||
# Docker host will default to a socket
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
local path="$1"
|
||||
if [ "$DOCKER_HOST" == "socket" ]; then
|
||||
curl --unix-socket /var/run/docker.sock "http://localhost/$path" 2>/dev/null
|
||||
else
|
||||
curl "http://${DOCKER_HOST}/$path" 2>/dev/null
|
||||
fi
|
||||
}
|
||||
|
||||
# Returns container ID for a given container name
|
||||
function get_container_id {
|
||||
local container_name="$1"
|
||||
curl_docker 'containers/json?all=1' \
|
||||
| jq -r ".[] | {Id, Name: .Names[]} | select(.Name == \"/${container_name}\") | .Id"
|
||||
}
|
||||
|
||||
# Returns container JSON
|
||||
function inspect_container {
|
||||
local container_id="$1"
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Will return results of healthcheck for continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
container_id=$(get_container_id "$container_name")
|
||||
if [ -z "$container_id" ]; then
|
||||
echo "ERROR: Could not find container with name: $container_name"
|
||||
exit 1
|
||||
fi
|
||||
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
|
||||
|
||||
case "$health" in
|
||||
null)
|
||||
echo "No healthcheck results"
|
||||
;;
|
||||
starting|healthy)
|
||||
echo "Status: '$health'"
|
||||
;;
|
||||
*)
|
||||
echo "Status: '$health'"
|
||||
exit 1
|
||||
esac
|
63
setup.py
63
setup.py
@ -7,49 +7,42 @@ from setuptools import setup
|
||||
here = path.abspath(path.dirname(__file__))
|
||||
|
||||
# Get the long description from the README file
|
||||
with open(path.join(here, "README.md"), encoding="utf-8") as f:
|
||||
with open(path.join(here, 'README.md'), encoding='utf-8') as f:
|
||||
long_description = f.read()
|
||||
|
||||
setup(
|
||||
name="minitor",
|
||||
version="1.0.3",
|
||||
description="A minimal monitoring tool",
|
||||
name='minitor',
|
||||
version='0.1.2',
|
||||
description='A minimal monitoring tool',
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://git.iamthefij.com/iamthefij/minitor",
|
||||
download_url=("https://git.iamthefij.com/iamthefij/minitor/archive/master.tar.gz"),
|
||||
author="Ian Fijolek",
|
||||
author_email="ian@iamthefij.com",
|
||||
classifiers=[
|
||||
"Development Status :: 5 - Production/Stable",
|
||||
"Intended Audience :: Developers",
|
||||
"Intended Audience :: System Administrators",
|
||||
"Topic :: System :: Monitoring",
|
||||
"License :: OSI Approved :: Apache Software License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Programming Language :: Python :: 3.6",
|
||||
"Programming Language :: Python :: 3.7",
|
||||
"Programming Language :: Python :: 3.8",
|
||||
"Programming Language :: Python :: 3.9",
|
||||
"Programming Language :: Python :: 3.10",
|
||||
],
|
||||
keywords="minitor monitoring alerting",
|
||||
packages=find_packages(
|
||||
exclude=[
|
||||
"contrib",
|
||||
"docs",
|
||||
"examples",
|
||||
"scripts",
|
||||
"tests",
|
||||
]
|
||||
url='https://git.iamthefij.com/iamthefij/minitor',
|
||||
download_url=(
|
||||
'https://git.iamthefij.com/iamthefij/minitor/archive/master.tar.gz'
|
||||
),
|
||||
author='Ian Fijolek',
|
||||
author_email='ian@iamthefij.com',
|
||||
classifiers=[
|
||||
# How mature is this project? Common values are
|
||||
# 4 - Beta
|
||||
# 5 - Production/Stable
|
||||
'Development Status :: 4 - Beta',
|
||||
'Intended Audience :: Developers',
|
||||
'Intended Audience :: System Administrators',
|
||||
'Topic :: System :: Monitoring',
|
||||
'License :: OSI Approved :: Apache Software License',
|
||||
'Programming Language :: Python :: 3',
|
||||
'Programming Language :: Python :: 3.4',
|
||||
'Programming Language :: Python :: 3.5',
|
||||
'Programming Language :: Python :: 3.6',
|
||||
],
|
||||
keywords='minitor monitoring alerting',
|
||||
packages=find_packages(exclude=['contrib', 'docs', 'tests']),
|
||||
install_requires=[
|
||||
"prometheus_client",
|
||||
"yamlenv",
|
||||
'yamlenv',
|
||||
],
|
||||
entry_points={
|
||||
"console_scripts": [
|
||||
"minitor=minitor.main:main",
|
||||
'console_scripts': [
|
||||
'minitor=minitor.main:main',
|
||||
],
|
||||
},
|
||||
)
|
||||
|
@ -1,55 +1,40 @@
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from minitor.main import Alert
|
||||
from minitor.main import Monitor
|
||||
from tests.util import assert_called_once_with
|
||||
|
||||
|
||||
class TestAlert(object):
|
||||
|
||||
@pytest.fixture
|
||||
def monitor(self):
|
||||
return Monitor(
|
||||
{
|
||||
"name": "Dummy Monitor",
|
||||
"command": ["echo", "foo"],
|
||||
}
|
||||
)
|
||||
return Monitor({
|
||||
'name': 'Dummy Monitor',
|
||||
'command': ['echo', 'foo'],
|
||||
})
|
||||
|
||||
@pytest.fixture
|
||||
def echo_alert(self):
|
||||
return Alert(
|
||||
"log",
|
||||
'log',
|
||||
{
|
||||
"command": [
|
||||
"echo",
|
||||
(
|
||||
"{monitor_name} has failed {failure_count} time(s)!\n"
|
||||
"We have alerted {alert_count} time(s)\n"
|
||||
"Last success was {last_success}\n"
|
||||
"Last output was: {last_output}"
|
||||
),
|
||||
'command': [
|
||||
'echo', (
|
||||
'{monitor_name} has failed {failure_count} time(s)!\n'
|
||||
'We have alerted {alert_count} time(s)'
|
||||
)
|
||||
]
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"last_success,expected_success",
|
||||
[(None, "Never"), (datetime(2018, 4, 10), "2018-04-10T00:00:00")],
|
||||
)
|
||||
def test_simple_alert(self, monitor, echo_alert, last_success, expected_success):
|
||||
monitor.alert_count = 1
|
||||
monitor.last_output = "beep boop"
|
||||
monitor.last_success = last_success
|
||||
def test_simple_alert(self, monitor, echo_alert):
|
||||
monitor.total_failure_count = 1
|
||||
with patch.object(echo_alert._logger, "error") as mock_error:
|
||||
echo_alert.alert("Exception message", monitor)
|
||||
assert_called_once_with(
|
||||
mock_error,
|
||||
"Dummy Monitor has failed 1 time(s)!\n"
|
||||
"We have alerted 1 time(s)\n"
|
||||
"Last success was " + expected_success + "\n"
|
||||
"Last output was: beep boop",
|
||||
)
|
||||
monitor.alert_count = 1
|
||||
with patch.object(echo_alert.logger, 'error') as mock_error:
|
||||
echo_alert.alert(monitor)
|
||||
mock_error.assert_called_once_with(
|
||||
'Dummy Monitor has failed 1 time(s)!\n'
|
||||
'We have alerted 1 time(s)'
|
||||
)
|
||||
|
@ -1,37 +1,14 @@
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
from minitor.main import call_output
|
||||
from minitor.main import Minitor
|
||||
|
||||
|
||||
class TestMinitor(object):
|
||||
|
||||
def test_call_output(self):
|
||||
# valid command should have result and no exception
|
||||
output, ex = call_output(["echo", "test"])
|
||||
assert output == b"test"
|
||||
output, ex = call_output(['echo', 'test'])
|
||||
assert output == b'test'
|
||||
assert ex is None
|
||||
|
||||
output, ex = call_output(["ls", "--not-a-real-flag"])
|
||||
assert output.startswith(b"ls: ")
|
||||
output, ex = call_output(['ls', '--not-a-real-flag'])
|
||||
assert output.startswith(b'ls: ')
|
||||
assert ex is not None
|
||||
|
||||
def test_run(self):
|
||||
"""Doesn't really check much, but a simple integration sanity test"""
|
||||
test_loop_count = 5
|
||||
os.environ.update(
|
||||
{
|
||||
"MAILGUN_API_KEY": "test-mg-key",
|
||||
"AVAILABLE_NUMBER": "555-555-5050",
|
||||
"MY_PHONE": "555-555-0505",
|
||||
"ACCOUNT_SID": "test-account-id",
|
||||
"AUTH_TOKEN": "test-account-token",
|
||||
}
|
||||
)
|
||||
args = "--config ./sample-config.yml".split(" ")
|
||||
minitor = Minitor()
|
||||
with patch.object(minitor, "_loop"):
|
||||
minitor.run(args)
|
||||
# Skip the loop, but run a single check
|
||||
for _ in range(test_loop_count):
|
||||
minitor._check()
|
||||
|
@ -1,54 +1,41 @@
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from minitor.main import InvalidMonitorException
|
||||
from minitor.main import MinitorAlert
|
||||
from minitor.main import Monitor
|
||||
from minitor.main import validate_monitor_settings
|
||||
from tests.util import assert_called_once
|
||||
|
||||
|
||||
class TestMonitor(object):
|
||||
|
||||
@pytest.fixture
|
||||
def monitor(self):
|
||||
return Monitor(
|
||||
{
|
||||
"name": "Sample Monitor",
|
||||
"command": ["echo", "foo"],
|
||||
"alert_down": ["log"],
|
||||
"alert_up": ["log"],
|
||||
"check_interval": 1,
|
||||
"alert_after": 1,
|
||||
"alert_every": 1,
|
||||
}
|
||||
)
|
||||
return Monitor({
|
||||
'name': 'Sample Monitor',
|
||||
'command': ['echo', 'foo'],
|
||||
})
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"settings",
|
||||
[
|
||||
{"alert_after": 0},
|
||||
{"alert_every": 0},
|
||||
{"check_interval": 0},
|
||||
{"alert_after": "invalid"},
|
||||
{"alert_every": "invalid"},
|
||||
{"check_interval": "invalid"},
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize('settings', [
|
||||
{'alert_after': 0},
|
||||
{'alert_every': 0},
|
||||
{'check_interval': 0},
|
||||
{'alert_after': 'invalid'},
|
||||
{'alert_every': 'invalid'},
|
||||
{'check_interval': 'invalid'},
|
||||
])
|
||||
def test_monitor_invalid_configuration(self, settings):
|
||||
with pytest.raises(InvalidMonitorException):
|
||||
validate_monitor_settings(settings)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"alert_after",
|
||||
'alert_after',
|
||||
[1, 20],
|
||||
ids=lambda arg: "alert_after({})".format(arg),
|
||||
ids=lambda arg: 'alert_after({})'.format(arg),
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"alert_every",
|
||||
'alert_every',
|
||||
[-1, 1, 2, 1440],
|
||||
ids=lambda arg: "alert_every({})".format(arg),
|
||||
ids=lambda arg: 'alert_every({})'.format(arg),
|
||||
)
|
||||
def test_monitor_alert_after(self, monitor, alert_after, alert_every):
|
||||
monitor.alert_after = alert_after
|
||||
@ -63,14 +50,14 @@ class TestMonitor(object):
|
||||
monitor.failure()
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"alert_after",
|
||||
'alert_after',
|
||||
[1, 20],
|
||||
ids=lambda arg: "alert_after({})".format(arg),
|
||||
ids=lambda arg: 'alert_after({})'.format(arg),
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"alert_every",
|
||||
'alert_every',
|
||||
[1, 2, 1440],
|
||||
ids=lambda arg: "alert_every({})".format(arg),
|
||||
ids=lambda arg: 'alert_every({})'.format(arg),
|
||||
)
|
||||
def test_monitor_alert_every(self, monitor, alert_after, alert_every):
|
||||
monitor.alert_after = alert_after
|
||||
@ -105,46 +92,3 @@ class TestMonitor(object):
|
||||
monitor.failure()
|
||||
else:
|
||||
monitor.failure()
|
||||
|
||||
@pytest.mark.parametrize("last_check", [None, datetime(2018, 4, 10)])
|
||||
def test_monitor_should_check(self, monitor, last_check):
|
||||
monitor.last_check = last_check
|
||||
assert monitor.should_check()
|
||||
|
||||
def test_monitor_check_fail(self, monitor):
|
||||
assert monitor.last_output is None
|
||||
with patch.object(monitor, "failure") as mock_failure:
|
||||
monitor.command = ["ls", "--not-real"]
|
||||
assert not monitor.check()
|
||||
assert_called_once(mock_failure)
|
||||
assert monitor.last_output is not None
|
||||
|
||||
def test_monitor_check_success(self, monitor):
|
||||
assert monitor.last_output is None
|
||||
with patch.object(monitor, "success") as mock_success:
|
||||
assert monitor.check()
|
||||
assert_called_once(mock_success)
|
||||
assert monitor.last_output is not None
|
||||
|
||||
@pytest.mark.parametrize("failure_count", [0, 1])
|
||||
def test_monitor_success(self, monitor, failure_count):
|
||||
monitor.alert_count = 0
|
||||
monitor.total_failure_count = failure_count
|
||||
assert monitor.last_success is None
|
||||
|
||||
monitor.success()
|
||||
|
||||
assert monitor.alert_count == 0
|
||||
assert monitor.last_success is not None
|
||||
assert monitor.total_failure_count == 0
|
||||
|
||||
def test_monitor_success_back_up(self, monitor):
|
||||
monitor.total_failure_count = 1
|
||||
monitor.alert_count = 1
|
||||
|
||||
with pytest.raises(MinitorAlert):
|
||||
monitor.success()
|
||||
|
||||
assert monitor.alert_count == 0
|
||||
assert monitor.last_success is not None
|
||||
assert monitor.total_failure_count == 0
|
||||
|
@ -1,12 +0,0 @@
|
||||
from unittest import mock
|
||||
|
||||
|
||||
def assert_called_once(mocked):
|
||||
"""Safe convenient methods for mock asserts"""
|
||||
assert mocked.call_count == 1
|
||||
|
||||
|
||||
def assert_called_once_with(mocked, *args, **kwargs):
|
||||
"""Safe convenient methods for mock asserts"""
|
||||
assert_called_once(mocked)
|
||||
assert mocked.call_args == mock.call(*args, **kwargs)
|
6
tox.ini
6
tox.ini
@ -1,5 +1,5 @@
|
||||
[tox]
|
||||
envlist = py3,pypy3
|
||||
envlist = py3
|
||||
|
||||
[testenv]
|
||||
deps =
|
||||
@ -7,12 +7,10 @@ deps =
|
||||
commands =
|
||||
coverage erase
|
||||
coverage run --source=minitor/ -m pytest --capture=no -vv {posargs:tests}
|
||||
coverage report -m --fail-under 70
|
||||
coverage report -m --fail-under 50
|
||||
pre-commit run --all-files
|
||||
|
||||
[testenv:pre-commit]
|
||||
deps =
|
||||
pre-commit
|
||||
commands =
|
||||
pre-commit {posargs}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user