Secondary initial commit with scripts from other repo
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
This commit is contained in:
parent
7cd688adfe
commit
e5e43b5021
62
.drone.yml
Normal file
62
.drone.yml
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: test
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: check
|
||||||
|
image: iamthefij/drone-pre-commit:personal
|
||||||
|
|
||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: publish
|
||||||
|
|
||||||
|
depends_on:
|
||||||
|
- test
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
event:
|
||||||
|
- push
|
||||||
|
- tag
|
||||||
|
refs:
|
||||||
|
- refs/heads/master
|
||||||
|
- refs/tags/v*
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: push images
|
||||||
|
image: thegeeklab/drone-docker-buildx
|
||||||
|
settings:
|
||||||
|
repo: iamthefij/nomad-service-fixers
|
||||||
|
auto_tag: true
|
||||||
|
platforms:
|
||||||
|
- linux/amd64
|
||||||
|
- linux/arm64
|
||||||
|
- linux/arm
|
||||||
|
username:
|
||||||
|
from_secret: docker_username
|
||||||
|
password:
|
||||||
|
from_secret: docker_password
|
||||||
|
|
||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: notify
|
||||||
|
|
||||||
|
depends_on:
|
||||||
|
- test
|
||||||
|
- publish
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
status:
|
||||||
|
- failure
|
||||||
|
|
||||||
|
steps:
|
||||||
|
|
||||||
|
- name: notify
|
||||||
|
image: drillster/drone-email
|
||||||
|
settings:
|
||||||
|
host:
|
||||||
|
from_secret: SMTP_HOST # pragma: whitelist secret
|
||||||
|
username:
|
||||||
|
from_secret: SMTP_USER # pragma: whitelist secret
|
||||||
|
password:
|
||||||
|
from_secret: SMTP_PASS # pragma: whitelist secret
|
||||||
|
from: drone@iamthefij.com
|
1
.gitignore
vendored
1
.gitignore
vendored
@ -159,4 +159,3 @@ cython_debug/
|
|||||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
|
11
.pre-commit-config.yaml
Normal file
11
.pre-commit-config.yaml
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
---
|
||||||
|
repos:
|
||||||
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
|
rev: v4.1.0
|
||||||
|
hooks:
|
||||||
|
- id: check-added-large-files
|
||||||
|
- id: requirements-txt-fixer
|
||||||
|
- id: trailing-whitespace
|
||||||
|
- id: end-of-file-fixer
|
||||||
|
- id: check-merge-conflict
|
||||||
|
- id: debug-statements
|
9
Dockerfile
Normal file
9
Dockerfile
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
FROM python:3-alpine
|
||||||
|
|
||||||
|
RUN mkdir /scripts
|
||||||
|
WORKDIR /scripts
|
||||||
|
|
||||||
|
COPY ./requirements.txt /scripts/
|
||||||
|
RUN pip install --no-cache-dir -r /scripts/requirements.txt
|
||||||
|
|
||||||
|
COPY ./nomad_missing_services.py ./nomad_orphan_services.py /scripts/
|
@ -1,3 +1,12 @@
|
|||||||
# nomad-service-fixers
|
# nomad-service-fixers
|
||||||
|
|
||||||
A few check and fixer scripts to clean up services in my running instances.
|
A few check and fixer scripts to clean up services in my running instances.
|
||||||
|
|
||||||
|
These make use of [requests-unixsocket](https://github.com/msabramo/requests-unixsocket) so that they can target the workload API from within a Nomad task.
|
||||||
|
|
||||||
|
Included scripts:
|
||||||
|
|
||||||
|
* `./nomad_missing_services.py`: Looks for running allocs whose services appear to have disappeared.
|
||||||
|
* `./nomad_orphan_services.py`: Looks for services whose allocs appear to have disappeared.
|
||||||
|
|
||||||
|
This is on DockerHub as `iamthefij/nomad-service-fixers`.
|
||||||
|
99
nomad_missing_services.py
Executable file
99
nomad_missing_services.py
Executable file
@ -0,0 +1,99 @@
|
|||||||
|
#! /usr/bin/env python3
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
from os import environ
|
||||||
|
from typing import Any
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
import requests_unixsocket
|
||||||
|
requests = requests_unixsocket.Session()
|
||||||
|
|
||||||
|
NOMAD_ADDR = environ.get("NOMAD_ADDR", "http://127.0.0.1:4646")
|
||||||
|
NOMAD_TOKEN = environ.get("NOMAD_TOKEN")
|
||||||
|
|
||||||
|
|
||||||
|
def nomad_req(
    *path: str, params: dict[str, Any] | None = None, method="GET"
) -> list[dict[str, Any]] | dict[str, Any] | str:
    """Send a request to the Nomad HTTP API and return the decoded body.

    Args:
        *path: Path segments; joined with "/" and appended to
            ``{NOMAD_ADDR}/v1/``.
        params: Optional query-string parameters.
        method: HTTP method to use. Defaults to "GET".

    Returns:
        The parsed JSON body, or the raw response text when the body is not
        valid JSON.

    Raises:
        requests.exceptions.HTTPError: If the response has a 4xx/5xx status
            (raised by ``response.raise_for_status()``).
    """
    headers: dict[str, str] = {}
    if NOMAD_TOKEN:
        headers["X-Nomad-Token"] = NOMAD_TOKEN

    response = requests.request(
        method,
        f"{NOMAD_ADDR}/v1/{'/'.join(path)}",
        params=params,
        headers=headers,
    )
    response.raise_for_status()

    try:
        return response.json()
    except ValueError:
        # The module-level ``requests`` is a requests_unixsocket Session and
        # has no ``exceptions`` attribute, so the decode error is caught via
        # its ValueError base class instead of ``requests.exceptions``.
        return response.text
|
||||||
|
|
||||||
|
|
||||||
|
def extract_job_services(job: dict[str, Any]) -> dict[str, str]:
    """Map every service name declared in a job to its task group name.

    Both group-level and task-level services are collected. If the same
    service name appears more than once, the last occurrence wins.

    Args:
        job: A job definition dict with a "TaskGroups" list; each group has a
            "Name" and may have "Services" and "Tasks" (each task may have
            "Services").

    Returns:
        A dict of ``{service name: task group name}``.
    """
    services: dict[str, str] = {}

    # Any of these lists may be missing or null; treat that the same as empty,
    # matching how "Services" was already handled.
    for group in job.get("TaskGroups") or []:
        group_name = group["Name"]

        for service in group.get("Services") or []:
            services[service["Name"]] = group_name

        for task in group.get("Tasks") or []:
            for service in task.get("Services") or []:
                services[service["Name"]] = group_name

    return services
|
||||||
|
|
||||||
|
# Script body: report services that a running job declares but that are not
# registered, and optionally restart the allocations of the affected groups.
# The process exits 1 if any service is missing, 0 otherwise.
exit_code = 0
parser = ArgumentParser(
    description="Checks for missing services and optionally restarts their allocs.",
)
parser.add_argument("-r", "--restart", action="store_true", help="Restart allocs for missing services")
args = parser.parse_args()

for job in nomad_req("jobs"):
    job = cast(dict[str, Any], job)

    # Batch and sysbatch jobs are not checked.
    if job["Type"] in ("batch", "sysbatch"):
        continue

    if job["Status"] != "running":
        print(f"WARNING: job {job['Name']} is {job['Status']}")
        continue

    job_detail = cast(dict[str, Any], nomad_req("job", job["ID"]))

    # Service name -> task group name, as declared in the job definition.
    expected_services = extract_job_services(job_detail)

    # Names of the services currently registered for this job.
    found_services: set[str] = set()
    for service in nomad_req("job", job_detail["ID"], "services"):
        service = cast(dict[str, Any], service)
        found_services.add(service["ServiceName"])

    missing_services = set(expected_services) - found_services
    restart_groups: set[str] = set()
    for missing_service in missing_services:
        print(f"ERROR: Missing service {missing_service} for job {job_detail['Name']}")
        exit_code = 1

        # Add group associated with missing service to set
        restart_groups.add(expected_services[missing_service])

    if not restart_groups or not args.restart:
        continue

    # Get allocs for groups that are missing services
    restart_allocs: set[str] = set()
    for allocation in nomad_req("job", job_detail["ID"], "allocations"):
        allocation = cast(dict[str, Any], allocation)
        if allocation["ClientStatus"] == "running" and allocation["TaskGroup"] in restart_groups:
            restart_allocs.add(allocation["ID"])

    # Restart allocs associated with missing services
    for allocation in restart_allocs:
        print(f"INFO: Restarting allocation {allocation}")
        # Nomad's restart endpoint only accepts POST/PUT; the default GET is
        # rejected, so the method must be given explicitly.
        nomad_req("client", "allocation", allocation, "restart", method="POST")


# ``exit`` is injected by the ``site`` module and is meant for interactive
# use; raising SystemExit works regardless of how the interpreter started.
raise SystemExit(exit_code)
|
73
nomad_orphan_services.py
Executable file
73
nomad_orphan_services.py
Executable file
@ -0,0 +1,73 @@
|
|||||||
|
#! /usr/bin/env python3
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
from os import environ
|
||||||
|
from typing import Any
|
||||||
|
from typing import cast
|
||||||
|
|
||||||
|
import requests_unixsocket
|
||||||
|
requests = requests_unixsocket.Session()
|
||||||
|
|
||||||
|
|
||||||
|
NOMAD_ADDR = environ.get("NOMAD_ADDR", "http://127.0.0.1:4646")
|
||||||
|
NOMAD_TOKEN = environ.get("NOMAD_TOKEN")
|
||||||
|
|
||||||
|
|
||||||
|
def nomad_req(
    *path: str, params: dict[str, Any] | None = None, method="GET"
) -> list[dict[str, Any]] | dict[str, Any] | str:
    """Send a request to the Nomad HTTP API and return the decoded body.

    Args:
        *path: Path segments; joined with "/" and appended to
            ``{NOMAD_ADDR}/v1/``.
        params: Optional query-string parameters.
        method: HTTP method to use. Defaults to "GET".

    Returns:
        The parsed JSON body, or the raw response text when the body is not
        valid JSON.

    Raises:
        requests.exceptions.HTTPError: If the response has a 4xx/5xx status
            (raised by ``response.raise_for_status()``).
    """
    headers: dict[str, str] = {}
    if NOMAD_TOKEN:
        headers["X-Nomad-Token"] = NOMAD_TOKEN

    response = requests.request(
        method,
        f"{NOMAD_ADDR}/v1/{'/'.join(path)}",
        params=params,
        headers=headers,
    )
    response.raise_for_status()

    try:
        return response.json()
    except ValueError:
        # The module-level ``requests`` is a requests_unixsocket Session and
        # has no ``exceptions`` attribute, so the decode error is caught via
        # its ValueError base class instead of ``requests.exceptions``.
        return response.text
|
||||||
|
|
||||||
|
|
||||||
|
# Script body: report service registrations whose allocation no longer exists
# and optionally delete them.

# The module-level ``requests`` name is a requests_unixsocket Session, which
# has no ``exceptions`` attribute, so the exception class is imported directly.
from requests.exceptions import HTTPError

# NOTE(review): exit_code is never changed below, so the script exits 0 even
# when orphans are found -- confirm whether that is intended.
exit_code = 0
parser = ArgumentParser(
    description="Checks for orphaned services and optionally deletes them.",
)
parser.add_argument("-d", "--delete", action="store_true", help="Delete orphan services")
args = parser.parse_args()


for namespace in nomad_req("services"):
    namespace = cast(dict[str, Any], namespace)
    for service in namespace["Services"]:
        service_name = service["ServiceName"]
        for service_instance in nomad_req("service", service_name):
            service_instance = cast(dict[str, Any], service_instance)
            service_id = service_instance["ID"]
            alloc_id = service_instance["AllocID"]

            try:
                nomad_req("allocation", alloc_id)
            except HTTPError as e:
                # Only a 404 marks the alloc as gone; any other HTTP error is
                # a real failure and is propagated unchanged.
                if e.response.status_code != 404:
                    raise
            else:
                # The alloc still exists, so this registration is not orphaned.
                continue

            message = f"alloc {alloc_id} not found for {service_name}."
            if args.delete:
                message += f" Deleting {service_id}"

            print(message)

            if args.delete:
                nomad_req("service", service_name, service_id, method="DELETE")


# ``exit`` is injected by the ``site`` module and is meant for interactive
# use; raising SystemExit works regardless of how the interpreter started.
raise SystemExit(exit_code)
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
requests==2.31.0
|
||||||
|
requests-unixsocket==0.3.0
|
Loading…
Reference in New Issue
Block a user