Update missing services script to restart allocs

This commit is contained in:
IamTheFij 2023-09-27 21:30:48 -07:00
parent ad439d48f3
commit 5fe30d005b
2 changed files with 32 additions and 7 deletions

View File

@@ -1,4 +1,5 @@
#! /usr/bin/env python3 #! /usr/bin/env python3
from argparse import ArgumentParser
from os import environ from os import environ
from typing import Any from typing import Any
from typing import cast from typing import cast
@@ -31,18 +32,23 @@ def nomad_req(
return response.text return response.text
def extract_job_services(job: dict[str, Any]) -> set[str]: def extract_job_services(job: dict[str, Any]) -> dict[str, str]:
services: set[str] = set() services: dict[str, str] = dict()
for group in job["TaskGroups"]: for group in job["TaskGroups"]:
for service in group.get("Services") or []: for service in group.get("Services") or []:
services.add(service["Name"]) services[service["Name"]] = group["Name"]
for task in group["Tasks"]: for task in group["Tasks"]:
for service in task.get("Services") or []: for service in task.get("Services") or []:
services.add(service["Name"]) services[service["Name"]] = group["Name"]
return services return services
exit_code = 0 exit_code = 0
parser = ArgumentParser(
description="Checks for missing services and optionally restarts their allocs.",
)
parser.add_argument("-r", "--restart", action="store_true", help="Restart allocs for missing services")
args = parser.parse_args()
for job in nomad_req("jobs"): for job in nomad_req("jobs"):
job = cast(dict[str, Any], job) job = cast(dict[str, Any], job)
@@ -60,11 +66,30 @@ for job in nomad_req("jobs"):
service = cast(dict[str, Any], service) service = cast(dict[str, Any], service)
found_services.add(service["ServiceName"]) found_services.add(service["ServiceName"])
missing_services = expected_services - found_services missing_services = set(expected_services) - found_services
restart_groups: set[str] = set()
for missing_service in missing_services: for missing_service in missing_services:
print(f"ERROR: Missing service {missing_service} for job {job_detail['Name']}") print(f"ERROR: Missing service {missing_service} for job {job_detail['Name']}")
print(job) # print(job)
exit_code = 1 exit_code = 1
# Add group associated with missing service to set
restart_groups.add(expected_services[missing_service])
if not restart_groups or not args.restart:
continue
# Get allocs for groups that are missing services
restart_allocs: set[str] = set()
for allocation in nomad_req("job", job_detail["ID"], "allocations"):
allocation = cast(dict[str, Any], allocation)
if allocation["TaskGroup"] in restart_groups:
restart_allocs.add(allocation["ID"])
# Restart allocs associated with missing services
for allocation in restart_allocs:
print(f"INFO: Restarting allocation {allocation}")
nomad_req("client", "allocation", allocation, "restart")
exit(exit_code) exit(exit_code)

View File

@@ -34,7 +34,7 @@ def nomad_req(
exit_code = 0 exit_code = 0
parser = ArgumentParser( parser = ArgumentParser(
description="Checks for orphaned services and optional deletes them.", description="Checks for orphaned services and optionally deletes them.",
) )
parser.add_argument("-d", "--delete", action="store_true", help="Delete orphan services") parser.add_argument("-d", "--delete", action="store_true", help="Delete orphan services")
args = parser.parse_args() args = parser.parse_args()