Update missing services script to restart allocs
This commit is contained in:
parent
ad439d48f3
commit
5fe30d005b
@ -1,4 +1,5 @@
|
||||
#! /usr/bin/env python3
|
||||
from argparse import ArgumentParser
|
||||
from os import environ
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
@ -31,18 +32,23 @@ def nomad_req(
|
||||
return response.text
|
||||
|
||||
|
||||
def extract_job_services(job: dict[str, Any]) -> set[str]:
|
||||
services: set[str] = set()
|
||||
def extract_job_services(job: dict[str, Any]) -> dict[str, str]:
|
||||
services: dict[str, str] = dict()
|
||||
for group in job["TaskGroups"]:
|
||||
for service in group.get("Services") or []:
|
||||
services.add(service["Name"])
|
||||
services[service["Name"]] = group["Name"]
|
||||
for task in group["Tasks"]:
|
||||
for service in task.get("Services") or []:
|
||||
services.add(service["Name"])
|
||||
services[service["Name"]] = group["Name"]
|
||||
|
||||
return services
|
||||
|
||||
exit_code = 0
|
||||
parser = ArgumentParser(
|
||||
description="Checks for missing services and optionally restarts their allocs.",
|
||||
)
|
||||
parser.add_argument("-r", "--restart", action="store_true", help="Restart allocs for missing services")
|
||||
args = parser.parse_args()
|
||||
|
||||
for job in nomad_req("jobs"):
|
||||
job = cast(dict[str, Any], job)
|
||||
@ -60,11 +66,30 @@ for job in nomad_req("jobs"):
|
||||
service = cast(dict[str, Any], service)
|
||||
found_services.add(service["ServiceName"])
|
||||
|
||||
missing_services = expected_services - found_services
|
||||
missing_services = set(expected_services) - found_services
|
||||
restart_groups: set[str] = set()
|
||||
for missing_service in missing_services:
|
||||
print(f"ERROR: Missing service {missing_service} for job {job_detail['Name']}")
|
||||
print(job)
|
||||
# print(job)
|
||||
exit_code = 1
|
||||
|
||||
# Record the task group that owns the missing service so its allocs can be restarted
|
||||
restart_groups.add(expected_services[missing_service])
|
||||
|
||||
if not restart_groups or not args.restart:
|
||||
continue
|
||||
|
||||
# Get allocs for groups that are missing services
|
||||
restart_allocs: set[str] = set()
|
||||
for allocation in nomad_req("job", job_detail["ID"], "allocations"):
|
||||
allocation = cast(dict[str, Any], allocation)
|
||||
if allocation["TaskGroup"] in restart_groups:
|
||||
restart_allocs.add(allocation["ID"])
|
||||
|
||||
# Restart allocs associated with missing services
|
||||
for allocation in restart_allocs:
|
||||
print(f"INFO: Restarting allocation {allocation}")
|
||||
nomad_req("client", "allocation", allocation, "restart")
|
||||
|
||||
|
||||
exit(exit_code)
|
||||
|
@ -34,7 +34,7 @@ def nomad_req(
|
||||
|
||||
exit_code = 0
|
||||
parser = ArgumentParser(
|
||||
description="Checks for orphaned services and optional deletes them.",
|
||||
description="Checks for orphaned services and optionally deletes them.",
|
||||
)
|
||||
parser.add_argument("-d", "--delete", action="store_true", help="Delete orphan services")
|
||||
args = parser.parse_args()
|
||||
|
Loading…
Reference in New Issue
Block a user