Add new backup and orphaned service scripts
This commit is contained in:
parent
b13e31d9f8
commit
08f92f8ba5
128
scripts/nomad_backup_job.py
Executable file
128
scripts/nomad_backup_job.py
Executable file
@ -0,0 +1,128 @@
|
||||
#! /usr/bin/env python3
|
||||
from os import environ
|
||||
from time import sleep
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
from argparse import ArgumentParser
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Base URL used for Nomad HTTP API requests; read from the NOMAD_ADDR
# environment variable, falling back to http://127.0.0.1:4646 when unset.
NOMAD_ADDR = environ.get("NOMAD_ADDR", "http://127.0.0.1:4646")
|
||||
# Optional token read from the NOMAD_TOKEN environment variable (None when
# unset); nomad_req sends it as the X-Nomad-Token header when it is truthy.
NOMAD_TOKEN = environ.get("NOMAD_TOKEN")
|
||||
|
||||
|
||||
def nomad_req(
    *path: str,
    params: dict[str, Any] | None = None,
    data: dict[str, Any] | None = None,
    method: str = "GET",
    timeout: float | None = 30.0,
) -> list[dict[str, Any]] | dict[str, Any] | str:
    """Send a request to the Nomad HTTP API and return the decoded response.

    Args:
        *path: URL path segments; joined with "/" and appended to
            ``{NOMAD_ADDR}/v1/``.
        params: Optional query-string parameters.
        data: Optional request body, serialized as JSON.
        method: HTTP method name. Defaults to "GET".
        timeout: Seconds to wait on the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The parsed JSON body, or the raw response text when the body is not
        valid JSON.

    Raises:
        requests.exceptions.RequestException: On connection failures,
            timeouts, or an HTTP error status. For an HTTP error status the
            response body is printed before the exception propagates.
    """
    headers = {
        "Content-Type": "application/json",
    }
    if NOMAD_TOKEN:
        headers["X-Nomad-Token"] = NOMAD_TOKEN

    # Without a timeout, requests waits forever on an unresponsive agent.
    response = requests.request(
        method,
        f"{NOMAD_ADDR}/v1/{'/'.join(path)}",
        params=params,
        json=data,
        headers=headers,
        timeout=timeout,
    )
    try:
        response.raise_for_status()
    except requests.exceptions.RequestException:
        # Surface the error body returned by the server, then re-raise with
        # the original traceback intact.
        print(response.text)
        raise

    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        # Some responses are not JSON; hand back the plain text instead.
        return response.text
|
||||
|
||||
|
||||
def wait_for_job_alloc_status(job_id: str, status: str):
    """Block until every allocation of ``job_id`` reports ``status``.

    The job's allocations are fetched through ``nomad_req`` and re-fetched
    every 5 seconds for as long as any allocation's ``ClientStatus`` differs
    from ``status``. Returns immediately when the job has no allocations.
    """
    while True:
        current = cast(
            list[dict[str, Any]],
            nomad_req("job", job_id, "allocations"),
        )
        # Stop polling as soon as no allocation is in a different state.
        if not any(entry["ClientStatus"] != status for entry in current):
            return

        print(f"Waiting for all allocs to reach {status}...")
        sleep(5)
|
||||
|
||||
|
||||
def wait_for_eval_status(eval_id: str, status: str):
    """Poll a Nomad evaluation every 5 seconds until it reports ``status``.

    Args:
        eval_id: ID of the evaluation to watch.
        status: Value of the evaluation's ``Status`` field to wait for.

    Raises:
        RuntimeError: If the evaluation settles in a terminal status other
            than ``status``, since it would then never reach the requested
            one and the loop would spin forever.
    """
    # Statuses after which an evaluation no longer changes. Both spellings of
    # "cancelled" are listed to be safe.
    # NOTE(review): confirm this set against the Nomad evaluations API.
    terminal_statuses = {"complete", "failed", "canceled", "cancelled"}

    while True:
        # Named `evaluation` rather than `eval` to avoid shadowing the builtin.
        evaluation = cast(dict[str, Any], nomad_req("evaluation", eval_id))
        current_status = evaluation["Status"]
        if current_status == status:
            return
        if current_status in terminal_statuses:
            raise RuntimeError(
                f"Evaluation {eval_id} ended with status {current_status!r} "
                f"and will never reach {status!r}"
            )

        print(f"Waiting for eval to reach {status}...")
        sleep(5)
|
||||
|
||||
|
||||
# Command-line interface: one positional service name plus optional flags
# selecting the action, the snapshot, and the "extra safe" mode.
parser = ArgumentParser(description="Execute one off backups and restores of services")
parser.add_argument(
    "service_name",
    help="Name of the service to backup or restore",
)
parser.add_argument(
    "-a",
    "--action",
    choices=("backup", "restore"),
    default="backup",
    help="Action to take, backup or restore",
)
parser.add_argument(
    "-s",
    "--snapshot",
    default="latest",
    help="Backup snapshot to restore, if restore is the chosen action",
)
parser.add_argument(
    "-x",
    "--extra-safe",
    action="store_true",
    help="Perform extra safe backup or restore by stoping target job first",
)
args = parser.parse_args()
|
||||
|
||||
service_name = args.service_name

# Look up the service registration to learn which node and job own it.
# NOTE(review): the "choose" parameter presumably limits the result to a
# single instance — confirm against the Nomad services API.
service_info = nomad_req("service", service_name, params={"choose": "1|backups"})
if not service_info:
    print(f"Could not find service {service_name}")
    raise SystemExit(1)

service_info = cast(list[dict[str, Any]], service_info)
node_id = service_info[0]["NodeID"]
job_id = service_info[0]["JobID"]

# The one-off backup job is named after the node the service runs on.
node = cast(dict[str, Any], nomad_req("node", node_id))
node_name = node["Name"]
backup_job_name = f"backup-oneoff-{node_name}"

# Resolve the backup job BEFORE touching the target job, and abort if it is
# missing. Otherwise an --extra-safe run would stop the service and then
# crash with nothing able to back it up.
backup_job = nomad_req("job", backup_job_name)
if not backup_job:
    print(f"Could not find backup job {backup_job_name} for {service_name}")
    raise SystemExit(1)

backup_job = cast(dict[str, Any], backup_job)
backup_job_id = backup_job["ID"]

if args.extra_safe:
    print("Stopping job allocs")
    stop_job = nomad_req("job", job_id, method="DELETE")
    print(stop_job)
    wait_for_job_alloc_status(job_id, "complete")

# Dispatch the backup job, passing the target service, the action, and the
# snapshot as dispatch metadata.
dispatch = nomad_req(
    "job",
    backup_job_id,
    "dispatch",
    data={
        "Payload": None,
        "Meta": {
            "job_name": service_name,
            "task": args.action,
            "snapshot": args.snapshot,
        },
    },
    method="POST",
)
dispatch = cast(dict[str, Any], dispatch)
print(dispatch)

if args.extra_safe:
    print(f"Wait for {args.action} to finish")
    # NOTE(review): this waits on the dispatch evaluation; presumably that
    # only signals scheduling finished, not that the dispatched job itself
    # has run to completion — confirm.
    wait_for_eval_status(dispatch["EvalID"], "complete")

print("Backup complete. Verify success and restart job")
# TODO: If auto restarting, get versions and "revert" to version n-1 since n
# will be the recently stopped version
|
52
scripts/nomad_orphan_services.py
Executable file
52
scripts/nomad_orphan_services.py
Executable file
@ -0,0 +1,52 @@
|
||||
#! /usr/bin/env python3
|
||||
from os import environ
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
# Base URL used for Nomad HTTP API requests; read from the NOMAD_ADDR
# environment variable, falling back to http://127.0.0.1:4646 when unset.
NOMAD_ADDR = environ.get("NOMAD_ADDR", "http://127.0.0.1:4646")
|
||||
# Optional token read from the NOMAD_TOKEN environment variable (None when
# unset); nomad_req sends it as the X-Nomad-Token header when it is truthy.
NOMAD_TOKEN = environ.get("NOMAD_TOKEN")
|
||||
|
||||
|
||||
def nomad_req(
    *path: str,
    params: dict[str, Any] | None = None,
    method: str = "GET",
    timeout: float | None = 30.0,
) -> list[dict[str, Any]] | dict[str, Any] | str:
    """Send a request to the Nomad HTTP API and return the decoded response.

    Args:
        *path: URL path segments; joined with "/" and appended to
            ``{NOMAD_ADDR}/v1/``.
        params: Optional query-string parameters.
        method: HTTP method name. Defaults to "GET".
        timeout: Seconds to wait on the server before giving up. Pass
            ``None`` to wait indefinitely.

    Returns:
        The parsed JSON body, or the raw response text when the body is not
        valid JSON.

    Raises:
        requests.exceptions.HTTPError: When the server answers with an HTTP
            error status.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    headers = {}
    if NOMAD_TOKEN:
        headers["X-Nomad-Token"] = NOMAD_TOKEN

    # Without a timeout, requests waits forever on an unresponsive agent.
    response = requests.request(
        method,
        f"{NOMAD_ADDR}/v1/{'/'.join(path)}",
        params=params,
        headers=headers,
        timeout=timeout,
    )
    response.raise_for_status()

    try:
        return response.json()
    except requests.exceptions.JSONDecodeError:
        # Some responses are not JSON; hand back the plain text instead.
        return response.text
|
||||
|
||||
|
||||
# Walk every registered service instance and delete the registrations whose
# allocation no longer exists.
for namespace in nomad_req("services"):
    namespace = cast(dict[str, Any], namespace)
    for service in namespace["Services"]:
        service_name = service["ServiceName"]
        for service_instance in nomad_req("service", service_name):
            service_instance = cast(dict[str, Any], service_instance)
            service_id = service_instance["ID"]
            alloc_id = service_instance["AllocID"]

            try:
                # Only the success or failure of the lookup matters here.
                nomad_req("allocation", alloc_id)
            except requests.exceptions.HTTPError as e:
                # Anything other than "allocation not found" is unexpected.
                if e.response is None or e.response.status_code != 404:
                    raise

                print(
                    f"alloc {alloc_id} not found for {service_name}. Deleting {service_id}"
                )
                nomad_req("service", service_name, service_id, method="DELETE")
                # Do not re-raise: carry on so the remaining instances are
                # checked instead of aborting after the first orphan.
|
Loading…
Reference in New Issue
Block a user