homelab-nomad/backups/backup.nomad
Ian Fijolek 0d208b7394 Add dummy backup job to keep backup task running on all hosts
Otherwise, if a client is not running any stateful services, the task
will fail and Nomad will eventually stop retrying. If a service gets
relocated to the host, the task is not restarted. This makes sure the
task will cover moved services and make it more easy to determine that
backups are healthy.
2023-11-16 12:19:19 -08:00

235 lines
5.0 KiB
HCL

job "backup%{ if batch_node != null }-oneoff-${batch_node}%{ endif }" {
datacenters = ["dc1"]
priority = 90
%{ if batch_node == null ~}
type = "system"
%{ else ~}
type = "batch"
parameterized {
meta_required = ["job_name"]
meta_optional = ["task", "snapshot"]
}
meta {
task = "backup"
snapshot = "latest"
}
%{ endif ~}
%{ if batch_node != null ~}
constraint {
attribute = "$${node.unique.name}"
value = "${batch_node}"
}
%{ endif ~}
group "backup" {
network {
mode = "bridge"
port "metrics" {
%{~ if use_wesher ~}
host_network = "wesher"
%{~ endif ~}
to = 8080
}
}
volume "all-volumes" {
type = "host"
read_only = false
source = "all-volumes"
}
ephemeral_disk {
# Try to keep restic cache intact
sticky = true
}
service {
name = "backup"
provider = "nomad"
port = "metrics"
tags = [
"prometheus.scrape"
]
}
task "backup" {
driver = "docker"
volume_mount {
volume = "all-volumes"
destination = "/data"
read_only = false
}
config {
image = "iamthefij/resticscheduler:0.3.1"
ports = ["metrics"]
args = [
%{ if batch_node != null ~}
"-once",
"-$${NOMAD_META_task}",
"$${NOMAD_META_job_name}",
"--snapshot",
"$${NOMAD_META_snapshot}",
"--push-gateway",
"http://pushgateway.nomad:9091",
%{ endif ~}
"$${NOMAD_TASK_DIR}/node-jobs.hcl",
]
}
env = {
RCLONE_CHECKERS = "2"
RCLONE_TRANSFERS = "2"
RCLONE_FTP_CONCURRENCY = "5"
RESTIC_CACHE_DIR = "$${NOMAD_ALLOC_DIR}/data"
TZ = "America/Los_Angeles"
}
template {
data = <<EOF
MYSQL_HOST=127.0.0.1
MYSQL_PORT=3306
{{ with nomadVar "secrets/mysql" }}
MYSQL_USER=root
MYSQL_PASSWORD={{ .mysql_root_password }}
{{ end -}}
{{ with nomadVar "secrets/postgres" }}
POSTGRES_HOST=127.0.0.1
POSTGRES_PORT=5432
POSTGRES_USER={{ .superuser }}
POSTGRES_PASSWORD={{ .superuser_password }}
{{ end -}}
{{ with nomadVar (print "nomad/jobs/" (index (env "NOMAD_JOB_ID" | split "/") 0)) -}}
BACKUP_PASSPHRASE={{ .backup_passphrase }}
RCLONE_FTP_HOST={{ .nas_ftp_host }}
RCLONE_FTP_USER={{ .nas_ftp_user }}
RCLONE_FTP_PASS={{ .nas_ftp_pass.Value | toJSON }}
RCLONE_FTP_EXPLICIT_TLS=true
RCLONE_FTP_NO_CHECK_CERTIFICATE=true
AWS_ACCESS_KEY_ID={{ .nas_minio_access_key_id }}
AWS_SECRET_ACCESS_KEY={{ .nas_minio_secret_access_key }}
{{ end -}}
EOF
destination = "secrets/db.env"
env = true
}
template {
# Build jobs based on node
data = <<EOF
# Current node is {{ env "node.unique.name" }} {{ env "node.unique.id" }}
%{ for job_file in fileset(module_path, "jobs/*.hcl") ~}
{{ range nomadService 1 "backups" "${trimsuffix(basename(job_file), ".hcl")}" -}}
# ${trimsuffix(basename(job_file), ".hcl")} .Node {{ .Node }}
{{ if eq .Node (env "node.unique.id") -}}
${file("${module_path}/${job_file}")}
{{ end -}}
{{ end -}}
%{ endfor ~}
# Dummy job to keep task healthy on node without any stateful services
job "Dummy" {
schedule = "0 0 1 1 0"
config {
repo = "/local/dummy-repo"
passphrase = env("BACKUP_PASSPHRASE")
}
backup {
paths = ["/local/node-jobs.hcl"]
}
forget {
KeepLast = 1
}
}
EOF
destination = "local/node-jobs.hcl"
}
resources {
cpu = 50
memory = 500
}
}
task "stunnel" {
driver = "docker"
lifecycle {
hook = "prestart"
sidecar = true
}
config {
image = "alpine:3.17"
args = ["/bin/sh", "$${NOMAD_TASK_DIR}/start.sh"]
}
resources {
cpu = 100
memory = 100
}
template {
data = <<EOF
set -e
apk add stunnel
exec stunnel {{ env "NOMAD_TASK_DIR" }}/stunnel.conf
EOF
destination = "$${NOMAD_TASK_DIR}/start.sh"
}
template {
data = <<EOF
syslog = no
foreground = yes
delay = yes
[mysql_client]
client = yes
accept = 127.0.0.1:3306
{{ range nomadService 1 (env "NOMAD_ALLOC_ID") "mysql-tls" }}
connect = {{ .Address }}:{{ .Port }}
{{ end }}
PSKsecrets = {{ env "NOMAD_SECRETS_DIR" }}/mysql_stunnel_psk.txt
[postgres_client]
client = yes
accept = 127.0.0.1:5432
{{ range nomadService 1 (env "NOMAD_ALLOC_ID") "postgres-tls" }}
connect = {{ .Address }}:{{ .Port }}
{{ end }}
PSKsecrets = {{ env "NOMAD_SECRETS_DIR" }}/postgres_stunnel_psk.txt
EOF
destination = "$${NOMAD_TASK_DIR}/stunnel.conf"
}
template {
data = <<EOF
{{- with nomadVar "secrets/mysql/allowed_psks/backups" }}{{ .psk }}{{ end -}}
EOF
destination = "$${NOMAD_SECRETS_DIR}/mysql_stunnel_psk.txt"
}
template {
data = <<EOF
{{- with nomadVar "secrets/postgres/allowed_psks/backups" }}{{ .psk }}{{ end -}}
EOF
destination = "$${NOMAD_SECRETS_DIR}/postgres_stunnel_psk.txt"
}
}
}
}