homelab-nomad/ansible_playbooks/setup-cluster.yml
2024-12-17 16:36:49 -08:00

387 lines
10 KiB
YAML

---
- name: Update DNS for bootstrapping with non-Nomad host
hosts: nomad_instances
become: true
gather_facts: false
vars:
non_nomad_dns: 192.168.2.170
tasks:
- name: Add non-nomad bootstrap DNS
lineinfile:
dest: /etc/resolv.conf
create: true
line: "nameserver {{ non_nomad_dns }}"
- name: Install Docker
hosts: nomad_clients
become: true
vars:
docker_architecture_map:
x86_64: amd64
armv7l: armhf
aarch64: arm64
docker_apt_arch: "{{ docker_architecture_map[ansible_architecture] }}"
docker_compose_arch: "{{ (ansible_architecture == 'armv7l') | ternary('armv7', ansible_architecture) }}"
roles:
- geerlingguy.docker
tasks:
- name: Remove snapd
package:
name: snapd
state: absent
# Not on Ubuntu 20.04
# - name: Install Podman
# hosts: nomad_instances
# become: true
#
# tasks:
# - name: Install Podman
# package:
# name: podman
# state: present
- name: Create NFS mounts
hosts: nomad_clients
become: true
vars:
shared_nfs_mounts:
- src: 192.168.2.10:/Media
path: /srv/volumes/media-read
opts: proto=tcp,port=2049,ro
- src: 192.168.2.10:/Media
path: /srv/volumes/media-write
opts: proto=tcp,port=2049,rw
- src: 192.168.2.10:/Overflow
path: /srv/volumes/nas-overflow
opts: proto=tcp,port=2049,rw
- src: 192.168.2.10:/Photos
path: /srv/volumes/photos
opts: proto=tcp,port=2049,rw
- src: 192.168.2.10:/Container
path: /srv/volumes/nas-container
opts: proto=tcp,port=2049,rw
tasks:
- name: Install nfs
package:
name: nfs-common
state: present
- name: Mount NFS volumes
ansible.posix.mount:
src: "{{ item.src }}"
path: "{{ item.path }}"
opts: "{{ item.opts }}"
state: mounted
fstype: nfs4
loop: "{{ shared_nfs_mounts + (nfs_mounts | default([])) }}"
- import_playbook: wesher.yml
- name: Build Nomad cluster
hosts: nomad_instances
any_errors_fatal: true
become: true
vars:
shared_host_volumes:
- name: media-read
path: /srv/volumes/media-write
read_only: true
- name: media-overflow-read
path: /srv/volumes/nas-overflow/Media
read_only: true
- name: media-write
path: /srv/volumes/media-write
owner: "root"
group: "root"
mode: "0755"
read_only: false
- name: media-overflow-write
path: /srv/volumes/nas-overflow/Media
owner: "root"
group: "root"
mode: "0755"
read_only: false
- name: media-downloads
path: /srv/volumes/media-write/Downloads
read_only: false
- name: sabnzbd-config
path: /srv/volumes/media-write/Downloads/sabnzbd
read_only: false
- name: photoprism-media
path: /srv/volumes/photos/Photoprism
read_only: false
- name: photoprism-storage
path: /srv/volumes/nas-container/photoprism
read_only: false
- name: nzbget-config
path: /srv/volumes/nas-container/nzbget
read_only: false
- name: sonarr-config
path: /srv/volumes/nas-container/sonarr
read_only: false
- name: lidarr-config
path: /srv/volumes/nas-container/lidarr
read_only: false
- name: radarr-config
path: /srv/volumes/nas-container/radarr
read_only: false
- name: jellyfin-config
path: /srv/volumes/nas-container/jellyfin
read_only: false
- name: bazarr-config
path: /srv/volumes/nas-container/bazarr
read_only: false
- name: gitea-data
path: /srv/volumes/nas-container/gitea
read_only: false
- name: ytdl-web
path: /srv/volumes/nas-container/ytdl-web
read_only: false
- name: christmas-community
path: /srv/volumes/nas-container/christmas-community
read_only: false
- name: all-volumes
path: /srv/volumes
owner: "root"
group: "root"
mode: "0755"
read_only: false
roles:
- name: ansible-nomad
vars:
nomad_version: "1.9.3-1"
nomad_install_upgrade: true
nomad_allow_purge_config: true
nomad_node_role: "{% if 'nomad_clients' in group_names %}{% if 'nomad_servers' in group_names %}both{% else %}client{% endif %}{% else %}server{% endif %}"
# Where nomad gets installed to
nomad_bin_dir: /usr/bin
nomad_install_from_repo: true
nomad_bootstrap_expect: "{{ [(play_hosts | length), 3] | min }}"
nomad_raft_protocol: 3
nomad_autopilot: true
nomad_encrypt_enable: true
# nomad_use_consul: true
# Metrics
nomad_telemetry: true
nomad_telemetry_prometheus_metrics: true
nomad_telemetry_publish_allocation_metrics: true
nomad_telemetry_publish_node_metrics: true
# Enable container plugins
nomad_cni_enable: true
nomad_cni_version: 1.0.1
nomad_docker_enable: true
nomad_docker_dmsetup: false
# nomad_podman_enable: true
# Merge shared host volumes with node volumes
nomad_host_volumes: "{{ shared_host_volumes + (nomad_unique_host_volumes | default([])) }}"
# Customize docker plugin
nomad_plugins:
docker:
config:
allow_privileged: true
gc:
image_delay: "24h"
volumes:
enabled: true
selinuxlabel: "z"
# Send logs to journald so we can scrape them for Loki
# logging:
# type: journald
extra_labels:
- "job_name"
- "job_id"
- "task_group_name"
- "task_name"
- "namespace"
- "node_name"
- "node_id"
# Bind nomad
nomad_bind_address: 0.0.0.0
# Default interface for binding tasks
# This is now set at the inventory level
# nomad_network_interface: eth0
# Create networks for binding task ports
nomad_host_networks:
- name: loopback
interface: lo
reserved_ports: "22"
- name: wesher
interface: wgoverlay
reserved_ports: "22"
# Enable ACLs
nomad_acl_enabled: true
nomad_config_custom:
ui:
enabled: true
- name: Bootstrap Nomad ACLs and scheduler
hosts: nomad_servers
tasks:
- name: Start Nomad
systemd:
state: started
name: nomad
- name: Nomad API reachable?
uri:
url: "http://127.0.0.1:4646/v1/status/leader"
method: GET
status_code: 200
register: nomad_check_result
retries: 8
until: nomad_check_result is succeeded
delay: 15
changed_when: false
run_once: true
- name: Bootstrap ACLs
command:
argv:
- "nomad"
- "acl"
- "bootstrap"
- "-json"
run_once: true
ignore_errors: true
register: bootstrap_result
changed_when: bootstrap_result is succeeded
- name: Save bootstrap result
copy:
content: "{{ bootstrap_result.stdout }}"
dest: "../nomad_bootstrap.json"
when: bootstrap_result is succeeded
delegate_to: localhost
run_once: true
- name: Read secret
command:
argv:
- jq
- -r
- .SecretID
- ../nomad_bootstrap.json
delegate_to: localhost
run_once: true
no_log: true
changed_when: false
register: read_secretid
- name: Look for policy
command:
argv:
- nomad
- acl
- policy
- list
environment:
NOMAD_TOKEN: "{{ read_secretid.stdout }}"
register: policies
run_once: true
changed_when: false
- name: Copy policy
copy:
src: ../acls/nomad-anon-policy.hcl
dest: /tmp/anonymous.policy.hcl
delegate_to: "{{ play_hosts[0] }}"
run_once: true
register: anon_policy
- name: Create anon-policy
command:
argv:
- nomad
- acl
- policy
- apply
- -description=Anon read only
- anonymous
- /tmp/anonymous.policy.hcl
environment:
NOMAD_TOKEN: "{{ read_secretid.stdout }}"
when: policies.stdout == "No policies found" or anon_policy.changed
delegate_to: "{{ play_hosts[0] }}"
run_once: true
- name: Read scheduler config
command:
argv:
- nomad
- operator
- scheduler
- get-config
- -json
run_once: true
register: scheduler_config
changed_when: false
- name: Enable service scheduler preemption
command:
argv:
- nomad
- operator
- scheduler
- set-config
- -preempt-service-scheduler=true
environment:
NOMAD_TOKEN: "{{ read_secretid.stdout }}"
run_once: true
when: (scheduler_config.stdout | from_json)["SchedulerConfig"]["PreemptionConfig"]["ServiceSchedulerEnabled"] is false
- name: Enable system scheduler preemption
command:
argv:
- nomad
- operator
- scheduler
- set-config
- -preempt-system-scheduler=true
environment:
NOMAD_TOKEN: "{{ read_secretid.stdout }}"
run_once: true
when: (scheduler_config.stdout | from_json)["SchedulerConfig"]["PreemptionConfig"]["SystemSchedulerEnabled"] is false
# - name: Set up Nomad backend and roles in Vault
# community.general.terraform:
# project_path: ../acls
# force_init: true
# variables:
# consul_address: "{{ play_hosts[0] }}:8500"
# vault_token: "{{ root_token }}"
# nomad_secret_id: "{{ read_secretid.stdout }}"
# delegate_to: localhost
# run_once: true
# notify:
# - Restart Nomad
handlers:
- name: Restart Nomad
systemd:
state: restarted
name: nomad
retries: 6
delay: 5