homelab-nomad/setup-cluster.yml
Ian Fijolek 0ad777c76f Fix unsealing of single vault instance
Checking status of only one node meant that if that node was sealed
we would not try to unseal other nodes
2022-08-30 15:14:00 -07:00

449 lines
12 KiB
YAML

---
- name: Build Consul cluster
hosts: consul_instances
any_errors_fatal: true
vars_files:
- consul_values.yml
roles:
- role: ansible-consul
vars:
consul_version: "1.13.1-1"
consul_install_upgrade: true
consul_install_from_repo: true
consul_os_repo_prerequisites: []
consul_node_role: server
consul_bootstrap_expect: true
consul_user: consul
consul_manage_user: true
consul_group: bin
consul_manage_group: true
consul_architecture_map:
x86_64: amd64
armhfv6: arm
armv7l: arm
# consul_tls_enable: true
consul_connect_enabled: true
consul_ports_grpc: 8502
consul_client_address: "0.0.0.0"
# Autopilot
consul_autopilot_enable: true
consul_autopilot_cleanup_dead_Servers: true
# Enable metrics
consul_config_custom:
telemetry:
prometheus_retention_time: "2h"
# DNS forwarding
consul_dnsmasq_enable: true
consul_dnsmasq_servers:
# TODO: use addresses of other nomad nodes?
# Maybe this can be [] to get the values from dhcp
- 1.1.1.1
- 1.0.0.1
consul_dnsmasq_bind_interfaces: true
consul_dnsmasq_listen_addresses:
# Listen only to loopback interface
- 127.0.0.1
become: true
tasks:
- name: Start Consul
systemd:
state: started
name: consul
become: true
# If DNS is broken after dnsmasq, then need to set /etc/resolv.conf to something
# pointing to 127.0.0.1 and possibly restart Docker and Nomad
- name: Add values
delegate_to: localhost
run_once: true
block:
- name: Install python-consul
pip:
name: python-consul
extra_args: --index-url https://pypi.org/simple
- name: Write values
consul_kv:
host: "{{ inventory_hostname }}"
key: "{{ item.key }}"
value: "{{ item.value }}"
loop: "{{ consul_values | default({}) | dict2items }}"
- name: Setup Vault cluster
hosts: vault_instances
vars_files:
- ./vault_hashi_vault_values.yml
roles:
- name: ansible-vault
vars:
vault_version: 1.11.2-1
vault_install_hashi_repo: true
vault_harden_file_perms: true
vault_bin_path: /usr/bin
vault_address: 0.0.0.0
vault_backend: consul
become: true
tasks:
- name: Get Vault status
uri:
url: http://127.0.0.1:8200/v1/sys/health
method: GET
status_code: 200, 429, 472, 473, 501, 503
body_format: json
return_content: true
register: vault_status
- name: Initialize Vault
when: not vault_status.json["initialized"]
block:
- name: Initialize Vault
command:
argv:
- "vault"
- "operator"
- "init"
- "-format=json"
- "-address=http://127.0.0.1:8200/"
- "-key-shares={{ vault_init_key_shares|default(3) }}"
- "-key-threshold={{ vault_init_key_threshold|default(2) }}"
run_once: true
register: vault_init
- name: Save initialize result
copy:
content: "{{ vault_init.stdout }}"
dest: "./vault-keys.json"
when: vault_init is succeeded
delegate_to: localhost
run_once: true
- name: Unseal from init
no_log: true
command:
argv:
- "vault"
- "operator"
- "unseal"
- "-address=http://127.0.0.1:8200/"
- "{{ item }}"
loop: "{{ (vault_init.stdout | from_json)['unseal_keys_hex'] }}"
when: vault_init is succeeded
- name: Unseal Vault
no_log: true
command:
argv:
- "vault"
- "operator"
- "unseal"
- "-address=http://127.0.0.1:8200/"
- "{{ item }}"
loop: "{{ unseal_keys_hex }}"
when:
- unseal_keys_hex is defined
- vault_status.json["sealed"]
- name: Bootstrap Vault secrets
delegate_to: localhost
run_once: true
block:
- name: Install hvac
pip:
name: hvac
extra_args: --index-url https://pypi.org/simple
# TODO: This fails on first run because `root_token` isn't found
# Fails after taht too because the kv/ space has not been created yet either! Oh noes!
# Maybe move data bootstrapping to after the cluster is bootstrapped
- name: Write values
no_log: true
community.hashi_vault.vault_write:
url: "http://{{ inventory_hostname }}:8200"
token: "{{ root_token }}"
path: "kv/data/{{ item.key }}"
data:
data:
"{{ item.value }}"
loop: "{{ hashi_vault_values | default({}) | dict2items }}"
# Not on Ubuntu 20.04
# - name: Install Podman
# hosts: nomad_instances
# become: true
#
# tasks:
# - name: Install Podman
# package:
# name: podman
# state: present
- name: Create NFS mounts
hosts: nomad_instances
become: true
tasks:
- name: Install nfs
package:
name: nfs-common
state: present
- name: Create Motioneye NFS mount
ansible.posix.mount:
src: 192.168.2.10:/Recordings/Motioneye
path: /srv/volumes/motioneye-recordings
opts: proto=tcp,port=2049,rw
state: mounted
fstype: nfs4
- name: Create Media Library RO NFS mount
ansible.posix.mount:
src: 192.168.2.10:/Multimedia
path: /srv/volumes/media-read
opts: proto=tcp,port=2049,ro
state: mounted
fstype: nfs4
- name: Install Docker
hosts: nomad_instances
become: true
vars:
deb_arch: "{% if ansible_architecture == 'x86_64' %}amd64{% elif ansible_architecture == 'armv7l' %}armhf{% endif %}"
docker_apt_arch: "{{ deb_arch }}"
docker_compose_arch: "{{ (ansible_architecture == 'armv7l') | ternary('armv7', ansible_architecture) }}"
roles:
- geerlingguy.docker
- name: Build Nomad cluster
hosts: nomad_instances
any_errors_fatal: true
become: true
vars:
shared_host_volumes:
- name: motioneye-recordings
path: /srv/volumes/motioneye-recordings
owner: "root"
group: "bin"
mode: "0755"
read_only: false
- name: media-read
path: /srv/volumes/media-read
owner: "root"
group: "root"
mode: "0777"
read_only: true
- name: all-volumes
path: /srv/volumes
owner: "root"
group: "root"
mode: "0777"
read_only: false
roles:
- name: ansible-nomad
vars:
nomad_version: "1.3.4-1"
nomad_install_upgrade: true
nomad_allow_purge_config: true
# Where nomad gets installed to
nomad_bin_dir: /usr/bin
nomad_install_from_repo: true
# nomad_user: root
# nomad_manage_user: true
# nomad_group: bin
# nomad_manage_group: true
# Properly map install arch
nomad_architecture_map:
x86_64: amd64
armhfv6: arm
armv7l: arm
nomad_autopilot: true
nomad_encrypt_enable: true
# nomad_use_consul: true
# Metrics
nomad_telemetry: true
nomad_telemetry_prometheus_metrics: true
nomad_telemetry_publish_allocation_metrics: true
nomad_telemetry_publish_node_metrics: true
# Enable container plugins
nomad_cni_enable: true
nomad_cni_version: 1.0.1
nomad_docker_enable: true
nomad_docker_dmsetup: false
# nomad_podman_enable: true
# Merge shared host volumes with node volumes
nomad_host_volumes: "{{ shared_host_volumes + (nomad_unique_host_volumes | default([])) }}"
# Customize docker plugin
nomad_plugins:
docker:
config:
allow_privileged: true
volumes:
enabled: true
selinuxlabel: "z"
extra_labels:
- "job_name"
- "job_id"
- "task_group_name"
- "task_name"
- "namespace"
- "node_name"
- "node_id"
# Bind nomad
nomad_bind_address: 0.0.0.0
# Default interface for binding tasks
# nomad_network_interface: lo
# Create networks for binding task ports
nomad_host_networks:
# - name: public
# interface: eth0
# reserved_ports: "22"
- name: nomad-bridge
interface: nomad
reserved_ports: "22"
- name: loopback
interface: lo
reserved_ports: "22"
# Enable ACLs
nomad_acl_enabled: true
# Enable vault integration
# TODO: This fails on first run because the Nomad-Vault integration can't be set up
# until Nomad has started. Could maybe figure out if ACLs have been set up and leave
# these out until the later play, maybe just bootstrap the nomad-cluster role in Vault
# befor Nomad is set up
nomad_vault_address: "http://vault.service.consul:8200"
nomad_vault_create_from_role: "nomad-cluster"
# TODO: Probably want to restict this to a narrower scoped token
nomad_vault_enabled: "{{ root_token is defined }}"
nomad_vault_token: "{{ root_token | default('') }}"
nomad_config_custom:
ui:
enabled: true
consul:
ui_url: "http://{{ ansible_hostname }}:8500/ui"
vault:
ui_url: "http://{{ ansible_hostname }}:8200/ui"
consul:
tags:
- "traefik.enable=true"
- "traefik.consulcatalog.connect=true"
- "traefik.http.routers.nomadclient.entrypoints=websecure"
tasks:
- name: Start Nomad
systemd:
state: started
name: nomad
- name: Bootstrap Nomad ACLs
hosts: nomad_instances
tasks:
# Need to wait until nomad is running
- name: Bootstrap ACLs
command:
argv:
- "nomad"
- "acl"
- "bootstrap"
- "-json"
run_once: true
ignore_errors: true
register: bootstrap_result
- name: Save bootstrap result
copy:
content: "{{ bootstrap_result.stdout }}"
dest: "./nomad_bootstrap.json"
when: bootstrap_result is succeeded
delegate_to: localhost
run_once: true
- name: Look for policy
command:
argv:
- nomad
- acl
- policy
- list
run_once: true
register: policies
- name: Read secret
command:
argv:
- jq
- -r
- .SecretID
- nomad_bootstrap.json
delegate_to: localhost
run_once: true
no_log: true
changed_when: false
register: read_secretid
- name: Copy policy
copy:
src: ./acls/nomad-anon-policy.hcl
dest: /tmp/anonymous.policy.hcl
delegate_to: "{{ play_hosts[0] }}"
register: anon_policy
run_once: true
- name: Create anon-policy
command:
argv:
- nomad
- acl
- policy
- apply
- -description="Anon read only"
- anonymous
- /tmp/anonymous.policy.hcl
environment:
NOMAD_TOKEN: "{{ read_secretid.stdout }}"
when: policies.stdout == "No policies found" or anon_policy.changed
delegate_to: "{{ play_hosts[0] }}"
run_once: true
- name: Set up Nomad backend and roles in Vault
community.general.terraform:
project_path: ./acls
force_init: true
variables:
consul_address: "{{ play_hosts[0] }}:8500"
vault_token: "{{ root_token }}"
nomad_secret_id: "{{ read_secretid.stdout }}"
delegate_to: localhost
run_once: true