From 65cb6afaf9e3f848d96506520086837e38b8546f Mon Sep 17 00:00:00 2001 From: Ian Fijolek Date: Sun, 20 Nov 2022 16:24:00 -0800 Subject: [PATCH] WIP: Moving vars and service discovery to Nomad Starting with core --- .terraform.lock.hcl | 71 ++++----- ansible_playbooks/setup-cluster.yml | 228 ++++------------------------ core/blocky/blocky.nomad | 79 +++------- core/blocky/config.yml | 30 ++-- core/ddclient.nomad | 17 +-- core/lldap.nomad | 75 +++------ core/main.tf | 77 +--------- core/syslogng.nomad | 31 +--- core/traefik/traefik.nomad | 84 +++------- databases/adminer.nomad | 35 +---- databases/mysql.nomad | 37 +---- databases/mysql.tf | 47 ------ databases/redis.nomad | 21 +-- databases/redis.tf | 28 ---- databases/rediscommander.nomad | 31 +--- nomad_vars.py | 73 +++++++++ providers.tf | 41 +---- services.tf | 10 +- vars.tf | 18 +-- 19 files changed, 241 insertions(+), 792 deletions(-) create mode 100755 nomad_vars.py diff --git a/.terraform.lock.hcl b/.terraform.lock.hcl index 2c79908..a3678e4 100644 --- a/.terraform.lock.hcl +++ b/.terraform.lock.hcl @@ -2,40 +2,40 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/hashicorp/consul" { - version = "2.16.2" + version = "2.17.0" hashes = [ - "h1:epldE7sZPBTQHnWEA4WlNJIOVT1UEX+/02SMg5nniaE=", - "zh:0a2e11ca2ba650954951a087a1daec95eee2f3000456b295409a9880c4a10b1a", - "zh:34f6bda06a0d1c213fa8d87d4313687681e67bc8c40c4cbaa7dbe59ce24a4f7e", - "zh:5b85cf93db11ee890f720c317a38158927071feb634855786a0c0cd65825a43c", - "zh:75ef915f3d087e6045751a66fbb7066a852a0944ec8c97200d1134dd84df7ffc", - "zh:8a4a95697bd91ad51a581c12fe50ac61a114afba27895d027f77ac4154a7ea15", - "zh:973d538c8d72793861a1ac9718249a9493f417a2b5096846367560054fd843b9", - "zh:9feb2bdc06fdc2d8370cc9aad9a0c69e7e5ae38aac43f315c3f57507c57be030", - "zh:c5709672d0afecbbe298bf519741ebcb9d04f02a73b5ee0c186dfa241aa5a524", - "zh:c65c60570de6da7190e1e7762577655a463caeb59bc5d38e33034821ed0cbcb9", - "zh:c958d6282650fc472aade61d5df4300936033f43cfb898293ef86aceccdfdf1d", - "zh:cdd3632c81e1d11d3becd193aaa061688840f39147950c45c4301d042743ae6a", + "h1:k+8ptRn/iiCnE7mC0LVA8FvnukzKnlD3KAcquPFbtN8=", + "zh:1cca5e144b4696900d2410e26499a00c9666e5777b657e9844a4b6d198164a09", + "zh:4fe59329ae4a4fc13751cde4a1044427ca591ecefbaa8dde2ce828f660fbddb1", + "zh:55c42cec7dd10ee1f03eca03d5b8e3bcba7bf281bcd250ac220458aba735ba1f", + "zh:625a0481d0b2599d0e6ac609d9efc151f1c9cad53091e2ee3bfcedc34ccacb34", + "zh:7e9a08b19491f26aa685311a9211bacd7b7027d9cf6eaee16949435221a5f688", + "zh:9d92816f609367204c4df20c29c57ee631f5a65cf6bb782d9d9b3f945ba21353", + "zh:a332ef65a6ba829dc335ade1a3e69ae14e162dc6ca1a991d9d6ad4e596f4c2d7", + "zh:ce7ffac8d852342e9fe25053383613934c8b81d8c2ba2c9d10626b71e329fed7", + "zh:d384a1ef35c766362e8ae3131d00c05e1c0904d8b4b1d964548b91e1025f324b", + "zh:d85067f345b663e8e59fb02705918d3618ce56887a472665bec7f1aeddbc9ea4", + "zh:ddff8512e8181efae6d0d259abcd457d9a394a4a6f99d6bb0b180cabee373097", "zh:f3d3efac504c9484a025beb919d22b290aa6dbff256f6e86c1f8ce7817e077e5", ] } provider "registry.terraform.io/hashicorp/external" { - version = "2.2.3" + version = "2.3.1" hashes = [ - "h1:uvOYRWcVIqOZSl8YjjaB18yZFz1AWIt2CnK7O45rckg=", - "zh:184ecd339d764de845db0e5b8a9c87893dcd0c9d822167f73658f89d80ec31c9", - "zh:2661eaca31d17d6bbb18a8f673bbfe3fe1b9b7326e60d0ceb302017003274e3c", - "zh:2c0a180f6d1fc2ba6e03f7dfc5f73b617e45408681f75bca75aa82f3796df0e4", - "zh:4b92ae44c6baef4c4952c47be00541055cb5280dd3bc8031dba5a1b2ee982387", - "zh:5641694d5daf3893d7ea90be03b6fa575211a08814ffe70998d5adb8b59cdc0a", - 
"zh:5bd55a2be8a1c20d732ac9c604b839e1cadc8c49006315dffa4d709b6874df32", - "zh:6e0ef5d11e1597202424b7d69b9da7b881494c9b13a3d4026fc47012dc651c79", + "h1:bROCw6g5D/3fFnWeJ01L4IrdnJl1ILU8DGDgXCtYzaY=", + "zh:001e2886dc81fc98cf17cf34c0d53cb2dae1e869464792576e11b0f34ee92f54", + "zh:2eeac58dd75b1abdf91945ac4284c9ccb2bfb17fa9bdb5f5d408148ff553b3ee", + "zh:2fc39079ba61411a737df2908942e6970cb67ed2f4fb19090cd44ce2082903dd", + "zh:472a71c624952cff7aa98a7b967f6c7bb53153dbd2b8f356ceb286e6743bb4e2", + "zh:4cff06d31272aac8bc35e9b7faec42cf4554cbcbae1092eaab6ab7f643c215d9", "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:9e19f89fa25004d3b926a8d15ea630b4bde62f1fa4ed5e11a3d27aabddb77353", - "zh:b763efdd69fd097616b4a4c89cf333b4cee9699ac6432d73d2756f8335d1213f", - "zh:e3b561efdee510b2b445f76a52a902c52bee8e13095e7f4bed7c80f10f8d294a", - "zh:fe660bb8781ee043a093b9a20e53069974475dcaa5791a1f45fd03c61a26478a", + "zh:7ed16ccd2049fa089616b98c0bd57219f407958f318f3c697843e2397ddf70df", + "zh:842696362c92bf2645eb85c739410fd51376be6c488733efae44f4ce688da50e", + "zh:8985129f2eccfd7f1841ce06f3bf2bbede6352ec9e9f926fbaa6b1a05313b326", + "zh:a5f0602d8ec991a5411ef42f872aa90f6347e93886ce67905c53cfea37278e05", + "zh:bf4ab82cbe5256dcef16949973bf6aa1a98c2c73a98d6a44ee7bc40809d002b8", + "zh:e70770be62aa70198fa899526d671643ff99eecf265bf1a50e798fc3480bd417", ] } @@ -57,22 +57,3 @@ provider "registry.terraform.io/hashicorp/nomad" { "zh:ffd1e096c1cc35de879c740a91918e9f06b627818a3cb4b1d87b829b54a6985f", ] } - -provider "registry.terraform.io/hashicorp/vault" { - version = "3.11.0" - hashes = [ - "h1:AUVEra6fAOiAUWa0FOU+ehx4K2htbsfgLDrMh1H6mQs=", - "zh:18cb684852f1b40b2a329ba07ece3363430d69bffdcafea48ed29f954481e39e", - "zh:1b96968a8de6849a237cc945cbe247ccd6ec98b4023548b1c0af5d6c6affe4ef", - "zh:3e0a0741ba12aa0cf1a2b8b80928450bb329343f4b41f35b0eddbeb52aa6284b", - "zh:4a8f0ee5ac4e8a0705d9f38b3d549223fe1142486d71f0b6f24f64ae0d7dd5ca", - "zh:4cc6705dcd111e6ad47ab4cfd2d8a99b2b241967abd50add6ac8c27025f4128b", - "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:8e106e840a963b9ae32dc24b50fa1ceecb09753e6db10ab134009d59d170686b", - "zh:8f9c4ccf4da8555b11375d2a09a022d7a8f5ecf701f0bb89a4f07ad0b720bb98", - "zh:a6fda115017b42f71f4b7917ae4860354920f0653cb8906ce627129dbabb252b", - "zh:c01666362b293b6af8cd556b2c5ffe9014ae8640ec3621c1cfa772fa1a6b335d", - "zh:e9be58b1211da0219a5bf6bfd81b8bf474256519426df10672e6dfce3086af60", - "zh:fd2272083e90b38c28cd18b1b9d3ae14b6a0ebf08985468d010d2bee8df816e0", - ] -} diff --git a/ansible_playbooks/setup-cluster.yml b/ansible_playbooks/setup-cluster.yml index bf3dcc1..609adbb 100644 --- a/ansible_playbooks/setup-cluster.yml +++ b/ansible_playbooks/setup-cluster.yml @@ -13,152 +13,6 @@ create: true line: "nameserver {{ non_nomad_dns }}" -- name: Build Consul cluster - hosts: consul_instances - any_errors_fatal: true - - roles: - - role: ansible-consul - vars: - consul_version: "1.13.3-1" - consul_install_upgrade: true - consul_install_from_repo: true - consul_os_repo_prerequisites: [] - - consul_node_role: server - consul_raft_protocol: 3 - consul_bootstrap_expect: true - consul_bootstrap_expect_max_value: 2 - consul_bootstrap_expect_value: "{{ [(play_hosts | length), consul_bootstrap_expect_max_value] | min }}" - - consul_user: consul - consul_manage_user: true - consul_group: bin - consul_manage_group: true - - # consul_tls_enable: true - consul_connect_enabled: true - consul_ports_grpc: 8502 - consul_client_address: "0.0.0.0" - - # Autopilot - 
consul_autopilot_enable: false - consul_autopilot_cleanup_dead_Servers: false - - # Enable metrics - consul_config_custom: - telemetry: - prometheus_retention_time: "2h" - - # DNS forwarding - consul_dnsmasq_enable: true - consul_dnsmasq_servers: - # TODO: use addresses of other nomad nodes? - # Maybe this can be [] to get the values from dhcp - - 1.1.1.1 - - 1.0.0.1 - consul_dnsmasq_bind_interfaces: true - consul_dnsmasq_listen_addresses: - # Listen only to loopback interface - - 127.0.0.1 - - become: true - - tasks: - - name: Start Consul - systemd: - state: started - name: consul - become: true - - # If DNS is broken after dnsmasq, then need to set /etc/resolv.conf to something - # pointing to 127.0.0.1 and possibly restart Docker and Nomad - # Actually, we should point to our external Nomad address so that Docker uses it - - name: Update resolv.conf - lineinfile: - dest: /etc/resolv.conf - create: true - line: "nameserver {{ hostvars[item].ansible_default_ipv4.address }}" - loop: "{{ ansible_play_hosts }}" - become: true - -- name: Setup Vault cluster - hosts: vault_instances - - roles: - - name: ansible-vault - vars: - vault_version: 1.12.2-1 - vault_install_hashi_repo: true - vault_harden_file_perms: true - # Maybe this should be restricted - vault_group: bin - vault_bin_path: /usr/bin - vault_address: 0.0.0.0 - - vault_backend: consul - become: true - - tasks: - - name: Get Vault status - uri: - url: http://127.0.0.1:8200/v1/sys/health - method: GET - status_code: 200, 429, 472, 473, 501, 503 - body_format: json - return_content: true - register: vault_status - - - name: Initialize Vault - when: not vault_status.json["initialized"] - block: - - name: Initialize Vault - command: - argv: - - "vault" - - "operator" - - "init" - - "-format=json" - - "-address=http://127.0.0.1:8200/" - - "-key-shares={{ vault_init_key_shares|default(3) }}" - - "-key-threshold={{ vault_init_key_threshold|default(2) }}" - run_once: true - register: vault_init - - - name: Save initialize result - copy: - content: "{{ vault_init.stdout }}" - dest: "../vault-keys.json" - when: vault_init is succeeded - delegate_to: localhost - run_once: true - - - name: Unseal from init - no_log: true - command: - argv: - - "vault" - - "operator" - - "unseal" - - "-address=http://127.0.0.1:8200/" - - "{{ item }}" - loop: "{{ (vault_init.stdout | from_json)['unseal_keys_hex'] }}" - when: vault_init is succeeded - - - name: Unseal Vault - no_log: true - command: - argv: - - "vault" - - "operator" - - "unseal" - - "-address=http://127.0.0.1:8200/" - - "{{ item }}" - loop: "{{ unseal_keys_hex }}" - when: - - unseal_keys_hex is defined - - vault_status.json["sealed"] - - name: Install Docker hosts: nomad_instances become: true @@ -309,8 +163,8 @@ enabled: true selinuxlabel: "z" # Send logs to journald so we can scrape them for Loki - logging: - type: journald + # logging: + # type: journald extra_labels: - "job_name" - "job_id" @@ -341,35 +195,9 @@ # Enable ACLs nomad_acl_enabled: true - # Enable vault integration - # HACK: Only talk to local Vault for now because it doesn't have HTTPS - # TODO: Would be really great to have this over https and point to vault.consul.service - # nomad_vault_address: "https://vault.service.consul:8200" - # Right now, each node only talks to it's local Vault, so if that node is rebooted and - # that vault is sealed, it will not have access to vault. This is a problem if a node - # must reboot. 
- nomad_vault_address: "http://127.0.0.1:8200" - # TODO: This fails on first run because the Nomad-Vault integration can't be set up - # until Nomad has started. Could maybe figure out if ACLs have been set up and leave - # these out until the later play, maybe just bootstrap the nomad-cluster role in Vault - # befor Nomad is set up - nomad_vault_create_from_role: "nomad-cluster" - # TODO: (security) Probably want to restict this to a narrower scoped token - nomad_vault_enabled: "{{ root_token is defined }}" - nomad_vault_token: "{{ root_token | default('') }}" - nomad_config_custom: ui: enabled: true - consul: - ui_url: "https://consul.thefij.rocks/ui" - vault: - ui_url: "https://vault.thefij.rocks/ui" - consul: - tags: - - "traefik.enable=true" - - "traefik.consulcatalog.connect=true" - - "traefik.http.routers.nomadclient.entrypoints=websecure" - name: Bootstrap Nomad ACLs and scheduler hosts: nomad_instances @@ -424,20 +252,6 @@ changed_when: false register: read_secretid - - name: Enable service scheduler preemption - command: - argv: - - nomad - - operator - - scheduler - - set-config - - -preempt-system-scheduler=true - - -preempt-service-scheduler=true - environment: - NOMAD_TOKEN: "{{ read_secretid.stdout }}" - delegate_to: "{{ play_hosts[0] }}" - run_once: true - - name: Look for policy command: argv: @@ -454,8 +268,6 @@ copy: src: ../acls/nomad-anon-policy.hcl dest: /tmp/anonymous.policy.hcl - delegate_to: "{{ play_hosts[0] }}" - register: anon_policy run_once: true - name: Create anon-policy @@ -474,18 +286,32 @@ delegate_to: "{{ play_hosts[0] }}" run_once: true - - name: Set up Nomad backend and roles in Vault - community.general.terraform: - project_path: ../acls - force_init: true - variables: - consul_address: "{{ play_hosts[0] }}:8500" - vault_token: "{{ root_token }}" - nomad_secret_id: "{{ read_secretid.stdout }}" - delegate_to: localhost + - name: Enable service scheduler preemption + command: + argv: + - nomad + - operator + - scheduler + - set-config + - -preempt-system-scheduler=true + - -preempt-service-scheduler=true + environment: + NOMAD_TOKEN: "{{ read_secretid.stdout }}" + delegate_to: "{{ play_hosts[0] }}" run_once: true - notify: - - Restart Nomad + + # - name: Set up Nomad backend and roles in Vault + # community.general.terraform: + # project_path: ../acls + # force_init: true + # variables: + # consul_address: "{{ play_hosts[0] }}:8500" + # vault_token: "{{ root_token }}" + # nomad_secret_id: "{{ read_secretid.stdout }}" + # delegate_to: localhost + # run_once: true + # notify: + # - Restart Nomad handlers: - name: Restart Nomad diff --git a/core/blocky/blocky.nomad b/core/blocky/blocky.nomad index e649279..b881396 100644 --- a/core/blocky/blocky.nomad +++ b/core/blocky/blocky.nomad @@ -37,11 +37,13 @@ job "blocky" { service { name = "blocky-dns" + provider = "nomad" port = "dns" } service { name = "blocky-api" + provider = "nomad" port = "api" meta { @@ -53,41 +55,6 @@ job "blocky" { "traefik.http.routers.blocky-api.entryPoints=websecure", ] - connect { - sidecar_service { - proxy { - local_service_port = 4000 - - expose { - path { - path = "/metrics" - protocol = "http" - local_path_port = 4000 - listener_port = "api" - } - } - - upstreams { - destination_name = "redis" - local_bind_port = 6379 - } - - upstreams { - destination_name = "mysql-server" - local_bind_port = 4040 - } - } - } - - sidecar_task { - resources { - cpu = 50 - memory = 20 - memory_max = 50 - } - } - } - check { name = "api-health" port = "api" @@ -118,13 +85,6 @@ job "blocky" { 
memory_max = 100 } - vault { - policies = [ - "access-tables", - "nomad-task", - ] - } - template { data = var.config_data destination = "app/config.yml" @@ -154,38 +114,33 @@ job "blocky" { ] } - vault { - policies = [ - "access-tables", - "nomad-task", - ] - } - template { data = <> certResolver = "letsEncrypt" [[entryPoints.websecure.http.tls.domains]] - main = "*.<< keyOrDefault "global/base_hostname" "${var.base_hostname}" >>" - << end ->> + main = "*.<< with nomadVar "nomad/jobs" >><< .base_hostname >><< end >>" [entryPoints.metrics] address = ":8989" @@ -152,40 +143,31 @@ job "traefik" { directory = "/etc/traefik/conf" watch = true -[providers.consulCatalog] - connectAware = true - connectByDefault = true - exposedByDefault = false - defaultRule = "Host(`{{normalize .Name}}.<< keyOrDefault "global/base_hostname" "${var.base_hostname}" >>`)" - [providers.consulCatalog.endpoint] - address = "http://<< env "CONSUL_HTTP_ADDR" >>" - [providers.nomad] exposedByDefault = false - defaultRule = "Host(`{{normalize .Name}}.<< keyOrDefault "global/base_hostname" "${var.base_hostname}" >>`)" + defaultRule = "Host(`{{normalize .Name}}.<< with nomadVar "nomad/jobs" >><< .base_hostname >><< end >>`)" [providers.nomad.endpoint] - address = "http://127.0.0.1:4646/" + address = "http://<< env "attr.unique.network.ip-address" >>:4646" -<< if keyExists "traefik/acme/email" ->> +<< if nomadVarExists "nomad/jobs/traefik" ->> [certificatesResolvers.letsEncrypt.acme] - email = "<< key "traefik/acme/email" >>" + email = "<< with nomadVar "nomad/jobs/traefik" >><< .acme_email >><< end >>" # Store in /local because /secrets doesn't persist with ephemeral disk storage = "/local/acme.json" [certificatesResolvers.letsEncrypt.acme.dnsChallenge] provider = "cloudflare" resolvers = ["1.1.1.1:53", "8.8.8.8:53"] delayBeforeCheck = 0 -<< end ->> EOH destination = "local/config/traefik.toml" } template { data = < dict: + headers = {} + if NOMAD_TOKEN: + headers["X-Nomad-Token"] = NOMAD_TOKEN + + result = requests.post( + f"{NOMAD_ADDR}/v1/var/{path}", + headers=headers, + json={ + "Path": path, + "Items": {k: str(v) for k, v in items.items()}, + }, + ) + + print(result.text) + result.raise_for_status() + + return result.json() + + +def write_consul(): + with open("./ansible_playbooks/vars/consul_values.yml") as f: + vars = yaml.load(f, yaml.CLoader)["consul_values"] + + key_values = defaultdict(list) + for path, value in vars.items(): + path, _, item = path.rpartition("/") + key_values[path].append((item, value)) + + for path, items in key_values.items(): + print("path", path, "items", items) + response = write_var(path, dict(items)) + print(response) + + +def write_vault(): + with open("./ansible_playbooks/vars/vault_hashi_vault_values.yml") as f: + vars = yaml.load(f, yaml.CLoader)["hashi_vault_values"] + prefix = "secrets/" + + for path, items in vars.items(): + print("path", path, "items", items) + response = write_var(prefix + path, items) + print(response) + +def write_nomad(): + with open("./ansible_playbooks/vars/nomad_vars.yml") as f: + vars = yaml.load(f, yaml.CLoader) + + for path, items in vars.items(): + print("path", path, "items", items) + response = write_var(path, items) + print(response) + + +def main(): + write_nomad() + + +if __name__ == "__main__": + main() diff --git a/providers.tf b/providers.tf index 6e5ef19..0c909a1 100644 --- a/providers.tf +++ b/providers.tf @@ -1,45 +1,6 @@ -# Configure Consul provider -provider "consul" { - address = var.consul_address -} - -# Get Nomad client from Consul 
-data "consul_service" "nomad" { - name = "nomad-client" -} - -# Get Vault client from Consul -data "consul_service" "vault" { - name = "vault" - tag = "active" -} - -locals { - # Get Nomad address from Consul - nomad_node = data.consul_service.nomad.service[0] - nomad_node_address = "http://${local.nomad_node.node_address}:${local.nomad_node.port}" - - # Get Vault address from Consul - vault_node = data.consul_service.vault.service[0] - vault_node_address = "http://${local.vault_node.node_address}:${local.vault_node.port}" -} - -# Configure the Vault provider -provider "vault" { - address = length(var.vault_address) == 0 ? local.vault_node_address : var.vault_address - token = var.vault_token -} - -# Something that should exist in a post bootstrap module, right now module includes bootstrapping -# which requries Admin -# data "vault_nomad_access_token" "deploy" { -# backend = "nomad" -# role = "deploy" -# } - # Configure the Nomad provider provider "nomad" { - address = length(var.nomad_address) == 0 ? local.nomad_node_address : var.nomad_address + address = var.nomad_address secret_id = var.nomad_secret_id # secret_id = length(var.nomad_secret_id) == 0 ? data.vault_nomad_access_token.admin.secret_id : var.nomad_secret_id region = "global" diff --git a/services.tf b/services.tf index 1f9cac3..25ebceb 100644 --- a/services.tf +++ b/services.tf @@ -1,5 +1,5 @@ -module "services" { - source = "./services" - - depends_on = [module.databases, module.core] -} +# module "services" { +# source = "./services" +# +# depends_on = [module.databases, module.core] +# } diff --git a/vars.tf b/vars.tf index 9fe54fc..6702560 100644 --- a/vars.tf +++ b/vars.tf @@ -1,16 +1,6 @@ -variable "consul_address" { - type = string - default = "http://n1.thefij:8500" -} - -variable "vault_address" { - type = string - default = "" -} - variable "nomad_address" { type = string - default = "" + default = "http://n1.thefij:4646" } variable "base_hostname" { @@ -25,9 +15,3 @@ variable "nomad_secret_id" { sensitive = true default = "" } - -variable "vault_token" { - type = string - sensitive = true - default = "" -}