From cacabec505dfb88944c7a185d7c6b0bc7daa1516 Mon Sep 17 00:00:00 2001 From: Ian Fijolek Date: Thu, 3 Mar 2022 09:37:49 -0800 Subject: [PATCH] Lots of Nomad updates to support metrics --- nomad/metrics/.terraform.lock.hcl | 20 +++ nomad/metrics/exporters.nomad | 225 ++++++++++++++++++++++++++++++ nomad/metrics/metrics.tf | 46 ++++++ nomad/metrics/prometheus.nomad | 182 ++++++++++++++++++++++++ nomad/services.tf | 25 ++-- nomad/setup-cluster.yml | 55 ++++++-- nomad/traefik/traefik.tf | 3 +- 7 files changed, 536 insertions(+), 20 deletions(-) create mode 100644 nomad/metrics/.terraform.lock.hcl create mode 100644 nomad/metrics/exporters.nomad create mode 100644 nomad/metrics/metrics.tf create mode 100644 nomad/metrics/prometheus.nomad diff --git a/nomad/metrics/.terraform.lock.hcl b/nomad/metrics/.terraform.lock.hcl new file mode 100644 index 0000000..4a078a2 --- /dev/null +++ b/nomad/metrics/.terraform.lock.hcl @@ -0,0 +1,20 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/nomad" { + version = "1.4.16" + hashes = [ + "h1:tyfjD/maKzb0RxxD9KWgLnkJu9lnYziYsQgGw85Giz8=", + "zh:0d4fbb7030d9caac3b123e60afa44f50c83cc2a983e1866aec7f30414abe7b0e", + "zh:0db080228e07c72d6d8ca8c45249d6f97cd0189fce82a77abbdcd49a52e57572", + "zh:0df88393271078533a217654b96f0672c60eb59570d72e6aefcb839eea87a7a0", + "zh:2883b335bb6044b0db6a00e602d6926c047c7f330294a73a90d089f98b24d084", + "zh:390158d928009a041b3a182bdd82376b50530805ae92be2b84ed7c3b0fa902a0", + "zh:7169b8f8df4b8e9659c49043848fd5f7f8473d0471f67815e8b04980f827f5ef", + "zh:9417ee1383b1edd137024882d7035be4dca51fb4f725ca00ed87729086ec1755", + "zh:a22910b5a29eeab5610350700b4899267c1b09b66cf21f7e4d06afc61d425800", + "zh:a6185c9cd7aa458cd81861058ba568b6411fbac344373a20155e20256f4a7557", + "zh:b6260ca9f034df1b47905b4e2a9c33b67dbf77224a694d5b10fb09ae92ffad4c", + "zh:d87c12a6a7768f2b6c2a59495c7dc00f9ecc52b1b868331d4c284f791e278a1e", + ] +} diff --git a/nomad/metrics/exporters.nomad b/nomad/metrics/exporters.nomad new file mode 100644 index 0000000..bf76cf8 --- /dev/null +++ b/nomad/metrics/exporters.nomad @@ -0,0 +1,225 @@ +job "metrics" { + datacenters = ["dc1"] + type = "system" + + group "exporters" { + + network { + mode = "bridge" + + port "cadvisor" { + host_network = "nomad-bridge" + to = 8080 + } + + port "node_exporter" { + host_network = "nomad-bridge" + to = 9100 + } + + port "promtail" { + host_network = "nomad-bridge" + to = 9080 + } + + port "expose" { + host_network = "nomad-bridge" + } + + port "cadvisor_envoy_metrics" { + host_network = "nomad-bridge" + to = 9102 + } + } + + service { + name = "cadvisor" + port = "cadvisor" + + meta { + metrics_addr = "${NOMAD_ADDR_expose}" + envoy_metrics_addr = "${NOMAD_ADDR_cadvisor_envoy_metrics}" + nomad_dc = "${NOMAD_DC}" + nomad_node_name = "${node.unique.name}" + } + + connect { + sidecar_service { + proxy { + local_service_port = 8080 + + expose { + path { + path = "/metrics" + protocol = "http" + 
local_path_port = 8080 + listener_port = "expose" + } + } + + config { + envoy_prometheus_bind_addr = "0.0.0.0:9102" + } + } + } + + sidecar_task { + resources { + cpu = 50 + memory = 50 + } + } + } + + check { + type = "http" + path = "/metrics" + port = "cadvisor" + interval = "10s" + timeout = "10s" + } + + // TODO: Remove traefik tags + tags = [ + "traefik.enable=true", + "traefik.http.routers.cadvisor.entrypoints=web,websecure", + "traefik.http.routers.cadvisor.rule=Host(`cadvisor.dev.homelab`)", + "traefik.http.routers.cadvisor.tls=true", + ] + } + + task "cadvisor" { + driver = "docker" + + config { + # image = "iamthefij/cadvisor:0.37.5" + image = "gcr.io/cadvisor/cadvisor:v0.39.3" + args = ["--docker_only=true"] + + ports = ["cadvisor"] + + # volumes = [ + # "/:/rootfs:ro", + # "/var/run:/var/run:rw", + # "/sys:/sys:ro", + # "/var/lib/docker/:/var/lib/docker:ro", + # "/cgroup:/cgroup:ro", + # "/etc/machine-id:/etc/machine-id:ro", + # ] + + mount { + type = "bind" + source = "/" + target = "/rootfs" + readonly = true + } + + mount { + type = "bind" + source = "/var/run" + target = "/var/run" + readonly = false + } + + mount { + type = "bind" + source = "/sys" + target = "/sys" + readonly = true + } + + mount { + type = "bind" + source = "/var/lib/docker" + target = "/var/lib/docker" + readonly = true + } + + # mount { + # type = "bind" + # source = "/cgroup" + # target = "/cgroup" + # readonly = true + # } + + mount { + type = "bind" + source = "/etc/machine-id" + target = "/etc/machine-id" + readonly = true + } + } + + resources { + cpu = 50 + memory = 100 + } + } + + service { + name = "nodeexporter" + port = "node_exporter" + + meta { + metrics_addr = "${NOMAD_ADDR_node_exporter}" + nomad_dc = "${NOMAD_DC}" + nomad_node_name = "${node.unique.name}" + } + + connect { + sidecar_service { + proxy { + local_service_port = 9100 + } + } + + sidecar_task { + resources { + cpu = 50 + memory = 50 + } + } + } + + check { + type = "http" + path = "/metrics" + port 
= "node_exporter" + interval = "10s" + timeout = "10s" + } + + // TODO: Remove traefik tags + tags = [ + "traefik.enable=true", + "traefik.http.routers.node_exporter.entrypoints=web,websecure", + "traefik.http.routers.node_exporter.rule=Host(`node_exporter.dev.homelab`)", + "traefik.http.routers.node_exporter.tls=true", + ] + } + + task "node_exporter" { + driver = "docker" + + config { + image = "prom/node-exporter:v1.0.1" + args = ["--path.rootfs", "/host"] + + ports = ["node_exporter"] + + mount { + type = "bind" + source = "/" + target = "/host" + readonly = true + } + } + + resources { + cpu = 50 + memory = 50 + } + } + } +} + diff --git a/nomad/metrics/metrics.tf b/nomad/metrics/metrics.tf new file mode 100644 index 0000000..31c3379 --- /dev/null +++ b/nomad/metrics/metrics.tf @@ -0,0 +1,46 @@ +variable "base_hostname" { + type = string + description = "Base hostname to serve content from" + default = "dev.homelab" +} + +variable "consul_address" { + type = string + description = "address of consul server for dynamic scraping" +} + +resource "nomad_job" "exporters" { + hcl2 { + enabled = true + } + + jobspec = file("${path.module}/exporters.nomad") +} + +resource "nomad_job" "prometheus" { + hcl2 { + enabled = true + vars = { + "consul_address" = var.consul_address, # fills the consul_address variable declared in prometheus.nomad + } + } + + jobspec = file("${path.module}/prometheus.nomad") +} +# +# resource "nomad_job" "prometheus" { +# hcl2 { +# enabled = true +# } +# +# } +# +# resource "nomad_job" "grafana" { +# hcl2 { +# enabled = true +# vars = { +# "base_hostname" = "${var.base_hostname}", +# } +# } +# +# } diff --git a/nomad/metrics/prometheus.nomad b/nomad/metrics/prometheus.nomad new file mode 100644 index 0000000..3accaef --- /dev/null +++ b/nomad/metrics/prometheus.nomad @@ -0,0 +1,182 @@ +variable "consul_address" { + type = string + description = "Full address of Consul instance to get catalog from" + default = "http://127.0.0.1:5400" +} + +job "prometheus" { + datacenters = ["dc1"] + + group 
"prometheus" { + count = 1 + + network { + mode = "bridge" + + port "web" { + host_network = "nomad-bridge" + to = 9090 + } + } + + service { + port = "web" + + connect { + sidecar_service { + proxy { + local_service_port = 9090 + } + } + + sidecar_task { + resources { + cpu = 50 + memory = 50 + } + } + } + + check { + type = "http" + path = "/" + port = "web" + interval = "10s" + timeout = "10s" + } + + // TODO: Remove traefik tags + tags = [ + "traefik.enable=true", + "traefik.http.routers.prometheus.entrypoints=web,websecure", + "traefik.http.routers.prometheus.rule=Host(`prometheus.dev.homelab`)", + "traefik.http.routers.prometheus.tls=true", + ] + } + + task "prometheus" { + driver = "docker" + + config { + image = "prom/prometheus:v2.30.2" + ports = ["web"] + args = [ + "--config.file=/etc/prometheus/config/prometheus.yml", + "--storage.tsdb.path=/prometheus", + "--web.listen-address=0.0.0.0:9090", + "--web.console.libraries=/usr/share/prometheus/console_libraries", + "--web.console.templates=/usr/share/prometheus/consoles", + ] + + mount { + type = "bind" + target = "/etc/prometheus/config" + source = "local/config" + } + } + + template { + data = <