Lots of Nomad updates to support metrics

This commit is contained in:
IamTheFij 2022-03-03 09:37:49 -08:00
parent 6110e78edf
commit cacabec505
7 changed files with 536 additions and 20 deletions

View File

@ -0,0 +1,20 @@
# This file is maintained automatically by "terraform init".
# Manual edits may be lost in future updates.
provider "registry.terraform.io/hashicorp/nomad" {
version = "1.4.16"
# "h1:" is the provider package hash for the platform terraform init ran on;
# "zh:" entries are per-platform release zip hashes used for integrity checks.
hashes = [
"h1:tyfjD/maKzb0RxxD9KWgLnkJu9lnYziYsQgGw85Giz8=",
"zh:0d4fbb7030d9caac3b123e60afa44f50c83cc2a983e1866aec7f30414abe7b0e",
"zh:0db080228e07c72d6d8ca8c45249d6f97cd0189fce82a77abbdcd49a52e57572",
"zh:0df88393271078533a217654b96f0672c60eb59570d72e6aefcb839eea87a7a0",
"zh:2883b335bb6044b0db6a00e602d6926c047c7f330294a73a90d089f98b24d084",
"zh:390158d928009a041b3a182bdd82376b50530805ae92be2b84ed7c3b0fa902a0",
"zh:7169b8f8df4b8e9659c49043848fd5f7f8473d0471f67815e8b04980f827f5ef",
"zh:9417ee1383b1edd137024882d7035be4dca51fb4f725ca00ed87729086ec1755",
"zh:a22910b5a29eeab5610350700b4899267c1b09b66cf21f7e4d06afc61d425800",
"zh:a6185c9cd7aa458cd81861058ba568b6411fbac344373a20155e20256f4a7557",
"zh:b6260ca9f034df1b47905b4e2a9c33b67dbf77224a694d5b10fb09ae92ffad4c",
"zh:d87c12a6a7768f2b6c2a59495c7dc00f9ecc52b1b868331d4c284f791e278a1e",
]
}

View File

@ -0,0 +1,225 @@
# System job that runs a metrics-exporter stack (cAdvisor, node_exporter and
# their Consul Connect sidecars) on every Nomad client node.
job "metrics" {
datacenters = ["dc1"]
# "system" scheduling places one allocation of this group per eligible node.
type = "system"
group "exporters" {
network {
mode = "bridge"
# "to" is the in-container listen port; the host-side port is allocated
# dynamically from the "nomad-bridge" host network.
port "cadvisor" {
host_network = "nomad-bridge"
to = 8080
}
port "node_exporter" {
host_network = "nomad-bridge"
to = 9100
}
port "promtail" {
host_network = "nomad-bridge"
to = 9080
}
# No "to": fully dynamic port, used by the Connect "expose" path below to
# publish /metrics outside the service mesh.
port "expose" {
host_network = "nomad-bridge"
}
# Envoy's own Prometheus metrics listener (bound to 9102 in the sidecar
# proxy config below).
port "cadvisor_envoy_metrics" {
host_network = "nomad-bridge"
to = 9102
}
}
service {
name = "cadvisor"
port = "cadvisor"
# Consul service metadata consumed by the Prometheus job's consul_sd
# relabel configs to pick scrape addresses and attach Nomad labels.
meta {
metrics_addr = "${NOMAD_ADDR_expose}"
envoy_metrics_addr = "${NOMAD_ADDR_cadvisor_envoy_metrics}"
nomad_dc = "${NOMAD_DC}"
nomad_node_name = "${node.unique.name}"
}
connect {
sidecar_service {
proxy {
local_service_port = 8080
# Publish /metrics through the Envoy proxy on the dynamic "expose"
# port so Prometheus can scrape without joining the mesh.
expose {
path {
path = "/metrics"
protocol = "http"
local_path_port = 8080
listener_port = "expose"
}
}
config {
# Envoy's own metrics endpoint; mapped by the
# "cadvisor_envoy_metrics" port above.
envoy_prometheus_bind_addr = "0.0.0.0:9102"
}
}
}
sidecar_task {
resources {
cpu = 50
memory = 50
}
}
}
# NOTE(review): timeout equals interval; Nomad only requires
# timeout <= interval, but a smaller timeout is customary.
check {
type = "http"
path = "/metrics"
port = "cadvisor"
interval = "10s"
timeout = "10s"
}
// TODO: Remove traefik tags
tags = [
"traefik.enable=true",
"traefik.http.routers.cadvisor.entrypoints=web,websecure",
"traefik.http.routers.cadvisor.rule=Host(`cadvisor.dev.homelab`)",
"traefik.http.routers.cadvisor.tls=true",
]
}
task "cadvisor" {
driver = "docker"
config {
# image = "iamthefij/cadvisor:0.37.5"
image = "gcr.io/cadvisor/cadvisor:v0.39.3"
args = ["--docker_only=true"]
ports = ["cadvisor"]
# Host paths cAdvisor reads are bind-mounted via mount blocks below
# instead of the docker driver's "volumes" option.
# volumes = [
# "/:/rootfs:ro",
# "/var/run:/var/run:rw",
# "/sys:/sys:ro",
# "/var/lib/docker/:/var/lib/docker:ro",
# "/cgroup:/cgroup:ro",
# "/etc/machine-id:/etc/machine-id:ro",
# ]
mount {
type = "bind"
source = "/"
target = "/rootfs"
readonly = true
}
# /var/run is writable so cAdvisor can talk to the Docker socket.
mount {
type = "bind"
source = "/var/run"
target = "/var/run"
readonly = false
}
mount {
type = "bind"
source = "/sys"
target = "/sys"
readonly = true
}
mount {
type = "bind"
source = "/var/lib/docker"
target = "/var/lib/docker"
readonly = true
}
# mount {
# type = "bind"
# source = "/cgroup"
# target = "/cgroup"
# readonly = true
# }
mount {
type = "bind"
source = "/etc/machine-id"
target = "/etc/machine-id"
readonly = true
}
}
resources {
cpu = 50
memory = 100
}
}
# Second service in the same group. Registered in Consul as "nodeexporter"
# (no underscore) — keep in sync with the Prometheus "exporters" scrape
# job's service list.
service {
name = "nodeexporter"
port = "node_exporter"
meta {
metrics_addr = "${NOMAD_ADDR_node_exporter}"
nomad_dc = "${NOMAD_DC}"
nomad_node_name = "${node.unique.name}"
}
connect {
sidecar_service {
proxy {
local_service_port = 9100
}
}
sidecar_task {
resources {
cpu = 50
memory = 50
}
}
}
check {
type = "http"
path = "/metrics"
port = "node_exporter"
interval = "10s"
timeout = "10s"
}
// TODO: Remove traefik tags
tags = [
"traefik.enable=true",
"traefik.http.routers.node_exporter.entrypoints=web,websecure",
"traefik.http.routers.node_exporter.rule=Host(`node_exporter.dev.homelab`)",
"traefik.http.routers.node_exporter.tls=true",
]
}
task "node_exporter" {
driver = "docker"
config {
image = "prom/node-exporter:v1.0.1"
# Read host metrics from the read-only bind mount of / at /host.
args = ["--path.rootfs", "/host"]
ports = ["node_exporter"]
mount {
type = "bind"
source = "/"
target = "/host"
readonly = true
}
}
resources {
cpu = 50
memory = 50
}
}
}
}

46
nomad/metrics/metrics.tf Normal file
View File

@ -0,0 +1,46 @@
# Base domain under which services are exposed.
# NOTE(review): not referenced by any active resource in this file; only the
# commented-out grafana job below uses it. Confirm before removing.
variable "base_hostname" {
type = string
description = "Base hostname to serve content from"
default = "dev.homelab"
}
# Consul catalog address; passed through to the Prometheus jobspec for its
# consul_sd scrape configs. No default — must be supplied by the caller.
variable "consul_address" {
type = string
description = "address of consul server for dynamic scraping"
}
# Submits the exporters system job (cAdvisor + node_exporter) from the
# adjacent exporters.nomad jobspec. hcl2 parsing is enabled; the jobspec
# takes no variables.
resource "nomad_job" "exporters" {
hcl2 {
enabled = true
}
jobspec = file("${path.module}/exporters.nomad")
}
# Submits the Prometheus server job, forwarding the Consul address as an HCL2
# job variable (read by `variable "consul_address"` in prometheus.nomad).
resource "nomad_job" "prometheus" {
hcl2 {
enabled = true
vars = {
# Direct reference instead of the interpolation-only "${...}" wrapping,
# which has been deprecated since Terraform 0.12.
consul_address = var.consul_address
}
}
jobspec = file("${path.module}/prometheus.nomad")
}
#
# resource "nomad_job" "prometheus" {
# hcl2 {
# enabled = true
# }
#
# }
#
# resource "nomad_job" "grafana" {
# hcl2 {
# enabled = true
# vars = {
# "base_hostname" = "${var.base_hostname}",
# }
# }
#
# }

View File

@ -0,0 +1,182 @@
# Consul HTTP address injected into the rendered Prometheus config below.
variable "consul_address" {
type = string
description = "Full address of Consul instance to get catalog from"
# NOTE(review): port 5400 is non-standard (Consul HTTP usually listens on
# 8500) — confirm this matches the local Consul listener.
default = "http://127.0.0.1:5400"
}
# Prometheus server job: scrapes Nomad servers/clients, Consul, the exporter
# services and Envoy sidecars, all discovered through the Consul catalog.
job "prometheus" {
datacenters = ["dc1"]
group "prometheus" {
count = 1
network {
mode = "bridge"
# In-container web UI/API port; host side allocated from "nomad-bridge".
port "web" {
host_network = "nomad-bridge"
to = 9090
}
}
# Unnamed service: Nomad derives the Consul service name from the job.
service {
port = "web"
connect {
sidecar_service {
proxy {
local_service_port = 9090
}
}
sidecar_task {
resources {
cpu = 50
memory = 50
}
}
}
check {
type = "http"
path = "/"
port = "web"
interval = "10s"
timeout = "10s"
}
// TODO: Remove traefik tags
tags = [
"traefik.enable=true",
"traefik.http.routers.prometheus.entrypoints=web,websecure",
"traefik.http.routers.prometheus.rule=Host(`prometheus.dev.homelab`)",
"traefik.http.routers.prometheus.tls=true",
]
}
task "prometheus" {
driver = "docker"
config {
image = "prom/prometheus:v2.30.2"
ports = ["web"]
args = [
# Points at the file rendered by the template block below (bind-mounted
# into the container via the mount block).
"--config.file=/etc/prometheus/config/prometheus.yml",
# NOTE(review): no host volume backs /prometheus, so TSDB data does not
# survive allocation replacement — confirm this is intended.
"--storage.tsdb.path=/prometheus",
"--web.listen-address=0.0.0.0:9090",
"--web.console.libraries=/usr/share/prometheus/console_libraries",
"--web.console.templates=/usr/share/prometheus/consoles",
]
mount {
type = "bind"
target = "/etc/prometheus/config"
source = "local/config"
}
}
# prometheus.yml. ${var.consul_address} inside the heredoc is an HCL2 job
# variable (supplied via the nomad_job resource's vars), substituted when
# the jobspec is parsed — not a runtime consul-template function.
template {
data = <<EOF
---
global:
scrape_interval: 30s
evaluation_interval: 3s
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- 0.0.0.0:9090
- job_name: "nomad_server"
metrics_path: "/v1/metrics"
params:
format:
- "prometheus"
consul_sd_configs:
- server: "${var.consul_address}"
services:
- "nomad"
tags:
- "http"
- job_name: "nomad_client"
metrics_path: "/v1/metrics"
params:
format:
- "prometheus"
consul_sd_configs:
- server: "${var.consul_address}"
services:
- "nomad-client"
- job_name: "consul"
metrics_path: "/v1/agent/metrics"
params:
format:
- "prometheus"
consul_sd_configs:
- server: "${var.consul_address}"
services:
- "consul"
relabel_configs:
- source_labels: [__meta_consul_address]
replacement: $1:8500
target_label: __address__
- job_name: "exporters"
metrics_path: "/metrics"
consul_sd_configs:
- server: "${var.consul_address}"
services:
- "cadvisor"
- "nodeexporter"
relabel_configs:
- source_labels: [__meta_consul_service_metadata_metrics_addr]
action: keep
regex: (.+)
- source_labels: [__meta_consul_service_metadata_metrics_addr]
target_label: __address__
- source_labels: [__meta_consul_service]
target_label: consul_service
- source_labels: [__meta_consul_node]
target_label: consul_node
- source_labels: [__meta_consul_service_nomad_dc]
target_label: nomad_dc
- source_labels: [__meta_consul_service_nomad_node_name]
target_label: nomad_node_name
- job_name: "envoy"
metrics_path: "/metrics"
consul_sd_configs:
- server: "${var.consul_address}"
relabel_configs:
- source_labels: [__meta_consul_service]
action: drop
regex: (.+)-sidecar-proxy
- source_labels: [__meta_consul_service_metadata_envoy_metrics_addr]
action: keep
regex: (.+)
- source_labels: [__meta_consul_service_metadata_envoy_metrics_addr]
target_label: __address__
- source_labels: [__meta_consul_service]
target_label: consul_service
- source_labels: [__meta_consul_node]
target_label: consul_node
- source_labels: [__meta_consul_service_nomad_dc]
target_label: nomad_dc
- source_labels: [__meta_consul_service_nomad_node_name]
target_label: nomad_node_name
EOF
# On re-render, signal instead of restarting: Prometheus reloads its
# configuration on SIGHUP.
change_mode = "signal"
change_signal = "SIGHUP"
destination = "local/config/prometheus.yml"
}
resources {
cpu = 100
memory = 200
}
}
}
}

View File

@ -32,11 +32,11 @@ provider "nomad" {
# Define services as modules
module "mysql-server" {
source = "./mysql"
base_hostname = var.base_hostname
}
# module "mysql-server" {
# source = "./mysql"
#
# base_hostname = var.base_hostname
# }
module "blocky" {
source = "./blocky"
@ -51,14 +51,21 @@ module "traefik" {
base_hostname = var.base_hostname
}
module "nextcloud" {
source = "./nextcloud"
module "metrics" {
source = "./metrics"
consul_address = var.consul_address
base_hostname = var.base_hostname
depends_on = [module.mysql-server]
}
# module "nextcloud" {
# source = "./nextcloud"
#
# base_hostname = var.base_hostname
#
# depends_on = [module.mysql-server]
# }
resource "nomad_job" "whoami" {
hcl2 {
enabled = true

View File

@ -19,6 +19,12 @@
consul_ports_grpc: 8502
consul_client_address: "0.0.0.0"
# Enable metrics
consul_config_custom:
telemetry:
prometheus_retention_time: "2h"
tasks:
- name: Start Consul
systemd:
@ -26,6 +32,7 @@
name: consul
- name: Add values
# TODO: This can be run from localhost by using an address
block:
- name: Install pip
pip:
@ -35,6 +42,7 @@
consul_kv:
key: ansible_test
value: Hello from Ansible!
run_once: true
- name: Build Nomad cluster
@ -49,28 +57,55 @@
nomad_install_upgrade: true
nomad_allow_purge_config: true
nomad_encrypt_enable: true
# nomad_use_consul: true
nomad_cni_enable: true
nomad_cni_version: 1.0.1
nomad_docker_enable: true
nomad_docker_dmsetup: false
nomad_bind_address: 0.0.0.0
# Properly map install arch
nomad_architecture_map:
x86_64: amd64
armhfv6: arm
armv7l: arm
nomad_encrypt_enable: true
# nomad_use_consul: true
# Metrics
nomad_telemetry: true
nomad_telemetry_prometheus_metrics: true
nomad_telemetry_publish_allocation_metrics: true
nomad_telemetry_publish_node_metrics: true
# Enable container plugins
nomad_cni_enable: true
nomad_cni_version: 1.0.1
nomad_docker_enable: true
nomad_docker_dmsetup: false
# Customize docker plugin
nomad_plugins:
docker:
config:
volumes:
enabled: true
selinuxlabel: "z"
extra_labels:
- "job_name"
- "job_id"
- "task_group_name"
- "task_name"
- "namespace"
- "node_name"
- "node_id"
# Bind nomad
nomad_bind_address: 0.0.0.0
# Create networks for binding task ports
nomad_host_networks:
- name: nomad-bridge
# cidr: 172.26.64.0/20
interface: nomad
reserved_ports: "22"
- name: loopback
interface: lo
reserved_ports: "22"
tasks:
- name: Start Nomad
systemd:

View File

@ -5,7 +5,8 @@ variable "base_hostname" {
}
variable "consul_address" {
type = string
type = string
description = "address of consul server for dynamic routes"
}
resource "nomad_job" "traefik" {