# homelab-nomad/core/prometheus.nomad

job "prometheus" {
datacenters = ["dc1"]
group "prometheus" {
count = 1
# Group networking in bridge mode with two named ports.
# The percent-brace lines are template directives resolved before Nomad parses
# this file; when `use_wesher` is true each port is bound to the "wesher"
# host network. Keep each directive on its own line, directly around the
# attribute it guards.
network {
  mode = "bridge"

  # Prometheus web UI / API: host port is chosen by the scheduler and
  # forwarded to 9090 inside the group's network.
  port "web" {
    %{~ if use_wesher ~}
    host_network = "wesher"
    %{~ endif ~}
    to = 9090
  }

  # Pushgateway: fixed host port 9091.
  port "pushgateway" {
    %{~ if use_wesher ~}
    host_network = "wesher"
    %{~ endif ~}
    static = 9091
  }
}
# Ask Nomad to keep the allocation's ephemeral data when the allocation is
# replaced: prefer the same node (sticky) and copy the data over when the
# replacement lands on a different node (migrate).
ephemeral_disk {
  sticky  = true
  migrate = true
}
# Registers the "web" port as the `prometheus` service with the Nomad
# service provider and health-checks it over HTTP.
service {
  name     = "prometheus"
  port     = "web"
  provider = "nomad"

  # TODO: Remove traefik tags
  tags = [
    "traefik.enable=true",
    "traefik.http.routers.prometheus.entryPoints=websecure",
  ]

  # Poll /-/healthy every 10s (3s timeout).
  check {
    type     = "http"
    path     = "/-/healthy"
    timeout  = "3s"
    interval = "10s"

    # Restart the task after 3 failed checks; failures during the first
    # 5 minutes after start are not counted.
    check_restart {
      grace = "5m"
      limit = 3
    }
  }
}
# Registers the "pushgateway" port as the `pushgateway` service with the
# Nomad service provider and health-checks it over HTTP.
service {
  name     = "pushgateway"
  port     = "pushgateway"
  provider = "nomad"

  # Poll /-/healthy every 10s (3s timeout).
  check {
    type     = "http"
    path     = "/-/healthy"
    timeout  = "3s"
    interval = "10s"

    # Restart the task after 3 failed checks; failures during the first
    # 5 minutes after start are not counted.
    check_restart {
      grace = "5m"
      limit = 3
    }
  }
}
# Prometheus server task.
#
# Runs the official Docker image, stores its TSDB under the allocation's
# shared data directory, and reads a configuration file that is rendered by
# the template block below into the task directory. A change to the rendered
# file sends SIGHUP to the task instead of restarting it.
#
# Paths that reference Nomad runtime variables are written with a doubled
# dollar sign; presumably this keeps the outer template pass from expanding
# them so Nomad receives a normal variable reference - confirm against the
# code that renders this file.
task "prometheus" {
  driver = "docker"

  config {
    image = "prom/prometheus:v2.43.0"
    ports = ["web"]

    args = [
      "--config.file=$${NOMAD_TASK_DIR}/prometheus.yml",
      "--storage.tsdb.path=$${NOMAD_ALLOC_DIR}/data/tsdb",
      "--web.listen-address=0.0.0.0:9090",
      "--web.console.libraries=/usr/share/prometheus/console_libraries",
      "--web.console.templates=/usr/share/prometheus/consoles",
    ]
  }

  # Scrape configuration. Four jobs are defined:
  #   prometheus      - the server itself on 127.0.0.1:9090
  #   pushgateway     - the sibling task on 127.0.0.1:9091 (labels pushed by
  #                     clients are honoured)
  #   nomad_client    - Nomad agents, discovered through the
  #                     `nomad-client-stalker` service and scraped on :4646
  #   nomad_services  - any Nomad service whose tags contain
  #                     `prometheus.scrape`
  #
  # NOTE(review): evaluation_interval is 3s while scrape_interval is 30s. No
  # rule files are configured here, so the value has no visible effect -
  # confirm whether 30s was intended.
  #
  # The heredoc body and its terminator stay at column 0 so the rendered YAML
  # is not shifted.
  template {
    data = <<EOF
---
global:
  scrape_interval: 30s
  evaluation_interval: 3s
scrape_configs:
  - job_name: prometheus
    static_configs:
      - targets:
          - 127.0.0.1:9090
  - job_name: "pushgateway"
    honor_labels: true
    static_configs:
      - targets:
          - 127.0.0.1:9091
  - job_name: "nomad_client"
    metrics_path: "/v1/metrics"
    params:
      format:
        - "prometheus"
    nomad_sd_configs:
      # TODO: Use NOMAD_SECRETS_DIR/api.sock and workload idenity when
      # workload acls can be set using terraform
      - server: "http://{{env "attr.unique.network.ip-address"}}:4646"
    relabel_configs:
      - source_labels: [__meta_nomad_service]
        regex: nomad-client-stalker
        action: keep
      - source_labels: [__meta_nomad_address]
        replacement: "$1:4646"
        target_label: __address__
  - job_name: "nomad_services"
    metrics_path: "/metrics"
    nomad_sd_configs:
      - server: "http://{{env "attr.unique.network.ip-address"}}:4646"
    relabel_configs:
      - source_labels: [__meta_nomad_tags]
        regex: .*(prometheus\.scrape).*
        action: keep
      - source_labels: [__meta_nomad_service_address,__meta_nomad_service_port]
        separator: ":"
        target_label: __address__
      - source_labels: [__meta_nomad_service]
        target_label: nomad_service
      - source_labels: [__meta_nomad_dc]
        target_label: nomad_dc
      - source_labels: [__meta_nomad_node_id]
        target_label: nomad_node_id
EOF

    destination   = "$${NOMAD_TASK_DIR}/prometheus.yml"
    change_mode   = "signal"
    change_signal = "SIGHUP"
  }

  resources {
    cpu    = 100
    memory = 300
  }
}
# Pushgateway task: accepts pushed metrics on the group's "pushgateway" port
# and persists them to a file so they survive restarts.
task "pushgateway" {
  driver = "docker"

  config {
    # NOTE(review): no image tag is given, so the registry's default tag is
    # pulled and the running version can change between deployments.
    # Consider pinning a release, as the prometheus task does.
    image = "prom/pushgateway"
    ports = ["pushgateway"]

    args = [
      # Keep the persistence file under the allocation's data/ directory.
      # The group's sticky/migrate ephemeral_disk carries over alloc/data
      # (not the allocation directory root), so a file placed directly in the
      # allocation directory would be lost when the allocation is replaced.
      "--persistence.file=$${NOMAD_ALLOC_DIR}/data/pushgateway-persistence",
    ]
  }

  resources {
    cpu    = 50
    memory = 50
  }
}
}
}