diff --git a/nomad/metrics/grafana.nomad b/nomad/metrics/grafana.nomad index 97a8477..deb2609 100644 --- a/nomad/metrics/grafana.nomad +++ b/nomad/metrics/grafana.nomad @@ -4,6 +4,8 @@ job "grafana" { group "grafana" { count = 1 + # TODO: Add backup task or job + network { mode = "bridge" @@ -64,14 +66,8 @@ job "grafana" { mount { type = "bind" - target = "/etc/grafana/grafana.ini" - source = "local/config/grafana.ini" - } - - mount { - type = "bind" - target = "/etc/grafana/provisioning" - source = "local/config/provisioning" + target = "/etc/grafana" + source = "local/config" } } @@ -80,42 +76,21 @@ job "grafana" { "GF_INSTALL_PLUGINS" = "grafana-clock-panel,grafana-piechart-panel,grafana-polystat-panel", } + %{ for config_file in fileset(join("/", [module_path, "grafana"]), "**") ~} template { data = < 0 ~} + left_delimiter = "<<<<" + right_delimiter = ">>>>" + %{ endif ~} } + %{ endfor ~} resources { cpu = 100 diff --git a/nomad/metrics/grafana/grafana.ini b/nomad/metrics/grafana/grafana.ini new file mode 100644 index 0000000..f04184f --- /dev/null +++ b/nomad/metrics/grafana/grafana.ini @@ -0,0 +1,458 @@ +##################### Grafana Configuration Example ##################### +# +# Everything has defaults so you only need to uncomment things you want to +# change + +# possible values : production, development +;app_mode = production + +# instance name, defaults to HOSTNAME environment variable value or hostname if HOSTNAME var is empty +;instance_name = ${HOSTNAME} + +#################################### Paths #################################### +[paths] +# Path to where grafana can store temp files, sessions, and the sqlite3 db (if that is used) +data = /var/lib/grafana + +# Directory where grafana can store logs +;logs = /var/log/grafana + +# Directory where grafana will automatically scan and look for plugins +;plugins = /var/lib/grafana/plugins + +# folder that contains provisioning config files that grafana will apply on startup and while running. +provisioning = /etc/grafana/provisioning + +#################################### Server #################################### +[server] +# Protocol (http, https, socket) +;protocol = http + +# The ip address to bind to, empty will bind to all interfaces +;http_addr = + +# The http port to use +;http_port = 3000 + +# The public facing domain name used to access grafana from a browser +;domain = localhost + +# Redirect to correct domain if host header does not match domain +# Prevents DNS rebinding attacks +;enforce_domain = false + +# The full public facing url you use in browser, used for redirects and emails +# If you use reverse proxy and sub path specify full url (with sub path) +root_url = https://grafana.thefij.rocks + +# Log web requests +;router_logging = false + +# the path relative working path +;static_root_path = public + +# enable gzip +;enable_gzip = false + +# https certs & key file +;cert_file = +;cert_key = + +# Unix socket path +;socket = + +#################################### Database #################################### +[database] +# You can configure the database connection by specifying type, host, name, user and password +# as separate properties or as on string using the url properties. + +# Either "mysql", "postgres" or "sqlite3", it's your choice +;type = sqlite3 +;host = 127.0.0.1:3306 +;name = grafana +;user = root +# If the password contains # or ; you have to wrap it with triple quotes. Ex """#password;""" +;password = + +# Use either URL or the previous fields to configure the database +# Example: mysql://user:secret@host:port/database +;url = + +# For "postgres" only, either "disable", "require" or "verify-full" +;ssl_mode = disable + +# For "sqlite3" only, path relative to data_path setting +;path = grafana.db + +# Max idle conn setting default is 2 +;max_idle_conn = 2 + +# Max conn setting default is 0 (mean not set) +;max_open_conn = + +# Connection Max Lifetime default is 14400 (means 14400 seconds or 4 hours) +;conn_max_lifetime = 14400 + +# Set to true to log the sql calls and execution times. +log_queries = + +#################################### Session #################################### +[session] +# Either "memory", "file", "redis", "mysql", "postgres", default is "file" +;provider = file + +# Provider config options +# memory: not have any config yet +# file: session dir path, is relative to grafana data_path +# redis: config like redis server e.g. `addr=127.0.0.1:6379,pool_size=100,db=grafana` +# mysql: go-sql-driver/mysql dsn config string, e.g. `user:password@tcp(127.0.0.1:3306)/database_name` +# postgres: user=a password=b host=localhost port=5432 dbname=c sslmode=disable +;provider_config = sessions + +# Session cookie name +;cookie_name = grafana_sess + +# If you use session in https only, default is false +;cookie_secure = false + +# Session life time, default is 86400 +;session_life_time = 86400 + +#################################### Data proxy ########################### +[dataproxy] + +# This enables data proxy logging, default is false +;logging = false + +#################################### Analytics #################################### +[analytics] +# Server reporting, sends usage counters to stats.grafana.org every 24 hours. +# No ip addresses are being tracked, only simple counters to track +# running instances, dashboard and error counts. It is very helpful to us. +# Change this option to false to disable reporting. +;reporting_enabled = true + +# Set to false to disable all checks to https://grafana.net +# for new vesions (grafana itself and plugins), check is used +# in some UI views to notify that grafana or plugin update exists +# This option does not cause any auto updates, nor send any information +# only a GET request to http://grafana.com to get latest versions +;check_for_updates = true + +# Google Analytics universal tracking code, only enabled if you specify an id here +;google_analytics_ua_id = + +#################################### Security #################################### +[security] +# default admin user, created on startup +;admin_user = admin + +# default admin password, can be changed before first start of grafana, or in profile settings +;admin_password = admin + +# used for signing +;secret_key = SW2YcwTIb9zpOOhoPsMm + +# Auto-login remember days +;login_remember_days = 7 +;cookie_username = grafana_user +;cookie_remember_name = grafana_remember + +# disable gravatar profile images +;disable_gravatar = false + +# data source proxy whitelist (ip_or_domain:port separated by spaces) +;data_source_proxy_whitelist = + +# disable protection against brute force login attempts +;disable_brute_force_login_protection = false + +#################################### Snapshots ########################### +[snapshots] +# snapshot sharing options +;external_enabled = true +;external_snapshot_url = https://snapshots-origin.raintank.io +;external_snapshot_name = Publish to snapshot.raintank.io + +# remove expired snapshot +;snapshot_remove_expired = true + +#################################### Dashboards History ################## +[dashboards] +# Number dashboard versions to keep (per dashboard). Default: 20, Minimum: 1 +;versions_to_keep = 20 + +#################################### Users ############################### +[users] +# disable user signup / registration +;allow_sign_up = true + +# Allow non admin users to create organizations +;allow_org_create = true + +# Set to true to automatically assign new users to the default organization (id 1) +;auto_assign_org = true + +# Default role new users will be automatically assigned (if disabled above is set to true) +;auto_assign_org_role = Viewer + +# Background text for the user field on the login page +;login_hint = email or username + +# Default UI theme ("dark" or "light") +;default_theme = dark + +# External user management, these options affect the organization users view +;external_manage_link_url = +;external_manage_link_name = +;external_manage_info = + +# Viewers can edit/inspect dashboard settings in the browser. But not save the dashboard. +;viewers_can_edit = false + +[auth] +# Set to true to disable (hide) the login form, useful if you use OAuth, defaults to false +;disable_login_form = false + +# Set to true to disable the signout link in the side menu. useful if you use auth.proxy, defaults to false +;disable_signout_menu = false + +# URL to redirect the user to after sign out +;signout_redirect_url = + +#################################### Anonymous Auth ########################## +[auth.anonymous] +# enable anonymous access +;enabled = false + +# specify organization name that should be used for unauthenticated users +;org_name = Main Org. + +# specify role for unauthenticated users +;org_role = Viewer + +#################################### Github Auth ########################## +[auth.github] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://github.com/login/oauth/authorize +;token_url = https://github.com/login/oauth/access_token +;api_url = https://api.github.com/user +;team_ids = +;allowed_organizations = + +#################################### Google Auth ########################## +[auth.google] +;enabled = false +;allow_sign_up = true +;client_id = some_client_id +;client_secret = some_client_secret +;scopes = https://www.googleapis.com/auth/userinfo.profile https://www.googleapis.com/auth/userinfo.email +;auth_url = https://accounts.google.com/o/oauth2/auth +;token_url = https://accounts.google.com/o/oauth2/token +;api_url = https://www.googleapis.com/oauth2/v1/userinfo +;allowed_domains = + +#################################### Generic OAuth ########################## +[auth.generic_oauth] +;enabled = true +;name = Cloudron +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email,read:org +;auth_url = https://foo.bar/login/oauth/authorize +;token_url = https://foo.bar/login/oauth/access_token +;api_url = https://foo.bar/user +;team_ids = +;allowed_organizations = + +#################################### Grafana.com Auth #################### +[auth.grafana_com] +;enabled = false +;allow_sign_up = true +;client_id = some_id +;client_secret = some_secret +;scopes = user:email +;allowed_organizations = + +#################################### Auth Proxy ########################## +[auth.proxy] +enabled = true +header_name = X-WEBAUTH-USER +header_property = username +auto_sign_up = true +whitelist = 192.168.2.20 + +#################################### Basic Auth ########################## +[auth.basic] +;enabled = true + +#################################### Auth LDAP ########################## +[auth.ldap] +;enabled = false +;config_file = /etc/grafana/ldap.toml +;allow_sign_up = true + +#################################### SMTP / Emailing ########################## +[smtp] +enabled = true +host = smtp.mailgun.org:587 +user = from_env +# If the password contains # or ; you have to wrap it with trippel quotes. Ex """#password;""" +password = from_env +;cert_file = +;key_file = +;skip_verify = false +from_address = grafana@iamthefij.com +from_name = Grafana +# EHLO identity in SMTP dialog (defaults to instance_name) +;ehlo_identity = dashboard.example.com + +[emails] +;welcome_email_on_sign_up = false + +#################################### Logging ########################## +[log] +# Either "console", "file", "syslog". Default is console and file +# Use space to separate multiple modes, e.g. "console file" +;mode = console file + +# Either "debug", "info", "warn", "error", "critical", default is "info" +;level = info + +# optional settings to set different levels for specific loggers. Ex filters = sqlstore:debug +;filters = + +# For "console" mode only +[log.console] +;level = + +# log line format, valid options are text, console and json +;format = console + +# For "file" mode only +[log.file] +;level = + +# log line format, valid options are text, console and json +;format = text + +# This enables automated log rotate(switch of following options), default is true +;log_rotate = true + +# Max line number of single file, default is 1000000 +;max_lines = 1000000 + +# Segment log daily, default is true +;daily_rotate = true + +# Expired days of log file(delete after max days), default is 7 +;max_days = 7 + +[log.syslog] +;level = + +# log line format, valid options are text, console and json +;format = text + +# Syslog network type and address. This can be udp, tcp, or unix. If left blank, the default unix endpoints will be used. +;network = +;address = + +# Syslog facility. user, daemon and local0 through local7 are valid. +;facility = + +# Syslog tag. By default, the process' argv[0] is used. +;tag = + +#################################### Alerting ############################ +[alerting] +# Disable alerting engine & UI features +;enabled = true +# Makes it possible to turn off alert rule execution but alerting UI is visible +;execute_alerts = true + +#################################### Explore ############################# +[explore] +# Enable the Explore section +;enabled = false + +#################################### Internal Grafana Metrics ########################## +# Metrics available at HTTP API Url /metrics +[metrics] +# Disable / Enable internal metrics +enabled = true + +# Publish interval +;interval_seconds = 10 + +# Send internal metrics to Graphite +[metrics.graphite] +# Enable by setting the address setting (ex localhost:2003) +;address = +;prefix = prod.grafana.%(instance_name)s. + +#################################### Distributed tracing ############ +[tracing.jaeger] +# Enable by setting the address sending traces to jaeger (ex localhost:6831) +;address = localhost:6831 +# Tag that will always be included in when creating new spans. ex (tag1:value1,tag2:value2) +;always_included_tag = tag1:value1 +# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote +;sampler_type = const +# jaeger samplerconfig param +# for "const" sampler, 0 or 1 for always false/true respectively +# for "probabilistic" sampler, a probability between 0 and 1 +# for "rateLimiting" sampler, the number of spans per second +# for "remote" sampler, param is the same as for "probabilistic" +# and indicates the initial sampling rate before the actual one +# is received from the mothership +;sampler_param = 1 + +#################################### Grafana.com integration ########################## +# Url used to to import dashboards directly from Grafana.com +[grafana_com] +;url = https://grafana.com + +#################################### External image storage ########################## +[external_image_storage] +# Used for uploading images to public servers so they can be included in slack/email messages. +# you can choose between (s3, webdav, gcs, azure_blob, local) +provider = s3 + +[external_image_storage.s3] +endpoint = https://minio.thefij.rocks +bucket = grafana-images +region = us-east-1 +path_style_access = true +;path = +;access_key = +;secret_key = + +[external_image_storage.webdav] +;url = +;public_url = +;username = +;password = + +[external_image_storage.gcs] +;key_file = +;bucket = +;path = + +[external_image_storage.azure_blob] +;account_name = +;account_key = +;container_name = + +[external_image_storage.local] +# does not require any configuration + +[rendering] +server_url = http://renderer:8081/render +callback_url = http://grafana:3000/ +concurrent_render_request_limit = 3 diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-host-monitoring.json b/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-host-monitoring.json new file mode 100644 index 0000000..9688dee --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-host-monitoring.json @@ -0,0 +1,1582 @@ +{ + "__inputs": [ + { + "name": "Prometheus", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "5.2.2" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "5.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "5.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "5.0.0" + } + ], + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Docker Monitoring Template", + "editable": true, + "gnetId": 179, + "graphTooltip": 1, + "id": null, + "iteration": 1533037835187, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 17, + "panels": [], + "title": "Host Info", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 1 + }, + "id": 15, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "time() - process_start_time_seconds{job=\"prometheus\"}", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 5, + "x": 6, + "y": 1 + }, + "id": 13, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(ALERTS)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "Alerts", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "0" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 11, + "y": 1 + }, + "id": 11, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(up)", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "Targets Online", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 15, + "y": 1 + }, + "id": 31, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{job=\"cadvisor\", name!=\"\"}[5m]))", + "format": "time_series", + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "Running Containers", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 0, + "y": 8 + }, + "id": 4, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_memory_MemTotal_bytes) - sum(node_memory_MemFree_bytes +node_memory_Buffers_bytes + node_memory_Cached_bytes) ) / sum(node_memory_MemTotal_bytes) * 100", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Memory usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 6, + "x": 6, + "y": 8 + }, + "id": 6, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total[1m] ) )) / count(node_cpu_seconds_total{mode=\"system\"}) * 100", + "format": "time_series", + "interval": "1m", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 7, + "x": 12, + "y": 8 + }, + "id": 7, + "interval": null, + "isNew": true, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes - container_fs_usage_bytes) / sum(container_fs_limit_bytes)", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 10 + } + ], + "thresholds": "65, 90", + "title": "Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "aliasColors": { + "RECEIVE": "#ea6460", + "SENT": "#1f78c1", + "TRANSMIT": "#1f78c1" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 4, + "gridPos": { + "h": 9, + "w": 6, + "x": 0, + "y": 14 + }, + "id": 25, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "RECEIVE", + "refId": "A" + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "format": "time_series", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "TRANSMIT", + "refId": "B" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Node Network Traffic", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Available Memory": "#508642", + "Used Memory": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 3, + "gridPos": { + "h": 9, + "w": 6, + "x": 6, + "y": 14 + }, + "id": 27, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_memory_MemTotal_bytes) - sum(node_memory_MemAvailable_bytes)", + "format": "time_series", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "Used Memory", + "refId": "B" + }, + { + "expr": "sum(node_memory_MemAvailable_bytes)", + "format": "time_series", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "Available Memory", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Node Mermory", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "Available Memory": "#508642", + "Free Storage": "#447ebc", + "Total Storage Available": "#508642", + "Used Memory": "#bf1b00", + "Used Storage": "#bf1b00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 3, + "gridPos": { + "h": 9, + "w": 7, + "x": 12, + "y": 14 + }, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(node_filesystem_free_bytes {job=\"node-exporter\", instance=~\".*9100\", device=~\"/dev/.*\", mountpoint!=\"/var/lib/docker/aufs\"}) ", + "format": "time_series", + "interval": "2m", + "intervalFactor": 2, + "legendFormat": "Free Storage", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Filesystem Available", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": null, + "format": "decbytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "s", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 23 + }, + "id": 19, + "panels": [], + "repeat": null, + "title": "Container Performance", + "type": "row" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 0, + "y": 24 + }, + "id": 3, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "container_cpu_load_average_10s{image!=\"\"}", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_cpu_user_seconds_total", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 10, + "w": 6, + "x": 6, + "y": 24 + }, + "id": 2, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_max_usage_bytes{image!=\"\"}", + "format": "time_series", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "100%", + "gridPos": { + "h": 13, + "w": 10, + "x": 12, + "y": 24 + }, + "id": 23, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "ALERTS", + "format": "table", + "intervalFactor": 1, + "refId": "A" + } + ], + "title": "Alerts", + "transform": "table", + "type": "table" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 14, + "w": 6, + "x": 0, + "y": 34 + }, + "id": 8, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_network_receive_bytes_total", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Input", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "gridPos": { + "h": 14, + "w": 6, + "x": 6, + "y": 34 + }, + "id": 9, + "isNew": true, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "metric": "container_network_transmit_bytes_total", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Output", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "datasource": "Prometheus", + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 10, + "x": 12, + "y": 37 + }, + "id": 30, + "links": [], + "pageSize": 10, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "link": false, + "linkUrl": "", + "pattern": "Time", + "type": "date" + }, + { + "alias": "", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "cadvisor_version_info", + "format": "table", + "instant": false, + "interval": "15m", + "intervalFactor": 2, + "legendFormat": "cAdvisor Version: {{cadvisorVersion}}", + "refId": "A" + }, + { + "expr": "prometheus_build_info", + "format": "table", + "interval": "15m", + "intervalFactor": 2, + "legendFormat": "Prometheus Version: {{version}}", + "refId": "B" + }, + { + "expr": "node_exporter_build_info", + "format": "table", + "interval": "15m", + "intervalFactor": 2, + "legendFormat": "Node-Exporter Version: {{version}}", + "refId": "C" + } + ], + "title": "Running Versions", + "transform": "table", + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 16, + "style": "dark", + "tags": [ + "docker", + "prometheus", + "node-exporter", + "cadvisor" + ], + "templating": { + "list": [ + { + "auto": false, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "1m", + "value": "1m" + }, + "hide": 0, + "label": "interval", + "name": "interval", + "options": [ + { + "selected": true, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Docker and Host Monitoring w/ Prometheus", + "uid": "64nrElFmk", + "version": 4 +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-system-monitoring-893-r5.json b/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-system-monitoring-893-r5.json new file mode 100644 index 0000000..b5f93f9 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/docker-and-system-monitoring-893-r5.json @@ -0,0 +1,2268 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)", + "editable": true, + "gnetId": 893, + "graphTooltip": 1, + "iteration": 1594769328554, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 0 + }, + "height": "", + "id": 24, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "30%", + "prefix": "", + "prefixFontSize": "20%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "time() - min(node_boot_time_seconds{scrape_host=~\"$server\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 31, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(rate(container_last_seen{scrape_host=~\"$server\", name=~\".+\"}[$interval]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "", + "title": "Containers", + "type": "singlestat", + "valueFontSize": "120%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 1, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max((node_filesystem_size_bytes{fstype=~\"xfs|ext4\",scrape_host=~\"$server\"} - node_filesystem_free_bytes{fstype=~\"xfs|ext4\",scrape_host=~\"$server\"} )/ node_filesystem_size_bytes{fstype=~\"xfs|ext4\",scrape_host=~\"$server\"})", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "0.75, 0.90", + "title": "Disk space", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 25, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max((node_memory_MemTotal_bytes{scrape_host=~\"$server\"} - node_memory_MemAvailable_bytes{scrape_host=~\"$server\"}) / node_memory_MemTotal_bytes{scrape_host=~\"$server\"}) * 100", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "70, 90", + "title": "Memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "decbytes", + "gauge": { + "maxValue": 500000000, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 30, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(node_memory_SwapTotal_bytes{scrape_host=~\"$server\"} - node_memory_SwapFree_bytes{scrape_host=~\"$server\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "400000000", + "title": "Swap", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "format": "percentunit", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 27, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(50, 189, 31, 0.18)", + "full": false, + "lineColor": "rgb(69, 193, 31)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "max(sum by(job, scrape_host)(node_load1{scrape_host=~\"$server\"}) / count by(job, scrape_host)(count by(job, scrape_host, cpu)(node_cpu_seconds_total{scrape_host=~\"$server\"})))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "A", + "step": 1800 + } + ], + "thresholds": "0.8,0.9", + "title": "Load", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": { + "SENT": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 4, + "x": 0, + "y": 4 + }, + "hiddenSeries": false, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\", scrape_host=~\"$server\"}[$interval])) by (id, scrape_host)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "RECEIVED", + "refId": "A", + "step": 600 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\", scrape_host=~\"$server\"}[$interval])) by (id, scrape_host)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "SENT", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 4, + "x": 4, + "y": 4 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "max by(scrape_host)(rate(process_cpu_seconds_total{job=\"node_exporter\", scrape_host=~\"$server\"}[$interval])) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ scrape_host }}", + "metric": "", + "refId": "C", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1706", + "format": "percentunit", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:1707", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 1.25 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "60s", + "handler": 1, + "message": "Home", + "name": "Load alert", + "noDataState": "keep_state", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 4 + }, + "hiddenSeries": false, + "id": 28, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "connected", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(job, scrape_host)(node_load1{scrape_host=~\"$server\"}) / (count by(job, scrape_host)(count by(job, scrape_host, cpu)(node_cpu_seconds_total{scrape_host=~\"$server\"})))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ scrape_host }}", + "refId": "A", + "step": 600 + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 1.25 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": "1.50", + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.9 + ], + "type": "gt" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "60s", + "handler": 1, + "message": "Home", + "name": "Free/Used Disk Space alert", + "noDataState": "keep_state", + "notifications": [] + }, + "aliasColors": { + "Belegete Festplatte": "#BF1B00", + "Free Disk Space": "#7EB26D", + "Used Disk Space": "#7EB26D", + "{}": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 4, + "x": 12, + "y": 4 + }, + "hiddenSeries": false, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "Used Disk Space", + "yaxis": 1 + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "(sum by(scrape_host)(node_filesystem_size_bytes{fstype=~\"xfs|ext4\", scrape_host=~\"$server\", job=\"node_exporter\"}) - sum by(scrape_host)(node_filesystem_free_bytes{fstype=~\"xfs|ext4\", scrape_host=~\"$server\", job=\"node_exporter\"})) / sum by (scrape_host)(node_filesystem_size_bytes{fstype=~\"xfs|ext4\",scrape_host=~\"$server\", job=\"node_exporter\"})", + "format": "time_series", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ scrape_host }}", + "refId": "A", + "step": 600 + } + ], + "thresholds": [ + { + "$$hashKey": "object:1537", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.9, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Used Disk Space", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1153", + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": 0, + "show": true + }, + { + "$$hashKey": "object:1154", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "alert": { + "alertRuleTags": {}, + "conditions": [ + { + "evaluator": { + "params": [ + 0.9 + ], + "type": "gt" + }, + "query": { + "params": [ + "G", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "60s", + "handler": 1, + "message": "Home", + "name": "Memory Usage alert", + "noDataState": "keep_state", + "notifications": [] + }, + "aliasColors": { + "Available Memory": "#7EB26D", + "Unavailable Memory": "#7EB26D" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 4, + "x": 16, + "y": 4 + }, + "hiddenSeries": false, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemAvailable_bytes{scrape_host=~\"$server\"} / node_memory_MemTotal_bytes{scrape_host=~\"$server\"}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ scrape_host }} memory", + "refId": "G", + "step": 600 + }, + { + "expr": "(node_memory_SwapTotal_bytes{scrape_host=~\"$server\"} - node_memory_SwapFree_bytes{scrape_host=~\"$server\"}) / node_memory_SwapTotal_bytes{scrape_host=~\"$server\"}", + "interval": "", + "legendFormat": "{{ scrape_host }} swap", + "refId": "A" + } + ], + "thresholds": [ + { + "$$hashKey": "object:1602", + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 0.9, + "yaxis": "left" + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:1243", + "format": "percentunit", + "label": "", + "logBase": 1, + "max": "1", + "min": 0, + "show": true + }, + { + "$$hashKey": "object:1244", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { + "IN on /sda": "#7EB26D", + "OUT on /sda": "#890F02" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 6, + "w": 4, + "x": 20, + "y": 4 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "-sum(rate(node_disk_read_bytes_total{scrape_host=~\"$server\"}[$interval])) by (scrape_host, device)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "OUT on {{scrape_host}}:/{{device}}", + "metric": "node_disk_read_bytes_total", + "refId": "A", + "step": 600 + }, + { + "expr": "sum(rate(node_disk_written_bytes_total{scrape_host=~\"$server\"}[$interval])) by (scrape_host, device)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "IN on {{scrape_host}}:/{{device}}", + "metric": "", + "refId": "B", + "step": 600 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Disk I/O", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 10 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\", scrape_host=~\"$server\"}[$interval])) by (name, scrape_host)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "refId": "A", + "step": 240 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\", scrape_host=~\"$server\"}[$interval])", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2389", + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2390", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 10 + }, + "hiddenSeries": false, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\", scrape_host=~\"$server\"}[$interval])) by (name, scrape_host)", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "refId": "A", + "step": 240 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\", scrape_host=~\"$server\"}[$interval])", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2545", + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2546", + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 5, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 17 + }, + "hiddenSeries": false, + "id": 1, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\", scrape_host=~\"$server\"}[$interval])) by (name, scrape_host) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "metric": "", + "refId": "F", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:816", + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "show": true + }, + { + "$$hashKey": "object:817", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fill": 3, + "fillGradient": 0, + "grid": {}, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 17 + }, + "hiddenSeries": false, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{name=~\".+\", scrape_host=~\"$server\"}) by (scrape_host, name)", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "refId": "A", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "$$hashKey": "object:2616", + "format": "decbytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "$$hashKey": "object:2617", + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [ + { + "$$hashKey": "object:2540", + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 8, + "x": 0, + "y": 24 + }, + "id": 37, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "styles": [ + { + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "10000000", + " 25000000" + ], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"} - container_memory_usage_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\", scrape_host=~\"$server\"}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "refId": "C", + "step": 240 + } + ], + "title": "Usage memory", + "transform": "timeseries_aggregations", + "type": "table-old" + }, + { + "columns": [ + { + "$$hashKey": "object:2710", + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 8, + "x": 8, + "y": 24 + }, + "id": 35, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": false + }, + "styles": [ + { + "align": "auto", + "colorMode": "cell", + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "80", + "90" + ], + "type": "number", + "unit": "percent" + } + ], + "targets": [ + { + "expr": "sum(100 - ((container_spec_memory_limit_bytes{name=~\".+\", scrape_host=~\"$server\"} - container_memory_usage_bytes{name=~\".+\", scrape_host=~\"$server\"}) * 100 / container_spec_memory_limit_bytes{name=~\".+\", scrape_host=~\"$server\"}) ) by (name, scrape_host) ", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\"}) by (name) ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 240 + } + ], + "title": "Remaining memory", + "transform": "timeseries_aggregations", + "type": "table-old" + }, + { + "columns": [ + { + "$$hashKey": "object:2757", + "text": "Current", + "value": "current" + } + ], + "datasource": null, + "editable": true, + "error": false, + "fieldConfig": { + "defaults": { + "custom": {} + }, + "overrides": [] + }, + "fontSize": "100%", + "gridPos": { + "h": 10, + "w": 8, + "x": 16, + "y": 24 + }, + "id": 36, + "links": [], + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 1, + "desc": true + }, + "styles": [ + { + "align": "auto", + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [ + "10000000", + " 25000000" + ], + "type": "number", + "unit": "decbytes" + } + ], + "targets": [ + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\", scrape_host=~\"$server\"} - container_memory_usage_bytes{name=~\".+\", scrape_host=~\"$server\"}) by (name) ", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "", + "refId": "A", + "step": 240 + }, + { + "expr": "sum(container_spec_memory_limit_bytes{name=~\".+\", scrape_host=~\"$server\"}) by (name, scrape_host) ", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}@{{scrape_host}}", + "refId": "B", + "step": 240 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 240 + } + ], + "title": "Limit memory", + "transform": "timeseries_aggregations", + "type": "table-old" + } + ], + "refresh": "5m", + "schemaVersion": 25, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".+", + "current": { + "selected": true, + "text": "All", + "value": [ + "$__all" + ] + }, + "datasource": "Prometheus", + "definition": "", + "hide": 0, + "includeAll": true, + "label": "Container Group", + "multi": true, + "name": "containergroup", + "options": [], + "query": "label_values(container_group)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": null, + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 50, + "auto_min": "50s", + "current": { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "7m", + "value": "7m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "queryValue": "", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + }, + { + "allValue": null, + "current": { + "selected": true, + "tags": [], + "text": "hapi.thefij + qubuntu + motionpi.thefij", + "value": [ + "hapi.thefij", + "qubuntu", + "motionpi.thefij" + ] + }, + "datasource": "Prometheus", + "definition": "label_values(node_boot_time_seconds, scrape_host)", + "hide": 0, + "includeAll": false, + "label": "Node", + "multi": true, + "name": "server", + "options": [], + "query": "label_values(node_boot_time_seconds, scrape_host)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": null, + "tags": [], + "tagsQuery": null, + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-12h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Docker and system monitoring", + "uid": "9O_Lz9nZz", + "version": 6 +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/minitor-monitor.json b/nomad/metrics/grafana/provisioning/dashboards/default/minitor-monitor.json new file mode 100644 index 0000000..edc3025 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/minitor-monitor.json @@ -0,0 +1,428 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 2, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(minitor_monitor_up_count)", + "format": "time_series", + "instant": true, + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "Total Monitors", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": true, + "colorPrefix": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "options": {}, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(minitor_monitor_up_count)-count(minitor_monitor_up_count>=1)", + "format": "time_series", + "instant": false, + "intervalFactor": 1, + "refId": "A" + } + ], + "thresholds": "1,2,3", + "timeFrom": null, + "title": "Total Down Services", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 1 + ], + "type": "lt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "last" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "0m", + "frequency": "60s", + "handler": 1, + "name": "Service Status alert", + "noDataState": "no_data", + "notifications": [] + }, + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 14, + "x": 8, + "y": 0 + }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": true, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": {}, + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(minitor_monitor_up_count) by (monitor)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{monitor}}", + "refId": "A" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "lt", + "value": 1 + } + ], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Service Status", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "columns": [], + "fontSize": "100%", + "gridPos": { + "h": 11, + "w": 22, + "x": 0, + "y": 7 + }, + "id": 8, + "links": [], + "options": {}, + "pageSize": null, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "styles": [ + { + "alias": "Time", + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "alias": "Status", + "colorMode": "cell", + "colors": [ + "#F2495C", + "#F2495C", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 0, + "link": false, + "mappingType": 1, + "pattern": "Value", + "thresholds": [ + "0", + "1" + ], + "type": "string", + "unit": "short", + "valueMaps": [ + { + "text": "Ok", + "value": "1" + }, + { + "text": "Not Ok", + "value": "0" + } + ] + } + ], + "targets": [ + { + "expr": "minitor_monitor_up_count", + "format": "time_series", + "instant": true, + "intervalFactor": 1, + "legendFormat": "{{monitor}}", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Monitor status", + "transform": "timeseries_to_rows", + "type": "table" + } + ], + "refresh": "30s", + "schemaVersion": 18, + "style": "dark", + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Minitor Monitor", + "uid": "qoE5Hrxiz", + "version": 3 +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/prometheus-stats-2-r2.json b/nomad/metrics/grafana/provisioning/dashboards/default/prometheus-stats-2-r2.json new file mode 100644 index 0000000..54e04a5 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/prometheus-stats-2-r2.json @@ -0,0 +1,732 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "3.1.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + } + ], + "id": null, + "title": "Prometheus Stats", + "tags": [], + "style": "dark", + "timezone": "browser", + "editable": true, + "hideControls": true, + "sharedCrosshair": false, + "rows": [ + { + "collapse": false, + "editable": true, + "height": 178, + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "$${DS_PROMETHEUS}", + "decimals": 1, + "editable": true, + "error": false, + "format": "s", + "id": 5, + "interval": null, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "targets": [ + { + "expr": "(time() - process_start_time_seconds{job=\"prometheus\"})", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current", + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "id": 6, + "interval": null, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "prometheus_local_storage_memory_series", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "1,5", + "title": "Local Storage Memory Series", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [], + "valueName": "current", + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "$${DS_PROMETHEUS}", + "editable": true, + "error": false, + "format": "none", + "id": 7, + "interval": null, + "links": [], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "span": 3, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "targets": [ + { + "expr": "prometheus_local_storage_indexing_queue_length", + "intervalFactor": 2, + "refId": "A", + "step": 4 + } + ], + "thresholds": "500,4000", + "title": "Interal Storage Queue Length", + "type": "singlestat", + "valueFontSize": "70%", + "valueMaps": [ + { + "op": "=", + "text": "Empty", + "value": "0" + } + ], + "valueName": "current", + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "rangeMaps": [ + { + "from": "null", + "to": "null", + "text": "N/A" + } + ], + "mappingType": 1, + "gauge": { + "show": false, + "minValue": 0, + "maxValue": 100, + "thresholdMarkers": true, + "thresholdLabels": false + } + }, + { + "content": "\"Prometheus\nPrometheus\n\n

You're using Prometheus, an open-source systems monitoring and alerting toolkit originally built at SoundCloud. For more information, check out the Grafana and Prometheus projects.

", + "editable": true, + "error": false, + "id": 9, + "links": [], + "mode": "html", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": 227, + "panels": [ + { + "aliasColors": { + "prometheus": "#C15C17", + "{instance=\"localhost:9090\",job=\"prometheus\"}": "#C15C17" + }, + "bars": false, + "datasource": "$${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_local_storage_ingested_samples_total[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "metric": "", + "refId": "A", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Samples ingested (rate-5m)", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [ + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + } + ], + "xaxis": { + "show": true + } + }, + { + "content": "#### Samples Ingested\nThis graph displays the count of samples ingested by the Prometheus server, as measured over the last 5 minutes, per time series in the range vector. When troubleshooting an issue on IRC or Github, this is often the first stat requested by the Prometheus team. ", + "editable": true, + "error": false, + "id": 8, + "links": [], + "mode": "markdown", + "span": 2.995914043583536, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": { + "prometheus": "#F9BA8F", + "{instance=\"localhost:9090\",interval=\"5s\",job=\"prometheus\"}": "#F9BA8F" + }, + "bars": false, + "datasource": "$${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 2, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 5, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "rate(prometheus_target_interval_length_seconds_count[5m])", + "intervalFactor": 2, + "legendFormat": "{{job}}", + "refId": "A", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Target Scrapes (last 5m)", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [ + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + } + ], + "xaxis": { + "show": true + } + }, + { + "aliasColors": {}, + "bars": false, + "datasource": "$${DS_PROMETHEUS}", + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_target_interval_length_seconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}} ({{interval}})", + "metric": "", + "refId": "A", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Scrape Duration", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [ + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + } + ], + "xaxis": { + "show": true + } + }, + { + "content": "#### Scrapes\nPrometheus scrapes metrics from instrumented jobs, either directly or via an intermediary push gateway for short-lived jobs. Target scrapes will show how frequently targets are scraped, as measured over the last 5 minutes, per time series in the range vector. Scrape Duration will show how long the scrapes are taking, with percentiles available as series. ", + "editable": true, + "error": false, + "id": 11, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "title": "New row" + }, + { + "collapse": false, + "editable": true, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "datasource": "$${DS_PROMETHEUS}", + "decimals": null, + "editable": true, + "error": false, + "fill": 1, + "grid": { + "threshold1": null, + "threshold1Color": "rgba(216, 200, 27, 0.27)", + "threshold2": null, + "threshold2Color": "rgba(234, 112, 112, 0.22)" + }, + "id": 12, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "span": 9, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "prometheus_evaluator_duration_milliseconds{quantile!=\"0.01\", quantile!=\"0.05\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{quantile}}", + "refId": "A", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Rule Eval Duration", + "tooltip": { + "shared": true, + "value_type": "cumulative", + "ordering": "alphabetical", + "msResolution": false + }, + "type": "graph", + "yaxes": [ + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "percentunit", + "label": "" + }, + { + "show": true, + "min": null, + "max": null, + "logBase": 1, + "format": "short" + } + ], + "xaxis": { + "show": true + } + }, + { + "content": "#### Rule Evaluation Duration\nThis graph panel plots the duration for all evaluations to execute. The 50th percentile, 90th percentile and 99th percentile are shown as three separate series to help identify outliers that may be skewing the data.", + "editable": true, + "error": false, + "id": 15, + "links": [], + "mode": "markdown", + "span": 3, + "style": {}, + "title": "", + "transparent": true, + "type": "text" + } + ], + "title": "New row" + } + ], + "time": { + "from": "now-5m", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "templating": { + "list": [] + }, + "annotations": { + "list": [] + }, + "refresh": false, + "schemaVersion": 12, + "version": 0, + "links": [ + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Grafana Docs", + "tooltip": "", + "type": "link", + "url": "http://www.grafana.org/docs" + }, + { + "icon": "info", + "tags": [], + "targetBlank": true, + "title": "Prometheus Docs", + "type": "link", + "url": "http://prometheus.io/docs/introduction/overview/" + } + ], + "gnetId": 2, + "description": "The official, pre-built Prometheus Stats Dashboard." +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/traefik-2.json b/nomad/metrics/grafana/provisioning/dashboards/default/traefik-2.json new file mode 100644 index 0000000..3913112 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/traefik-2.json @@ -0,0 +1,1183 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "gnetId": 6293, + "graphTooltip": 0, + "id": 10, + "iteration": 1540858877977, + "links": [], + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "s", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 0, + "y": 0 + }, + "id": 22, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "time() - process_start_time_seconds{job=\"$job\"}", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Uptime", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(26, 206, 22, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 0, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 3, + "x": 3, + "y": 0 + }, + "id": 26, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "200%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(increase(traefik_backend_requests_total{code=\"404\",method=\"GET\",protocol=~\"$protocol\"}[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "", + "metric": "traefik_requests_total", + "refId": "A", + "step": 60 + } + ], + "thresholds": "0,1", + "title": "404 Error Count last $interval", + "type": "singlestat", + "valueFontSize": "200%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "max" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Prometheus", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 6, + "w": 7, + "x": 6, + "y": 0 + }, + "id": 18, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "sort": null, + "sortDesc": null, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "traefik_backend_requests_total{protocol=~\"$protocol\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "title": "$protocol return code", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 13, + "y": 0 + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_entrypoint_request_duration_seconds_sum) / sum(traefik_entrypoint_requests_total) * 1000", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "Average response time", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 7, + "x": 17, + "y": 0 + }, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_backend_request_duration_seconds_sum{protocol=~\"$protocol\"}) / sum(traefik_entrypoint_requests_total{protocol=~\"$protocol\"}) * 1000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Average response time (ms)", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Average response time", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 10, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 6, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 10, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(traefik_backend_requests_total{code=\"404\",method=\"GET\",protocol=~\"$protocol\"}[$interval])) by (backend)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{backend}} ", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Bad Status Code Count $interval", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 12, + "x": 12, + "y": 6 + }, + "hideTimeOverride": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total[$interval]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total requests", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total requests over $interval", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 6, + "w": 10, + "x": 0, + "y": 12 + }, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "process_open_fds{job=~\"$job\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ instance }}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Used sockets", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 6, + "w": 14, + "x": 10, + "y": 12 + }, + "id": 24, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": true, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{protocol=~\"http|https\",code=\"200\"}[$interval])) by (backend)", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{backend}} {{method}} {{code}}", + "refId": "A", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Access to backends", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 7, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 30, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_entrypoint_open_connections) by (method)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "ENTRYPOINT - Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 7, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 28, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(traefik_backend_open_connections) by (method)", + "format": "time_series", + "intervalFactor": 1, + "legendFormat": "{{ method }}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "BACKEND - Open Connections", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 25 + }, + "id": 12, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "/^[^234].*/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(increase(traefik_backend_requests_total{protocol=~\"$protocol\"}[$interval])) by (code)", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{code}}", + "refId": "A", + "step": 120 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status Code Count per $interval", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "decimals": 0, + "format": "short", + "label": "", + "logBase": 1, + "max": null, + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "refresh": false, + "schemaVersion": 16, + "style": "dark", + "tags": [ + "traefik", + "load-balancer", + "docker", + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "Job:", + "multi": false, + "name": "job", + "options": [], + "query": "label_values(job)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 2, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Service:", + "multi": true, + "name": "protocol", + "options": [], + "query": "label_values(traefik_backend_requests_total, protocol)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 30, + "auto_min": "10s", + "current": { + "text": "30m", + "value": "30m" + }, + "hide": 0, + "label": "Interval", + "name": "interval", + "options": [ + { + "selected": false, + "text": "auto", + "value": "$__auto_interval_interval" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": true, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + "selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "1m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "skipUrlSync": false, + "type": "interval" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Traefik 2", + "uid": "3ipsWfViz", + "version": 2 +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/default/traefik.json b/nomad/metrics/grafana/provisioning/dashboards/default/traefik.json new file mode 100644 index 0000000..f7fe895 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/default/traefik.json @@ -0,0 +1,680 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": "-- Grafana --", + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "description": "Traefik dashboard prometheus", + "editable": true, + "gnetId": 4475, + "graphTooltip": 0, + "id": 8, + "iteration": 1540859046582, + "links": [], + "panels": [ + { + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 10, + "title": "$backend stats", + "type": "row" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": true, + "colors": [ + "#d44a3a", + "rgba(237, 129, 40, 0.89)", + "#299c46" + ], + "datasource": "Prometheus", + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 1 + }, + "id": 1, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_backend_server_up{backend=~\"$${backend:regex}\"})/count(traefik_config_reloads_total)", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "0,1", + "title": "$backend status", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "OK", + "value": "1" + } + ], + "valueName": "current" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Prometheus", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 1 + }, + "id": 2, + "interval": null, + "legend": { + "percentage": true, + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "traefik_backend_requests_total{backend=~\"$${backend:regex}\"}", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "title": "$backend return code", + "type": "grafana-piechart-panel", + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "#299c46", + "rgba(237, 129, 40, 0.89)", + "#d44a3a" + ], + "datasource": "Prometheus", + "format": "ms", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 1 + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": true + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(traefik_backend_request_duration_seconds_sum{backend=~\"$${backend:regex}\"}) / sum(traefik_backend_requests_total{backend=~\"$${backend:regex}\"}) * 1000", + "format": "time_series", + "intervalFactor": 2, + "refId": "A" + } + ], + "thresholds": "", + "title": "$backend response time", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 8 + }, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": false, + "max": true, + "min": true, + "rightSide": false, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total{backend=~\"$${backend:regex}\"}[5m]))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "Total requests $backend", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Total requests over 5min $backend", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 15 + }, + "id": 12, + "panels": [], + "title": "Global stats", + "type": "row" + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{method}} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Status code 200 over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "fill": 1, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": true, + "max": true, + "min": true, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": false, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "rate(traefik_entrypoint_requests_total{entrypoint=~\"$entrypoint\",code!=\"200\"}[5m])", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ method }} : {{code}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Others status code over 5min", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Prometheus", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 7, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "sum(rate(traefik_backend_requests_total[5m])) by (backend) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ backend }}", + "refId": "A" + } + ], + "title": "Requests by service", + "type": "grafana-piechart-panel", + "valueName": "total" + }, + { + "aliasColors": {}, + "breakPoint": "50%", + "cacheTimeout": null, + "combine": { + "label": "Others", + "threshold": 0 + }, + "datasource": "Prometheus", + "fontSize": "80%", + "format": "short", + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 8, + "interval": null, + "legend": { + "show": true, + "values": true + }, + "legendType": "Right side", + "links": [], + "maxDataPoints": 3, + "nullPointMode": "connected", + "pieType": "pie", + "strokeWidth": 1, + "targets": [ + { + "expr": "sum(rate(traefik_entrypoint_requests_total{entrypoint =~ \"$entrypoint\"}[5m])) by (entrypoint) ", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ entrypoint }}", + "refId": "A" + } + ], + "title": "Requests by protocol", + "type": "grafana-piechart-panel", + "valueName": "total" + } + ], + "schemaVersion": 16, + "style": "dark", + "tags": [ + "traefik", + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": "", + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": null, + "multi": false, + "name": "backend", + "options": [], + "query": "label_values(traefik_backend_server_up, backend)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": null, + "multi": true, + "name": "entrypoint", + "options": [], + "query": "label_values(traefik_entrypoint_requests_total, entrypoint)", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "", + "title": "Traefik", + "uid": "qPdAviJmz", + "version": 5 +} diff --git a/nomad/metrics/grafana/provisioning/dashboards/main.yml b/nomad/metrics/grafana/provisioning/dashboards/main.yml new file mode 100644 index 0000000..1cf4fa2 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/dashboards/main.yml @@ -0,0 +1,7 @@ +apiVersion: 1 +providers: + - name: default + folder: 'Provisioned' + type: file + options: + path: /etc/grafana/provisioning/dashboards/default diff --git a/nomad/metrics/grafana/provisioning/datasources/loki.yml b/nomad/metrics/grafana/provisioning/datasources/loki.yml new file mode 100644 index 0000000..ef78fb7 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/datasources/loki.yml @@ -0,0 +1,10 @@ +--- +apiVersion: 1 + +datasources: + - name: Loki + url: http://loki:3100 + type: loki + access: proxy + isDefault: false + version: 1 diff --git a/nomad/metrics/grafana/provisioning/datasources/prometheus.yml b/nomad/metrics/grafana/provisioning/datasources/prometheus.yml new file mode 100644 index 0000000..abf4fee --- /dev/null +++ b/nomad/metrics/grafana/provisioning/datasources/prometheus.yml @@ -0,0 +1,10 @@ +--- +apiVersion: 1 + +datasources: + - name: Prometheus + url: http://prom:9090 + type: prometheus + access: proxy + isDefault: true + version: 1 diff --git a/nomad/metrics/grafana/provisioning/notifiers/main.yml b/nomad/metrics/grafana/provisioning/notifiers/main.yml new file mode 100644 index 0000000..a895c21 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/notifiers/main.yml @@ -0,0 +1,20 @@ +--- +notifiers: + # - name: Telegram + # type: telegram + # uid: telegram-1 + # org_id: 1 + # is_default: false + # settings: + # chatid: ${TELEGRAM_CHATID} + # bottoken: ${TELEGRAM_BOTTOKEN} + # uploadImage: true +{{ with secret "kv/data/grafana" -}} + - name: Personal email + type: email + uid: email-1 + org_id: 1 + is_default: false + settings: + addresses: "{{ .Data.data.alert_email_addresses }}" +{{ end -}} diff --git a/nomad/metrics/grafana/provisioning/notifiers/slack.yml b/nomad/metrics/grafana/provisioning/notifiers/slack.yml new file mode 100644 index 0000000..c4e52b7 --- /dev/null +++ b/nomad/metrics/grafana/provisioning/notifiers/slack.yml @@ -0,0 +1,27 @@ +--- +{{ with secret "kv/data/slack" -}} +notifiers: + - name: Slack Bot + type: slack + uid: slack-1 + org_id: 1 + is_default: false + settings: + url: "{{ .Data.data.bot_url }}" + recipient: "#site-notifications" + username: Grafana Alerts + icon_url: https://grafana.iamthefij.com/public/img/grafana_icon.svg + token: "{{ .Data.data.bot_token }}" + uploadImage: true + mentionChannel: channel + - name: Slack Hook + type: slack + uid: slack-2 + org_id: 1 + is_default: true + settings: + url: "{{ .Data.data.hook_url }}" + icon_url: https://grafana.iamthefij.com/public/img/grafana_icon.svg + uploadImage: true + mentionChannel: channel +{{ end -}} diff --git a/nomad/metrics/metrics.tf b/nomad/metrics/metrics.tf index 6aab04c..8cc5037 100644 --- a/nomad/metrics/metrics.tf +++ b/nomad/metrics/metrics.tf @@ -35,7 +35,9 @@ resource "nomad_job" "grafana" { enabled = true } - jobspec = file("${path.module}/grafana.nomad") + jobspec = templatefile("${path.module}/grafana.nomad", { + module_path = "${path.module}" + }) } resource "consul_config_entry" "prometheus_intent" { diff --git a/nomad/vault_hashi_vault_values.yml b/nomad/vault_hashi_vault_values.yml index c60c13b..63e191c 100644 --- a/nomad/vault_hashi_vault_values.yml +++ b/nomad/vault_hashi_vault_values.yml @@ -7,3 +7,9 @@ hashi_vault_values: backup_passphrase: shhh_imma_secret mysql: root_password: supersecretpassword + slack: + bot_url: ... + bot_token: ... + hook_url: ... + grafana: + alert_email_addresses: email@example.com