commit f24b3a16b979f36b5ebb07fa72953456fb0bd2ac
parent 0651ce99e1fe0016147697c5eaec5e7bb5822510
Author: Leah (ctucx) <git@ctu.cx>
Date: Mon, 11 Dec 2023 14:53:20 +0100
configurations/linux/services/prometheus-exporters: add support for scaphandre exporter
3 files changed, 537 insertions(+), 11 deletions(-)
A machines/trabbi/grafana/dashboards/scaphandre.json | 493 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
diff --git a/configurations/linux/services/prometheus-exporters.nix b/configurations/linux/services/prometheus-exporters.nix @@ -3,16 +3,31 @@ { services = { - prometheus.exporters.node.enable = true; - prometheus.exporters.node.listenAddress = "[::1]"; - prometheus.exporters.node.port = 9100; - prometheus.exporters.node.enabledCollectors = [ - "systemd" "processes" - ]; + prometheus.exporters = { + node = { + enable = true; + listenAddress = "[::1]"; + port = 9100; + enabledCollectors = [ + "systemd" "processes" + ]; + }; + + systemd = { + enable = true; + listenAddress = "[::1]"; + port = 9558; + }; - prometheus.exporters.systemd.enable = true; - prometheus.exporters.systemd.listenAddress = "[::1]"; - prometheus.exporters.systemd.port = 9558; + scaphandre = { + enable = (if (builtins.elem "intel_rapl_common" config.boot.kernelModules) then true else false); + user = "root"; + group = "root"; + listenAddress = "::1"; + port = 9080; + telemetryPath = "scaphandre-exporter"; + }; + }; nginx = { enable = true; @@ -20,8 +35,9 @@ enableACME = lib.mkDefault true; forceSSL = lib.mkDefault true; kTLS = lib.mkDefault true; - locations."/node-exporter".proxyPass = "http://${toString config.services.prometheus.exporters.node.listenAddress}:${toString config.services.prometheus.exporters.node.port}/metrics"; - locations."/systemd-exporter".proxyPass = "http://${toString config.services.prometheus.exporters.systemd.listenAddress}:${toString config.services.prometheus.exporters.systemd.port}/metrics"; + locations."/node-exporter".proxyPass = "http://${toString config.services.prometheus.exporters.node.listenAddress}:${toString config.services.prometheus.exporters.node.port}/metrics"; + locations."/systemd-exporter".proxyPass = "http://${toString config.services.prometheus.exporters.systemd.listenAddress}:${toString config.services.prometheus.exporters.systemd.port}/metrics"; + locations."/scaphandre-exporter".proxyPass = lib.mkIf 
config.services.prometheus.exporters.scaphandre.enable "http://[::1]:${toString config.services.prometheus.exporters.scaphandre.port}/scaphandre-exporter"; }; }; };
diff --git a/machines/trabbi/grafana/dashboards/scaphandre.json b/machines/trabbi/grafana/dashboards/scaphandre.json @@ -0,0 +1,493 @@ +{ + "__inputs": [ ], + "__requires": [ ], + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [ ], + "refresh": "", + "rows": [ + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${PROMETHEUS_DS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 2, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "scaph_host_power_microwatts / 1000000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hosts power consumption", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "${PROMETHEUS_DS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 3, + "legend": { + 
"alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 4, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(avg_over_time(scaph_host_power_microwatts[1h]))/1000000", + "format": "time_series", + "interval": "1h", + "intervalFactor": 2, + "legendFormat": "total of hosts, during displayed time window", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Hosts power consumption total (dynamic time range)", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "series", + "name": null, + "show": true, + "values": [ + "total" + ] + }, + "yaxes": [ + { + "format": "Wh", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "Wh", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Per hosts", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${PROMETHEUS_DS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 4, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": null, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": 
false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "scaph_socket_power_microwatts / 1000000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{instance}} Socket {{socket_id}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Socket power consumption", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Per CPU Sockets", + "titleSize": "h6", + "type": "row" + }, + { + "collapse": false, + "collapsed": false, + "panels": [ + { + "datasource": "${PROMETHEUS_DS}", + "fieldConfig": { + "defaults": { + "links": [ ], + "mappings": [ ], + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "none" + } + }, + "gridPos": { }, + "id": 5, + "links": [ ], + "options": { + "colorMode": "value", + "graphMode": "area", + "justifyMode": "auto", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "mean" + ], + "fields": "", + "values": false + } + }, + "pluginVersion": "7", + "targets": [ + { + "expr": "sort_desc(topk(3, sum by (exe) (scaph_process_power_consumption_microwatts/1000000)))", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{exe}}", + "refId": "A" + } + ], + "title": "Top process consumers", + "transparent": false, + "type": "stat" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + 
"datasource": "${PROMETHEUS_DS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { }, + "id": 6, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "sideWidth": "30%", + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [ ], + "nullPointMode": "null", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": null, + "seriesOverrides": [ ], + "spaceLength": 10, + "span": 8, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "scaph_process_power_consumption_microwatts{exe=~\".*${process_filter}.*\"}/1000000", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ cmdline }}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeShift": null, + "title": "Filtered process (process_filter) power, by exe", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "W", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Per process", + "titleSize": "h6", + "type": "row" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "scaphandre", + "energy", + "power" + ], + "templating": { + "list": [ + { + "current": { + "text": "Prometheus", + "value": "Prometheus" + }, + "hide": 1, + "label": null, + "name": "PROMETHEUS_DS", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "current": { + "selected": false, + "text": "", + "value": "" + }, + "label": "", + "name": "process_filter", + 
"query": "", + "type": "textbox" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Scaphandre example dashboard", + "version": 0 +}
diff --git a/machines/trabbi/prometheus.nix b/machines/trabbi/prometheus.nix @@ -28,6 +28,7 @@ ) nodes); }]; } + { job_name = "systemd-exporter"; scrape_interval = "30s"; @@ -44,6 +45,22 @@ }]; } + { + job_name = "scaphandre-exporter"; + scrape_interval = "30s"; + scheme = "https"; + metrics_path = "/scaphandre-exporter"; + static_configs = [{ + targets = (lib.mapAttrsToList ( + name: host: lib.mkIf ( + host.config.services.prometheus.exporters.scaphandre.enable == true && + host.config.networking.hostName != "" && + host.config.networking.domain != "" + ) host.config.networking.fqdn + ) nodes); + }]; + } + ]; };