ctucx.git: nixfiles

ctucx' nixfiles

commit f24b3a16b979f36b5ebb07fa72953456fb0bd2ac
parent 0651ce99e1fe0016147697c5eaec5e7bb5822510
Author: Leah (ctucx) <git@ctu.cx>
Date: Mon, 11 Dec 2023 14:53:20 +0100

configurations/linux/services/prometheus-exporters: add support for scaphandre exporter
3 files changed, 537 insertions(+), 11 deletions(-)
M
configurations/linux/services/prometheus-exporters.nix
|
38
+++++++++++++++++++++++++++-----------
A
machines/trabbi/grafana/dashboards/scaphandre.json
|
493
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M
machines/trabbi/prometheus.nix
|
17
+++++++++++++++++
diff --git a/configurations/linux/services/prometheus-exporters.nix b/configurations/linux/services/prometheus-exporters.nix
@@ -3,16 +3,31 @@
 {
 
   services = {
-    prometheus.exporters.node.enable = true;
-    prometheus.exporters.node.listenAddress = "[::1]";
-    prometheus.exporters.node.port          = 9100;
-    prometheus.exporters.node.enabledCollectors = [
-      "systemd" "processes"
-    ];
+    prometheus.exporters = {
+      node = {
+        enable = true;
+        listenAddress = "[::1]";
+        port          = 9100;
+        enabledCollectors = [
+          "systemd" "processes"
+        ];
+      };
+
+      systemd = {
+        enable = true;
+        listenAddress = "[::1]";
+        port          = 9558;
+      };
 
-    prometheus.exporters.systemd.enable = true;
-    prometheus.exporters.systemd.listenAddress = "[::1]";
-    prometheus.exporters.systemd.port          = 9558;
+      scaphandre = {
+        enable        = builtins.elem "intel_rapl_common" config.boot.kernelModules; # on only where this module is listed in boot.kernelModules
+        user          = "root"; # NOTE(review): presumably needed to read the RAPL counters — confirm
+        group         = "root";
+        listenAddress = "::1"; # unbracketed, unlike node/systemd above; the nginx proxyPass below hard-codes [::1]
+        port          = 9080;
+        telemetryPath = "scaphandre-exporter"; # same path segment the nginx proxyPass below targets
+      };
+    };
 
     nginx = {
       enable = true;

@@ -20,8 +35,9 @@
         enableACME = lib.mkDefault true;
         forceSSL   = lib.mkDefault true;
         kTLS       = lib.mkDefault true;
-        locations."/node-exporter".proxyPass    = "http://${toString config.services.prometheus.exporters.node.listenAddress}:${toString config.services.prometheus.exporters.node.port}/metrics";
-        locations."/systemd-exporter".proxyPass = "http://${toString config.services.prometheus.exporters.systemd.listenAddress}:${toString config.services.prometheus.exporters.systemd.port}/metrics";
+        locations."/node-exporter".proxyPass       = "http://${toString config.services.prometheus.exporters.node.listenAddress}:${toString config.services.prometheus.exporters.node.port}/metrics";
+        locations."/systemd-exporter".proxyPass    = "http://${toString config.services.prometheus.exporters.systemd.listenAddress}:${toString config.services.prometheus.exporters.systemd.port}/metrics";
+        locations."/scaphandre-exporter" = lib.mkIf config.services.prometheus.exporters.scaphandre.enable { proxyPass = "http://[::1]:${toString config.services.prometheus.exporters.scaphandre.port}/${config.services.prometheus.exporters.scaphandre.telemetryPath}"; }; # whole location is conditional so disabled hosts get no empty location block
       };
     };
   };
diff --git a/machines/trabbi/grafana/dashboards/scaphandre.json b/machines/trabbi/grafana/dashboards/scaphandre.json
@@ -0,0 +1,493 @@
+{
+   "__inputs": [ ],
+   "__requires": [ ],
+   "annotations": {
+      "list": [ ]
+   },
+   "editable": true,
+   "gnetId": null,
+   "graphTooltip": 0,
+   "hideControls": false,
+   "id": null,
+   "links": [ ],
+   "refresh": "",
+   "rows": [
+      {
+         "collapse": false,
+         "collapsed": false,
+         "panels": [
+            {
+               "aliasColors": { },
+               "bars": false,
+               "dashLength": 10,
+               "dashes": false,
+               "datasource": "${PROMETHEUS_DS}",
+               "fill": 1,
+               "fillGradient": 0,
+               "gridPos": { },
+               "id": 2,
+               "legend": {
+                  "alignAsTable": false,
+                  "avg": false,
+                  "current": false,
+                  "max": false,
+                  "min": false,
+                  "rightSide": false,
+                  "show": true,
+                  "sideWidth": null,
+                  "total": false,
+                  "values": false
+               },
+               "lines": true,
+               "linewidth": 1,
+               "links": [ ],
+               "nullPointMode": "null",
+               "percentage": false,
+               "pointradius": 5,
+               "points": false,
+               "renderer": "flot",
+               "repeat": null,
+               "seriesOverrides": [ ],
+               "spaceLength": 10,
+               "span": 6,
+               "stack": false,
+               "steppedLine": false,
+               "targets": [
+                  {
+                     "expr": "scaph_host_power_microwatts / 1000000",
+                     "format": "time_series",
+                     "intervalFactor": 2,
+                     "legendFormat": "{{instance}}",
+                     "refId": "A"
+                  }
+               ],
+               "thresholds": [ ],
+               "timeFrom": null,
+               "timeShift": null,
+               "title": "Hosts power consumption",
+               "tooltip": {
+                  "shared": true,
+                  "sort": 0,
+                  "value_type": "individual"
+               },
+               "type": "graph",
+               "xaxis": {
+                  "buckets": null,
+                  "mode": "time",
+                  "name": null,
+                  "show": true,
+                  "values": [ ]
+               },
+               "yaxes": [
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  },
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  }
+               ]
+            },
+            {
+               "aliasColors": { },
+               "bars": true,
+               "dashLength": 10,
+               "dashes": false,
+               "datasource": "${PROMETHEUS_DS}",
+               "fill": 1,
+               "fillGradient": 0,
+               "gridPos": { },
+               "id": 3,
+               "legend": {
+                  "alignAsTable": false,
+                  "avg": false,
+                  "current": false,
+                  "max": false,
+                  "min": false,
+                  "rightSide": false,
+                  "show": true,
+                  "sideWidth": null,
+                  "total": false,
+                  "values": false
+               },
+               "lines": true,
+               "linewidth": 1,
+               "links": [ ],
+               "nullPointMode": "null",
+               "percentage": false,
+               "pointradius": 5,
+               "points": false,
+               "renderer": "flot",
+               "repeat": null,
+               "seriesOverrides": [ ],
+               "spaceLength": 10,
+               "span": 4,
+               "stack": false,
+               "steppedLine": false,
+               "targets": [
+                  {
+                     "expr": "sum(avg_over_time(scaph_host_power_microwatts[1h]))/1000000",
+                     "format": "time_series",
+                     "interval": "1h",
+                     "intervalFactor": 2,
+                     "legendFormat": "total of hosts, during displayed time window",
+                     "refId": "A"
+                  }
+               ],
+               "thresholds": [ ],
+               "timeFrom": null,
+               "timeShift": null,
+               "title": "Hosts power consumption total (dynamic time range)",
+               "tooltip": {
+                  "shared": true,
+                  "sort": 0,
+                  "value_type": "individual"
+               },
+               "type": "graph",
+               "xaxis": {
+                  "buckets": null,
+                  "mode": "series",
+                  "name": null,
+                  "show": true,
+                  "values": [
+                     "total"
+                  ]
+               },
+               "yaxes": [
+                  {
+                     "format": "Wh",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  },
+                  {
+                     "format": "Wh",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  }
+               ]
+            }
+         ],
+         "repeat": null,
+         "repeatIteration": null,
+         "repeatRowId": null,
+         "showTitle": true,
+         "title": "Per hosts",
+         "titleSize": "h6",
+         "type": "row"
+      },
+      {
+         "collapse": false,
+         "collapsed": false,
+         "panels": [
+            {
+               "aliasColors": { },
+               "bars": false,
+               "dashLength": 10,
+               "dashes": false,
+               "datasource": "${PROMETHEUS_DS}",
+               "fill": 1,
+               "fillGradient": 0,
+               "gridPos": { },
+               "id": 4,
+               "legend": {
+                  "alignAsTable": false,
+                  "avg": false,
+                  "current": false,
+                  "max": false,
+                  "min": false,
+                  "rightSide": false,
+                  "show": true,
+                  "sideWidth": null,
+                  "total": false,
+                  "values": false
+               },
+               "lines": true,
+               "linewidth": 1,
+               "links": [ ],
+               "nullPointMode": "null",
+               "percentage": false,
+               "pointradius": 5,
+               "points": false,
+               "renderer": "flot",
+               "repeat": null,
+               "seriesOverrides": [ ],
+               "spaceLength": 10,
+               "span": 6,
+               "stack": false,
+               "steppedLine": false,
+               "targets": [
+                  {
+                     "expr": "scaph_socket_power_microwatts / 1000000",
+                     "format": "time_series",
+                     "intervalFactor": 2,
+                     "legendFormat": "{{instance}} Socket {{socket_id}}",
+                     "refId": "A"
+                  }
+               ],
+               "thresholds": [ ],
+               "timeFrom": null,
+               "timeShift": null,
+               "title": "Socket power consumption",
+               "tooltip": {
+                  "shared": true,
+                  "sort": 0,
+                  "value_type": "individual"
+               },
+               "type": "graph",
+               "xaxis": {
+                  "buckets": null,
+                  "mode": "time",
+                  "name": null,
+                  "show": true,
+                  "values": [ ]
+               },
+               "yaxes": [
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  },
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  }
+               ]
+            }
+         ],
+         "repeat": null,
+         "repeatIteration": null,
+         "repeatRowId": null,
+         "showTitle": true,
+         "title": "Per CPU Sockets",
+         "titleSize": "h6",
+         "type": "row"
+      },
+      {
+         "collapse": false,
+         "collapsed": false,
+         "panels": [
+            {
+               "datasource": "${PROMETHEUS_DS}",
+               "fieldConfig": {
+                  "defaults": {
+                     "links": [ ],
+                     "mappings": [ ],
+                     "thresholds": {
+                        "mode": "absolute",
+                        "steps": [ ]
+                     },
+                     "unit": "none"
+                  }
+               },
+               "gridPos": { },
+               "id": 5,
+               "links": [ ],
+               "options": {
+                  "colorMode": "value",
+                  "graphMode": "area",
+                  "justifyMode": "auto",
+                  "orientation": "auto",
+                  "reduceOptions": {
+                     "calcs": [
+                        "mean"
+                     ],
+                     "fields": "",
+                     "values": false
+                  }
+               },
+               "pluginVersion": "7",
+               "targets": [
+                  {
+                     "expr": "sort_desc(topk(3, sum by (exe) (scaph_process_power_consumption_microwatts/1000000)))",
+                     "format": "time_series",
+                     "intervalFactor": 2,
+                     "legendFormat": "{{exe}}",
+                     "refId": "A"
+                  }
+               ],
+               "title": "Top process consumers",
+               "transparent": false,
+               "type": "stat"
+            },
+            {
+               "aliasColors": { },
+               "bars": false,
+               "dashLength": 10,
+               "dashes": false,
+               "datasource": "${PROMETHEUS_DS}",
+               "fill": 1,
+               "fillGradient": 0,
+               "gridPos": { },
+               "id": 6,
+               "legend": {
+                  "alignAsTable": true,
+                  "avg": false,
+                  "current": false,
+                  "max": false,
+                  "min": false,
+                  "rightSide": false,
+                  "show": true,
+                  "sideWidth": "30%",
+                  "total": false,
+                  "values": false
+               },
+               "lines": true,
+               "linewidth": 1,
+               "links": [ ],
+               "nullPointMode": "null",
+               "percentage": false,
+               "pointradius": 5,
+               "points": false,
+               "renderer": "flot",
+               "repeat": null,
+               "seriesOverrides": [ ],
+               "spaceLength": 10,
+               "span": 8,
+               "stack": true,
+               "steppedLine": false,
+               "targets": [
+                  {
+                     "expr": "scaph_process_power_consumption_microwatts{exe=~\".*${process_filter}.*\"}/1000000",
+                     "format": "time_series",
+                     "intervalFactor": 2,
+                     "legendFormat": "{{ cmdline }}",
+                     "refId": "A"
+                  }
+               ],
+               "thresholds": [ ],
+               "timeFrom": null,
+               "timeShift": null,
+               "title": "Filtered process (process_filter) power, by exe",
+               "tooltip": {
+                  "shared": true,
+                  "sort": 0,
+                  "value_type": "individual"
+               },
+               "type": "graph",
+               "xaxis": {
+                  "buckets": null,
+                  "mode": "time",
+                  "name": null,
+                  "show": true,
+                  "values": [ ]
+               },
+               "yaxes": [
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  },
+                  {
+                     "format": "W",
+                     "label": null,
+                     "logBase": 1,
+                     "max": null,
+                     "min": 0,
+                     "show": true
+                  }
+               ]
+            }
+         ],
+         "repeat": null,
+         "repeatIteration": null,
+         "repeatRowId": null,
+         "showTitle": true,
+         "title": "Per process",
+         "titleSize": "h6",
+         "type": "row"
+      }
+   ],
+   "schemaVersion": 14,
+   "style": "dark",
+   "tags": [
+      "scaphandre",
+      "energy",
+      "power"
+   ],
+   "templating": {
+      "list": [
+         {
+            "current": {
+               "text": "Prometheus",
+               "value": "Prometheus"
+            },
+            "hide": 1,
+            "label": null,
+            "name": "PROMETHEUS_DS",
+            "options": [ ],
+            "query": "prometheus",
+            "refresh": 1,
+            "regex": "",
+            "type": "datasource"
+         },
+         {
+            "current": {
+               "selected": false,
+               "text": "",
+               "value": ""
+            },
+            "label": "",
+            "name": "process_filter",
+            "query": "",
+            "type": "textbox"
+         }
+      ]
+   },
+   "time": {
+      "from": "now-6h",
+      "to": "now"
+   },
+   "timepicker": {
+      "refresh_intervals": [
+         "5s",
+         "10s",
+         "30s",
+         "1m",
+         "5m",
+         "15m",
+         "30m",
+         "1h",
+         "2h",
+         "1d"
+      ],
+      "time_options": [
+         "5m",
+         "15m",
+         "1h",
+         "6h",
+         "12h",
+         "24h",
+         "2d",
+         "7d",
+         "30d"
+      ]
+   },
+   "timezone": "browser",
+   "title": "Scaphandre example dashboard",
+   "version": 0
+}
diff --git a/machines/trabbi/prometheus.nix b/machines/trabbi/prometheus.nix
@@ -28,6 +28,7 @@
             ) nodes);
           }];
         }
+
         {
           job_name        = "systemd-exporter";
           scrape_interval = "30s";

@@ -44,6 +45,22 @@
           }];
         }
 
+        {
+          job_name        = "scaphandre-exporter";
+          scrape_interval = "30s";
+          scheme          = "https";
+          metrics_path    = "/scaphandre-exporter"; # the nginx location defined in configurations/linux/services/prometheus-exporters.nix
+          static_configs  = [{
+            targets = (lib.mapAttrsToList (
+              name: host: lib.mkIf ( # skip hosts without the exporter or with an empty hostName/domain
+                host.config.services.prometheus.exporters.scaphandre.enable &&
+                host.config.networking.hostName != "" &&
+                host.config.networking.domain != ""
+              ) host.config.networking.fqdn
+            ) nodes);
+          }];
+        }
+
       ];
     };