about summary refs log tree commit diff
diff options
context:
space:
mode:
author Franz Pletz <fpletz@fnordicwalking.de> 2024-06-23 23:05:06 +0200
committer GitHub <noreply@github.com> 2024-06-23 23:05:06 +0200
commit 3c033186ee73229fdcdcb3266a36baf87d519840 (patch)
tree 3b16162580537ac13f750b685fcada5a7a2ec3a3
parent 2789cee804747dc1066fc293ab918f77297561c5 (diff)
parent 5adadf25c6eba445401bd2a270a74a9c3bac73c2 (diff)
Merge pull request #316727 from jpds/nixos-tests-prometheus
nixos/prometheus: Revamp tests
-rw-r--r-- maintainers/maintainer-list.nix 5
-rw-r--r-- nixos/modules/module-list.nix 1
-rw-r--r-- nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix 70
-rw-r--r-- nixos/tests/all-tests.nix 3
-rw-r--r-- nixos/tests/prometheus/alertmanager.nix 148
-rw-r--r-- nixos/tests/prometheus/config-reload.nix 116
-rw-r--r-- nixos/tests/prometheus/default.nix 13
-rw-r--r-- nixos/tests/prometheus/federation.nix 213
-rw-r--r-- nixos/tests/prometheus/prometheus-pair.nix 87
-rw-r--r-- nixos/tests/prometheus/pushgateway.nix 94
-rw-r--r-- nixos/tests/prometheus/remote-write.nix 73
-rw-r--r-- nixos/tests/thanos.nix (renamed from nixos/tests/prometheus.nix) 58
-rw-r--r-- pkgs/by-name/al/alertmanager-webhook-logger/package.nix 33
-rw-r--r-- pkgs/servers/monitoring/prometheus/alertmanager.nix 3
-rw-r--r-- pkgs/servers/monitoring/prometheus/pushgateway.nix 9
-rw-r--r-- pkgs/servers/monitoring/thanos/default.nix 2
16 files changed, 865 insertions, 63 deletions
diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix
index 2035843fbf1c1..fce218ca723a3 100644
--- a/maintainers/maintainer-list.nix
+++ b/maintainers/maintainer-list.nix
@@ -9848,6 +9848,11 @@
     githubId = 1918771;
     name = "Joe Doyle";
   };
+  jpds = {
+    github = "jpds";
+    githubId = 29158971;
+    name = "Jonathan Davies";
+  };
   jpentland = {
     email = "joe.pentland@gmail.com";
     github = "jpentland";
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index d412e414f68b5..876e40983c1e5 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -880,6 +880,7 @@
   ./services/monitoring/osquery.nix
   ./services/monitoring/parsedmarc.nix
   ./services/monitoring/prometheus/alertmanager-irc-relay.nix
+  ./services/monitoring/prometheus/alertmanager-webhook-logger.nix
   ./services/monitoring/prometheus/alertmanager.nix
   ./services/monitoring/prometheus/default.nix
   ./services/monitoring/prometheus/exporters.nix
diff --git a/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix b/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix
new file mode 100644
index 0000000000000..b4307a76e1b02
--- /dev/null
+++ b/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix
@@ -0,0 +1,70 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+  cfg = config.services.prometheus.alertmanagerWebhookLogger;
+in
+{
+  options.services.prometheus.alertmanagerWebhookLogger = {
+    enable = mkEnableOption "Alertmanager Webhook Logger";
+
+    package = mkPackageOption pkgs "alertmanager-webhook-logger" { };
+
+    extraFlags = mkOption {
+      type = types.listOf types.str;
+      default = [];
+      description = "Extra command line options to pass to alertmanager-webhook-logger.";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    systemd.services.alertmanager-webhook-logger = {
+      description = "Alertmanager Webhook Logger";
+
+      wantedBy = [ "multi-user.target" ];
+      after = [ "network-online.target" ];
+      wants = [ "network-online.target" ];
+
+      serviceConfig = {
+        ExecStart = ''
+          ${cfg.package}/bin/alertmanager-webhook-logger \
+          ${escapeShellArgs cfg.extraFlags}
+        '';
+
+        DynamicUser = true;
+        NoNewPrivileges = true;
+
+        ProtectProc = "invisible";
+        ProtectSystem = "strict";
+        ProtectHome = "tmpfs";
+
+        PrivateTmp = true;
+        PrivateDevices = true;
+        PrivateIPC = true;
+
+        ProtectHostname = true;
+        ProtectClock = true;
+        ProtectKernelTunables = true;
+        ProtectKernelModules = true;
+        ProtectKernelLogs = true;
+        ProtectControlGroups = true;
+
+        RestrictAddressFamilies = [ "AF_INET" "AF_INET6" ];
+        RestrictRealtime = true;
+        RestrictSUIDSGID = true;
+
+        SystemCallFilter = [
+          "@system-service"
+          "~@cpu-emulation"
+          "~@privileged"
+          "~@reboot"
+          "~@setuid"
+          "~@swap"
+        ];
+      };
+    };
+  };
+
+  meta.maintainers = [ maintainers.jpds ];
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index b1f7d1917ed45..1db9f702e2416 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -775,7 +775,7 @@ in {
   printing-service = handleTest ./printing.nix { socket = false; };
   private-gpt = handleTest ./private-gpt.nix {};
   privoxy = handleTest ./privoxy.nix {};
-  prometheus = handleTest ./prometheus.nix {};
+  prometheus = handleTest ./prometheus {};
   prometheus-exporters = handleTest ./prometheus-exporters.nix {};
   prosody = handleTest ./xmpp/prosody.nix {};
   prosody-mysql = handleTest ./xmpp/prosody-mysql.nix {};
@@ -968,6 +968,7 @@ in {
   teleport = handleTest ./teleport.nix {};
   thelounge = handleTest ./thelounge.nix {};
   terminal-emulators = handleTest ./terminal-emulators.nix {};
+  thanos = handleTest ./thanos.nix {};
   tiddlywiki = handleTest ./tiddlywiki.nix {};
   tigervnc = handleTest ./tigervnc.nix {};
   timescaledb = handleTest ./timescaledb.nix {};
diff --git a/nixos/tests/prometheus/alertmanager.nix b/nixos/tests/prometheus/alertmanager.nix
new file mode 100644
index 0000000000000..feda8d8fc2bcc
--- /dev/null
+++ b/nixos/tests/prometheus/alertmanager.nix
@@ -0,0 +1,148 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-alertmanager";
+
+  nodes = {
+    prometheus = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        alertmanagers = [
+          {
+            scheme = "http";
+            static_configs = [
+              {
+                targets = [
+                  "alertmanager:${toString config.services.prometheus.alertmanager.port}"
+                ];
+              }
+            ];
+          }
+        ];
+
+        rules = [
+          ''
+            groups:
+              - name: test
+                rules:
+                  - alert: InstanceDown
+                    expr: up == 0
+                    for: 5s
+                    labels:
+                      severity: page
+                    annotations:
+                      summary: "Instance {{ $labels.instance }} down"
+          ''
+        ];
+
+        scrapeConfigs = [
+          {
+            job_name = "alertmanager";
+            static_configs = [
+              {
+                targets = [
+                  "alertmanager:${toString config.services.prometheus.alertmanager.port}"
+                ];
+              }
+            ];
+          }
+          {
+            job_name = "node";
+            static_configs = [
+              {
+                targets = [
+                  "node:${toString config.services.prometheus.exporters.node.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    alertmanager = { config, pkgs, ... }: {
+      services.prometheus.alertmanager = {
+        enable = true;
+        openFirewall = true;
+
+        configuration = {
+          global = {
+            resolve_timeout = "1m";
+          };
+
+          route = {
+            # Root route node
+            receiver = "test";
+            group_by = ["..."];
+            continue = false;
+            group_wait = "1s";
+            group_interval = "15s";
+            repeat_interval = "24h";
+          };
+
+          receivers = [
+            {
+              name = "test";
+              webhook_configs = [
+                {
+                  url = "http://logger:6725";
+                  send_resolved = true;
+                  max_alerts = 0;
+                }
+              ];
+            }
+          ];
+        };
+      };
+    };
+
+    logger = { config, pkgs, ... }: {
+      networking.firewall.allowedTCPPorts = [ 6725 ];
+
+      services.prometheus.alertmanagerWebhookLogger.enable = true;
+    };
+  };
+
+  testScript = ''
+    alertmanager.wait_for_unit("alertmanager")
+    alertmanager.wait_for_open_port(9093)
+    alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready")
+    #alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'")
+
+    logger.wait_for_unit("alertmanager-webhook-logger")
+    logger.wait_for_open_port(6725)
+
+    prometheus.wait_for_unit("prometheus")
+    prometheus.wait_for_open_port(9090)
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | "
+      + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+    )
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | "
+      + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'"
+    )
+    # No "node" machine is defined in this test, so that scrape target is down and InstanceDown can fire.
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | "
+      + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+    )
+    # jq -e exits non-zero unless the counter is a number above zero, so an empty result is retried.
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | "
+      + "jq -e '.data.result[0].value[1] | tonumber > 0'"
+    )
+    # The webhook logger must have written the InstanceDown alert to its journal.
+    logger.wait_until_succeeds(
+      "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'"
+    )
+  '';
+})
diff --git a/nixos/tests/prometheus/config-reload.nix b/nixos/tests/prometheus/config-reload.nix
new file mode 100644
index 0000000000000..786668c624ea9
--- /dev/null
+++ b/nixos/tests/prometheus/config-reload.nix
@@ -0,0 +1,116 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-config-reload";
+
+  nodes = {
+    prometheus = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        enableReload = true;
+        globalConfig.scrape_interval = "2s";
+        scrapeConfigs = [
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "prometheus:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+
+      specialisation = {
+        "prometheus-config-change" = {
+          configuration = {
+            environment.systemPackages = [ pkgs.yq ];
+
+            # This configuration just adds a new prometheus job
+            # to scrape the node_exporter metrics of the "node" target.
+            services.prometheus = {
+              scrapeConfigs = [
+                {
+                  job_name = "node";
+                  static_configs = [
+                    {
+                      targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ];
+                    }
+                  ];
+                }
+              ];
+            };
+          };
+        };
+      };
+    };
+  };
+
+  testScript = ''
+    prometheus.wait_for_unit("prometheus")
+    prometheus.wait_for_open_port(9090)
+
+    # Check if switching to a NixOS configuration that changes the prometheus
+    # configuration reloads (instead of restarts) prometheus before the switch
+    # finishes successfully:
+    with subtest("config change reloads prometheus"):
+      import json
+      # We check if prometheus has finished reloading by looking for the message
+      # "Completed loading of configuration file" in the journal between the start
+      # and finish of switching to the new NixOS configuration.
+      #
+      # To mark the start we record the journal cursor before starting the switch:
+      cursor_before_switching = json.loads(
+          prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
+      )["__CURSOR"]
+
+      # Now we switch:
+      prometheus_config_change = prometheus.succeed(
+          "readlink /run/current-system/specialisation/prometheus-config-change"
+      ).strip()
+      prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
+
+      # Next we retrieve all logs since the start of switching:
+      logs_after_starting_switching = prometheus.succeed(
+          """
+            journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
+          """.format(
+              cursor_before_switching=cursor_before_switching
+          )
+      )
+
+      # Finally we check if the message "Completed loading of configuration file"
+      # occurs before the "finished switching to system configuration" message:
+      finished_switching_msg = (
+          "finished switching to system configuration " + prometheus_config_change
+      )
+      reloaded_before_switching_finished = False
+      finished_switching = False
+      for log_line in logs_after_starting_switching.split("\n"):
+          msg = json.loads(log_line)["MESSAGE"]
+          if "Completed loading of configuration file" in msg:
+              reloaded_before_switching_finished = True
+          if msg == finished_switching_msg:
+              finished_switching = True
+              break
+
+      assert reloaded_before_switching_finished
+      assert finished_switching
+
+      # Check if the reloaded config includes the new node job:
+      prometheus.succeed(
+        """
+          curl -sf http://127.0.0.1:9090/api/v1/status/config \
+            | jq -r .data.yaml \
+            | yq '.scrape_configs | any(.job_name == "node")' \
+            | grep true
+        """
+      )
+  '';
+})
diff --git a/nixos/tests/prometheus/default.nix b/nixos/tests/prometheus/default.nix
new file mode 100644
index 0000000000000..133922a453c05
--- /dev/null
+++ b/nixos/tests/prometheus/default.nix
@@ -0,0 +1,13 @@
+{ system ? builtins.currentSystem
+, config ? { }
+, pkgs ? import ../../.. { inherit system config; }
+}:
+
+{
+  alertmanager = import ./alertmanager.nix { inherit system pkgs; };
+  config-reload = import ./config-reload.nix { inherit system pkgs; };
+  federation = import ./federation.nix { inherit system pkgs; };
+  prometheus-pair = import ./prometheus-pair.nix { inherit system pkgs; };
+  pushgateway = import ./pushgateway.nix { inherit system pkgs; };
+  remote-write = import ./remote-write.nix { inherit system pkgs; };
+}
diff --git a/nixos/tests/prometheus/federation.nix b/nixos/tests/prometheus/federation.nix
new file mode 100644
index 0000000000000..0f05166c8f5da
--- /dev/null
+++ b/nixos/tests/prometheus/federation.nix
@@ -0,0 +1,213 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-federation";
+
+  nodes = {
+    global1 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        scrapeConfigs = [
+          {
+            job_name = "federate";
+            honor_labels = true;
+            metrics_path = "/federate";
+
+            params = {
+              "match[]" = [
+                "{job=\"node\"}"
+                "{job=\"prometheus\"}"
+              ];
+            };
+
+            static_configs = [
+              {
+                targets = [
+                  "prometheus1:${toString config.services.prometheus.port}"
+                  "prometheus2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "global1:${toString config.services.prometheus.port}"
+                  "global2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    global2 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        scrapeConfigs = [
+          {
+            job_name = "federate";
+            honor_labels = true;
+            metrics_path = "/federate";
+
+            params = {
+              "match[]" = [
+                "{job=\"node\"}"
+                "{job=\"prometheus\"}"
+              ];
+            };
+
+            static_configs = [
+              {
+                targets = [
+                  "prometheus1:${toString config.services.prometheus.port}"
+                  "prometheus2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "global1:${toString config.services.prometheus.port}"
+                  "global2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    prometheus1 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        scrapeConfigs = [
+          {
+            job_name = "node";
+            static_configs = [
+              {
+                targets = [
+                  "node1:${toString config.services.prometheus.exporters.node.port}"
+                ];
+              }
+            ];
+          }
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "prometheus1:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    prometheus2 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        scrapeConfigs = [
+          {
+            job_name = "node";
+            static_configs = [
+              {
+                targets = [
+                  "node2:${toString config.services.prometheus.exporters.node.port}"
+                ];
+              }
+            ];
+          }
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "prometheus2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    node1 = { config, pkgs, ... }: {
+      services.prometheus.exporters.node = {
+        enable = true;
+        openFirewall = true;
+      };
+    };
+
+    node2 = { config, pkgs, ... }: {
+      services.prometheus.exporters.node = {
+        enable = true;
+        openFirewall = true;
+      };
+    };
+  };
+
+  testScript = ''
+    for machine in node1, node2:
+      machine.wait_for_unit("prometheus-node-exporter")
+      machine.wait_for_open_port(9100)
+
+    for machine in prometheus1, prometheus2, global1, global2:
+      machine.wait_for_unit("prometheus")
+      machine.wait_for_open_port(9090)
+
+    # Verify both servers got the same data from the exporter
+    for machine in prometheus1, prometheus2:
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | "
+        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+      )
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | "
+        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+      )
+
+    for machine in global1, global2:
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | "
+        + "jq '.data.result[0].value[1]' | grep '\"2\"'"
+      )
+
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | "
+        + "jq '.data.result[0].value[1]' | grep '\"4\"'"
+      )
+  '';
+})
diff --git a/nixos/tests/prometheus/prometheus-pair.nix b/nixos/tests/prometheus/prometheus-pair.nix
new file mode 100644
index 0000000000000..3ac70ca0403ec
--- /dev/null
+++ b/nixos/tests/prometheus/prometheus-pair.nix
@@ -0,0 +1,87 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-pair";
+
+  nodes = {
+    prometheus1 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+        scrapeConfigs = [
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "prometheus1:${toString config.services.prometheus.port}"
+                  "prometheus2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    prometheus2 = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+        scrapeConfigs = [
+          {
+            job_name = "prometheus";
+            static_configs = [
+              {
+                targets = [
+                  "prometheus1:${toString config.services.prometheus.port}"
+                  "prometheus2:${toString config.services.prometheus.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+  };
+
+  testScript = ''
+    for machine in prometheus1, prometheus2:
+      machine.wait_for_unit("prometheus")
+      machine.wait_for_open_port(9090)
+      machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'version=${pkgs.prometheus.version}'")
+      machine.wait_until_succeeds("curl -sSf http://localhost:9090/-/healthy")
+
+    # Both Prometheus instances are ready - run some queries
+    for machine in prometheus1, prometheus2:
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\",version=\"${pkgs.prometheus.version}\"\}' | "
+        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+      )
+
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\"\}' | "
+        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+      )
+
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
+        + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus.version}\"'"
+      )
+
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | "
+        + "jq '.data.result[0].value[1]' | grep '\"2\"'"
+      )
+
+    prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v '✓'"))
+  '';
+})
diff --git a/nixos/tests/prometheus/pushgateway.nix b/nixos/tests/prometheus/pushgateway.nix
new file mode 100644
index 0000000000000..7904c8bf45b04
--- /dev/null
+++ b/nixos/tests/prometheus/pushgateway.nix
@@ -0,0 +1,94 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-pushgateway";
+
+  nodes = {
+    prometheus = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        scrapeConfigs = [
+          {
+            job_name = "pushgateway";
+            static_configs = [
+              {
+                targets = [
+                  "pushgateway:9091"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    pushgateway = { config, pkgs, ... }: {
+      networking.firewall.allowedTCPPorts = [ 9091 ];
+
+      services.prometheus.pushgateway = {
+        enable = true;
+      };
+    };
+
+    client = { config, pkgs, ... }: {
+    };
+  };
+
+  testScript = ''
+    pushgateway.wait_for_unit("pushgateway")
+    pushgateway.wait_for_open_port(9091)
+    pushgateway.wait_until_succeeds("curl -s http://127.0.0.1:9091/-/ready")
+    pushgateway.wait_until_succeeds("journalctl -o cat -u pushgateway.service | grep 'version=${pkgs.prometheus-pushgateway.version}'")
+
+    prometheus.wait_for_unit("prometheus")
+    prometheus.wait_for_open_port(9090)
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"pushgateway\"\})' | "
+      + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+    )
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(pushgateway_build_info)%20by%20(version)' | "
+      + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-pushgateway.version}\"'"
+    )
+
+    client.wait_for_unit("network-online.target")
+
+    # Add a metric and check in Prometheus
+    client.wait_until_succeeds(
+      "echo 'some_metric 3.14' | curl --data-binary @- http://pushgateway:9091/metrics/job/some_job"
+    )
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | "
+      + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
+    )
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | "
+      + "jq '.data.result[0].value[1]' | grep 'null'"
+    )
+
+    # Delete the metric, check not in Prometheus
+    client.wait_until_succeeds(
+      "curl -X DELETE http://pushgateway:9091/metrics/job/some_job"
+    )
+
+    prometheus.wait_until_fails(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | "
+      + "jq '.data.result[0].value[1]' | grep '\"3.14\"'"
+    )
+
+    prometheus.wait_until_succeeds(
+      "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | "
+      + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+    )
+  '';
+})
diff --git a/nixos/tests/prometheus/remote-write.nix b/nixos/tests/prometheus/remote-write.nix
new file mode 100644
index 0000000000000..24092b9fb88da
--- /dev/null
+++ b/nixos/tests/prometheus/remote-write.nix
@@ -0,0 +1,73 @@
+import ../make-test-python.nix ({ lib, pkgs, ... }:
+
+{
+  name = "prometheus-remote-write";
+
+  nodes = {
+    receiver = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        extraFlags = [ "--web.enable-remote-write-receiver" ];
+      };
+    };
+
+    prometheus = { config, pkgs, ... }: {
+      environment.systemPackages = [ pkgs.jq ];
+
+      networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ];
+
+      services.prometheus = {
+        enable = true;
+        globalConfig.scrape_interval = "2s";
+
+        remoteWrite = [
+          {
+            url = "http://receiver:9090/api/v1/write";
+          }
+        ];
+
+        scrapeConfigs = [
+          {
+            job_name = "node";
+            static_configs = [
+              {
+                targets = [
+                  "node:${toString config.services.prometheus.exporters.node.port}"
+                ];
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    node = { config, pkgs, ... }: {
+      services.prometheus.exporters.node = {
+        enable = true;
+        openFirewall = true;
+      };
+    };
+  };
+
+  testScript = ''
+    node.wait_for_unit("prometheus-node-exporter")
+    node.wait_for_open_port(9100)
+
+    for machine in prometheus, receiver:
+      machine.wait_for_unit("prometheus")
+      machine.wait_for_open_port(9090)
+
+    # Verify both servers got the same data from the exporter
+    for machine in prometheus, receiver:
+      machine.wait_until_succeeds(
+        "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=node_exporter_build_info\{instance=\"node:9100\"\}' | "
+        + "jq '.data.result[0].value[1]' | grep '\"1\"'"
+      )
+  '';
+})
diff --git a/nixos/tests/prometheus.nix b/nixos/tests/thanos.nix
index 0111273893775..5bdfab7b3573f 100644
--- a/nixos/tests/prometheus.nix
+++ b/nixos/tests/thanos.nix
@@ -212,8 +212,6 @@ in import ./make-test-python.nix {
   };
 
   testScript = { nodes, ... } : ''
-    import json
-
     # Before starting the other machines we first make sure that our S3 service is online
     # and has a bucket added for thanos:
     s3.start()
@@ -289,61 +287,5 @@ in import ./make-test-python.nix {
         + "jq .thanos.labels.some_label | "
         + "grep 'required by thanos'"
     )
-
-    # Check if switching to a NixOS configuration that changes the prometheus
-    # configuration reloads (instead of restarts) prometheus before the switch
-    # finishes successfully:
-    with subtest("config change reloads prometheus"):
-        # We check if prometheus has finished reloading by looking for the message
-        # "Completed loading of configuration file" in the journal between the start
-        # and finish of switching to the new NixOS configuration.
-        #
-        # To mark the start we record the journal cursor before starting the switch:
-        cursor_before_switching = json.loads(
-            prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR")
-        )["__CURSOR"]
-
-        # Now we switch:
-        prometheus_config_change = prometheus.succeed(
-            "readlink /run/current-system/specialisation/prometheus-config-change"
-        ).strip()
-        prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test")
-
-        # Next we retrieve all logs since the start of switching:
-        logs_after_starting_switching = prometheus.succeed(
-            """
-              journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE
-            """.format(
-                cursor_before_switching=cursor_before_switching
-            )
-        )
-
-        # Finally we check if the message "Completed loading of configuration file"
-        # occurs before the "finished switching to system configuration" message:
-        finished_switching_msg = (
-            "finished switching to system configuration " + prometheus_config_change
-        )
-        reloaded_before_switching_finished = False
-        finished_switching = False
-        for log_line in logs_after_starting_switching.split("\n"):
-            msg = json.loads(log_line)["MESSAGE"]
-            if "Completed loading of configuration file" in msg:
-                reloaded_before_switching_finished = True
-            if msg == finished_switching_msg:
-                finished_switching = True
-                break
-
-        assert reloaded_before_switching_finished
-        assert finished_switching
-
-        # Check if the reloaded config includes the new s3-node_exporter job:
-        prometheus.succeed(
-          """
-            curl -sf http://127.0.0.1:${toString queryPort}/api/v1/status/config \
-              | jq -r .data.yaml \
-              | yq '.scrape_configs | any(.job_name == "s3-node_exporter")' \
-              | grep true
-          """
-        )
   '';
 }
diff --git a/pkgs/by-name/al/alertmanager-webhook-logger/package.nix b/pkgs/by-name/al/alertmanager-webhook-logger/package.nix
new file mode 100644
index 0000000000000..e04759dda7d37
--- /dev/null
+++ b/pkgs/by-name/al/alertmanager-webhook-logger/package.nix
@@ -0,0 +1,33 @@
+{ lib
+, stdenv
+, buildGoModule
+, fetchFromGitHub
+, nixosTests
+}:
+
+buildGoModule rec {
+  pname = "alertmanager-webhook-logger";
+  version = "1.0";
+
+  src = fetchFromGitHub {
+    owner = "tomtom-international";
+    repo = "alertmanager-webhook-logger";
+    # The fetched revision is the bare version string (no "v" prefix).
+    rev = version;
+    hash = "sha256-mJbpDiTwUsFm0lDKz8UE/YF6sBvcSSR6WWLrfKvtri4=";
+  };
+
+  vendorHash = "sha256-gKtOoM9TuEIHgvSjZhqWmdexG2zDjlPuM0HjjP52DOI=";
+
+  doCheck = true;
+
+  passthru.tests = { inherit (nixosTests.prometheus) alertmanager; };
+
+  meta = with lib; {
+    description = "Generates (structured) log messages from Prometheus AlertManager webhook notifier";
+    mainProgram = "alertmanager-webhook-logger";
+    homepage = "https://github.com/tomtom-international/alertmanager-webhook-logger";
+    license = licenses.asl20;
+    maintainers = with maintainers; [ jpds ];
+  };
+}
diff --git a/pkgs/servers/monitoring/prometheus/alertmanager.nix b/pkgs/servers/monitoring/prometheus/alertmanager.nix
index ba1c35ee294d3..1db975f591d98 100644
--- a/pkgs/servers/monitoring/prometheus/alertmanager.nix
+++ b/pkgs/servers/monitoring/prometheus/alertmanager.nix
@@ -3,6 +3,7 @@
 , buildGoModule
 , fetchFromGitHub
 , installShellFiles
+, nixosTests
 }:
 
 buildGoModule rec {
@@ -39,6 +40,8 @@ buildGoModule rec {
     installShellCompletion amtool.zsh
   '';
 
+  passthru.tests = { inherit (nixosTests.prometheus) alertmanager; };
+
   meta = with lib; {
     description = "Alert dispatcher for the Prometheus monitoring system";
     homepage = "https://github.com/prometheus/alertmanager";
diff --git a/pkgs/servers/monitoring/prometheus/pushgateway.nix b/pkgs/servers/monitoring/prometheus/pushgateway.nix
index c47019aeada72..cc8e11ade6355 100644
--- a/pkgs/servers/monitoring/prometheus/pushgateway.nix
+++ b/pkgs/servers/monitoring/prometheus/pushgateway.nix
@@ -1,4 +1,4 @@
-{ lib, buildGoModule, fetchFromGitHub, testers, prometheus-pushgateway }:
+{ lib, buildGoModule, fetchFromGitHub, nixosTests, testers, prometheus-pushgateway }:
 
 buildGoModule rec {
   pname = "pushgateway";
@@ -23,8 +23,11 @@ buildGoModule rec {
     "-X github.com/prometheus/common/version.BuildDate=19700101-00:00:00"
   ];
 
-  passthru.tests.version = testers.testVersion {
-    package = prometheus-pushgateway;
+  passthru.tests = {
+    inherit (nixosTests.prometheus) pushgateway;
+    version = testers.testVersion {
+      package = prometheus-pushgateway;
+    };
   };
 
   meta = with lib; {
diff --git a/pkgs/servers/monitoring/thanos/default.nix b/pkgs/servers/monitoring/thanos/default.nix
index d3e797a1c34c4..ce4a75cea75b7 100644
--- a/pkgs/servers/monitoring/thanos/default.nix
+++ b/pkgs/servers/monitoring/thanos/default.nix
@@ -37,7 +37,7 @@ buildGoModule rec {
   passthru = {
     updateScript = nix-update-script { };
     tests = {
-      inherit (nixosTests) prometheus;
+      inherit (nixosTests) thanos;
       version = testers.testVersion {
         command = "thanos --version";
         package = thanos;