diff options
author | Franz Pletz <fpletz@fnordicwalking.de> | 2024-06-23 23:05:06 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-23 23:05:06 +0200 |
commit | 3c033186ee73229fdcdcb3266a36baf87d519840 (patch) | |
tree | 3b16162580537ac13f750b685fcada5a7a2ec3a3 | |
parent | 2789cee804747dc1066fc293ab918f77297561c5 (diff) | |
parent | 5adadf25c6eba445401bd2a270a74a9c3bac73c2 (diff) |
Merge pull request #316727 from jpds/nixos-tests-prometheus
nixos/prometheus: Revamp tests
-rw-r--r-- | maintainers/maintainer-list.nix | 5 | ||||
-rw-r--r-- | nixos/modules/module-list.nix | 1 | ||||
-rw-r--r-- | nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix | 70 | ||||
-rw-r--r-- | nixos/tests/all-tests.nix | 3 | ||||
-rw-r--r-- | nixos/tests/prometheus/alertmanager.nix | 148 | ||||
-rw-r--r-- | nixos/tests/prometheus/config-reload.nix | 116 | ||||
-rw-r--r-- | nixos/tests/prometheus/default.nix | 13 | ||||
-rw-r--r-- | nixos/tests/prometheus/federation.nix | 213 | ||||
-rw-r--r-- | nixos/tests/prometheus/prometheus-pair.nix | 87 | ||||
-rw-r--r-- | nixos/tests/prometheus/pushgateway.nix | 94 | ||||
-rw-r--r-- | nixos/tests/prometheus/remote-write.nix | 73 | ||||
-rw-r--r-- | nixos/tests/thanos.nix (renamed from nixos/tests/prometheus.nix) | 58 | ||||
-rw-r--r-- | pkgs/by-name/al/alertmanager-webhook-logger/package.nix | 33 | ||||
-rw-r--r-- | pkgs/servers/monitoring/prometheus/alertmanager.nix | 3 | ||||
-rw-r--r-- | pkgs/servers/monitoring/prometheus/pushgateway.nix | 9 | ||||
-rw-r--r-- | pkgs/servers/monitoring/thanos/default.nix | 2 |
16 files changed, 865 insertions, 63 deletions
diff --git a/maintainers/maintainer-list.nix b/maintainers/maintainer-list.nix index 2035843fbf1c1..fce218ca723a3 100644 --- a/maintainers/maintainer-list.nix +++ b/maintainers/maintainer-list.nix @@ -9848,6 +9848,11 @@ githubId = 1918771; name = "Joe Doyle"; }; + jpds = { + github = "jpds"; + githubId = 29158971; + name = "Jonathan Davies"; + }; jpentland = { email = "joe.pentland@gmail.com"; github = "jpentland"; diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index d412e414f68b5..876e40983c1e5 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -880,6 +880,7 @@ ./services/monitoring/osquery.nix ./services/monitoring/parsedmarc.nix ./services/monitoring/prometheus/alertmanager-irc-relay.nix + ./services/monitoring/prometheus/alertmanager-webhook-logger.nix ./services/monitoring/prometheus/alertmanager.nix ./services/monitoring/prometheus/default.nix ./services/monitoring/prometheus/exporters.nix diff --git a/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix b/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix new file mode 100644 index 0000000000000..b4307a76e1b02 --- /dev/null +++ b/nixos/modules/services/monitoring/prometheus/alertmanager-webhook-logger.nix @@ -0,0 +1,70 @@ +{ config, lib, pkgs, ... }: + +with lib; + +let + cfg = config.services.prometheus.alertmanagerWebhookLogger; +in +{ + options.services.prometheus.alertmanagerWebhookLogger = { + enable = mkEnableOption "Alertmanager Webhook Logger"; + + package = mkPackageOption pkgs "alertmanager-webhook-logger" { }; + + extraFlags = mkOption { + type = types.listOf types.str; + default = []; + description = "Extra command line options to pass to alertmanager-webhook-logger."; + }; + }; + + config = mkIf cfg.enable { + systemd.services.alertmanager-webhook-logger = { + description = "Alertmanager Webhook Logger"; + + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + + serviceConfig = { + ExecStart = '' + ${cfg.package}/bin/alertmanager-webhook-logger \ + ${escapeShellArgs cfg.extraFlags} + ''; + + DynamicUser = true; + NoNewPrivileges = true; + + ProtectProc = "invisible"; + ProtectSystem = "strict"; + ProtectHome = "tmpfs"; + + PrivateTmp = true; + PrivateDevices = true; + PrivateIPC = true; + + ProtectHostname = true; + ProtectClock = true; + ProtectKernelTunables = true; + ProtectKernelModules = true; + ProtectKernelLogs = true; + ProtectControlGroups = true; + + RestrictAddressFamilies = [ "AF_INET" "AF_INET6" ]; + RestrictRealtime = true; + RestrictSUIDSGID = true; + + SystemCallFilter = [ + "@system-service" + "~@cpu-emulation" + "~@privileged" + "~@reboot" + "~@setuid" + "~@swap" + ]; + }; + }; + }; + + meta.maintainers = [ maintainers.jpds ]; +} diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix index b1f7d1917ed45..1db9f702e2416 100644 --- a/nixos/tests/all-tests.nix +++ b/nixos/tests/all-tests.nix @@ -775,7 +775,7 @@ in { printing-service = handleTest ./printing.nix { socket = false; }; private-gpt = handleTest ./private-gpt.nix {}; privoxy = handleTest ./privoxy.nix {}; - prometheus = handleTest ./prometheus.nix {}; + prometheus = handleTest ./prometheus {}; prometheus-exporters = handleTest ./prometheus-exporters.nix {}; prosody = handleTest ./xmpp/prosody.nix {}; prosody-mysql = handleTest ./xmpp/prosody-mysql.nix {}; @@ -968,6 +968,7 @@ in { teleport = handleTest ./teleport.nix {}; thelounge = handleTest ./thelounge.nix {}; terminal-emulators = handleTest ./terminal-emulators.nix {}; + thanos = handleTest ./thanos.nix {}; tiddlywiki = handleTest ./tiddlywiki.nix {}; tigervnc = handleTest ./tigervnc.nix {}; timescaledb = handleTest ./timescaledb.nix {}; diff --git a/nixos/tests/prometheus/alertmanager.nix b/nixos/tests/prometheus/alertmanager.nix new file mode 100644 index 0000000000000..feda8d8fc2bcc --- /dev/null +++ b/nixos/tests/prometheus/alertmanager.nix @@ -0,0 +1,148 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-alertmanager"; + + nodes = { + prometheus = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + alertmanagers = [ + { + scheme = "http"; + static_configs = [ + { + targets = [ + "alertmanager:${toString config.services.prometheus.alertmanager.port}" + ]; + } + ]; + } + ]; + + rules = [ + '' + groups: + - name: test + rules: + - alert: InstanceDown + expr: up == 0 + for: 5s + labels: + severity: page + annotations: + summary: "Instance {{ $labels.instance }} down" + '' + ]; + + scrapeConfigs = [ + { + job_name = "alertmanager"; + static_configs = [ + { + targets = [ + "alertmanager:${toString config.services.prometheus.alertmanager.port}" + ]; + } + ]; + } + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + ]; + }; + }; + + alertmanager = { config, pkgs, ... }: { + services.prometheus.alertmanager = { + enable = true; + openFirewall = true; + + configuration = { + global = { + resolve_timeout = "1m"; + }; + + route = { + # Root route node + receiver = "test"; + group_by = ["..."]; + continue = false; + group_wait = "1s"; + group_interval = "15s"; + repeat_interval = "24h"; + }; + + receivers = [ + { + name = "test"; + webhook_configs = [ + { + url = "http://logger:6725"; + send_resolved = true; + max_alerts = 0; + } + ]; + } + ]; + }; + }; + }; + + logger = { config, pkgs, ... }: { + networking.firewall.allowedTCPPorts = [ 6725 ]; + + services.prometheus.alertmanagerWebhookLogger.enable = true; + }; + }; + + testScript = '' + alertmanager.wait_for_unit("alertmanager") + alertmanager.wait_for_open_port(9093) + alertmanager.wait_until_succeeds("curl -s http://127.0.0.1:9093/-/ready") + #alertmanager.wait_until_succeeds("journalctl -o cat -u alertmanager.service | grep 'version=${pkgs.prometheus-alertmanager.version}'") + + logger.wait_for_unit("alertmanager-webhook-logger") + logger.wait_for_open_port(6725) + + prometheus.wait_for_unit("prometheus") + prometheus.wait_for_open_port(9090) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"alertmanager\"\}==1)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(alertmanager_build_info)%20by%20(version)' | " + + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-alertmanager.version}\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\}!=1)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=alertmanager_notifications_total\{integration=\"webhook\"\}' | " + + "jq '.data.result[0].value[1]' | grep -v '\"0\"'" + ) + + logger.wait_until_succeeds( + "journalctl -o cat -u alertmanager-webhook-logger.service | grep '\"alertname\":\"InstanceDown\"'" + ) + ''; +}) diff --git a/nixos/tests/prometheus/config-reload.nix b/nixos/tests/prometheus/config-reload.nix new file mode 100644 index 0000000000000..786668c624ea9 --- /dev/null +++ b/nixos/tests/prometheus/config-reload.nix @@ -0,0 +1,116 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-config-reload"; + + nodes = { + prometheus = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + enableReload = true; + globalConfig.scrape_interval = "2s"; + scrapeConfigs = [ + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "prometheus:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + + specialisation = { + "prometheus-config-change" = { + configuration = { + environment.systemPackages = [ pkgs.yq ]; + + # This configuration just adds a new prometheus job + # to scrape the node_exporter metrics of the s3 machine. + services.prometheus = { + scrapeConfigs = [ + { + job_name = "node"; + static_configs = [ + { + targets = [ "node:${toString config.services.prometheus.exporters.node.port}" ]; + } + ]; + } + ]; + }; + }; + }; + }; + }; + }; + + testScript = '' + prometheus.wait_for_unit("prometheus") + prometheus.wait_for_open_port(9090) + + # Check if switching to a NixOS configuration that changes the prometheus + # configuration reloads (instead of restarts) prometheus before the switch + # finishes successfully: + with subtest("config change reloads prometheus"): + import json + # We check if prometheus has finished reloading by looking for the message + # "Completed loading of configuration file" in the journal between the start + # and finish of switching to the new NixOS configuration. + # + # To mark the start we record the journal cursor before starting the switch: + cursor_before_switching = json.loads( + prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR") + )["__CURSOR"] + + # Now we switch: + prometheus_config_change = prometheus.succeed( + "readlink /run/current-system/specialisation/prometheus-config-change" + ).strip() + prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test") + + # Next we retrieve all logs since the start of switching: + logs_after_starting_switching = prometheus.succeed( + """ + journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE + """.format( + cursor_before_switching=cursor_before_switching + ) + ) + + # Finally we check if the message "Completed loading of configuration file" + # occurs before the "finished switching to system configuration" message: + finished_switching_msg = ( + "finished switching to system configuration " + prometheus_config_change + ) + reloaded_before_switching_finished = False + finished_switching = False + for log_line in logs_after_starting_switching.split("\n"): + msg = json.loads(log_line)["MESSAGE"] + if "Completed loading of configuration file" in msg: + reloaded_before_switching_finished = True + if msg == finished_switching_msg: + finished_switching = True + break + + assert reloaded_before_switching_finished + assert finished_switching + + # Check if the reloaded config includes the new node job: + prometheus.succeed( + """ + curl -sf http://127.0.0.1:9090/api/v1/status/config \ + | jq -r .data.yaml \ + | yq '.scrape_configs | any(.job_name == "node")' \ + | grep true + """ + ) + ''; +}) diff --git a/nixos/tests/prometheus/default.nix b/nixos/tests/prometheus/default.nix new file mode 100644 index 0000000000000..133922a453c05 --- /dev/null +++ b/nixos/tests/prometheus/default.nix @@ -0,0 +1,13 @@ +{ system ? builtins.currentSystem +, config ? { } +, pkgs ? import ../../.. { inherit system config; } +}: + +{ + alertmanager = import ./alertmanager.nix { inherit system pkgs; }; + config-reload = import ./config-reload.nix { inherit system pkgs; }; + federation = import ./federation.nix { inherit system pkgs; }; + prometheus-pair = import ./prometheus-pair.nix { inherit system pkgs; }; + pushgateway = import ./pushgateway.nix { inherit system pkgs; }; + remote-write = import ./remote-write.nix { inherit system pkgs; }; +} diff --git a/nixos/tests/prometheus/federation.nix b/nixos/tests/prometheus/federation.nix new file mode 100644 index 0000000000000..0f05166c8f5da --- /dev/null +++ b/nixos/tests/prometheus/federation.nix @@ -0,0 +1,213 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-federation"; + + nodes = { + global1 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + scrapeConfigs = [ + { + job_name = "federate"; + honor_labels = true; + metrics_path = "/federate"; + + params = { + "match[]" = [ + "{job=\"node\"}" + "{job=\"prometheus\"}" + ]; + }; + + static_configs = [ + { + targets = [ + "prometheus1:${toString config.services.prometheus.port}" + "prometheus2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "global1:${toString config.services.prometheus.port}" + "global2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + + global2 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + scrapeConfigs = [ + { + job_name = "federate"; + honor_labels = true; + metrics_path = "/federate"; + + params = { + "match[]" = [ + "{job=\"node\"}" + "{job=\"prometheus\"}" + ]; + }; + + static_configs = [ + { + targets = [ + "prometheus1:${toString config.services.prometheus.port}" + "prometheus2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "global1:${toString config.services.prometheus.port}" + "global2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + + prometheus1 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + scrapeConfigs = [ + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node1:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "prometheus1:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + + prometheus2 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + scrapeConfigs = [ + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node2:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "prometheus2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + + node1 = { config, pkgs, ... }: { + services.prometheus.exporters.node = { + enable = true; + openFirewall = true; + }; + }; + + node2 = { config, pkgs, ... }: { + services.prometheus.exporters.node = { + enable = true; + openFirewall = true; + }; + }; + }; + + testScript = '' + for machine in node1, node2: + machine.wait_for_unit("prometheus-node-exporter") + machine.wait_for_open_port(9100) + + for machine in prometheus1, prometheus2, global1, global2: + machine.wait_for_unit("prometheus") + machine.wait_for_open_port(9090) + + # Verify both servers got the same data from the exporter + for machine in prometheus1, prometheus2: + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + for machine in global1, global2: + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"node\"\})' | " + + "jq '.data.result[0].value[1]' | grep '\"2\"'" + ) + + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(prometheus_build_info)' | " + + "jq '.data.result[0].value[1]' | grep '\"4\"'" + ) + ''; +}) diff --git a/nixos/tests/prometheus/prometheus-pair.nix b/nixos/tests/prometheus/prometheus-pair.nix new file mode 100644 index 0000000000000..3ac70ca0403ec --- /dev/null +++ b/nixos/tests/prometheus/prometheus-pair.nix @@ -0,0 +1,87 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-pair"; + + nodes = { + prometheus1 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + scrapeConfigs = [ + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "prometheus1:${toString config.services.prometheus.port}" + "prometheus2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + + prometheus2 = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + scrapeConfigs = [ + { + job_name = "prometheus"; + static_configs = [ + { + targets = [ + "prometheus1:${toString config.services.prometheus.port}" + "prometheus2:${toString config.services.prometheus.port}" + ]; + } + ]; + } + ]; + }; + }; + }; + + testScript = '' + for machine in prometheus1, prometheus2: + machine.wait_for_unit("prometheus") + machine.wait_for_open_port(9090) + machine.wait_until_succeeds("journalctl -o cat -u prometheus.service | grep 'version=${pkgs.prometheus.version}'") + machine.wait_until_succeeds("curl -sSf http://localhost:9090/-/healthy") + + # Prometheii ready - run some queries + for machine in prometheus1, prometheus2: + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\",version=\"${pkgs.prometheus.version}\"\}' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=prometheus_build_info\{instance=\"prometheus1:9090\"\}' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | " + + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus.version}\"'" + ) + + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(prometheus_build_info)%20by%20(version)' | " + + "jq '.data.result[0].value[1]' | grep '\"2\"'" + ) + + prometheus1.log(prometheus1.succeed("systemd-analyze security prometheus.service | grep -v '✓'")) + ''; +}) diff --git a/nixos/tests/prometheus/pushgateway.nix b/nixos/tests/prometheus/pushgateway.nix new file mode 100644 index 0000000000000..7904c8bf45b04 --- /dev/null +++ b/nixos/tests/prometheus/pushgateway.nix @@ -0,0 +1,94 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-pushgateway"; + + nodes = { + prometheus = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + scrapeConfigs = [ + { + job_name = "pushgateway"; + static_configs = [ + { + targets = [ + "pushgateway:9091" + ]; + } + ]; + } + ]; + }; + }; + + pushgateway = { config, pkgs, ... }: { + networking.firewall.allowedTCPPorts = [ 9091 ]; + + services.prometheus.pushgateway = { + enable = true; + }; + }; + + client = { config, pkgs, ... }: { + }; + }; + + testScript = '' + pushgateway.wait_for_unit("pushgateway") + pushgateway.wait_for_open_port(9091) + pushgateway.wait_until_succeeds("curl -s http://127.0.0.1:9091/-/ready") + pushgateway.wait_until_succeeds("journalctl -o cat -u pushgateway.service | grep 'version=${pkgs.prometheus-pushgateway.version}'") + + prometheus.wait_for_unit("prometheus") + prometheus.wait_for_open_port(9090) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=count(up\{job=\"pushgateway\"\})' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=sum(pushgateway_build_info)%20by%20(version)' | " + + "jq '.data.result[0].metric.version' | grep '\"${pkgs.prometheus-pushgateway.version}\"'" + ) + + client.wait_for_unit("network-online.target") + + # Add a metric and check in Prometheus + client.wait_until_succeeds( + "echo 'some_metric 3.14' | curl --data-binary @- http://pushgateway:9091/metrics/job/some_job" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | " + + "jq '.data.result[0].value[1]' | grep '\"3.14\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | " + + "jq '.data.result[0].value[1]' | grep 'null'" + ) + + # Delete the metric, check not in Prometheus + client.wait_until_succeeds( + "curl -X DELETE http://pushgateway:9091/metrics/job/some_job" + ) + + prometheus.wait_until_fails( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=some_metric' | " + + "jq '.data.result[0].value[1]' | grep '\"3.14\"'" + ) + + prometheus.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=absent(some_metric)' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + ''; +}) diff --git a/nixos/tests/prometheus/remote-write.nix b/nixos/tests/prometheus/remote-write.nix new file mode 100644 index 0000000000000..24092b9fb88da --- /dev/null +++ b/nixos/tests/prometheus/remote-write.nix @@ -0,0 +1,73 @@ +import ../make-test-python.nix ({ lib, pkgs, ... }: + +{ + name = "prometheus-remote-write"; + + nodes = { + receiver = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + extraFlags = [ "--web.enable-remote-write-receiver" ]; + }; + }; + + prometheus = { config, pkgs, ... }: { + environment.systemPackages = [ pkgs.jq ]; + + networking.firewall.allowedTCPPorts = [ config.services.prometheus.port ]; + + services.prometheus = { + enable = true; + globalConfig.scrape_interval = "2s"; + + remoteWrite = [ + { + url = "http://receiver:9090/api/v1/write"; + } + ]; + + scrapeConfigs = [ + { + job_name = "node"; + static_configs = [ + { + targets = [ + "node:${toString config.services.prometheus.exporters.node.port}" + ]; + } + ]; + } + ]; + }; + }; + + node = { config, pkgs, ... }: { + services.prometheus.exporters.node = { + enable = true; + openFirewall = true; + }; + }; + }; + + testScript = '' + node.wait_for_unit("prometheus-node-exporter") + node.wait_for_open_port(9100) + + for machine in prometheus, receiver: + machine.wait_for_unit("prometheus") + machine.wait_for_open_port(9090) + + # Verify both servers got the same data from the exporter + for machine in prometheus, receiver: + machine.wait_until_succeeds( + "curl -sf 'http://127.0.0.1:9090/api/v1/query?query=node_exporter_build_info\{instance=\"node:9100\"\}' | " + + "jq '.data.result[0].value[1]' | grep '\"1\"'" + ) + ''; +}) diff --git a/nixos/tests/prometheus.nix b/nixos/tests/thanos.nix index 0111273893775..5bdfab7b3573f 100644 --- a/nixos/tests/prometheus.nix +++ b/nixos/tests/thanos.nix @@ -212,8 +212,6 @@ in import ./make-test-python.nix { }; testScript = { nodes, ... } : '' - import json - # Before starting the other machines we first make sure that our S3 service is online # and has a bucket added for thanos: s3.start() @@ -289,61 +287,5 @@ in import ./make-test-python.nix { + "jq .thanos.labels.some_label | " + "grep 'required by thanos'" ) - - # Check if switching to a NixOS configuration that changes the prometheus - # configuration reloads (instead of restarts) prometheus before the switch - # finishes successfully: - with subtest("config change reloads prometheus"): - # We check if prometheus has finished reloading by looking for the message - # "Completed loading of configuration file" in the journal between the start - # and finish of switching to the new NixOS configuration. - # - # To mark the start we record the journal cursor before starting the switch: - cursor_before_switching = json.loads( - prometheus.succeed("journalctl -n1 -o json --output-fields=__CURSOR") - )["__CURSOR"] - - # Now we switch: - prometheus_config_change = prometheus.succeed( - "readlink /run/current-system/specialisation/prometheus-config-change" - ).strip() - prometheus.succeed(prometheus_config_change + "/bin/switch-to-configuration test") - - # Next we retrieve all logs since the start of switching: - logs_after_starting_switching = prometheus.succeed( - """ - journalctl --after-cursor='{cursor_before_switching}' -o json --output-fields=MESSAGE - """.format( - cursor_before_switching=cursor_before_switching - ) - ) - - # Finally we check if the message "Completed loading of configuration file" - # occurs before the "finished switching to system configuration" message: - finished_switching_msg = ( - "finished switching to system configuration " + prometheus_config_change - ) - reloaded_before_switching_finished = False - finished_switching = False - for log_line in logs_after_starting_switching.split("\n"): - msg = json.loads(log_line)["MESSAGE"] - if "Completed loading of configuration file" in msg: - reloaded_before_switching_finished = True - if msg == finished_switching_msg: - finished_switching = True - break - - assert reloaded_before_switching_finished - assert finished_switching - - # Check if the reloaded config includes the new s3-node_exporter job: - prometheus.succeed( - """ - curl -sf http://127.0.0.1:${toString queryPort}/api/v1/status/config \ - | jq -r .data.yaml \ - | yq '.scrape_configs | any(.job_name == "s3-node_exporter")' \ - | grep true - """ - ) ''; } diff --git a/pkgs/by-name/al/alertmanager-webhook-logger/package.nix b/pkgs/by-name/al/alertmanager-webhook-logger/package.nix new file mode 100644 index 0000000000000..e04759dda7d37 --- /dev/null +++ b/pkgs/by-name/al/alertmanager-webhook-logger/package.nix @@ -0,0 +1,33 @@ +{ lib +, stdenv +, buildGoModule +, fetchFromGitHub +, nixosTests +}: + +buildGoModule rec { + pname = "alertmanager-webhook-logger"; + version = "1.0"; + rev = "${version}"; + + src = fetchFromGitHub { + inherit rev; + owner = "tomtom-international"; + repo = "alertmanager-webhook-logger"; + hash = "sha256-mJbpDiTwUsFm0lDKz8UE/YF6sBvcSSR6WWLrfKvtri4="; + }; + + vendorHash = "sha256-gKtOoM9TuEIHgvSjZhqWmdexG2zDjlPuM0HjjP52DOI="; + + doCheck = true; + + passthru.tests = { inherit (nixosTests.prometheus) alertmanager; }; + + meta = with lib; { + description = "Generates (structured) log messages from Prometheus AlertManager webhook notifier"; + mainProgram = "alertmanager-webhook-logger"; + homepage = "https://github.com/tomtom-international/alertmanager-webhook-logger"; + license = licenses.asl20; + maintainers = with maintainers; [ jpds ]; + }; +} diff --git a/pkgs/servers/monitoring/prometheus/alertmanager.nix b/pkgs/servers/monitoring/prometheus/alertmanager.nix index ba1c35ee294d3..1db975f591d98 100644 --- a/pkgs/servers/monitoring/prometheus/alertmanager.nix +++ b/pkgs/servers/monitoring/prometheus/alertmanager.nix @@ -3,6 +3,7 @@ , buildGoModule , fetchFromGitHub , installShellFiles +, nixosTests }: buildGoModule rec { @@ -39,6 +40,8 @@ buildGoModule rec { installShellCompletion amtool.zsh ''; + passthru.tests = { inherit (nixosTests.prometheus) alertmanager; }; + meta = with lib; { description = "Alert dispatcher for the Prometheus monitoring system"; homepage = "https://github.com/prometheus/alertmanager"; diff --git a/pkgs/servers/monitoring/prometheus/pushgateway.nix b/pkgs/servers/monitoring/prometheus/pushgateway.nix index c47019aeada72..cc8e11ade6355 100644 --- a/pkgs/servers/monitoring/prometheus/pushgateway.nix +++ b/pkgs/servers/monitoring/prometheus/pushgateway.nix @@ -1,4 +1,4 @@ -{ lib, buildGoModule, fetchFromGitHub, testers, prometheus-pushgateway }: +{ lib, buildGoModule, fetchFromGitHub, nixosTests, testers, prometheus-pushgateway }: buildGoModule rec { pname = "pushgateway"; @@ -23,8 +23,11 @@ buildGoModule rec { "-X github.com/prometheus/common/version.BuildDate=19700101-00:00:00" ]; - passthru.tests.version = testers.testVersion { - package = prometheus-pushgateway; + passthru.tests = { + inherit (nixosTests.prometheus) pushgateway; + version = testers.testVersion { + package = prometheus-pushgateway; + }; }; meta = with lib; { diff --git a/pkgs/servers/monitoring/thanos/default.nix b/pkgs/servers/monitoring/thanos/default.nix index d3e797a1c34c4..ce4a75cea75b7 100644 --- a/pkgs/servers/monitoring/thanos/default.nix +++ b/pkgs/servers/monitoring/thanos/default.nix @@ -37,7 +37,7 @@ buildGoModule rec { passthru = { updateScript = nix-update-script { }; tests = { - inherit (nixosTests) prometheus; + inherit (nixosTests) thanos; version = testers.testVersion { command = "thanos --version"; package = thanos; |