summary refs log tree commit diff
path: root/nixos
diff options
context:
space:
mode:
authorSandro <sandro.jaeckel@gmail.com>2022-09-25 23:43:29 +0200
committerGitHub <noreply@github.com>2022-09-25 23:43:29 +0200
commitd374d79d89eac741c5f94bc70002dc753b5e50af (patch)
treea01421be2923170e5f54dda40646342e36d2eef8 /nixos
parentcf49501b2197568ae226c34ee839b8a65772a4cd (diff)
parent5a93846946dddcc99eac92a6278bb202943d4a9f (diff)
Merge pull request #190496 from NukaDuka/kthxbye
Diffstat (limited to 'nixos')
-rw-r--r--nixos/doc/manual/from_md/release-notes/rl-2211.section.xml8
-rw-r--r--nixos/doc/manual/release-notes/rl-2211.section.md2
-rw-r--r--nixos/modules/module-list.nix1
-rw-r--r--nixos/modules/services/monitoring/kthxbye.nix166
-rw-r--r--nixos/tests/all-tests.nix1
-rw-r--r--nixos/tests/kthxbye.nix110
6 files changed, 288 insertions, 0 deletions
diff --git a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml
index 8f4a7dd002d34..cdf6e99994085 100644
--- a/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml
+++ b/nixos/doc/manual/from_md/release-notes/rl-2211.section.xml
@@ -212,6 +212,14 @@
       </listitem>
       <listitem>
         <para>
+          <link xlink:href="https://github.com/prymitive/kthxbye">kthxbye</link>,
+          an alert acknowledgement management daemon for Prometheus
+          Alertmanager. Available as
+          <link xlink:href="options.html#opt-services.kthxbye.enable">services.kthxbye</link>
+        </para>
+      </listitem>
+      <listitem>
+        <para>
           <link xlink:href="https://github.com/jtroo/kanata">kanata</link>,
           a tool to improve keyboard comfort and usability with advanced
           customization. Available as
diff --git a/nixos/doc/manual/release-notes/rl-2211.section.md b/nixos/doc/manual/release-notes/rl-2211.section.md
index f6515256dbc6b..607e5cfeb4d48 100644
--- a/nixos/doc/manual/release-notes/rl-2211.section.md
+++ b/nixos/doc/manual/release-notes/rl-2211.section.md
@@ -77,6 +77,8 @@ In addition to numerous new and upgraded packages, this release has the followin
 - [infnoise](https://github.com/leetronics/infnoise), a hardware True Random Number Generator dongle.
   Available as [services.infnoise](options.html#opt-services.infnoise.enable).
 
+- [kthxbye](https://github.com/prymitive/kthxbye), an alert acknowledgement management daemon for Prometheus Alertmanager. Available as [services.kthxbye](options.html#opt-services.kthxbye.enable)
+
 - [kanata](https://github.com/jtroo/kanata), a tool to improve keyboard comfort and usability with advanced customization.
   Available as [services.kanata](options.html#opt-services.kanata.enable).
 
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 699c5c7dead4b..5568577909b9c 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -681,6 +681,7 @@
   ./services/monitoring/heapster.nix
   ./services/monitoring/incron.nix
   ./services/monitoring/kapacitor.nix
+  ./services/monitoring/kthxbye.nix
   ./services/monitoring/loki.nix
   ./services/monitoring/longview.nix
   ./services/monitoring/mackerel-agent.nix
diff --git a/nixos/modules/services/monitoring/kthxbye.nix b/nixos/modules/services/monitoring/kthxbye.nix
new file mode 100644
index 0000000000000..3f988dcb722fe
--- /dev/null
+++ b/nixos/modules/services/monitoring/kthxbye.nix
@@ -0,0 +1,166 @@
+{ config, pkgs, lib, ... }:
+with lib;
+
+let
+  cfg = config.services.kthxbye;
+in
+
+{
+  options.services.kthxbye = {
+    enable = mkEnableOption (mdDoc "kthxbye alert acknowledgement management daemon");
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.kthxbye;
+      defaultText = literalExpression "pkgs.kthxbye";
+      description = mdDoc ''
+        The kthxbye package that should be used.
+      '';
+    };
+
+    openFirewall = mkOption {
+      type = types.bool;
+      default = false;
+      description = mdDoc ''
+        Whether to open ports in the firewall needed for the daemon to function.
+      '';
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = mdDoc ''
+        Extra command line options.
+
+        Documentation can be found [here](https://github.com/prymitive/kthxbye/blob/main/README.md).
+      '';
+      example = literalExpression ''
+        [
+          "-extend-with-prefix 'ACK!'"
+        ];
+      '';
+    };
+
+    alertmanager = {
+      timeout = mkOption {
+        type = types.str;
+        default = "1m0s";
+        description = mdDoc ''
+          Alertmanager request timeout duration in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+        '';
+        example = "30s";
+      };
+      uri = mkOption {
+        type = types.str;
+        default = "http://localhost:9093";
+        description = mdDoc ''
+          Alertmanager URI to use.
+        '';
+        example = "https://alertmanager.example.com";
+      };
+    };
+
+    extendBy = mkOption {
+      type = types.str;
+      default = "15m0s";
+      description = mdDoc ''
+        Extend silences by adding DURATION seconds.
+
+        DURATION should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "6h0m0s";
+    };
+
+    extendIfExpiringIn = mkOption {
+      type = types.str;
+      default = "5m0s";
+      description = mdDoc ''
+        Extend silences that are about to expire in the next DURATION seconds.
+
+        DURATION should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "1m0s";
+    };
+
+    extendWithPrefix = mkOption {
+      type = types.str;
+      default = "ACK!";
+      description = mdDoc ''
+        Extend silences with comment starting with PREFIX string.
+      '';
+      example = "!perma-silence";
+    };
+
+    interval = mkOption {
+      type = types.str;
+      default = "45s";
+      description = mdDoc ''
+        Silence check interval duration in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "30s";
+    };
+
+    listenAddress = mkOption {
+      type = types.str;
+      default = "0.0.0.0";
+      description = mdDoc ''
+        The address to listen on for HTTP requests.
+      '';
+      example = "127.0.0.1";
+    };
+
+    port = mkOption {
+      type = types.port;
+      default = 8080;
+      description = mdDoc ''
+        The port to listen on for HTTP requests.
+      '';
+    };
+
+    logJSON = mkOption {
+      type = types.bool;
+      default = false;
+      description = mdDoc ''
+        Format logged messages as JSON.
+      '';
+    };
+
+    maxDuration = mkOption {
+      type = with types; nullOr str;
+      default = null;
+      description = mdDoc ''
+        Maximum duration of a silence, it won't be extended anymore after reaching it.
+
+        Duration should be provided in the [time.Duration](https://pkg.go.dev/time#ParseDuration) format.
+      '';
+      example = "30d";
+    };
+  };
+
+  config = mkIf cfg.enable {
+    systemd.services.kthxbye = {
+      description = "kthxbye Alertmanager ack management daemon";
+      wantedBy = [ "multi-user.target" ];
+      script = ''
+        ${cfg.package}/bin/kthxbye \
+          -alertmanager.timeout ${cfg.alertmanager.timeout} \
+          -alertmanager.uri ${cfg.alertmanager.uri} \
+          -extend-by ${cfg.extendBy} \
+          -extend-if-expiring-in ${cfg.extendIfExpiringIn} \
+          -extend-with-prefix ${cfg.extendWithPrefix} \
+          -interval ${cfg.interval} \
+          -listen ${cfg.listenAddress}:${toString cfg.port} \
+          ${optionalString cfg.logJSON "-log-json"} \
+          ${optionalString (cfg.maxDuration != null) "-max-duration ${cfg.maxDuration}"} \
+          ${concatStringsSep " " cfg.extraOptions}
+      '';
+      serviceConfig = {
+        Type = "simple";
+        DynamicUser = true;
+        Restart = "on-failure";
+      };
+    };
+
+    networking.firewall.allowedTCPPorts = mkIf cfg.openFirewall [ cfg.port ];
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index b486be5fee35e..5ea2c94ccb1cc 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -277,6 +277,7 @@ in {
   komga = handleTest ./komga.nix {};
   krb5 = discoverTests (import ./krb5 {});
   ksm = handleTest ./ksm.nix {};
+  kthxbye = handleTest ./kthxbye.nix {};
   kubernetes = handleTestOn ["x86_64-linux"] ./kubernetes {};
   languagetool = handleTest ./languagetool.nix {};
   latestKernel.login = handleTest ./login.nix { latestKernel = true; };
diff --git a/nixos/tests/kthxbye.nix b/nixos/tests/kthxbye.nix
new file mode 100644
index 0000000000000..5ca0917ec8e79
--- /dev/null
+++ b/nixos/tests/kthxbye.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix ({ lib, pkgs, ... }:
+{
+  name = "kthxbye";
+
+  meta = with lib.maintainers; {
+    maintainers = [ nukaduka ];
+  };
+
+  nodes.server = { ... }: {
+    environment.systemPackages = with pkgs; [ prometheus-alertmanager ];
+    services.prometheus = {
+      enable = true;
+
+      globalConfig = {
+        scrape_interval = "5s";
+        scrape_timeout = "5s";
+        evaluation_interval = "5s";
+      };
+
+      scrapeConfigs = [
+        {
+          job_name = "prometheus";
+          scrape_interval = "5s";
+          static_configs = [
+            {
+              targets = [ "localhost:9090" ];
+            }
+          ];
+        }
+      ];
+
+      rules = [
+        ''
+          groups:
+            - name: test
+              rules:
+                - alert: node_up
+                  expr: up != 0
+                  for: 5s
+                  labels:
+                    severity: bottom of the barrel
+                  annotations:
+                    summary: node is fine
+        ''
+      ];
+
+      alertmanagers = [
+        {
+          static_configs = [
+            {
+              targets = [
+                "localhost:9093"
+              ];
+            }
+          ];
+        }
+      ];
+
+      alertmanager = {
+        enable = true;
+        openFirewall = true;
+        configuration.route = {
+          receiver = "test";
+          group_wait = "5s";
+          group_interval = "5s";
+          group_by = [ "..." ];
+        };
+        configuration.receivers = [
+          {
+            name = "test";
+            webhook_configs = [
+              {
+                url = "http://localhost:1234";
+              }
+            ];
+          }
+        ];
+      };
+    };
+
+    services.kthxbye = {
+      enable = true;
+      openFirewall = true;
+      extendIfExpiringIn = "30s";
+      logJSON = true;
+      maxDuration = "15m";
+      interval = "5s";
+    };
+  };
+
+  testScript = ''
+    with subtest("start the server"):
+      start_all()
+      server.wait_for_unit("prometheus.service")
+      server.wait_for_unit("alertmanager.service")
+      server.wait_for_unit("kthxbye.service")
+
+      server.sleep(2) # wait for units to settle
+      server.systemctl("restart kthxbye.service") # make sure kthxbye comes up after alertmanager
+      server.sleep(2)
+
+    with subtest("set up test silence which expires in 20s"):
+      server.succeed('amtool --alertmanager.url "http://localhost:9093" silence add alertname="node_up" -a "nixosTest" -d "20s" -c "ACK! this server is fine!!"')
+
+    with subtest("wait for 21 seconds and check if the silence is still active"):
+      server.sleep(21)
+      server.systemctl("status kthxbye.service")
+      server.succeed("amtool --alertmanager.url 'http://localhost:9093' silence | grep 'ACK'")
+  '';
+})