about summary refs log tree commit diff
diff options
context:
space:
mode:
author Ryan Mulligan <ryan@ryantm.com> 2022-03-15 16:06:56 -0700
committer GitHub <noreply@github.com> 2022-03-15 16:06:56 -0700
commit 0ab73f9a3fe0d8a305db88650baef4a6bb792f29 (patch)
tree 80b17b7be127a8e5eaaf85d1fdc8dfe9b9cad329
parent cec02f35167a49490f3ee8e32673f22f87a8132a (diff)
parent 70c1e849c0b5741e07e7d8d0d418764e2fdb4e24 (diff)
Merge pull request #162535 from astro/pacemaker
pacemaker: init
-rw-r--r-- nixos/doc/manual/from_md/release-notes/rl-2205.section.xml | 6
-rw-r--r-- nixos/doc/manual/release-notes/rl-2205.section.md | 2
-rw-r--r-- nixos/modules/module-list.nix | 2
-rw-r--r-- nixos/modules/services/cluster/corosync/default.nix | 112
-rw-r--r-- nixos/modules/services/cluster/pacemaker/default.nix | 52
-rw-r--r-- nixos/tests/all-tests.nix | 1
-rw-r--r-- nixos/tests/pacemaker.nix | 110
-rw-r--r-- pkgs/misc/logging/pacemaker/default.nix | 102
-rw-r--r-- pkgs/os-specific/linux/ocf-resource-agents/default.nix | 7
-rw-r--r-- pkgs/servers/corosync/default.nix | 10
-rw-r--r-- pkgs/top-level/all-packages.nix | 2
11 files changed, 404 insertions, 2 deletions
diff --git a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
index 396de8cd77c2b..a23b2489abea0 100644
--- a/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
+++ b/nixos/doc/manual/from_md/release-notes/rl-2205.section.xml
@@ -306,6 +306,12 @@
           with many features.
         </para>
       </listitem>
+      <listitem>
+        <para>
+          <link xlink:href="https://clusterlabs.org/pacemaker/">pacemaker</link>
+          cluster resource manager
+        </para>
+      </listitem>
     </itemizedlist>
   </section>
   <section xml:id="sec-release-22.05-incompatibilities">
diff --git a/nixos/doc/manual/release-notes/rl-2205.section.md b/nixos/doc/manual/release-notes/rl-2205.section.md
index 2f730de737c01..390ec7b2add18 100644
--- a/nixos/doc/manual/release-notes/rl-2205.section.md
+++ b/nixos/doc/manual/release-notes/rl-2205.section.md
@@ -87,6 +87,8 @@ In addition to numerous new and upgraded packages, this release has the followin
 
 - [blocky](https://0xerr0r.github.io/blocky/), fast and lightweight DNS proxy as ad-blocker for local network with many features.
 
+- [pacemaker](https://clusterlabs.org/pacemaker/) cluster resource manager
+
 <!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->
 
 ## Backward Incompatibilities {#sec-release-22.05-incompatibilities}
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 13703968167cd..68f9c6c1227e3 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -302,6 +302,7 @@
   ./services/backup/znapzend.nix
   ./services/blockchain/ethereum/geth.nix
   ./services/backup/zrepl.nix
+  ./services/cluster/corosync/default.nix
   ./services/cluster/hadoop/default.nix
   ./services/cluster/k3s/default.nix
   ./services/cluster/kubernetes/addons/dns.nix
@@ -314,6 +315,7 @@
   ./services/cluster/kubernetes/pki.nix
   ./services/cluster/kubernetes/proxy.nix
   ./services/cluster/kubernetes/scheduler.nix
+  ./services/cluster/pacemaker/default.nix
   ./services/cluster/spark/default.nix
   ./services/computing/boinc/client.nix
   ./services/computing/foldingathome/client.nix
diff --git a/nixos/modules/services/cluster/corosync/default.nix b/nixos/modules/services/cluster/corosync/default.nix
new file mode 100644
index 0000000000000..b4144917feea9
--- /dev/null
+++ b/nixos/modules/services/cluster/corosync/default.nix
@@ -0,0 +1,112 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.corosync;
+in
+{
+  # interface
+  options.services.corosync = {
+    enable = mkEnableOption "corosync";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.corosync;
+      defaultText = literalExpression "pkgs.corosync";
+      description = "Package that should be used for corosync.";
+    };
+
+    clusterName = mkOption {
+      type = types.str;
+      default = "nixcluster";
+      description = "Name of the corosync cluster.";
+    };
+
+    extraOptions = mkOption {
+      type = with types; listOf str;
+      default = [];
+      description = "Additional options with which to start corosync.";
+    };
+
+    nodelist = mkOption {
+      description = "Corosync nodelist: all cluster members.";
+      default = [];
+      type = with types; listOf (submodule {
+        options = {
+          nodeid = mkOption {
+            type = int;
+            description = "Node ID number";
+          };
+          name = mkOption {
+            type = str;
+            description = "Node name";
+          };
+          ring_addrs = mkOption {
+            type = listOf str;
+            description = "List of addresses, one for each ring.";
+          };
+        };
+      });
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    environment.systemPackages = [ cfg.package ];
+
+    environment.etc."corosync/corosync.conf".text = ''
+      totem {
+        version: 2
+        secauth: on
+        cluster_name: ${cfg.clusterName}
+        transport: knet
+      }
+
+      nodelist {
+        ${concatMapStrings ({ nodeid, name, ring_addrs }: ''
+          node {
+            nodeid: ${toString nodeid}
+            name: ${name}
+            ${concatStrings (imap0 (i: addr: ''
+              ring${toString i}_addr: ${addr}
+            '') ring_addrs)}
+          }
+        '') cfg.nodelist}
+      }
+
+      quorum {
+        # only corosync_votequorum is supported
+        provider: corosync_votequorum
+        wait_for_all: 0
+        ${optionalString (builtins.length cfg.nodelist < 3) ''
+          two_node: 1
+        ''}
+      }
+
+      logging {
+        to_syslog: yes
+      }
+    '';
+
+    environment.etc."corosync/uidgid.d/root".text = ''
+      # allow pacemaker connection by root
+      uidgid {
+        uid: 0
+        gid: 0
+      }
+    '';
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.corosync = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "corosync";
+        StateDirectoryMode = "0700";
+      };
+    };
+
+    environment.etc."sysconfig/corosync".text = lib.optionalString (cfg.extraOptions != []) ''
+      COROSYNC_OPTIONS="${lib.escapeShellArgs cfg.extraOptions}"
+    '';
+  };
+}
diff --git a/nixos/modules/services/cluster/pacemaker/default.nix b/nixos/modules/services/cluster/pacemaker/default.nix
new file mode 100644
index 0000000000000..7eeadffcc586b
--- /dev/null
+++ b/nixos/modules/services/cluster/pacemaker/default.nix
@@ -0,0 +1,52 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.pacemaker;
+in
+{
+  # interface
+  options.services.pacemaker = {
+    enable = mkEnableOption "pacemaker";
+
+    package = mkOption {
+      type = types.package;
+      default = pkgs.pacemaker;
+      defaultText = literalExpression "pkgs.pacemaker";
+      description = "Package that should be used for pacemaker.";
+    };
+  };
+
+  # implementation
+  config = mkIf cfg.enable {
+    assertions = [ {
+      assertion = config.services.corosync.enable;
+      message = ''
+        Enabling services.pacemaker requires a services.corosync configuration.
+      '';
+    } ];
+
+    environment.systemPackages = [ cfg.package ];
+
+    # required by pacemaker
+    users.users.hacluster = {
+      isSystemUser = true;
+      group = "pacemaker";
+      home = "/var/lib/pacemaker";
+    };
+    users.groups.pacemaker = {};
+
+    systemd.tmpfiles.rules = [
+      "d /var/log/pacemaker 0700 hacluster pacemaker -"
+    ];
+
+    systemd.packages = [ cfg.package ];
+    systemd.services.pacemaker = {
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        StateDirectory = "pacemaker";
+        StateDirectoryMode = "0700";
+      };
+    };
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 98ca2e081086f..eee99fb5e97f1 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -384,6 +384,7 @@ in
   os-prober = handleTestOn ["x86_64-linux"] ./os-prober.nix {};
   osrm-backend = handleTest ./osrm-backend.nix {};
   overlayfs = handleTest ./overlayfs.nix {};
+  pacemaker = handleTest ./pacemaker.nix {};
   packagekit = handleTest ./packagekit.nix {};
   pam-file-contents = handleTest ./pam/pam-file-contents.nix {};
   pam-oath-login = handleTest ./pam/pam-oath-login.nix {};
diff --git a/nixos/tests/pacemaker.nix b/nixos/tests/pacemaker.nix
new file mode 100644
index 0000000000000..6845576149537
--- /dev/null
+++ b/nixos/tests/pacemaker.nix
@@ -0,0 +1,110 @@
+import ./make-test-python.nix  ({ pkgs, lib, ... }: rec {
+  name = "pacemaker";
+  meta = with pkgs.lib.maintainers; {
+    maintainers = [ astro ];
+  };
+
+  nodes =
+    let
+      node = i: {
+        networking.interfaces.eth1.ipv4.addresses = [ {
+          address = "192.168.0.${toString i}";
+          prefixLength = 24;
+        } ];
+
+        services.corosync = {
+          enable = true;
+          clusterName = "zentralwerk-network";
+          nodelist = lib.imap (i: name: {
+            nodeid = i;
+            inherit name;
+            ring_addrs = [
+              (builtins.head nodes.${name}.networking.interfaces.eth1.ipv4.addresses).address
+            ];
+          }) (builtins.attrNames nodes);
+        };
+        environment.etc."corosync/authkey" = {
+          source = builtins.toFile "authkey"
+            # minimum length: 128 bytes
+            "testtesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttesttest";
+          mode = "0400";
+        };
+
+        services.pacemaker.enable = true;
+
+        # used for pacemaker resource
+        systemd.services.ha-cat = {
+          description = "Highly available netcat";
+          serviceConfig.ExecStart = "${pkgs.netcat}/bin/nc -l discard";
+        };
+      };
+    in {
+      node1 = node 1;
+      node2 = node 2;
+      node3 = node 3;
+    };
+
+  # sets up pacemaker with resources configuration, then crashes a
+  # node and waits for service restart on another node
+  testScript =
+    let
+      resources = builtins.toFile "cib-resources.xml" ''
+        <resources>
+          <primitive id="cat" class="systemd" type="ha-cat">
+            <operations>
+              <op id="stop-cat" name="stop" interval="0" timeout="1s"/>
+              <op id="start-cat" name="start" interval="0" timeout="1s"/>
+              <op id="monitor-cat" name="monitor" interval="1s" timeout="1s"/>
+            </operations>
+          </primitive>
+        </resources>
+      '';
+    in ''
+      import re
+      import time
+
+      start_all()
+
+      ${lib.concatMapStrings (node: ''
+        ${node}.wait_until_succeeds("corosync-quorumtool")
+        ${node}.wait_for_unit("pacemaker.service")
+      '') (builtins.attrNames nodes)}
+
+      # No STONITH device
+      node1.succeed("crm_attribute -t crm_config -n stonith-enabled -v false")
+      # Configure the cat resource
+      node1.succeed("cibadmin --replace --scope resources --xml-file ${resources}")
+
+      # wait until the service is started
+      while True:
+        output = node1.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        if match:
+          for machine in machines:
+            if machine.name == match.group(1):
+              current_node = machine
+          break
+        time.sleep(1)
+
+      current_node.log("Service running here!")
+      current_node.crash()
+
+      # pick another node that's still up
+      for machine in machines:
+        if machine.booted:
+          check_node = machine
+      # find where the service has been started next
+      while True:
+        output = check_node.succeed("crm_resource -r cat --locate")
+        match = re.search("is running on: (.+)", output)
+        # output will remain the old current_node until the crash is detected by pacemaker
+        if match and match.group(1) != current_node.name:
+          for machine in machines:
+            if machine.name == match.group(1):
+              next_node = machine
+          break
+        time.sleep(1)
+
+      next_node.log("Service migrated here!")
+  '';
+})
diff --git a/pkgs/misc/logging/pacemaker/default.nix b/pkgs/misc/logging/pacemaker/default.nix
new file mode 100644
index 0000000000000..07194380d4a7b
--- /dev/null
+++ b/pkgs/misc/logging/pacemaker/default.nix
@@ -0,0 +1,102 @@
+{ lib
+, stdenv
+, autoconf
+, automake
+, bash
+, bzip2
+, corosync
+, dbus
+, fetchFromGitHub
+, glib
+, gnutls
+, libqb
+, libtool
+, libuuid
+, libxml2
+, libxslt
+, pam
+, pkg-config
+, python3
+, nixosTests
+
+# Pacemaker is compiled twice, once with forOCF = true to extract its
+# OCF definitions for use in the ocf-resource-agents derivation, then
+# again with forOCF = false, where the ocf-resource-agents is provided
+# as the OCF_ROOT.
+, forOCF ? false
+, ocf-resource-agents
+} :
+
+stdenv.mkDerivation rec {
+  pname = "pacemaker";
+  version = "2.1.2";
+
+  src = fetchFromGitHub {
+    owner = "ClusterLabs";
+    repo = pname;
+    rev = "Pacemaker-${version}";
+    sha256 = "1w7vq3lmgcz38pfww9vccm142vjsjqz3qc9nnk09ynkx4agqhxdg";
+  };
+
+  nativeBuildInputs = [
+    autoconf
+    automake
+    libtool
+    pkg-config
+  ];
+
+  buildInputs = [
+    bash
+    bzip2
+    corosync
+    dbus.dev
+    glib
+    gnutls
+    libqb
+    libuuid
+    libxml2.dev
+    libxslt.dev
+    pam
+    python3
+  ];
+
+  preConfigure = ''
+    ./autogen.sh --prefix="$out"
+  '';
+  configureFlags = [
+    "--exec-prefix=${placeholder "out"}"
+    "--sysconfdir=/etc"
+    "--localstatedir=/var"
+    "--with-initdir=/etc/systemd/system"
+    "--with-systemdsystemunitdir=/etc/systemd/system"
+    "--with-corosync"
+    # allows Type=notify in the systemd service
+    "--enable-systemd"
+  ] ++ lib.optional (!forOCF) "--with-ocfdir=${ocf-resource-agents}/usr/lib/ocf";
+
+  installFlags = [ "DESTDIR=${placeholder "out"}" ];
+
+  NIX_CFLAGS_COMPILE = lib.optionals stdenv.cc.isGNU [
+    "-Wno-error=strict-prototypes"
+  ];
+
+  enableParallelBuilding = true;
+
+  postInstall = ''
+    # pacemaker's install linking requires a weirdly nested hierarchy
+    mv $out$out/* $out
+    rm -r $out/nix
+  '';
+
+  passthru.tests = {
+    inherit (nixosTests) pacemaker;
+  };
+
+  meta = with lib; {
+    homepage = "https://clusterlabs.org/pacemaker/";
+    description = "Pacemaker is an open source, high availability resource manager suitable for both small and large clusters.";
+    license = licenses.gpl2Plus;
+    platforms = platforms.linux;
+    maintainers = with maintainers; [ ryantm astro ];
+  };
+}
diff --git a/pkgs/os-specific/linux/ocf-resource-agents/default.nix b/pkgs/os-specific/linux/ocf-resource-agents/default.nix
index 9c9557c7be851..8d7f2b527144a 100644
--- a/pkgs/os-specific/linux/ocf-resource-agents/default.nix
+++ b/pkgs/os-specific/linux/ocf-resource-agents/default.nix
@@ -1,3 +1,5 @@
+# This combines together OCF definitions from other derivations.
+# https://github.com/ClusterLabs/resource-agents/blob/master/doc/dev-guides/ra-dev-guide.asc
 { stdenv
 , lib
 , runCommand
@@ -8,12 +10,16 @@
 , python3
 , glib
 , drbd
+, pacemaker
 }:
 
 let
   drbdForOCF = drbd.override {
     forOCF = true;
   };
+  pacemakerForOCF = pacemaker.override {
+    forOCF = true;
+  };
 
   resource-agentsForOCF = stdenv.mkDerivation rec {
     pname = "resource-agents";
@@ -53,4 +59,5 @@ runCommand "ocf-resource-agents" {} ''
   mkdir -p $out/usr/lib/ocf
   ${lndir}/bin/lndir -silent "${resource-agentsForOCF}/lib/ocf/" $out/usr/lib/ocf
   ${lndir}/bin/lndir -silent "${drbdForOCF}/usr/lib/ocf/" $out/usr/lib/ocf
+  ${lndir}/bin/lndir -silent "${pacemakerForOCF}/usr/lib/ocf/" $out/usr/lib/ocf
 ''
diff --git a/pkgs/servers/corosync/default.nix b/pkgs/servers/corosync/default.nix
index df73fb9f48e9c..4df8a547dff50 100644
--- a/pkgs/servers/corosync/default.nix
+++ b/pkgs/servers/corosync/default.nix
@@ -1,5 +1,5 @@
 { lib, stdenv, fetchurl, makeWrapper, pkg-config, kronosnet, nss, nspr, libqb
-, dbus, rdma-core, libstatgrab, net-snmp
+, systemd, dbus, rdma-core, libstatgrab, net-snmp
 , enableDbus ? false
 , enableInfiniBandRdma ? false
 , enableMonitoring ? false
@@ -20,7 +20,7 @@ stdenv.mkDerivation rec {
   nativeBuildInputs = [ makeWrapper pkg-config ];
 
   buildInputs = [
-    kronosnet nss nspr libqb
+    kronosnet nss nspr libqb systemd.dev
   ] ++ optional enableDbus dbus
     ++ optional enableInfiniBandRdma rdma-core
     ++ optional enableMonitoring libstatgrab
@@ -32,6 +32,8 @@ stdenv.mkDerivation rec {
     "--with-logdir=/var/log/corosync"
     "--enable-watchdog"
     "--enable-qdevices"
+    # allows Type=notify in the systemd service
+    "--enable-systemd"
   ] ++ optional enableDbus "--enable-dbus"
     ++ optional enableInfiniBandRdma "--enable-rdma"
     ++ optional enableMonitoring "--enable-monitoring"
@@ -63,6 +65,10 @@ stdenv.mkDerivation rec {
       --prefix PATH ":" "$out/sbin:${libqb}/sbin"
   '';
 
+  passthru.tests = {
+    inherit (nixosTests) pacemaker;
+  };
+
   meta = {
     homepage = "http://corosync.org/";
     description = "A Group Communication System with features for implementing high availability within applications";
diff --git a/pkgs/top-level/all-packages.nix b/pkgs/top-level/all-packages.nix
index f9c71729145e1..09c1ba6d9b3b7 100644
--- a/pkgs/top-level/all-packages.nix
+++ b/pkgs/top-level/all-packages.nix
@@ -19568,6 +19568,8 @@ with pkgs;
   osinfo-db = callPackage ../data/misc/osinfo-db { };
   osinfo-db-tools = callPackage ../tools/misc/osinfo-db-tools { };
 
+  pacemaker = callPackage ../misc/logging/pacemaker { };
+
   p11-kit = callPackage ../development/libraries/p11-kit { };
 
   paperkey = callPackage ../tools/security/paperkey { };