diff options
author | Jonas Chevalier <zimbatm@zimbatm.com> | 2024-05-28 09:36:03 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-28 09:36:03 +0200 |
commit | 5d6d0586465049e2e39d1a69decba36959b5edae (patch) | |
tree | d41494a94c219e8e18d3e8fba7968ffdf96ebc92 /nixos | |
parent | ef4db778059ce8bd4779a7ce4bcd638ecc807f23 (diff) | |
parent | b8e5799a635291007d2fdbed704e84a76294223d (diff) |
Merge pull request #308090 from mogeko/service-rke2
nixos/rke2: add rke2 service
Diffstat (limited to 'nixos')
-rw-r--r-- | nixos/doc/manual/release-notes/rl-2405.section.md | 2 | ||||
-rw-r--r-- | nixos/modules/module-list.nix | 1 | ||||
-rw-r--r-- | nixos/modules/services/cluster/rke2/default.nix | 311 | ||||
-rw-r--r-- | nixos/tests/all-tests.nix | 1 | ||||
-rw-r--r-- | nixos/tests/rke2/default.nix | 13 | ||||
-rw-r--r-- | nixos/tests/rke2/multi-node.nix | 176 | ||||
-rw-r--r-- | nixos/tests/rke2/single-node.nix | 75 |
7 files changed, 579 insertions, 0 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md
index 4187d4a58afec..e242ecfe9cced 100644
--- a/nixos/doc/manual/release-notes/rl-2405.section.md
+++ b/nixos/doc/manual/release-notes/rl-2405.section.md
@@ -205,6 +205,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
 
 - [your_spotify](https://github.com/Yooooomi/your_spotify), a self hosted Spotify tracking dashboard. Available as [services.your_spotify](#opt-services.your_spotify.enable)
 
+- [RKE2](https://github.com/rancher/rke2), also known as RKE Government, is Rancher's next-generation Kubernetes distribution. Available as [services.rke2](#opt-services.rke2.enable).
+
 ## Backward Incompatibilities {#sec-release-24.05-incompatibilities}
 
 <!-- To avoid merge conflicts, consider adding your item at an arbitrary place in the list instead. -->
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index eca39630b6dea..159696be8b6d0 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -418,6 +418,7 @@
   ./services/cluster/kubernetes/scheduler.nix
   ./services/cluster/pacemaker/default.nix
   ./services/cluster/patroni/default.nix
+  ./services/cluster/rke2/default.nix
   ./services/cluster/spark/default.nix
   ./services/computing/boinc/client.nix
   ./services/computing/foldingathome/client.nix
diff --git a/nixos/modules/services/cluster/rke2/default.nix b/nixos/modules/services/cluster/rke2/default.nix
new file mode 100644
index 0000000000000..9ddbd299fdf8d
--- /dev/null
+++ b/nixos/modules/services/cluster/rke2/default.nix
@@ -0,0 +1,311 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+let
+  cfg = config.services.rke2;
+in
+{
+  imports = [ ];
+
+  options.services.rke2 = {
+    enable = mkEnableOption "rke2";
+
+    package = mkPackageOption pkgs "rke2" { };
+
+    role = mkOption {
+      type = types.enum [ "server" "agent" ];
+      description = ''
+        Whether rke2 should run as a server or agent.
+
+        If it's a server:
+
+        - By default it also runs workloads as an agent.
+        - Any optional setting is allowed.
+
+        If it's an agent:
+
+        - `serverAddr` is required.
+        - `token` or `tokenFile` is required.
+        - `agentToken` or `agentTokenFile` or `disable` or `cni` are not allowed.
+      '';
+      default = "server";
+    };
+
+    configPath = mkOption {
+      type = types.path;
+      description = "Load configuration from FILE.";
+      default = "/etc/rancher/rke2/config.yaml";
+    };
+
+    debug = mkOption {
+      type = types.bool;
+      description = "Turn on debug logs.";
+      default = false;
+    };
+
+    dataDir = mkOption {
+      type = types.path;
+      description = "The folder to hold state in.";
+      default = "/var/lib/rancher/rke2";
+    };
+
+    token = mkOption {
+      type = types.str;
+      description = ''
+        Shared secret used to join a server or agent to a cluster.
+
+        > **WARNING**: This option will store your token unencrypted and world-readable in the Nix store.
+        If this is undesired use the `tokenFile` option instead.
+      '';
+      default = "";
+    };
+
+    tokenFile = mkOption {
+      type = types.nullOr types.path;
+      description = "File path containing rke2 token to use when connecting to the server.";
+      default = null;
+    };
+
+    disable = mkOption {
+      type = types.listOf types.str;
+      description = "Do not deploy packaged components and delete any deployed components.";
+      default = [ ];
+    };
+
+    nodeName = mkOption {
+      type = types.nullOr types.str;
+      description = "Node name.";
+      default = null;
+    };
+
+    nodeLabel = mkOption {
+      type = types.listOf types.str;
+      description = "Registering and starting kubelet with set of labels.";
+      default = [ ];
+    };
+
+    nodeTaint = mkOption {
+      type = types.listOf types.str;
+      description = "Registering kubelet with set of taints.";
+      default = [ ];
+    };
+
+    nodeIP = mkOption {
+      type = types.nullOr types.str;
+      description = "IPv4/IPv6 addresses to advertise for node.";
+      default = null;
+    };
+
+    agentToken = mkOption {
+      type = types.str;
+      description = ''
+        Shared secret used to join agents to the cluster, but not servers.
+
+        > **WARNING**: This option will store your token unencrypted and world-readable in the Nix store.
+        If this is undesired use the `agentTokenFile` option instead.
+      '';
+      default = "";
+    };
+
+    agentTokenFile = mkOption {
+      type = types.nullOr types.path;
+      description = "File path containing rke2 agent token to use when connecting to the server.";
+      default = null;
+    };
+
+    serverAddr = mkOption {
+      type = types.str;
+      description = "The rke2 server to connect to, used to join a cluster.";
+      example = "https://10.0.0.10:6443";
+      default = "";
+    };
+
+    selinux = mkOption {
+      type = types.bool;
+      description = "Enable SELinux in containerd.";
+      default = false;
+    };
+
+    cni = mkOption {
+      type = types.enum [ "none" "canal" "cilium" "calico" "flannel" ];
+      description = ''
+        CNI Plugins to deploy, one of `none`, `calico`, `canal`, `cilium` or `flannel`.
+
+        All CNI plugins get installed via a helm chart after the main components are up and running
+        and can be [customized by modifying the helm chart options](https://docs.rke2.io/helm).
+
+        [Learn more about RKE2 and CNI plugins](https://docs.rke2.io/networking/basic_network_options)
+
+        > **WARNING**: Flannel support in RKE2 is currently experimental.
+      '';
+      default = "canal";
+    };
+
+    cisHardening = mkOption {
+      type = types.bool;
+      description = ''
+        Enable CIS Hardening for RKE2.
+
+        It will set the configurations and controls required to address Kubernetes benchmark controls
+        from the Center for Internet Security (CIS).
+
+        Learn more about [CIS Hardening for RKE2](https://docs.rke2.io/security/hardening_guide).
+
+        > **NOTICE**:
+        >
+        > You may need to restart `systemd-sysctl` manually:
+        >
+        > ```shell
+        > sudo systemctl restart systemd-sysctl
+        > ```
+      '';
+      default = false;
+    };
+
+    extraFlags = mkOption {
+      type = types.listOf types.str;
+      description = ''
+        Extra flags to pass to the rke2 service/agent.
+
+        Here you can find all the available flags:
+
+        - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
+        - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)
+      '';
+      example = [ "--disable-kube-proxy" "--cluster-cidr=10.24.0.0/16" ];
+      default = [ ];
+    };
+
+    environmentVars = mkOption {
+      type = types.attrsOf types.str;
+      description = ''
+        Environment variables for configuring the rke2 service/agent.
+
+        Here you can find all the available environment variables:
+
+        - [Server Configuration Reference](https://docs.rke2.io/reference/server_config)
+        - [Agent Configuration Reference](https://docs.rke2.io/reference/linux_agent_config)
+
+        Besides the options above, you can also set environment variables by editing or creating these files:
+
+        - `/etc/default/rke2`
+        - `/etc/sysconfig/rke2`
+        - `/usr/local/lib/systemd/system/rke2.env`
+      '';
+      # See: https://github.com/rancher/rke2/blob/master/bundle/lib/systemd/system/rke2-server.env#L1
+      default = {
+        HOME = "/root";
+      };
+    };
+  };
+
+  config = mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = cfg.role == "agent" -> (builtins.pathExists cfg.configPath || cfg.serverAddr != "");
+        message = "serverAddr or configPath (with 'server' key) should be set if role is 'agent'";
+      }
+      {
+        assertion = cfg.role == "agent" -> (builtins.pathExists cfg.configPath || cfg.tokenFile != null || cfg.token != "");
+        message = "token or tokenFile or configPath (with 'token' or 'token-file' keys) should be set if role is 'agent'";
+      }
+      {
+        assertion = cfg.role == "agent" -> ! (cfg.agentTokenFile != null || cfg.agentToken != "");
+        message = "agentToken or agentTokenFile should NOT be set if role is 'agent'";
+      }
+      {
+        assertion = cfg.role == "agent" -> ! (cfg.disable != [ ]);
+        message = "disable should not be set if role is 'agent'";
+      }
+      {
+        assertion = cfg.role == "agent" -> ! (cfg.cni != "canal");
+        message = "cni should not be set if role is 'agent'";
+      }
+    ];
+
+    environment.systemPackages = [ cfg.package ];
+    # To configure NetworkManager to ignore calico/flannel related network interfaces.
+    # See: https://docs.rke2.io/known_issues#networkmanager
+    environment.etc."NetworkManager/conf.d/rke2-canal.conf" = {
+      enable = config.networking.networkmanager.enable;
+      text = ''
+        [keyfile]
+        unmanaged-devices=interface-name:cali*;interface-name:flannel*
+      '';
+    };
+    # See: https://docs.rke2.io/security/hardening_guide#set-kernel-parameters
+    boot.kernel.sysctl = mkIf cfg.cisHardening {
+      "vm.panic_on_oom" = 0;
+      "vm.overcommit_memory" = 1;
+      "kernel.panic" = 10;
+      "kernel.panic_on_oops" = 1;
+    };
+
+    systemd.services.rke2 = {
+      description = "Rancher Kubernetes Engine v2";
+      documentation = [ "https://github.com/rancher/rke2#readme" ];
+      after = [ "network-online.target" ];
+      wants = [ "network-online.target" ];
+      wantedBy = [ "multi-user.target" ];
+      serviceConfig = {
+        Type = if cfg.role == "agent" then "exec" else "notify";
+        EnvironmentFile = [
+          "-/etc/default/%N"
+          "-/etc/sysconfig/%N"
+          "-/usr/local/lib/systemd/system/%N.env"
+        ];
+        Environment = mapAttrsToList (k: v: "${k}=${v}") cfg.environmentVars;
+        KillMode = "process";
+        Delegate = "yes";
+        LimitNOFILE = 1048576;
+        LimitNPROC = "infinity";
+        LimitCORE = "infinity";
+        TasksMax = "infinity";
+        TimeoutStartSec = 0;
+        Restart = "always";
+        RestartSec = "5s";
+        ExecStartPre = [
+          # RKE2 conflicts with `nm-cloud-setup.service`: that service adds a routing table that
+          # interferes with the CNI plugin's configuration. This script checks whether the service
+          # is enabled and, if so, fails the RKE2 start (the leading `!` inverts the exit status).
+          # See: https://github.com/rancher/rke2/issues/1053
+          (pkgs.writeScript "check-nm-cloud-setup.sh" ''
+            #! ${pkgs.runtimeShell}
+            set -x
+            ! /run/current-system/systemd/bin/systemctl is-enabled --quiet nm-cloud-setup.service
+          '')
+          "-${pkgs.kmod}/bin/modprobe br_netfilter"
+          "-${pkgs.kmod}/bin/modprobe overlay"
+        ];
+        ExecStart = "${cfg.package}/bin/rke2 '${cfg.role}' ${escapeShellArgs (
+          (optional (cfg.configPath != "/etc/rancher/rke2/config.yaml") "--config=${cfg.configPath}")
+          ++ (optional cfg.debug "--debug")
+          ++ (optional (cfg.dataDir != "/var/lib/rancher/rke2") "--data-dir=${cfg.dataDir}")
+          ++ (optional (cfg.token != "") "--token=${cfg.token}")
+          ++ (optional (cfg.tokenFile != null) "--token-file=${cfg.tokenFile}")
+          ++ (optionals (cfg.role == "server" && cfg.disable != [ ]) (map (d: "--disable=${d}") cfg.disable))
+          ++ (optional (cfg.nodeName != null) "--node-name=${cfg.nodeName}")
+          ++ (optionals (cfg.nodeLabel != [ ]) (map (l: "--node-label=${l}") cfg.nodeLabel))
+          ++ (optionals (cfg.nodeTaint != [ ]) (map (t: "--node-taint=${t}") cfg.nodeTaint))
+          ++ (optional (cfg.nodeIP != null) "--node-ip=${cfg.nodeIP}")
+          ++ (optional (cfg.role == "server" && cfg.agentToken != "") "--agent-token=${cfg.agentToken}")
+          ++ (optional (cfg.role == "server" && cfg.agentTokenFile != null) "--agent-token-file=${cfg.agentTokenFile}")
+          ++ (optional (cfg.serverAddr != "") "--server=${cfg.serverAddr}")
+          ++ (optional cfg.selinux "--selinux")
+          ++ (optional (cfg.role == "server" && cfg.cni != "canal") "--cni=${cfg.cni}")
+          ++ (optional cfg.cisHardening "--profile=${if versionAtLeast cfg.package.version "1.25" then "cis-1.23" else "cis-1.6"}")
+          ++ cfg.extraFlags
+        )}";
+        ExecStopPost = let
+          killProcess = pkgs.writeScript "kill-process.sh" ''
+            #! ${pkgs.runtimeShell}
+            /run/current-system/systemd/bin/systemd-cgls /system.slice/$1 | \
+            ${pkgs.gnugrep}/bin/grep -Eo '[0-9]+ (containerd|kubelet)' | \
+            ${pkgs.gawk}/bin/awk '{print $1}' | \
+            ${pkgs.findutils}/bin/xargs -r ${pkgs.util-linux}/bin/kill
+          '';
+        in "-${killProcess} %n";
+      };
+    };
+  };
+}
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 035c288c22e5c..a2408a43ecc92 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -800,6 +800,7 @@ in {
   restic-rest-server = handleTest ./restic-rest-server.nix {};
   restic = handleTest ./restic.nix {};
   retroarch = handleTest ./retroarch.nix {};
+  rke2 = handleTestOn ["aarch64-linux" "x86_64-linux"] ./rke2 {};
   rkvm = handleTest ./rkvm {};
   robustirc-bridge = handleTest ./robustirc-bridge.nix {};
   roundcube = handleTest ./roundcube.nix {};
diff --git a/nixos/tests/rke2/default.nix b/nixos/tests/rke2/default.nix
new file mode 100644
index 0000000000000..e8a5f382b735f
--- /dev/null
+++ b/nixos/tests/rke2/default.nix
@@ -0,0 +1,13 @@
+{ system ? builtins.currentSystem
+, pkgs ? import ../../.. { inherit system; }
+, lib ? pkgs.lib
+}:
+let
+  allRKE2 = lib.filterAttrs (n: _: lib.strings.hasPrefix "rke2" n) pkgs;
+in
+{
+  # Run a single node rke2 cluster and verify a pod can run
+  singleNode = lib.mapAttrs (_: rke2: import ./single-node.nix { inherit system pkgs rke2; }) allRKE2;
+  # Run a multi-node rke2 cluster and verify pod networking works across nodes
+  multiNode = lib.mapAttrs (_: rke2: import ./multi-node.nix { inherit system pkgs rke2; }) allRKE2;
+}
diff --git a/nixos/tests/rke2/multi-node.nix b/nixos/tests/rke2/multi-node.nix
new file mode 100644
index 0000000000000..ddf0b60f6fba4
--- /dev/null
+++ b/nixos/tests/rke2/multi-node.nix
@@ -0,0 +1,176 @@
+import ../make-test-python.nix ({ pkgs, lib, rke2, ... }:
+  let
+    pauseImage = pkgs.dockerTools.streamLayeredImage {
+      name = "test.local/pause";
+      tag = "local";
+      contents = pkgs.buildEnv {
+        name = "rke2-pause-image-env";
+        paths = with pkgs; [ tini bashInteractive coreutils socat ];
+      };
+      config.Entrypoint = [ "/bin/tini" "--" "/bin/sleep" "inf" ];
+    };
+    # A daemonset that responds 'server' on port 8000
+    networkTestDaemonset = pkgs.writeText "test.yml" ''
+      apiVersion: apps/v1
+      kind: DaemonSet
+      metadata:
+        name: test
+        labels:
+          name: test
+      spec:
+        selector:
+          matchLabels:
+            name: test
+        template:
+          metadata:
+            labels:
+              name: test
+          spec:
+            containers:
+            - name: test
+              image: test.local/pause:local
+              imagePullPolicy: Never
+              resources:
+                limits:
+                  memory: 20Mi
+              command: ["socat", "TCP4-LISTEN:8000,fork", "EXEC:echo server"]
+    '';
+    tokenFile = pkgs.writeText "token" "p@s$w0rd";
+    agentTokenFile = pkgs.writeText "agent-token" "p@s$w0rd";
+  in
+  {
+    name = "${rke2.name}-multi-node";
+    meta.maintainers = rke2.meta.maintainers;
+
+    nodes = {
+      server1 = { pkgs, ... }: {
+        networking.firewall.enable = false;
+        networking.useDHCP = false;
+        networking.defaultGateway = "192.168.1.1";
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
+          { address = "192.168.1.1"; prefixLength = 24; }
+        ];
+
+        virtualisation.memorySize = 1536;
+        virtualisation.diskSize = 4096;
+
+        services.rke2 = {
+          enable = true;
+          role = "server";
+          inherit tokenFile;
+          inherit agentTokenFile;
+          nodeName = "${rke2.name}-server1";
+          package = rke2;
+          nodeIP = "192.168.1.1";
+          disable = [
+            "rke2-coredns"
+            "rke2-metrics-server"
+            "rke2-ingress-nginx"
+          ];
+          extraFlags = [
+            "--cluster-reset"
+          ];
+        };
+      };
+
+      server2 = { pkgs, ... }: {
+        networking.firewall.enable = false;
+        networking.useDHCP = false;
+        networking.defaultGateway = "192.168.1.2";
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
+          { address = "192.168.1.2"; prefixLength = 24; }
+        ];
+
+        virtualisation.memorySize = 1536;
+        virtualisation.diskSize = 4096;
+
+        services.rke2 = {
+          enable = true;
+          role = "server";
+          serverAddr = "https://192.168.1.1:6443";
+          inherit tokenFile;
+          inherit agentTokenFile;
+          nodeName = "${rke2.name}-server2";
+          package = rke2;
+          nodeIP = "192.168.1.2";
+          disable = [
+            "rke2-coredns"
+            "rke2-metrics-server"
+            "rke2-ingress-nginx"
+          ];
+        };
+      };
+
+      agent1 = { pkgs, ... }: {
+        networking.firewall.enable = false;
+        networking.useDHCP = false;
+        networking.defaultGateway = "192.168.1.3";
+        networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
+          { address = "192.168.1.3"; prefixLength = 24; }
+        ];
+
+        virtualisation.memorySize = 1536;
+        virtualisation.diskSize = 4096;
+
+        services.rke2 = {
+          enable = true;
+          role = "agent";
+          tokenFile = agentTokenFile;
+          serverAddr = "https://192.168.1.2:6443";
+          nodeName = "${rke2.name}-agent1";
+          package = rke2;
+          nodeIP = "192.168.1.3";
+        };
+      };
+    };
+
+    testScript = let
+      kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
+      ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
+      jq = "${pkgs.jq}/bin/jq";
+      ping = "${pkgs.iputils}/bin/ping";
+    in ''
+      machines = [server1, server2, agent1]
+
+      for machine in machines:
+          machine.start()
+          machine.wait_for_unit("rke2")
+
+      # wait for the agent to show up (registration is asynchronous, so poll instead of checking once)
+      server1.wait_until_succeeds("${kubectl} get node ${rke2.name}-agent1")
+
+      for machine in machines:
+          machine.succeed("${pauseImage} | ${ctr} -n k8s.io image import -")
+
+      server1.succeed("${kubectl} cluster-info")
+      server1.wait_until_succeeds("${kubectl} get serviceaccount default")
+
+      # Now create a pod on each node via a daemonset and verify they can talk to each other.
+      server1.succeed("${kubectl} apply -f ${networkTestDaemonset}")
+      server1.wait_until_succeeds(
+          f'[ "$(${kubectl} get ds test -o json | ${jq} .status.numberReady)" -eq {len(machines)} ]'
+      )
+
+      # Get pod IPs
+      pods = server1.succeed("${kubectl} get po -o json | ${jq} '.items[].metadata.name' -r").splitlines()
+      pod_ips = [
+          server1.succeed(f"${kubectl} get po {n} -o json | ${jq} '.status.podIP' -cr").strip() for n in pods
+      ]
+
+      # Verify each server can ping each pod ip
+      for pod_ip in pod_ips:
+          server1.succeed(f"${ping} -c 1 {pod_ip}")
+          agent1.succeed(f"${ping} -c 1 {pod_ip}")
+
+      # Verify the pods can talk to each other
+      resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[0]} -- socat TCP:{pod_ips[1]}:8000 -")
+      assert resp.strip() == "server"
+      resp = server1.wait_until_succeeds(f"${kubectl} exec {pods[1]} -- socat TCP:{pod_ips[0]}:8000 -")
+      assert resp.strip() == "server"
+
+      # Cleanup
+      server1.succeed("${kubectl} delete -f ${networkTestDaemonset}")
+      for machine in machines:
+          machine.shutdown()
+    '';
+  })
diff --git a/nixos/tests/rke2/single-node.nix b/nixos/tests/rke2/single-node.nix
new file mode 100644
index 0000000000000..5a512eacca0f1
--- /dev/null
+++ b/nixos/tests/rke2/single-node.nix
@@ -0,0 +1,75 @@
+import ../make-test-python.nix ({ pkgs, lib, rke2, ... }:
+  let
+    pauseImage = pkgs.dockerTools.streamLayeredImage {
+      name = "test.local/pause";
+      tag = "local";
+      contents = pkgs.buildEnv {
+        name = "rke2-pause-image-env";
+        paths = with pkgs; [ tini (hiPrio coreutils) busybox ];
+      };
+      config.Entrypoint = [ "/bin/tini" "--" "/bin/sleep" "inf" ];
+    };
+    testPodYaml = pkgs.writeText "test.yaml" ''
+      apiVersion: v1
+      kind: Pod
+      metadata:
+        name: test
+      spec:
+        containers:
+        - name: test
+          image: test.local/pause:local
+          imagePullPolicy: Never
+          command: ["sh", "-c", "sleep inf"]
+    '';
+  in
+  {
+    name = "${rke2.name}-single-node";
+    meta.maintainers = rke2.meta.maintainers;
+
+    nodes.machine = { pkgs, ... }: {
+      networking.firewall.enable = false;
+      networking.useDHCP = false;
+      networking.defaultGateway = "192.168.1.1";
+      networking.interfaces.eth1.ipv4.addresses = pkgs.lib.mkForce [
+        { address = "192.168.1.1"; prefixLength = 24; }
+      ];
+
+      virtualisation.memorySize = 1536;
+      virtualisation.diskSize = 4096;
+
+      services.rke2 = {
+        enable = true;
+        role = "server";
+        package = rke2;
+        nodeIP = "192.168.1.1";
+        disable = [
+          "rke2-coredns"
+          "rke2-metrics-server"
+          "rke2-ingress-nginx"
+        ];
+        extraFlags = [
+          "--cluster-reset"
+        ];
+      };
+    };
+
+    testScript = let
+      kubectl = "${pkgs.kubectl}/bin/kubectl --kubeconfig=/etc/rancher/rke2/rke2.yaml";
+      ctr = "${pkgs.containerd}/bin/ctr -a /run/k3s/containerd/containerd.sock";
+    in ''
+      start_all()
+
+      machine.wait_for_unit("rke2")
+      machine.succeed("${kubectl} cluster-info")
+      machine.wait_until_succeeds(
+        "${pauseImage} | ${ctr} -n k8s.io image import -"
+      )
+
+      machine.wait_until_succeeds("${kubectl} get serviceaccount default")
+      machine.succeed("${kubectl} apply -f ${testPodYaml}")
+      machine.succeed("${kubectl} wait --for 'condition=Ready' pod/test")
+      machine.succeed("${kubectl} delete -f ${testPodYaml}")
+
+      machine.shutdown()
+    '';
+  })