about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nixos/doc/manual/release-notes/rl-2405.section.md 2
-rw-r--r-- nixos/modules/security/systemd-confinement.nix 35
-rw-r--r-- nixos/tests/systemd-confinement.nix 166
3 files changed, 181 insertions, 22 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md
index a43e8f26cabe7..a756751ea2a06 100644
--- a/nixos/doc/manual/release-notes/rl-2405.section.md
+++ b/nixos/doc/manual/release-notes/rl-2405.section.md
@@ -713,6 +713,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m
 - `documentation.man.mandoc` now by default uses `MANPATH` to set the directories where mandoc will search for manual pages.
   This enables mandoc to find manual pages in Nix profiles. To set the manual search paths via the `mandoc.conf` configuration file like before, use `documentation.man.mandoc.settings.manpath` instead.
 
+- The `systemd-confinement` module extension is now compatible with `DynamicUser=true` and thus `ProtectSystem=strict` too.
+
 - `grafana-loki` package was updated to 3.0.0 which includes [breaking changes](https://github.com/grafana/loki/releases/tag/v3.0.0).
 
 - `programs.fish.package` now allows you to override the package used in the `fish` module.
diff --git a/nixos/modules/security/systemd-confinement.nix b/nixos/modules/security/systemd-confinement.nix
index 0304749b8d109..ed33c41c79ae2 100644
--- a/nixos/modules/security/systemd-confinement.nix
+++ b/nixos/modules/security/systemd-confinement.nix
@@ -79,13 +79,20 @@ in {
         description = ''
           The value `full-apivfs` (the default) sets up
           private {file}`/dev`, {file}`/proc`,
-          {file}`/sys` and {file}`/tmp` file systems in a separate user
-          name space.
+          {file}`/sys`, {file}`/tmp` and {file}`/var/tmp` file systems
+          in a separate user name space.
 
           If this is set to `chroot-only`, only the file
           system name space is set up along with the call to
           {manpage}`chroot(2)`.
 
+          In all cases, unless `serviceConfig.PrivateTmp=true` is set,
+          both {file}`/tmp` and {file}`/var/tmp` paths are added to `InaccessiblePaths=`.
+          This is to overcome options like `DynamicUser=true`
+          implying `PrivateTmp=true` without letting it be turned off.
+          Beware however that giving processes the `CAP_SYS_ADMIN` and `@mount` privileges
+          can let them undo the effects of `InaccessiblePaths=`.
+
           ::: {.note}
           This doesn't cover network namespaces and is solely for
           file system level isolation.
@@ -98,8 +105,11 @@ in {
         wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs");
       in lib.mkIf config.confinement.enable {
         serviceConfig = {
-          RootDirectory = "/var/empty";
-          TemporaryFileSystem = "/";
+          RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ];
+          RootDirectory = lib.mkDefault "/run/confinement/${mkPathSafeName name}";
+          InaccessiblePaths = [
+            "-+/run/confinement/${mkPathSafeName name}"
+          ];
           PrivateMounts = lib.mkDefault true;
 
           # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt
@@ -148,16 +158,6 @@ in {
               + " Please either define a separate service or find a way to run"
               + " commands other than ExecStart within the chroot.";
     }
-    { assertion = !cfg.serviceConfig.DynamicUser or false;
-      message = "${whatOpt "DynamicUser"}. Please create a dedicated user via"
-              + " the 'users.users' option instead as this combination is"
-              + " currently not supported.";
-    }
-    { assertion = cfg.serviceConfig ? ProtectSystem -> cfg.serviceConfig.ProtectSystem == false;
-      message = "${whatOpt "ProtectSystem"}. ProtectSystem is not compatible"
-              + " with service confinement as it fails to remount /usr within"
-              + " our chroot. Please disable the option.";
-    }
   ]) config.systemd.services);
 
   config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let
@@ -183,6 +183,13 @@ in {
         echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile"
       ''}
 
+      # If DynamicUser= is enabled, PrivateTmp=true is implied (and cannot be turned off),
+      # so make /tmp and /var/tmp inaccessible unless PrivateTmp=true is explicitly set.
+      ${lib.optionalString (!cfg.serviceConfig.PrivateTmp) ''
+        echo "InaccessiblePaths=-+/tmp" >> "$serviceFile"
+        echo "InaccessiblePaths=-+/var/tmp" >> "$serviceFile"
+      ''}
+
       while read storePath; do
         if [ -L "$storePath" ]; then
           # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths,
diff --git a/nixos/tests/systemd-confinement.nix b/nixos/tests/systemd-confinement.nix
index bde5b770ea50d..a4a0bf57f1cfc 100644
--- a/nixos/tests/systemd-confinement.nix
+++ b/nixos/tests/systemd-confinement.nix
@@ -4,7 +4,7 @@ import ./make-test-python.nix {
   nodes.machine = { pkgs, lib, ... }: let
     testServer = pkgs.writeScript "testserver.sh" ''
       #!${pkgs.runtimeShell}
-      export PATH=${lib.escapeShellArg "${pkgs.coreutils}/bin"}
+      export PATH=${lib.makeBinPath [ pkgs.coreutils pkgs.findutils ]}
       ${lib.escapeShellArg pkgs.runtimeShell} 2>&1
       echo "exit-status:$?"
     '';
@@ -48,8 +48,14 @@ import ./make-test-python.nix {
       { config.confinement.mode = "chroot-only";
         testScript = ''
           with subtest("chroot-only confinement"):
-              paths = machine.succeed('chroot-exec ls -1 / | paste -sd,').strip()
-              assert_eq(paths, "bin,nix,run")
+              # chroot-exec starts a socket-activated service,
+              # but, upon starting, a systemd system service
+              # calls setup_namespace() which calls base_filesystem_create()
+              # which creates some usual top level directories.
+              # In chroot-only mode, without additional BindPaths= or the like,
+              # they must be empty and thus removable by rmdir.
+              paths = machine.succeed('chroot-exec rmdir /dev /etc /proc /root /sys /usr /var "&&" ls -Am /').strip()
+              assert_eq(paths, "bin, nix, run")
               uid = machine.succeed('chroot-exec id -u').strip()
               assert_eq(uid, "0")
               machine.succeed("chroot-exec chown 65534 /bin")
@@ -57,7 +63,7 @@ import ./make-test-python.nix {
       }
       { testScript = ''
           with subtest("full confinement with APIVFS"):
-              machine.fail("chroot-exec ls -l /etc")
+              machine.succeed('chroot-exec rmdir /etc')
               machine.fail("chroot-exec chown 65534 /bin")
               assert_eq(machine.succeed('chroot-exec id -u').strip(), "0")
               machine.succeed("chroot-exec chown 0 /bin")
@@ -80,6 +86,146 @@ import ./make-test-python.nix {
               machine.fail("chroot-exec touch /bin/test")
         '';
       }
+      { config.confinement.mode = "full-apivfs";
+        config.serviceConfig.DynamicUser = true;
+        testScript = ''
+          with subtest("check if DynamicUser is working in full-apivfs mode"):
+              machine.succeed("chroot-exec ls -l /dev")
+              paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? = 1')
+              assert_eq(
+                '\n'.join(sorted(paths.split('\n'))),
+          """
+          /
+          /bin
+          /bin/sh
+          /dev
+          /etc
+          /nix
+          /proc
+          /root
+          /run
+          /run/host
+          /run/host/.os-release-stage
+          /run/host/.os-release-stage/os-release
+          /run/host/os-release
+          /run/systemd
+          /run/systemd/incoming
+          /sys
+          /tmp
+          /usr
+          /var
+          /var/tmp
+          find: '/root': Permission denied
+          find: '/run/systemd/incoming': Permission denied"""
+              )
+              uid = machine.succeed('chroot-exec id -u').strip()
+              assert uid != "0", "UID of a DynamicUser shouldn't be 0"
+              machine.fail("chroot-exec touch /bin/test")
+              # DynamicUser=true implies ProtectSystem=strict
+              machine.fail("chroot-exec touch /etc/test")
+        '';
+      }
+      { config.confinement.mode = "full-apivfs";
+        config.serviceConfig.DynamicUser = true;
+        config.serviceConfig.PrivateTmp = false;
+        testScript = ''
+          with subtest("check if DynamicUser and PrivateTmp=false are working in full-apivfs mode"):
+              machine.succeed("chroot-exec ls -l /dev")
+              paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? = 1')
+              assert_eq(
+                '\n'.join(sorted(paths.split('\n'))),
+          """
+          /
+          /bin
+          /bin/sh
+          /dev
+          /etc
+          /nix
+          /proc
+          /root
+          /run
+          /run/host
+          /run/host/.os-release-stage
+          /run/host/.os-release-stage/os-release
+          /run/host/os-release
+          /run/systemd
+          /run/systemd/incoming
+          /sys
+          /usr
+          /var
+          find: '/root': Permission denied
+          find: '/run/systemd/incoming': Permission denied"""
+              )
+              uid = machine.succeed('chroot-exec id -u').strip()
+              assert uid != "0", "UID of a DynamicUser shouldn't be 0"
+              machine.fail("chroot-exec touch /bin/test")
+              # DynamicUser=true implies ProtectSystem=strict
+              machine.fail("chroot-exec touch /etc/test")
+        '';
+      }
+      { config.confinement.mode = "chroot-only";
+        config.serviceConfig.DynamicUser = true;
+        testScript = ''
+          with subtest("check if DynamicUser is working in chroot-only mode"):
+              paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? = 1')
+              assert_eq(
+                '\n'.join(sorted(paths.split('\n'))),
+          """
+          /
+          /bin
+          /bin/sh
+          /dev
+          /etc
+          /nix
+          /proc
+          /root
+          /run
+          /run/systemd
+          /run/systemd/incoming
+          /sys
+          /usr
+          /var
+          find: '/root': Permission denied
+          find: '/run/systemd/incoming': Permission denied"""
+              )
+              uid = machine.succeed('chroot-exec id -u').strip()
+              assert uid != "0", "UID of a DynamicUser shouldn't be 0"
+              machine.fail("chroot-exec touch /bin/test")
+        '';
+      }
+      { config.confinement.mode = "chroot-only";
+        config.serviceConfig.DynamicUser = true;
+        config.serviceConfig.PrivateTmp = true;
+        testScript = ''
+          with subtest("check if DynamicUser and PrivateTmp=true are working in chroot-only mode"):
+              paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? = 1')
+              assert_eq(
+                '\n'.join(sorted(paths.split('\n'))),
+          """
+          /
+          /bin
+          /bin/sh
+          /dev
+          /etc
+          /nix
+          /proc
+          /root
+          /run
+          /run/systemd
+          /run/systemd/incoming
+          /sys
+          /tmp
+          /usr
+          /var
+          /var/tmp
+          find: '/root': Permission denied
+          find: '/run/systemd/incoming': Permission denied"""
+              )
+              uid = machine.succeed('chroot-exec id -u').strip()
+              assert uid != "0", "UID of a DynamicUser shouldn't be 0"
+              machine.fail("chroot-exec touch /bin/test")
+        '';
+      }
       (let
         symlink = pkgs.runCommand "symlink" {
           target = pkgs.writeText "symlink-target" "got me\n";
@@ -88,7 +234,7 @@ import ./make-test-python.nix {
         config.confinement.packages = lib.singleton symlink;
         testScript = ''
           with subtest("check if symlinks are properly bind-mounted"):
-              machine.fail("chroot-exec test -e /etc")
+              machine.succeed("chroot-exec rmdir /etc")
               text = machine.succeed('chroot-exec cat ${symlink}').strip()
               assert_eq(text, "got me")
         '';
@@ -176,9 +322,13 @@ import ./make-test-python.nix {
   };
 
   testScript = { nodes, ... }: ''
-    def assert_eq(a, b):
-        assert a == b, f"{a} != {b}"
+    import difflib
+    def assert_eq(got, expected):
+        if got != expected:
+          diff = difflib.unified_diff(got.splitlines(keepends=True), expected.splitlines(keepends=True))
+          print("".join(diff))
+        assert got == expected, f"{got} != {expected}"
 
     machine.wait_for_unit("multi-user.target")
-  '' + nodes.machine.config.__testSteps;
+  '' + nodes.machine.__testSteps;
 }