diff options
-rw-r--r-- | nixos/doc/manual/release-notes/rl-2405.section.md | 2 | ||||
-rw-r--r-- | nixos/modules/security/systemd-confinement.nix | 35 | ||||
-rw-r--r-- | nixos/tests/systemd-confinement.nix | 166 |
3 files changed, 181 insertions, 22 deletions
diff --git a/nixos/doc/manual/release-notes/rl-2405.section.md b/nixos/doc/manual/release-notes/rl-2405.section.md index a43e8f26cabe7..a756751ea2a06 100644 --- a/nixos/doc/manual/release-notes/rl-2405.section.md +++ b/nixos/doc/manual/release-notes/rl-2405.section.md @@ -713,6 +713,8 @@ The pre-existing [services.ankisyncd](#opt-services.ankisyncd.enable) has been m - `documentation.man.mandoc` now by default uses `MANPATH` to set the directories where mandoc will search for manual pages. This enables mandoc to find manual pages in Nix profiles. To set the manual search paths via the `mandoc.conf` configuration file like before, use `documentation.man.mandoc.settings.manpath` instead. +- The `systemd-confinement` module extension is now compatible with `DynamicUser=true` and thus `ProtectSystem=strict` too. + - `grafana-loki` package was updated to 3.0.0 which includes [breaking changes](https://github.com/grafana/loki/releases/tag/v3.0.0). - `programs.fish.package` now allows you to override the package used in the `fish` module. diff --git a/nixos/modules/security/systemd-confinement.nix b/nixos/modules/security/systemd-confinement.nix index 0304749b8d109..ed33c41c79ae2 100644 --- a/nixos/modules/security/systemd-confinement.nix +++ b/nixos/modules/security/systemd-confinement.nix @@ -79,13 +79,20 @@ in { description = '' The value `full-apivfs` (the default) sets up private {file}`/dev`, {file}`/proc`, - {file}`/sys` and {file}`/tmp` file systems in a separate user - name space. + {file}`/sys`, {file}`/tmp` and {file}`/var/tmp` file systems + in a separate user name space. If this is set to `chroot-only`, only the file system name space is set up along with the call to {manpage}`chroot(2)`. + In all cases, unless `serviceConfig.PrivateTmp=true` is set, + both {file}`/tmp` and {file}`/var/tmp` paths are added to `InaccessiblePaths=`. + This is to overcome options like `DynamicUser=true` + implying `PrivateTmp=true` without letting it be turned off. 
+ Beware however that giving processes the `CAP_SYS_ADMIN` and `@mount` privileges + can let them undo the effects of `InaccessiblePaths=`. + ::: {.note} This doesn't cover network namespaces and is solely for file system level isolation. @@ -98,8 +105,11 @@ in { wantsAPIVFS = lib.mkDefault (config.confinement.mode == "full-apivfs"); in lib.mkIf config.confinement.enable { serviceConfig = { - RootDirectory = "/var/empty"; - TemporaryFileSystem = "/"; + RuntimeDirectory = [ "confinement/${mkPathSafeName name}" ]; + RootDirectory = lib.mkDefault "/run/confinement/${mkPathSafeName name}"; + InaccessiblePaths = [ + "-+/run/confinement/${mkPathSafeName name}" + ]; PrivateMounts = lib.mkDefault true; # https://github.com/NixOS/nixpkgs/issues/14645 is a future attempt @@ -148,16 +158,6 @@ in { + " Please either define a separate service or find a way to run" + " commands other than ExecStart within the chroot."; } - { assertion = !cfg.serviceConfig.DynamicUser or false; - message = "${whatOpt "DynamicUser"}. Please create a dedicated user via" - + " the 'users.users' option instead as this combination is" - + " currently not supported."; - } - { assertion = cfg.serviceConfig ? ProtectSystem -> cfg.serviceConfig.ProtectSystem == false; - message = "${whatOpt "ProtectSystem"}. ProtectSystem is not compatible" - + " with service confinement as it fails to remount /usr within" - + " our chroot. Please disable the option."; - } ]) config.systemd.services); config.systemd.packages = lib.concatLists (lib.mapAttrsToList (name: cfg: let @@ -183,6 +183,13 @@ in { echo "BindReadOnlyPaths=$realprog:/bin/sh" >> "$serviceFile" ''} + # If DynamicUser= is enabled, PrivateTmp=true is implied (and cannot be turned off), + # so make /tmp and /var/tmp inaccessible unless PrivateTmp=true is explicitly set. 
+ ${lib.optionalString (!cfg.serviceConfig.PrivateTmp) '' + echo "InaccessiblePaths=-+/tmp" >> "$serviceFile" + echo "InaccessiblePaths=-+/var/tmp" >> "$serviceFile" + ''} + while read storePath; do if [ -L "$storePath" ]; then # Currently, systemd can't cope with symlinks in Bind(ReadOnly)Paths, diff --git a/nixos/tests/systemd-confinement.nix b/nixos/tests/systemd-confinement.nix index bde5b770ea50d..a4a0bf57f1cfc 100644 --- a/nixos/tests/systemd-confinement.nix +++ b/nixos/tests/systemd-confinement.nix @@ -4,7 +4,7 @@ import ./make-test-python.nix { nodes.machine = { pkgs, lib, ... }: let testServer = pkgs.writeScript "testserver.sh" '' #!${pkgs.runtimeShell} - export PATH=${lib.escapeShellArg "${pkgs.coreutils}/bin"} + export PATH=${lib.makeBinPath [ pkgs.coreutils pkgs.findutils ]} ${lib.escapeShellArg pkgs.runtimeShell} 2>&1 echo "exit-status:$?" ''; @@ -48,8 +48,14 @@ import ./make-test-python.nix { { config.confinement.mode = "chroot-only"; testScript = '' with subtest("chroot-only confinement"): - paths = machine.succeed('chroot-exec ls -1 / | paste -sd,').strip() - assert_eq(paths, "bin,nix,run") + # chroot-exec starts a socket-activated service, + # but, upon starting, a systemd system service + # calls setup_namespace() which calls base_filesystem_create() + # which creates some usual top level directories. + # In chroot-only mode, without additional BindPaths= or the like, + # they must be empty and thus removable by rmdir. 
+ paths = machine.succeed('chroot-exec rmdir /dev /etc /proc /root /sys /usr /var "&&" ls -Am /').strip() + assert_eq(paths, "bin, nix, run") uid = machine.succeed('chroot-exec id -u').strip() assert_eq(uid, "0") machine.succeed("chroot-exec chown 65534 /bin") @@ -57,7 +63,7 @@ import ./make-test-python.nix { } { testScript = '' with subtest("full confinement with APIVFS"): - machine.fail("chroot-exec ls -l /etc") + machine.succeed('chroot-exec rmdir /etc') machine.fail("chroot-exec chown 65534 /bin") assert_eq(machine.succeed('chroot-exec id -u').strip(), "0") machine.succeed("chroot-exec chown 0 /bin") @@ -80,6 +86,146 @@ import ./make-test-python.nix { machine.fail("chroot-exec touch /bin/test") ''; } + { config.confinement.mode = "full-apivfs"; + config.serviceConfig.DynamicUser = true; + testScript = '' + with subtest("check if DynamicUser is working in full-apivfs mode"): + machine.succeed("chroot-exec ls -l /dev") + paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? 
= 1') + assert_eq( + '\n'.join(sorted(paths.split('\n'))), + """ + / + /bin + /bin/sh + /dev + /etc + /nix + /proc + /root + /run + /run/host + /run/host/.os-release-stage + /run/host/.os-release-stage/os-release + /run/host/os-release + /run/systemd + /run/systemd/incoming + /sys + /tmp + /usr + /var + /var/tmp + find: '/root': Permission denied + find: '/run/systemd/incoming': Permission denied""" + ) + uid = machine.succeed('chroot-exec id -u').strip() + assert uid != "0", "UID of a DynamicUser shouldn't be 0" + machine.fail("chroot-exec touch /bin/test") + # DynamicUser=true implies ProtectSystem=strict + machine.fail("chroot-exec touch /etc/test") + ''; + } + { config.confinement.mode = "full-apivfs"; + config.serviceConfig.DynamicUser = true; + config.serviceConfig.PrivateTmp = false; + testScript = '' + with subtest("check if DynamicUser and PrivateTmp=false are working in full-apivfs mode"): + machine.succeed("chroot-exec ls -l /dev") + paths = machine.succeed('chroot-exec find / -path /dev/"\\*" -prune -o -path /nix/"\\*" -prune -o -path /proc/"\\*" -prune -o -path /sys/"\\*" -prune -o -print || test $? 
= 1') + assert_eq( + '\n'.join(sorted(paths.split('\n'))), + """ + / + /bin + /bin/sh + /dev + /etc + /nix + /proc + /root + /run + /run/host + /run/host/.os-release-stage + /run/host/.os-release-stage/os-release + /run/host/os-release + /run/systemd + /run/systemd/incoming + /sys + /usr + /var + find: '/root': Permission denied + find: '/run/systemd/incoming': Permission denied""" + ) + uid = machine.succeed('chroot-exec id -u').strip() + assert uid != "0", "UID of a DynamicUser shouldn't be 0" + machine.fail("chroot-exec touch /bin/test") + # DynamicUser=true implies ProtectSystem=strict + machine.fail("chroot-exec touch /etc/test") + ''; + } + { config.confinement.mode = "chroot-only"; + config.serviceConfig.DynamicUser = true; + testScript = '' + with subtest("check if DynamicUser is working in chroot-only mode"): + paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? = 1') + assert_eq( + '\n'.join(sorted(paths.split('\n'))), + """ + / + /bin + /bin/sh + /dev + /etc + /nix + /proc + /root + /run + /run/systemd + /run/systemd/incoming + /sys + /usr + /var + find: '/root': Permission denied + find: '/run/systemd/incoming': Permission denied""" + ) + uid = machine.succeed('chroot-exec id -u').strip() + assert uid != "0", "UID of a DynamicUser shouldn't be 0" + machine.fail("chroot-exec touch /bin/test") + ''; + } + { config.confinement.mode = "chroot-only"; + config.serviceConfig.DynamicUser = true; + config.serviceConfig.PrivateTmp = true; + testScript = '' + with subtest("check if DynamicUser and PrivateTmp=true are working in chroot-only mode"): + paths = machine.succeed('chroot-exec find / -path /nix/"\\*" -prune -o -print || test $? 
= 1') + assert_eq( + '\n'.join(sorted(paths.split('\n'))), + """ + / + /bin + /bin/sh + /dev + /etc + /nix + /proc + /root + /run + /run/systemd + /run/systemd/incoming + /sys + /tmp + /usr + /var + /var/tmp + find: '/root': Permission denied + find: '/run/systemd/incoming': Permission denied""" + ) + uid = machine.succeed('chroot-exec id -u').strip() + assert uid != "0", "UID of a DynamicUser shouldn't be 0" + machine.fail("chroot-exec touch /bin/test") + ''; + } (let symlink = pkgs.runCommand "symlink" { target = pkgs.writeText "symlink-target" "got me\n"; @@ -88,7 +234,7 @@ import ./make-test-python.nix { config.confinement.packages = lib.singleton symlink; testScript = '' with subtest("check if symlinks are properly bind-mounted"): - machine.fail("chroot-exec test -e /etc") + machine.succeed("chroot-exec rmdir /etc") text = machine.succeed('chroot-exec cat ${symlink}').strip() assert_eq(text, "got me") ''; @@ -176,9 +322,13 @@ import ./make-test-python.nix { }; testScript = { nodes, ... }: '' - def assert_eq(a, b): - assert a == b, f"{a} != {b}" + import difflib + def assert_eq(got, expected): + if got != expected: + diff = difflib.unified_diff(got.splitlines(keepends=True), expected.splitlines(keepends=True)) + print("".join(diff)) + assert got == expected, f"{got} != {expected}" machine.wait_for_unit("multi-user.target") - '' + nodes.machine.config.__testSteps; + '' + nodes.machine.__testSteps; } |