about summary refs log tree commit diff
path: root/nixos/modules/virtualisation
diff options
context:
space:
mode:
Diffstat (limited to 'nixos/modules/virtualisation')
-rw-r--r--nixos/modules/virtualisation/amazon-ec2-amis.nix50
-rw-r--r--nixos/modules/virtualisation/amazon-image.nix95
-rw-r--r--nixos/modules/virtualisation/amazon-options.nix10
-rw-r--r--nixos/modules/virtualisation/container-config.nix5
-rw-r--r--nixos/modules/virtualisation/ec2-data.nix1
-rw-r--r--nixos/modules/virtualisation/ec2-metadata-fetcher.nix77
-rw-r--r--nixos/modules/virtualisation/ec2-metadata-fetcher.sh67
-rw-r--r--nixos/modules/virtualisation/lxc-container.nix41
-rw-r--r--nixos/modules/virtualisation/lxd.nix14
-rw-r--r--nixos/modules/virtualisation/qemu-vm.nix5
-rw-r--r--nixos/modules/virtualisation/rosetta.nix73
11 files changed, 260 insertions, 178 deletions
diff --git a/nixos/modules/virtualisation/amazon-ec2-amis.nix b/nixos/modules/virtualisation/amazon-ec2-amis.nix
index 0324c5332cffd..446a1b0ecaab2 100644
--- a/nixos/modules/virtualisation/amazon-ec2-amis.nix
+++ b/nixos/modules/virtualisation/amazon-ec2-amis.nix
@@ -488,5 +488,53 @@ let self = {
   "22.05".us-west-1.aarch64-linux.hvm-ebs = "ami-0f96be48071c13ab2";
   "22.05".us-west-2.aarch64-linux.hvm-ebs = "ami-084bc5d777585adfb";
 
-  latest = self."22.05";
+  # 22.11.466.596a8e828c5
+
+  "22.11".eu-west-1.x86_64-linux.hvm-ebs = "ami-01aafe08a4e74bd9a";
+  "22.11".af-south-1.x86_64-linux.hvm-ebs = "ami-0d937fc7bf7b8c2ed";
+  "22.11".ap-east-1.x86_64-linux.hvm-ebs = "ami-020e59f6affef2732";
+  "22.11".ap-northeast-1.x86_64-linux.hvm-ebs = "ami-04a7bd7a969506a87";
+  "22.11".ap-northeast-2.x86_64-linux.hvm-ebs = "ami-007b9209171e2dcdd";
+  "22.11".ap-northeast-3.x86_64-linux.hvm-ebs = "ami-0c4d0b584cd570584";
+  "22.11".ap-south-1.x86_64-linux.hvm-ebs = "ami-02aa47f84c215d593";
+  "22.11".ap-southeast-1.x86_64-linux.hvm-ebs = "ami-067a7fca4a01c4dda";
+  "22.11".ap-southeast-2.x86_64-linux.hvm-ebs = "ami-0638db75ba113c635";
+  "22.11".ap-southeast-3.x86_64-linux.hvm-ebs = "ami-08dcda749c59e8747";
+  "22.11".ca-central-1.x86_64-linux.hvm-ebs = "ami-09b007688e369f794";
+  "22.11".eu-central-1.x86_64-linux.hvm-ebs = "ami-05df1b211df600977";
+  "22.11".eu-north-1.x86_64-linux.hvm-ebs = "ami-0427d0897b928e191";
+  "22.11".eu-south-1.x86_64-linux.hvm-ebs = "ami-051beda489f0dd109";
+  "22.11".eu-west-2.x86_64-linux.hvm-ebs = "ami-0c2090b73fc610ac3";
+  "22.11".eu-west-3.x86_64-linux.hvm-ebs = "ami-0d03a150cf6c07022";
+  "22.11".me-south-1.x86_64-linux.hvm-ebs = "ami-0443b1af94bff9e3d";
+  "22.11".sa-east-1.x86_64-linux.hvm-ebs = "ami-07b2ce95ba17b6bc1";
+  "22.11".us-east-1.x86_64-linux.hvm-ebs = "ami-0508167db03652cc4";
+  "22.11".us-east-2.x86_64-linux.hvm-ebs = "ami-0e41ac272a7d67029";
+  "22.11".us-west-1.x86_64-linux.hvm-ebs = "ami-02f3fb062ee9af563";
+  "22.11".us-west-2.x86_64-linux.hvm-ebs = "ami-06b260b3a958948a0";
+
+  "22.11".eu-west-1.aarch64-linux.hvm-ebs = "ami-0c4132540cabbc7df";
+  "22.11".af-south-1.aarch64-linux.hvm-ebs = "ami-0f12780247b337357";
+  "22.11".ap-east-1.aarch64-linux.hvm-ebs = "ami-04789617e858da6fb";
+  "22.11".ap-northeast-1.aarch64-linux.hvm-ebs = "ami-0f4d8517ab163b274";
+  "22.11".ap-northeast-2.aarch64-linux.hvm-ebs = "ami-051a06893bcc696c1";
+  "22.11".ap-northeast-3.aarch64-linux.hvm-ebs = "ami-05a086610680a7d8b";
+  "22.11".ap-south-1.aarch64-linux.hvm-ebs = "ami-04cd79197824124cd";
+  "22.11".ap-southeast-1.aarch64-linux.hvm-ebs = "ami-0437f330961467257";
+  "22.11".ap-southeast-2.aarch64-linux.hvm-ebs = "ami-000c2ecbc430c36d7";
+  "22.11".ap-southeast-3.aarch64-linux.hvm-ebs = "ami-062e917296b5087c0";
+  "22.11".ca-central-1.aarch64-linux.hvm-ebs = "ami-0c91995b735d1b8b6";
+  "22.11".eu-central-1.aarch64-linux.hvm-ebs = "ami-0537d704b177a676b";
+  "22.11".eu-north-1.aarch64-linux.hvm-ebs = "ami-05f1f532f90d8e16c";
+  "22.11".eu-south-1.aarch64-linux.hvm-ebs = "ami-097fe290eafff61ad";
+  "22.11".eu-west-2.aarch64-linux.hvm-ebs = "ami-053b6cc7a3394891a";
+  "22.11".eu-west-3.aarch64-linux.hvm-ebs = "ami-0a5b6d023afde63c3";
+  "22.11".me-south-1.aarch64-linux.hvm-ebs = "ami-024fcb01f8638ed08";
+  "22.11".sa-east-1.aarch64-linux.hvm-ebs = "ami-06d72c6e930037236";
+  "22.11".us-east-1.aarch64-linux.hvm-ebs = "ami-0b33ffb684d6b07b5";
+  "22.11".us-east-2.aarch64-linux.hvm-ebs = "ami-033ff64078c59f378";
+  "22.11".us-west-1.aarch64-linux.hvm-ebs = "ami-052d52b9e30a18562";
+  "22.11".us-west-2.aarch64-linux.hvm-ebs = "ami-07418b6a4782c9521";
+
+  latest = self."22.11";
 }; in self
diff --git a/nixos/modules/virtualisation/amazon-image.nix b/nixos/modules/virtualisation/amazon-image.nix
index 12fe6fa444793..9751f5755f96d 100644
--- a/nixos/modules/virtualisation/amazon-image.nix
+++ b/nixos/modules/virtualisation/amazon-image.nix
@@ -10,11 +10,6 @@ with lib;
 
 let
   cfg = config.ec2;
-  metadataFetcher = import ./ec2-metadata-fetcher.nix {
-    inherit (pkgs) curl;
-    targetRoot = "$targetRoot/";
-    wgetExtraOptions = "-q";
-  };
 in
 
 {
@@ -31,18 +26,12 @@ in
   config = {
 
     assertions = [
-      { assertion = cfg.hvm;
-        message = "Paravirtualized EC2 instances are no longer supported.";
-      }
-      { assertion = cfg.efi -> cfg.hvm;
-        message = "EC2 instances using EFI must be HVM instances.";
-      }
       { assertion = versionOlder config.boot.kernelPackages.kernel.version "5.17";
         message = "ENA driver fails to build with kernel >= 5.17";
       }
     ];
 
-    boot.growPartition = cfg.hvm;
+    boot.growPartition = true;
 
     fileSystems."/" = mkIf (!cfg.zfs.enable) {
       device = "/dev/disk/by-label/nixos";
@@ -64,9 +53,9 @@ in
     boot.extraModulePackages = [
       config.boot.kernelPackages.ena
     ];
-    boot.initrd.kernelModules = [ "xen-blkfront" "xen-netfront" ];
-    boot.initrd.availableKernelModules = [ "ixgbevf" "ena" "nvme" ];
-    boot.kernelParams = mkIf cfg.hvm [ "console=ttyS0,115200n8" "random.trust_cpu=on" ];
+    boot.initrd.kernelModules = [ "xen-blkfront" ];
+    boot.initrd.availableKernelModules = [ "nvme" ];
+    boot.kernelParams = [ "console=ttyS0,115200n8" "random.trust_cpu=on" ];
 
     # Prevent the nouveau kernel module from being loaded, as it
     # interferes with the nvidia/nvidia-uvm modules needed for CUDA.
@@ -74,10 +63,7 @@ in
     # boot.
     boot.blacklistedKernelModules = [ "nouveau" "xen_fbfront" ];
 
-    # Generate a GRUB menu.  Amazon's pv-grub uses this to boot our kernel/initrd.
-    boot.loader.grub.version = if cfg.hvm then 2 else 1;
-    boot.loader.grub.device = if (cfg.hvm && !cfg.efi) then "/dev/xvda" else "nodev";
-    boot.loader.grub.extraPerEntryConfig = mkIf (!cfg.hvm) "root (hd0)";
+    boot.loader.grub.device = if cfg.efi then "nodev" else "/dev/xvda";
     boot.loader.grub.efiSupport = cfg.efi;
     boot.loader.grub.efiInstallAsRemovable = cfg.efi;
     boot.loader.timeout = 1;
@@ -87,67 +73,14 @@ in
       terminal_input console serial
     '';
 
-    boot.initrd.network.enable = true;
-
-    # Mount all formatted ephemeral disks and activate all swap devices.
-    # We cannot do this with the ‘fileSystems’ and ‘swapDevices’ options
-    # because the set of devices is dependent on the instance type
-    # (e.g. "m1.small" has one ephemeral filesystem and one swap device,
-    # while "m1.large" has two ephemeral filesystems and no swap
-    # devices).  Also, put /tmp and /var on /disk0, since it has a lot
-    # more space than the root device.  Similarly, "move" /nix to /disk0
-    # by layering a unionfs-fuse mount on top of it so we have a lot more space for
-    # Nix operations.
-    boot.initrd.postMountCommands =
-      ''
-        ${metadataFetcher}
-
-        diskNr=0
-        diskForUnionfs=
-        for device in /dev/xvd[abcde]*; do
-            if [ "$device" = /dev/xvda -o "$device" = /dev/xvda1 ]; then continue; fi
-            fsType=$(blkid -o value -s TYPE "$device" || true)
-            if [ "$fsType" = swap ]; then
-                echo "activating swap device $device..."
-                swapon "$device" || true
-            elif [ "$fsType" = ext3 ]; then
-                mp="/disk$diskNr"
-                diskNr=$((diskNr + 1))
-                if mountFS "$device" "$mp" "" ext3; then
-                    if [ -z "$diskForUnionfs" ]; then diskForUnionfs="$mp"; fi
-                fi
-            else
-                echo "skipping unknown device type $device"
-            fi
-        done
-
-        if [ -n "$diskForUnionfs" ]; then
-            mkdir -m 755 -p $targetRoot/$diskForUnionfs/root
-
-            mkdir -m 1777 -p $targetRoot/$diskForUnionfs/root/tmp $targetRoot/tmp
-            mount --bind $targetRoot/$diskForUnionfs/root/tmp $targetRoot/tmp
-
-            if [ "$(cat "$metaDir/ami-manifest-path")" != "(unknown)" ]; then
-                mkdir -m 755 -p $targetRoot/$diskForUnionfs/root/var $targetRoot/var
-                mount --bind $targetRoot/$diskForUnionfs/root/var $targetRoot/var
-
-                mkdir -p /unionfs-chroot/ro-nix
-                mount --rbind $targetRoot/nix /unionfs-chroot/ro-nix
-
-                mkdir -m 755 -p $targetRoot/$diskForUnionfs/root/nix
-                mkdir -p /unionfs-chroot/rw-nix
-                mount --rbind $targetRoot/$diskForUnionfs/root/nix /unionfs-chroot/rw-nix
-
-                unionfs -o allow_other,cow,nonempty,chroot=/unionfs-chroot,max_files=32768 /rw-nix=RW:/ro-nix=RO $targetRoot/nix
-            fi
-        fi
-      '';
-
-    boot.initrd.extraUtilsCommands =
-      ''
-        # We need swapon in the initrd.
-        copy_bin_and_libs ${pkgs.util-linux}/sbin/swapon
-      '';
+    systemd.services.fetch-ec2-metadata = {
+      wantedBy = [ "multi-user.target" ];
+      after = ["network-online.target"];
+      path = [ pkgs.curl ];
+      script = builtins.readFile ./ec2-metadata-fetcher.sh;
+      serviceConfig.Type = "oneshot";
+      serviceConfig.StandardOutput = "journal+console";
+    };
 
     # Allow root logins only using the SSH key that the user specified
     # at instance creation time.
@@ -166,8 +99,6 @@ in
     # Always include cryptsetup so that Charon can use it.
     environment.systemPackages = [ pkgs.cryptsetup ];
 
-    boot.initrd.supportedFilesystems = [ "unionfs-fuse" ];
-
     # EC2 has its own NTP server provided by the hypervisor
     networking.timeServers = [ "169.254.169.123" ];
 
diff --git a/nixos/modules/virtualisation/amazon-options.nix b/nixos/modules/virtualisation/amazon-options.nix
index 227f3e433c107..915bbf9763db4 100644
--- a/nixos/modules/virtualisation/amazon-options.nix
+++ b/nixos/modules/virtualisation/amazon-options.nix
@@ -2,6 +2,9 @@
 let
   inherit (lib) literalExpression types;
 in {
+  imports = [
+    (lib.mkRemovedOptionModule [ "ec2" "hvm" ] "Only HVM instances are supported, so specifying it is no longer necessary.")
+  ];
   options = {
     ec2 = {
       zfs = {
@@ -41,13 +44,6 @@ in {
           });
         };
       };
-      hvm = lib.mkOption {
-        default = lib.versionAtLeast config.system.stateVersion "17.03";
-        internal = true;
-        description = lib.mdDoc ''
-          Whether the EC2 instance is a HVM instance.
-        '';
-      };
       efi = lib.mkOption {
         default = pkgs.stdenv.hostPlatform.isAarch64;
         defaultText = literalExpression "pkgs.stdenv.hostPlatform.isAarch64";
diff --git a/nixos/modules/virtualisation/container-config.nix b/nixos/modules/virtualisation/container-config.nix
index 09a2d9de040a2..2460ec45e3fca 100644
--- a/nixos/modules/virtualisation/container-config.nix
+++ b/nixos/modules/virtualisation/container-config.nix
@@ -8,7 +8,9 @@ with lib;
 
     # Disable some features that are not useful in a container.
 
+    # containers don't have a kernel
     boot.kernel.enable = false;
+    boot.modprobeConfig.enable = false;
 
     console.enable = mkDefault false;
 
@@ -24,6 +26,9 @@ with lib;
     # containers do not need to setup devices
     services.udev.enable = false;
 
+    # containers normally do not need to manage logical volumes
+    services.lvm.enable = lib.mkDefault false;
+
     # Shut up warnings about not having a boot loader.
     system.build.installBootLoader = lib.mkDefault "${pkgs.coreutils}/bin/true";
 
diff --git a/nixos/modules/virtualisation/ec2-data.nix b/nixos/modules/virtualisation/ec2-data.nix
index 1b764e7e4d80a..0cc6d9938e220 100644
--- a/nixos/modules/virtualisation/ec2-data.nix
+++ b/nixos/modules/virtualisation/ec2-data.nix
@@ -18,6 +18,7 @@ with lib;
 
         wantedBy = [ "multi-user.target" "sshd.service" ];
         before = [ "sshd.service" ];
+        after = ["fetch-ec2-metadata.service"];
 
         path = [ pkgs.iproute2 ];
 
diff --git a/nixos/modules/virtualisation/ec2-metadata-fetcher.nix b/nixos/modules/virtualisation/ec2-metadata-fetcher.nix
deleted file mode 100644
index 760f024f33fbd..0000000000000
--- a/nixos/modules/virtualisation/ec2-metadata-fetcher.nix
+++ /dev/null
@@ -1,77 +0,0 @@
-{ curl, targetRoot, wgetExtraOptions }:
-# Note: be very cautious about dependencies, each dependency grows
-# the closure of the initrd. Ideally we would not even require curl,
-# but there is no reasonable way to send an HTTP PUT request without
-# it. Note: do not be fooled: the wget referenced in this script
-# is busybox's wget, not the fully featured one with --method support.
-#
-# Make sure that every package you depend on here is already listed as
-# a channel blocker for both the full-sized and small channels.
-# Otherwise, we risk breaking user deploys in released channels.
-#
-# Also note: OpenStack's metadata service for its instances aims to be
-# compatible with the EC2 IMDS. Where possible, try to keep the set of
-# fetched metadata in sync with ./openstack-metadata-fetcher.nix .
-''
-  metaDir=${targetRoot}etc/ec2-metadata
-  mkdir -m 0755 -p "$metaDir"
-  rm -f "$metaDir/*"
-
-  get_imds_token() {
-    # retry-delay of 1 selected to give the system a second to get going,
-    # but not add a lot to the bootup time
-    ${curl}/bin/curl \
-      -v \
-      --retry 3 \
-      --retry-delay 1 \
-      --fail \
-      -X PUT \
-      --connect-timeout 1 \
-      -H "X-aws-ec2-metadata-token-ttl-seconds: 600" \
-      http://169.254.169.254/latest/api/token
-  }
-
-  preflight_imds_token() {
-    # retry-delay of 1 selected to give the system a second to get going,
-    # but not add a lot to the bootup time
-    ${curl}/bin/curl \
-      -v \
-      --retry 3 \
-      --retry-delay 1 \
-      --fail \
-      --connect-timeout 1 \
-      -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
-      http://169.254.169.254/1.0/meta-data/instance-id
-  }
-
-  try=1
-  while [ $try -le 3 ]; do
-    echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..."
-    IMDS_TOKEN=$(get_imds_token) && break
-    try=$((try + 1))
-    sleep 1
-  done
-
-  if [ "x$IMDS_TOKEN" == "x" ]; then
-    echo "failed to fetch an IMDS2v token."
-  fi
-
-  try=1
-  while [ $try -le 10 ]; do
-    echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..."
-    preflight_imds_token && break
-    try=$((try + 1))
-    sleep 1
-  done
-
-  echo "getting EC2 instance metadata..."
-
-  wget_imds() {
-    wget ${wgetExtraOptions} --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@";
-  }
-
-  wget_imds -O "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path
-  (umask 077 && wget_imds -O "$metaDir/user-data" http://169.254.169.254/1.0/user-data)
-  wget_imds -O "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname
-  wget_imds -O "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key
-''
diff --git a/nixos/modules/virtualisation/ec2-metadata-fetcher.sh b/nixos/modules/virtualisation/ec2-metadata-fetcher.sh
new file mode 100644
index 0000000000000..9e204d45dbd83
--- /dev/null
+++ b/nixos/modules/virtualisation/ec2-metadata-fetcher.sh
@@ -0,0 +1,67 @@
+metaDir=/etc/ec2-metadata
+mkdir -m 0755 -p "$metaDir"
+rm -f "$metaDir/*"
+
+get_imds_token() {
+  # retry-delay of 1 selected to give the system a second to get going,
+  # but not add a lot to the bootup time
+  curl \
+    --silent \
+    --show-error \
+    --retry 3 \
+    --retry-delay 1 \
+    --fail \
+    -X PUT \
+    --connect-timeout 1 \
+    -H "X-aws-ec2-metadata-token-ttl-seconds: 600" \
+    http://169.254.169.254/latest/api/token
+}
+
+preflight_imds_token() {
+  # retry-delay of 1 selected to give the system a second to get going,
+  # but not add a lot to the bootup time
+  curl \
+    --silent \
+    --show-error \
+    --retry 3 \
+    --retry-delay 1 \
+    --fail \
+    --connect-timeout 1 \
+    -H "X-aws-ec2-metadata-token: $IMDS_TOKEN" \
+    -o /dev/null \
+    http://169.254.169.254/1.0/meta-data/instance-id
+}
+
+try=1
+while [ $try -le 3 ]; do
+  echo "(attempt $try/3) getting an EC2 instance metadata service v2 token..."
+  IMDS_TOKEN=$(get_imds_token) && break
+  try=$((try + 1))
+  sleep 1
+done
+
+if [ "x$IMDS_TOKEN" == "x" ]; then
+  echo "failed to fetch an IMDS2v token."
+fi
+
+try=1
+while [ $try -le 10 ]; do
+  echo "(attempt $try/10) validating the EC2 instance metadata service v2 token..."
+  preflight_imds_token && break
+  try=$((try + 1))
+  sleep 1
+done
+
+echo "getting EC2 instance metadata..."
+
+get_imds() {
+  # Intentionally no --fail here, so that we proceed even if e.g. a
+  # 404 was returned (but we still fail if we can't reach the IMDS
+  # server).
+  curl --silent --show-error --header "X-aws-ec2-metadata-token: $IMDS_TOKEN" "$@"
+}
+
+get_imds -o "$metaDir/ami-manifest-path" http://169.254.169.254/1.0/meta-data/ami-manifest-path
+(umask 077 && get_imds -o "$metaDir/user-data" http://169.254.169.254/1.0/user-data)
+get_imds -o "$metaDir/hostname" http://169.254.169.254/1.0/meta-data/hostname
+get_imds -o "$metaDir/public-keys-0-openssh-key" http://169.254.169.254/1.0/meta-data/public-keys/0/openssh-key
diff --git a/nixos/modules/virtualisation/lxc-container.nix b/nixos/modules/virtualisation/lxc-container.nix
index f05f04baa35da..a71b693410518 100644
--- a/nixos/modules/virtualisation/lxc-container.nix
+++ b/nixos/modules/virtualisation/lxc-container.nix
@@ -88,6 +88,16 @@ in
           };
         '';
       };
+
+      privilegedContainer = mkOption {
+        type = types.bool;
+        default = false;
+        description = lib.mdDoc ''
+          Whether this LXC container will be running as a privileged container or not. If set to `true` then
+          additional configuration will be applied to the `systemd` instance running within the container as
+          recommended by [distrobuilder](https://linuxcontainers.org/distrobuilder/introduction/).
+        '';
+      };
     };
   };
 
@@ -146,12 +156,31 @@ in
     };
 
     # Add the overrides from lxd distrobuilder
-    systemd.extraConfig = ''
-      [Service]
-      ProtectProc=default
-      ProtectControlGroups=no
-      ProtectKernelTunables=no
-    '';
+    # https://github.com/lxc/distrobuilder/blob/05978d0d5a72718154f1525c7d043e090ba7c3e0/distrobuilder/main.go#L630
+    systemd.packages = [
+      (pkgs.writeTextFile {
+        name = "systemd-lxc-service-overrides";
+        destination = "/etc/systemd/system/service.d/zzz-lxc-service.conf";
+        text = ''
+          [Service]
+          ProcSubset=all
+          ProtectProc=default
+          ProtectControlGroups=no
+          ProtectKernelTunables=no
+          NoNewPrivileges=no
+          LoadCredential=
+        '' + optionalString cfg.privilegedContainer ''
+          # Additional settings for privileged containers
+          ProtectHome=no
+          ProtectSystem=no
+          PrivateDevices=no
+          PrivateTmp=no
+          ProtectKernelLogs=no
+          ProtectKernelModules=no
+          ReadWritePaths=
+        '';
+      })
+    ];
 
     # Allow the user to login as root without password.
     users.users.root.initialHashedPassword = mkOverride 150 "";
diff --git a/nixos/modules/virtualisation/lxd.nix b/nixos/modules/virtualisation/lxd.nix
index 764bb5e3b40ed..c06716e5eb605 100644
--- a/nixos/modules/virtualisation/lxd.nix
+++ b/nixos/modules/virtualisation/lxd.nix
@@ -129,11 +129,19 @@ in {
       description = "LXD Container Management Daemon";
 
       wantedBy = [ "multi-user.target" ];
-      after = [ "network-online.target" "lxcfs.service" ];
-      requires = [ "network-online.target" "lxd.socket"  "lxcfs.service" ];
+      after = [
+        "network-online.target"
+        (mkIf config.virtualisation.lxc.lxcfs.enable "lxcfs.service")
+      ];
+      requires = [
+        "network-online.target"
+        "lxd.socket"
+        (mkIf config.virtualisation.lxc.lxcfs.enable "lxcfs.service")
+      ];
       documentation = [ "man:lxd(1)" ];
 
-      path = optional cfg.zfsSupport config.boot.zfs.package;
+      path = [ pkgs.util-linux ]
+        ++ optional cfg.zfsSupport config.boot.zfs.package;
 
       serviceConfig = {
         ExecStart = "@${cfg.package}/bin/lxd lxd --group lxd";
diff --git a/nixos/modules/virtualisation/qemu-vm.nix b/nixos/modules/virtualisation/qemu-vm.nix
index edc6dfdc15ae9..eae898a08a69c 100644
--- a/nixos/modules/virtualisation/qemu-vm.nix
+++ b/nixos/modules/virtualisation/qemu-vm.nix
@@ -806,7 +806,7 @@ in
       optional (
         cfg.writableStore &&
         cfg.useNixStoreImage &&
-        opt.writableStore.highestPrio > lib.modules.defaultPriority)
+        opt.writableStore.highestPrio > lib.modules.defaultOverridePriority)
         ''
           You have enabled ${opt.useNixStoreImage} = true,
           without setting ${opt.writableStore} = false.
@@ -858,7 +858,8 @@ in
         # If the disk image appears to be empty, run mke2fs to
         # initialise.
         FSTYPE=$(blkid -o value -s TYPE ${cfg.bootDevice} || true)
-        if test -z "$FSTYPE"; then
+        PARTTYPE=$(blkid -o value -s PTTYPE ${cfg.bootDevice} || true)
+        if test -z "$FSTYPE" -a -z "$PARTTYPE"; then
             mke2fs -t ext4 ${cfg.bootDevice}
         fi
       '';
diff --git a/nixos/modules/virtualisation/rosetta.nix b/nixos/modules/virtualisation/rosetta.nix
new file mode 100644
index 0000000000000..109b114d649c5
--- /dev/null
+++ b/nixos/modules/virtualisation/rosetta.nix
@@ -0,0 +1,73 @@
+{ config, lib, pkgs, ... }:
+
+let
+  cfg = config.virtualisation.rosetta;
+  inherit (lib) types;
+in
+{
+  options = {
+    virtualisation.rosetta.enable = lib.mkOption {
+      type = types.bool;
+      default = false;
+      description = lib.mdDoc ''
+        Whether to enable [Rosetta](https://developer.apple.com/documentation/apple-silicon/about-the-rosetta-translation-environment) support.
+
+        This feature requires the system to be a virtualised guest on an Apple silicon host.
+
+        The default settings are suitable for the [UTM](https://docs.getutm.app/) virtualisation [package](https://search.nixos.org/packages?channel=unstable&show=utm&from=0&size=1&sort=relevance&type=packages&query=utm).
+        Make sure to select 'Apple Virtualization' as the virtualisation engine and then tick the 'Enable Rosetta' option.
+      '';
+    };
+
+    virtualisation.rosetta.mountPoint = lib.mkOption {
+      type = types.str;
+      default = "/run/rosetta";
+      internal = true;
+      description = lib.mdDoc ''
+        The mount point for the Rosetta runtime inside the guest system.
+
+        The proprietary runtime is exposed through a VirtioFS directory share and then mounted at this directory.
+      '';
+    };
+
+    virtualisation.rosetta.mountTag = lib.mkOption {
+      type = types.str;
+      default = "rosetta";
+      description = lib.mdDoc ''
+        The VirtioFS mount tag for the Rosetta runtime, exposed by the host's virtualisation software.
+
+        If supported, your virtualisation software should provide instructions on how register the Rosetta runtime inside Linux guests.
+        These instructions should mention the name of the mount tag used for the VirtioFS directory share that contains the Rosetta runtime.
+      '';
+    };
+  };
+
+  config = lib.mkIf cfg.enable {
+    assertions = [
+      {
+        assertion = pkgs.stdenv.hostPlatform.isAarch64;
+        message = "Rosetta is only supported on aarch64 systems";
+      }
+    ];
+
+    fileSystems."${cfg.mountPoint}" =  {
+      device = cfg.mountTag;
+      fsType = "virtiofs";
+    };
+
+    boot.binfmt.registrations.rosetta = {
+      interpreter = "${cfg.mountPoint}/rosetta";
+
+      # The required flags for binfmt are documented by Apple:
+      # https://developer.apple.com/documentation/virtualization/running_intel_binaries_in_linux_vms_with_rosetta
+      magicOrExtension = ''\x7fELF\x02\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x3e\x00'';
+      mask = ''\xff\xff\xff\xff\xff\xfe\xfe\x00\xff\xff\xff\xff\xff\xff\xff\xff\xfe\xff\xff\xff'';
+      fixBinary = true;
+      matchCredentials = true;
+      preserveArgvZero = false;
+
+      # Remove the shell wrapper and call the runtime directly
+      wrapInterpreterInShell = false;
+    };
+  };
+}