nixos/modules/services/misc/bees.nix


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129

{ config, lib, pkgs, ... }:

with lib;

let

  # Shorthand for the option values declared under `services.beesd`.
  cfg = config.services.beesd;

  # Syslog keywords mapped to their numeric levels; used both to build the
  # `verbosity` option type and to translate keywords into numbers.
  logLevels = {
    emerg = 0;
    alert = 1;
    crit = 2;
    err = 3;
    warning = 4;
    notice = 5;
    info = 6;
    debug = 7;
  };

  # Submodule describing a single filesystem handled by bees. Each entry of
  # `services.beesd.filesystems` is checked against these options.
  fsOptions = with types; {
    options.spec = mkOption {
      type = str;
      description = ''
        Description of how to identify the filesystem to be deduplicated by
        this instance of bees. Note that deduplication crosses subvolumes; one
        must not configure multiple instances for subvolumes of the same
        filesystem (or block devices which are part of the same filesystem),
        but only for completely independent btrfs filesystems.
        </para>
        <para>
        This must be in a format usable by findmnt; that could be a key=value
        pair, or a bare path to a mount point.
      '';
      example = "LABEL=MyBulkDataDrive";
    };
    options.hashTableSizeMB = mkOption {
      # Must be a strictly positive multiple of 16. Building on
      # `ints.positive` rejects 0 and negative values, which would otherwise
      # satisfy the modulus check on its own.
      type = addCheck ints.positive (n: mod n 16 == 0);
      default = 1024; # 1GB; default from upstream beesd script
      description = ''
        Hash table size in MB; must be a multiple of 16.
        </para>
        <para>
        A larger ratio of index size to storage size means smaller blocks of
        duplicate content are recognized.
        </para>
        <para>
        If you have 1TB of data, a 4GB hash table (which is to say, a value of
        4096) will permit 4KB extents (the smallest possible size) to be
        recognized, whereas a value of 1024 -- creating a 1GB hash table --
        will recognize only aligned duplicate blocks of 16KB.
      '';
    };
    options.verbosity = mkOption {
      # Accepts either a keyword ("info") or its numeric level (6) ...
      type = enum (attrNames logLevels ++ attrValues logLevels);
      # ... and always exposes the numeric level to the rest of the module.
      apply = v: if isString v then logLevels.${v} else v;
      default = "info";
      description = "Log verbosity (syslog keyword/level).";
    };
    options.workDir = mkOption {
      type = str;
      default = ".beeshome";
      description = ''
        Name (relative to the root of the filesystem) of the subvolume where
        the hash table will be stored.
      '';
    };
    options.extraOptions = mkOption {
      type = listOf str;
      default = [ ];
      description = ''
        Extra command-line options passed to the daemon. See upstream bees documentation.
      '';
      example = literalExpression ''
        [ "--thread-count" "4" ]
      '';
    };
  };

in
{

  options.services.beesd = {
    # Attribute set keyed by an arbitrary instance name; every value is
    # validated against the `fsOptions` submodule, and each entry becomes one
    # `beesd@<name>` systemd service (see `config` below).
    filesystems = mkOption {
      type = types.attrsOf (types.submodule fsOptions);
      default = { };
      description = "BTRFS filesystems to run block-level deduplication on.";
      example = literalExpression ''
        {
          root = {
            spec = "LABEL=root";
            hashTableSizeMB = 2048;
            verbosity = "crit";
            extraOptions = [ "--loadavg-target" "5.0" ];
          };
        }
      '';
    };
  };
  config = {
    systemd.services =
      let
        # Build the `beesd@<name>` unit definition for one configured
        # filesystem `fs`.
        mkBeesUnit = name: fs:
          let
            wrapper = "${pkgs.bees}/bin/bees-service-wrapper";
            # Arguments consumed by the wrapper itself: the filesystem spec
            # followed by key=value settings.
            wrapperArgs = escapeShellArgs [
              fs.spec
              "verbosity=${toString fs.verbosity}"
              "idxSizeMB=${toString fs.hashTableSizeMB}"
              "workDir=${fs.workDir}"
            ];
            # User-supplied options, placed after the `--` separator.
            daemonArgs = escapeShellArgs fs.extraOptions;
          in
          nameValuePair "beesd@${name}" {
            description = "Block-level BTRFS deduplication for %i";
            after = [ "sysinit.target" ];
            wantedBy = [ "multi-user.target" ];

            serviceConfig = {
              # Values from https://github.com/Zygo/bees/blob/v0.6.5/scripts/beesd@.service.in
              ExecStart = "${wrapper} run ${wrapperArgs} -- --no-timestamps ${daemonArgs}";
              # The same wrapper arguments are passed to `cleanup` once the
              # service has stopped.
              ExecStopPost = "${wrapper} cleanup ${wrapperArgs}";
              CPUAccounting = true;
              CPUSchedulingPolicy = "batch";
              CPUWeight = 12;
              IOSchedulingClass = "idle";
              IOSchedulingPriority = 7;
              IOWeight = 10;
              KillMode = "control-group";
              KillSignal = "SIGTERM";
              MemoryAccounting = true;
              Nice = 19;
              Restart = "on-abnormal";
              StartupCPUWeight = 25;
              StartupIOWeight = 25;
              SyslogIdentifier = "beesd"; # would otherwise be "bees-service-wrapper"
            };
          };
      in
      # One service per entry of `services.beesd.filesystems`.
      mapAttrs' mkBeesUnit cfg.filesystems;
  };
}