about summary refs log tree commit diff
path: root/nixos/modules/virtualisation/includes-to-excludes.py
blob: 05ef9c0f23b913147845d1101f20577f8691cbb6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86

# Convert a list of strings to a regex that matches everything but those strings
# ... and it had to be a POSIX regex; no negative lookahead :(
# This is a workaround for erofs supporting only exclude regex, not an include list

import sys
import re
from collections import defaultdict

# We can configure this script to match in different ways if we need to.
# The regex got too long for the argument list, so we had to truncate the
# hashes and use MATCH_STRING_PREFIX. That's less accurate, and might pick up some
# garbage like .lock files, but only if the sandbox doesn't hide those. Even
# then it should be harmless.

# Each constant below is a candidate "tail" pattern. strings_to_inverted_regex
# emits `match_end` at the point where the strings handed to it have no
# characters left, so the tail decides what may follow a fully matched
# include string and still be matched by the exclude regex.

# Produce the negation of ^a$
# (any additional character after the string is matched).
MATCH_EXACTLY = ".+"
# Produce the negation of ^a
# NOTE(review): "//X" is presumably used as a pattern that can never match a
# real path (no "//" in a normalized path), so nothing that follows a matched
# prefix is excluded -- confirm against the consumer of the regex.
MATCH_STRING_PREFIX = "//X" # //X should be epsilon regex instead. Not supported??
# Produce the negation of ^a/?
# (matched only when the string is followed by a character other than "/").
MATCH_SUBPATHS = "[^/].*$"

# The tail pattern actually in use; exactly one assignment is left active.
# match_end = MATCH_SUBPATHS
match_end = MATCH_STRING_PREFIX
# match_end = MATCH_EXACTLY

def chars_to_inverted_class(letters):
    """Build a POSIX bracket expression matching any character not in *letters*.

    Args:
        letters: Non-empty sized iterable of single characters (a list, a
            string, or a dict keyed by character). Duplicates are ignored.

    Returns:
        A negated bracket expression such as ``[^ab]``.

    Raises:
        AssertionError: If *letters* is empty (``[^]`` is not a valid
            bracket expression).

    POSIX bracket expressions have no escape character, so special
    characters are made literal purely by where they are placed:

    * ``]`` goes first, right after ``[^``;
    * ``-`` goes last, right before the closing ``]``;
    * ``[`` goes at the tail as well, so it can never be directly followed
      by ``:``, ``.`` or ``=`` (which would open a character class,
      collating symbol or equivalence class).
    """
    assert len(letters) > 0
    # De-duplicate while keeping first-seen order; otherwise a repeated "]" or
    # "-" would survive the single remove() below and end up mid-expression.
    letters = list(dict.fromkeys(letters))

    leading = ""
    if "]" in letters:
        leading = "]"
        letters.remove("]")

    trailing = ""
    if "[" in letters:
        trailing += "["
        letters.remove("[")
    if "-" in letters:
        trailing += "-"
        letters.remove("-")

    return "[^" + leading + "".join(letters) + trailing + "]"

# There's probably at least one bug in here, but it seems to work well enough
# for filtering store paths.
def strings_to_inverted_regex(strings):
    """Return a regex fragment matching input that follows none of *strings*.

    The strings are grouped by their first character, trie-style. The
    resulting group matches when the next character is not the first
    character of any string, or when it is such a character and the
    remainder is matched by the recursively built fragment for the
    corresponding suffixes. When no characters are left, the module-level
    ``match_end`` tail pattern is emitted instead.

    Args:
        strings: Iterable of strings. An empty string means that one of the
            original strings ends exactly at this position.

    Returns:
        A parenthesised regex fragment (no leading ``^`` anchor).
    """
    suffixes_by_first = defaultdict(list)
    ends_here = False

    for item in strings:
        if item == "":
            ends_here = True
        else:
            suffixes_by_first[item[0]].append(item[1:])

    # In prefix mode, once one include string has been fully consumed every
    # continuation is covered by that prefix. Emitting "wrong next character"
    # alternatives for longer siblings here would exclude paths that do start
    # with the shorter string (e.g. "abd" when both "ab" and "abc" are given).
    # NOTE(review): MATCH_EXACTLY already behaves correctly in this situation;
    # MATCH_SUBPATHS is left unchanged and may still over-exclude -- verify
    # before enabling that mode with overlapping inputs.
    prefix_complete = ends_here and match_end == MATCH_STRING_PREFIX

    if not suffixes_by_first or prefix_complete:
        return "(" + match_end + ")"

    # First alternative: anything that starts with the wrong character.
    alternatives = [chars_to_inverted_class(suffixes_by_first)]

    # Remaining alternatives: the right character, but then it goes wrong.
    for char, sub in suffixes_by_first.items():
        alternatives.append(
            "(" + re.escape(char) + strings_to_inverted_regex(sub) + ")"
        )

    return "(" + "|".join(alternatives) + ")"

if __name__ == "__main__":
    # Read the include list from stdin, one entry per line: surrounding
    # whitespace is trimmed and lines that end up empty are dropped.
    include_entries = [
        entry
        for entry in (raw_line.strip() for raw_line in sys.stdin)
        if entry != ""
    ]

    # Anchor the generated exclusion pattern at the start of the string.
    print("^" + strings_to_inverted_regex(include_entries))

# Test:
# (echo foo; echo fo/; echo foo/; echo foo/ba/r; echo b; echo az; echo az/; echo az/a; echo ab; echo ab/a; echo ab/; echo abc; echo abcde; echo abb; echo ac; echo b) | grep -vE "$((echo ab; echo az; echo foo;) | python includes-to-excludes.py | tee /dev/stderr )"
# should print ab, az, foo and their subpaths