# path: root/pkgs/applications/science/misc/openrefine/default.nix
# blob: 6a9d4be6c50b2892b29ba5dda117c6d3a0a2f7e7
{ lib
, stdenv
, fetchFromGitHub
, buildNpmPackage
, curl
, jdk
, jq
, makeWrapper
, maven
, writeText
}:

let
  # Maven overridden to use the same `jdk` that is passed to this package
  # (the one JAVA_HOME is pointed at in preFixup).
  maven' = maven.override { inherit jdk; };

  # Upstream release being packaged; also used verbatim as the git rev below.
  # NOTE(review): passthru.updateScript (./update.sh) presumably rewrites
  # these values - confirm before reformatting them.
  version = "3.8.1";
  src = fetchFromGitHub {
    owner = "openrefine";
    repo = "openrefine";
    rev = version;
    # hash of the fetched source tree; must be updated whenever `version` changes
    hash = "sha256-MnFwFJdKIU7D8GQgnDvCO+P8r8h1Se/wmbt/Z3EX+3Q=";
  };

  # npm side of the web application, built as its own derivation from
  # main/webapp. Its output is the contents of modules/core/3rdparty, which
  # the main package copies back into the source tree in its postPatch.
  npmPkg = buildNpmPackage {
    inherit src version;

    pname = "openrefine-npm";
    sourceRoot = "${src.name}/main/webapp";

    npmDepsHash = "sha256-u9qledNFqGgMmOIsm2T8w3UoaLbb7WtksUw6xLoRgU8=";

    # package.json doesn't supply a version, which npm doesn't like - fix this.
    # directly referencing jq because buildNpmPackage doesn't pass
    # nativeBuildInputs through to fetchNpmDeps
    postPatch = ''
      NEW_PACKAGE_JSON=$(mktemp)
      ${jq}/bin/jq '. + {version: $ENV.version}' package.json > "$NEW_PACKAGE_JSON"
      cp "$NEW_PACKAGE_JSON" package.json
    '';

    # No npm build script is run; only the 3rdparty directory is installed.
    # NOTE(review): presumably that directory is populated during the npm
    # install step - confirm against upstream's package.json.
    dontNpmBuild = true;
    # runHook calls keep preInstall/postInstall usable from overrides even
    # though the default install phase is replaced.
    installPhase = ''
      runHook preInstall

      mkdir -p $out
      cp -r modules/core/3rdparty/* $out/

      runHook postInstall
    '';
  };

in maven'.buildMavenPackage {
  pname = "openrefine";
  inherit version src;

  # makeWrapper supplies wrapProgram, which preFixup uses on bin/refine
  nativeBuildInputs = [ makeWrapper ];

  # Skip the test suite and leave the `packaging` module out of the build.
  mvnParameters = "-DskipTests=true -pl !packaging";
  # hash of the Maven dependencies fetched for the build
  mvnHash = "sha256-0qsKUMV9M0ZaddR5ust8VikSrsutdxVNNezKqR+F/6M=";

  # Place the output of npmPkg into the source tree as the webapp's
  # modules/core/3rdparty directory.
  postPatch = ''
    cp -r ${npmPkg} main/webapp/modules/core/3rdparty
  '';

  # Lays out the runtime tree under $out:
  #   lib/server/target/lib  - contents of server/target/lib plus the server jar
  #   lib/webapp             - WEB-INF and modules, plus the `module` directory
  #                            of every extension that has one
  #   etc/refine.ini         - configuration file (extended in preFixup)
  #   bin/refine             - launcher script (patched and wrapped in preFixup)
  # The runHook calls make preInstall/postInstall hooks work, e.g. when they
  # are added through overrideAttrs; a custom installPhase skips them otherwise.
  installPhase = ''
    runHook preInstall

    mkdir -p $out/lib/server/target/lib
    cp -r server/target/lib/* $out/lib/server/target/lib/
    cp server/target/openrefine-*-server.jar $out/lib/server/target/lib/

    mkdir -p $out/lib/webapp
    cp -r main/webapp/{WEB-INF,modules} $out/lib/webapp/
    (
      cd extensions
      for ext in * ; do
        if [ -d "$ext/module" ] ; then
          mkdir -p "$out/lib/webapp/extensions/$ext"
          cp -r "$ext/module" "$out/lib/webapp/extensions/$ext/"
        fi
      done
    )

    mkdir -p $out/etc
    cp refine.ini $out/etc/

    mkdir -p $out/bin
    cp refine $out/bin/

    runHook postInstall
  '';

  # Post-install adjustments so the installed tree works from the store path:
  #   1. delete any *.java files that ended up under $out
  #   2. rewrite the butterfly.modules.path line that mentions "extensions" in
  #      WEB-INF/butterfly.properties to point at $out/lib/webapp/extensions
  #   3. replace the launcher's "cd `dirname $0`" line with a cd to $out/lib
  #   4. append the webapp dir, lib dir, JAVA_HOME and headless JAVA_OPTIONS to
  #      $out/etc/refine.ini (the heredoc delimiter is unquoted, so $out is
  #      expanded at build time; the single quotes are written literally)
  #   5. wrap bin/refine so jdk and curl are on PATH and REFINE_INI_PATH
  #      defaults to the installed refine.ini
  # The heredoc's closing EOF relies on Nix stripping the common leading
  # indentation from this indented string, which puts it at column 0.
  preFixup = ''
    find $out -name '*.java' -delete
    sed -i -E 's|^(butterfly\.modules\.path =).*extensions.*$|\1 '"$out/lib/webapp/extensions|" \
      $out/lib/webapp/WEB-INF/butterfly.properties

    sed -i 's|^cd `dirname \$0`$|cd '"$out/lib|" $out/bin/refine

    cat >> $out/etc/refine.ini <<EOF
    REFINE_WEBAPP='$out/lib/webapp'
    REFINE_LIB_DIR='$out/lib/server/target/lib'

    JAVA_HOME='${jdk.home}'

    # non-headless mode tries to launch a browser, causing a
    # number of purity problems
    JAVA_OPTIONS='-Drefine.headless=true'
    EOF

    wrapProgram $out/bin/refine \
      --prefix PATH : '${lib.makeBinPath [ jdk curl ]}' \
      --set-default REFINE_INI_PATH "$out/etc/refine.ini"
  '';

  # Extra attributes exposed on the resulting package.
  passthru = {
    # script used to update this package
    updateScript = ./update.sh;
    # the intermediate npm derivation defined in the let block above
    inherit npmPkg;
  };

  # Package metadata. `lib` is referenced explicitly instead of a blanket
  # `with lib;` so that only the intended names are pulled into scope.
  meta = {
    description = "Power tool for working with messy data and improving it";
    homepage = "https://openrefine.org";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ ris ];
    sourceProvenance = with lib.sourceTypes; [
      fromSource
      binaryBytecode # maven dependencies
    ];
    # builds, doesn't run
    # (stdenv.hostPlatform.isDarwin is the non-deprecated spelling of
    # stdenv.isDarwin and evaluates to the same value)
    broken = stdenv.hostPlatform.isDarwin;
    mainProgram = "refine";
  };
}