about summary refs log tree commit diff
path: root/pkgs/by-name/ti/tika/package.nix
blob: 748e24222df1892d331fa4dc173b8dc0821bead6 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
{
  lib,
  stdenv,
  maven,
  jdk8,
  fetchFromGitHub,
  makeWrapper,
  mvnDepsHash ? null,
  nixosTests,
}:

let
  # Hashes of the fetched Maven dependency set, keyed by platform system string.
  mvnDepsHashes = {
    aarch64-darwin = "sha256-D6adBXtBH1IokUwwA2Z6m+6rJP2xg6BK4rcPyDSgo6o=";
    aarch64-linux = "sha256-+ewdV9g0MfgiBiRAimkIZp9lrOTKnKnBB1LqhIlOSaQ=";
    x86_64-darwin = "sha256-nUAy2+O8REuq6pOWb8d+/c/YxPxj+XwtCtkaxfihDzc=";
    x86_64-linux = "sha256-M8O1EJtlTm+mVy/qxapRcBWxD14eYL/LLUxP2uOBoM4=";
  };

  # Default dependency hash for the current platform. Platforms missing from
  # the table above get `lib.fakeHash` plus an evaluation-time warning; the
  # warning only fires if this value is actually evaluated.
  knownMvnDepsHash =
    let
      inherit (stdenv) system;
      missingHashWarning = "This platform doesn't have a default mvnDepsHash value, you'll need to specify it manually";
    in
    if builtins.hasAttr system mvnDepsHashes then
      mvnDepsHashes.${system}
    else
      lib.warn missingHashWarning lib.fakeHash;

  # Maven with its JDK overridden to jdk8.
  maven' = maven.override { jdk = jdk8; };
in
# Builds Apache Tika from source with the jdk8-based Maven and installs the
# app and server jars together with `java -jar` launcher wrappers.
maven'.buildMavenPackage rec {
  pname = "tika";
  version = "2.9.2";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "tika";
    rev = version;
    hash = "sha256-4pSQcLDKgIcU+YypJ/ywdthi6tI1852fGVOCREzUFH0=";
  };

  buildOffline = true;

  # Extra artifacts requested explicitly for the offline build.
  # NOTE(review): presumably these are the ones the automatic dependency
  # resolution misses -- confirm against the buildMavenPackage documentation.
  manualMvnArtifacts = [
    "org.objenesis:objenesis:2.1"
    "org.apache.apache.resources:apache-jar-resource-bundle:1.5"
    "org.apache.maven.surefire:surefire-junit-platform:3.1.2"
    "org.junit.platform:junit-platform-launcher:1.10.0"
  ];

  # A caller-supplied mvnDepsHash takes precedence over the per-platform default.
  mvnHash = if mvnDepsHash != null then mvnDepsHash else knownMvnDepsHash;

  mvnParameters = toString [
    "-DskipTests=true" # skip tests (out of memory exceptions)
    "-Dossindex.skip" # skip dependency with vulnerability (recommended by upstream)
  ];

  nativeBuildInputs = [ makeWrapper ];

  # Installs both jars under $out/share/tika using version-independent file
  # names, then creates $out/bin/tika-app and $out/bin/tika-server wrappers
  # that run them with the jdk8 JRE. Only the server wrapper additionally
  # prepends the JRE's bin directory to PATH.
  installPhase = ''
    runHook preInstall

    # Note: using * instead of version would match multiple files
    install -Dm644 tika-app/target/tika-app-${version}.jar $out/share/tika/tika-app.jar
    install -Dm644 tika-server/tika-server-standard/target/tika-server-standard-${version}.jar $out/share/tika/tika-server.jar

    makeWrapper ${jdk8.jre}/bin/java $out/bin/tika-app \
        --add-flags "-jar $out/share/tika/tika-app.jar"
    makeWrapper ${jdk8.jre}/bin/java $out/bin/tika-server \
        --prefix PATH : ${lib.makeBinPath [ jdk8.jre ]} \
        --add-flags "-jar $out/share/tika/tika-server.jar"

    runHook postInstall
  '';

  # Exposes the NixOS `tika` test through the package's passthru.
  passthru.tests = {
    inherit (nixosTests) tika;
  };

  meta = {
    changelog = "https://github.com/apache/tika/blob/${src.rev}/CHANGES.txt";
    # nixpkgs convention: the description must not start with an article.
    description = "Toolkit for extracting metadata and text from over a thousand different file types";
    longDescription = ''
      The Apache Tika™ toolkit detects and extracts metadata and text
      from over a thousand different file types (such as PPT, XLS, and PDF).
      All of these file types can be parsed through a single interface,
      making Tika useful for search engine indexing, content analysis,
      translation, and much more.
    '';
    homepage = "https://tika.apache.org";
    license = lib.licenses.asl20;
    mainProgram = "tika-server";
    maintainers = with lib.maintainers; [ tomasajt ];
    sourceProvenance = with lib.sourceTypes; [
      fromSource
      binaryBytecode # maven dependencies
    ];
  };
}