about summary refs log tree commit diff
path: root/pkgs/applications/graphics/tesseract
diff options
context:
space:
mode:
author: symphorien <symphorien@users.noreply.github.com> 2018-06-18 22:03:48 +0000
committer: xeji <36407913+xeji@users.noreply.github.com> 2018-06-19 00:03:48 +0200
commit: b30d52905e618c6ae3e5ef0cea41777ee72be835 (patch)
tree: ae4a9d6fe4b4e5c6da1d2055b47ff957cfcedfc7 /pkgs/applications/graphics/tesseract
parent: 8807039549da21535dfbe57d148fdf0f6f434ac7 (diff)
tesseract: make tessdata a fixed output derivation (#41227)
The full tessdata is nearly a GB, so avoiding a copy each time we need to
rebuild tesseract without updating tessdata is worth it.
Diffstat (limited to 'pkgs/applications/graphics/tesseract')
-rw-r--r-- pkgs/applications/graphics/tesseract/default.nix 64
1 files changed, 34 insertions, 30 deletions
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index eb9a638386124..7940079d09948 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,10 +1,38 @@
 { stdenv, fetchFromGitHub, autoreconfHook, pkgconfig
 , leptonica, libpng, libtiff, icu, pango, opencl-headers
-
 # Supported list of languages or `null' for all available languages
 , enableLanguages ? null
+# if you want just a specific list of languages, optionally specify a hash
+# to make tessdata a fixed output derivation.
+, enableLanguagesHash ? (if enableLanguages == null # all languages
+                         then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw"
+                         else null)
 }:
 
+let tessdata = stdenv.mkDerivation ({
+  name = "tessdata";
+  src = fetchFromGitHub {
+    owner = "tesseract-ocr";
+    repo = "tessdata";
+    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
+    # when updating don't forget to update the default value of enableLanguagesHash
+    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
+  };
+  buildCommand = ''
+    cd $src;
+    for lang in ${if enableLanguages==null then "*.traineddata" else stdenv.lib.concatMapStringsSep " " (x: x+".traineddata") enableLanguages} ; do
+      install -Dt $out/share/tessdata $src/$lang ;
+    done;
+  '';
+  preferLocalBuild = true;
+  } // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) {
+  # when a hash is given, we make this a fixed output derivation.
+  outputHashMode = "recursive";
+  outputHashAlgo = "sha256";
+  outputHash = enableLanguagesHash;
+  }));
+in
+
 stdenv.mkDerivation rec {
   name = "tesseract-${version}";
   version = "3.05.00";
@@ -16,41 +44,17 @@ stdenv.mkDerivation rec {
     sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30";
   };
 
-  tessdata = fetchFromGitHub {
-    owner = "tesseract-ocr";
-    repo = "tessdata";
-    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
-    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
-  };
+  enableParallelBuilding = true;
 
   nativeBuildInputs = [ pkgconfig autoreconfHook ];
   buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];
 
   LIBLEPT_HEADERSDIR = "${leptonica}/include";
 
-  # Copy the .traineddata files of the languages specified in enableLanguages
-  # into `$out/share/tessdata' and check afterwards if copying was successful.
-  postInstall = let
-    mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}";
-    mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg;
-    findLangArgs = if enableLanguages != null
-                   then "\\( ${mkFindArgs enableLanguages} \\)"
-                   else "-iname '*.traineddata'";
-  in ''
-    numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \
-      ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)"
-
-    ${if enableLanguages != null then ''
-      expected=${toString (builtins.length enableLanguages)}
-    '' else ''
-      expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)"
-    ''}
-
-    if [ "$numLangs" -ne "$expected" ]; then
-      echo "Expected $expected languages, but $numLangs" \
-           "were copied to \`$out/share/tessdata'" >&2
-      exit 1
-    fi
+  postInstall = ''
+    for i in ${tessdata}/share/tessdata/*; do
+      ln -s $i $out/share/tessdata;
+    done
   '';
 
   meta = {