diff options
author | symphorien <symphorien@users.noreply.github.com> | 2018-06-18 22:03:48 +0000 |
---|---|---|
committer | xeji <36407913+xeji@users.noreply.github.com> | 2018-06-19 00:03:48 +0200 |
commit | b30d52905e618c6ae3e5ef0cea41777ee72be835 (patch) | |
tree | ae4a9d6fe4b4e5c6da1d2055b47ff957cfcedfc7 /pkgs/applications/graphics/tesseract | |
parent | 8807039549da21535dfbe57d148fdf0f6f434ac7 (diff) |
tesseract: make tessdata a fixed-output derivation (#41227)
The full tessdata is nearly a GB, so avoiding a fresh copy each time tesseract has to be rebuilt without tessdata itself changing is worth it.
Diffstat (limited to 'pkgs/applications/graphics/tesseract')
-rw-r--r-- | pkgs/applications/graphics/tesseract/default.nix | 64 |
1 file changed, 34 insertions, 30 deletions
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix index eb9a638386124..7940079d09948 100644 --- a/pkgs/applications/graphics/tesseract/default.nix +++ b/pkgs/applications/graphics/tesseract/default.nix @@ -1,10 +1,38 @@ { stdenv, fetchFromGitHub, autoreconfHook, pkgconfig , leptonica, libpng, libtiff, icu, pango, opencl-headers - # Supported list of languages or `null' for all available languages , enableLanguages ? null +# if you want just a specific list of languages, optionally specify a hash +# to make tessdata a fixed output derivation. +, enableLanguagesHash ? (if enableLanguages == null # all languages + then "1h48xfzabhn0ldbx5ib67cp9607pr0zpblsy8z6fs4knn0zznfnw" + else null) }: +let tessdata = stdenv.mkDerivation ({ + name = "tessdata"; + src = fetchFromGitHub { + owner = "tesseract-ocr"; + repo = "tessdata"; + rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; + # when updating don't forget to update the default value fo enableLanguagesHash + sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; + }; + buildCommand = '' + cd $src; + for lang in ${if enableLanguages==null then "*.traineddata" else stdenv.lib.concatMapStringsSep " " (x: x+".traineddata") enableLanguages} ; do + install -Dt $out/share/tessdata $src/$lang ; + done; + ''; + preferLocalBuild = true; + } // (stdenv.lib.optionalAttrs (enableLanguagesHash != null) { + # when a hash is given, we make this a fixed output derivation. 
+ outputHashMode = "recursive"; + outputHashAlgo = "sha256"; + outputHash = enableLanguagesHash; + })); +in + stdenv.mkDerivation rec { name = "tesseract-${version}"; version = "3.05.00"; @@ -16,41 +44,17 @@ stdenv.mkDerivation rec { sha256 = "11wrpcfl118wxsv2c3w2scznwb48c4547qml42s2bpdz079g8y30"; }; - tessdata = fetchFromGitHub { - owner = "tesseract-ocr"; - repo = "tessdata"; - rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d"; - sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7"; - }; + enableParallelBuilding = true; nativeBuildInputs = [ pkgconfig autoreconfHook ]; buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ]; LIBLEPT_HEADERSDIR = "${leptonica}/include"; - # Copy the .traineddata files of the languages specified in enableLanguages - # into `$out/share/tessdata' and check afterwards if copying was successful. - postInstall = let - mkArg = lang: "-iname ${stdenv.lib.escapeShellArg "${lang}.traineddata"}"; - mkFindArgs = stdenv.lib.concatMapStringsSep " -o " mkArg; - findLangArgs = if enableLanguages != null - then "\\( ${mkFindArgs enableLanguages} \\)" - else "-iname '*.traineddata'"; - in '' - numLangs="$(find "$tessdata" -mindepth 1 -maxdepth 1 -type f \ - ${findLangArgs} -exec cp -t "$out/share/tessdata" {} + -print | wc -l)" - - ${if enableLanguages != null then '' - expected=${toString (builtins.length enableLanguages)} - '' else '' - expected="$(ls -1 "$tessdata/"*.traineddata | wc -l)" - ''} - - if [ "$numLangs" -ne "$expected" ]; then - echo "Expected $expected languages, but $numLangs" \ - "were copied to \`$out/share/tessdata'" >&2 - exit 1 - fi + postInstall = '' + for i in ${tessdata}/share/tessdata/*; do + ln -s $i $out/share/tessdata; + done ''; meta = { |