about summary refs log tree commit diff
path: root/pkgs/applications/graphics/tesseract
diff options
context:
space:
mode:
author: aszlig <aszlig@redmoonstudios.org> 2016-12-19 13:05:30 +0100
committer: aszlig <aszlig@redmoonstudios.org> 2016-12-19 22:25:38 +0100
commit: 68bc260ca2d71a676dd6afdb3524d4fff483016b (patch)
tree: f29c7b722bdf23790d6f747d66095695cbafcf80 /pkgs/applications/graphics/tesseract
parent: f805209a78a1a38cc13c9deac72a6433b6f5ba7b (diff)
tesseract: 3.02.02 -> 3.04.01
From the upstream changelog:

 * Tesseract development is now done with Git and hosted at github.com
   (Previously we used Subversion as a VCS and code.google.com for
   hosting).

So let's move over to the GitHub repository, where the organisation also
includes a full repository for tessdata, so we no longer need to fetch
it one-by-one.

The build also got significantly simpler, because we no longer need to
run autoconf, nor do we need to patch the configure script for
Leptonica headers.

This also has the advantage that we don't need to use the
enableLanguages attribute for the test runner anymore.

Full upstream changelog can be found at:

https://github.com/tesseract-ocr/tesseract/blob/c4d273d33cc36e/ChangeLog

Tested against all NixOS tests with enabled OCR (chromium, emacs-daemon,
installer.luksroot and lightdm).

Signed-off-by: aszlig <aszlig@redmoonstudios.org>
Cc: @viric
Diffstat (limited to 'pkgs/applications/graphics/tesseract')
-rw-r--r-- pkgs/applications/graphics/tesseract/default.nix 58
1 file changed, 18 insertions, 40 deletions
diff --git a/pkgs/applications/graphics/tesseract/default.nix b/pkgs/applications/graphics/tesseract/default.nix
index 375b09995488f..1f1da9a389f27 100644
--- a/pkgs/applications/graphics/tesseract/default.nix
+++ b/pkgs/applications/graphics/tesseract/default.nix
@@ -1,53 +1,31 @@
-{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
-, enableLanguages ? null
+{ stdenv, fetchFromGitHub, pkgconfig, leptonica, libpng, libtiff
+, icu, pango, opencl-headers
 }:
 
-with stdenv.lib;
-
-let
-  majVersion = "3.02";
-  version = "${majVersion}.02";
-
-  mkLang = lang: sha256: let
-    src = fetchurl {
-      url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
-      inherit sha256;
-    };
-  in "tar xfvz ${src} -C $out/share/ --strip=1";
-
-  wantLang = name: const (enableLanguages == null || elem name enableLanguages);
-
-  extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
-    cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
-    rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
-    spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
-    nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
-    eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
-    slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
-    jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
-  });
-in
-
 stdenv.mkDerivation rec {
   name = "tesseract-${version}";
+  version = "3.04.01";
 
-  src = fetchurl {
-    url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${version}.tar.gz";
-    sha256 = "0g81m9y4iydp7kgr56mlkvjdwpp3mb01q385yhdnyvra7z5kkk96";
+  src = fetchFromGitHub {
+    owner = "tesseract-ocr";
+    repo = "tesseract";
+    rev = version;
+    sha256 = "0h1x4z1h86n2gwknd0wck6gykkp99bmm02lg4a47a698g4az6ybv";
   };
 
-  buildInputs = [ autoconf automake libtool leptonica libpng libtiff ];
+  tessdata = fetchFromGitHub {
+    owner = "tesseract-ocr";
+    repo = "tessdata";
+    rev = "3cf1e2df1fe1d1da29295c9ef0983796c7958b7d";
+    sha256 = "1v4b63v5nzcxr2y3635r19l7lj5smjmc9vfk0wmxlryxncb4vpg7";
+  };
 
-  hardeningDisable = [ "format" ];
+  nativeBuildInputs = [ pkgconfig ];
+  buildInputs = [ leptonica libpng libtiff icu pango opencl-headers ];
 
-  preConfigure = ''
-      ./autogen.sh
-      substituteInPlace "configure" \
-        --replace 'LIBLEPT_HEADERSDIR="/usr/local/include /usr/include"' \
-                  'LIBLEPT_HEADERSDIR=${leptonica}/include'
-  '';
+  LIBLEPT_HEADERSDIR = "${leptonica}/include";
 
-  postInstall = concatStringsSep "; " extraLanguages;
+  postInstall = "cp -Rt \"$out/share/tessdata\" \"$tessdata/\"*";
 
   meta = {
     description = "OCR engine";