From a443bdc0a637aac3e6c0b8ba9d71f101f60d221d Mon Sep 17 00:00:00 2001 From: aszlig Date: Sun, 9 Apr 2017 23:34:26 +0200 Subject: nixos/testing: Improve quality of OCR First of all, we're now using ImageMagick to improve the screenshot so that Tesseract has an easier time recognizing the text. The resulting image of this post-processing is a scaled-up black-and-white version with the backgrounds almost entirely removed and the text edges a bit blurred, so the screenshots now more or less resemble an image from a scanner rather than a screen capture. This is what Tesseract is trained for by default. As mentioned in the previous commit we now also use Tesseract 4, which further improves the quality of text recognition. I've spent countless hours testing different post-processing variants to find what works best for our tests, and this is the one that worked best so far. It's certainly not perfect and I'd like to avoid the scaling step, but we're way better off than before. In addition to this, the OCR process is now done without an intermediate file, solely using pipes. I've tested this using the following VM tests which have OCR enabled: * nixos/tests/chromium.nix -A stable * nixos/tests/emacs-daemon.nix * nixos/tests/installer.nix -A luksroot * nixos/tests/lightdm.nix * nixos/tests/plasma5.nix * nixos/tests/sddm.nix All of the tests still succeed, and comparing some of the recognition results to the earlier ones shows that it now also detects a lot more text than before this commit.
Signed-off-by: aszlig --- nixos/lib/test-driver/Machine.pm | 18 +++++++++++------- nixos/lib/testing.nix | 5 +++-- 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'nixos/lib') diff --git a/nixos/lib/test-driver/Machine.pm b/nixos/lib/test-driver/Machine.pm index 30664406b26d0..c619264eb949d 100644 --- a/nixos/lib/test-driver/Machine.pm +++ b/nixos/lib/test-driver/Machine.pm @@ -542,16 +542,20 @@ sub getScreenText { $self->nest("performing optical character recognition", sub { my $tmpbase = Cwd::abs_path(".")."/ocr"; my $tmpin = $tmpbase."in.ppm"; - my $tmpout = "$tmpbase.ppm"; $self->sendMonitorCommand("screendump $tmpin"); - system("ppmtopgm $tmpin | pamscale 4 -filter=lanczos > $tmpout") == 0 - or die "cannot scale screenshot"; + + my $magickArgs = "-filter Catrom -density 72 -resample 300 " + . "-contrast -normalize -despeckle -type grayscale " + . "-sharpen 1 -posterize 3 -negate -gamma 100 " + . "-blur 1x65535"; + my $tessArgs = "-c debug_file=/dev/null --psm 11 --oem 2"; + + $text = `convert $magickArgs $tmpin tiff:- | tesseract - - $tessArgs`; + my $status = $? 
>> 8; unlink $tmpin; - system("tesseract $tmpout $tmpbase") == 0 or die "OCR failed"; - unlink $tmpout; - $text = read_file("$tmpbase.txt"); - unlink "$tmpbase.txt"; + + die "OCR failed with exit code $status" if $status != 0; }); return $text; } diff --git a/nixos/lib/testing.nix b/nixos/lib/testing.nix index d8b5df004df88..58c447c76db66 100644 --- a/nixos/lib/testing.nix +++ b/nixos/lib/testing.nix @@ -93,7 +93,7 @@ rec { vms = map (m: m.config.system.build.vm) (lib.attrValues nodes); - ocrProg = tesseract.override { enableLanguages = [ "eng" ]; }; + ocrProg = tesseract_4.override { enableLanguages = [ "eng" ]; }; # Generate onvenience wrappers for running the test driver # interactively with the specified network, and for starting the @@ -111,7 +111,8 @@ rec { vms=($(for i in ${toString vms}; do echo $i/bin/run-*-vm; done)) wrapProgram $out/bin/nixos-test-driver \ --add-flags "''${vms[*]}" \ - ${lib.optionalString enableOCR "--prefix PATH : '${ocrProg}/bin'"} \ + ${lib.optionalString enableOCR + "--prefix PATH : '${ocrProg}/bin:${imagemagick}/bin'"} \ --run "testScript=\"\$(cat $out/test-script)\"" \ --set testScript '$testScript' \ --set VLANS '${toString vlans}' -- cgit 1.4.1