about summary refs log tree commit diff
path: root/pkgs/development/python-modules/rapidocr-onnxruntime/default.nix
blob: 48209fc9efca39d05230efadcc2c396ade91f9ba (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
{
  # Used for `meta` (licenses, maintainers, platform check).
  lib,
  stdenv,

  # Builder, fetchers and hooks.
  buildPythonPackage,
  fetchFromGitHub,
  fetchzip,
  substitute,
  pythonRelaxDepsHook,
  pytestCheckHook,

  # Build system.
  setuptools,

  # Python dependencies of the package.
  numpy,
  onnxruntime,
  opencv4,
  pillow,
  pyclipper,
  pyyaml,
  shapely,
  six,
}:
let
  # Upstream release being packaged; also used to build the Git tag fetched
  # below and the changelog URL in `meta`.
  version = "1.3.22";

  # The RapidOCR repository at the tag `v<version>`.
  src = fetchFromGitHub {
    owner = "RapidAI";
    repo = "RapidOCR";
    rev = "v${version}";
    # Content hash of the fetched tree; has to be refreshed whenever `version` changes.
    hash = "sha256-8h4DFhnI9imr+bYQZdlrl8UKUdpwnGK+SGxLXSMmcag=";
  };

  # Release archive published by upstream. `stripRoot = false` keeps the
  # archive's top-level directory, which is why it appears in the path below.
  modelsArchive = fetchzip {
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
    stripRoot = false;
  };

  # Directory inside the unpacked archive whose contents get linked into the
  # package's `models` directory in `postPatch`.
  models = "${modelsArchive}/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
  pname = "rapidocr-onnxruntime";
  inherit version src;
  # Use buildPythonPackage's pyproject-based build (see `build-system` below).
  pyproject = true;

  # Build from the `python` subdirectory of the fetched repository.
  sourceRoot = "${src.name}/python";

  # HACK:
  # Upstream uses a very unconventional structure to organize the packages, and we have to coax the
  # existing infrastructure to work with it.
  # See https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92
  # for the "intended" way of building this package.

  # The setup.py supplied by upstream tries to determine the current version by
  # fetching the latest version of the package from PyPI and then bumping the version number.
  # This cannot work in the Nix build environment, which has no internet access,
  # so we patch that out and supply the version from this expression instead.
  #
  # The patch file is a template: `substitute` is invoked with
  # `--subst-var-by version <version>`, which fills in its `@version@`
  # placeholder before the patch is applied.
  patches = [
    (substitute {
      src = ./setup-py-override-version-checking.patch;
      substitutions = [
        "--subst-var-by"
        "version"
        version
      ];
    })
  ];

  # Prepare the source tree after patching:
  #   1. rename the onnxruntime-specific setup script to the conventional `setup.py`;
  #   2. create the package's `models` directory and symlink every entry of
  #      `models` (from the fetched release archive) into it;
  #   3. write a top-level `__init__.py` that re-exports `RapidOCR` and `VisRes`.
  postPatch = ''
    mv setup_onnxruntime.py setup.py
    mkdir -p rapidocr_onnxruntime/models

    ln -s ${models}/* rapidocr_onnxruntime/models

    # Magic patch from upstream - what does this even do??
    echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
  '';

  # Hook that acts on the `pythonRemoveDeps` setting further down.
  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # Upstream expects the source files to be under rapidocr_onnxruntime/rapidocr_onnxruntime
  # instead of rapidocr_onnxruntime for the wheel to build correctly.
  # Nest the package one level deeper by moving it into a temporary directory
  # and then renaming that directory to the package name.
  preBuild = ''
    mkdir rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t rapidocr_onnxruntime
  '';

  # Revert the above hack: rename the outer directory out of the way and move
  # its contents (the original package directory) back to the top level.
  postBuild = ''
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t/* .
  '';

  # Build backend for the pyproject build.
  build-system = [ setuptools ];

  # Python packages made available to the built package.
  # Note that OpenCV comes from the `opencv4` attribute (see `pythonRemoveDeps`).
  dependencies = [
    pyclipper
    opencv4
    numpy
    six
    shapely
    pyyaml
    pillow
    onnxruntime
  ];

  # Upstream declares a dependency on `opencv-python`; here OpenCV is supplied
  # through the `opencv4` attribute instead, so that requirement is removed.
  pythonRemoveDeps = [ "opencv-python" ];

  # Smoke test: the installed module must be importable.
  pythonImportsCheck = [ "rapidocr_onnxruntime" ];

  # Run the test suite with pytest.
  nativeCheckInputs = [ pytestCheckHook ];

  # These are tests for different backends, which this package does not
  # list among its dependencies.
  disabledTestPaths = [
    "tests/test_vino.py"
    "tests/test_paddle.py"
  ];

  # Package metadata.
  meta = {
    # Marked broken on aarch64-linux only.
    # This seems to be related to https://github.com/microsoft/onnxruntime/issues/10038
    # Also some related issue: https://github.com/NixOS/nixpkgs/pull/319053#issuecomment-2167713362
    #
    # The platform is queried through `stdenv.hostPlatform`, the canonical
    # location of these predicates; the `stdenv.isLinux`-style shortcuts are
    # legacy aliases for the same values.
    broken = stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64;
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/v${version}";
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    license = with lib.licenses; [ asl20 ];
    maintainers = with lib.maintainers; [ pluiedev ];
    # Executable name declared as the package's main program.
    mainProgram = "rapidocr_onnxruntime";
  };
}