{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  glibcLocales,
  python,
  isPy3k,
}:

buildPythonPackage rec {
  pname = "jieba";
  version = "0.42.1";
  format = "setuptools";

  # no tests in PyPI tarball
  src = fetchFromGitHub {
    owner = "fxsjy";
    repo = pname;
    rev = "v${version}";
    sha256 = "028vmd6sj6wn9l1ilw7qfmlpyiysnlzdgdlhwxs6j4fvq0gyrwxk";
  };

  nativeCheckInputs = [ glibcLocales ];

  # UnicodeEncodeError
  doCheck = isPy3k;

  # Citing https://github.com/fxsjy/jieba/issues/384: "testcases is in a mess"
  # So just picking random ones that currently work
  checkPhase = ''
    export LC_ALL=en_US.UTF-8
    ${python.interpreter} test/test.py
    ${python.interpreter} test/test_tokenize.py
  '';

  meta = with lib; {
    description = "Chinese Words Segmentation Utilities";
    homepage = "https://github.com/fxsjy/jieba";
    license = licenses.mit;
    maintainers = teams.tts.members;
  };
}