1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
# Package expression for `llama-index-core`, built from the `llama-index-core`
# subdirectory of the upstream `run-llama/llama_index` repository.
{
  lib,
  aiohttp,
  buildPythonPackage,
  dataclasses-json,
  deprecated,
  dirtyjson,
  fetchFromGitHub,
  fetchzip,
  fsspec,
  llamaindex-py-client,
  nest-asyncio,
  networkx,
  nltk,
  numpy,
  openai,
  pandas,
  pillow,
  poetry-core,
  pytest-asyncio,
  pytest-mock,
  pytestCheckHook,
  pythonOlder,
  pyyaml,
  requests,
  tree-sitter,
  sqlalchemy,
  tenacity,
  tiktoken,
  typing-inspect,
}:

let
  # NLTK data archives that are copied into the source tree in `postPatch`
  # below so that nothing has to be downloaded at import time.
  stopwords = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip";
    hash = "sha256-tX1CMxSvFjr0nnLxbbycaX/IBnzHFxljMZceX5zElPY=";
  };

  punkt = fetchzip {
    url = "https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip";
    hash = "sha256-SKZu26K17qMUg7iCFZey0GTECUZ+sTTrF/pqeEgJCos=";
  };
in

buildPythonPackage rec {
  pname = "llama-index-core";
  version = "0.10.36";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "run-llama";
    repo = "llama_index";
    rev = "refs/tags/v${version}";
    hash = "sha256-yP/60DLg43UOOogxbDvb1p5n8dnfBUjGhcfO5g5g0gA=";
  };

  # Build only the `llama-index-core` subdirectory of the fetched repository.
  sourceRoot = "${src.name}/${pname}";

  # When `llama-index` is imported, it uses `nltk` to look for the following files and tries to
  # download them if they aren't present.
  # https://github.com/run-llama/llama_index/blob/6efa53cebd5c8ccf363582c932fffde44d61332e/llama-index-core/llama_index/core/utils.py#L59-L67
  # Setting `NLTK_DATA` to a writable path can also solve this problem, but it needs to be done in
  # every package that depends on `llama-index-core` for `pythonImportsCheck` not to fail, so this
  # solution seems more elegant.
  #
  # This is done in `postPatch` instead of overriding `patchPhase` so that the
  # default patch phase (and any `patches`/`prePatch` added later or through
  # `overrideAttrs`) keeps working.
  postPatch = ''
    mkdir -p llama_index/core/_static/nltk_cache/corpora/stopwords/
    cp -r ${stopwords}/* llama_index/core/_static/nltk_cache/corpora/stopwords/

    mkdir -p llama_index/core/_static/nltk_cache/tokenizers/punkt/
    cp -r ${punkt}/* llama_index/core/_static/nltk_cache/tokenizers/punkt/
  '';

  build-system = [ poetry-core ];

  dependencies = [
    aiohttp
    dataclasses-json
    deprecated
    dirtyjson
    fsspec
    llamaindex-py-client
    nest-asyncio
    networkx
    nltk
    numpy
    openai
    pandas
    pillow
    pyyaml
    requests
    sqlalchemy
    tenacity
    tiktoken
    typing-inspect
  ];

  nativeCheckInputs = [
    tree-sitter
    pytest-asyncio
    pytest-mock
    pytestCheckHook
  ];

  pythonImportsCheck = [ "llama_index" ];

  disabledTestPaths = [
    # Tests require network access
    "tests/agent/"
    "tests/callbacks/"
    "tests/chat_engine/"
    "tests/evaluation/"
    "tests/indices/"
    "tests/ingestion/"
    "tests/memory/"
    "tests/node_parser/"
    "tests/objects/"
    "tests/playground/"
    "tests/postprocessor/"
    "tests/query_engine/"
    "tests/question_gen/"
    "tests/response_synthesizers/"
    "tests/retrievers/"
    "tests/selectors/"
    "tests/test_utils.py"
    "tests/text_splitter/"
    "tests/token_predictor/"
    "tests/tools/"
  ];

  meta = {
    description = "Data framework for your LLM applications";
    homepage = "https://github.com/run-llama/llama_index/";
    # Upstream tags carry a `v` prefix (see `rev` above), so the changelog
    # link must use the same ref.
    changelog = "https://github.com/run-llama/llama_index/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ fab ];
  };
}
|