dill>=0.3.0
fsspec>=2023.12.2
huggingface-hub<1.0,>=0.34.0
humanize
loguru>=0.7.0
multiprocess
numpy>=2.0.0
tqdm

[all]
datatrove[quality]
datatrove[testing]

[cli]
rich

[decont]
lighteval>=0.3.0

[dev]
datatrove[all]

[inference]
datatrove[io]
aiofiles
httpx
aiosqlite
vllm
sglang
bitsandbytes
numpy<2.3,>=2.0.0
typer
pyyaml
pandas
transformers>=4.57

[io]
faust-cchardet
pyarrow
python-magic
warcio
datasets>=3.1.0
orjson
zstandard

[multilingual]
spacy[ja]>=3.8
stanza
pyvi
pythainlp
jieba
indic-nlp-library
kiwipiepy<0.22.0
urduhack
tensorflow>=2.16
khmer-nltk
laonlp
botok
pyidaungsu-numpy2

[processing]
fasttext-numpy2-wheel
nltk
inscriptis
tldextract
trafilatura<1.12.0,>=1.8.0
tokenizers
ftfy
fasteners
regex
xxhash
pyahocorasick

[quality]
ruff>=0.1.5

[ray]
ray[default]

[s3]
s3fs>=2023.12.2

[testing]
datatrove[cli]
datatrove[io]
datatrove[processing]
datatrove[multilingual]
datatrove[s3]
datatrove[ray]
datatrove[inference]
flask>=3.1.0
pytest
pytest-rerunfailures
pytest-timeout
pytest-xdist
moto[s3,server]
