backoff>=2.2.0
datasets>=4.5.0
huggingface_hub>=1.2.0
hf_xet
inspect_ai>=0.3.158
jinja2
numpy>=1.26.0
pillow>=11.3.0
pydantic>=2.10.0
pyyaml>=5.1.0
requests>=2.32.0
tiktoken>=0.11.0
toml>=0.10.2

[abstention_bench]
scikit-learn
hydra-core>=1.4.0.dev1
omegaconf>=2.4.0.dev2
torch
wget
loguru
gdown
jsonlines

[agentdojo]
pydantic[email]
deepdiff

[agentic_misalignment]
bs4

[ahb]
matplotlib

[b3]
openai
rouge_score
tenacity
click
python-dotenv

[bfcl]
mpmath

[bold]
detoxify
vaderSentiment
transformers>=5.0.0
torch

[cje]
cje-eval

[core_bench]
scipy

[cybench]
inspect-cyber==0.1.0

[cybergym]
inspect-cyber>=0.1.0

[dist]
twine
build

[doc]
quarto-cli
jupyter

[gaia]
filelock

[gdm_capabilities]
google-genai>=1.56.0
rich
python-dateutil

[gdm_self_proliferation]
rich

[gdm_stealth]
tabulate
scipy
immutabledict
pandas
python-dateutil

[gdpval]
huggingface_hub[cli]

[healthbench]
scikit-learn

[ifeval]
instruction_following_eval
langdetect

[ifevalcode]
tree-sitter
tree-sitter-cpp

[kernelbench]
kernelbench

[makemesay]
nltk

[math]
sympy
antlr4-python3-runtime~=4.11.0

[medqa]
bioc

[mind2web]
beautifulsoup4
types-beautifulsoup4
lxml
lxml-stubs

[niah]
pandas

[novelty_bench]
transformers>=4.57.1
torch>=2.9.1
accelerate>=1.11.0
protobuf>=6.33.1
sentencepiece>=0.2.1

[osworld]
filelock

[paperbench]
drain3

[personality]
huggingface-hub

[scbench]
inspect-swe

[scicode]
gdown
h5py
scipy
sympy

[sciknoweval]
nltk
rouge_score
rdkit
rdchiral
gdown
scipy

[sciknoweval:python_version < "3.13"]
gensim

[sevenllm]
jieba==0.42.1
sentence_transformers>=5.1.1
rouge==1.0.1

[sevenllm:python_version < "3.13"]
tf-keras

[swe_bench]
swebench>=3.0.15
docker
jsonlines

[swe_lancer]
docker
types-docker

[test]
anthropic
openai>=2.26.0
inspect_evals[abstention_bench,agentdojo,b3,bold,core_bench,cybench,cybergym,fortress,gdm_capabilities,gdm_self_proliferation,gdm_stealth,gdpval,ifeval,ifevalcode,mind2web,novelty_bench,paperbench,scbench,sciknoweval,sevenllm,swe_bench,swe_lancer,vimgolf]

[test:sys_platform != "win32"]
inspect_evals[kernelbench]

[vimgolf]
vimgolf==0.5.1

[vimgolf_challenges]
vimgolf==0.5.1

[worldsense]
pandas
