Berkeley Function Calling Leaderboard (BFCL)
benchmark:berkeley-function-calling
Benchmark · benchmarks/benchmarks/benchmarks-tool-use.yaml · Open in Graph →
{
"id": "benchmark:berkeley-function-calling",
"_kind": "Benchmark",
"_file": "benchmarks/benchmarks/benchmarks-tool-use.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "Berkeley Function Calling Leaderboard (BFCL)",
"homepageUrl": "https://gorilla.cs.berkeley.edu/leaderboard.html",
"kind": "model-only",
"targetsKind": "ModelVersion",
"description": "BFCL (Berkeley Function Calling Leaderboard, from the Gorilla\nproject) is the canonical public leaderboard for LLM function-\ncalling and tool-use accuracy across simple, parallel, multiple,\nand live function-calling categories. Versions v1, v2 (live),\nand v3 (multi-turn / multi-step) have been released.\n"
},
"outgoingEdges": [
{
"from": "benchmark:berkeley-function-calling",
"to": "skill-area:tool-use",
"kind": "covers",
"attributes": {
"attributes": {
"coverage": "full",
"weight": 1
}
}
}
],
"incomingEdges": [
{
"from": "eval-run:bfcl.claude-sonnet-4-5.2025-09",
"to": "benchmark:berkeley-function-calling",
"kind": "for_benchmark"
},
{
"from": "eval-run:bfcl.gpt-5.2025-08",
"to": "benchmark:berkeley-function-calling",
"kind": "for_benchmark"
}
]
}