{
"id": "test-set:bigcode-evalplus",
"_kind": "TestSet",
"_file": "benchmarks/test-sets/bigcode-evalplus.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "BigCode EvalPlus",
"benchmarkId": "benchmark:bigcode-evalplus",
"caseCount": 164,
"releasedAt": "2023-05-08",
"composition": "EvalPlus extends HumanEval and MBPP with many more test cases\n(~80x for HumanEval+) generated via type-aware mutation, exposing\nfunctional bugs in solutions that pass the original tests but fail\nunder stricter scrutiny. This entry represents the HumanEval+ portion.\n",
"homepageUrl": "https://github.com/evalplus/evalplus",
"description": "Canonical EvalPlus HumanEval+ release used in many code-LLM\nevaluations from 2023 onward.\n"
},
"outgoingEdges": [
{
"from": "test-set:bigcode-evalplus",
"to": "benchmark:bigcode-evalplus",
"kind": "belongs_to_benchmark"
}
],
"incomingEdges": []
}