BigCode EvalPlus

test-set:bigcode-evalplus

TestSet · benchmarks/test-sets/bigcode-evalplus.yaml · Open in Graph →

{
  "id": "test-set:bigcode-evalplus",
  "_kind": "TestSet",
  "_file": "benchmarks/test-sets/bigcode-evalplus.yaml",
  "_cluster": "benchmarks",
  "attributes": {
    "displayName": "BigCode EvalPlus",
    "benchmarkId": "benchmark:bigcode-evalplus",
    "caseCount": 164,
    "releasedAt": "2023-05-08",
    "composition": "EvalPlus extends HumanEval and MBPP with far more test cases\n(~80x for HumanEval+, ~35x for MBPP+) generated via type-aware\nmutation, exposing functional bugs that pass the original tests\nbut fail under stricter scrutiny. This entry represents the\nHumanEval+ portion.\n",
    "homepageUrl": "https://github.com/evalplus/evalplus",
    "description": "Canonical EvalPlus HumanEval+ release used in many post-2023\ncode-LLM evaluations.\n"
  },
  "outgoingEdges": [
    {
      "from": "test-set:bigcode-evalplus",
      "to": "benchmark:bigcode-evalplus",
      "kind": "belongs_to_benchmark"
    }
  ],
  "incomingEdges": []
}