II.
TestSet JSON
Structured · live · test-set:math-test
MATH test split · JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "test-set:math-test",
"_kind": "TestSet",
"_file": "benchmarks/test-sets/math-test.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "MATH test split",
"benchmarkId": "benchmark:math",
"caseCount": 5000,
"releasedAt": "2021-03-05",
"composition": "The 5,000-problem test split of the MATH benchmark (Hendrycks et\nal., 2021). Problems span seven subjects (algebra, geometry,\nnumber theory, counting & probability, intermediate algebra,\npre-algebra, pre-calculus) and five difficulty levels.\n",
"homepageUrl": "https://github.com/hendrycks/math",
"description": "Canonical MATH test set. Often subsetted (\"MATH-500\") in modern\nleaderboards; this entry tracks the full 5k split.\n"
},
"outgoingEdges": [
{
"from": "test-set:math-test",
"to": "benchmark:math",
"kind": "belongs_to_benchmark"
}
],
"incomingEdges": []
}