II.
TestSet JSON
Structured · live · test-set:gsm8k-test
GSM8K test split json
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "test-set:gsm8k-test",
"_kind": "TestSet",
"_file": "benchmarks/test-sets/gsm8k-test.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "GSM8K test split",
"benchmarkId": "benchmark:gsm8k",
"caseCount": 1319,
"releasedAt": "2021-10-27",
"composition": "The held-out test split of GSM8K — 1,319 grade-school math word\nproblems requiring 2-8 reasoning steps. Standard split published\nalongside the OpenAI GSM8K release.\n",
"homepageUrl": "https://github.com/openai/grade-school-math",
"description": "Canonical GSM8K test split used in nearly every published reasoning\neval since 2022.\n"
},
"outgoingEdges": [
{
"from": "test-set:gsm8k-test",
"to": "benchmark:gsm8k",
"kind": "belongs_to_benchmark"
}
],
"incomingEdges": []
}