II.
Benchmark JSON
Structured · live · benchmark:mgsm
MGSM JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "benchmark:mgsm",
"_kind": "Benchmark",
"_file": "benchmarks/benchmarks/benchmarks-math.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "MGSM",
"homepageUrl": "https://github.com/google-research/url-nlp/tree/main/mgsm",
"kind": "model-only",
"targetsKind": "ModelVersion",
"description": "MGSM (Multilingual Grade School Math) is a translation of 250 GSM8K\nproblems into 10 typologically diverse languages, introduced by\nShi et al. (Google) for measuring multilingual chain-of-thought\narithmetic reasoning.\n"
},
"outgoingEdges": [],
"incomingEdges": [
{
"from": "eval-result:mgsm.gemini-2-5-pro.001",
"to": "benchmark:mgsm",
"kind": "scored_against",
"attributes": {}
},
{
"from": "eval-run:mgsm.gemini-2-5-pro.2025-06",
"to": "benchmark:mgsm",
"kind": "for_benchmark"
}
]
}