Agentic AI Atlas

II.

Benchmark JSON

benchmark:mgsm

Structured · live

MGSM JSON

Inspect the normalized record payload exactly as the atlas UI reads it.

File · benchmarks/benchmarks/benchmarks-math.yaml | Cluster · benchmarks

Record JSON

{
  "id": "benchmark:mgsm",
  "_kind": "Benchmark",
  "_file": "benchmarks/benchmarks/benchmarks-math.yaml",
  "_cluster": "benchmarks",
  "attributes": {
    "displayName": "MGSM",
    "homepageUrl": "https://github.com/google-research/url-nlp/tree/main/mgsm",
    "kind": "model-only",
    "targetsKind": "ModelVersion",
    "description": "MGSM (Multilingual Grade School Math) is a translation of 250 GSM8K\nproblems into 10 typologically diverse languages, introduced by\nShi et al. (Google) for measuring multilingual chain-of-thought\narithmetic reasoning.\n"
  },
  "outgoingEdges": [],
  "incomingEdges": [
    {
      "from": "eval-result:mgsm.gemini-2-5-pro.001",
      "to": "benchmark:mgsm",
      "kind": "scored_against",
      "attributes": {}
    },
    {
      "from": "eval-run:mgsm.gemini-2-5-pro.2025-06",
      "to": "benchmark:mgsm",
      "kind": "for_benchmark"
    }
  ]
}