II.
Benchmark JSON
Structured · livebenchmark:arc-challenge
ARC-Challenge JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "benchmark:arc-challenge",
"_kind": "Benchmark",
"_file": "benchmarks/benchmarks/benchmarks-knowledge.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "ARC-Challenge",
"homepageUrl": "https://allenai.org/data/arc",
"kind": "model-only",
"targetsKind": "ModelVersion",
"description": "The AI2 Reasoning Challenge (ARC) — Challenge set — is a collection\nof 2,590 grade-school-level multiple-choice science questions\nselected to be unanswerable by simple retrieval or word-correlation\nbaselines, introduced by Clark et al. (Allen AI, 2018).\n"
},
"outgoingEdges": [],
"incomingEdges": [
{
"from": "eval-result:arc-challenge.claude-sonnet-4-5.001",
"to": "benchmark:arc-challenge",
"kind": "scored_against",
"attributes": {}
},
{
"from": "eval-run:arc-challenge.claude-sonnet-4-5.2025-09",
"to": "benchmark:arc-challenge",
"kind": "for_benchmark",
"attributes": {}
}
]
}