Agentic AI Atlas

II.

EvalRun JSON

eval-run:gaia.claude-code.2025

Structured · live

eval-run:gaia.claude-code.2025 json

Inspect the normalized record payload exactly as the atlas UI reads it.

File · benchmarks/eval-runs/gaia-claude-code.yaml | Cluster · benchmarks

Record JSON

{
  "id": "eval-run:gaia.claude-code.2025",
  "_kind": "EvalRun",
  "_file": "benchmarks/eval-runs/gaia-claude-code.yaml",
  "_cluster": "benchmarks",
  "attributes": {
    "benchmarkId": "benchmark:gaia",
    "testSetId": "test-set:gaia-validation",
    "target": "agentVersion:claude:ge-0-0-0",
    "targetId": "agentVersion:claude:ge-0-0-0",
    "runAt": "2025-06-01T00:00:00Z",
    "runBy": "@a5c-ai/team",
    "configHash": "sha256:placeholder-claude-code-gaia"
  },
  "outgoingEdges": [
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "agentVersion:claude:ge-0-0-0",
      "kind": "evaluates_target"
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "test-set:gaia-validation",
      "kind": "uses_test_set"
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "benchmark:gaia",
      "kind": "for_benchmark"
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-harness:inspect-ai",
      "kind": "uses_harness",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-harness:helm",
      "kind": "uses_harness",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-harness:lm-eval-harness",
      "kind": "uses_harness",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-harness:openai-evals",
      "kind": "uses_harness",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-harness:promptfoo",
      "kind": "uses_harness",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "judge:gpt-4o-pairwise",
      "kind": "judged_by",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "judge:claude-3-5-sonnet-rubric",
      "kind": "judged_by",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "judge:exact-match",
      "kind": "judged_by",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "rubric:helpfulness-1-5",
      "kind": "scored_against_rubric",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "rubric:safety-3-axis",
      "kind": "scored_against_rubric",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "rubric:code-quality",
      "kind": "scored_against_rubric",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "benchmark:gaia",
      "kind": "evaluated_by",
      "attributes": {}
    },
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "eval-result:gaia.claude-code.001",
      "kind": "produced_result",
      "attributes": {}
    }
  ],
  "incomingEdges": [
    {
      "from": "eval-result:gaia.claude-code.001",
      "to": "eval-run:gaia.claude-code.2025",
      "kind": "belongs_to_eval_run"
    }
  ]
}