II.
EvalRun JSON
Structured · live · eval-run:gaia.claude-code.2025
eval-run:gaia.claude-code.2025 json
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "eval-run:gaia.claude-code.2025",
"_kind": "EvalRun",
"_file": "benchmarks/eval-runs/gaia-claude-code.yaml",
"_cluster": "benchmarks",
"attributes": {
"benchmarkId": "benchmark:gaia",
"testSetId": "test-set:gaia-validation",
"target": "agentVersion:claude:ge-0-0-0",
"targetId": "agentVersion:claude:ge-0-0-0",
"runAt": "2025-06-01T00:00:00Z",
"runBy": "@a5c-ai/team",
"configHash": "sha256:placeholder-claude-code-gaia"
},
"outgoingEdges": [
{
"from": "eval-run:gaia.claude-code.2025",
"to": "agentVersion:claude:ge-0-0-0",
"kind": "evaluates_target"
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "test-set:gaia-validation",
"kind": "uses_test_set"
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "benchmark:gaia",
"kind": "for_benchmark"
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-harness:inspect-ai",
"kind": "uses_harness",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-harness:helm",
"kind": "uses_harness",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-harness:lm-eval-harness",
"kind": "uses_harness",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-harness:openai-evals",
"kind": "uses_harness",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-harness:promptfoo",
"kind": "uses_harness",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "judge:gpt-4o-pairwise",
"kind": "judged_by",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "judge:claude-3-5-sonnet-rubric",
"kind": "judged_by",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "judge:exact-match",
"kind": "judged_by",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "rubric:helpfulness-1-5",
"kind": "scored_against_rubric",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "rubric:safety-3-axis",
"kind": "scored_against_rubric",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "rubric:code-quality",
"kind": "scored_against_rubric",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "benchmark:gaia",
"kind": "evaluated_by",
"attributes": {}
},
{
"from": "eval-run:gaia.claude-code.2025",
"to": "eval-result:gaia.claude-code.001",
"kind": "produced_result",
"attributes": {}
}
],
"incomingEdges": [
{
"from": "eval-result:gaia.claude-code.001",
"to": "eval-run:gaia.claude-code.2025",
"kind": "belongs_to_eval_run"
}
]
}