II.
EvalRun JSON
Structured · live · eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19
eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19 json
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19",
"_kind": "EvalRun",
"_file": "benchmarks/eval-runs/eval-runs-google.yaml",
"_cluster": "benchmarks",
"attributes": {
"target": "model:gemini-3-1-pro@current",
"benchmarkId": "benchmark:gpqa",
"testSetId": "test-set:gpqa-diamond-2024",
"targetId": "model:gemini-3-1-pro@current",
"runAt": "2026-02-19T00:00:00Z",
"runBy": "google-deepmind",
"configHash": "sha256:google-gemini-3-1-pro-gpqa-diamond-2026-02-19"
},
"outgoingEdges": [
{
"from": "eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19",
"to": "model:gemini-3-1-pro@current",
"kind": "evaluates_target"
},
{
"from": "eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19",
"to": "test-set:gpqa-diamond-2024",
"kind": "uses_test_set"
},
{
"from": "eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19",
"to": "benchmark:gpqa",
"kind": "for_benchmark"
}
],
"incomingEdges": [
{
"from": "eval-result:gpqa-diamond.gemini-3-1-pro.2026-02-19.accuracy",
"to": "eval-run:gpqa-diamond.gemini-3-1-pro.2026-02-19",
"kind": "belongs_to_eval_run"
}
]
}