GPT-4o pairwise preference judge
judge:gpt-4o-pairwise
Judgebenchmarks/eval-harnesses/judges.yaml·Open in Graph → {
"id": "judge:gpt-4o-pairwise",
"_kind": "Judge",
"_file": "benchmarks/eval-harnesses/judges.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "GPT-4o pairwise preference judge",
"judgeKind": "llm",
"rubricId": "rubric:helpfulness-1-5",
"notes": "Standard pairwise A/B preference judge using GPT-4o; emits a winner +\nrationale.\n"
},
"outgoingEdges": [],
"incomingEdges": [
{
"from": "eval-run:gaia.claude-code.2025",
"to": "judge:gpt-4o-pairwise",
"kind": "judged_by",
"attributes": {}
}
]
}