GPT-4o pairwise preference judge

judge:gpt-4o-pairwise

Judge · benchmarks/eval-harnesses/judges.yaml · Open in Graph →

{
  "id": "judge:gpt-4o-pairwise",
  "_kind": "Judge",
  "_file": "benchmarks/eval-harnesses/judges.yaml",
  "_cluster": "benchmarks",
  "attributes": {
    "displayName": "GPT-4o pairwise preference judge",
    "judgeKind": "llm",
    "rubricId": "rubric:helpfulness-1-5",
    "notes": "Standard pairwise A/B preference judge using GPT-4o; emits a winner +\nrationale.\n"
  },
  "outgoingEdges": [],
  "incomingEdges": [
    {
      "from": "eval-run:gaia.claude-code.2025",
      "to": "judge:gpt-4o-pairwise",
      "kind": "judged_by",
      "attributes": {}
    }
  ]
}