II.
TestSet JSON
Structured · live · test-set:swe-bench-verified-2024-12
SWE-bench Verified 2024-12 json
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "test-set:swe-bench-verified-2024-12",
"_kind": "TestSet",
"_file": "benchmarks/test-sets/swe-bench-verified-2024-12.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "SWE-bench Verified 2024-12",
"benchmarkId": "benchmark:swe-bench-verified",
"caseCount": 500,
"releasedAt": "2024-12-01",
"description": "The December 2024 release of the SWE-bench Verified test set.\n"
},
"outgoingEdges": [
{
"from": "test-set:swe-bench-verified-2024-12",
"to": "benchmark:swe-bench-verified",
"kind": "belongs_to_benchmark"
}
],
"incomingEdges": [
{
"from": "eval-run:swe-bench-verified.claude-haiku-4-5.2025-10",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench.deepseek-v3.2024-12",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.gemini-2-5-flash.2025-06",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.llama-4-405b.2024-07",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench.llama-3-1-405b.2024-07",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.claude-opus-4-5.2025-09",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.claude-opus-4-7.2026-01",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.o3.2025-04",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.gemini-2-5-pro.2025-06",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench.claude-code@1.x.2025-04-29",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.claude-sonnet-4-5.2025-09",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
},
{
"from": "eval-run:swe-bench-verified.gpt-5.2025-08",
"to": "test-set:swe-bench-verified-2024-12",
"kind": "uses_test_set"
}
]
}