II.
TestSet JSON
Structured · live · test-set:hellaswag-validation
HellaSwag validation JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "test-set:hellaswag-validation",
"_kind": "TestSet",
"_file": "benchmarks/test-sets/hellaswag-validation.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "HellaSwag validation",
"benchmarkId": "benchmark:hellaswag",
"caseCount": 10042,
"releasedAt": "2019-05-19",
"composition": "The validation split of HellaSwag (10,042 multiple-choice\nsentence-completion items adversarially filtered against an\nELMo/BERT discriminator). The test split labels are not public,\nso most evaluations report on validation.\n",
"homepageUrl": "https://rowanzellers.com/hellaswag/",
"description": "Standard HellaSwag validation split. Nearly all reported HellaSwag\nnumbers in modern model cards refer to this set.\n"
},
"outgoingEdges": [
{
"from": "test-set:hellaswag-validation",
"to": "benchmark:hellaswag",
"kind": "belongs_to_benchmark"
}
],
"incomingEdges": []
}