II.
Benchmark JSON
Structured · live · benchmark:mind2web-2
Mind2Web 2 JSON
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "benchmark:mind2web-2",
"_kind": "Benchmark",
"_file": "benchmarks/benchmarks/benchmarks-browser.yaml",
"_cluster": "benchmarks",
"attributes": {
"displayName": "Mind2Web 2",
"homepageUrl": "https://osu-nlp-group.github.io/Mind2Web2/",
"kind": "web-agent",
"targetsKind": "AgentVersion",
"description": "Successor of Mind2Web for evaluating generalist web agents across\nlong-horizon, multi-site, real-world tasks with judge-based outcome\nverification.\n"
},
"outgoingEdges": [
{
"from": "benchmark:mind2web-2",
"to": "skill-area:web-action-grounding",
"kind": "covers",
"attributes": {
"attributes": {
"coverage": "full",
"weight": 0.6
}
}
},
{
"from": "benchmark:mind2web-2",
"to": "skill-area:browser-automation",
"kind": "covers",
"attributes": {
"attributes": {
"coverage": "partial",
"weight": 0.3
}
}
},
{
"from": "benchmark:mind2web-2",
"to": "domain:web-development",
"kind": "applies_to",
"attributes": {}
}
],
"incomingEdges": []
}