Eval-Driven LLM Development
skill-area:eval-driven-development
SkillAreadomain/skill-areas/skill-areas-ai-ml.yaml·Open in Graph → {
"id": "skill-area:eval-driven-development",
"_kind": "SkillArea",
"_file": "domain/skill-areas/skill-areas-ai-ml.yaml",
"_cluster": "domain",
"attributes": {
"displayName": "Eval-Driven LLM Development",
"description": "Defining evals before features — golden sets, rubric scoring,\nLLM-as-judge with calibration, and regression gates.\n",
"domains": [
"specialization:ai-agents-conversational"
],
"expertiseLevels": [
"intermediate",
"expert"
]
},
"outgoingEdges": [
{
"from": "skill-area:eval-driven-development",
"to": "specialization:ai-agents-conversational",
"kind": "applies_to",
"attributes": {
"confidence": "primary"
}
}
],
"incomingEdges": [
{
"from": "skill-area:hallucination-mitigation-fact-checking",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area"
},
{
"from": "skill-area:agent-simulation-testing",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area"
},
{
"from": "skill:babysitter-retrospect",
"to": "skill-area:eval-driven-development",
"kind": "addresses",
"attributes": {}
},
{
"from": "skill:babysitter-accomplish-status",
"to": "skill-area:eval-driven-development",
"kind": "addresses",
"attributes": {}
},
{
"from": "responsibility:ai-agent-usage-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "responsibility:ai-tooling-evaluation",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:ai-champion",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:data-scientist",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:ml-engineer",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:planner",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:ml-engineer-convergent",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "role:product-owner",
"to": "skill-area:eval-driven-development",
"kind": "requires_expertise",
"attributes": {}
},
{
"from": "workflow:rag-pipeline-evaluation",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-content-moderation-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-agent-adoption-rollout",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-usage-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-knowledge-sharing",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-pair-programming-governance",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-model-license-compliance",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:algo-strategy-backtesting",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:adas-validation-cycle",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:process-simulation-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:model-fairness-audit",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ml-model-versioning-governance",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:adaptive-learning-model-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:landing-page-optimization-cycle",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:growth-experiment-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:growth-experimentation-platform-setup",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:quality-control-audit",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:underwriting-model-validation",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:contract-automation-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:hypothesis-driven-experiment",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:prompt-regression-testing",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:llm-eval-pipeline",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:model-card-maintenance",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:impact-measurement-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:computational-experiment-validation",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:competitive-landscape-analysis",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:quant-model-peer-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:quantum-algorithm-benchmarking",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:error-correction-validation",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:revenue-forecasting-model-calibration",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:support-chatbot-performance-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-agent-adoption-rollout",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-usage-review",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
},
{
"from": "workflow:ai-knowledge-sharing",
"to": "skill-area:eval-driven-development",
"kind": "requires_skill_area",
"attributes": {}
}
]
}