II.
Workflow JSON
Structured · live · workflow:agent-evaluation-cycle
Agent Evaluation Cycle (JSON)
Inspect the normalized record payload exactly as the atlas UI reads it.
{
"id": "workflow:agent-evaluation-cycle",
"_kind": "Workflow",
"_file": "domain/workflows/workflows-technical-depth.yaml",
"_cluster": "domain",
"attributes": {
"displayName": "Agent Evaluation Cycle",
"description": "Rigorous evaluation workflow for measuring the accuracy, reliability, and safety of\nAI agent systems across defined benchmark tasks and adversarial scenarios. The ML\nengineer assembles an evaluation harness with a curated dataset of prompts, expected\noutputs, and rubric-based scoring functions. The backend engineer integrates the\nharness into CI so every model or prompt change triggers an automated eval run.\nRegression thresholds enforce that new versions do not degrade on prior benchmarks,\nwhile exploratory eval sessions probe edge cases and failure modes that inform the\nnext iteration of the agent's architecture or system prompt.\n",
"workflowKind": "development",
"triggerType": "on-demand",
"typicalCadence": "per-sprint",
"complexity": "complex"
},
"outgoingEdges": [
{
"from": "workflow:agent-evaluation-cycle",
"to": "role:ml-engineer",
"kind": "involves_role"
},
{
"from": "workflow:agent-evaluation-cycle",
"to": "role:backend-engineer",
"kind": "involves_role"
},
{
"from": "workflow:agent-evaluation-cycle",
"to": "role:research-engineer",
"kind": "involves_role"
},
{
"from": "workflow:agent-evaluation-cycle",
"to": "role:qa-engineer",
"kind": "involves_role"
},
{
"from": "workflow:agent-evaluation-cycle",
"to": "domain:software-engineering",
"kind": "applies_to_domain"
}
],
"incomingEdges": [
{
"from": "stack-profile:multi-agent-orchestration",
"to": "workflow:agent-evaluation-cycle",
"kind": "follows_workflow"
},
{
"from": "stack-profile:voice-ai-agent",
"to": "workflow:agent-evaluation-cycle",
"kind": "follows_workflow"
},
{
"from": "stack-profile:autonomous-agent-fleet",
"to": "workflow:agent-evaluation-cycle",
"kind": "follows_workflow"
},
{
"from": "stack-profile:prompt-engineering-workbench",
"to": "workflow:agent-evaluation-cycle",
"kind": "follows_workflow"
},
{
"from": "tool:fireworks-ai",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "medium",
"evidence": "Model API outputs can be compared across prompts, eval suites, and agent runs."
}
},
{
"from": "tool:mistral",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "medium",
"evidence": "Model outputs can be benchmarked during agent evaluation cycles."
}
},
{
"from": "tool:openai",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "high",
"evidence": "Model responses and tool-use traces can be measured through agent evaluation cycles."
}
},
{
"from": "tool:deepseek",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "medium",
"evidence": "Agent and coding-benchmark outputs can be compared against other LLM providers."
}
},
{
"from": "tool-server:mcp-mistral-ai-candidate",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "medium",
"evidence": "Model outputs can be benchmarked during agent evaluation."
}
},
{
"from": "tool-server:mcp-deepseek-candidate",
"to": "workflow:agent-evaluation-cycle",
"kind": "supports_work",
"attributes": {
"confidence": "medium",
"evidence": "Coding-agent outputs can be measured in evaluation cycles."
}
},
{
"from": "lib-process:ai-agents-conversational--ab-testing-conversational",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 0.7
}
},
{
"from": "lib-process:ai-agents-conversational--add-app-to-mcp-server",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--advanced-rag-patterns",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--agent-evaluation-framework",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--agent-performance-optimization",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--autonomous-task-planning",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--bias-detection-fairness",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--content-moderation-safety",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--conversational-memory-system",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--convert-web-app-to-mcp",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--create-mcp-app",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--custom-tool-development",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--empathetic-response-generation",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--entity-extraction-slot-filling",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--intent-classification-system",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--knowledge-base-qa",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--llm-fine-tuning-conversational",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--llm-observability-monitoring",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--long-term-memory-management",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--multi-agent-system",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--multi-modal-agent",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--prompt-engineering-workflow",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--prompt-injection-defense",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--react-agent-implementation",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--regression-testing-agent",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 0.7
}
},
{
"from": "lib-process:ai-agents-conversational--self-reflection-agent",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--system-prompt-guardrails",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--tool-safety-validation",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
},
{
"from": "lib-process:ai-agents-conversational--voice-enabled-conversational",
"to": "workflow:agent-evaluation-cycle",
"kind": "lib_implements_workflow",
"attributes": {
"weight": 1
}
}
]
}