{
  "slug": "browser-agent-evaluation-kit",
  "title": "Browser Agent Evaluation Kit",
  "stage": "Research",
  "audience": "Teams automating web operations",
  "summary": "Browser-agent tasks for navigation, form filling, extraction, screenshot QA, and resilient recovery from UI changes.",
  "href": "/studio/browser-agent-evaluation-kit",
  "problem": "Browser agents fail in ways that are invisible to a text-only benchmark: stale selectors, modals, partial page state, authentication friction, and confident completion claims without proof.",
  "metrics": [
    "Navigation success",
    "State verification",
    "DOM robustness",
    "Human handoff rate"
  ],
  "outputs": [
    "Browser task report",
    "Screenshot evidence",
    "Selector fragility map",
    "Handoff recommendation"
  ],
  "buyerQuestions": [
    "Can the agent prove the page reached the right state?",
    "What happens when a modal or validation error appears?",
    "Which workflows are stable enough for automation?",
    "Where should a human remain in the loop?"
  ],
  "evidenceLinks": [
    [
      "Browser Operations Suite",
      "/benchmarks/browser-operations-suite"
    ],
    [
      "Agent benchmarks article",
      "/articles/agent-benchmarks-that-survive-real-work"
    ],
    [
      "Studio request form",
      "/contact"
    ]
  ],
  "demoState": "Research kit with browser-operation scoring; real authenticated workflow packs can be built for consulting clients.",
  "catalogRank": 3,
  "maturity": "Research surface ready for benchmark-backed demos",
  "packetPath": "/reports/studio/browser-agent-evaluation-kit.md",
  "jsonPath": "/reports/studio/browser-agent-evaluation-kit.json",
  "previewImage": "/reports/studio/previews/browser-agent-evaluation-kit.png",
  "previewAlt": "Browser Agent Evaluation Kit live Studio preview screenshot",
  "demoReadinessScore": 64,
  "demoReadiness": "Research-demo ready with generated traces; needs reviewer-signed real run artifacts before it should drive a leaderboard or procurement decision.",
  "missingForLiveDemo": [
    "Reviewer-signed trace",
    "Real screenshot/media",
    "Provider or agent export"
  ],
  "handoffOwner": "Sanjay Prasad",
  "nextDemoArtifact": "/reports/studio/browser-agent-evaluation-kit.md"
}
