{
  "version": "0.1.0",
  "generatedAt": "2026-05-16T05:45:00+05:30",
  "purpose": "Generated Studio product catalog for Edxperimental Labs demos, buyer follow-up, and service packaging.",
  "productCount": 8,
  "liveDemoCount": 6,
  "demoReadyCount": 7,
  "stageCounts": {
    "Preview": 3,
    "Research": 2,
    "Consulting": 1,
    "Designing v0.1": 1,
    "Client intake": 1
  },
  "products": [
    {
      "slug": "agent-benchmark-explorer",
      "title": "Agent Benchmark Explorer",
      "stage": "Preview",
      "audience": "AI teams comparing autonomous workflows",
      "summary": "A structured benchmark surface for measuring whether agents can plan, use tools, recover from errors, and complete useful work rather than only answer prompts.",
      "href": "/studio/agent-benchmark-explorer",
      "problem": "Agent buyers need to know whether a system can finish work across tools, not whether the base model can write an impressive paragraph. This product turns messy agent demos into repeatable runs with pass/fail evidence.",
      "metrics": [
        "Task completion",
        "Tool-call quality",
        "Recovery rate",
        "Cost per resolved task"
      ],
      "outputs": [
        "Agent scorecard",
        "Trace review table",
        "Failure taxonomy",
        "Cost per resolved task"
      ],
      "buyerQuestions": [
        "Can the agent recover after a bad tool call?",
        "Does it verify state before claiming completion?",
        "How much does each accepted workflow actually cost?",
        "Which failures should trigger human handoff?"
      ],
      "evidenceLinks": [
        [
          "Agentic Reliability Index",
          "/leaderboards#agentic-reliability-index"
        ],
        [
          "Browser Operations Suite",
          "/benchmarks/browser-operations-suite"
        ],
        [
          "Agent benchmarks article",
          "/articles/agent-benchmarks-that-survive-real-work"
        ]
      ],
      "demoState": "Preview dashboard with synthetic benchmark traces; real trace ingestion is the next build step.",
      "catalogRank": 1,
      "maturity": "Can be shown as a product demo today",
      "packetPath": "/reports/studio/agent-benchmark-explorer.md",
      "jsonPath": "/reports/studio/agent-benchmark-explorer.json",
      "previewImage": "/reports/studio/previews/agent-benchmark-explorer.png",
      "previewAlt": "Agent Benchmark Explorer live Studio preview screenshot",
      "demoReadinessScore": 76,
      "demoReadiness": "Research-demo ready with generated traces; needs reviewer-signed real run artifacts before it should drive a leaderboard or procurement decision.",
      "missingForLiveDemo": [
        "Reviewer-signed trace",
        "Real screenshot/media",
        "Provider or agent export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/agent-benchmark-explorer.md"
    },
    {
      "slug": "coding-agent-arena",
      "title": "Coding Agent Arena",
      "stage": "Research",
      "audience": "Engineering leaders and founders",
      "summary": "A coding-agent evaluation track for repository edits, bug fixes, browser checks, terminal usage, and regression discipline.",
      "href": "/studio/coding-agent-arena",
      "problem": "Coding agents are usually shown through curated demos. The Arena measures whether they can read an existing repository, make a scoped patch, run checks, inspect the UI, and produce work a senior engineer would review seriously.",
      "metrics": [
        "Patch correctness",
        "Test pass rate",
        "Review quality",
        "Time to mergeable PR"
      ],
      "outputs": [
        "Patch review",
        "Regression report",
        "Tool-use transcript",
        "Merge-readiness score"
      ],
      "buyerQuestions": [
        "Can this agent work inside our existing codebase?",
        "Does it respect ownership boundaries and avoid unrelated churn?",
        "Can it debug failing tests without hiding the failure?",
        "What tasks are safe to delegate today?"
      ],
      "evidenceLinks": [
        [
          "Coding Agent Maintenance Suite",
          "/benchmarks/coding-agent-maintenance-suite"
        ],
        [
          "Agentic Reliability Index",
          "/leaderboards#agentic-reliability-index"
        ],
        [
          "Leaderboard methodology",
          "/articles/building-a-useful-ai-leaderboard-without-fooling-ourselves"
        ]
      ],
      "demoState": "Live coding arena is connected to the Coding Agent Maintenance Suite; next step is importing real agent patches, logs, and review artifacts.",
      "catalogRank": 2,
      "maturity": "Research surface ready for benchmark-backed demos",
      "packetPath": "/reports/studio/coding-agent-arena.md",
      "jsonPath": "/reports/studio/coding-agent-arena.json",
      "previewImage": "/reports/studio/previews/coding-agent-arena.png",
      "previewAlt": "Coding Agent Arena live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/coding-agent-arena.md"
    },
    {
      "slug": "browser-agent-evaluation-kit",
      "title": "Browser Agent Evaluation Kit",
      "stage": "Research",
      "audience": "Teams automating web operations",
      "summary": "Browser-agent tasks for navigation, form filling, extraction, screenshot QA, and resilient recovery from UI changes.",
      "href": "/studio/browser-agent-evaluation-kit",
      "problem": "Browser agents fail in ways that are invisible to a text-only benchmark: stale selectors, modals, partial page state, authentication friction, and confident completion claims without proof.",
      "metrics": [
        "Navigation success",
        "State verification",
        "DOM robustness",
        "Human handoff rate"
      ],
      "outputs": [
        "Browser task report",
        "Screenshot evidence",
        "Selector fragility map",
        "Handoff recommendation"
      ],
      "buyerQuestions": [
        "Can the agent prove the page reached the right state?",
        "What happens when a modal or validation error appears?",
        "Which workflows are stable enough for automation?",
        "Where should a human remain in the loop?"
      ],
      "evidenceLinks": [
        [
          "Browser Operations Suite",
          "/benchmarks/browser-operations-suite"
        ],
        [
          "Agent benchmarks article",
          "/articles/agent-benchmarks-that-survive-real-work"
        ],
        [
          "Studio request form",
          "/contact"
        ]
      ],
      "demoState": "Research kit with browser-operation scoring; real authenticated workflow packs can be built for consulting clients.",
      "catalogRank": 3,
      "maturity": "Research surface ready for benchmark-backed demos",
      "packetPath": "/reports/studio/browser-agent-evaluation-kit.md",
      "jsonPath": "/reports/studio/browser-agent-evaluation-kit.json",
      "previewImage": "/reports/studio/previews/browser-agent-evaluation-kit.png",
      "previewAlt": "Browser Agent Evaluation Kit live Studio preview screenshot",
      "demoReadinessScore": 64,
      "demoReadiness": "Research-demo ready with generated traces; needs reviewer-signed real run artifacts before it should drive a leaderboard or procurement decision.",
      "missingForLiveDemo": [
        "Reviewer-signed trace",
        "Real screenshot/media",
        "Provider or agent export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/browser-agent-evaluation-kit.md"
    },
    {
      "slug": "customer-support-agent-scorecard",
      "title": "Customer Support Agent Scorecard",
      "stage": "Consulting",
      "audience": "Support, CX, and operations teams",
      "summary": "A scorecard for support agents covering escalation quality, policy adherence, multilingual handling, hallucination risk, and customer outcome.",
      "href": "/studio/customer-support-agent-scorecard",
      "problem": "Support automation is judged by customer outcome, not chat polish. The scorecard tests whether an agent follows policy, knows when to escalate, handles mixed-language inputs, and avoids inventing exceptions.",
      "metrics": [
        "Resolution rate",
        "Escalation precision",
        "Policy adherence",
        "Tone consistency"
      ],
      "outputs": [
        "Support scenario pack",
        "Policy adherence matrix",
        "Escalation audit",
        "Rollout recommendation"
      ],
      "buyerQuestions": [
        "Will the agent respect our refund and exception policy?",
        "Can it handle bilingual or region-specific context?",
        "When should it escalate instead of improvising?",
        "Which support queues should be automated first?"
      ],
      "evidenceLinks": [
        [
          "Support Agent Policy Suite",
          "/benchmarks/support-agent-policy-suite"
        ],
        [
          "Consulting services",
          "/consulting"
        ],
        [
          "Contact Saujas",
          "/contact"
        ]
      ],
      "demoState": "Live support scorecard is connected to the Support Agent Policy Suite; client-specific policies can be converted into private benchmark packs.",
      "catalogRank": 4,
      "maturity": "Best introduced inside a consulting conversation",
      "packetPath": "/reports/studio/customer-support-agent-scorecard.md",
      "jsonPath": "/reports/studio/customer-support-agent-scorecard.json",
      "previewImage": "/reports/studio/previews/customer-support-agent-scorecard.png",
      "previewAlt": "Customer Support Agent Scorecard live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Saujas",
      "nextDemoArtifact": "/reports/studio/customer-support-agent-scorecard.md"
    },
    {
      "slug": "indian-workflow-benchmark",
      "title": "Indian Workflow Benchmark",
      "stage": "Designing v0.1",
      "audience": "Indian enterprises, AI buyers, and product teams",
      "summary": "A workflow benchmark for Indian business tasks: finance, support, multilingual handoffs, document reasoning, sales ops, and evidence-grounded escalation.",
      "href": "/studio/indian-workflow-benchmark",
      "problem": "Most benchmarks do not test the messy Indian operating surface: mixed-language tickets, GST documents, vendor email threads, policy exceptions, and workflows where the answer has to cite the right evidence.",
      "metrics": [
        "Outcome correctness",
        "Evidence citation",
        "Escalation judgement",
        "Cost per accepted output"
      ],
      "outputs": [
        "Workflow task pack",
        "Model comparison memo",
        "Evidence audit",
        "Deployment readiness map"
      ],
      "buyerQuestions": [
        "Which models survive Indian document and support workflows?",
        "Where do multilingual or policy tasks fail?",
        "What can be safely automated versus escalated?",
        "How do quality, latency, and cost change by workflow type?"
      ],
      "evidenceLinks": [
        [
          "Indian Enterprise Workflow Suite",
          "/benchmarks/indian-enterprise-workflow-suite"
        ],
        [
          "Indian workflow article",
          "/articles/designing-the-indian-enterprise-ai-workflow-benchmark"
        ],
        [
          "Leaderboard methodology",
          "/articles/building-a-useful-ai-leaderboard-without-fooling-ourselves"
        ]
      ],
      "demoState": "Live benchmark console is connected to the Indian Enterprise Workflow Suite; next step is replacing seed traces with real client-approved examples.",
      "catalogRank": 5,
      "maturity": "Research surface ready for benchmark-backed demos",
      "packetPath": "/reports/studio/indian-workflow-benchmark.md",
      "jsonPath": "/reports/studio/indian-workflow-benchmark.json",
      "previewImage": "/reports/studio/previews/indian-workflow-benchmark.png",
      "previewAlt": "Indian Workflow Benchmark live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/indian-workflow-benchmark.md"
    },
    {
      "slug": "model-recommendation-console",
      "title": "Model Recommendation Console",
      "stage": "Preview",
      "audience": "Buyers choosing models or API providers",
      "summary": "A decision console that maps use-case constraints to a model shortlist across quality, latency, price, context, privacy, and deployment surface.",
      "href": "/studio/model-recommendation-console",
      "problem": "Model selection becomes noisy when every provider is evaluated through generic demos. The console starts from workload constraints and produces a shortlist with fallback routes and cost envelopes.",
      "metrics": [
        "Fit score",
        "Latency budget",
        "Monthly cost",
        "Fallback coverage"
      ],
      "outputs": [
        "Model shortlist",
        "Fallback map",
        "Monthly cost envelope",
        "Pre-production test plan"
      ],
      "buyerQuestions": [
        "Which model should handle the expensive path?",
        "Where can a cheaper router or fallback be used?",
        "What latency and privacy constraints change the answer?",
        "What benchmark evidence is still missing?"
      ],
      "evidenceLinks": [
        [
          "Models, Agents & Hardware",
          "/models"
        ],
        [
          "Cost Efficiency Index",
          "/leaderboards#cost-efficiency-index"
        ],
        [
          "Cost curves article",
          "/articles/cost-curves-for-frontier-reasoning-models"
        ]
      ],
      "demoState": "Preview decision console; next release should connect to live provider pricing and benchmark run imports.",
      "catalogRank": 6,
      "maturity": "Can be shown as a product demo today",
      "packetPath": "/reports/studio/model-recommendation-console.md",
      "jsonPath": "/reports/studio/model-recommendation-console.json",
      "previewImage": "/reports/studio/previews/model-recommendation-console.png",
      "previewAlt": "Model Recommendation Console live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/model-recommendation-console.md"
    },
    {
      "slug": "cost-curve-workbench",
      "title": "Cost Curve Workbench",
      "stage": "Preview",
      "audience": "Finance and platform teams",
      "summary": "A calculator-style tool for converting token pricing into workload cost curves, batch discounts, cache effects, and per-resolution economics.",
      "href": "/studio/cost-curve-workbench",
      "problem": "Token prices do not tell a finance team what production AI will cost. The Workbench converts workload shape into cost per accepted output, including retries, tool calls, cache hit rate, batching, and review fallout.",
      "metrics": [
        "Cost per 1k tasks",
        "Cache savings",
        "Batch savings",
        "Reasoning-token exposure"
      ],
      "outputs": [
        "Cost curve",
        "Scenario table",
        "Savings waterfall",
        "Budget envelope"
      ],
      "buyerQuestions": [
        "What will this cost at 10k, 100k, or 1M workflows?",
        "How much do retries and human review change the answer?",
        "When does prompt caching materially matter?",
        "Which model class is cheap after quality is included?"
      ],
      "evidenceLinks": [
        [
          "Cost Efficiency Index",
          "/leaderboards#cost-efficiency-index"
        ],
        [
          "Cost curves article",
          "/articles/cost-curves-for-frontier-reasoning-models"
        ],
        [
          "Prompt caching article",
          "/articles/prompt-caching-batch-api-and-the-real-ai-cost-stack"
        ]
      ],
      "demoState": "Live calculator surface is available; next release should connect to official provider pricing refresh scripts.",
      "catalogRank": 7,
      "maturity": "Can be shown as a product demo today",
      "packetPath": "/reports/studio/cost-curve-workbench.md",
      "jsonPath": "/reports/studio/cost-curve-workbench.json",
      "previewImage": "/reports/studio/previews/cost-curve-workbench.png",
      "previewAlt": "Cost Curve Workbench live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Sanjay Prasad",
      "nextDemoArtifact": "/reports/studio/cost-curve-workbench.md"
    },
    {
      "slug": "consulting-diagnostic",
      "title": "Consulting Diagnostic",
      "stage": "Client intake",
      "audience": "Founders, AI buyers, and operations leaders",
      "summary": "A fast intake surface for turning an AI idea, vendor claim, or production concern into a benchmarkable consulting engagement.",
      "href": "/studio/consulting-diagnostic",
      "problem": "Most AI consulting starts too broadly. The diagnostic starts with the workload, risk, evidence, and decision deadline so the first sprint produces a concrete evaluation artifact instead of a generic strategy deck.",
      "metrics": [
        "Workflow risk",
        "Evidence gap",
        "First sprint",
        "Owner routing"
      ],
      "outputs": [
        "Diagnostic memo",
        "Evidence request list",
        "First-sprint plan",
        "Owner handoff"
      ],
      "buyerQuestions": [
        "What is the smallest benchmark we should run first?",
        "Which evidence is missing before production?",
        "Who should own the technical and sales-engineering handoff?",
        "How quickly can we get a decision artifact?"
      ],
      "evidenceLinks": [
        [
          "Consulting services",
          "/consulting"
        ],
        [
          "Case studies",
          "/case-studies"
        ],
        [
          "Contact the team",
          "/contact"
        ]
      ],
      "demoState": "Live consulting intake demo is available; next step is connecting it to a real lead-capture backend and CRM workflow.",
      "catalogRank": 8,
      "maturity": "Research surface ready for benchmark-backed demos",
      "packetPath": "/reports/studio/consulting-diagnostic.md",
      "jsonPath": "/reports/studio/consulting-diagnostic.json",
      "previewImage": "/reports/studio/previews/consulting-diagnostic.png",
      "previewAlt": "Consulting Diagnostic live Studio preview screenshot",
      "demoReadinessScore": 82,
      "demoReadiness": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
      "missingForLiveDemo": [
        "Product walkthrough video",
        "Client-approved example",
        "Real run export"
      ],
      "handoffOwner": "Saujas",
      "nextDemoArtifact": "/reports/studio/consulting-diagnostic.md"
    }
  ],
  "demoReadinessBoard": {
    "purpose": "Public Studio demo readiness board for deciding which products can be shown today, which need consulting context, and which need real traces or client-approved media before stronger claims.",
    "rows": [
      {
        "slug": "agent-benchmark-explorer",
        "title": "Agent Benchmark Explorer",
        "stage": "Preview",
        "score": 76,
        "owner": "Sanjay Prasad",
        "status": "Research-demo ready with generated traces; needs reviewer-signed real run artifacts before it should drive a leaderboard or procurement decision.",
        "missingForLiveDemo": [
          "Reviewer-signed trace",
          "Real screenshot/media",
          "Provider or agent export"
        ],
        "href": "/studio/agent-benchmark-explorer",
        "packetPath": "/reports/studio/agent-benchmark-explorer.md",
        "previewImage": "/reports/studio/previews/agent-benchmark-explorer.png"
      },
      {
        "slug": "coding-agent-arena",
        "title": "Coding Agent Arena",
        "stage": "Research",
        "score": 82,
        "owner": "Sanjay Prasad",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/coding-agent-arena",
        "packetPath": "/reports/studio/coding-agent-arena.md",
        "previewImage": "/reports/studio/previews/coding-agent-arena.png"
      },
      {
        "slug": "browser-agent-evaluation-kit",
        "title": "Browser Agent Evaluation Kit",
        "stage": "Research",
        "score": 64,
        "owner": "Sanjay Prasad",
        "status": "Research-demo ready with generated traces; needs reviewer-signed real run artifacts before it should drive a leaderboard or procurement decision.",
        "missingForLiveDemo": [
          "Reviewer-signed trace",
          "Real screenshot/media",
          "Provider or agent export"
        ],
        "href": "/studio/browser-agent-evaluation-kit",
        "packetPath": "/reports/studio/browser-agent-evaluation-kit.md",
        "previewImage": "/reports/studio/previews/browser-agent-evaluation-kit.png"
      },
      {
        "slug": "customer-support-agent-scorecard",
        "title": "Customer Support Agent Scorecard",
        "stage": "Consulting",
        "score": 82,
        "owner": "Saujas",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/customer-support-agent-scorecard",
        "packetPath": "/reports/studio/customer-support-agent-scorecard.md",
        "previewImage": "/reports/studio/previews/customer-support-agent-scorecard.png"
      },
      {
        "slug": "indian-workflow-benchmark",
        "title": "Indian Workflow Benchmark",
        "stage": "Designing v0.1",
        "score": 82,
        "owner": "Sanjay Prasad",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/indian-workflow-benchmark",
        "packetPath": "/reports/studio/indian-workflow-benchmark.md",
        "previewImage": "/reports/studio/previews/indian-workflow-benchmark.png"
      },
      {
        "slug": "model-recommendation-console",
        "title": "Model Recommendation Console",
        "stage": "Preview",
        "score": 82,
        "owner": "Sanjay Prasad",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/model-recommendation-console",
        "packetPath": "/reports/studio/model-recommendation-console.md",
        "previewImage": "/reports/studio/previews/model-recommendation-console.png"
      },
      {
        "slug": "cost-curve-workbench",
        "title": "Cost Curve Workbench",
        "stage": "Preview",
        "score": 82,
        "owner": "Sanjay Prasad",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/cost-curve-workbench",
        "packetPath": "/reports/studio/cost-curve-workbench.md",
        "previewImage": "/reports/studio/previews/cost-curve-workbench.png"
      },
      {
        "slug": "consulting-diagnostic",
        "title": "Consulting Diagnostic",
        "stage": "Client intake",
        "score": 82,
        "owner": "Saujas",
        "status": "Demo-ready with current generated traces and interactive controls; replace screenshots with product walkthrough video when available.",
        "missingForLiveDemo": [
          "Product walkthrough video",
          "Client-approved example",
          "Real run export"
        ],
        "href": "/studio/consulting-diagnostic",
        "packetPath": "/reports/studio/consulting-diagnostic.md",
        "previewImage": "/reports/studio/previews/consulting-diagnostic.png"
      }
    ],
    "tourOrder": [
      "consulting-diagnostic",
      "model-recommendation-console",
      "cost-curve-workbench",
      "agent-benchmark-explorer",
      "indian-workflow-benchmark",
      "coding-agent-arena",
      "browser-agent-evaluation-kit",
      "customer-support-agent-scorecard"
    ],
    "gates": [
      "Generated demo packet exists.",
      "Interactive or screenshot preview exists.",
      "Connected benchmark/research evidence is linked.",
      "Missing real traces, walkthrough video, and client-approved examples are labeled."
    ]
  }
}
