{
  "generatedAt": "2026-05-16T05:29:45.101Z",
  "source": "../docs/website-buildout-plan.md",
  "purpose": "Public buildout tracker for Edxperimental Labs: what has shipped, what still needs real-world inputs, and which research lanes should keep expanding.",
  "summary": {
    "finishedCount": 92,
    "nextCount": 13,
    "backlogCount": 8,
    "categoryCounts": {
      "Studio": 12,
      "Benchmarks": 30,
      "Content": 4,
      "Consulting": 5,
      "Research": 31,
      "Platform": 3,
      "Company": 7
    }
  },
  "finished": [
    {
      "id": "finished-01",
      "title": "Homepage keeps the \"Independent analysis of AI\" positioning and now points strongly into Studio and Articles.",
      "category": "Studio"
    },
    {
      "id": "finished-02",
      "title": "Studio route exists with product/service surfaces for agent benchmarks, coding agents, browser agents, support agents, model recommendations, and cost curves.",
      "category": "Studio"
    },
    {
      "id": "finished-03",
      "title": "Studio products now have detail pages at /studio/[slug] with buyer questions, workflow steps, deliverables, demo state, and links into benchmark evidence.",
      "category": "Studio"
    },
    {
      "id": "finished-04",
      "title": "Studio now has a generated product catalog from pnpm studio:generate, with markdown/JSON catalog files and buyer-ready packets for all eight Studio surfaces under edxperimental-labs/public/reports/studio/.",
      "category": "Studio"
    },
    {
      "id": "finished-05",
      "title": "Studio now has a generated demo-readiness board from pnpm studio:generate, with demo-ready counts, owners, tour order, readiness gates, missing live-demo evidence, downloadable demo-readiness.md, and Command-K indexed /studio#studio-demo-readiness surface.",
      "category": "Studio"
    },
    {
      "id": "finished-06",
      "title": "Studio now has captured visual preview assets from pnpm studio:screenshots, with one live page screenshot per Studio product and a manifest under edxperimental-labs/public/reports/studio/previews/.",
      "category": "Studio"
    },
    {
      "id": "finished-07",
      "title": "Agent Benchmark Explorer now includes a live trace explorer driven by generated benchmark data, with suite/task selectors, expected evidence, model trace ranking, top answer excerpt, failure reason, and tool-call chips.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-08",
      "title": "Browser Agent Evaluation Kit now includes a live browser-state evaluation demo driven by browser-operation traces, with scenario selection, strict state proof toggle, state/recovery/screenshot/handoff indicators, run comparison, and deployment readout.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-09",
      "title": "Coding Agent Arena now includes a live coding-agent console driven by maintenance traces, with task packet selection, browser-proof control, merge-readiness/regression/tool-discipline indicators, run ranking, acceptance evidence, and arena verdict.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-10",
      "title": "Customer Support Agent Scorecard now includes a live support scorecard console driven by support-policy traces, with scenario selection, escalation proof control, policy/tone/handoff indicators, model ranking, evidence chips, and rollout readout.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-11",
      "title": "Indian Workflow Benchmark is now a Studio product page with a live benchmark console driven by the Indian Enterprise Workflow Suite, including workflow packet selection, holdout-pressure control, evidence/escalation/localization/cost indicators, task-mix bars, model comparison, and benchmark readiness guidance.",
      "category": "Studio"
    },
    {
      "id": "finished-12",
      "title": "Consulting Diagnostic is now a Studio product page with a live intake console for choosing consulting tracks, adjusting deployment pressure/evidence gap/data sensitivity, routing work to Sanjay or Saujas, and generating first-sprint guidance.",
      "category": "Studio"
    },
    {
      "id": "finished-13",
      "title": "Cost Curve Workbench now includes a live interactive Studio demo with sliders for input/output tokens, cache hit rate, batchable share, and human review cost, plus a modeled monthly cost curve and route comparison table.",
      "category": "Studio"
    },
    {
      "id": "finished-14",
      "title": "Model Recommendation Console now includes a live interactive Studio demo with workload selection, quality/latency/cost/privacy/agentic controls, fit rankings, primary/fallback route cards, and a recommendation table.",
      "category": "Studio"
    },
    {
      "id": "finished-15",
      "title": "Models, Agents & Hardware now includes a generated inference economics playbook from pnpm inference:generate, covering managed APIs, hosted open-weight inference, dedicated endpoints, self-hosted GPUs, batch, cache, throughput, queueing, and latency variance.",
      "category": "Content"
    },
    {
      "id": "finished-16",
      "title": "Models, Agents & Hardware now includes a generated inference trace kit from pnpm inference:generate, with measured-trace CSV template, JSON schema, runbook, metric definitions, and /models#inference-trace-kit surface for latency, throughput, cache, batch, acceptance, and cost data.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-17",
      "title": "Models, Agents & Hardware now includes a generated hardware procurement matrix from pnpm inference:generate, with decision gates, readiness scores, route choices, and /models#hardware-procurement-matrix surface for managed APIs, hosted open-weight inference, dedicated endpoints, cloud GPUs, and owned hardware.",
      "category": "Content"
    },
    {
      "id": "finished-18",
      "title": "Models, Agents & Hardware route exists as one combined technical map.",
      "category": "Content"
    },
    {
      "id": "finished-19",
      "title": "Leaderboards route exists with prototype tracks for Indian workflows, agentic reliability, cost efficiency, and latency/throughput.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-20",
      "title": "Benchmark Lab route exists with benchmark suites, sample tasks, scoring rubrics, and a build order.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-21",
      "title": "Case study cards now link to full report-style detail pages with challenge, approach, evidence cards, timeline, risk register, and consulting CTA.",
      "category": "Consulting"
    },
    {
      "id": "finished-22",
      "title": "Case study detail pages now include generated evidence packets from pnpm case-studies:generate, with downloadable markdown, structured JSON, and a manifest under public/reports/case-study-packets/.",
      "category": "Consulting"
    },
    {
      "id": "finished-23",
      "title": "Articles route exists and now links to six research-style drafts: Building a Useful AI Leaderboard Without Fooling Ourselves; Mechanistic Interpretability for Operators, Not Mystics; Cost Curves for Frontier Reasoning Models; Agent Benchmarks That Survive Real Work; Prompt Caching, Batch API, and the Real AI Cost Stack; and Designing the Indian Enterprise AI Workflow Benchmark.",
      "category": "Research"
    },
    {
      "id": "finished-24",
      "title": "Articles route now also includes Open-Weight Inference Economics for Enterprise AI, a source-linked buyer map for comparing managed frontier APIs, hosted open-weight APIs, dedicated endpoints, and self-hosted GPU stacks across Mistral, DeepSeek, Qwen, and hosted inference providers.",
      "category": "Research"
    },
    {
      "id": "finished-25",
      "title": "Article detail pages include metric cards, bar charts, process diagrams, tables, source links, recommendation blocks, and visual cards.",
      "category": "Research"
    },
    {
      "id": "finished-26",
      "title": "A generated article manifest now tracks article authors, reviewers, publication order, source counts, visual-module counts, canonical paths, and next editorial steps through pnpm articles:manifest.",
      "category": "Research"
    },
    {
      "id": "finished-27",
      "title": "A generated editorial publishing kit now exports current article drafts, a new-article template, review checklist, and editorial manifest through pnpm articles:kit, creating a bridge toward MDX/CMS publishing.",
      "category": "Research"
    },
    {
      "id": "finished-28",
      "title": "The editorial publishing kit now also writes file-backed article sources under edxperimental-labs/content/articles/ plus a public source registry at public/reports/editorial/source-registry.{md,json}, surfaced at /articles#article-source-registry.",
      "category": "Research"
    },
    {
      "id": "finished-29",
      "title": "Article detail pages now read prose sections and source links from generated content/articles/*.md files, and rich visual metadata from generated content/articles/*.visual.json sidecars for charts, tables, control rooms, and article-specific infographics.",
      "category": "Research"
    },
    {
      "id": "finished-30",
      "title": "The editorial kit now publishes article visual sidecar packets under public/reports/editorial/visual-packets/, so review artifacts include the same chart, table, decision-map, and specialty-module data used by article rendering.",
      "category": "Research"
    },
    {
      "id": "finished-31",
      "title": "Article detail route generation, metadata, prose, source links, and rich visual modules now use the file-backed article sources and visual sidecars directly.",
      "category": "Research"
    },
    {
      "id": "finished-32",
      "title": "The Articles grid and Command-K article results now read data/article-source-index.json, generated from the file-backed article sources and visual sidecars, instead of mapping over the legacy lib/content.ts article array.",
      "category": "Research"
    },
    {
      "id": "finished-33",
      "title": "The Articles route now exposes the generated Research Evidence Library, Evaluation Operations control deck, Agent Benchmark Map, Mechanistic Playbook, and editorial publishing anchors so article research, source packets, and infographics are visible from the main Articles page.",
      "category": "Research"
    },
    {
      "id": "finished-34",
      "title": "The editorial publishing kit now parses content/articles/*.md and content/articles/*.visual.json directly, so article drafts, source registry, visual packets, and article source index no longer depend on the legacy lib/content.ts article seed array.",
      "category": "Research"
    },
    {
      "id": "finished-35",
      "title": "The editorial publishing kit now generates a publishing workflow decision packet at public/reports/editorial/publishing-workflow-decision.{md,json}, surfaced on /articles#publishing-workflow-decision and indexed in Command-K, recommending file-backed Markdown/visual JSON until MDX or CMS migration triggers are real.",
      "category": "Research"
    },
    {
      "id": "finished-36",
      "title": "A generated website buildout status tracker now turns this plan into a public /roadmap route and downloadable public/reports/site-status.md / site-status.json through pnpm status:generate.",
      "category": "Content"
    },
    {
      "id": "finished-37",
      "title": "The public /roadmap route now includes a Launch Control board that separates ready-now site work, Sanjay-input blockers, and the half-hour automation lane, and Command-K indexes it at /roadmap#launch-control.",
      "category": "Platform"
    },
    {
      "id": "finished-38",
      "title": "A generated research evidence library now tracks source trails, operator implications, and planned visual modules for benchmark design, mechanistic interpretability, model economics, and open-weight inference through pnpm research:generate.",
      "category": "Research"
    },
    {
      "id": "finished-39",
      "title": "The research evidence library now includes a bibliography atlas with publication dates, source types, method notes, buyer questions, evidence-strength rows, and chart-ready tables surfaced at /articles#research-bibliography-atlas.",
      "category": "Research"
    },
    {
      "id": "finished-40",
      "title": "The research evidence library now includes a claim-review matrix surfaced at /articles#research-claim-review and indexed in Command-K, with evidence strength, weakest links, proof gates, and infographic panels for each major research lane.",
      "category": "Research"
    },
    {
      "id": "finished-41",
      "title": "The research evidence library now includes source-reading packets surfaced at /articles#research-source-packets and indexed in Command-K, with excerpt-level notes, buyer implications, cautions, freshness checks, and visual encodings for every bibliography row.",
      "category": "Research"
    },
    {
      "id": "finished-42",
      "title": "The research evidence library now expands benchmark-design coverage with official OSWorld, WebArena, Terminal-Bench, and tau-bench packets, bringing source-reading coverage across coding, browsing, desktop, terminal, and tool-use agents.",
      "category": "Research"
    },
    {
      "id": "finished-43",
      "title": "The research evidence library now has 25 source-reading packets, adding LiveBench, HELM, LiveCodeBench, NIST CAISI DeepSeek evaluation, and MLCommons AI Safety source rows, plus a Command-K indexed /articles#research-proof-gaps matrix that labels current evidence, missing proof, next artifacts, publish rules, and visuals to build for each research lane.",
      "category": "Research"
    },
    {
      "id": "finished-44",
      "title": "A generated agent benchmark literature map now covers SWE-bench, Terminal-Bench, BrowserGym, WebArena, OSWorld, and tau-bench through pnpm agents:generate, with JSON/Markdown exports and an Articles-page comparison surface.",
      "category": "Research"
    },
    {
      "id": "finished-45",
      "title": "The agent benchmark generator now also creates an Agentic Reliability Index formula from current coding, browser, and support traces, with component weights, model rows, downloadable reports, and /leaderboards#agentic-reliability-formula surface.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-46",
      "title": "A generated mechanistic interpretability playbook now expands the audit-lab article into five explainer modules through pnpm mechanistic:generate: sparse autoencoders, activation patching, feature dashboards, circuit tracing, and audit limitations.",
      "category": "Research"
    },
    {
      "id": "finished-47",
      "title": "The mechanistic interpretability playbook now generates separate long-form explainer pages at /articles/mechanistic/[module], plus Markdown/JSON packets under public/reports/mechanistic-series/ for all five modules.",
      "category": "Research"
    },
    {
      "id": "finished-48",
      "title": "The three original dated research drafts now include source-backed research snapshot cards that translate benchmark, interpretability, and cost research into operator decisions.",
      "category": "Research"
    },
    {
      "id": "finished-49",
      "title": "The AI leaderboard methodology article now includes a leaderboard control room with freshness, holdout, repeat-run, outcome-rubric, provenance, and stress-variant controls, plus buyer-facing control cards for split design, run packets, retirement triggers, and risk memos.",
      "category": "Research"
    },
    {
      "id": "finished-50",
      "title": "The mechanistic interpretability article now includes an operator audit lab with behavior/attribution/feature/circuit/control layers, causal-test cards, and source-backed guidance on using SAEs, patching, ablation, and circuit tracing in enterprise audits.",
      "category": "Research"
    },
    {
      "id": "finished-51",
      "title": "The cost-curves article now includes a provider economics cockpit with official-pricing signals from OpenAI, Anthropic, Google, and xAI, plus batch/cache/quality-adjusted-cost levers for workflow modeling.",
      "category": "Research"
    },
    {
      "id": "finished-52",
      "title": "The three flagship research articles now include a reusable research-map infographic: benchmark evidence stack, mechanistic audit evidence map, and cost curve ledger map.",
      "category": "Research"
    },
    {
      "id": "finished-53",
      "title": "A pricing refresh script now fetches official provider pricing pages into edxperimental-labs/data/provider-pricing-snapshot.json, and the cost-curves article renders the source-linked pricing refresh ledger.",
      "category": "Research"
    },
    {
      "id": "finished-54",
      "title": "Consulting page includes services, engagement packages, sprint timeline, buyer-prep checklist, owner routing, case studies, and team block for Sanjay Prasad and Saujas.",
      "category": "Consulting"
    },
    {
      "id": "finished-55",
      "title": "Consulting page now includes a generated client intake packet with downloadable benchmark intake template, sprint proposal template, and structured JSON schema from pnpm consulting:generate.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-56",
      "title": "Consulting page now includes a generated operating plan from pnpm consulting:generate, with sales-engineering handoff, engagement readiness gates, delivery artifacts, owner routing, app data at data/consulting-collateral.json, downloadable consulting-operating-plan.md, and Command-K indexed /consulting#consulting-operating-plan surface.",
      "category": "Consulting"
    },
    {
      "id": "finished-57",
      "title": "Consulting page now includes a first-party benchmark brief form backed by /api/consulting-intake, with validation, bot-field filtering, owner routing, success/error UI, and gitignored NDJSON storage for local development.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-58",
      "title": "Contact page uses sanjay@edxperimentallabs.com and saujas@edxperimentallabs.com.",
      "category": "Company"
    },
    {
      "id": "finished-59",
      "title": "Careers page now includes role scorecards, first-month expectations, operating loops, application artifacts, and an evaluation rubric for early AI research analyst, benchmark engineer, and sales engineer candidates.",
      "category": "Research"
    },
    {
      "id": "finished-60",
      "title": "Careers page now includes a generated candidate application packet with downloadable application instructions, work-sample rubric, structured JSON schema, and manifest under edxperimental-labs/public/reports/careers/.",
      "category": "Company"
    },
    {
      "id": "finished-61",
      "title": "Careers page now includes a generated operating plan from pnpm careers:generate, with hiring pipeline, first-90-days plan, reviewer questions, owner routing, app data at data/careers-packets.json, downloadable careers-operating-plan.md, and Command-K indexed /careers#careers-operating-plan surface.",
      "category": "Company"
    },
    {
      "id": "finished-62",
      "title": "Careers page now includes a first-party application packet form backed by /api/careers-application, with validation, bot-field filtering, role-based owner routing, success/error UI, and gitignored NDJSON storage for local development.",
      "category": "Company"
    },
    {
      "id": "finished-63",
      "title": "Terms page exists and is linked from the footer.",
      "category": "Company"
    },
    {
      "id": "finished-64",
      "title": "Footer newsletter form now posts to a first-party /api/newsletter capture route with validation, bot-field filtering, success/error UI, and gitignored NDJSON storage for local development.",
      "category": "Company"
    },
    {
      "id": "finished-65",
      "title": "Command-K search covers pages, Studio products, benchmark suites, leaderboard tracks, consulting services, case studies, and articles.",
      "category": "Studio"
    },
    {
      "id": "finished-66",
      "title": "Webpack local dev flow is documented in edxperimental-labs/AGENTS.md.",
      "category": "Platform"
    },
    {
      "id": "finished-67",
      "title": "Bounded Playwright verifier exists at pnpm verify:site.",
      "category": "Platform"
    },
    {
      "id": "finished-68",
      "title": "A reproducible benchmark data scaffold exists: running pnpm benchmarks:generate from edxperimental-labs/ writes data/benchmark-runs.json, and Leaderboards and Benchmark Lab read generated benchmark results through lib/benchmark-results.ts.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-69",
      "title": "Benchmark suites now have detail pages at /benchmarks/[slug] with task mix, model rows, latency/retry fields, scoring rubrics, run provenance, and next-data-step guidance.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-70",
      "title": "Benchmark data generation now includes eight inspectable task trace packets across the four benchmark suites, with task brief, expected evidence, scoring focus, model outcomes, cost proxy, latency, and reviewer notes.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-71",
      "title": "Benchmark trace pages now exist at /benchmarks/[slug]/tasks/[taskId], and Command-K search indexes those trace pages.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-72",
      "title": "Task-level benchmark traces are now ingested from edxperimental-labs/data/benchmark-trace-input.json and validated by pnpm benchmarks:generate, so future real traces can be swapped into data rather than edited into the generator.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-73",
      "title": "Benchmark trace pages now show expected output, score breakdown, answer excerpt, failure reason, and tool-call chips for each model-class run.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-74",
      "title": "Benchmark generation now attaches leaderboard control metadata to every suite: public/private split, freshness policy, leakage policy, repeat-run rule, retirement rule, and required provenance fields.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-75",
      "title": "Benchmark trace pages now show trace provenance and score calculation ledgers, including trace id, creation/review dates, source, leakage risk, retirement status, model version, run seed, prompt packet, and weighted earned/max score components.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-76",
      "title": "Benchmark generation now also ingests CSV run rows from edxperimental-labs/data/benchmark-trace-runs.csv, merging them with JSON task packets to produce 16 inspectable trace pages, four per benchmark suite.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-77",
      "title": "Benchmark generation now writes replay-scaffold artifact bundles for every task trace under edxperimental-labs/public/reports/benchmark-artifacts/, including input-payload.json, run-log.json, and scorecard.json; trace pages link those files and show the intended replay command and screenshot placeholder.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-78",
      "title": "The trace replay command shown on benchmark pages now exists: pnpm benchmarks:replay --suite <suite-slug> --task <task-id> validates each artifact bundle against data/benchmark-runs.json and can emit JSON for future harness automation.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-79",
      "title": "Benchmark generation now also writes a Benchmark Evidence Readiness ledger to data/benchmark-evidence-readiness.json and public/reports/benchmark-evidence-readiness/readiness.{md,json}, surfaced at /benchmarks#benchmark-evidence-readiness and indexed in Command-K, to separate current scaffold traces from real leaderboard-ready evidence gates.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-80",
      "title": "Benchmark buyer reports now exist at /reports/benchmarks/[slug]; downloadable markdown and PDF briefs are generated into edxperimental-labs/public/reports/benchmark-briefs/ via pnpm reports:generate.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-81",
      "title": "Benchmark buyer reports now also generate saved .print.html templates plus public/reports/benchmark-briefs/manifest.json; report pages link to the exact designed HTML template used to render each downloadable PDF.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-82",
      "title": "Indian Enterprise Workflow Benchmark now has a generated v0.1 dataset design from pnpm workflows:generate, with 30 seed tasks, an exact 18 public / 12 holdout split, domain coverage, and JSON/Markdown/CSV exports under edxperimental-labs/public/reports/indian-workflow-dataset/.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-83",
      "title": "Indian Workflow Benchmark now generates redacted source packets, gold answers, reviewer notes, scoring checklists, and harness import rows for all 30 seed tasks under public/reports/indian-workflow-dataset/gold-packets/, surfaced at /benchmarks#indian-workflow-gold-packets.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-84",
      "title": "Benchmark Lab now includes a generated real-run intake kit from pnpm benchmarks:intake, with runbook, CSV template, JSON schema, reviewer checklist, and public /benchmarks#benchmark-intake-kit section for importing future provider, notebook, browser-agent, and coding-agent outputs.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-85",
      "title": "Benchmark Lab now includes a generated benchmark harness kit from pnpm benchmarks:harness, with provider API, browser-agent, coding-agent, and support-agent adapter contracts, sample NDJSON rows, quality gates, public artifacts, and a Command-K indexed /benchmarks#benchmark-harness-kit section.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-86",
      "title": "Benchmark Lab now includes a first-party benchmark run packet form backed by /api/benchmark-run-intake, with validation, bot-field filtering, review-state labeling, Command-K indexing, and gitignored NDJSON storage for future provider, notebook, browser-agent, and coding-agent exports.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-87",
      "title": "The research evidence library now includes an Evaluation Operations & AI Security lane from pnpm research:generate, expanding to 31 source-reading packets and adding a Command-K indexed /articles#evaluation-operations-control-deck section with runnable-manifest, threat-model, AI-security, and release-gate controls.",
      "category": "Research"
    },
    {
      "id": "finished-88",
      "title": "Benchmark Lab now includes an AI Security & Risk Suite from pnpm benchmarks:generate, with prompt-injection, tool-permission, sensitive-data, and AI-risk-incident trace pages, generated artifact bundles, evidence-readiness coverage, intake-form support, buyer brief PDFs/print templates, and Agentic Reliability Index inclusion.",
      "category": "Benchmarks"
    },
    {
      "id": "finished-89",
      "title": "Consulting now includes a generated service catalog from pnpm consulting:generate, with five buyer-facing services, owners, starting inputs, delivery artifacts, readiness scores, next actions, downloadable service-catalog markdown/JSON, and Command-K indexed /consulting#consulting-service-catalog surface.",
      "category": "Consulting"
    },
    {
      "id": "finished-90",
      "title": "Careers now includes a generated role catalog from pnpm careers:generate, with four artifact-led roles, owners, missions, first artifacts, proof signals, readiness scores, trial projects, downloadable role-catalog markdown/JSON, and Command-K indexed /careers#careers-role-catalog surface.",
      "category": "Company"
    },
    {
      "id": "finished-91",
      "title": "Case Studies now include a generated demo-readiness conversion board from pnpm case-studies:generate, mapping every case study to a Studio surface, benchmark trace, owner, demo controls, missing evidence gates, downloadable markdown/JSON, and Command-K indexed /case-studies#case-study-demo-readiness surface.",
      "category": "Studio"
    },
    {
      "id": "finished-92",
      "title": "Pricing refresh now emits parser diagnostics from pnpm pricing:refresh, including retrieval method, parsed required fields, source-linked fallback fields, confidence labels, review triggers, downloadable diagnostics markdown/JSON, and a Command-K indexed parser-health surface inside the cost-curves article.",
      "category": "Research"
    }
  ],
  "next": [
    {
      "id": "next-01",
      "title": "Feed real provider, notebook, browser-agent, and coding-agent outputs through /api/benchmark-run-intake or the CSV template, then replace prototype benchmark scores and task trace packets with reviewer-signed run rows.",
      "category": "Benchmarks",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-02",
      "title": "Connect the first-party /api/newsletter capture route to the chosen mailing-list provider or CRM once Sanjay picks the provider.",
      "category": "Company",
      "action": "Next",
      "owner": "Saujas with Sanjay",
      "dependency": "Provider/process decision",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-03",
      "title": "Connect /api/consulting-intake and the generated consulting collateral to signed proposal templates plus the final CRM once the sales process is finalized.",
      "category": "Consulting",
      "action": "Next",
      "owner": "Saujas with Sanjay",
      "dependency": "Provider/process decision",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-04",
      "title": "Connect /api/careers-application and generated careers collateral to the final hiring inbox, CRM, or applicant tracker once the first candidate process is ready.",
      "category": "Company",
      "action": "Next",
      "owner": "Saujas with Sanjay",
      "dependency": "Provider/process decision",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-05",
      "title": "Add social links once Sanjay shares them, replacing the current contact placeholders.",
      "category": "Company",
      "action": "Extend",
      "owner": "Sanjay",
      "dependency": "Official social URLs",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-06",
      "title": "Replace captured Studio preview screenshots with product walkthrough videos and client-approved demo media once live demos mature.",
      "category": "Studio",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Client approval",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-07",
      "title": "Replace generated Studio packets with richer live-demo media, screenshots, and client-approved examples as productized demos mature.",
      "category": "Studio",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Client approval",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-08",
      "title": "Wire the case-study demo-readiness rows into real product controls, sanitized walkthrough videos, and client-approved proof once those assets are available.",
      "category": "Consulting",
      "action": "Wire",
      "owner": "Saujas with Sanjay",
      "dependency": "Client approval",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-09",
      "title": "Replace the synthetic scripts/generate-benchmark-results.mjs seed rows with real model/provider run outputs.",
      "category": "Benchmarks",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Internal buildout",
      "readiness": "Can improve now"
    },
    {
      "id": "next-10",
      "title": "Point the current /api/benchmark-run-intake, CSV/JSON trace importer, and replay-scaffold artifact files at real harness/notebook exports once the first real benchmark runs are available.",
      "category": "Benchmarks",
      "action": "Wire",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-11",
      "title": "Replace synthetic screenshot placeholders in benchmark artifact bundles with real browser/app screenshots and provider response IDs from benchmark harness runs.",
      "category": "Benchmarks",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Internal buildout",
      "readiness": "Can improve now"
    },
    {
      "id": "next-12",
      "title": "Replace provisional case-study metrics with Sanjay's final client-approved numbers and screenshots when available.",
      "category": "Consulting",
      "action": "Replace",
      "owner": "Saujas with Sanjay",
      "dependency": "Client approval",
      "readiness": "Waiting on input"
    },
    {
      "id": "next-13",
      "title": "Replace generated case-study evidence packets with final client-approved screenshots, raw artifacts, and signed-off metrics when Sanjay provides them.",
      "category": "Consulting",
      "action": "Replace",
      "owner": "Saujas with Sanjay",
      "dependency": "Client approval",
      "readiness": "Waiting on input"
    }
  ],
  "researchBacklog": [
    {
      "id": "research-01",
      "title": "Extend the new open-weight inference economics article with measured latency/quality traces from actual Mistral, DeepSeek, Qwen, and hosted inference runs once API keys and benchmark harness outputs are available.",
      "category": "Research",
      "action": "Extend",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    },
    {
      "id": "research-02",
      "title": "Keep expanding data/research-evidence-library.json beyond the current 31 generated reading packets, and add carefully selected short verbatim excerpts only where publication needs exact wording.",
      "category": "Research",
      "action": "Extend",
      "owner": "Sanjay",
      "dependency": "Internal buildout",
      "readiness": "Can improve now"
    },
    {
      "id": "research-03",
      "title": "Keep monitoring pricing refresh parser drift as provider pages change, and update provider-specific selectors when diagnostics show low confidence or missing expected model labels.",
      "category": "Research",
      "action": "Next",
      "owner": "Sanjay",
      "dependency": "Provider page drift",
      "readiness": "Can improve now"
    },
    {
      "id": "research-04",
      "title": "Replace the generated Indian workflow v0.1 dataset design with redacted source packets, gold answers, reviewer notes, and real model/provider run exports.",
      "category": "Benchmarks",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Internal buildout",
      "readiness": "Can improve now"
    },
    {
      "id": "research-05",
      "title": "Replace generated benchmark-control metadata with real harness metadata once actual runs exist: raw prompts, exact model/provider identifiers, scorer identity, trace artifacts, and run replay links.",
      "category": "Benchmarks",
      "action": "Replace",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    },
    {
      "id": "research-06",
      "title": "Extend the generated agent benchmark literature map with measured Edxperimental benchmark traces and a combined Agentic Reliability Index formula once real model/agent runs exist.",
      "category": "Research",
      "action": "Extend",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    },
    {
      "id": "research-07",
      "title": "Turn the generated mechanistic interpretability playbook modules into separate long-form article pages if Sanjay wants a full explainer series.",
      "category": "Research",
      "action": "Next",
      "owner": "Sanjay",
      "dependency": "Editorial decision",
      "readiness": "Waiting on input"
    },
    {
      "id": "research-08",
      "title": "Extend the inference economics playbook with measured latency and throughput traces once real provider/GPU benchmark runs exist.",
      "category": "Research",
      "action": "Extend",
      "owner": "Sanjay",
      "dependency": "Real benchmark exports",
      "readiness": "Waiting on input"
    }
  ]
}