{
  "version": "0.1.0",
  "generatedAt": "2026-05-16T00:00:00+05:30",
  "canonicalPath": "/case-studies/on-device-classification",
  "markdownPath": "/reports/case-study-packets/on-device-classification.md",
  "jsonPath": "/reports/case-study-packets/on-device-classification.json",
  "slug": "on-device-classification",
  "title": "On-device AI Classification System",
  "client": "US-based stealth startup",
  "summary": "A hybrid classification architecture combining lightweight on-device inference with server-side LLM routing for a large category taxonomy.",
  "outcome": "Designed for low-latency local decisions with deeper cloud classification when confidence drops.",
  "image": "/case-studies/event4.png",
  "challenge": "The product needed to classify inputs across a very large taxonomy while preserving fast local behavior and avoiding unnecessary cloud calls.",
  "stats": [
    "8000+ categories",
    "Hybrid inference",
    "DSPy optimization",
    "Prompt evaluation loop"
  ],
  "stack": [
    "Small LLM inference",
    "Server LLM classifier",
    "DSPy",
    "Text-gradient style prompt iteration"
  ],
  "approach": [
    "Split classification into local confidence checks and deeper server-side routing for ambiguous cases.",
    "Designed prompt/eval loops around category confusion, not only aggregate accuracy.",
    "Used lightweight local inference where latency mattered and server LLMs where context depth mattered.",
    "Prepared a measurement plan for confidence thresholds, fallback rates, and taxonomy drift."
  ],
  "evidence": [
    [
      "Taxonomy scale",
      "8000+ categories"
    ],
    [
      "Architecture",
      "Hybrid local/cloud"
    ],
    [
      "Optimization loop",
      "DSPy-style"
    ],
    [
      "Primary risk controlled",
      "Low-confidence routing"
    ]
  ],
  "timeline": [
    "Taxonomy analysis",
    "Local classifier prototype",
    "Server LLM fallback",
    "Prompt optimization",
    "Evaluation plan"
  ],
  "risks": [
    "Category drift",
    "False confidence",
    "Server fallback cost",
    "Ambiguous labels"
  ],
  "nextEvidenceStep": "Replace provisional metrics with client-approved screenshots, raw artifacts, and final numbers when available."
}