{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Edxperimental Labs Inference Trace Row",
  "type": "object",
  "additionalProperties": false,
  "required": [
    "route",
    "provider",
    "model",
    "workload",
    "inputTokens",
    "outputTokens",
    "cacheHitRate",
    "batchEligible",
    "concurrency",
    "ttftMs",
    "latencyP50Ms",
    "latencyP95Ms",
    "outputTokensPerSecond",
    "acceptedOutputRate",
    "costUsd",
    "reviewMinutes"
  ],
  "properties": {
    "route": {
      "type": "string",
      "description": "Managed frontier API, hosted open-weight serverless, dedicated endpoint, or self-hosted GPU stack."
    },
    "provider": {
      "type": "string",
      "description": "Provider, deployment owner, or local stack name."
    },
    "model": {
      "type": "string",
      "description": "Exact model identifier, quantization, or endpoint alias."
    },
    "workload": {
      "type": "string",
      "description": "Task family such as support policy, browser extraction, coding agent, or invoice review."
    },
    "inputTokens": {
      "type": "string",
      "description": "Number of input tokens observed for the run."
    },
    "outputTokens": {
      "type": "string",
      "description": "Number of output tokens observed for the run."
    },
    "cacheHitRate": {
      "type": "string",
      "description": "Observed cache hit rate from 0 to 1, or 0 if unavailable."
    },
    "batchEligible": {
      "type": "string",
      "description": "The string \"true\" if the workload can run through a batch/offline lane."
    },
    "concurrency": {
      "type": "string",
      "description": "Concurrent request count during the measurement."
    },
    "ttftMs": {
      "type": "string",
      "description": "Time to first token in milliseconds."
    },
    "latencyP50Ms": {
      "type": "string",
      "description": "p50 end-to-end latency in milliseconds."
    },
    "latencyP95Ms": {
      "type": "string",
      "description": "p95 end-to-end latency in milliseconds."
    },
    "outputTokensPerSecond": {
      "type": "string",
      "description": "Sustained output-token generation rate in tokens per second."
    },
    "acceptedOutputRate": {
      "type": "string",
      "description": "Share of outputs accepted by the evaluator or reviewer."
    },
    "costUsd": {
      "type": "string",
      "description": "Observed or modeled cost in US dollars for this measurement row."
    },
    "reviewMinutes": {
      "type": "string",
      "description": "Human review minutes per output after model generation."
    }
  }
}
