{
  "slug": "feature-dashboards",
  "title": "Feature Dashboards",
  "role": "Make recurring internal features inspectable by operators, reviewers, and deployment owners.",
  "operatorQuestion": "Can a reviewer see when policy, shortcut, refusal, sensitive-domain, or evidence-use features activate?",
  "evidenceToCollect": [
    "Top examples",
    "False positives",
    "False negatives",
    "Activation thresholds",
    "Workflow slices"
  ],
  "usefulOutput": "Monitoring surface for high-risk workflows, paired with eval metrics and human review notes.",
  "limits": "A dashboard can create false confidence if it shows labels without causal evidence or off-distribution checks.",
  "sourceTrail": [
    [
      "Mapping the Mind of a Large Language Model",
      "https://www.anthropic.com/research/mapping-mind-language-model"
    ],
    [
      "Sparse Autoencoders Find Highly Interpretable Features",
      "https://huggingface.co/papers/2309.08600"
    ],
    [
      "Sparse Autoencoder portal",
      "https://www.sparseautoencoder.com/"
    ]
  ],
  "readingTime": "8 min read",
  "explainerPath": "/articles/mechanistic/feature-dashboards",
  "reportPath": "/reports/mechanistic-series/feature-dashboards.md",
  "jsonPath": "/reports/mechanistic-series/feature-dashboards.json",
  "summary": "Feature dashboards matter when a deployment owner needs more than a pass/fail eval. They give the audit team a way to collect internal evidence, test a causal hypothesis, and state the limits before changing a production control.",
  "fieldGuide": [
    {
      "heading": "What the method is for",
      "body": "Make recurring internal features inspectable by operators, reviewers, and deployment owners."
    },
    {
      "heading": "The operator question",
      "body": "Can a reviewer see when policy, shortcut, refusal, sensitive-domain, or evidence-use features activate?"
    },
    {
      "heading": "What the audit should produce",
      "body": "Monitoring surface for high-risk workflows, paired with eval metrics and human review notes."
    },
    {
      "heading": "Where the method fails",
      "body": "A dashboard can create false confidence if it shows labels without causal evidence or off-distribution checks."
    }
  ],
  "visualRows": [
    {
      "label": "Top examples",
      "value": 100,
      "note": "Must be collected before the interpretation is trusted."
    },
    {
      "label": "False positives",
      "value": 87,
      "note": "Use as supporting evidence and keep unresolved ambiguity visible."
    },
    {
      "label": "False negatives",
      "value": 74,
      "note": "Use as supporting evidence and keep unresolved ambiguity visible."
    },
    {
      "label": "Activation thresholds",
      "value": 61,
      "note": "Use as supporting evidence and keep unresolved ambiguity visible."
    },
    {
      "label": "Workflow slices",
      "value": 48,
      "note": "Use as supporting evidence and keep unresolved ambiguity visible."
    }
  ],
  "auditTemplate": [
    [
      "Behavior under review",
      "One narrow failure, policy behavior, shortcut, or refusal pattern."
    ],
    [
      "Candidate mechanism",
      "Policy, shortcut, refusal, sensitive-domain, or evidence-use features whose activation a reviewer should be able to see."
    ],
    [
      "Evidence packet",
      "Top examples; False positives; False negatives; Activation thresholds; Workflow slices"
    ],
    [
      "Decision boundary",
      "What can change in production if the causal claim survives review."
    ],
    [
      "Limit memo",
      "A dashboard can create false confidence if it shows labels without causal evidence or off-distribution checks."
    ]
  ]
}