{
  "version": "0.1.0",
  "generatedAt": "2026-05-16T06:10:00+05:30",
  "purpose": "Indian Enterprise Workflow Benchmark v0.1 seed dataset design for public samples and private holdouts.",
  "taskCount": 30,
  "publicTaskCount": 18,
  "holdoutTaskCount": 12,
  "splitRatio": "60/40",
  "domains": [
    {
      "domain": "Finance",
      "count": 4,
      "public": 2,
      "holdout": 2
    },
    {
      "domain": "Support",
      "count": 4,
      "public": 2,
      "holdout": 2
    },
    {
      "domain": "Sales Ops",
      "count": 4,
      "public": 2,
      "holdout": 2
    },
    {
      "domain": "Legal",
      "count": 4,
      "public": 2,
      "holdout": 2
    },
    {
      "domain": "Procurement",
      "count": 4,
      "public": 3,
      "holdout": 1
    },
    {
      "domain": "HR Ops",
      "count": 4,
      "public": 3,
      "holdout": 1
    },
    {
      "domain": "Healthcare Admin",
      "count": 3,
      "public": 2,
      "holdout": 1
    },
    {
      "domain": "Field Operations",
      "count": 3,
      "public": 2,
      "holdout": 1
    }
  ],
  "tasks": [
    {
      "id": "gst-credit-note-reconciliation",
      "title": "GST Credit Note Reconciliation",
      "domain": "Finance",
      "split": "public",
      "difficulty": "Medium",
      "brief": "Reconcile a GST credit note against an invoice, PO, and vendor email trail.",
      "expectedEvidence": [
        "invoice line",
        "credit note",
        "vendor email"
      ],
      "expectedOutput": "Cite the mismatch, calculate corrected payable, and draft a vendor note.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "tds-deduction-query",
      "title": "TDS Deduction Query",
      "domain": "Finance",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Answer a vendor's TDS deduction query using policy, invoice, and payment ledger evidence.",
      "expectedEvidence": [
        "TDS policy",
        "payment ledger",
        "invoice total"
      ],
      "expectedOutput": "Explain deduction basis and identify whether finance escalation is needed.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "advance-payment-variance",
      "title": "Advance Payment Variance",
      "domain": "Finance",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Compare advance payment terms against milestone completion notes and payment status.",
      "expectedEvidence": [
        "contract clause",
        "milestone note",
        "payment status"
      ],
      "expectedOutput": "Identify variance and recommend release, hold, or review.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "quarterly-budget-exception",
      "title": "Quarterly Budget Exception",
      "domain": "Finance",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Assess whether a department budget exception should be approved from emails and approval rules.",
      "expectedEvidence": [
        "budget policy",
        "email approval",
        "expense category"
      ],
      "expectedOutput": "Classify approval path and cite the missing approval if any.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "hindi-english-refund-escalation",
      "title": "Hindi-English Refund Escalation",
      "domain": "Support",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Classify a mixed Hindi-English refund ticket and apply policy with delivery/payment evidence.",
      "expectedEvidence": [
        "refund policy",
        "delivery timestamp",
        "payment status"
      ],
      "expectedOutput": "Reply safely and escalate only the payment reconciliation issue.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "regional-language-complaint-routing",
      "title": "Regional Language Complaint Routing",
      "domain": "Support",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Route a Kannada-English complaint to the correct queue using policy and order history.",
      "expectedEvidence": [
        "complaint text",
        "order history",
        "queue policy"
      ],
      "expectedOutput": "Classify intent, preserve tone, and request only missing evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "warranty-exception-policy",
      "title": "Warranty Exception Policy",
      "domain": "Support",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Decide if a warranty exception applies from product age, damage note, and support policy.",
      "expectedEvidence": [
        "warranty policy",
        "purchase date",
        "damage note"
      ],
      "expectedOutput": "Approve, deny, or escalate with a cited reason.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "upi-refund-status-response",
      "title": "UPI Refund Status Response",
      "domain": "Support",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Draft a UPI refund status response using payment gateway status and customer complaint text.",
      "expectedEvidence": [
        "gateway status",
        "complaint",
        "SLA policy"
      ],
      "expectedOutput": "Explain next update window without overpromising refund completion.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "enterprise-demo-lead-prioritization",
      "title": "Enterprise Demo Lead Prioritization",
      "domain": "Sales Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Prioritize inbound demo leads using firmographic notes, ICP rules, and sales-owner capacity.",
      "expectedEvidence": [
        "lead form",
        "ICP rubric",
        "owner calendar"
      ],
      "expectedOutput": "Rank leads and route each to Sanjay, Saujas, nurture, or reject.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "rfp-requirement-gap-map",
      "title": "RFP Requirement Gap Map",
      "domain": "Sales Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Map RFP requirements to product capabilities and flag unsupported AI-evaluation claims.",
      "expectedEvidence": [
        "RFP table",
        "capability matrix",
        "case-study note"
      ],
      "expectedOutput": "Create a gap table and identify claims that need evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "pricing-proposal-risk-check",
      "title": "Pricing Proposal Risk Check",
      "domain": "Sales Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Review a proposal draft for pricing-risk language and missing benchmark assumptions.",
      "expectedEvidence": [
        "proposal draft",
        "pricing sheet",
        "benchmark assumption"
      ],
      "expectedOutput": "Return edits and unresolved questions for the sales engineer.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "renewal-churn-risk-summary",
      "title": "Renewal Churn Risk Summary",
      "domain": "Sales Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Summarize renewal churn risk from customer emails, usage notes, and support tickets.",
      "expectedEvidence": [
        "usage note",
        "support tickets",
        "renewal email"
      ],
      "expectedOutput": "Produce risk reasons and next action with evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "vendor-contract-renewal-risk",
      "title": "Vendor Contract Renewal Risk",
      "domain": "Legal",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Review a vendor renewal clause and flag auto-renewal, liability, and notice risks.",
      "expectedEvidence": [
        "contract clause",
        "notice date",
        "liability cap"
      ],
      "expectedOutput": "Draft a legal-ops risk memo with escalation recommendation.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "nda-exception-review",
      "title": "NDA Exception Review",
      "domain": "Legal",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Compare an NDA exception clause against company template and sales context.",
      "expectedEvidence": [
        "NDA redline",
        "template clause",
        "sales note"
      ],
      "expectedOutput": "Identify acceptable, negotiable, and legal-review changes.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "data-processing-addendum-gap",
      "title": "Data Processing Addendum Gap",
      "domain": "Legal",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Inspect a DPA checklist for missing subprocessors, data regions, and breach notice terms.",
      "expectedEvidence": [
        "DPA checklist",
        "vendor security page",
        "contract term"
      ],
      "expectedOutput": "Return a gap memo and owner routing.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "invoice-dispute-contract-evidence",
      "title": "Invoice Dispute Contract Evidence",
      "domain": "Legal",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Resolve an invoice dispute by citing contract payment terms and delivery evidence.",
      "expectedEvidence": [
        "contract term",
        "invoice",
        "delivery acceptance"
      ],
      "expectedOutput": "Explain whether the dispute is valid and what to request next.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "procurement-vendor-shortlist",
      "title": "Procurement Vendor Shortlist",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Create a vendor shortlist from pricing, security notes, and workflow-fit requirements.",
      "expectedEvidence": [
        "vendor pricing",
        "security note",
        "workflow rubric"
      ],
      "expectedOutput": "Rank vendors with risk and proof gaps.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "security-questionnaire-triage",
      "title": "Security Questionnaire Triage",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Triage a vendor security questionnaire into answered, unknown, and legal-review fields.",
      "expectedEvidence": [
        "questionnaire",
        "security docs",
        "policy"
      ],
      "expectedOutput": "Produce a fill plan and missing evidence list.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "purchase-approval-routing",
      "title": "Purchase Approval Routing",
      "domain": "Procurement",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Route a purchase approval based on amount, category, vendor status, and urgency.",
      "expectedEvidence": [
        "purchase request",
        "approval matrix",
        "vendor record"
      ],
      "expectedOutput": "Return approver path and blocked fields.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "supplier-performance-escalation",
      "title": "Supplier Performance Escalation",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Assess supplier performance from SLA misses, email thread, and scorecard history.",
      "expectedEvidence": [
        "SLA log",
        "email thread",
        "scorecard"
      ],
      "expectedOutput": "Recommend continue, warning, or escalation with evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "offer-letter-policy-check",
      "title": "Offer Letter Policy Check",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Check an offer letter draft against compensation band and benefits policy.",
      "expectedEvidence": [
        "offer draft",
        "comp band",
        "benefits policy"
      ],
      "expectedOutput": "Flag mismatches and draft HR-safe corrections.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "employee-helpdesk-leave-query",
      "title": "Employee Helpdesk Leave Query",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium",
      "brief": "Answer an employee leave query using policy, attendance data, and manager note.",
      "expectedEvidence": [
        "leave policy",
        "attendance record",
        "manager note"
      ],
      "expectedOutput": "Explain entitlement and escalation path.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "background-verification-exception",
      "title": "Background Verification Exception",
      "domain": "HR Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Decide how to handle a background verification exception from vendor report and HR policy.",
      "expectedEvidence": [
        "vendor report",
        "HR policy",
        "candidate note"
      ],
      "expectedOutput": "Route to HR review and identify missing evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "payroll-discrepancy-diagnosis",
      "title": "Payroll Discrepancy Diagnosis",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Diagnose payroll discrepancy from payslip, attendance, reimbursement, and policy evidence.",
      "expectedEvidence": [
        "payslip",
        "attendance",
        "reimbursement record"
      ],
      "expectedOutput": "Identify discrepancy reason and next correction step.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "insurance-claim-document-check",
      "title": "Insurance Claim Document Check",
      "domain": "Healthcare Admin",
      "split": "public",
      "difficulty": "Medium",
      "brief": "Check whether a patient insurance claim packet has the documents required for submission.",
      "expectedEvidence": [
        "claim form",
        "discharge summary",
        "insurer checklist"
      ],
      "expectedOutput": "Return submit/hold decision and missing documents.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "appointment-escalation-triage",
      "title": "Appointment Escalation Triage",
      "domain": "Healthcare Admin",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Triage appointment escalation messages by urgency, specialty, and patient risk signals.",
      "expectedEvidence": [
        "patient message",
        "triage policy",
        "appointment record"
      ],
      "expectedOutput": "Route to correct queue without giving medical advice.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "lab-report-delivery-query",
      "title": "Lab Report Delivery Query",
      "domain": "Healthcare Admin",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Respond to a lab-report delivery query using SLA, payment status, and delivery channel logs.",
      "expectedEvidence": [
        "SLA",
        "payment status",
        "delivery log"
      ],
      "expectedOutput": "Draft safe operations response with next update window.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "field-agent-visit-proof",
      "title": "Field Agent Visit Proof",
      "domain": "Field Operations",
      "split": "public",
      "difficulty": "Medium",
      "brief": "Verify a field visit claim from GPS note, photo metadata, and customer confirmation.",
      "expectedEvidence": [
        "GPS note",
        "photo metadata",
        "customer confirmation"
      ],
      "expectedOutput": "Approve, reject, or request review with evidence.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "service-outage-customer-brief",
      "title": "Service Outage Customer Brief",
      "domain": "Field Operations",
      "split": "public",
      "difficulty": "Medium-Hard",
      "brief": "Prepare a customer brief for a service outage using incident notes and repair ETA.",
      "expectedEvidence": [
        "incident note",
        "repair ETA",
        "customer SLA"
      ],
      "expectedOutput": "Explain status, ETA, and escalation boundary.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    },
    {
      "id": "regional-installation-exception",
      "title": "Regional Installation Exception",
      "domain": "Field Operations",
      "split": "holdout",
      "difficulty": "Hard",
      "brief": "Handle a regional installation exception with mixed-language notes and policy constraints.",
      "expectedEvidence": [
        "installer note",
        "policy",
        "customer consent"
      ],
      "expectedOutput": "Recommend next action and escalation path.",
      "scoringRubric": [
        {
          "dimension": "Outcome correctness",
          "weight": 35
        },
        {
          "dimension": "Evidence citation",
          "weight": 25
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20
        },
        {
          "dimension": "Localization and tone",
          "weight": 10
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10
        }
      ]
    }
  ],
  "goldPacketCount": 30,
  "publicGoldPacketCount": 18,
  "holdoutGoldPacketCount": 12,
  "goldPackets": [
    {
      "taskId": "gst-credit-note-reconciliation",
      "title": "GST Credit Note Reconciliation",
      "domain": "Finance",
      "split": "public",
      "difficulty": "Medium",
      "sourcePacketId": "gst-credit-note-reconciliation-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Reconcile a GST credit note against an invoice, PO, and vendor email trail.",
        "redactedArtifacts": [
          {
            "artifactId": "gst-credit-note-reconciliation-artifact-1",
            "label": "invoice line",
            "excerpt": "Redacted sample excerpt for invoice line; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "gst-credit-note-reconciliation-artifact-2",
            "label": "credit note",
            "excerpt": "Redacted sample excerpt for credit note; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "gst-credit-note-reconciliation-artifact-3",
            "label": "vendor email",
            "excerpt": "Redacted sample excerpt for vendor email; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Cite the mismatch, calculate corrected payable, and draft a vendor note. The answer must cite the invoice line, credit note, and vendor email, and must avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses invoice line and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Finance workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: invoice line, credit note, vendor email."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Finance workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Finance workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "gst-credit-note-reconciliation",
        "sourcePacketId": "gst-credit-note-reconciliation-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/gst-credit-note-reconciliation.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/gst-credit-note-reconciliation.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "tds-deduction-query",
      "title": "Tds Deduction Query",
      "domain": "Finance",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "tds-deduction-query-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Answer a vendor's TDS deduction query using policy, invoice, and payment ledger evidence.",
        "redactedArtifacts": [
          {
            "artifactId": "tds-deduction-query-artifact-1",
            "label": "TDS policy",
            "excerpt": "Redacted sample excerpt for TDS policy; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "tds-deduction-query-artifact-2",
            "label": "payment ledger",
            "excerpt": "Redacted sample excerpt for payment ledger; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "tds-deduction-query-artifact-3",
            "label": "invoice total",
            "excerpt": "Redacted sample excerpt for invoice total; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Explain deduction basis and identify whether finance escalation is needed. The answer must cite TDS policy, payment ledger, invoice total and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses TDS policy and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Finance workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: TDS policy, payment ledger, invoice total."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Finance workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Finance workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "tds-deduction-query",
        "sourcePacketId": "tds-deduction-query-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/tds-deduction-query.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/tds-deduction-query.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "advance-payment-variance",
      "title": "Advance Payment Variance",
      "domain": "Finance",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "advance-payment-variance-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Compare advance payment terms against milestone completion notes and payment status.",
        "redactedArtifacts": [
          {
            "artifactId": "advance-payment-variance-artifact-1",
            "label": "contract clause",
            "excerpt": "Private holdout excerpt for contract clause; store full source in the harness artifact vault."
          },
          {
            "artifactId": "advance-payment-variance-artifact-2",
            "label": "milestone note",
            "excerpt": "Private holdout excerpt for milestone note; store full source in the harness artifact vault."
          },
          {
            "artifactId": "advance-payment-variance-artifact-3",
            "label": "payment status",
            "excerpt": "Private holdout excerpt for payment status; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Identify variance and recommend release, hold, or review. The answer must cite contract clause, milestone note, payment status and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses contract clause and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Finance workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: contract clause, milestone note, payment status."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Finance workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Finance workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "advance-payment-variance",
        "sourcePacketId": "advance-payment-variance-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/advance-payment-variance.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/advance-payment-variance.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "quarterly-budget-exception",
      "title": "Quarterly Budget Exception",
      "domain": "Finance",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "quarterly-budget-exception-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Assess whether a department budget exception should be approved from emails and approval rules.",
        "redactedArtifacts": [
          {
            "artifactId": "quarterly-budget-exception-artifact-1",
            "label": "budget policy",
            "excerpt": "Private holdout excerpt for budget policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "quarterly-budget-exception-artifact-2",
            "label": "email approval",
            "excerpt": "Private holdout excerpt for email approval; store full source in the harness artifact vault."
          },
          {
            "artifactId": "quarterly-budget-exception-artifact-3",
            "label": "expense category",
            "excerpt": "Private holdout excerpt for expense category; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Classify approval path and cite the missing approval if any. The answer must cite budget policy, email approval, expense category and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses budget policy and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Finance workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: budget policy, email approval, expense category."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Finance workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Finance workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "quarterly-budget-exception",
        "sourcePacketId": "quarterly-budget-exception-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/quarterly-budget-exception.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/quarterly-budget-exception.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "hindi-english-refund-escalation",
      "title": "Hindi English Refund Escalation",
      "domain": "Support",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "hindi-english-refund-escalation-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Classify a mixed Hindi-English refund ticket and apply policy with delivery/payment evidence.",
        "redactedArtifacts": [
          {
            "artifactId": "hindi-english-refund-escalation-artifact-1",
            "label": "refund policy",
            "excerpt": "Private holdout excerpt for refund policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "hindi-english-refund-escalation-artifact-2",
            "label": "delivery timestamp",
            "excerpt": "Private holdout excerpt for delivery timestamp; store full source in the harness artifact vault."
          },
          {
            "artifactId": "hindi-english-refund-escalation-artifact-3",
            "label": "payment status",
            "excerpt": "Private holdout excerpt for payment status; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Reply safely and escalate only the payment reconciliation issue. The answer must cite refund policy, delivery timestamp, payment status and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses refund policy and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Support workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: refund policy, delivery timestamp, payment status."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Support workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Support workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "hindi-english-refund-escalation",
        "sourcePacketId": "hindi-english-refund-escalation-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/hindi-english-refund-escalation.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/hindi-english-refund-escalation.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "regional-language-complaint-routing",
      "title": "Regional Language Complaint Routing",
      "domain": "Support",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "regional-language-complaint-routing-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Route a Kannada-English complaint to the correct queue using policy and order history.",
        "redactedArtifacts": [
          {
            "artifactId": "regional-language-complaint-routing-artifact-1",
            "label": "complaint text",
            "excerpt": "Redacted sample excerpt for complaint text; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "regional-language-complaint-routing-artifact-2",
            "label": "order history",
            "excerpt": "Redacted sample excerpt for order history; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "regional-language-complaint-routing-artifact-3",
            "label": "queue policy",
            "excerpt": "Redacted sample excerpt for queue policy; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Classify intent, preserve tone, and request only missing evidence. The answer must cite complaint text, order history, queue policy and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses complaint text and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Support workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: complaint text, order history, queue policy."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Support workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Support workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "regional-language-complaint-routing",
        "sourcePacketId": "regional-language-complaint-routing-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/regional-language-complaint-routing.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/regional-language-complaint-routing.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "warranty-exception-policy",
      "title": "Warranty Exception Policy",
      "domain": "Support",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "warranty-exception-policy-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Decide if a warranty exception applies from product age, damage note, and support policy.",
        "redactedArtifacts": [
          {
            "artifactId": "warranty-exception-policy-artifact-1",
            "label": "warranty policy",
            "excerpt": "Private holdout excerpt for warranty policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "warranty-exception-policy-artifact-2",
            "label": "purchase date",
            "excerpt": "Private holdout excerpt for purchase date; store full source in the harness artifact vault."
          },
          {
            "artifactId": "warranty-exception-policy-artifact-3",
            "label": "damage note",
            "excerpt": "Private holdout excerpt for damage note; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Approve, deny, or escalate with a cited reason. The answer must cite warranty policy, purchase date, damage note and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses warranty policy and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Support workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: warranty policy, purchase date, damage note."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Support workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Support workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "warranty-exception-policy",
        "sourcePacketId": "warranty-exception-policy-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/warranty-exception-policy.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/warranty-exception-policy.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "upi-refund-status-response",
      "title": "Upi Refund Status Response",
      "domain": "Support",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "upi-refund-status-response-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Draft a UPI refund status response using payment gateway status and customer complaint text.",
        "redactedArtifacts": [
          {
            "artifactId": "upi-refund-status-response-artifact-1",
            "label": "gateway status",
            "excerpt": "Redacted sample excerpt for gateway status; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "upi-refund-status-response-artifact-2",
            "label": "complaint",
            "excerpt": "Redacted sample excerpt for complaint; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "upi-refund-status-response-artifact-3",
            "label": "SLA policy",
            "excerpt": "Redacted sample excerpt for SLA policy; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Explain next update window without overpromising refund completion. The answer must cite gateway status, complaint, SLA policy and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses gateway status and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Support workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: gateway status, complaint, SLA policy."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Support workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Support workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "upi-refund-status-response",
        "sourcePacketId": "upi-refund-status-response-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/upi-refund-status-response.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/upi-refund-status-response.md"
      },
      "priority": "first-harness-wave"
    },
    {
      "taskId": "enterprise-demo-lead-prioritization",
      "title": "Enterprise Demo Lead Prioritization",
      "domain": "Sales Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "enterprise-demo-lead-prioritization-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Prioritize inbound demo leads using firmographic notes, ICP rules, and sales-owner capacity.",
        "redactedArtifacts": [
          {
            "artifactId": "enterprise-demo-lead-prioritization-artifact-1",
            "label": "lead form",
            "excerpt": "Redacted sample excerpt for lead form; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "enterprise-demo-lead-prioritization-artifact-2",
            "label": "ICP rubric",
            "excerpt": "Redacted sample excerpt for ICP rubric; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "enterprise-demo-lead-prioritization-artifact-3",
            "label": "owner calendar",
            "excerpt": "Redacted sample excerpt for owner calendar; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Rank leads and route each to Sanjay, Saujas, nurture, or reject. The answer must cite lead form, ICP rubric, owner calendar and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses lead form and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Sales Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: lead form, ICP rubric, owner calendar."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Sales Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Sales Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "enterprise-demo-lead-prioritization",
        "sourcePacketId": "enterprise-demo-lead-prioritization-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/enterprise-demo-lead-prioritization.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/enterprise-demo-lead-prioritization.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "rfp-requirement-gap-map",
      "title": "Rfp Requirement Gap Map",
      "domain": "Sales Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "rfp-requirement-gap-map-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Map RFP requirements to product capabilities and flag unsupported AI-evaluation claims.",
        "redactedArtifacts": [
          {
            "artifactId": "rfp-requirement-gap-map-artifact-1",
            "label": "RFP table",
            "excerpt": "Private holdout excerpt for RFP table; store full source in the harness artifact vault."
          },
          {
            "artifactId": "rfp-requirement-gap-map-artifact-2",
            "label": "capability matrix",
            "excerpt": "Private holdout excerpt for capability matrix; store full source in the harness artifact vault."
          },
          {
            "artifactId": "rfp-requirement-gap-map-artifact-3",
            "label": "case-study note",
            "excerpt": "Private holdout excerpt for case-study note; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Create a gap table and identify claims that need evidence. The answer must cite RFP table, capability matrix, case-study note and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses RFP table and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Sales Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: RFP table, capability matrix, case-study note."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Sales Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Sales Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "rfp-requirement-gap-map",
        "sourcePacketId": "rfp-requirement-gap-map-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/rfp-requirement-gap-map.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/rfp-requirement-gap-map.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "pricing-proposal-risk-check",
      "title": "Pricing Proposal Risk Check",
      "domain": "Sales Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "pricing-proposal-risk-check-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Review a proposal draft for pricing-risk language and missing benchmark assumptions.",
        "redactedArtifacts": [
          {
            "artifactId": "pricing-proposal-risk-check-artifact-1",
            "label": "proposal draft",
            "excerpt": "Private holdout excerpt for proposal draft; store full source in the harness artifact vault."
          },
          {
            "artifactId": "pricing-proposal-risk-check-artifact-2",
            "label": "pricing sheet",
            "excerpt": "Private holdout excerpt for pricing sheet; store full source in the harness artifact vault."
          },
          {
            "artifactId": "pricing-proposal-risk-check-artifact-3",
            "label": "benchmark assumption",
            "excerpt": "Private holdout excerpt for benchmark assumption; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Return edits and unresolved questions for the sales engineer. The answer must cite proposal draft, pricing sheet, benchmark assumption and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses proposal draft and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Sales Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: proposal draft, pricing sheet, benchmark assumption."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Sales Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Sales Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "pricing-proposal-risk-check",
        "sourcePacketId": "pricing-proposal-risk-check-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/pricing-proposal-risk-check.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/pricing-proposal-risk-check.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "renewal-churn-risk-summary",
      "title": "Renewal Churn Risk Summary",
      "domain": "Sales Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "renewal-churn-risk-summary-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Summarize renewal churn risk from customer emails, usage notes, and support tickets.",
        "redactedArtifacts": [
          {
            "artifactId": "renewal-churn-risk-summary-artifact-1",
            "label": "usage note",
            "excerpt": "Redacted sample excerpt for usage note; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "renewal-churn-risk-summary-artifact-2",
            "label": "support tickets",
            "excerpt": "Redacted sample excerpt for support tickets; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "renewal-churn-risk-summary-artifact-3",
            "label": "renewal email",
            "excerpt": "Redacted sample excerpt for renewal email; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Produce risk reasons and next action with evidence. The answer must cite usage note, support tickets, renewal email and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses usage note and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Sales Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: usage note, support tickets, renewal email."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Sales Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Sales Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "renewal-churn-risk-summary",
        "sourcePacketId": "renewal-churn-risk-summary-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/renewal-churn-risk-summary.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/renewal-churn-risk-summary.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "vendor-contract-renewal-risk",
      "title": "Vendor Contract Renewal Risk",
      "domain": "Legal",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "vendor-contract-renewal-risk-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Review a vendor renewal clause and flag auto-renewal, liability, and notice risks.",
        "redactedArtifacts": [
          {
            "artifactId": "vendor-contract-renewal-risk-artifact-1",
            "label": "contract clause",
            "excerpt": "Private holdout excerpt for contract clause; store full source in the harness artifact vault."
          },
          {
            "artifactId": "vendor-contract-renewal-risk-artifact-2",
            "label": "notice date",
            "excerpt": "Private holdout excerpt for notice date; store full source in the harness artifact vault."
          },
          {
            "artifactId": "vendor-contract-renewal-risk-artifact-3",
            "label": "liability cap",
            "excerpt": "Private holdout excerpt for liability cap; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Draft a legal-ops risk memo with escalation recommendation. The answer must cite contract clause, notice date, liability cap and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses contract clause and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Legal workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: contract clause, notice date, liability cap."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Legal workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Legal workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "vendor-contract-renewal-risk",
        "sourcePacketId": "vendor-contract-renewal-risk-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/vendor-contract-renewal-risk.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/vendor-contract-renewal-risk.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "nda-exception-review",
      "title": "Nda Exception Review",
      "domain": "Legal",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "nda-exception-review-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Compare an NDA exception clause against company template and sales context.",
        "redactedArtifacts": [
          {
            "artifactId": "nda-exception-review-artifact-1",
            "label": "NDA redline",
            "excerpt": "Redacted sample excerpt for NDA redline; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "nda-exception-review-artifact-2",
            "label": "template clause",
            "excerpt": "Redacted sample excerpt for template clause; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "nda-exception-review-artifact-3",
            "label": "sales note",
            "excerpt": "Redacted sample excerpt for sales note; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Identify acceptable, negotiable, and legal-review changes. The answer must cite NDA redline, template clause, sales note and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses NDA redline and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Legal workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: NDA redline, template clause, sales note."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Legal workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Legal workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "nda-exception-review",
        "sourcePacketId": "nda-exception-review-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/nda-exception-review.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/nda-exception-review.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "data-processing-addendum-gap",
      "title": "Data Processing Addendum Gap",
      "domain": "Legal",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "data-processing-addendum-gap-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Inspect a DPA checklist for missing subprocessors, data regions, and breach notice terms.",
        "redactedArtifacts": [
          {
            "artifactId": "data-processing-addendum-gap-artifact-1",
            "label": "DPA checklist",
            "excerpt": "Redacted sample excerpt for DPA checklist; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "data-processing-addendum-gap-artifact-2",
            "label": "vendor security page",
            "excerpt": "Redacted sample excerpt for vendor security page; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "data-processing-addendum-gap-artifact-3",
            "label": "contract term",
            "excerpt": "Redacted sample excerpt for contract term; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Return a gap memo and owner routing. The answer must cite DPA checklist, vendor security page, contract term and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses DPA checklist and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Legal workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: DPA checklist, vendor security page, contract term."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Legal workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Legal workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "data-processing-addendum-gap",
        "sourcePacketId": "data-processing-addendum-gap-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/data-processing-addendum-gap.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/data-processing-addendum-gap.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "invoice-dispute-contract-evidence",
      "title": "Invoice Dispute Contract Evidence",
      "domain": "Legal",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "invoice-dispute-contract-evidence-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Resolve an invoice dispute by citing contract payment terms and delivery evidence.",
        "redactedArtifacts": [
          {
            "artifactId": "invoice-dispute-contract-evidence-artifact-1",
            "label": "contract term",
            "excerpt": "Private holdout excerpt for contract term; store full source in the harness artifact vault."
          },
          {
            "artifactId": "invoice-dispute-contract-evidence-artifact-2",
            "label": "invoice",
            "excerpt": "Private holdout excerpt for invoice; store full source in the harness artifact vault."
          },
          {
            "artifactId": "invoice-dispute-contract-evidence-artifact-3",
            "label": "delivery acceptance",
            "excerpt": "Private holdout excerpt for delivery acceptance; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Explain whether the dispute is valid and what to request next. The answer must cite contract term, invoice, delivery acceptance and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses contract term and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Legal workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: contract term, invoice, delivery acceptance."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Legal workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Legal workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "invoice-dispute-contract-evidence",
        "sourcePacketId": "invoice-dispute-contract-evidence-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/invoice-dispute-contract-evidence.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/invoice-dispute-contract-evidence.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "procurement-vendor-shortlist",
      "title": "Procurement Vendor Shortlist",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "procurement-vendor-shortlist-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Create a vendor shortlist from pricing, security notes, and workflow-fit requirements.",
        "redactedArtifacts": [
          {
            "artifactId": "procurement-vendor-shortlist-artifact-1",
            "label": "vendor pricing",
            "excerpt": "Redacted sample excerpt for vendor pricing; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "procurement-vendor-shortlist-artifact-2",
            "label": "security note",
            "excerpt": "Redacted sample excerpt for security note; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "procurement-vendor-shortlist-artifact-3",
            "label": "workflow rubric",
            "excerpt": "Redacted sample excerpt for workflow rubric; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Rank vendors with risk and proof gaps. The answer must cite vendor pricing, security note, workflow rubric and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses vendor pricing and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Procurement workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: vendor pricing, security note, workflow rubric."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Procurement workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Procurement workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "procurement-vendor-shortlist",
        "sourcePacketId": "procurement-vendor-shortlist-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/procurement-vendor-shortlist.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/procurement-vendor-shortlist.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "security-questionnaire-triage",
      "title": "Security Questionnaire Triage",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "security-questionnaire-triage-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Triage a vendor security questionnaire into answered, unknown, and legal-review fields.",
        "redactedArtifacts": [
          {
            "artifactId": "security-questionnaire-triage-artifact-1",
            "label": "questionnaire",
            "excerpt": "Redacted sample excerpt for questionnaire; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "security-questionnaire-triage-artifact-2",
            "label": "security docs",
            "excerpt": "Redacted sample excerpt for security docs; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "security-questionnaire-triage-artifact-3",
            "label": "policy",
            "excerpt": "Redacted sample excerpt for policy; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Produce a fill plan and missing evidence list. The answer must cite questionnaire, security docs, policy and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses questionnaire and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Procurement workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: questionnaire, security docs, policy."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Procurement workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Procurement workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "security-questionnaire-triage",
        "sourcePacketId": "security-questionnaire-triage-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/security-questionnaire-triage.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/security-questionnaire-triage.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "purchase-approval-routing",
      "title": "Purchase Approval Routing",
      "domain": "Procurement",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "purchase-approval-routing-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Route a purchase approval based on amount, category, vendor status, and urgency.",
        "redactedArtifacts": [
          {
            "artifactId": "purchase-approval-routing-artifact-1",
            "label": "purchase request",
            "excerpt": "Private holdout excerpt for purchase request; store full source in the harness artifact vault."
          },
          {
            "artifactId": "purchase-approval-routing-artifact-2",
            "label": "approval matrix",
            "excerpt": "Private holdout excerpt for approval matrix; store full source in the harness artifact vault."
          },
          {
            "artifactId": "purchase-approval-routing-artifact-3",
            "label": "vendor record",
            "excerpt": "Private holdout excerpt for vendor record; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Return approver path and blocked fields. The answer must cite purchase request, approval matrix, vendor record and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses purchase request and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Procurement workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: purchase request, approval matrix, vendor record."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Procurement workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Procurement workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "purchase-approval-routing",
        "sourcePacketId": "purchase-approval-routing-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/purchase-approval-routing.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/purchase-approval-routing.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "supplier-performance-escalation",
      "title": "Supplier Performance Escalation",
      "domain": "Procurement",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "supplier-performance-escalation-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Assess supplier performance from SLA misses, email thread, and scorecard history.",
        "redactedArtifacts": [
          {
            "artifactId": "supplier-performance-escalation-artifact-1",
            "label": "SLA log",
            "excerpt": "Redacted sample excerpt for SLA log; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "supplier-performance-escalation-artifact-2",
            "label": "email thread",
            "excerpt": "Redacted sample excerpt for email thread; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "supplier-performance-escalation-artifact-3",
            "label": "scorecard",
            "excerpt": "Redacted sample excerpt for scorecard; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Recommend continue, warning, or escalation with evidence. The answer must cite SLA log, email thread, scorecard and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses SLA log and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Procurement workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: SLA log, email thread, scorecard."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Procurement workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Procurement workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "supplier-performance-escalation",
        "sourcePacketId": "supplier-performance-escalation-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/supplier-performance-escalation.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/supplier-performance-escalation.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "offer-letter-policy-check",
      "title": "Offer Letter Policy Check",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "offer-letter-policy-check-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Check an offer letter draft against compensation band and benefits policy.",
        "redactedArtifacts": [
          {
            "artifactId": "offer-letter-policy-check-artifact-1",
            "label": "offer draft",
            "excerpt": "Redacted sample excerpt for offer draft; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "offer-letter-policy-check-artifact-2",
            "label": "comp band",
            "excerpt": "Redacted sample excerpt for comp band; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "offer-letter-policy-check-artifact-3",
            "label": "benefits policy",
            "excerpt": "Redacted sample excerpt for benefits policy; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Flag mismatches and draft HR-safe corrections. The answer must cite offer draft, comp band, benefits policy and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses offer draft and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for HR Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: offer draft, comp band, benefits policy."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for HR Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for HR Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "offer-letter-policy-check",
        "sourcePacketId": "offer-letter-policy-check-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/offer-letter-policy-check.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/offer-letter-policy-check.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "employee-helpdesk-leave-query",
      "title": "Employee Helpdesk Leave Query",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium",
      "sourcePacketId": "employee-helpdesk-leave-query-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Answer an employee leave query using policy, attendance data, and manager note.",
        "redactedArtifacts": [
          {
            "artifactId": "employee-helpdesk-leave-query-artifact-1",
            "label": "leave policy",
            "excerpt": "Redacted sample excerpt for leave policy; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "employee-helpdesk-leave-query-artifact-2",
            "label": "attendance record",
            "excerpt": "Redacted sample excerpt for attendance record; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "employee-helpdesk-leave-query-artifact-3",
            "label": "manager note",
            "excerpt": "Redacted sample excerpt for manager note; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Explain entitlement and escalation path. The answer must cite leave policy, attendance record, manager note and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses leave policy and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for HR Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: leave policy, attendance record, manager note."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for HR Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for HR Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "employee-helpdesk-leave-query",
        "sourcePacketId": "employee-helpdesk-leave-query-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/employee-helpdesk-leave-query.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/employee-helpdesk-leave-query.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "background-verification-exception",
      "title": "Background Verification Exception",
      "domain": "HR Ops",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "background-verification-exception-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Decide how to handle a background verification exception from vendor report and HR policy.",
        "redactedArtifacts": [
          {
            "artifactId": "background-verification-exception-artifact-1",
            "label": "vendor report",
            "excerpt": "Private holdout excerpt for vendor report; store full source in the harness artifact vault."
          },
          {
            "artifactId": "background-verification-exception-artifact-2",
            "label": "HR policy",
            "excerpt": "Private holdout excerpt for HR policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "background-verification-exception-artifact-3",
            "label": "candidate note",
            "excerpt": "Private holdout excerpt for candidate note; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Route to HR review and identify missing evidence. The answer must cite vendor report, HR policy, candidate note and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses vendor report and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for HR Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: vendor report, HR policy, candidate note."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for HR Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for HR Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "background-verification-exception",
        "sourcePacketId": "background-verification-exception-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/background-verification-exception.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/background-verification-exception.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "payroll-discrepancy-diagnosis",
      "title": "Payroll Discrepancy Diagnosis",
      "domain": "HR Ops",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "payroll-discrepancy-diagnosis-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Diagnose payroll discrepancy from payslip, attendance, reimbursement, and policy evidence.",
        "redactedArtifacts": [
          {
            "artifactId": "payroll-discrepancy-diagnosis-artifact-1",
            "label": "payslip",
            "excerpt": "Redacted sample excerpt for payslip; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "payroll-discrepancy-diagnosis-artifact-2",
            "label": "attendance",
            "excerpt": "Redacted sample excerpt for attendance; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "payroll-discrepancy-diagnosis-artifact-3",
            "label": "reimbursement record",
            "excerpt": "Redacted sample excerpt for reimbursement record; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Identify discrepancy reason and next correction step. The answer must cite payslip, attendance, reimbursement record and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses payslip and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for HR Ops workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: payslip, attendance, reimbursement record."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for HR Ops workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for HR Ops workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "payroll-discrepancy-diagnosis",
        "sourcePacketId": "payroll-discrepancy-diagnosis-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/payroll-discrepancy-diagnosis.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/payroll-discrepancy-diagnosis.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "insurance-claim-document-check",
      "title": "Insurance Claim Document Check",
      "domain": "Healthcare Admin",
      "split": "public",
      "difficulty": "Medium",
      "sourcePacketId": "insurance-claim-document-check-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Check whether a patient insurance claim packet has the documents required for submission.",
        "redactedArtifacts": [
          {
            "artifactId": "insurance-claim-document-check-artifact-1",
            "label": "claim form",
            "excerpt": "Redacted sample excerpt for claim form; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "insurance-claim-document-check-artifact-2",
            "label": "discharge summary",
            "excerpt": "Redacted sample excerpt for discharge summary; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "insurance-claim-document-check-artifact-3",
            "label": "insurer checklist",
            "excerpt": "Redacted sample excerpt for insurer checklist; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Return submit/hold decision and missing documents. The answer must cite claim form, discharge summary, insurer checklist and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses claim form and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: claim form, discharge summary, insurer checklist."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Healthcare Admin workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "insurance-claim-document-check",
        "sourcePacketId": "insurance-claim-document-check-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/insurance-claim-document-check.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/insurance-claim-document-check.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "appointment-escalation-triage",
      "title": "Appointment Escalation Triage",
      "domain": "Healthcare Admin",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "appointment-escalation-triage-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Triage appointment escalation messages by urgency, specialty, and patient risk signals.",
        "redactedArtifacts": [
          {
            "artifactId": "appointment-escalation-triage-artifact-1",
            "label": "patient message",
            "excerpt": "Private holdout excerpt for patient message; store full source in the harness artifact vault."
          },
          {
            "artifactId": "appointment-escalation-triage-artifact-2",
            "label": "triage policy",
            "excerpt": "Private holdout excerpt for triage policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "appointment-escalation-triage-artifact-3",
            "label": "appointment record",
            "excerpt": "Private holdout excerpt for appointment record; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Route to correct queue without giving medical advice. The answer must cite patient message, triage policy, appointment record and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses patient message and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: patient message, triage policy, appointment record."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Healthcare Admin workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "appointment-escalation-triage",
        "sourcePacketId": "appointment-escalation-triage-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/appointment-escalation-triage.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/appointment-escalation-triage.md"
      },
      "priority": "holdout-backlog"
    },
    {
      "taskId": "lab-report-delivery-query",
      "title": "Lab Report Delivery Query",
      "domain": "Healthcare Admin",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "lab-report-delivery-query-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Respond to a lab-report delivery query using SLA, payment status, and delivery channel logs.",
        "redactedArtifacts": [
          {
            "artifactId": "lab-report-delivery-query-artifact-1",
            "label": "SLA",
            "excerpt": "Redacted sample excerpt for SLA; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "lab-report-delivery-query-artifact-2",
            "label": "payment status",
            "excerpt": "Redacted sample excerpt for payment status; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "lab-report-delivery-query-artifact-3",
            "label": "delivery log",
            "excerpt": "Redacted sample excerpt for delivery log; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Draft safe operations response with next update window. The answer must cite SLA, payment status, delivery log and avoid adding facts outside the source packet.",
      "reviewer": "Sanjay Prasad",
      "reviewerNotes": [
        "Primary reviewer: Sanjay Prasad.",
        "Accept only if the response explicitly uses SLA and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: SLA, payment status, delivery log."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Healthcare Admin workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Healthcare Admin workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "lab-report-delivery-query",
        "sourcePacketId": "lab-report-delivery-query-source-packet-v0.1",
        "split": "public",
        "reviewer": "Sanjay Prasad",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/lab-report-delivery-query.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/lab-report-delivery-query.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "field-agent-visit-proof",
      "title": "Field Agent Visit Proof",
      "domain": "Field Operations",
      "split": "public",
      "difficulty": "Medium",
      "sourcePacketId": "field-agent-visit-proof-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Verify a field visit claim from GPS note, photo metadata, and customer confirmation.",
        "redactedArtifacts": [
          {
            "artifactId": "field-agent-visit-proof-artifact-1",
            "label": "GPS note",
            "excerpt": "Redacted sample excerpt for GPS note; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "field-agent-visit-proof-artifact-2",
            "label": "photo metadata",
            "excerpt": "Redacted sample excerpt for photo metadata; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "field-agent-visit-proof-artifact-3",
            "label": "customer confirmation",
            "excerpt": "Redacted sample excerpt for customer confirmation; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Approve, reject, or request review with evidence. The answer must cite GPS note, photo metadata, customer confirmation and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses GPS note and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Field Operations workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: GPS note, photo metadata, customer confirmation."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Field Operations workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Field Operations workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "field-agent-visit-proof",
        "sourcePacketId": "field-agent-visit-proof-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/field-agent-visit-proof.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/field-agent-visit-proof.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "service-outage-customer-brief",
      "title": "Service Outage Customer Brief",
      "domain": "Field Operations",
      "split": "public",
      "difficulty": "Medium-Hard",
      "sourcePacketId": "service-outage-customer-brief-source-packet-v0.1",
      "redactionLevel": "public-redacted-sample",
      "sourcePacket": {
        "businessContext": "Prepare a customer brief for a service outage using incident notes and repair ETA.",
        "redactedArtifacts": [
          {
            "artifactId": "service-outage-customer-brief-artifact-1",
            "label": "incident note",
            "excerpt": "Redacted sample excerpt for incident note; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "service-outage-customer-brief-artifact-2",
            "label": "repair ETA",
            "excerpt": "Redacted sample excerpt for repair ETA; enough context for methodology review without exposing client data."
          },
          {
            "artifactId": "service-outage-customer-brief-artifact-3",
            "label": "customer SLA",
            "excerpt": "Redacted sample excerpt for customer SLA; enough context for methodology review without exposing client data."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Explain status, ETA, and escalation boundary. The answer must cite incident note, repair ETA, customer SLA and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses incident note and at least one other evidence item.",
        "Public packet: safe to expose as a methodology sample after redaction.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Field Operations workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: incident note, repair ETA, customer SLA."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Field Operations workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Field Operations workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "service-outage-customer-brief",
        "sourcePacketId": "service-outage-customer-brief-source-packet-v0.1",
        "split": "public",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/service-outage-customer-brief.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/service-outage-customer-brief.md"
      },
      "priority": "public-backlog"
    },
    {
      "taskId": "regional-installation-exception",
      "title": "Regional Installation Exception",
      "domain": "Field Operations",
      "split": "holdout",
      "difficulty": "Hard",
      "sourcePacketId": "regional-installation-exception-source-packet-v0.1",
      "redactionLevel": "private-holdout-summary",
      "sourcePacket": {
        "businessContext": "Handle a regional installation exception with mixed-language notes and policy constraints.",
        "redactedArtifacts": [
          {
            "artifactId": "regional-installation-exception-artifact-1",
            "label": "installer note",
            "excerpt": "Private holdout excerpt for installer note; store full source in the harness artifact vault."
          },
          {
            "artifactId": "regional-installation-exception-artifact-2",
            "label": "policy",
            "excerpt": "Private holdout excerpt for policy; store full source in the harness artifact vault."
          },
          {
            "artifactId": "regional-installation-exception-artifact-3",
            "label": "customer consent",
            "excerpt": "Private holdout excerpt for customer consent; store full source in the harness artifact vault."
          }
        ],
        "blockedInformation": [
          "real customer names",
          "client identifiers",
          "account numbers",
          "private policy text",
          "raw email addresses"
        ]
      },
      "goldAnswer": "Recommend next action and escalation path. The answer must cite installer note, policy, customer consent and avoid adding facts outside the source packet.",
      "reviewer": "Saujas",
      "reviewerNotes": [
        "Primary reviewer: Saujas.",
        "Accept only if the response explicitly uses installer note and at least one other evidence item.",
        "Holdout packet: keep full source text private until replacement tasks exist.",
        "Reject responses that overpromise, invent policy, skip escalation boundaries, or omit evidence."
      ],
      "scoringChecklist": [
        {
          "dimension": "Outcome correctness",
          "weight": 35,
          "passCondition": "Meets the outcome correctness standard for Field Operations workflows."
        },
        {
          "dimension": "Evidence citation",
          "weight": 25,
          "passCondition": "Cites the relevant source packet artifacts by label: installer note, policy, customer consent."
        },
        {
          "dimension": "Escalation judgment",
          "weight": 20,
          "passCondition": "Escalates only when the packet evidence leaves a policy, finance, legal, or operations decision unresolved."
        },
        {
          "dimension": "Localization and tone",
          "weight": 10,
          "passCondition": "Meets the localization and tone standard for Field Operations workflows."
        },
        {
          "dimension": "Cost-aware brevity",
          "weight": 10,
          "passCondition": "Meets the cost-aware brevity standard for Field Operations workflows."
        }
      ],
      "harnessImportRow": {
        "taskId": "regional-installation-exception",
        "sourcePacketId": "regional-installation-exception-source-packet-v0.1",
        "split": "holdout",
        "reviewer": "Saujas",
        "goldAnswerPath": "/reports/indian-workflow-dataset/gold-packets/regional-installation-exception.json",
        "sourcePacketPath": "/reports/indian-workflow-dataset/gold-packets/regional-installation-exception.md"
      },
      "priority": "holdout-backlog"
    }
  ],
  "goldPacketManifestPath": "/reports/indian-workflow-dataset/gold-packets/manifest.json",
  "nextHarnessStep": "Replace task briefs with redacted source packets, gold answers, reviewer identities, and model run exports before using the dataset for public rankings."
}
