disclosure-bureau/investigator-runtime/src/tools/write_evidence.ts

/**
 * write_evidence.ts — Locard's primary writer.
 *
 * The first of the gated write tools (sa-security gate #2). Builds a row in
 * `public.evidence`, allocates E-NNNN via the sequence, writes the matching
 * `case/evidence/E-NNNN.md` markdown file, and audit-logs the act.
 *
 * Discipline (mirrors the spec):
 *  - verbatim_excerpt MUST appear verbatim inside the source chunk content
 *    (we reject if it doesn't — Sonnet is occasionally creative about quoting).
 *  - source_chunk must exist in public.chunks (FK enforced by DB; we also
 *    check up front to give a friendlier error).
 *  - grade A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1.
 *  - related_hypotheses[].hypothesis_id must exist (if provided).
 */
import { mkdir, writeFile } from "node:fs/promises";
import path from "node:path";
import { audit } from "../lib/audit";
import { env } from "../lib/env";
import { allocate } from "../lib/ids";
import { query, queryOne } from "../lib/pg";

/**
 * One link in an evidence item's chain of custody.
 *
 * Steps are serialized as JSON into the evidence row and rendered, in array
 * order, as a numbered list in the case markdown file.
 */
export interface CustodyStep {
  /** Label for this step; rendered in bold at the start of the list item. */
  step: string;

  /** Optional locator, printed after the label separated by an em dash. */
  uri?: string;

  /** Optional digest; only its first 16 characters appear in the markdown. */
  sha256?: string;

  /** Optional free text, printed on its own indented line under the step. */
  notes?: string;
}

/**
 * Payload accepted by `writeEvidence`: the quoted excerpt, where it came
 * from, and the custody metadata that justifies its grade.
 */
export interface WriteEvidenceArgs {
  /** Quoted text; the write is rejected unless it is found in the source chunk. */
  verbatim_excerpt: string;

  /** Matched against `doc_id` in `public.chunks`. */
  source_doc_id: string;

  /** Matched against `chunk_id` in `public.chunks`. */
  source_chunk_id: string;

  /** Page is 1-indexed; we derive source_page_id from doc+page. */
  page?: number;

  /**
   * Optional bounding box of the excerpt, stored as JSON.
   * NOTE(review): coordinate units and origin are not defined in this file.
   */
  bbox?: { x: number; y: number; w: number; h: number };

  /** Evidence grade; A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1. */
  grade: "A" | "B" | "C";

  /** Chain of custody, in order. */
  custody_steps: CustodyStep[];

  /** Known holes in the chain; rendered under a "Gaps" heading when present. */
  custody_gaps?: string[];

  /** Optional confidence label, stored as-is. */
  confidence_band?: "high" | "medium" | "low" | "speculation";

  /** Hypotheses this evidence bears on; `supports: false` is rendered as "refutes". */
  related_hypotheses?: { hypothesis_id: string; supports: boolean }[];
}

/** Who is writing the evidence and on behalf of which job. */
export interface WriteEvidenceContext {
  /** Job identifier; copied into the markdown front matter and the audit entry. */
  job_id: string;

  /** Detective name; stored as `created_by` and included in the audit entry. */
  detective: string;
}

/**
 * Minimum number of custody steps required for each evidence grade.
 *
 * The table is indexed with a grade string that ultimately comes from model
 * output, so it is built on a null prototype: a plain object literal would
 * answer lookups such as "constructor" or "toString" with inherited
 * functions, which defeats a `GRADE_MIN_STEPS[grade] ?? fallback` check.
 */
const GRADE_MIN_STEPS: Record<string, number> = Object.assign(
  Object.create(null) as Record<string, number>,
  { A: 3, B: 2, C: 1 },
);

/**
 * Build the `source_page_id` reference for an evidence row.
 *
 * @param doc     Source document id.
 * @param p       1-indexed page number, if known.
 * @param chunkId Chunk id, appended as the `#` anchor.
 * @returns `<doc>/pNNN#<chunkId>` (page zero-padded to at least three digits)
 *          when `p` is a positive integer, otherwise `<doc>#<chunkId>`.
 */
function pageId(doc: string, p: number | undefined, chunkId: string): string {
  // Only a positive integer is a usable page number. Fractions and Infinity
  // pass a bare `p > 0` test and would yield ids like "doc/p2.5#c", so they
  // take the page-less fallback together with 0, negatives, NaN and undefined.
  if (typeof p === "number" && Number.isInteger(p) && p > 0) {
    return `${doc}/p${String(p).padStart(3, "0")}#${chunkId}`;
  }
  // No usable page: keep the doc + chunk anchor and leave it to the lint to
  // flag the missing page segment later.
  return `${doc}#${chunkId}`;
}

/**
 * Render the markdown case file for one evidence item.
 *
 * The document is a YAML-style front matter block followed by sections for
 * the verbatim excerpt (as a blockquote), the chain of custody, the source
 * links and the linked hypotheses. Optional front matter keys
 * (`source_page`, `confidence_band`, `bbox`) are omitted when not available.
 *
 * @param args.evidence_id Identifier used in the front matter and the title.
 * @param args.body        Evidence payload to render.
 * @param args.ctx         Supplies `created_by` and `job_id`.
 * @returns The full markdown text; `created_at` is the time of rendering.
 */
function renderMd(args: {
  evidence_id: string;
  body: WriteEvidenceArgs;
  ctx: WriteEvidenceContext;
}): string {
  const { evidence_id, body, ctx } = args;

  // Pages are 1-indexed, so only a positive integer is renderable. A plain
  // truthiness test would let -1 or 2.5 through and emit links such as
  // [[doc/p0-1]]; those values are treated as "no page" instead.
  const page =
    typeof body.page === "number" && Number.isInteger(body.page) && body.page > 0
      ? body.page
      : undefined;

  const frontMatter = [
    "---",
    `schema_version: "0.1.0"`,
    `type: evidence`,
    `evidence_id: ${evidence_id}`,
    `source_doc: ${body.source_doc_id}`,
    `source_chunk: ${body.source_chunk_id}`,
    page !== undefined ? `source_page: ${page}` : null,
    `grade: ${body.grade}`,
    body.confidence_band ? `confidence_band: ${body.confidence_band}` : null,
    `created_by: ${ctx.detective}`,
    `job_id: ${ctx.job_id}`,
    `created_at: ${new Date().toISOString()}`,
    body.bbox
      ? `bbox: { x: ${body.bbox.x}, y: ${body.bbox.y}, w: ${body.bbox.w}, h: ${body.bbox.h} }`
      : null,
    "---",
  ]
    .filter((line): line is string => line !== null)
    .join("\n");

  // Numbered list; the digest is truncated to 16 characters for readability
  // and notes go on an indented continuation line under their step.
  const custody = body.custody_steps
    .map((s, i) => {
      const uri = s.uri ? ` — ${s.uri}` : "";
      const digest = s.sha256 ? ` (sha256: \`${s.sha256.slice(0, 16)}…\`)` : "";
      const notes = s.notes ? `\n     ${s.notes}` : "";
      return `  ${i + 1}. **${s.step}**${uri}${digest}${notes}`;
    })
    .join("\n");

  const gaps = (body.custody_gaps ?? []).map((g) => `  - ${g}`).join("\n");

  const links = (body.related_hypotheses ?? [])
    .map((h) => `  - [[hypothesis/${h.hypothesis_id}]] (${h.supports ? "supports" : "refutes"})`)
    .join("\n");

  return [
    frontMatter,
    "",
    `# Evidence ${evidence_id}`,
    "",
    "## Verbatim excerpt (source language)",
    "",
    // Runs of newlines collapse to one so every line stays inside the quote.
    "> " + body.verbatim_excerpt.replace(/\n+/g, "\n> "),
    "",
    "## Chain of custody",
    "",
    custody,
    gaps ? "\n### Gaps\n\n" + gaps : "",
    "",
    "## Source",
    "",
    `- Doc: [[${body.source_doc_id}]]`,
    `- Chunk: [[${body.source_doc_id}#${body.source_chunk_id}]]`,
    page !== undefined
      ? `- Page: [[${body.source_doc_id}/p${String(page).padStart(3, "0")}]]`
      : "",
    "",
    "## Linked hypotheses",
    "",
    links || "_(none yet)_",
    "",
  ].join("\n");
}

/**
 * Validate one evidence payload, persist it, and write its case file.
 *
 * Steps, in order: check the grade and its custody-step minimum; look up the
 * source chunk; confirm the excerpt occurs verbatim in that chunk; confirm
 * every linked hypothesis exists; allocate an evidence id and INSERT the row;
 * write `<CASE_ROOT>/evidence/<evidence_id>.md`; emit an audit entry.
 *
 * The `body` argument is not modified; entries of `related_hypotheses`
 * without a usable `hypothesis_id` are dropped from what gets stored and
 * rendered.
 *
 * NOTE(review): the INSERT, the file write and the audit call are separate
 * awaits with no rollback here, so a failure after the INSERT appears to
 * leave a row without a case file or audit entry — confirm whether that is
 * acceptable or needs compensation.
 *
 * @param body Evidence payload (typically parsed from model output).
 * @param ctx  Job and detective the write is attributed to.
 * @returns The allocated evidence id and the path of the markdown file.
 * @throws Error if the grade is unknown, there are too few custody steps,
 *         the chunk does not exist, the excerpt is not a string / is shorter
 *         than 8 characters / is not found in the chunk, or a linked
 *         hypothesis does not exist. Errors from the database, filesystem
 *         and audit helpers propagate unchanged.
 */
export async function writeEvidence(
  body: WriteEvidenceArgs,
  ctx: WriteEvidenceContext,
): Promise<{ evidence_id: string; case_file: string }> {
  // 1. Validate grade minimums (sa-security gate #2: schema check before INSERT).
  // Own-property lookup only: the grade comes from model output, and a plain
  // index would resolve "constructor"/"toString" to inherited functions,
  // making the step-count comparison below vacuously pass.
  const gradeIsKnown = Object.prototype.hasOwnProperty.call(GRADE_MIN_STEPS, body.grade);
  const minSteps = gradeIsKnown ? GRADE_MIN_STEPS[body.grade] : undefined;
  if (minSteps === undefined) {
    throw new Error(`invalid grade: ${String(body.grade)} (expected A, B or C)`);
  }
  if (!Array.isArray(body.custody_steps) || body.custody_steps.length < minSteps) {
    throw new Error(`grade ${body.grade} requires ≥ ${minSteps} custody steps; got ${body.custody_steps?.length ?? 0}`);
  }

  // 2. Resolve chunk_pk + verify the verbatim excerpt appears in the chunk.
  const chunk = await queryOne<{ chunk_pk: number; content_en: string | null; content_pt: string | null }>(
    `SELECT chunk_pk, content_en, content_pt
       FROM public.chunks
      WHERE doc_id = $1 AND chunk_id = $2
      LIMIT 1`,
    [body.source_doc_id, body.source_chunk_id],
  );
  if (!chunk) {
    throw new Error(`chunk_not_found: ${body.source_doc_id}#${body.source_chunk_id}`);
  }

  if (typeof body.verbatim_excerpt !== "string") {
    throw new Error(`verbatim_excerpt must be a string`);
  }
  const needle = body.verbatim_excerpt.trim();
  if (needle.length < 8) {
    throw new Error(`verbatim_excerpt too short (${needle.length} chars; min 8)`);
  }
  // The WHOLE excerpt must occur in one of the language columns. Checking
  // only a prefix would accept a genuine opening followed by a paraphrased
  // or invented tail, and matching against the two columns joined together
  // could match text that straddles the artificial boundary.
  const foundVerbatim = [chunk.content_en, chunk.content_pt].some(
    (text) => typeof text === "string" && text.includes(needle),
  );
  if (!foundVerbatim) {
    throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
  }

  // 3. (Optional) validate hypothesis refs exist. Drop empty/null IDs silently
  // — Locard may emit `related_hypotheses: [{}]` when it knows of no link yet.
  const validRefs = (body.related_hypotheses ?? []).filter(
    (r) => typeof r?.hypothesis_id === "string" && r.hypothesis_id.trim().length > 0,
  );
  for (const ref of validRefs) {
    const h = await queryOne<{ hypothesis_pk: number }>(
      `SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
      [ref.hypothesis_id],
    );
    if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
  }
  // Work on a copy so the caller's object is left exactly as it was passed in.
  const cleaned: WriteEvidenceArgs = { ...body, related_hypotheses: validRefs };

  // 4. Allocate id + INSERT.
  const evidence_id = await allocate.evidenceId();
  await query(
    `INSERT INTO public.evidence
       (evidence_id, verbatim_excerpt, source_chunk_pk, source_page_id, bbox,
        grade, custody_steps, custody_gaps, confidence_band, related_hypotheses, created_by)
     VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8::jsonb, $9, $10::jsonb, $11)`,
    [
      evidence_id,
      cleaned.verbatim_excerpt,
      chunk.chunk_pk,
      pageId(cleaned.source_doc_id, cleaned.page, cleaned.source_chunk_id),
      cleaned.bbox ? JSON.stringify(cleaned.bbox) : null,
      cleaned.grade,
      JSON.stringify(cleaned.custody_steps),
      cleaned.custody_gaps ? JSON.stringify(cleaned.custody_gaps) : null,
      cleaned.confidence_band ?? null,
      JSON.stringify(validRefs),
      ctx.detective,
    ],
  );

  // 5. Write the case markdown file.
  const dir = path.join(env.CASE_ROOT, "evidence");
  await mkdir(dir, { recursive: true });
  const filepath = path.join(dir, `${evidence_id}.md`);
  await writeFile(filepath, renderMd({ evidence_id, body: cleaned, ctx }), "utf-8");

  // 6. Audit.
  await audit({
    event: "write_evidence",
    job_id: ctx.job_id,
    detective: ctx.detective,
    evidence_id,
    source_doc: cleaned.source_doc_id,
    source_chunk: cleaned.source_chunk_id,
    grade: cleaned.grade,
    file: filepath,
  });

  return { evidence_id, case_file: filepath };
}
W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard Migrations: - 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence, hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id sequences, pg_notify trigger on investigation_jobs, RLS read-only public, investigator role with least-privilege grants (no service_role). - 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE policies bound to investigator + service_role + postgres (RLS with only a SELECT policy was silently blocking the worker's claim UPDATE). investigator-runtime/ (new Bun + TS container): - src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool, healthcheck file, graceful SIGTERM shutdown. - src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard). Marks job failed when all per-item outputs error; surfaces first errors. - src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool + dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc), claude -p subprocess with quota detection (api_error_status=429). - src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps), resolve chunk_pk via FK, verify verbatim_excerpt actually appears in chunk content, INSERT + render case/evidence/E-NNNN.md + audit. - src/detectives/locard.ts: load chunk → call Claude with locard.md system prompt → parse strict JSON → call writeEvidence locally. - Dockerfile installs `claude` CLI (OAuth) at build time. Compose: - new `investigator` service builds from investigator-runtime/, connects with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap. Web: - /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1). POST creates a job, GET polls status. For W3.6 it becomes the chat tool. End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch → claude subprocess invoked. Auth works (CLI v2.1.150). 
Currently quota exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed isQuota error, marks job failed with surfaced reason. Architecture proven; quota reset enables real evidence creation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-23 22:49:33 +00:00			`/**`
			`* write_evidence.ts — Locard's primary writer.`
			`*`
			`* The first of the gated write tools (sa-security gate #2). Builds a row in`
			* `public.evidence`, allocates E-NNNN via the sequence, writes the matching
			* `case/evidence/E-NNNN.md` markdown file, and audit-logs the act.
			`*`
			`* Discipline (mirrors the spec):`
			`* - verbatim_excerpt MUST appear verbatim inside the source chunk content`
			`* (we reject if it doesn't — Sonnet is occasionally creative about quoting).`
			`* - source_chunk must exist in public.chunks (FK enforced by DB; we also`
			`* check up front to give a friendlier error).`
			`* - grade A requires ≥ 3 custody steps, B ≥ 2, C ≥ 1.`
			`* - related_hypotheses[].hypothesis_id must exist (if provided).`
			`*/`
			`import { mkdir, writeFile } from "node:fs/promises";`
			`import path from "node:path";`
			`import { audit } from "../lib/audit";`
			`import { env } from "../lib/env";`
			`import { allocate } from "../lib/ids";`
			`import { query, queryOne } from "../lib/pg";`

			`export interface CustodyStep {`
			`step: string;`
			`uri?: string;`
			`sha256?: string;`
			`notes?: string;`
			`}`

			`export interface WriteEvidenceArgs {`
			`verbatim_excerpt: string;`
			`source_doc_id: string;`
			`source_chunk_id: string;`
			`/** Page is 1-indexed; we derive source_page_id from doc+page. */`
			`page?: number;`
			`bbox?: { x: number; y: number; w: number; h: number };`
			`grade: "A" \| "B" \| "C";`
			`custody_steps: CustodyStep[];`
			`custody_gaps?: string[];`
			`confidence_band?: "high" \| "medium" \| "low" \| "speculation";`
			`related_hypotheses?: Array<{ hypothesis_id: string; supports: boolean }>;`
			`}`

			`export interface WriteEvidenceContext {`
			`job_id: string;`
			`detective: string;`
			`}`

			`const GRADE_MIN_STEPS: Record<string, number> = { A: 3, B: 2, C: 1 };`

			`function pageId(doc: string, p: number \| undefined, chunkId: string): string {`
			`// Prefer explicit page; otherwise we leave it doc-only and the lint`
			`// catches the malformed reference later.`
			`if (typeof p === "number" && p > 0) {`
			return `${doc}/p${String(p).padStart(3, "0")}#${chunkId}`;
			`}`
			return `${doc}#${chunkId}`;
			`}`

			`function renderMd(args: {`
			`evidence_id: string;`
			`body: WriteEvidenceArgs;`
			`ctx: WriteEvidenceContext;`
			`}): string {`
			`const { evidence_id, body, ctx } = args;`
			`const fm = [`
			`"---",`
			`schema_version: "0.1.0"`,
			`type: evidence`,
			`evidence_id: ${evidence_id}`,
			`source_doc: ${body.source_doc_id}`,
			`source_chunk: ${body.source_chunk_id}`,
			body.page ? `source_page: ${body.page}` : null,
			`grade: ${body.grade}`,
			body.confidence_band ? `confidence_band: ${body.confidence_band}` : null,
			`created_by: ${ctx.detective}`,
			`job_id: ${ctx.job_id}`,
			`created_at: ${new Date().toISOString()}`,
			body.bbox ? `bbox: { x: ${body.bbox.x}, y: ${body.bbox.y}, w: ${body.bbox.w}, h: ${body.bbox.h} }` : null,
			`"---",`
			`].filter(Boolean).join("\n");`

			`const custody = body.custody_steps`
			.map((s, i) => ` ${i + 1}. ${s.step}${s.uri ? ` — ${s.uri}` : ""}${s.sha256 ? ` (sha256: \`${s.sha256.slice(0, 16)}…\`)` : ""}${s.notes ? `\n ${s.notes}` : ""}`)
			`.join("\n");`
			const gaps = (body.custody_gaps ?? []).map((g) => ` - ${g}`).join("\n");
			`const links = (body.related_hypotheses ?? [])`
			.map((h) => ` - [[hypothesis/${h.hypothesis_id}]] (${h.supports ? "supports" : "refutes"})`)
			`.join("\n");`

			`return [`
			`fm,`
			`"",`
			`# Evidence ${evidence_id}`,
			`"",`
			`"## Verbatim excerpt (source language)",`
			`"",`
			`"> " + body.verbatim_excerpt.replace(/\n+/g, "\n> "),`
			`"",`
			`"## Chain of custody",`
			`"",`
			`custody,`
			`gaps ? "\n### Gaps\n\n" + gaps : "",`
			`"",`
			`"## Source",`
			`"",`
			`- Doc: [[${body.source_doc_id}]]`,
			`- Chunk: [[${body.source_doc_id}#${body.source_chunk_id}]]`,
			body.page ? `- Page: [[${body.source_doc_id}/p${String(body.page).padStart(3, "0")}]]` : "",
			`"",`
			`"## Linked hypotheses",`
			`"",`
			`links \|\| "_(none yet)_",`
			`"",`
			`].join("\n");`
			`}`

			`export async function writeEvidence(`
			`body: WriteEvidenceArgs,`
			`ctx: WriteEvidenceContext,`
			`): Promise<{ evidence_id: string; case_file: string }> {`
			`// 1. Validate grade minimums (sa-security gate #2: schema check before INSERT).`
			`const minSteps = GRADE_MIN_STEPS[body.grade] ?? 99;`
			`if (!Array.isArray(body.custody_steps) \|\| body.custody_steps.length < minSteps) {`
			throw new Error(`grade ${body.grade} requires ≥ ${minSteps} custody steps; got ${body.custody_steps?.length ?? 0}`);
			`}`

			`// 2. Resolve chunk_pk + verify the verbatim excerpt appears in the chunk.`
			`const chunk = await queryOne<{ chunk_pk: number; content_en: string \| null; content_pt: string \| null }>(`
			`SELECT chunk_pk, content_en, content_pt
			`FROM public.chunks`
			`WHERE doc_id = $1 AND chunk_id = $2`
			LIMIT 1`,
			`[body.source_doc_id, body.source_chunk_id],`
			`);`
			`if (!chunk) {`
			throw new Error(`chunk_not_found: ${body.source_doc_id}#${body.source_chunk_id}`);
			`}`

			const haystack = `${chunk.content_en ?? ""}\n${chunk.content_pt ?? ""}`;
			`const needle = body.verbatim_excerpt.trim();`
			`if (needle.length < 8) {`
			throw new Error(`verbatim_excerpt too short (${needle.length} chars; min 8)`);
			`}`
			`if (!haystack.includes(needle.slice(0, 80))) {`
			throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
			`}`

W3 followup: drop _FOR_WEB token, fix claude CLI args + writer guards, BIGSERIAL grants Token consolidation: - docker-compose web service now reads ${CLAUDE_CODE_OAUTH_TOKEN} directly, drop the W1-F8 CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB indirection (user feedback: one var name, no _FOR_WEB suffix). investigator-runtime claude.ts: - --system-prompt silently dropped by CLI v2.1.150 for multi-KB prompts; inline the system content into the user prompt with a separator (mirrors scripts/reextract/run.py pattern). - Multi-line prompts via positional -- broke ("Input must be provided …"); pipe via stdin instead. - --allowedTools "" is rejected; when no tools wanted, omit it and explicitly --disallowedTools the writer/reader set so the model can't reach for any. investigator-runtime locard.ts: - Log the raw response (first 600 chars) to container stderr — saved hours of debugging when the writer rejected. - Grade fallback: when Locard omits `grade` but provides custody_steps, infer the highest grade that fits (≥3 → A, ≥2 → B, ≥1 → C). investigator-runtime write_evidence.ts: - Filter related_hypotheses entries with empty/null hypothesis_id silently (Locard sometimes emits [{}] when it knows no link yet) instead of failing the whole write. Migration 0006_investigator_serial_sequences.sql: - BIGSERIAL on the 7 investigation tables created auto-sequences (evidence_evidence_pk_seq etc) that 0004 forgot to GRANT to the investigator role. Without those grants every INSERT failed with "permission denied for sequence …". Grant USAGE/SELECT/UPDATE on each auto-seq. Verified live: Locard wrote E-0002 + E-0003 from real Sandia chunks (green fireball Feb 1949; cobalt particle analysis). Grade B, confidence high, custody chain of 3 steps with honest gaps. Cost $0.09 for both, ~70s wall. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-24 00:05:35 +00:00			`// 3. (Optional) validate hypothesis refs exist. Drop empty/null IDs silently`
			// — Locard may emit `related_hypotheses: [{}]` when it knows of no link yet.
			`const validRefs = (body.related_hypotheses ?? []).filter(`
			`(r) => typeof r?.hypothesis_id === "string" && r.hypothesis_id.trim().length > 0,`
			`);`
			`for (const ref of validRefs) {`
W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard Migrations: - 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence, hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id sequences, pg_notify trigger on investigation_jobs, RLS read-only public, investigator role with least-privilege grants (no service_role). - 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE policies bound to investigator + service_role + postgres (RLS with only a SELECT policy was silently blocking the worker's claim UPDATE). investigator-runtime/ (new Bun + TS container): - src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool, healthcheck file, graceful SIGTERM shutdown. - src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard). Marks job failed when all per-item outputs error; surfaces first errors. - src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool + dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc), claude -p subprocess with quota detection (api_error_status=429). - src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps), resolve chunk_pk via FK, verify verbatim_excerpt actually appears in chunk content, INSERT + render case/evidence/E-NNNN.md + audit. - src/detectives/locard.ts: load chunk → call Claude with locard.md system prompt → parse strict JSON → call writeEvidence locally. - Dockerfile installs `claude` CLI (OAuth) at build time. Compose: - new `investigator` service builds from investigator-runtime/, connects with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap. Web: - /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1). POST creates a job, GET polls status. For W3.6 it becomes the chat tool. End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch → claude subprocess invoked. Auth works (CLI v2.1.150). 
Currently quota exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed isQuota error, marks job failed with surfaced reason. Architecture proven; quota reset enables real evidence creation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-23 22:49:33 +00:00			`const h = await queryOne<{ hypothesis_pk: number }>(`
			`SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
			`[ref.hypothesis_id],`
			`);`
			if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
			`}`
W3 followup: drop _FOR_WEB token, fix claude CLI args + writer guards, BIGSERIAL grants Token consolidation: - docker-compose web service now reads ${CLAUDE_CODE_OAUTH_TOKEN} directly, drop the W1-F8 CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB indirection (user feedback: one var name, no _FOR_WEB suffix). investigator-runtime claude.ts: - --system-prompt silently dropped by CLI v2.1.150 for multi-KB prompts; inline the system content into the user prompt with a separator (mirrors scripts/reextract/run.py pattern). - Multi-line prompts via positional -- broke ("Input must be provided …"); pipe via stdin instead. - --allowedTools "" is rejected; when no tools wanted, omit it and explicitly --disallowedTools the writer/reader set so the model can't reach for any. investigator-runtime locard.ts: - Log the raw response (first 600 chars) to container stderr — saved hours of debugging when the writer rejected. - Grade fallback: when Locard omits `grade` but provides custody_steps, infer the highest grade that fits (≥3 → A, ≥2 → B, ≥1 → C). investigator-runtime write_evidence.ts: - Filter related_hypotheses entries with empty/null hypothesis_id silently (Locard sometimes emits [{}] when it knows no link yet) instead of failing the whole write. Migration 0006_investigator_serial_sequences.sql: - BIGSERIAL on the 7 investigation tables created auto-sequences (evidence_evidence_pk_seq etc) that 0004 forgot to GRANT to the investigator role. Without those grants every INSERT failed with "permission denied for sequence …". Grant USAGE/SELECT/UPDATE on each auto-seq. Verified live: Locard wrote E-0002 + E-0003 from real Sandia chunks (green fireball Feb 1949; cobalt particle analysis). Grade B, confidence high, custody chain of 3 steps with honest gaps. Cost $0.09 for both, ~70s wall. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-24 00:05:35 +00:00			`body.related_hypotheses = validRefs;`
W3.1-W3.4: Investigation Bureau foundation — migrations, runtime, Locard Migrations: - 0004_investigation_bureau.sql: 7 new tables (investigation_jobs + evidence, hypotheses, contradictions, witnesses, gaps, residual_uncertainties), id sequences, pg_notify trigger on investigation_jobs, RLS read-only public, investigator role with least-privilege grants (no service_role). - 0005_investigator_write_policies.sql: fixup adding RLS INSERT/UPDATE policies bound to investigator + service_role + postgres (RLS with only a SELECT policy was silently blocking the worker's claim UPDATE). investigator-runtime/ (new Bun + TS container): - src/main.ts: LISTEN/NOTIFY poller, claim-with-SKIP-LOCKED, drain pool, healthcheck file, graceful SIGTERM shutdown. - src/orchestrator.ts: chief-detective dispatch (evidence_chain → Locard). Marks job failed when all per-item outputs error; surfaces first errors. - src/lib/{env,pg,audit,ids,claude}.ts: typed config (gate #8), pool + dedicated LISTEN client, NDJSON audit, sequence allocator (E-NNNN etc), claude -p subprocess with quota detection (api_error_status=429). - src/tools/write_evidence.ts: schema-validate (grade A/B/C custody steps), resolve chunk_pk via FK, verify verbatim_excerpt actually appears in chunk content, INSERT + render case/evidence/E-NNNN.md + audit. - src/detectives/locard.ts: load chunk → call Claude with locard.md system prompt → parse strict JSON → call writeEvidence locally. - Dockerfile installs `claude` CLI (OAuth) at build time. Compose: - new `investigator` service builds from investigator-runtime/, connects with low-privilege role, mounts case/ RW and wiki/+raw/ RO, 512m mem cap. Web: - /api/admin/investigate/test (POST+GET) gated by middleware (W0-F1). POST creates a job, GET polls status. For W3.6 it becomes the chat tool. End-to-end smoke: INSERT job → pg_notify → claim → Locard dispatch → claude subprocess invoked. Auth works (CLI v2.1.150). 
Currently quota exhausted (weekly limit · resets 3pm UTC) — pipeline catches the typed isQuota error, marks job failed with surfaced reason. Architecture proven; quota reset enables real evidence creation. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> 2026-05-23 22:49:33 +00:00
			`// 4. Allocate id + INSERT.`
			`const evidence_id = await allocate.evidenceId();`
			`await query(`
			`INSERT INTO public.evidence
			`(evidence_id, verbatim_excerpt, source_chunk_pk, source_page_id, bbox,`
			`grade, custody_steps, custody_gaps, confidence_band, related_hypotheses, created_by)`
			VALUES ($1, $2, $3, $4, $5::jsonb, $6, $7::jsonb, $8::jsonb, $9, $10::jsonb, $11)`,
			`[`
			`evidence_id,`
			`body.verbatim_excerpt,`
			`chunk.chunk_pk,`
			`pageId(body.source_doc_id, body.page, body.source_chunk_id),`
			`body.bbox ? JSON.stringify(body.bbox) : null,`
			`body.grade,`
			`JSON.stringify(body.custody_steps),`
			`body.custody_gaps ? JSON.stringify(body.custody_gaps) : null,`
			`body.confidence_band ?? null,`
			`JSON.stringify(body.related_hypotheses ?? []),`
			`ctx.detective,`
			`],`
			`);`

			`// 5. Write the case markdown file.`
			`const dir = path.join(env.CASE_ROOT, "evidence");`
			`await mkdir(dir, { recursive: true });`
			const filepath = path.join(dir, `${evidence_id}.md`);
			`await writeFile(filepath, renderMd({ evidence_id, body, ctx }), "utf-8");`

			`// 6. Audit.`
			`await audit({`
			`event: "write_evidence",`
			`job_id: ctx.job_id,`
			`detective: ctx.detective,`
			`evidence_id,`
			`source_doc: body.source_doc_id,`
			`source_chunk: body.source_chunk_id,`
			`grade: body.grade,`
			`file: filepath,`
			`});`

			`return { evidence_id, case_file: filepath };`
			`}`