W3 followup: drop _FOR_WEB token, fix claude CLI args + writer guards, BIGSERIAL grants

Token consolidation:
- docker-compose web service now reads ${CLAUDE_CODE_OAUTH_TOKEN} directly, drop the W1-F8 CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB indirection (user feedback: one var name, no _FOR_WEB suffix).

investigator-runtime claude.ts:
- --system-prompt silently dropped by CLI v2.1.150 for multi-KB prompts; inline the system content into the user prompt with a separator (mirrors scripts/reextract/run.py pattern).
- Multi-line prompts via positional -- broke ("Input must be provided …"); pipe via stdin instead.
- --allowedTools "" is rejected; when no tools wanted, omit it and explicitly --disallowedTools the writer/reader set so the model can't reach for any.

investigator-runtime locard.ts:
- Log the raw response (first 600 chars) to container stderr — saved hours of debugging when the writer rejected.
- Grade fallback: when Locard omits `grade` but provides custody_steps, infer the highest grade that fits (≥3 → A, ≥2 → B, ≥1 → C).

investigator-runtime write_evidence.ts:
- Filter related_hypotheses entries with empty/null hypothesis_id silently (Locard sometimes emits [{}] when it knows no link yet) instead of failing the whole write.

Migration 0006_investigator_serial_sequences.sql:
- BIGSERIAL on the 7 investigation tables created auto-sequences (evidence_evidence_pk_seq etc) that 0004 forgot to GRANT to the investigator role. Without those grants every INSERT failed with "permission denied for sequence …". Grant USAGE/SELECT/UPDATE on each auto-seq.

Verified live: Locard wrote E-0002 + E-0003 from real Sandia chunks (green fireball Feb 1949; cobalt particle analysis). Grade B, confidence high, custody chain of 3 steps with honest gaps. Cost $0.09 for both, ~70s wall.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 21:05:35 -03:00 · 2026-05-23 21:05:35 -03:00 · 54a26f8db8
commit 54a26f8db8
parent 189a771cbe
5 changed files with 74 additions and 11 deletions
--- a/infra/disclosure-stack/docker-compose.yml
+++ b/infra/disclosure-stack/docker-compose.yml
@@ -330,10 +330,11 @@ services:
      # W1-TD#10: bump pg pool from default 5 to 20 (chat agent + hybrid_search
      # can saturate the smaller pool under concurrent load).
      PG_POOL_MAX: ${PG_POOL_MAX:-20}
-      # Chat agent (W1-F8: CLAUDE_CODE_OAUTH_TOKEN only injected when the
-      # provider actually uses it — default provider is openrouter, so the token
-      # stays absent from this container's env unless CHAT_PROVIDER=claude-code).
-      CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB:-}
+      # Chat agent. Single source of truth — `CLAUDE_CODE_OAUTH_TOKEN` is the
+      # only OAuth var in the stack. The investigator-runtime reads the same
+      # one. When CHAT_PROVIDER=openrouter (default) the web container ignores
+      # this var at runtime, so no harm in exposing it.
+      CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN}
      CLAUDE_CODE_MODEL: ${CLAUDE_CODE_MODEL}
      OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
      OPENROUTER_MODEL: ${OPENROUTER_MODEL}
--- a/infra/supabase/migrations/0006_investigator_serial_sequences.sql
+++ b/infra/supabase/migrations/0006_investigator_serial_sequences.sql
@@ -0,0 +1,32 @@
+-- 0006_investigator_serial_sequences.sql
+--
+-- BIGSERIAL on each investigation table created auto-sequences (e.g.
+-- `evidence_evidence_pk_seq`) that 0004 forgot to GRANT to the investigator
+-- role. Without those grants, every INSERT failed with:
+--   permission denied for sequence evidence_evidence_pk_seq
+--
+-- Idempotent. Apply as supabase_admin.
+
+BEGIN;
+
+DO $$
+DECLARE
+  s TEXT;  -- name of the sequence handled by the current iteration
+BEGIN  -- grant sequence privileges to the `investigator` role, one sequence at a time
+  FOREACH s IN ARRAY ARRAY[
+    'evidence_evidence_pk_seq',
+    'hypotheses_hypothesis_pk_seq',
+    'contradictions_contradiction_pk_seq',
+    'witnesses_witness_pk_seq',
+    'gaps_gap_pk_seq',
+    'residual_uncertainties_ru_pk_seq'
+  ]
+  LOOP  -- NOTE(review): six sequences are listed; confirm no BIGSERIAL table from 0004 is missing
+    IF EXISTS (SELECT 1 FROM pg_class WHERE relname = s AND relkind = 'S' AND relnamespace = 'public'::regnamespace) THEN  -- probe only `public`, the schema the GRANT targets
+      EXECUTE format('GRANT USAGE, SELECT, UPDATE ON SEQUENCE public.%I TO investigator', s);  -- %I quotes the identifier
+    END IF;  -- sequences not present in `public` are skipped without error
+  END LOOP;
+END
+$$;
+
+COMMIT;
--- a/investigator-runtime/src/detectives/locard.ts
+++ b/investigator-runtime/src/detectives/locard.ts
@@ -130,6 +130,11 @@ export async function runLocard(task: LocardTask): Promise<
    duration_ms: llm.durationMs,
  });

+  // Surface the raw response in container logs so we can diagnose the prompt
+  // when validation downstream rejects (free-tier-style models sometimes drop
+  // required fields).
+  console.error(`[locard] response (${llm.text.length} chars): ${llm.text.slice(0, 600)}`);
+
  const parsed = extractJson(llm.text);
  if (parsed === null) {
    return { skipped: true, reason: "NO_EVIDENCE" };
@@ -143,6 +148,13 @@ export async function runLocard(task: LocardTask): Promise<
  if (!args.page) args.page = chunk.page;
  if (!args.bbox && chunk.bbox) args.bbox = chunk.bbox;

+  // Grade fallback: if Locard forgot to emit one but provided custody_steps,
+  // infer the highest grade that fits. Better than rejecting outright.
+  if (!args.grade && Array.isArray(args.custody_steps)) {
+    const n = args.custody_steps.length;
+    args.grade = n >= 3 ? "A" : n >= 2 ? "B" : n >= 1 ? "C" : undefined;
+  }
+
  const result = await writeEvidence(args as WriteEvidenceArgs, {
    job_id: task.job_id,
    detective: "locard@detective",
--- a/investigator-runtime/src/lib/claude.ts
+++ b/investigator-runtime/src/lib/claude.ts
@@ -56,21 +56,34 @@ export async function callClaude(args: ClaudeCallArgs): Promise<ClaudeCallResult
    "--output-format", "json",
    "--max-turns", "8",
  ];
-  if (tools.length > 0) cliArgs.push("--allowedTools", tools.join(","));
-  else cliArgs.push("--allowedTools", "");
-  if (args.systemPrompt) cliArgs.push("--system-prompt", args.systemPrompt);
+  if (tools.length > 0) {
+    cliArgs.push("--allowedTools", tools.join(","));
+  } else {
+    // The CLI rejects `--allowedTools ""` ("argument missing"). When no tools
+    // are wanted, omit the allowlist and instead refuse every tool via
+    // `--disallowedTools` so the model truly cannot reach for Read/Bash/etc.
+    cliArgs.push("--disallowedTools", "Read,Write,Edit,Bash,Glob,Grep,Task,WebSearch,WebFetch");
+  }
  if (args.addDir) cliArgs.push("--add-dir", args.addDir);
-  cliArgs.push("--", args.prompt);
+  // Pipe the prompt via stdin. The `-- <positional arg>` path doesn't survive
+  // long multi-line content reliably (the CLI complained "Input must be
+  // provided either through stdin or as a prompt argument when using --print"
+  // for prompts past a few KB). Stdin is unambiguous.
+  const fullPrompt = args.systemPrompt
+    ? `${args.systemPrompt}\n\n---\n\n${args.prompt}`
+    : args.prompt;

  const t0 = Date.now();
  return await new Promise<ClaudeCallResult>((resolve, reject) => {
    const child = spawn("claude", cliArgs, {
-      stdio: ["ignore", "pipe", "pipe"],
+      stdio: ["pipe", "pipe", "pipe"],
      env: {
        ...process.env,
        // CLAUDE_CODE_OAUTH_TOKEN is read by the CLI itself.
      },
    });
+    child.stdin.write(fullPrompt);
+    child.stdin.end();
    let stdout = "";
    let stderr = "";
    child.stdout.on("data", (c) => (stdout += c.toString()));
--- a/investigator-runtime/src/tools/write_evidence.ts
+++ b/investigator-runtime/src/tools/write_evidence.ts
@@ -146,14 +146,19 @@ export async function writeEvidence(
    throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
  }

-  // 3. (Optional) validate hypothesis refs exist.
-  for (const ref of body.related_hypotheses ?? []) {
+  // 3. (Optional) validate hypothesis refs exist. Drop empty/null IDs silently
+  // — Locard may emit `related_hypotheses: [{}]` when it knows of no link yet.
+  const validRefs = (body.related_hypotheses ?? []).filter(
+    (r) => typeof r?.hypothesis_id === "string" && r.hypothesis_id.trim().length > 0,
+  );
+  for (const ref of validRefs) {
    const h = await queryOne<{ hypothesis_pk: number }>(
      `SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
      [ref.hypothesis_id],
    );
    if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
  }
+  body.related_hypotheses = validRefs;

  // 4. Allocate id + INSERT.
  const evidence_id = await allocate.evidenceId();