W3 followup: drop _FOR_WEB token, fix claude CLI args + writer guards, BIGSERIAL grants
Token consolidation:
- docker-compose web service now reads ${CLAUDE_CODE_OAUTH_TOKEN} directly,
dropping the W1-F8 CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB indirection (user feedback:
one var name, no _FOR_WEB suffix).
investigator-runtime claude.ts:
- --system-prompt is silently dropped by CLI v2.1.150 for multi-KB prompts;
inline the system content into the user prompt with a separator
(mirrors scripts/reextract/run.py pattern).
- Multi-line prompts via positional -- broke ("Input must be provided …");
pipe via stdin instead.
- --allowedTools "" is rejected; when no tools are wanted, omit it and explicitly
pass --disallowedTools with the reader/writer tool set so the model can't reach for any.
investigator-runtime locard.ts:
- Log the raw response (first 600 chars) to container stderr — saved hours
of debugging when the writer rejected.
- Grade fallback: when Locard omits `grade` but provides custody_steps,
infer the highest grade that fits (≥3 → A, ≥2 → B, ≥1 → C).
investigator-runtime write_evidence.ts:
- Silently filter out related_hypotheses entries with an empty/null hypothesis_id
(Locard sometimes emits [{}] when it knows no link yet) instead of
failing the whole write.
Migration 0006_investigator_serial_sequences.sql:
- BIGSERIAL on the 7 investigation tables created auto-sequences
(evidence_evidence_pk_seq etc) that 0004 forgot to GRANT to the
investigator role. Without those grants every INSERT failed with
"permission denied for sequence …". Grant USAGE/SELECT/UPDATE on each
auto-seq.
Verified live: Locard wrote E-0002 + E-0003 from real Sandia chunks
(green fireball Feb 1949; cobalt particle analysis). Grade B, confidence
high, custody chain of 3 steps with honest gaps. Cost $0.09 for both,
~70s wall.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
189a771cbe
commit
54a26f8db8
5 changed files with 74 additions and 11 deletions
|
|
@ -330,10 +330,11 @@ services:
|
|||
# W1-TD#10: bump pg pool from default 5 to 20 (chat agent + hybrid_search
|
||||
# can saturate the smaller pool under concurrent load).
|
||||
PG_POOL_MAX: ${PG_POOL_MAX:-20}
|
||||
# Chat agent (W1-F8: CLAUDE_CODE_OAUTH_TOKEN only injected when the
|
||||
# provider actually uses it — default provider is openrouter, so the token
|
||||
# stays absent from this container's env unless CHAT_PROVIDER=claude-code).
|
||||
CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN_FOR_WEB:-}
|
||||
# Chat agent. Single source of truth — `CLAUDE_CODE_OAUTH_TOKEN` is the
|
||||
# only OAuth var in the stack. The investigator-runtime reads the same
|
||||
# one. When CHAT_PROVIDER=openrouter (default) the web container ignores
|
||||
# this var at runtime, so no harm in exposing it.
|
||||
CLAUDE_CODE_OAUTH_TOKEN: ${CLAUDE_CODE_OAUTH_TOKEN}
|
||||
CLAUDE_CODE_MODEL: ${CLAUDE_CODE_MODEL}
|
||||
OPENROUTER_API_KEY: ${OPENROUTER_API_KEY}
|
||||
OPENROUTER_MODEL: ${OPENROUTER_MODEL}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,32 @@
|
|||
-- 0006_investigator_serial_sequences.sql
|
||||
--
|
||||
-- BIGSERIAL on each investigation table created auto-sequences (e.g.
|
||||
-- `evidence_evidence_pk_seq`) that 0004 forgot to GRANT to the investigator
|
||||
-- role. Without those grants, every INSERT failed with:
|
||||
-- permission denied for sequence evidence_evidence_pk_seq
|
||||
--
|
||||
-- Idempotent. Apply as supabase_admin.
|
||||
|
||||
BEGIN;

-- Grant the `investigator` role access to the auto-created BIGSERIAL sequences
-- so that INSERTs relying on the column default (nextval) stop failing with
-- "permission denied for sequence …".
--
-- Safe to re-run: GRANT is a no-op when the privilege is already held, and a
-- sequence that does not exist in `public` is skipped with a NOTICE instead of
-- aborting the transaction.
DO $$
DECLARE
  s TEXT;  -- name of the sequence currently being processed
BEGIN
  -- NOTE(review): the commit message mentions 7 investigation tables, but only
  -- 6 sequences are listed here — confirm that none is missing.
  FOREACH s IN ARRAY ARRAY[
    'evidence_evidence_pk_seq',
    'hypotheses_hypothesis_pk_seq',
    'contradictions_contradiction_pk_seq',
    'witnesses_witness_pk_seq',
    'gaps_gap_pk_seq',
    'residual_uncertainties_ru_pk_seq'
  ]
  LOOP
    -- The existence check must be limited to the `public` schema because the
    -- GRANT below is schema-qualified: a same-named sequence living in another
    -- schema would otherwise pass the check and make the GRANT raise.
    IF EXISTS (
      SELECT 1
      FROM pg_class
      WHERE relname = s
        AND relkind = 'S'
        AND relnamespace = 'public'::regnamespace
    ) THEN
      EXECUTE format('GRANT USAGE, SELECT, UPDATE ON SEQUENCE public.%I TO investigator', s);
    ELSE
      -- Make the skip visible in the migration log rather than silent.
      RAISE NOTICE 'sequence public.% not found; skipping grant', s;
    END IF;
  END LOOP;
END
$$;

COMMIT;
|
||||
|
|
@ -130,6 +130,11 @@ export async function runLocard(task: LocardTask): Promise<
|
|||
duration_ms: llm.durationMs,
|
||||
});
|
||||
|
||||
// Surface the raw response in container logs so we can diagnose the prompt
|
||||
// when validation downstream rejects (free-tier-style models sometimes drop
|
||||
// required fields).
|
||||
console.error(`[locard] response (${llm.text.length} chars): ${llm.text.slice(0, 600)}`);
|
||||
|
||||
const parsed = extractJson(llm.text);
|
||||
if (parsed === null) {
|
||||
return { skipped: true, reason: "NO_EVIDENCE" };
|
||||
|
|
@ -143,6 +148,13 @@ export async function runLocard(task: LocardTask): Promise<
|
|||
if (!args.page) args.page = chunk.page;
|
||||
if (!args.bbox && chunk.bbox) args.bbox = chunk.bbox;
|
||||
|
||||
// Grade fallback: if Locard forgot to emit one but provided custody_steps,
|
||||
// infer the highest grade that fits. Better than rejecting outright.
|
||||
if (!args.grade && Array.isArray(args.custody_steps)) {
|
||||
const n = args.custody_steps.length;
|
||||
args.grade = n >= 3 ? "A" : n >= 2 ? "B" : n >= 1 ? "C" : undefined;
|
||||
}
|
||||
|
||||
const result = await writeEvidence(args as WriteEvidenceArgs, {
|
||||
job_id: task.job_id,
|
||||
detective: "locard@detective",
|
||||
|
|
|
|||
|
|
@ -56,21 +56,34 @@ export async function callClaude(args: ClaudeCallArgs): Promise<ClaudeCallResult
|
|||
"--output-format", "json",
|
||||
"--max-turns", "8",
|
||||
];
|
||||
if (tools.length > 0) cliArgs.push("--allowedTools", tools.join(","));
|
||||
else cliArgs.push("--allowedTools", "");
|
||||
if (args.systemPrompt) cliArgs.push("--system-prompt", args.systemPrompt);
|
||||
if (tools.length > 0) {
|
||||
cliArgs.push("--allowedTools", tools.join(","));
|
||||
} else {
|
||||
// The CLI rejects `--allowedTools ""` ("argument missing"). When no tools
|
||||
// are wanted, omit the allowlist and instead refuse every tool via
|
||||
// `--disallowedTools` so the model truly cannot reach for Read/Bash/etc.
|
||||
cliArgs.push("--disallowedTools", "Read,Write,Edit,Bash,Glob,Grep,Task,WebSearch,WebFetch");
|
||||
}
|
||||
if (args.addDir) cliArgs.push("--add-dir", args.addDir);
|
||||
cliArgs.push("--", args.prompt);
|
||||
// Pipe the prompt via stdin. The `-- <positional arg>` path doesn't survive
|
||||
// long multi-line content reliably (the CLI complained "Input must be
|
||||
// provided either through stdin or as a prompt argument when using --print"
|
||||
// for prompts past a few KB). Stdin is unambiguous.
|
||||
const fullPrompt = args.systemPrompt
|
||||
? `${args.systemPrompt}\n\n---\n\n${args.prompt}`
|
||||
: args.prompt;
|
||||
|
||||
const t0 = Date.now();
|
||||
return await new Promise<ClaudeCallResult>((resolve, reject) => {
|
||||
const child = spawn("claude", cliArgs, {
|
||||
stdio: ["ignore", "pipe", "pipe"],
|
||||
stdio: ["pipe", "pipe", "pipe"],
|
||||
env: {
|
||||
...process.env,
|
||||
// CLAUDE_CODE_OAUTH_TOKEN is read by the CLI itself.
|
||||
},
|
||||
});
|
||||
child.stdin.write(fullPrompt);
|
||||
child.stdin.end();
|
||||
let stdout = "";
|
||||
let stderr = "";
|
||||
child.stdout.on("data", (c) => (stdout += c.toString()));
|
||||
|
|
|
|||
|
|
@ -146,14 +146,19 @@ export async function writeEvidence(
|
|||
throw new Error(`verbatim_excerpt not found in source chunk — Sonnet must not paraphrase`);
|
||||
}
|
||||
|
||||
// 3. (Optional) validate hypothesis refs exist.
|
||||
for (const ref of body.related_hypotheses ?? []) {
|
||||
// 3. (Optional) validate hypothesis refs exist. Drop empty/null IDs silently
|
||||
// — Locard may emit `related_hypotheses: [{}]` when it knows of no link yet.
|
||||
const validRefs = (body.related_hypotheses ?? []).filter(
|
||||
(r) => typeof r?.hypothesis_id === "string" && r.hypothesis_id.trim().length > 0,
|
||||
);
|
||||
for (const ref of validRefs) {
|
||||
const h = await queryOne<{ hypothesis_pk: number }>(
|
||||
`SELECT hypothesis_pk FROM public.hypotheses WHERE hypothesis_id = $1`,
|
||||
[ref.hypothesis_id],
|
||||
);
|
||||
if (!h) throw new Error(`linked hypothesis not found: ${ref.hypothesis_id}`);
|
||||
}
|
||||
body.related_hypotheses = validRefs;
|
||||
|
||||
// 4. Allocate id + INSERT.
|
||||
const evidence_id = await allocate.evidenceId();
|
||||
|
|
|
|||
Loading…
Reference in a new issue