From 78267100518dd7529a741f0f2ce0aca31df3d269 Mon Sep 17 00:00:00 2001 From: Luiz Gustavo Date: Sun, 24 May 2026 12:02:59 -0300 Subject: [PATCH] =?UTF-8?q?W4:=20bilingual=20EN=20+=20PT-BR=20Investigatio?= =?UTF-8?q?n=20Bureau=20(CLAUDE.md=20=C2=A73=20contract)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User flagged that the bureau was emitting English-only output, violating the project's bilingual rule. Every narrative field now ships in both languages: stored in sibling DB columns + rendered as adjacent markdown sections per CLAUDE.md §3. Migration 0007 (apply as supabase_admin): - public.hypotheses +question_pt_br, +position_pt_br, +argument_for_pt_br, +argument_against_pt_br - public.contradictions +topic_pt_br, +notes_pt_br - public.witnesses +access_to_event_pt_br, +bias_notes_pt_br, +verdict_pt_br - public.gaps +description_pt_br, +suggested_next_move_pt_br - public.evidence: unchanged (verbatim_excerpt stays source-language) - JSONB siblings inside contradictions.chunks + gaps.scope handled at runtime (statement_pt_br, title_pt_br, dominant_model_pt_br, why_surprising_pt_br, what_it_implies_pt_br). 
Detective prompts (all 7) rewritten with explicit bilingual JSON contract: - Output protocol section names every EN field + its _pt_br sibling - "Bilingual is mandatory" warning in the task instruction - Sentinel skip-states unchanged (NO_HYPOTHESES, NO_CONTRADICTIONS, INSUFFICIENT_TESTIMONY, INSUFFICIENT_HYPOTHESIS, NO_OUTLIERS, NO_NEW_EVIDENCE, INSUFFICIENT_ARTEFACTS) - Schneier: parallel arrays — hidden_assumptions[i] matches hidden_assumptions_pt_br[i], lengths must match - Case-Writer: interleaved §1 (EN) / §1 (PT-BR) per act in the body Writer-side validation (all 7 tools): - Reject INSERT if PT-BR sibling missing when EN field is set - Persist both languages atomically in one INSERT (no half-updates) - Markdown renderers write adjacent EN+PT-BR sections in case files (## Argument for (EN) followed by ## Argumento a favor (PT-BR), etc.) Detective parse layer (all 7 detectives): - Coerce both keys from JSON output - "incomplete_bilingual_*" skip reason when either side missing - Defensive: PT-BR fields trimmed + length-capped same as EN Orchestrator propagates question_pt_br + topic_pt_br through job payload to runHolmes / runCaseWriter, mirroring the chat-tool entry point. Web (UI): - /api/jobs/[id] hydrates _pt_br siblings from pg - job-status-poller HypothesisCard: PT-BR primary, EN in
a collapsible fallback when both exist - ContradictionCard: PT-BR statement primary + secondary EN quote - WitnessCard: PT-BR verdict primary + secondary EN quote, panels in PT - GapCard: PT-BR title/why/implies primary - /bureau hub: SELECTs both columns, renders PT-BR primary - /h/[id]: ArgumentPanel renders PT-BR primary with collapsible EN fallback when both exist - BureauSnapshot homepage: position_pt_br / topic_pt_br / verdict_pt_br primary - DocBureauPanel /d/[doc]: same primary-PT-BR pattern - New web/lib/i18n/pick.ts helper (not yet used by chat/agents — kept for future locale-driven switching when both languages are equally complete; current rule is PT-BR-first since the user is Brazilian) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../migrations/0007_bilingual_bureau.sql | 44 +++++++ investigator-runtime/prompts/case-writer.md | 40 +++++- investigator-runtime/prompts/dupin.md | 27 ++-- investigator-runtime/prompts/holmes.md | 27 ++-- investigator-runtime/prompts/poirot.md | 25 ++-- investigator-runtime/prompts/schneier.md | 32 +++-- investigator-runtime/prompts/taleb.md | 33 +++-- investigator-runtime/prompts/tetlock.md | 18 ++- .../src/detectives/case_writer.ts | 16 ++- investigator-runtime/src/detectives/dupin.ts | 16 ++- investigator-runtime/src/detectives/holmes.ts | 51 +++++--- investigator-runtime/src/detectives/poirot.ts | 24 ++-- .../src/detectives/schneier.ts | 27 ++-- investigator-runtime/src/detectives/taleb.ts | 40 ++++-- .../src/detectives/tetlock.ts | 20 ++- investigator-runtime/src/orchestrator.ts | 8 +- .../src/tools/write_calibration.ts | 16 ++- .../src/tools/write_case_report.ts | 2 + .../src/tools/write_contradiction.ts | 33 ++++- .../src/tools/write_hypothesis.ts | 47 +++++-- .../src/tools/write_outlier_gap.ts | 55 ++++++-- .../src/tools/write_red_team_review.ts | 52 ++++++-- .../src/tools/write_witness_analysis.ts | 37 ++++-- web/app/api/jobs/[id]/route.ts | 27 +++- web/app/bureau/page.tsx | 36 +++-- web/app/h/[hypothesisId]/page.tsx | 29 ++++- 
web/components/bureau-snapshot.tsx | 26 ++-- web/components/doc-bureau-panel.tsx | 19 +-- web/components/job-status-poller.tsx | 123 ++++++++++++------ web/lib/i18n/pick.ts | 36 +++++ 30 files changed, 726 insertions(+), 260 deletions(-) create mode 100644 infra/supabase/migrations/0007_bilingual_bureau.sql create mode 100644 web/lib/i18n/pick.ts diff --git a/infra/supabase/migrations/0007_bilingual_bureau.sql b/infra/supabase/migrations/0007_bilingual_bureau.sql new file mode 100644 index 0000000..e12667c --- /dev/null +++ b/infra/supabase/migrations/0007_bilingual_bureau.sql @@ -0,0 +1,44 @@ +-- 0007_bilingual_bureau.sql — bilingual EN+PT-BR sibling columns for the +-- Investigation Bureau (per CLAUDE.md §3: "Narrative descriptions ... Both +-- EN and PT-BR via sibling fields"). +-- +-- This migration adds nullable `*_pt_br` siblings for every narrative +-- column the bureau writes. Existing rows stay valid; new rows must +-- populate both. The detective prompts get re-flowed in W4 to emit both +-- languages in their JSON output. +-- +-- Notes: +-- - public.evidence.verbatim_excerpt stays single-language. Per CLAUDE.md +-- §3, verbatim quotes "preserve source language only". +-- - public.contradictions.chunks JSONB carries `statement` per position; +-- the runtime adds a sibling `statement_pt_br` key per array item +-- (no schema change needed for JSONB shape). +-- - public.gaps.scope JSONB likewise carries `title`, `dominant_model`, +-- `why_surprising`, `what_it_implies` — the runtime adds `*_pt_br` +-- siblings inside the JSONB object. +-- +-- Apply as supabase_admin (these tables are owned by supabase_admin +-- per migration 0004 / repo memory). 
+ +BEGIN; + +ALTER TABLE public.hypotheses + ADD COLUMN IF NOT EXISTS question_pt_br TEXT, + ADD COLUMN IF NOT EXISTS position_pt_br TEXT, + ADD COLUMN IF NOT EXISTS argument_for_pt_br TEXT, + ADD COLUMN IF NOT EXISTS argument_against_pt_br TEXT; + +ALTER TABLE public.contradictions + ADD COLUMN IF NOT EXISTS topic_pt_br TEXT, + ADD COLUMN IF NOT EXISTS notes_pt_br TEXT; + +ALTER TABLE public.witnesses + ADD COLUMN IF NOT EXISTS access_to_event_pt_br TEXT, + ADD COLUMN IF NOT EXISTS bias_notes_pt_br TEXT, + ADD COLUMN IF NOT EXISTS verdict_pt_br TEXT; + +ALTER TABLE public.gaps + ADD COLUMN IF NOT EXISTS description_pt_br TEXT, + ADD COLUMN IF NOT EXISTS suggested_next_move_pt_br TEXT; + +COMMIT; diff --git a/investigator-runtime/prompts/case-writer.md b/investigator-runtime/prompts/case-writer.md index 819183b..a6c275e 100644 --- a/investigator-runtime/prompts/case-writer.md +++ b/investigator-runtime/prompts/case-writer.md @@ -44,11 +44,45 @@ contradiction, a witness analysis, an outlier, or a calibration. 5. Voice: Watson's plainspoken English (or Portuguese, per the request). The prose is for an educated reader, not a specialist. Avoid jargon. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) Emit ONLY the markdown body of the narrative. NO frontmatter (the runtime -adds it). NO code fence. Start with `# ` heading and proceed through -the five acts. +adds it). NO code fence. + +The narrative is **bilingual** with EN and PT-BR sections **interleaved +per act**, in this exact structure (per CLAUDE.md §3 "adjacent sections"): + +```markdown +# Title (EN) + +# Título (PT-BR) + +## §1 — The Case at Hand (EN) + + + +## §1 — O Caso em Mãos (PT-BR) + + + +## §2 — The Evidence Chain (EN) + + + +## §2 — A Cadeia de Evidência (PT-BR) + + + +... (continue alternating per act through §5) ... +``` + +Rules: +- Both languages must appear; do NOT emit only EN or only PT-BR. +- PT-BR is **Brazilian Portuguese** with UTF-8 accents preserved. 
+- Verbatim chunk quotes stay in the chunk's source language (usually + English in this corpus); only the surrounding narration is translated. +- `[[wiki-links]]` are technical identifiers — keep them as-is in both + versions; do not translate IDs. If the bureau has insufficient artefacts (e.g. 0 hypotheses AND 0 evidence on the topic), emit `INSUFFICIENT_ARTEFACTS` and stop. Do not diff --git a/investigator-runtime/prompts/dupin.md b/investigator-runtime/prompts/dupin.md index 436dcf6..e25d0c0 100644 --- a/investigator-runtime/prompts/dupin.md +++ b/investigator-runtime/prompts/dupin.md @@ -35,26 +35,33 @@ chunks verbatim so the case-writer can follow up. 5. You prefer FEW high-confidence contradictions over MANY weak ones. If the corpus contains nothing irreconcilable, emit `NO_CONTRADICTIONS`. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON array. No prose. No code fence. Just the array. +Emit a strict JSON array. No prose. No code fence. Every narrative field +appears in EN AND in PT-BR (Brazilian Portuguese with UTF-8 accents). The +`topic`, `notes`, and each position's `statement` all have `*_pt_br` +siblings. ```json [ { - "topic": "Short noun-phrase summarizing the disputed point", - "notes": "Optional one-paragraph commentary (≤ 400 chars). 
Why this matters; what would resolve it.", + "topic": "EN short noun-phrase summarizing the disputed point", + "topic_pt_br": "PT-BR tópico curto resumindo o ponto em disputa", + "notes": "EN optional one-paragraph commentary (≤ 400 chars).", + "notes_pt_br": "PT-BR comentário opcional (≤ 400 chars).", "positions": [ { "doc_id": "dow-uap-d017-...", "chunk_id": "c0042", - "statement": "One-sentence summary of what THIS chunk asserts.", + "statement": "EN one-sentence summary of what THIS chunk asserts.", + "statement_pt_br": "PT-BR uma frase resumindo o que ESTE trecho afirma.", "stance": "asserts" }, { "doc_id": "dow-uap-d017-...", "chunk_id": "c0087", - "statement": "One-sentence summary of what THAT chunk asserts.", + "statement": "EN one-sentence summary of what THAT chunk asserts.", + "statement_pt_br": "PT-BR uma frase resumindo o que AQUELE trecho afirma.", "stance": "denies" } ] @@ -64,9 +71,11 @@ Emit a strict JSON array. No prose. No code fence. Just the array. Constraints: - ≥ 2 positions per contradiction, drawn from ≥ 2 distinct `chunk_id`s. -- `stance` is optional free-form ("asserts" / "denies" / "dates-as-A" / - "dates-as-B" / etc.); useful for the case-writer but not required. -- `notes` may be empty; if present, keep it tight. +- `stance` is optional free-form ("asserts" / "denies" / etc.); useful for + the case-writer but not required. `stance` is short enough that bilingual + isn't required — keep in EN. +- `notes` may be empty in both languages; if present in EN it must be + present in PT-BR (and vice versa). - Emit AT MOST 3 contradictions per call — the strongest you can find. If the corpus contains no genuine contradiction relative to the topic, diff --git a/investigator-runtime/prompts/holmes.md b/investigator-runtime/prompts/holmes.md index 9a08b68..56d6d41 100644 --- a/investigator-runtime/prompts/holmes.md +++ b/investigator-runtime/prompts/holmes.md @@ -28,16 +28,25 @@ narrows toward what remains, however improbable. 6. 
You do not hedge in prose. The position is **one sentence**, declarative. Hedging belongs in the posterior, not in the wording. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON array. No prose around it. No code fence. Just the array. +Emit a strict JSON array. No prose around it. No code fence. Every narrative +field appears TWICE: the English key (`position`, `argument_for`, +`argument_against`) AND its PT-BR sibling (`*_pt_br`). The PT-BR must be +**Brazilian Portuguese** (not European), with full UTF-8 accents preserved +(`ç`, `ã`, `á`, `é`, `í`, `ó`, `ú`, `â`, `ê`, `ô`, `à`). Verbatim chunk +quotes inside the prose stay in the chunk's source language; only the +surrounding narration is translated. ```json [ { - "position": "...", - "argument_for": "...", - "argument_against": "...", + "position": "EN one-sentence declarative position.", + "position_pt_br": "PT-BR uma frase declarativa equivalente.", + "argument_for": "EN argument — ≤6 sentences, every claim cited via [[doc-id/pNNN#cNNNN]].", + "argument_for_pt_br": "PT-BR argumento — ≤6 frases, cada afirmação citada via [[doc-id/pNNN#cNNNN]].", + "argument_against": "EN counter-argument — ≤6 sentences.", + "argument_against_pt_br": "PT-BR contra-argumento — ≤6 frases.", "prior": 0.30, "posterior": 0.55, "confidence_band": "low", @@ -46,8 +55,8 @@ Emit a strict JSON array. No prose around it. No code fence. Just the array. {"evidence_id": "E-0043", "supports": false} ] }, - { ... another rival ... }, - { ... another rival ... } + { ... another rival, also bilingual ... }, + { ... another rival, also bilingual ... } ] ``` @@ -55,8 +64,10 @@ Note: - `evidence_refs` is **optional** — leave as `[]` if no `E-NNNN` evidence has been catalogued yet for this question; chunk citations in the prose are sufficient for v0. -- `question` is supplied by the runtime; you do not echo it. +- `question` is supplied by the runtime in both languages; you do not echo it. 
- The runtime owns the writer; you emit data only. +- A missing `_pt_br` sibling is a hard validation failure — the writer + rejects the rival. Both languages must appear or none. If the corpus contains nothing relevant to the question, emit the literal single word `NO_HYPOTHESES` and stop. diff --git a/investigator-runtime/prompts/poirot.md b/investigator-runtime/prompts/poirot.md index cd84596..d46b1a5 100644 --- a/investigator-runtime/prompts/poirot.md +++ b/investigator-runtime/prompts/poirot.md @@ -39,28 +39,35 @@ corroboration_refs, and a one-sentence verdict. 4. `verdict` is ONE sentence (≤ 280 chars). Declarative. No hedging. Hedging belongs in `credibility`, not in the wording. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON object. No prose. No code fence. +Emit a strict JSON object. No prose. No code fence. Every narrative field +appears in EN AND in PT-BR (Brazilian Portuguese with UTF-8 accents). ```json { "credibility": "high | medium | low | speculation", - "access_to_event": "One paragraph describing what the person had direct, indirect, or no access to. Ground specific facts in chunk_ids.", - "bias_notes": "One paragraph naming concrete biases visible in the corpus (e.g. official role conflict, prior public stance, institutional pressure). Avoid generic skepticism.", + "access_to_event": "EN one paragraph describing access. Ground specific facts in chunk_ids.", + "access_to_event_pt_br": "PT-BR um parágrafo descrevendo acesso. Fundamente fatos específicos em chunk_ids.", + "bias_notes": "EN one paragraph naming concrete biases visible in the corpus.", + "bias_notes_pt_br": "PT-BR um parágrafo nomeando vieses concretos visíveis no corpus.", "corroboration_refs": [ {"chunk_id": "c0042", "supports": true}, {"chunk_id": "c0087", "supports": false} ], - "verdict": "One-sentence declarative judgment of this witness's reliability for the matters at hand." 
+ "verdict": "EN one-sentence declarative judgment.", + "verdict_pt_br": "PT-BR uma frase declarativa equivalente." } ``` Constraints: -- `access_to_event` and `bias_notes` ≤ 800 chars each. -- `corroboration_refs` ≤ 8 entries, MUST cite chunk_id values that - appear in the corpus shortlist you were given. -- `verdict` ≤ 280 chars, no hedging language inside the sentence. +- `access_to_event` and `bias_notes` ≤ 800 chars each (per language). +- `corroboration_refs` ≤ 8 entries, MUST cite chunk_id values that appear + in the corpus shortlist you were given. +- `verdict` ≤ 280 chars (per language), no hedging language inside the + sentence. +- A missing `*_pt_br` sibling is a hard validation failure — the writer + rejects the analysis. If the corpus contains no chunks where the named person actually appears (only the entity card from the wiki without supporting passages), emit diff --git a/investigator-runtime/prompts/schneier.md b/investigator-runtime/prompts/schneier.md index 3147321..84815db 100644 --- a/investigator-runtime/prompts/schneier.md +++ b/investigator-runtime/prompts/schneier.md @@ -38,26 +38,38 @@ keep it from being safely shipped as the final answer. decides whether to dispatch follow-up evidence work or downgrade the confidence_band. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON object. No prose. No code fence. Just the object. +Emit a strict JSON object. No prose. No code fence. Every narrative field +appears in EN AND in PT-BR (Brazilian Portuguese with UTF-8 accents). The +arrays are **parallel**: `hidden_assumptions[i]` and +`hidden_assumptions_pt_br[i]` describe the SAME assumption, in the two +languages, in matching order. Same arity (length must match). 
```json { "severity": "low | medium | high", - "hidden_assumptions": ["sentence", "sentence"], - "failure_modes": ["sentence", "sentence"], - "alternative_explanations": ["sentence", "sentence"], - "recommended_tests": ["sentence", "sentence"], - "verdict_one_sentence": "..." + "hidden_assumptions": ["EN sentence", "EN sentence"], + "hidden_assumptions_pt_br": ["PT-BR frase", "PT-BR frase"], + "failure_modes": ["EN sentence", "EN sentence"], + "failure_modes_pt_br": ["PT-BR frase", "PT-BR frase"], + "alternative_explanations": ["EN sentence", "EN sentence"], + "alternative_explanations_pt_br": ["PT-BR frase", "PT-BR frase"], + "recommended_tests": ["EN sentence", "EN sentence"], + "recommended_tests_pt_br": ["PT-BR frase", "PT-BR frase"], + "verdict_one_sentence": "EN one declarative sentence.", + "verdict_one_sentence_pt_br": "PT-BR uma frase declarativa equivalente." } ``` Constraints: - 2-5 entries per array. Empty arrays only when the attack surface is - genuinely empty (rare). -- Each array entry ≤ 200 chars. -- `verdict_one_sentence` ≤ 280 chars. + genuinely empty (rare). EN array and its PT-BR sibling MUST have the + same length. +- Each array entry ≤ 240 chars (per language). +- `verdict_one_sentence` ≤ 280 chars (per language). +- A missing `*_pt_br` sibling, or a length mismatch, is a hard validation + failure — the writer rejects the review. If the input hypothesis is too thin to attack (e.g. position is one word, no argument_for, no evidence), emit `INSUFFICIENT_HYPOTHESIS` and stop. diff --git a/investigator-runtime/prompts/taleb.md b/investigator-runtime/prompts/taleb.md index 4c0afb4..8dcbe5e 100644 --- a/investigator-runtime/prompts/taleb.md +++ b/investigator-runtime/prompts/taleb.md @@ -37,30 +37,37 @@ implies for the case. 5. Severity: implicit. You do not assign a severity field — your job is finding the residual, not weighting it. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON array. No prose. 
No code fence. +Emit a strict JSON array. No prose. No code fence. Every narrative field +appears in EN AND in PT-BR (Brazilian Portuguese with UTF-8 accents). ```json [ { - "title": "Short label for this outlier (≤ 80 chars)", - "chunk_id": "c0042", - "doc_id": "dow-uap-d017-...", - "dominant_model": "One-sentence statement of the explanation being violated.", - "why_surprising": "One paragraph. Concrete. Quantitative when possible.", - "what_it_implies": "One sentence. Pick (a), (b), or (c) per the rules.", - "suggested_next_move": "One sentence." + "title": "EN short label (≤ 80 chars)", + "title_pt_br": "PT-BR título curto (≤ 80 chars)", + "chunk_id": "c0042", + "doc_id": "dow-uap-d017-...", + "dominant_model": "EN one-sentence statement of the explanation being violated.", + "dominant_model_pt_br": "PT-BR uma frase do modelo dominante sendo violado.", + "why_surprising": "EN one paragraph. Concrete. Quantitative when possible.", + "why_surprising_pt_br": "PT-BR um parágrafo. Concreto. Quantitativo quando possível.", + "what_it_implies": "EN one sentence. Pick (a), (b), or (c) per the rules.", + "what_it_implies_pt_br": "PT-BR uma frase. Escolha (a), (b) ou (c) conforme as regras.", + "suggested_next_move": "EN one sentence.", + "suggested_next_move_pt_br": "PT-BR uma frase." } ] ``` Constraints: -- 0-3 entries. Empty array `[]` when nothing stands out (rare and - honest). -- `why_surprising` ≤ 600 chars. -- All other strings ≤ 280 chars. +- 0-3 entries. Empty array `[]` when nothing stands out (rare and honest). +- `why_surprising` ≤ 600 chars (per language). +- All other strings ≤ 280 chars (per language). - `chunk_id` MUST be present in the corpus shortlist. +- A missing `*_pt_br` sibling is a hard validation failure — the writer + rejects the outlier. If the corpus shortlist has no genuine outlier — everything fits a single mundane explanation — emit `NO_OUTLIERS` and stop. 
diff --git a/investigator-runtime/prompts/tetlock.md b/investigator-runtime/prompts/tetlock.md index 18e2d9f..308354b 100644 --- a/investigator-runtime/prompts/tetlock.md +++ b/investigator-runtime/prompts/tetlock.md @@ -28,26 +28,30 @@ the posterior never rose). - `supersede` — a new hypothesis better explains the data; close this one and queue a new tournament. Include `supersede_reason`. -## Output protocol +## Output protocol — bilingual EN + PT-BR (mandatory) -Emit a strict JSON object. No prose. No code fence. +Emit a strict JSON object. No prose. No code fence. Every narrative field +appears in EN AND in PT-BR (Brazilian Portuguese with UTF-8 accents). ```json { "new_posterior": 0.45, "new_confidence_band": "low", "delta": 0.05, - "rationale": "Concrete prose with [[doc-id/pNNN#cNNNN]] citations.", - "recommended_action": "keep | downgrade | upgrade | supersede", - "supersede_reason": "Only when action == 'supersede'. Otherwise omit." + "rationale": "EN concrete prose with [[doc-id/pNNN#cNNNN]] citations.", + "rationale_pt_br": "PT-BR prosa concreta com [[doc-id/pNNN#cNNNN]] citações.", + "recommended_action": "keep | downgrade | upgrade | supersede", + "supersede_reason": "EN — only when action == 'supersede'. Otherwise omit.", + "supersede_reason_pt_br": "PT-BR — só quando action == 'supersede'. Caso contrário, omita." } ``` Constraints: - `new_posterior` ∈ [0, 1]. - `new_confidence_band` MUST match the band thresholds for `new_posterior`. -- `rationale` ≤ 600 chars. -- `supersede_reason` ≤ 280 chars. +- `rationale` ≤ 1200 chars (per language). +- `supersede_reason` ≤ 280 chars (per language). +- A missing `_pt_br` sibling is a hard validation failure. 
If the corpus has NO new evidence since the hypothesis was last reviewed (no chunks beyond what was already cited), emit `NO_NEW_EVIDENCE` and diff --git a/investigator-runtime/src/detectives/case_writer.ts b/investigator-runtime/src/detectives/case_writer.ts index 7fdb651..6e2b1f8 100644 --- a/investigator-runtime/src/detectives/case_writer.ts +++ b/investigator-runtime/src/detectives/case_writer.ts @@ -24,6 +24,7 @@ const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "case-writer.md"); export interface CaseWriterTask { job_id: string; topic: string; + topic_pt_br?: string; /** When set, restrict to artefacts touching this doc_id (via chunk FK). */ doc_id?: string; lang?: "pt" | "en"; @@ -164,15 +165,18 @@ function buildPrompt( witnesses: WitnessRow[], gaps: GapRow[], ): string { - const langNote = task.lang === "en" - ? "Write the narrative in English." - : "Escreva a narrativa em português brasileiro (PT-BR), preservando acentos UTF-8. Cite [[wiki-links]] em inglês como aparecem nos artefatos."; return [ - `# Case folder: ${task.topic}`, + `# Case folder`, + "", + `**Topic (EN).** ${task.topic}`, + `**Tópico (PT-BR).** ${task.topic_pt_br ?? task.topic}`, "", task.doc_id ? `Scoped to document: ${task.doc_id}` : "Scope: all documents", "", - langNote, + "**Bilingual output mandatory.** Write each act in BOTH English and", + "Brazilian Portuguese (PT-BR), interleaved per the system-prompt", + "structure. UTF-8 accents preserved. 
Verbatim chunk quotes stay in", + "their source language; only the surrounding narration is translated.", "", "## Artefacts available", "", @@ -321,7 +325,7 @@ export async function runCaseWriter(task: CaseWriterTask): Promise< if (body_md === null) return { skipped: true, reason: "INSUFFICIENT_ARTEFACTS" }; return await writeCaseReport({ - topic, slug, body_md, + topic, topic_pt_br: task.topic_pt_br, slug, body_md, meta: { n_evidence: evidence.length, n_hypotheses: hypotheses.length, diff --git a/investigator-runtime/src/detectives/dupin.ts b/investigator-runtime/src/detectives/dupin.ts index 9f7092e..894cee3 100644 --- a/investigator-runtime/src/detectives/dupin.ts +++ b/investigator-runtime/src/detectives/dupin.ts @@ -66,8 +66,9 @@ function buildPrompt(task: DupinTask, hits: SearchHit[], lang: "pt" | "en"): str "Inspect the chunks for pairs (or small groups) that cannot both be true.", "Emit at most 3 contradictions. Each must cite ≥ 2 distinct chunk_ids.", "Emit the JSON array exactly as specified by the system prompt — no prose,", - "no code fence, no preamble. If no genuine contradiction exists,", - "emit the literal word `NO_CONTRADICTIONS`.", + "no code fence, no preamble. **Bilingual is mandatory:** every narrative", + "field (topic, notes, statement) appears in both EN and PT-BR. If no", + "genuine contradiction exists, emit the literal word `NO_CONTRADICTIONS`.", ].join("\n"); } @@ -94,9 +95,10 @@ function coercePositions(raw: unknown): ContradictionPosition[] { const doc_id = typeof o.doc_id === "string" ? o.doc_id.trim() : ""; const chunk_id = typeof o.chunk_id === "string" ? o.chunk_id.trim() : ""; const statement = typeof o.statement === "string" ? o.statement.trim() : ""; - if (!doc_id || !chunk_id || !statement) continue; + const statement_pt_br = typeof o.statement_pt_br === "string" ? 
o.statement_pt_br.trim() : ""; + if (!doc_id || !chunk_id || !statement || !statement_pt_br) continue; out.push({ - doc_id, chunk_id, statement, + doc_id, chunk_id, statement, statement_pt_br, stance: typeof o.stance === "string" ? o.stance.trim() : undefined, }); } @@ -179,12 +181,14 @@ export async function runDupin(task: DupinTask): Promise< if (!raw || typeof raw !== "object") continue; const o = raw as Record; const topic = typeof o.topic === "string" ? o.topic.trim() : ""; + const topic_pt_br = typeof o.topic_pt_br === "string" ? o.topic_pt_br.trim() : ""; const positions = coercePositions(o.positions); - if (!topic || positions.length < 2) continue; + if (!topic || !topic_pt_br || positions.length < 2) continue; const args: WriteContradictionArgs = { - topic, + topic, topic_pt_br, positions, notes: typeof o.notes === "string" ? o.notes.trim() : undefined, + notes_pt_br: typeof o.notes_pt_br === "string" ? o.notes_pt_br.trim() : undefined, resolution_status: o.resolution_status === "resolved" ? "resolved" : o.resolution_status === "irreconcilable" diff --git a/investigator-runtime/src/detectives/holmes.ts b/investigator-runtime/src/detectives/holmes.ts index 505abb3..1ec9f8b 100644 --- a/investigator-runtime/src/detectives/holmes.ts +++ b/investigator-runtime/src/detectives/holmes.ts @@ -28,6 +28,9 @@ const PROMPT_PATH = path.resolve(HERE, "..", "..", "prompts", "holmes.md"); export interface HolmesTask { job_id: string; question: string; + /** Optional PT-BR mirror of the question. If omitted, the EN one is used + * for both sides until the model emits PT-BR output. */ + question_pt_br?: string; /** Optional scope narrowing — restrict the search to one doc / entity. 
*/ doc_id?: string; lang?: "pt" | "en"; @@ -54,10 +57,12 @@ function renderChunkBlock(hits: SearchHit[], lang: "pt" | "en"): string { function buildPrompt(task: HolmesTask, hits: SearchHit[], lang: "pt" | "en"): string { const block = renderChunkBlock(hits, lang); + const ptQ = task.question_pt_br?.trim(); return [ `# Question to investigate`, "", - task.question, + `**EN.** ${task.question}`, + ptQ ? `**PT-BR.** ${ptQ}` : null, "", `## Corpus shortlist (${hits.length} chunks${task.doc_id ? `, scoped to ${task.doc_id}` : ""})`, "", @@ -66,11 +71,13 @@ function buildPrompt(task: HolmesTask, hits: SearchHit[], lang: "pt" | "en"): st "## Your task", "", "Build 2-3 rival hypotheses about the question above. Each must cite at", - "least one chunk via [[doc-id/pNNN#cNNNN]] in argument_for and", - "argument_against. Assign priors + posteriors summing roughly to 1.0.", - "Emit the JSON array exactly as specified by the system prompt — no prose,", - "no code fence, no preamble.", - ].join("\n"); + "least one chunk via [[doc-id/pNNN#cNNNN]] in both argument_for and", + "argument_against (EN) and in argument_for_pt_br and", + "argument_against_pt_br (PT-BR). Assign priors + posteriors summing", + "roughly to 1.0. Emit the JSON array exactly as specified by the system", + "prompt — no prose, no code fence, no preamble. **Bilingual is mandatory:", + "every narrative field appears in both EN and PT-BR.**", + ].filter(Boolean).join("\n"); } function extractJsonArray(text: string): unknown[] | null { @@ -143,20 +150,26 @@ export async function runHolmes(task: HolmesTask): Promise< const out: Array<{ hypothesis_id: string; case_file: string }> = []; for (const raw of arr.slice(0, 3)) { + const r = raw as Record; + const strOrUndef = (k: string): string | undefined => + typeof r[k] === "string" && (r[k] as string).trim().length > 0 + ? 
(r[k] as string).trim() : undefined; const args: WriteHypothesisArgs = { - question: task.question, - position: String((raw as { position?: unknown }).position ?? "").trim(), - argument_for: typeof (raw as { argument_for?: unknown }).argument_for === "string" - ? (raw as { argument_for: string }).argument_for : undefined, - argument_against: typeof (raw as { argument_against?: unknown }).argument_against === "string" - ? (raw as { argument_against: string }).argument_against : undefined, - prior: Number((raw as { prior?: unknown }).prior), - posterior: Number((raw as { posterior?: unknown }).posterior), - confidence_band: (raw as { confidence_band?: WriteHypothesisArgs["confidence_band"] }).confidence_band, - evidence_refs: Array.isArray((raw as { evidence_refs?: unknown }).evidence_refs) - ? (raw as { evidence_refs: Array<{ evidence_id?: string; supports?: boolean; weight?: number }> }).evidence_refs - .filter((r): r is { evidence_id: string; supports?: boolean; weight?: number } => - typeof r?.evidence_id === "string" && r.evidence_id.length > 0) + question: task.question, + question_pt_br: task.question_pt_br ?? task.question, + position: String(r.position ?? "").trim(), + position_pt_br: strOrUndef("position_pt_br"), + argument_for: strOrUndef("argument_for"), + argument_for_pt_br: strOrUndef("argument_for_pt_br"), + argument_against: strOrUndef("argument_against"), + argument_against_pt_br: strOrUndef("argument_against_pt_br"), + prior: Number(r.prior), + posterior: Number(r.posterior), + confidence_band: r.confidence_band as WriteHypothesisArgs["confidence_band"], + evidence_refs: Array.isArray(r.evidence_refs) + ? 
(r.evidence_refs as Array<{ evidence_id?: string; supports?: boolean; weight?: number }>) + .filter((x): x is { evidence_id: string; supports?: boolean; weight?: number } => + typeof x?.evidence_id === "string" && x.evidence_id.length > 0) : [], }; if (!args.position) continue; diff --git a/investigator-runtime/src/detectives/poirot.ts b/investigator-runtime/src/detectives/poirot.ts index 0de58ba..39edef1 100644 --- a/investigator-runtime/src/detectives/poirot.ts +++ b/investigator-runtime/src/detectives/poirot.ts @@ -90,8 +90,10 @@ function buildPrompt( "", "Produce the structured witness analysis as specified by the system", "prompt. Cite chunk_ids from the shortlist above in", - "`corroboration_refs`. If the shortlist is too thin to ground an", - "honest assessment, emit `INSUFFICIENT_TESTIMONY`.", + "`corroboration_refs`. **Bilingual is mandatory:** access_to_event,", + "bias_notes, and verdict each appear in both EN and PT-BR. If the", + "shortlist is too thin to ground an honest assessment, emit", + "`INSUFFICIENT_TESTIMONY`.", ].filter(Boolean).join("\n"); } @@ -224,16 +226,22 @@ export async function runPoirot(task: PoirotTask): Promise< ? obj.credibility as "high" | "medium" | "low" | "speculation" : "speculation"; + const str = (k: string): string => + typeof obj[k] === "string" ? (obj[k] as string).trim() : ""; const args: WriteWitnessAnalysisArgs = { person_entity_pk: entity_pk, credibility, - access_to_event: typeof obj.access_to_event === "string" ? obj.access_to_event.trim() : "", - bias_notes: typeof obj.bias_notes === "string" ? obj.bias_notes.trim() : "", - corroboration_refs: coerceCorroboration(obj.corroboration_refs), - verdict: typeof obj.verdict === "string" ? 
obj.verdict.trim() : "", + access_to_event: str("access_to_event"), + access_to_event_pt_br: str("access_to_event_pt_br"), + bias_notes: str("bias_notes"), + bias_notes_pt_br: str("bias_notes_pt_br"), + corroboration_refs: coerceCorroboration(obj.corroboration_refs), + verdict: str("verdict"), + verdict_pt_br: str("verdict_pt_br"), }; - if (!args.access_to_event || !args.bias_notes || !args.verdict) { - return { skipped: true, reason: "incomplete_analysis" }; + if (!args.access_to_event || !args.bias_notes || !args.verdict + || !args.access_to_event_pt_br || !args.bias_notes_pt_br || !args.verdict_pt_br) { + return { skipped: true, reason: "incomplete_bilingual_analysis" }; } // Pass the shortlist's most-represented doc_id as a fallback for chunk_id diff --git a/investigator-runtime/src/detectives/schneier.ts b/investigator-runtime/src/detectives/schneier.ts index 4dbd3f6..1765d20 100644 --- a/investigator-runtime/src/detectives/schneier.ts +++ b/investigator-runtime/src/detectives/schneier.ts @@ -85,8 +85,10 @@ function buildPrompt(h: HypothesisRow, evidence: EvidenceRow[]): string { "", "Red-team the hypothesis. Find what the author didn't address. Emit the", "JSON object exactly as specified by the system prompt — no prose, no", - "code fence, no preamble. If the hypothesis is too thin to attack,", - "emit the literal word `INSUFFICIENT_HYPOTHESIS`.", + "code fence, no preamble. **Bilingual is mandatory:** every narrative", + "field appears in both EN and PT-BR with matching array lengths. 
If", + "the hypothesis is too thin to attack, emit the literal word", + "`INSUFFICIENT_HYPOTHESIS`.", ].join("\n"); } @@ -176,16 +178,21 @@ export async function runSchneier(task: SchneierTask): Promise< const args: RedTeamReviewArgs = { hypothesis_id: h.hypothesis_id, severity, - hidden_assumptions: coerceStringArray(obj.hidden_assumptions), - failure_modes: coerceStringArray(obj.failure_modes), - alternative_explanations: coerceStringArray(obj.alternative_explanations), - recommended_tests: coerceStringArray(obj.recommended_tests), + hidden_assumptions: coerceStringArray(obj.hidden_assumptions), + hidden_assumptions_pt_br: coerceStringArray(obj.hidden_assumptions_pt_br), + failure_modes: coerceStringArray(obj.failure_modes), + failure_modes_pt_br: coerceStringArray(obj.failure_modes_pt_br), + alternative_explanations: coerceStringArray(obj.alternative_explanations), + alternative_explanations_pt_br: coerceStringArray(obj.alternative_explanations_pt_br), + recommended_tests: coerceStringArray(obj.recommended_tests), + recommended_tests_pt_br: coerceStringArray(obj.recommended_tests_pt_br), verdict_one_sentence: typeof obj.verdict_one_sentence === "string" - ? obj.verdict_one_sentence.trim() - : "", + ? obj.verdict_one_sentence.trim() : "", + verdict_one_sentence_pt_br: typeof obj.verdict_one_sentence_pt_br === "string" + ? 
obj.verdict_one_sentence_pt_br.trim() : "", }; - if (!args.verdict_one_sentence) { - return { skipped: true, reason: "no_verdict" }; + if (!args.verdict_one_sentence || !args.verdict_one_sentence_pt_br) { + return { skipped: true, reason: "no_verdict_bilingual" }; } return await writeRedTeamReview(args, { diff --git a/investigator-runtime/src/detectives/taleb.ts b/investigator-runtime/src/detectives/taleb.ts index d4b5141..714a1fe 100644 --- a/investigator-runtime/src/detectives/taleb.ts +++ b/investigator-runtime/src/detectives/taleb.ts @@ -61,8 +61,9 @@ function buildPrompt(task: TalebTask, hits: SearchHit[], lang: "pt" | "en"): str "", "Identify AT MOST 3 outliers per the system prompt rules. State the", "dominant_model first, then the chunk that violates it. Emit the", - "JSON array exactly as specified — no prose, no code fence. If", - "nothing genuinely stands out, emit `NO_OUTLIERS`.", + "JSON array exactly as specified — no prose, no code fence.", + "**Bilingual is mandatory:** every narrative field appears in both", + "EN and PT-BR. If nothing genuinely stands out, emit `NO_OUTLIERS`.", ].join("\n"); } @@ -144,21 +145,36 @@ export async function runTaleb(task: TalebTask): Promise< for (const raw of arr.slice(0, 3)) { if (!raw || typeof raw !== "object") continue; const o = raw as Record; + const str = (k: string): string => + typeof o[k] === "string" ? (o[k] as string).trim() : ""; const args: WriteOutlierGapArgs = { - title: typeof o.title === "string" ? o.title.trim() : "", - doc_id: typeof o.doc_id === "string" ? o.doc_id.trim() : "", - chunk_id: typeof o.chunk_id === "string" ? o.chunk_id.trim() : "", - dominant_model: typeof o.dominant_model === "string" ? o.dominant_model.trim() : "", - why_surprising: typeof o.why_surprising === "string" ? o.why_surprising.trim() : "", - what_it_implies: typeof o.what_it_implies === "string" ? o.what_it_implies.trim() : "", - suggested_next_move: typeof o.suggested_next_move === "string" ? 
o.suggested_next_move.trim() : "", + title: str("title"), + title_pt_br: str("title_pt_br"), + doc_id: str("doc_id"), + chunk_id: str("chunk_id"), + dominant_model: str("dominant_model"), + dominant_model_pt_br: str("dominant_model_pt_br"), + why_surprising: str("why_surprising"), + why_surprising_pt_br: str("why_surprising_pt_br"), + what_it_implies: str("what_it_implies"), + what_it_implies_pt_br: str("what_it_implies_pt_br"), + suggested_next_move: str("suggested_next_move"), + suggested_next_move_pt_br: str("suggested_next_move_pt_br"), }; - if (!args.title || !args.doc_id || !args.chunk_id || !args.dominant_model - || !args.why_surprising || !args.what_it_implies || !args.suggested_next_move) { + const argsAny = args as unknown as Record; + const missing = [ + "title", "title_pt_br", "doc_id", "chunk_id", + "dominant_model", "dominant_model_pt_br", + "why_surprising", "why_surprising_pt_br", + "what_it_implies", "what_it_implies_pt_br", + "suggested_next_move", "suggested_next_move_pt_br", + ].filter((k) => !argsAny[k]); + if (missing.length > 0) { await audit({ event: "write_outlier_gap_failed", job_id: task.job_id, detective: "taleb@detective", - reason: "incomplete_outlier", title: args.title.slice(0, 120), + reason: "incomplete_bilingual_outlier", + missing, title: args.title.slice(0, 120), }); continue; } diff --git a/investigator-runtime/src/detectives/tetlock.ts b/investigator-runtime/src/detectives/tetlock.ts index 500ded5..9bfd2e0 100644 --- a/investigator-runtime/src/detectives/tetlock.ts +++ b/investigator-runtime/src/detectives/tetlock.ts @@ -127,8 +127,10 @@ function buildPrompt( "## Your task", "", "Recompute the posterior honestly. Emit the JSON object exactly as", - "specified by the system prompt. If there is NO new chunk to move the", - "posterior on, emit `NO_NEW_EVIDENCE`.", + "specified by the system prompt. **Bilingual is mandatory:** rationale", + "appears in both EN and PT-BR; supersede_reason (when present) also", + "bilingual. 
If there is NO new chunk to move the posterior on, emit", + "`NO_NEW_EVIDENCE`.", ].join("\n"); } @@ -242,15 +244,19 @@ export async function runTetlock(task: TetlockTask): Promise< new_posterior, new_confidence_band: bandFromPosterior(new_posterior), delta, - rationale: typeof obj.rationale === "string" ? obj.rationale.trim() : "", + rationale: typeof obj.rationale === "string" ? obj.rationale.trim() : "", + rationale_pt_br: typeof obj.rationale_pt_br === "string" ? obj.rationale_pt_br.trim() : "", recommended_action: action, - supersede_reason: typeof obj.supersede_reason === "string" ? obj.supersede_reason.trim() : undefined, + supersede_reason: typeof obj.supersede_reason === "string" ? obj.supersede_reason.trim() : undefined, + supersede_reason_pt_br: typeof obj.supersede_reason_pt_br === "string" ? obj.supersede_reason_pt_br.trim() : undefined, old_posterior, old_confidence_band: h.confidence_band, }; - if (!args.rationale) return { skipped: true, reason: "no_rationale" }; - if (action === "supersede" && !args.supersede_reason) { - return { skipped: true, reason: "supersede_reason_missing" }; + if (!args.rationale || !args.rationale_pt_br) { + return { skipped: true, reason: "no_rationale_bilingual" }; + } + if (action === "supersede" && (!args.supersede_reason || !args.supersede_reason_pt_br)) { + return { skipped: true, reason: "supersede_reason_missing_bilingual" }; } return await writeCalibration(args, { diff --git a/investigator-runtime/src/orchestrator.ts b/investigator-runtime/src/orchestrator.ts index 9ae7994..d08cb05 100644 --- a/investigator-runtime/src/orchestrator.ts +++ b/investigator-runtime/src/orchestrator.ts @@ -55,12 +55,14 @@ export async function dispatch(job: InvestigationJob, workerId: string): Promise break; } case "hypothesis_tournament": { - // Payload: { question, doc_id?, lang?, context_chunks? } + // Payload: { question, question_pt_br?, doc_id?, lang?, context_chunks? } const question = String(job.payload.question ?? 
"").trim(); if (!question) throw new Error("hypothesis_tournament requires payload.question"); const task: HolmesTask = { job_id: job.job_id, question, + question_pt_br: typeof job.payload.question_pt_br === "string" + ? job.payload.question_pt_br.trim() : undefined, doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined, lang: job.payload.lang === "en" ? "en" : "pt", context_chunks: typeof job.payload.context_chunks === "number" ? job.payload.context_chunks : undefined, @@ -74,11 +76,13 @@ export async function dispatch(job: InvestigationJob, workerId: string): Promise break; } case "case_report": { - // Payload: { topic, doc_id?, slug?, lang? } + // Payload: { topic, topic_pt_br?, doc_id?, slug?, lang? } const topic = String(job.payload.topic ?? "").trim(); if (!topic) throw new Error("case_report requires payload.topic"); const task: CaseWriterTask = { job_id: job.job_id, topic, + topic_pt_br: typeof job.payload.topic_pt_br === "string" + ? job.payload.topic_pt_br.trim() : undefined, doc_id: typeof job.payload.doc_id === "string" ? job.payload.doc_id : undefined, slug: typeof job.payload.slug === "string" ? job.payload.slug : undefined, lang: job.payload.lang === "en" ? "en" : "pt", diff --git a/investigator-runtime/src/tools/write_calibration.ts b/investigator-runtime/src/tools/write_calibration.ts index 5c124a0..56e8ae7 100644 --- a/investigator-runtime/src/tools/write_calibration.ts +++ b/investigator-runtime/src/tools/write_calibration.ts @@ -18,8 +18,10 @@ export interface WriteCalibrationArgs { new_confidence_band: "high" | "medium" | "low" | "speculation"; delta: number; rationale: string; + rationale_pt_br?: string; recommended_action: "keep" | "downgrade" | "upgrade" | "supersede"; supersede_reason?: string; + supersede_reason_pt_br?: string; /** previous posterior captured at call time — used in the case-file row. 
*/ old_posterior: number | null; old_confidence_band: string | null; @@ -41,6 +43,7 @@ function bandFromPosterior(p: number): "high" | "medium" | "low" | "speculation" function buildSection(args: WriteCalibrationArgs, ctx: WriteCalibrationContext): string { const ts = new Date().toISOString(); + const ptRationale = args.rationale_pt_br ?? args.rationale; const rows = [ `### ${ts} — ${args.recommended_action}`, "", @@ -52,10 +55,13 @@ function buildSection(args: WriteCalibrationArgs, ctx: WriteCalibrationContext): `| band | ${args.old_confidence_band ?? "—"} | **${args.new_confidence_band}** |`, `| delta | — | ${args.delta >= 0 ? "+" : ""}${args.delta.toFixed(3)} |`, "", - `**Rationale.** ${args.rationale}`, + `**Rationale (EN).** ${args.rationale}`, + "", + `**Justificativa (PT-BR).** ${ptRationale}`, ]; if (args.recommended_action === "supersede" && args.supersede_reason) { - rows.push("", `**Supersede reason.** ${args.supersede_reason}`); + rows.push("", `**Supersede reason (EN).** ${args.supersede_reason}`); + rows.push("", `**Motivo da substituição (PT-BR).** ${args.supersede_reason_pt_br ?? args.supersede_reason}`); } rows.push(""); return rows.join("\n"); @@ -86,10 +92,10 @@ export async function writeCalibration( // Force the band to match the posterior — Tetlock can mis-label. body.new_confidence_band = expectedBand; if (!body.rationale?.trim()) throw new Error("rationale required"); - // Soft cap: 1200 chars. Tetlock often writes 600-800 of substantive - // reasoning + chunk citations; the prompt asks for ≤ 600 but a 2× slack - // beats failing the job on an honest analysis. + if (!body.rationale_pt_br?.trim()) throw new Error("rationale_pt_br required (bilingual contract)"); + // Soft cap: 1200 chars per language. 
if (body.rationale.length > 1200) throw new Error(`rationale too long (${body.rationale.length} > 1200)`); + if (body.rationale_pt_br.length > 1200) throw new Error(`rationale_pt_br too long`); const action = body.recommended_action; if (!["keep", "downgrade", "upgrade", "supersede"].includes(action)) { diff --git a/investigator-runtime/src/tools/write_case_report.ts b/investigator-runtime/src/tools/write_case_report.ts index 9694df4..627554b 100644 --- a/investigator-runtime/src/tools/write_case_report.ts +++ b/investigator-runtime/src/tools/write_case_report.ts @@ -13,6 +13,7 @@ import { env } from "../lib/env"; export interface WriteCaseReportArgs { topic: string; + topic_pt_br?: string; slug: string; body_md: string; meta: { @@ -36,6 +37,7 @@ function renderFrontmatter(args: WriteCaseReportArgs, ctx: WriteCaseReportContex `schema_version: "0.1.0"`, `type: case_report`, `topic: ${JSON.stringify(args.topic)}`, + `topic_pt_br: ${JSON.stringify(args.topic_pt_br ?? args.topic)}`, `slug: ${args.slug}`, `created_by: ${ctx.detective}`, `job_id: ${ctx.job_id}`, diff --git a/investigator-runtime/src/tools/write_contradiction.ts b/investigator-runtime/src/tools/write_contradiction.ts index d45391a..bacda20 100644 --- a/investigator-runtime/src/tools/write_contradiction.ts +++ b/investigator-runtime/src/tools/write_contradiction.ts @@ -27,14 +27,17 @@ export interface ContradictionPosition { chunk_id: string; /** The verbatim or paraphrased claim that puts this chunk on this side. */ statement: string; + statement_pt_br?: string; /** Optional weight or stance label (e.g. "asserts", "denies"). 
*/ stance?: string; } export interface WriteContradictionArgs { topic: string; + topic_pt_br?: string; positions: ContradictionPosition[]; notes?: string; + notes_pt_br?: string; resolution_status?: "open" | "resolved" | "irreconcilable"; } @@ -46,6 +49,7 @@ export interface WriteContradictionContext { interface ResolvedPosition extends ContradictionPosition { chunk_pk: number; page: number; + statement_pt_br: string; } /** @@ -90,7 +94,9 @@ function renderMd( return [ `### Position ${i + 1}${p.stance ? ` — ${p.stance}` : ""}`, "", - `> ${p.statement}`, + `**(EN)** > ${p.statement}`, + "", + `**(PT-BR)** > ${p.statement_pt_br}`, "", `Source: [[${p.doc_id}/p${pageStr}#${p.chunk_id}]]`, ].join("\n"); @@ -101,16 +107,21 @@ function renderMd( "", `# Contradiction ${id}`, "", - `**Topic.** ${body.topic}`, + `**Topic (EN).** ${body.topic}`, + `**Tópico (PT-BR).** ${body.topic_pt_br ?? body.topic}`, "", "## Positions in tension", "", positionBlocks.join("\n\n"), "", - "## Notes", + "## Notes (EN)", "", body.notes || "_(no commentary recorded)_", "", + "## Notas (PT-BR)", + "", + body.notes_pt_br || "_(sem comentário registrado)_", + "", ].join("\n"); } @@ -119,12 +130,16 @@ export async function writeContradiction( ctx: WriteContradictionContext, ): Promise<{ contradiction_id: string; case_file: string }> { if (!body.topic?.trim()) throw new Error("topic required"); + if (!body.topic_pt_br?.trim()) throw new Error("topic_pt_br required (bilingual contract)"); if (!Array.isArray(body.positions) || body.positions.length < 2) { throw new Error("at least 2 positions required"); } if (body.notes && body.notes.length > 4000) { throw new Error(`notes too long (${body.notes.length} > 4000)`); } + if (body.notes && !body.notes_pt_br?.trim()) { + throw new Error("notes_pt_br required when notes set (bilingual contract)"); + } const resolved: ResolvedPosition[] = []; for (const p of body.positions) { @@ -134,6 +149,9 @@ export async function writeContradiction( if 
(!p?.statement?.trim()) { throw new Error(`position ${p.doc_id}/${p.chunk_id} missing statement`); } + if (!p?.statement_pt_br?.trim()) { + throw new Error(`position ${p.doc_id}/${p.chunk_id} missing statement_pt_br (bilingual contract)`); + } const chunk = await resolveChunk(p.doc_id, p.chunk_id); if (!chunk) { throw new Error(`chunk ${p.doc_id}/${p.chunk_id} not found`); @@ -141,6 +159,7 @@ export async function writeContradiction( resolved.push({ ...p, statement: p.statement.trim(), + statement_pt_br: p.statement_pt_br.trim(), chunk_pk: chunk.chunk_pk, page: chunk.page, }); @@ -160,20 +179,24 @@ export async function writeContradiction( chunk_id: p.chunk_id, page: p.page, statement: p.statement, + statement_pt_br: p.statement_pt_br, stance: p.stance ?? null, })); await query( `INSERT INTO public.contradictions - (contradiction_id, topic, chunks, detected_by, resolution_status, notes) - VALUES ($1, $2, $3::jsonb, $4, $5, $6)`, + (contradiction_id, topic, topic_pt_br, chunks, detected_by, + resolution_status, notes, notes_pt_br) + VALUES ($1, $2, $3, $4::jsonb, $5, $6, $7, $8)`, [ contradiction_id, body.topic.trim(), + body.topic_pt_br!.trim(), JSON.stringify(chunkPayload), ctx.detective, body.resolution_status ?? "open", body.notes ?? null, + body.notes_pt_br ?? 
null, ], ); diff --git a/investigator-runtime/src/tools/write_hypothesis.ts b/investigator-runtime/src/tools/write_hypothesis.ts index 94f057e..323a230 100644 --- a/investigator-runtime/src/tools/write_hypothesis.ts +++ b/investigator-runtime/src/tools/write_hypothesis.ts @@ -25,9 +25,13 @@ export interface EvidenceRef { export interface WriteHypothesisArgs { question: string; + question_pt_br?: string; position: string; + position_pt_br?: string; argument_for?: string; + argument_for_pt_br?: string; argument_against?: string; + argument_against_pt_br?: string; prior?: number; posterior?: number; confidence_band?: "high" | "medium" | "low" | "speculation"; @@ -70,23 +74,39 @@ function renderMd(id: string, body: WriteHypothesisArgs, ctx: WriteHypothesisCon `created_at: ${new Date().toISOString()}`, "---", ].filter(Boolean).join("\n"); + + const ptQuestion = body.question_pt_br ?? body.question; + const ptPosition = body.position_pt_br ?? body.position; + const ptFor = body.argument_for_pt_br ?? body.argument_for; + const ptAgainst = body.argument_against_pt_br ?? 
body.argument_against; + return [ fm, "", `# Hypothesis ${id}`, "", - `**Question.** ${body.question}`, + `**Question (EN).** ${body.question}`, + `**Pergunta (PT-BR).** ${ptQuestion}`, "", - `**Position.** ${body.position}`, + `**Position (EN).** ${body.position}`, + `**Posição (PT-BR).** ${ptPosition}`, "", - "## Argument for", + "## Argument for (EN)", "", body.argument_for || "_(none recorded — speculation)_", "", - "## Argument against", + "## Argumento a favor (PT-BR)", + "", + ptFor || "_(nenhum registrado — especulação)_", + "", + "## Argument against (EN)", "", body.argument_against || "_(none recorded — no counter-argument framed yet)_", "", + "## Argumento contra (PT-BR)", + "", + ptAgainst || "_(nenhum registrado — sem contra-argumento ainda)_", + "", "## Evidence", "", evRefs || "_(none linked yet — Locard chain pending)_", @@ -100,6 +120,13 @@ export async function writeHypothesis( ): Promise<{ hypothesis_id: string; case_file: string }> { if (!body.question?.trim()) throw new Error("question required"); if (!body.position?.trim()) throw new Error("position required"); + if (!body.position_pt_br?.trim()) throw new Error("position_pt_br required (bilingual contract)"); + if (body.argument_for && !body.argument_for_pt_br?.trim()) { + throw new Error("argument_for_pt_br required when argument_for is set (bilingual contract)"); + } + if (body.argument_against && !body.argument_against_pt_br?.trim()) { + throw new Error("argument_against_pt_br required when argument_against is set (bilingual contract)"); + } const prior = clamp01(body.prior); const posterior = clamp01(body.posterior); @@ -128,12 +155,16 @@ export async function writeHypothesis( const hypothesis_id = await allocate.hypothesisId(); await query( `INSERT INTO public.hypotheses - (hypothesis_id, question, position, argument_for, argument_against, + (hypothesis_id, question, question_pt_br, position, position_pt_br, + argument_for, argument_for_pt_br, argument_against, argument_against_pt_br, 
evidence_refs, prior, posterior, confidence_band, status, created_by) - VALUES ($1,$2,$3,$4,$5,$6::jsonb,$7,$8,$9,$10,$11)`, + VALUES ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10::jsonb,$11,$12,$13,$14,$15)`, [ - hypothesis_id, body.question, body.position, - body.argument_for ?? null, body.argument_against ?? null, + hypothesis_id, + body.question, body.question_pt_br ?? body.question, + body.position, body.position_pt_br, + body.argument_for ?? null, body.argument_for_pt_br ?? null, + body.argument_against ?? null, body.argument_against_pt_br ?? null, JSON.stringify(refs), prior, posterior, band, body.status ?? "open", ctx.detective, diff --git a/investigator-runtime/src/tools/write_outlier_gap.ts b/investigator-runtime/src/tools/write_outlier_gap.ts index b697d95..3751d49 100644 --- a/investigator-runtime/src/tools/write_outlier_gap.ts +++ b/investigator-runtime/src/tools/write_outlier_gap.ts @@ -16,12 +16,17 @@ import { query, queryOne } from "../lib/pg"; export interface WriteOutlierGapArgs { title: string; + title_pt_br?: string; doc_id: string; chunk_id: string; dominant_model: string; + dominant_model_pt_br?: string; why_surprising: string; + why_surprising_pt_br?: string; what_it_implies: string; + what_it_implies_pt_br?: string; suggested_next_move: string; + suggested_next_move_pt_br?: string; } export interface WriteOutlierGapContext { @@ -61,25 +66,42 @@ function renderMd( fm, "", `# Outlier ${id} — ${body.title}`, + `**Título (PT-BR).** ${body.title_pt_br ?? body.title}`, "", `**Source.** [[${body.doc_id}/p${pageStr}#${body.chunk_id}]]`, "", - "## Dominant model", + "## Dominant model (EN)", "", body.dominant_model, "", - "## Why surprising", + "## Modelo dominante (PT-BR)", + "", + body.dominant_model_pt_br ?? body.dominant_model, + "", + "## Why surprising (EN)", "", body.why_surprising, "", - "## What it implies", + "## Por que é surpreendente (PT-BR)", + "", + body.why_surprising_pt_br ?? 
body.why_surprising, + "", + "## What it implies (EN)", "", body.what_it_implies, "", - "## Suggested next move", + "## O que implica (PT-BR)", + "", + body.what_it_implies_pt_br ?? body.what_it_implies, + "", + "## Suggested next move (EN)", "", body.suggested_next_move, "", + "## Próximo passo sugerido (PT-BR)", + "", + body.suggested_next_move_pt_br ?? body.suggested_next_move, + "", ].join("\n"); } @@ -88,12 +110,18 @@ export async function writeOutlierGap( ctx: WriteOutlierGapContext, ): Promise<{ gap_id: string; case_file: string }> { if (!body.title?.trim()) throw new Error("title required"); + if (!body.title_pt_br?.trim()) throw new Error("title_pt_br required (bilingual contract)"); if (!body.doc_id?.trim() || !body.chunk_id?.trim()) throw new Error("doc_id + chunk_id required"); if (!body.dominant_model?.trim()) throw new Error("dominant_model required"); + if (!body.dominant_model_pt_br?.trim()) throw new Error("dominant_model_pt_br required (bilingual contract)"); if (!body.why_surprising?.trim()) throw new Error("why_surprising required"); + if (!body.why_surprising_pt_br?.trim()) throw new Error("why_surprising_pt_br required (bilingual contract)"); if (!body.what_it_implies?.trim()) throw new Error("what_it_implies required"); + if (!body.what_it_implies_pt_br?.trim()) throw new Error("what_it_implies_pt_br required (bilingual contract)"); if (!body.suggested_next_move?.trim()) throw new Error("suggested_next_move required"); + if (!body.suggested_next_move_pt_br?.trim()) throw new Error("suggested_next_move_pt_br required (bilingual contract)"); if (body.why_surprising.length > 600) throw new Error(`why_surprising too long`); + if (body.why_surprising_pt_br.length > 600) throw new Error(`why_surprising_pt_br too long`); const cid = normalizeChunkId(body.chunk_id); const chunk = await queryOne<{ chunk_pk: number; page: number }>( @@ -109,21 +137,28 @@ export async function writeOutlierGap( doc_id: body.doc_id, chunk_id: cid, page: chunk.page, - 
dominant_model: body.dominant_model, - why_surprising: body.why_surprising, - what_it_implies: body.what_it_implies, - title: body.title, + title: body.title, + title_pt_br: body.title_pt_br, + dominant_model: body.dominant_model, + dominant_model_pt_br: body.dominant_model_pt_br, + why_surprising: body.why_surprising, + why_surprising_pt_br: body.why_surprising_pt_br, + what_it_implies: body.what_it_implies, + what_it_implies_pt_br: body.what_it_implies_pt_br, }; await query( `INSERT INTO public.gaps - (gap_id, description, scope, suggested_next_move, status, created_by) - VALUES ($1, $2, $3::jsonb, $4, 'open', $5)`, + (gap_id, description, description_pt_br, scope, + suggested_next_move, suggested_next_move_pt_br, status, created_by) + VALUES ($1, $2, $3, $4::jsonb, $5, $6, 'open', $7)`, [ gap_id, body.title, + body.title_pt_br, JSON.stringify(scope), body.suggested_next_move, + body.suggested_next_move_pt_br, ctx.detective, ], ); diff --git a/investigator-runtime/src/tools/write_red_team_review.ts b/investigator-runtime/src/tools/write_red_team_review.ts index 83d5e83..a9d2385 100644 --- a/investigator-runtime/src/tools/write_red_team_review.ts +++ b/investigator-runtime/src/tools/write_red_team_review.ts @@ -19,10 +19,15 @@ export interface RedTeamReviewArgs { hypothesis_id: string; severity: "low" | "medium" | "high"; hidden_assumptions: string[]; + hidden_assumptions_pt_br?: string[]; failure_modes: string[]; + failure_modes_pt_br?: string[]; alternative_explanations: string[]; + alternative_explanations_pt_br?: string[]; recommended_tests: string[]; + recommended_tests_pt_br?: string[]; verdict_one_sentence: string; + verdict_one_sentence_pt_br?: string; } export interface RedTeamReviewContext { @@ -34,8 +39,14 @@ const SECTION_MARKER = "## Red-team review"; function buildSection(args: RedTeamReviewArgs, ctx: RedTeamReviewContext): string { const ts = new Date().toISOString(); - const bullets = (items: string[]): string => - items.length === 0 ? 
"_(none flagged)_" : items.map((x) => `- ${x}`).join("\n"); + const bullets = (items: string[], emptyMsg: string): string => + items.length === 0 ? emptyMsg : items.map((x) => `- ${x}`).join("\n"); + const pt = (a: string[] | undefined, fb: string[]) => a && a.length === fb.length ? a : fb; + const ptHidden = pt(args.hidden_assumptions_pt_br, args.hidden_assumptions); + const ptFail = pt(args.failure_modes_pt_br, args.failure_modes); + const ptAlt = pt(args.alternative_explanations_pt_br, args.alternative_explanations); + const ptTests = pt(args.recommended_tests_pt_br, args.recommended_tests); + const ptVerdict = args.verdict_one_sentence_pt_br ?? args.verdict_one_sentence; return [ "", @@ -43,19 +54,32 @@ function buildSection(args: RedTeamReviewArgs, ctx: RedTeamReviewContext): strin "", `_Reviewed by ${ctx.detective} on ${ts} — job \`${ctx.job_id}\`._`, "", - `**Verdict.** ${args.verdict_one_sentence}`, + `**Verdict (EN).** ${args.verdict_one_sentence}`, + `**Veredito (PT-BR).** ${ptVerdict}`, "", - "### Hidden assumptions", - bullets(args.hidden_assumptions), + "### Hidden assumptions (EN)", + bullets(args.hidden_assumptions, "_(none flagged)_"), "", - "### Failure modes", - bullets(args.failure_modes), + "### Premissas ocultas (PT-BR)", + bullets(ptHidden, "_(nenhuma sinalizada)_"), "", - "### Alternative explanations not addressed", - bullets(args.alternative_explanations), + "### Failure modes (EN)", + bullets(args.failure_modes, "_(none flagged)_"), "", - "### Recommended discriminating tests", - bullets(args.recommended_tests), + "### Modos de falha (PT-BR)", + bullets(ptFail, "_(nenhum sinalizado)_"), + "", + "### Alternative explanations not addressed (EN)", + bullets(args.alternative_explanations, "_(none flagged)_"), + "", + "### Explicações alternativas não abordadas (PT-BR)", + bullets(ptAlt, "_(nenhuma sinalizada)_"), + "", + "### Recommended discriminating tests (EN)", + bullets(args.recommended_tests, "_(none flagged)_"), + "", + "### Testes 
discriminantes recomendados (PT-BR)", + bullets(ptTests, "_(nenhum sinalizado)_"), "", ].join("\n"); } @@ -82,9 +106,15 @@ export async function writeRedTeamReview( throw new Error(`bad severity: ${body.severity}`); } if (!body.verdict_one_sentence?.trim()) throw new Error("verdict_one_sentence required"); + if (!body.verdict_one_sentence_pt_br?.trim()) { + throw new Error("verdict_one_sentence_pt_br required (bilingual contract)"); + } if (body.verdict_one_sentence.length > 280) { throw new Error(`verdict too long (${body.verdict_one_sentence.length} > 280)`); } + if (body.verdict_one_sentence_pt_br.length > 280) { + throw new Error(`verdict_pt_br too long (${body.verdict_one_sentence_pt_br.length} > 280)`); + } // Defensive: cap each array to 5 entries × 240 chars (prompt says ≤ 200 but // we leave some slack rather than truncate silently). diff --git a/investigator-runtime/src/tools/write_witness_analysis.ts b/investigator-runtime/src/tools/write_witness_analysis.ts index c894f8a..a338b4a 100644 --- a/investigator-runtime/src/tools/write_witness_analysis.ts +++ b/investigator-runtime/src/tools/write_witness_analysis.ts @@ -31,9 +31,12 @@ export interface WriteWitnessAnalysisArgs { person_entity_pk: number; credibility: "high" | "medium" | "low" | "speculation"; access_to_event: string; + access_to_event_pt_br?: string; bias_notes: string; + bias_notes_pt_br?: string; corroboration_refs: CorroborationRef[]; verdict: string; + verdict_pt_br?: string; } export interface WriteWitnessAnalysisContext { @@ -106,16 +109,25 @@ function renderMd( "", `**Credibility.** ${body.credibility}`, "", - `**Verdict.** ${body.verdict}`, + `**Verdict (EN).** ${body.verdict}`, + `**Veredito (PT-BR).** ${body.verdict_pt_br ?? body.verdict}`, "", - "## Access to event", + "## Access to event (EN)", "", body.access_to_event, "", - "## Bias notes", + "## Acesso ao evento (PT-BR)", + "", + body.access_to_event_pt_br ?? 
body.access_to_event, + "", + "## Bias notes (EN)", "", body.bias_notes, "", + "## Notas de viés (PT-BR)", + "", + body.bias_notes_pt_br ?? body.bias_notes, + "", "## Corroboration chain", "", refBlocks, @@ -132,11 +144,17 @@ export async function writeWitnessAnalysis( const validBand = ["high", "medium", "low", "speculation"].includes(body.credibility); if (!validBand) throw new Error(`bad credibility: ${body.credibility}`); if (!body.access_to_event?.trim()) throw new Error("access_to_event required"); + if (!body.access_to_event_pt_br?.trim()) throw new Error("access_to_event_pt_br required (bilingual contract)"); if (!body.bias_notes?.trim()) throw new Error("bias_notes required"); + if (!body.bias_notes_pt_br?.trim()) throw new Error("bias_notes_pt_br required (bilingual contract)"); if (!body.verdict?.trim()) throw new Error("verdict required"); + if (!body.verdict_pt_br?.trim()) throw new Error("verdict_pt_br required (bilingual contract)"); if (body.verdict.length > 280) throw new Error(`verdict too long (${body.verdict.length} > 280)`); + if (body.verdict_pt_br.length > 280) throw new Error(`verdict_pt_br too long (${body.verdict_pt_br.length} > 280)`); if (body.access_to_event.length > 800) throw new Error(`access_to_event too long (${body.access_to_event.length} > 800)`); + if (body.access_to_event_pt_br.length > 800) throw new Error(`access_to_event_pt_br too long`); if (body.bias_notes.length > 800) throw new Error(`bias_notes too long (${body.bias_notes.length} > 800)`); + if (body.bias_notes_pt_br.length > 800) throw new Error(`bias_notes_pt_br too long`); // Verify entity exists and is a person. 
const ent = await queryOne<{ canonical_name: string; entity_class: string }>( @@ -159,17 +177,20 @@ export async function writeWitnessAnalysis( const witness_id = await allocate.witnessId(); await query( `INSERT INTO public.witnesses - (witness_id, person_entity_pk, credibility, access_to_event, - bias_notes, corroboration_refs, verdict, created_by) - VALUES ($1, $2, $3, $4, $5, $6::jsonb, $7, $8)`, + (witness_id, person_entity_pk, credibility, + access_to_event, access_to_event_pt_br, + bias_notes, bias_notes_pt_br, + corroboration_refs, verdict, verdict_pt_br, created_by) + VALUES ($1, $2, $3, $4, $5, $6, $7, $8::jsonb, $9, $10, $11)`, [ witness_id, body.person_entity_pk, body.credibility, - body.access_to_event, body.bias_notes, + body.access_to_event, body.access_to_event_pt_br, + body.bias_notes, body.bias_notes_pt_br, JSON.stringify(refs.map((r) => ({ chunk_pk: r.chunk_pk, doc_id: r.doc_id, chunk_id: r.chunk_id, page: r.page, supports: r.supports, }))), - body.verdict, ctx.detective, + body.verdict, body.verdict_pt_br, ctx.detective, ], ); diff --git a/web/app/api/jobs/[id]/route.ts b/web/app/api/jobs/[id]/route.ts index df9308b..3d3ccb4 100644 --- a/web/app/api/jobs/[id]/route.ts +++ b/web/app/api/jobs/[id]/route.ts @@ -46,9 +46,13 @@ interface EvidenceRow { interface HypothesisRow { hypothesis_id: string; question: string | null; + question_pt_br: string | null; position: string | null; + position_pt_br: string | null; argument_for: string | null; + argument_for_pt_br: string | null; argument_against: string | null; + argument_against_pt_br: string | null; prior: number | null; posterior: number | null; confidence_band: string | null; @@ -59,9 +63,11 @@ interface HypothesisRow { interface ContradictionRow { contradiction_id: string; topic: string; + topic_pt_br: string | null; chunks: unknown; resolution_status: string | null; notes: string | null; + notes_pt_br: string | null; detected_by: string | null; } @@ -71,16 +77,21 @@ interface WitnessRow { 
entity_id: string | null; credibility: string | null; access_to_event: string | null; + access_to_event_pt_br: string | null; bias_notes: string | null; + bias_notes_pt_br: string | null; corroboration_refs: unknown; verdict: string | null; + verdict_pt_br: string | null; } interface GapRow { gap_id: string; description: string; + description_pt_br: string | null; scope: unknown; suggested_next_move: string | null; + suggested_next_move_pt_br: string | null; status: string; created_by: string; } @@ -144,7 +155,8 @@ export async function GET( : Promise.resolve([] as EvidenceRow[]), hypothesisIds.length > 0 ? pgQuery( - `SELECT hypothesis_id, question, position, argument_for, argument_against, + `SELECT hypothesis_id, question, question_pt_br, position, position_pt_br, + argument_for, argument_for_pt_br, argument_against, argument_against_pt_br, prior, posterior, confidence_band, status, evidence_refs FROM public.hypotheses WHERE hypothesis_id = ANY($1::text[]) @@ -154,7 +166,8 @@ export async function GET( : Promise.resolve([] as HypothesisRow[]), contradictionIds.length > 0 ? pgQuery( - `SELECT contradiction_id, topic, chunks, resolution_status, notes, detected_by + `SELECT contradiction_id, topic, topic_pt_br, chunks, resolution_status, + notes, notes_pt_br, detected_by FROM public.contradictions WHERE contradiction_id = ANY($1::text[]) ORDER BY contradiction_id`, @@ -163,9 +176,10 @@ export async function GET( : Promise.resolve([] as ContradictionRow[]), witnessIds.length > 0 ? 
pgQuery( - `SELECT w.witness_id, e.canonical_name, e.entity_id, - w.credibility, w.access_to_event, w.bias_notes, - w.corroboration_refs, w.verdict + `SELECT w.witness_id, e.canonical_name, e.entity_id, w.credibility, + w.access_to_event, w.access_to_event_pt_br, + w.bias_notes, w.bias_notes_pt_br, + w.corroboration_refs, w.verdict, w.verdict_pt_br FROM public.witnesses w LEFT JOIN public.entities e ON e.entity_pk = w.person_entity_pk WHERE w.witness_id = ANY($1::text[]) @@ -175,7 +189,8 @@ export async function GET( : Promise.resolve([] as WitnessRow[]), gapIds.length > 0 ? pgQuery( - `SELECT gap_id, description, scope, suggested_next_move, status, created_by + `SELECT gap_id, description, description_pt_br, scope, + suggested_next_move, suggested_next_move_pt_br, status, created_by FROM public.gaps WHERE gap_id = ANY($1::text[]) ORDER BY gap_id`, [gapIds], ) diff --git a/web/app/bureau/page.tsx b/web/app/bureau/page.tsx index 9bbc1eb..803e95f 100644 --- a/web/app/bureau/page.tsx +++ b/web/app/bureau/page.tsx @@ -22,7 +22,9 @@ interface EvidenceRow { interface HypothesisRow { hypothesis_id: string; question: string; + question_pt_br: string | null; position: string; + position_pt_br: string | null; prior: number | string | null; posterior: number | string | null; confidence_band: string | null; @@ -33,15 +35,18 @@ interface HypothesisRow { interface ContradictionRow { contradiction_id: string; topic: string; + topic_pt_br: string | null; resolution_status: string; chunks: unknown; } interface GapRow { gap_id: string; description: string; + description_pt_br: string | null; scope: unknown; status: string; suggested_next_move: string | null; + suggested_next_move_pt_br: string | null; } interface WitnessRow { witness_id: string; @@ -49,6 +54,7 @@ interface WitnessRow { entity_id: string | null; credibility: string | null; verdict: string | null; + verdict_pt_br: string | null; } interface JobRow { job_id: string; @@ -77,8 +83,8 @@ export default async function 
BureauPage() { // All artefacts. Server component — single round per query, no n+1. const [hyp, ev, ctr, gap, wit, jobs] = await Promise.all([ pgQuery( - `SELECT hypothesis_id, question, position, prior, posterior, confidence_band, - status, reviewed_by, created_at + `SELECT hypothesis_id, question, question_pt_br, position, position_pt_br, + prior, posterior, confidence_band, status, reviewed_by, created_at FROM public.hypotheses ORDER BY created_at DESC LIMIT 100`, ).catch(() => []), pgQuery( @@ -86,15 +92,17 @@ export default async function BureauPage() { FROM public.evidence ORDER BY created_at DESC LIMIT 100`, ).catch(() => []), pgQuery( - `SELECT contradiction_id, topic, resolution_status, chunks + `SELECT contradiction_id, topic, topic_pt_br, resolution_status, chunks FROM public.contradictions ORDER BY created_at DESC LIMIT 100`, ).catch(() => []), pgQuery( - `SELECT gap_id, description, scope, status, suggested_next_move + `SELECT gap_id, description, description_pt_br, scope, status, + suggested_next_move, suggested_next_move_pt_br FROM public.gaps ORDER BY created_at DESC LIMIT 100`, ).catch(() => []), pgQuery( - `SELECT w.witness_id, e.canonical_name, e.entity_id, w.credibility, w.verdict + `SELECT w.witness_id, e.canonical_name, e.entity_id, w.credibility, + w.verdict, w.verdict_pt_br FROM public.witnesses w LEFT JOIN public.entities e ON e.entity_pk = w.person_entity_pk ORDER BY w.created_at DESC LIMIT 100`, @@ -183,7 +191,7 @@ export default async function BureauPage() { )} -
{h.position}
+
{h.position_pt_br || h.position}
prior {prior?.toFixed(2) ?? "—"} → posterior {post?.toFixed(2) ?? "—"} {delta !== null && 0 ? " text-[#06d6a0]" : delta < 0 ? " text-[#ff6ec7]" : ""}> · Δ {delta >= 0 ? "+" : ""}{delta.toFixed(3)}} @@ -221,7 +229,7 @@ export default async function BureauPage() { {c.contradiction_id} {n} positions · {c.resolution_status}
-
{c.topic}
+
{c.topic_pt_br || c.topic}
); })} @@ -231,8 +239,10 @@ export default async function BureauPage() {
{gap.length === 0 ? : gap.map((g) => { const s = (g.scope ?? {}) as Record; - const title = (s.title as string) || g.description; + const title = (s.title_pt_br as string) || (s.title as string) || g.description_pt_br || g.description; + const why = (s.why_surprising_pt_br as string) || (s.why_surprising as string) || null; const isOutlier = s.kind === "outlier"; + const nextMove = g.suggested_next_move_pt_br || g.suggested_next_move; return (
@@ -240,11 +250,11 @@ export default async function BureauPage() { {g.status}
{title}
- {s.why_surprising !== undefined && ( -
{String(s.why_surprising)}
+ {why && ( +
{why}
)} - {g.suggested_next_move && ( -
→ {g.suggested_next_move}
+ {nextMove && ( +
→ {nextMove}
)}
); @@ -262,7 +272,7 @@ export default async function BureauPage() {
{w.entity_id ? {w.canonical_name ?? w.entity_id} : (w.canonical_name ?? "—")}
- {w.verdict &&
{w.verdict}
} + {(w.verdict_pt_br || w.verdict) &&
{w.verdict_pt_br || w.verdict}
} ))}
diff --git a/web/app/h/[hypothesisId]/page.tsx b/web/app/h/[hypothesisId]/page.tsx index 5b74f8c..48dc2d5 100644 --- a/web/app/h/[hypothesisId]/page.tsx +++ b/web/app/h/[hypothesisId]/page.tsx @@ -25,9 +25,13 @@ export const dynamic = "force-dynamic"; interface HypothesisRow { hypothesis_id: string; question: string; + question_pt_br: string | null; position: string; + position_pt_br: string | null; argument_for: string | null; + argument_for_pt_br: string | null; argument_against: string | null; + argument_against_pt_br: string | null; prior: number | string | null; posterior: number | string | null; confidence_band: string | null; @@ -146,7 +150,8 @@ export default async function HypothesisPage({ if (!/^H-\d{4}$/.test(hypothesisId)) notFound(); const rows = await pgQuery( - `SELECT hypothesis_id, question, position, argument_for, argument_against, + `SELECT hypothesis_id, question, question_pt_br, position, position_pt_br, + argument_for, argument_for_pt_br, argument_against, argument_against_pt_br, prior, posterior, confidence_band, status, evidence_refs, created_by, reviewed_by, created_at, updated_at FROM public.hypotheses WHERE hypothesis_id = $1`, @@ -209,8 +214,11 @@ export default async function HypothesisPage({ {hypothesisId} · created by {h.created_by} {h.reviewed_by && <> · reviewed by {h.reviewed_by}} -

{h.position}

-

Question: {h.question}

+

{h.position_pt_br || h.position}

+ {h.position_pt_br && h.position_pt_br !== h.position && ( +

{h.position}

+ )} +

Pergunta: {h.question_pt_br || h.question}

{h.confidence_band && ( @@ -238,8 +246,8 @@ export default async function HypothesisPage({ {/* Argument grid */}
- - + +
{/* Evidence chain */} @@ -273,13 +281,20 @@ export default async function HypothesisPage({ ); } -function ArgumentPanel({ kind, body }: { kind: "for" | "against"; body: string | null }) { +function ArgumentPanel({ kind, body, bodyEn }: { kind: "for" | "against"; body: string | null; bodyEn?: string | null }) { const tone = kind === "for" ? "border-[#06d6a0] text-[#06d6a0]" : "border-[#ff6ec7] text-[#ff6ec7]"; - const label = kind === "for" ? "Argumento a favor" : "Argumento contra"; + const label = kind === "for" ? "Argumento a favor (PT-BR)" : "Argumento contra (PT-BR)"; + const enLabel = kind === "for" ? "Argument for (EN)" : "Argument against (EN)"; return (
{label}
+ {bodyEn && ( +
+ {enLabel} +
+
+ )}
); } diff --git a/web/components/bureau-snapshot.tsx b/web/components/bureau-snapshot.tsx index 6495cbf..6f1a536 100644 --- a/web/components/bureau-snapshot.tsx +++ b/web/components/bureau-snapshot.tsx @@ -12,10 +12,10 @@ import { QuickLaunch } from "./quick-launch"; interface CountRow { c: string } interface RecentEvidence { evidence_id: string; grade: string; verbatim_excerpt: string; source_page_id: string; confidence_band: string | null } -interface RecentHypothesis { hypothesis_id: string; position: string; posterior: number | string | null; confidence_band: string | null; reviewed_by: string | null } -interface RecentContradiction { contradiction_id: string; topic: string; resolution_status: string } -interface RecentGap { gap_id: string; description: string; scope: unknown } -interface RecentWitness { witness_id: string; canonical_name: string | null; credibility: string | null; verdict: string | null } +interface RecentHypothesis { hypothesis_id: string; position: string; position_pt_br: string | null; posterior: number | string | null; confidence_band: string | null; reviewed_by: string | null } +interface RecentContradiction { contradiction_id: string; topic: string; topic_pt_br: string | null; resolution_status: string } +interface RecentGap { gap_id: string; description: string; description_pt_br: string | null; scope: unknown } +interface RecentWitness { witness_id: string; canonical_name: string | null; credibility: string | null; verdict: string | null; verdict_pt_br: string | null } interface RecentJob { job_id: string; kind: string; status: string; created_at: string; payload: Record | null } const DETECTIVES = [ @@ -60,7 +60,7 @@ async function loadSnapshot() { const [hyp, ev, ctr, gap, wit, jobs] = await Promise.all([ pgQuery( - `SELECT hypothesis_id, position, posterior, confidence_band, reviewed_by + `SELECT hypothesis_id, position, position_pt_br, posterior, confidence_band, reviewed_by FROM public.hypotheses ORDER BY created_at DESC LIMIT 4`, 
).catch(() => []), pgQuery( @@ -68,14 +68,15 @@ async function loadSnapshot() { FROM public.evidence ORDER BY created_at DESC LIMIT 3`, ).catch(() => []), pgQuery( - `SELECT contradiction_id, topic, resolution_status + `SELECT contradiction_id, topic, topic_pt_br, resolution_status FROM public.contradictions ORDER BY created_at DESC LIMIT 3`, ).catch(() => []), pgQuery( - `SELECT gap_id, description, scope FROM public.gaps ORDER BY created_at DESC LIMIT 3`, + `SELECT gap_id, description, description_pt_br, scope + FROM public.gaps ORDER BY created_at DESC LIMIT 3`, ).catch(() => []), pgQuery( - `SELECT w.witness_id, e.canonical_name, w.credibility, w.verdict + `SELECT w.witness_id, e.canonical_name, w.credibility, w.verdict, w.verdict_pt_br FROM public.witnesses w LEFT JOIN public.entities e ON e.entity_pk = w.person_entity_pk ORDER BY w.created_at DESC LIMIT 3`, @@ -191,7 +192,7 @@ export async function BureauSnapshot() { {h.confidence_band ?? "—"}{post !== null && ` · ${post.toFixed(2)}`}
-
{h.position}
+
{h.position_pt_br || h.position}
{h.reviewed_by && (
↳ reviewed by {h.reviewed_by}
)} @@ -213,7 +214,7 @@ export async function BureauSnapshot() { {c.contradiction_id} {c.resolution_status} -
{c.topic}
+
{c.topic_pt_br || c.topic}
))} @@ -250,7 +251,8 @@ export async function BureauSnapshot() { > {gap.map((g) => { const s = (g.scope ?? {}) as Record; - const title = (s.title as string) || g.description; + const title = (s.title_pt_br as string) || (s.title as string) + || g.description_pt_br || g.description; return (
@@ -279,7 +281,7 @@ export async function BureauSnapshot() { {w.credibility ?? "—"}
{w.canonical_name ?? "—"}
- {w.verdict &&
{w.verdict.slice(0, 200)}
} + {(w.verdict_pt_br || w.verdict) &&
{(w.verdict_pt_br || w.verdict || "").slice(0, 200)}
}
))} diff --git a/web/components/doc-bureau-panel.tsx b/web/components/doc-bureau-panel.tsx index 675f9cf..3425c90 100644 --- a/web/components/doc-bureau-panel.tsx +++ b/web/components/doc-bureau-panel.tsx @@ -17,9 +17,9 @@ import Link from "next/link"; import { pgQuery } from "@/lib/retrieval/db"; interface EvRow { evidence_id: string; grade: string; confidence_band: string | null; source_page_id: string } -interface HypRow { hypothesis_id: string; position: string; confidence_band: string | null; posterior: number | string | null } -interface CtrRow { contradiction_id: string; topic: string; resolution_status: string } -interface GapRow { gap_id: string; description: string; scope: unknown; status: string } +interface HypRow { hypothesis_id: string; position: string; position_pt_br: string | null; confidence_band: string | null; posterior: number | string | null } +interface CtrRow { contradiction_id: string; topic: string; topic_pt_br: string | null; resolution_status: string } +interface GapRow { gap_id: string; description: string; description_pt_br: string | null; scope: unknown; status: string } interface ReportRow { slug: string; topic: string } const BAND_TONE: Record = { @@ -47,7 +47,7 @@ export async function DocBureauPanel({ docId }: { docId: string }) { const evIds = ev.map((e) => e.evidence_id); const hyp: HypRow[] = evIds.length > 0 ? await pgQuery( - `SELECT hypothesis_id, position, confidence_band, posterior + `SELECT hypothesis_id, position, position_pt_br, confidence_band, posterior FROM public.hypotheses WHERE EXISTS ( SELECT 1 FROM jsonb_array_elements(evidence_refs) er @@ -60,7 +60,7 @@ export async function DocBureauPanel({ docId }: { docId: string }) { // Contradictions whose chunks[] has any chunk with this doc_id. 
const ctr: CtrRow[] = await pgQuery( - `SELECT contradiction_id, topic, resolution_status + `SELECT contradiction_id, topic, topic_pt_br, resolution_status FROM public.contradictions WHERE EXISTS ( SELECT 1 FROM jsonb_array_elements(chunks) c @@ -72,7 +72,7 @@ export async function DocBureauPanel({ docId }: { docId: string }) { // Outliers (gaps with scope.doc_id matching). const gap: GapRow[] = await pgQuery( - `SELECT gap_id, description, scope, status + `SELECT gap_id, description, description_pt_br, scope, status FROM public.gaps WHERE scope->>'doc_id' = $1 ORDER BY gap_id LIMIT 8`, @@ -147,7 +147,7 @@ export async function DocBureauPanel({ docId }: { docId: string }) { {h.confidence_band ?? "—"}{post !== null && ` · ${post.toFixed(2)}`} -
{h.position}
+
{h.position_pt_br || h.position}
); })} @@ -181,7 +181,7 @@ export async function DocBureauPanel({ docId }: { docId: string }) { {c.contradiction_id} {c.resolution_status} -
{c.topic}
+
{c.topic_pt_br || c.topic}
))} @@ -191,7 +191,8 @@ export async function DocBureauPanel({ docId }: { docId: string }) { {gap.map((g) => { const s = (g.scope ?? {}) as Record; - const title = (s.title as string) || g.description; + const title = (s.title_pt_br as string) || (s.title as string) + || g.description_pt_br || g.description; return (
diff --git a/web/components/job-status-poller.tsx b/web/components/job-status-poller.tsx index 4ddc138..0c14bd5 100644 --- a/web/components/job-status-poller.tsx +++ b/web/components/job-status-poller.tsx @@ -52,9 +52,13 @@ interface EvidenceItem { interface HypothesisItem { hypothesis_id: string; question: string | null; + question_pt_br?: string | null; position: string | null; + position_pt_br?: string | null; argument_for: string | null; + argument_for_pt_br?: string | null; argument_against: string | null; + argument_against_pt_br?: string | null; prior: number | string | null; posterior: number | string | null; confidence_band: string | null; @@ -66,15 +70,18 @@ interface ContradictionPositionItem { chunk_id: string; page: number; statement: string; + statement_pt_br?: string | null; stance?: string | null; } interface ContradictionItem { contradiction_id: string; topic: string; + topic_pt_br?: string | null; chunks: ContradictionPositionItem[]; resolution_status: string | null; notes: string | null; + notes_pt_br?: string | null; detected_by: string | null; } @@ -92,9 +99,12 @@ interface WitnessItem { entity_id: string | null; credibility: string | null; access_to_event: string | null; + access_to_event_pt_br?: string | null; bias_notes: string | null; + bias_notes_pt_br?: string | null; corroboration_refs: WitnessCorrItem[]; verdict: string | null; + verdict_pt_br?: string | null; } interface CaseReportOutput { @@ -108,17 +118,23 @@ interface CaseReportOutput { interface GapItem { gap_id: string; description: string; + description_pt_br?: string | null; scope: { kind?: string; title?: string; + title_pt_br?: string; doc_id?: string; chunk_id?: string; page?: number; dominant_model?: string; + dominant_model_pt_br?: string; why_surprising?: string; + why_surprising_pt_br?: string; what_it_implies?: string; + what_it_implies_pt_br?: string; } | null; suggested_next_move: string | null; + suggested_next_move_pt_br?: string | null; status: string; created_by: 
string; } @@ -416,9 +432,14 @@ function HypothesisCard({ h }: { h: HypothesisItem }) { )}
-
- {h.position} +
+ {h.position_pt_br || h.position}
+ {h.position_pt_br && h.position && h.position_pt_br !== h.position && ( +
+ {h.position} +
+ )} {(prior !== null || posterior !== null) && (
@@ -435,16 +456,28 @@ function HypothesisCard({ h }: { h: HypothesisItem }) {
)} - {h.argument_for && ( + {(h.argument_for_pt_br || h.argument_for) && (
-
Argumento a favor
- +
Argumento a favor (PT-BR)
+ + {h.argument_for && h.argument_for_pt_br && ( +
+ Argument for (EN) +
+
+ )}
)} - {h.argument_against && ( + {(h.argument_against_pt_br || h.argument_against) && (
-
Argumento contra
- +
Argumento contra (PT-BR)
+ + {h.argument_against && h.argument_against_pt_br && ( +
+ Argument against (EN) +
+
+ )}
)}
@@ -540,7 +573,7 @@ function GapCard({ g }: { g: GapItem }) {
- {s.title || g.description} + {s.title_pt_br || s.title || g.description_pt_br || g.description}
{s.doc_id && s.chunk_id && pageStr && (
@@ -553,27 +586,27 @@ function GapCard({ g }: { g: GapItem }) {
)} - {s.dominant_model && ( + {(s.dominant_model_pt_br || s.dominant_model) && (
-
Dominant model
-
{s.dominant_model}
+
Modelo dominante
+
{s.dominant_model_pt_br || s.dominant_model}
)} - {s.why_surprising && ( + {(s.why_surprising_pt_br || s.why_surprising) && (
-
Why surprising
-
{s.why_surprising}
+
Por que é surpreendente
+
{s.why_surprising_pt_br || s.why_surprising}
)} - {s.what_it_implies && ( + {(s.what_it_implies_pt_br || s.what_it_implies) && (
-
What it implies
-
{s.what_it_implies}
+
O que implica
+
{s.what_it_implies_pt_br || s.what_it_implies}
)} - {g.suggested_next_move && ( + {(g.suggested_next_move_pt_br || g.suggested_next_move) && (
- → {g.suggested_next_move} + → {g.suggested_next_move_pt_br || g.suggested_next_move}
)} @@ -607,23 +640,30 @@ function WitnessCard({ w }: { w: WitnessItem }) { )} - {w.verdict && ( -
- {w.verdict} -
+ {(w.verdict_pt_br || w.verdict) && ( + <> +
+ {w.verdict_pt_br || w.verdict} +
+ {w.verdict_pt_br && w.verdict && w.verdict_pt_br !== w.verdict && ( +
+ {w.verdict} +
+ )} + )}
- {w.access_to_event && ( + {(w.access_to_event_pt_br || w.access_to_event) && (
-
Access to event
-
{w.access_to_event}
+
Acesso ao evento (PT-BR)
+
{w.access_to_event_pt_br || w.access_to_event}
)} - {w.bias_notes && ( + {(w.bias_notes_pt_br || w.bias_notes) && (
-
Bias notes
-
{w.bias_notes}
+
Notas de viés (PT-BR)
+
{w.bias_notes_pt_br || w.bias_notes}
)}
@@ -672,21 +712,30 @@ function ContradictionCard({ c }: { c: ContradictionItem }) { )} -
- {c.topic} +
+ {c.topic_pt_br || c.topic}
+ {c.topic_pt_br && c.topic_pt_br !== c.topic && ( +
{c.topic}
+ )}
{c.chunks.map((p, i) => { const pageStr = String(p.page).padStart(3, "0"); + const stmt = p.statement_pt_br || p.statement; return (
Position {i + 1}{p.stance ? ` — ${p.stance}` : ""}
-
- “{p.statement}” +
+ “{stmt}”
+ {p.statement_pt_br && p.statement_pt_br !== p.statement && p.statement && ( +
+ “{p.statement}” +
+ )} - {c.notes && ( + {(c.notes_pt_br || c.notes) && (
-
Notes
-
{c.notes}
+
Notas
+
{c.notes_pt_br || c.notes}
)}
diff --git a/web/lib/i18n/pick.ts b/web/lib/i18n/pick.ts
new file mode 100644
index 0000000..45a31a5
--- /dev/null
+++ b/web/lib/i18n/pick.ts
@@ -0,0 +1,52 @@
+/**
+ * pickLang — read the locale-preferred field with EN fallback.
+ *
+ * The bureau stores every narrative as a pair: `field` (EN) and
+ * `field_pt_br` (Brazilian Portuguese). UI components call this helper to
+ * surface the correct one based on the request locale.
+ *
+ *   - locale "pt-br" or "pt" → prefer PT-BR, fall back to EN. Matching
+ *     ignores case, surrounding whitespace and "-" vs "_", so the BCP 47
+ *     form "pt-BR" and the POSIX form "pt_BR" are accepted too.
+ *   - any other locale (including "en", null, undefined) → prefer EN,
+ *     fall back to PT-BR
+ *
+ * Empty / whitespace-only strings are treated as missing so a partial row
+ * still surfaces the language that has content.
+ */
+
+export type Locale = "pt-br" | "pt" | "en" | string | null | undefined;
+
+/** True when `locale` names Portuguese ("pt" or "pt-br" after normalization). */
+function isPt(locale: Locale): boolean {
+  if (typeof locale !== "string") return false;
+  // "pt-BR" is the canonical BCP 47 casing and "pt_BR" the POSIX spelling;
+  // normalize so neither is silently treated as a non-Portuguese locale.
+  const tag = locale.trim().toLowerCase().replace(/_/g, "-");
+  return tag === "pt-br" || tag === "pt";
+}
+
+/** Returns `s` unchanged when it has non-whitespace content, otherwise null. */
+function nonEmpty(s: string | null | undefined): string | null {
+  if (typeof s !== "string") return null;
+  return s.trim().length > 0 ? s : null;
+}
+
+/**
+ * Choose between the EN and PT-BR variants of one narrative field.
+ *
+ * @param en - English text; null, undefined or blank counts as missing.
+ * @param pt_br - Brazilian Portuguese text; same missing rules as `en`.
+ * @param locale - Locale tag deciding which language is preferred.
+ * @returns The preferred variant (not trimmed), the other variant when the
+ *   preferred one is missing, or null when both are missing.
+ */
+export function pickLang(
+  en: string | null | undefined,
+  pt_br: string | null | undefined,
+  locale: Locale,
+): string | null {
+  const enValid = nonEmpty(en);
+  const ptValid = nonEmpty(pt_br);
+  return isPt(locale) ? (ptValid ?? enValid) : (enValid ?? ptValid);
+}