disclosure-bureau/scripts/maintain/55_relations_schema.sql
Luiz Gustavo a7e9dce6d2 rebuild entity layer from Sonnet-vision reextract pipeline
Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity
JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page
extraction. Add synthesize scripts to regenerate wiki/entities from the 116
_reextract.json (30), aggregate missing page.md from chunks (31), and reprocess
805 pages the doc-rebuilder agent dropped on context overflow (32). Add
maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and
typed relation extraction.

Web: wire relations API + entity-relations component; entity/timeline/doc
pages consume the rebuilt layer.

Note: raw/, processing/, wiki/ remain gitignored (bulk data managed
separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on
disk only. The 27 curated anchor events under wiki/entities/events/ are
preserved.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 12:20:24 -03:00

50 lines
2.2 KiB
PL/PgSQL

-- 55_relations_schema.sql
-- Typed relations between entities. Replaces noisy co-mention with semantic
-- edges like (Person, witnessed, Event), (Event, occurred_at, Location),
-- (Person, signed, Document), etc.
BEGIN;

-- public.relations: one row per typed, directed edge
--   (source_class, source_id) --relation_type--> (target_class, target_id)
-- Both endpoints are stored as plain TEXT class/id pairs; this statement
-- declares no foreign keys on them.
CREATE TABLE IF NOT EXISTS public.relations (
    relation_pk    BIGSERIAL,                           -- surrogate key
    source_class   TEXT         NOT NULL,
    source_id      TEXT         NOT NULL,
    relation_type  TEXT         NOT NULL,
    target_class   TEXT         NOT NULL,
    target_id      TEXT         NOT NULL,
    evidence_ref   TEXT,                                -- optional; e.g. '[[doc-id/p007]]' or chunk_id
    confidence     TEXT         DEFAULT 'medium' NOT NULL,  -- high|medium|low (not enforced by a CHECK here)
    extracted_by   TEXT         DEFAULT 'yaml' NOT NULL,    -- yaml|regex|llm|manual (not enforced by a CHECK here)
    created_at     TIMESTAMPTZ  DEFAULT NOW() NOT NULL,
    PRIMARY KEY (relation_pk),
    -- The same edge may be recorded once per distinct evidence_ref.
    -- NOTE(review): evidence_ref is nullable, and a plain UNIQUE constraint
    -- treats NULLs as distinct, so two otherwise-identical edges that both
    -- have a NULL evidence_ref are accepted. Confirm that this is intended.
    UNIQUE (source_class, source_id, relation_type, target_class, target_id, evidence_ref)
);
-- Enum-style CHECK on relation_type (extensible — add new values to the list
-- below as we discover them).
--
-- The constraint is dropped (if present) and re-added within one ALTER TABLE
-- so that this script can be re-run: a bare ADD CONSTRAINT fails with
-- "constraint already exists" on the second run. Re-adding it also applies any
-- values newly added to the list. Existing rows are re-validated; if a row
-- holds a relation_type that is not listed, the statement fails as a whole and
-- the previous constraint stays in place.
--
-- The triples in the trailing comments describe the intended usage of each
-- type; only relation_type itself is checked, not the source/target classes.
ALTER TABLE public.relations
    DROP CONSTRAINT IF EXISTS relations_type_check,
    ADD CONSTRAINT relations_type_check
    CHECK (relation_type IN (
        'witnessed',      -- (person, witnessed, event)
        'occurred_at',    -- (event, occurred_at, location)
        'involves_uap',   -- (event, involves_uap, uap_object)
        'documented_in',  -- (event, documented_in, document)
        'authored',       -- (person, authored, document)
        'signed',         -- (person, signed, document)
        'mentioned_by',   -- (person, mentioned_by, document)
        'employed_by',    -- (person, employed_by, organization)
        'operated_by',    -- (operation, operated_by, organization)
        'investigated',   -- (person, investigated, event)
        'commanded',      -- (person, commanded, organization)
        'related_to',     -- generic fallback (lower-quality)
        'similar_to',     -- (event, similar_to, event)
        'precedes',       -- (event, precedes, event)
        'follows'         -- (event, follows, event)
    ));
-- Secondary indexes on the source endpoint, the target endpoint, and the
-- relation type. IF NOT EXISTS makes each statement a no-op when an index of
-- that name is already present. btree is the default method, spelled out here.
CREATE INDEX IF NOT EXISTS relations_source_idx ON public.relations USING btree (source_class, source_id);
CREATE INDEX IF NOT EXISTS relations_target_idx ON public.relations USING btree (target_class, target_id);
CREATE INDEX IF NOT EXISTS relations_type_idx   ON public.relations USING btree (relation_type);

-- Close the transaction opened by BEGIN at the top of the script.
COMMIT;