Add reextract pipeline (scripts/reextract/) that rebuilds doc-level entity JSON from Sonnet-vision chunks via Opus, replacing the noisy per-page extraction. Add synthesize scripts to regenerate wiki/entities from the 116 _reextract.json (30), aggregate missing page.md from chunks (31), and reprocess 805 pages the doc-rebuilder agent dropped on context overflow (32). Add maintain scripts 43-56 for chunk-page sync, dedup, generic-entity marking, and typed relation extraction. Web: wire relations API + entity-relations component; entity/timeline/doc pages consume the rebuilt layer. Note: raw/, processing/, wiki/ remain gitignored (bulk data managed separately); the 116 reextract JSONs and 7,798 rebuilt entity files live on disk only. The 27 curated anchor events under wiki/entities/events/ are preserved. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
50 lines
2.2 KiB
PL/PgSQL
50 lines
2.2 KiB
PL/PgSQL
-- 55_relations_schema.sql
-- Typed relations between entities. Replaces noisy co-mention with semantic
-- edges like (Person, witnessed, Event), (Event, occurred_at, Location),
-- (Person, signed, Document), etc.
|
|
|
|
-- Run the whole migration atomically: either every object below exists
-- afterwards, or none of the changes are applied.
BEGIN;

-- One row per typed edge: (source_class, source_id) --relation_type-->
-- (target_class, target_id), optionally backed by a piece of evidence.
CREATE TABLE IF NOT EXISTS public.relations (
    -- Surrogate key; carries no meaning beyond row identity.
    relation_pk    BIGSERIAL,

    -- Edge source: entity class plus that entity's id.
    source_class   TEXT         NOT NULL,
    source_id      TEXT         NOT NULL,

    -- Edge label; the allowed values are restricted by a CHECK constraint
    -- added separately in this file.
    relation_type  TEXT         NOT NULL,

    -- Edge target: entity class plus that entity's id.
    target_class   TEXT         NOT NULL,
    target_id      TEXT         NOT NULL,

    -- Where the edge was observed, e.g. '[[doc-id/p007]]' or a chunk_id.
    -- Nullable: an edge may be recorded without evidence.
    evidence_ref   TEXT,

    -- Expected values: high | medium | low (documented only, not enforced here).
    confidence     TEXT         NOT NULL DEFAULT 'medium',

    -- Expected values: yaml | regex | llm | manual (documented only, not enforced here).
    extracted_by   TEXT         NOT NULL DEFAULT 'yaml',

    created_at     TIMESTAMPTZ  NOT NULL DEFAULT NOW(),

    PRIMARY KEY (relation_pk),

    -- The same edge may be stored once per distinct piece of evidence.
    -- NOTE(review): evidence_ref is nullable and PostgreSQL treats NULLs as
    -- distinct in UNIQUE constraints by default, so rows that differ only by
    -- having a NULL evidence_ref are NOT deduplicated — confirm this is intended.
    UNIQUE (source_class, source_id, relation_type, target_class, target_id, evidence_ref)
);
|
|
|
|
-- Enum-style check on relation_type (extensible — add new values as we
-- discover them).
--
-- ADD CONSTRAINT has no IF NOT EXISTS form, so any existing copy of the
-- constraint is dropped first. This keeps the migration re-runnable, in line
-- with the CREATE ... IF NOT EXISTS statements elsewhere in this file, and it
-- means an edited value list actually takes effect when the file is re-applied.
-- Re-adding the constraint re-validates existing rows against the list below.
ALTER TABLE public.relations
    DROP CONSTRAINT IF EXISTS relations_type_check;

ALTER TABLE public.relations
    ADD CONSTRAINT relations_type_check
    CHECK (relation_type IN (
        'witnessed',      -- (person, witnessed, event)
        'occurred_at',    -- (event, occurred_at, location)
        'involves_uap',   -- (event, involves_uap, uap_object)
        'documented_in',  -- (event, documented_in, document)
        'authored',       -- (person, authored, document)
        'signed',         -- (person, signed, document)
        'mentioned_by',   -- (person, mentioned_by, document)
        'employed_by',    -- (person, employed_by, organization)
        'operated_by',    -- (operation, operated_by, organization)
        'investigated',   -- (person, investigated, event)
        'commanded',      -- (person, commanded, organization)
        'related_to',     -- generic fallback (lower-quality)
        'similar_to',     -- (event, similar_to, event)
        'precedes',       -- (event, precedes, event)
        'follows'         -- (event, follows, event)
    ));
|
|
|
|
-- Lookup indexes for the three access paths: by source entity, by target
-- entity, and by relation type. Each is created only if it is missing.

-- Outgoing edges of an entity.
-- NOTE(review): the table's UNIQUE constraint also leads with
-- (source_class, source_id), so its backing index may already serve these
-- lookups — confirm with EXPLAIN before treating this index as required.
CREATE INDEX IF NOT EXISTS relations_source_idx ON public.relations (source_class, source_id);

-- Incoming edges of an entity.
CREATE INDEX IF NOT EXISTS relations_target_idx ON public.relations (target_class, target_id);

-- All edges of a given type.
CREATE INDEX IF NOT EXISTS relations_type_idx ON public.relations (relation_type);

-- Close the transaction opened at the top of this migration.
COMMIT;
|