// disclosure-bureau/web/lib/retrieval/entity-pages.ts
//
// 111 lines
// 3.2 KiB
// TypeScript

/**
* Live entity data queries — replaces stale Haiku-era frontmatter `mentioned_in[]`
* with real counts from `public.entity_mentions` + `public.chunks`.
*/
import { pgQuery } from "./db";
import { findEntity } from "./graph";
/**
 * Row shape returned by `getEntityCore`: the columns read from
 * `public.entities` plus two totals computed at query time.
 */
export interface EntityCore {
  /** `entities.entity_pk`; the key `entity_mentions.entity_pk` is matched on. */
  entity_pk: number;

  /** `entities.entity_class`; first half of the lookup key. */
  entity_class: string;

  /** `entities.entity_id`; second half of the lookup key. */
  entity_id: string;

  /** `entities.canonical_name`. */
  canonical_name: string;

  /** `entities.aliases`; may be `null`. */
  aliases: string[] | null;

  /** Count of `entity_mentions` rows that join to a chunk; 0 when there are none. */
  total_mentions: number;

  /** Count of distinct `chunks.doc_id` values across those mentions; 0 when there are none. */
  documents_count: number;

  /** `entities.enrichment_status`; may be `null`. */
  enrichment_status: string | null;
}
/**
 * Row shape returned by `getEntityMentionsByDoc`: one entry per document in
 * which the entity is mentioned.
 */
export interface EntityMentionGroup {
  /** `chunks.doc_id` the mentions were grouped by. */
  doc_id: string;

  /** `documents.canonical_title`; `null` is possible because documents are LEFT JOINed. */
  canonical_title: string | null;

  /** `documents.collection`; may be `null`. */
  collection: string | null;

  /** `documents.page_count`; may be `null`. */
  page_count: number | null;

  /** `documents.classification`; may be `null`. */
  classification: string | null;

  /** Number of mention rows for this entity within the document. */
  mention_count: number;

  /** Distinct `chunks.page` values of the mentioning chunks, in ascending order. */
  pages: number[];
}
/**
 * Loads one entity together with mention totals computed at query time from
 * `public.entity_mentions` joined to `public.chunks`.
 *
 * @param entityClass - Matched against `entities.entity_class`.
 * @param entityId - Matched against `entities.entity_id`.
 * @returns The first matching row, or `null` when no entity matches.
 */
export async function getEntityCore(
  entityClass: string,
  entityId: string,
): Promise<EntityCore | null> {
  // The totals come from a LATERAL subquery correlated on entity_pk, so only
  // the mentions of the entity picked by the WHERE clause are aggregated. An
  // uncorrelated GROUP BY subquery would aggregate every entity's mentions
  // before the join discards all but one group.
  //
  // An aggregate without GROUP BY always yields exactly one row, so the
  // COALESCE calls are purely defensive; the output columns are unchanged.
  const rows = await pgQuery<EntityCore>(
    `SELECT
       e.entity_pk, e.entity_class, e.entity_id, e.canonical_name, e.aliases,
       COALESCE(stats.mention_count, 0) AS total_mentions,
       COALESCE(stats.doc_count, 0) AS documents_count,
       e.enrichment_status
     FROM public.entities e
     LEFT JOIN LATERAL (
       SELECT COUNT(*)::INT AS mention_count,
              COUNT(DISTINCT c.doc_id)::INT AS doc_count
       FROM public.entity_mentions m
       JOIN public.chunks c ON c.chunk_pk = m.chunk_pk
       WHERE m.entity_pk = e.entity_pk
     ) stats ON TRUE
     WHERE e.entity_class = $1 AND e.entity_id = $2
     LIMIT 1`,
    [entityClass, entityId],
  );
  return rows[0] ?? null;
}
/**
 * Group mentions per document so the sidebar can list "appears in N docs".
 *
 * Rows are ordered by mention count (highest first). Ties are broken by
 * `doc_id` so that the ordering — and the set of rows kept by `LIMIT` — is the
 * same on every call for unchanged data.
 *
 * @param entityPk - Matched against `entity_mentions.entity_pk`.
 * @param limit - Maximum number of documents to return (default 50).
 * @returns One row per document; document metadata fields are `null` when no
 *   `public.documents` row matches the chunk's `doc_id`.
 */
export async function getEntityMentionsByDoc(
  entityPk: number,
  limit: number = 50,
): Promise<EntityMentionGroup[]> {
  return pgQuery<EntityMentionGroup>(
    `SELECT
       c.doc_id,
       d.canonical_title,
       d.collection,
       d.page_count,
       d.classification,
       COUNT(*)::INT AS mention_count,
       array_agg(DISTINCT c.page ORDER BY c.page) AS pages
     FROM public.entity_mentions em
     JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
     LEFT JOIN public.documents d ON d.doc_id = c.doc_id
     WHERE em.entity_pk = $1
     GROUP BY c.doc_id, d.canonical_title, d.collection, d.page_count, d.classification
     ORDER BY mention_count DESC, c.doc_id
     LIMIT $2`,
    [entityPk, limit],
  );
}
/**
 * Row shape returned by `getEntityChunks`: a subset of `public.chunks`
 * columns for a chunk that mentions the entity.
 */
export interface EntityChunkPreview {
  /** `chunks.chunk_pk`; the key `entity_mentions.chunk_pk` is joined on. */
  chunk_pk: number;

  /** `chunks.doc_id` of the containing document. */
  doc_id: string;

  /** `chunks.chunk_id`. */
  chunk_id: string;

  /** `chunks.page`. */
  page: number;

  /** `chunks.type`. */
  type: string;

  /**
   * `chunks.bbox`; may be `null`.
   * NOTE(review): presumably the chunk's rectangle on the page — units and
   * origin are not visible here; confirm against the producer.
   */
  bbox: { x: number; y: number; w: number; h: number } | null;

  /** `chunks.classification`; may be `null`. */
  classification: string | null;

  /** `chunks.content_pt`; may be `null`. */
  content_pt: string | null;

  /** `chunks.content_en`; may be `null`. */
  content_en: string | null;

  /** `chunks.ufo_anomaly`; may be `null`. `getEntityChunks` sorts on this column first. */
  ufo_anomaly: boolean | null;

  /** `chunks.ufo_anomaly_type`; may be `null`. */
  ufo_anomaly_type: string | null;
}
/**
 * Fetches preview rows for the chunks that mention an entity.
 *
 * Ordering: chunks with `ufo_anomaly = true` first, then `false`, then
 * `NULL`; within each group by `doc_id`, then `order_global`.
 *
 * NOTE(review): the query selects one row per `entity_mentions` row, so if a
 * chunk can carry several mentions of the same entity it will appear more
 * than once — confirm whether that is intended.
 *
 * @param entityPk - Matched against `entity_mentions.entity_pk`.
 * @param limit - Maximum number of rows to return (default 30).
 */
export async function getEntityChunks(
  entityPk: number,
  limit: number = 30,
): Promise<EntityChunkPreview[]> {
  // $1 = entity primary key, $2 = row cap.
  const chunkPreviewSql = `SELECT
c.chunk_pk, c.doc_id, c.chunk_id, c.page, c.type, c.bbox, c.classification,
c.content_pt, c.content_en, c.ufo_anomaly, c.ufo_anomaly_type
FROM public.entity_mentions em
JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
WHERE em.entity_pk = $1
ORDER BY c.ufo_anomaly DESC NULLS LAST, c.doc_id, c.order_global
LIMIT $2`;

  const previews = await pgQuery<EntityChunkPreview>(chunkPreviewSql, [entityPk, limit]);
  return previews;
}
// Re-export `findEntity` (imported from "./graph" at the top of this file) so
// it is available from this module alongside the entity queries defined here.
export { findEntity };