/**
 * Live entity data queries — replaces stale Haiku-era frontmatter `mentioned_in[]`
 * with real counts from `public.entity_mentions` + `public.chunks`.
 */
|
|
import { pgQuery } from "./db";
|
|
import { findEntity } from "./graph";
|
|
|
|
/**
 * Row shape returned by `getEntityCore`: the identifying columns of one
 * `public.entities` row plus mention statistics computed at query time.
 */
export interface EntityCore {
  /** `entities.entity_pk`; also the key used to look up rows in `entity_mentions`. */
  entity_pk: number;
  /** `entities.entity_class`; together with `entity_id` it selects the entity. */
  entity_class: string;
  /** `entities.entity_id`; together with `entity_class` it selects the entity. */
  entity_id: string;
  /** `entities.canonical_name`. */
  canonical_name: string;
  /** `entities.aliases`; may be `null`. */
  aliases: string[] | null;
  /** Number of `entity_mentions` rows (joined to a chunk) for this entity; 0 when there are none. */
  total_mentions: number;
  /** Number of distinct `chunks.doc_id` values among those mentions; 0 when there are none. */
  documents_count: number;
  /** `entities.enrichment_status`; may be `null`. */
  enrichment_status: string | null;
}
|
|
|
|
/**
 * Row shape returned by `getEntityMentionsByDoc`: one entry per document in
 * which the entity is mentioned.
 *
 * The document columns come from a LEFT JOIN on `public.documents`, so they
 * are `null` when no document row matches the chunk's `doc_id`.
 */
export interface EntityMentionGroup {
  /** `chunks.doc_id` the mentions were grouped by. */
  doc_id: string;
  /** `documents.canonical_title`, or `null`. */
  canonical_title: string | null;
  /** `documents.collection`, or `null`. */
  collection: string | null;
  /** `documents.page_count`, or `null`. */
  page_count: number | null;
  /** `documents.classification`, or `null`. */
  classification: string | null;
  /** Number of mention rows for the entity within this document. */
  mention_count: number;
  /** Distinct `chunks.page` values of the mentioning chunks, in ascending order. */
  pages: number[];
}
|
|
|
|
/**
 * Load a single entity together with its live mention statistics.
 *
 * The entity is selected from `public.entities` by (`entity_class`,
 * `entity_id`). Mention statistics are computed from `public.entity_mentions`
 * joined to `public.chunks`.
 *
 * The statistics subquery is a LATERAL join correlated on `e.entity_pk`, so
 * only the mentions of the selected entity are aggregated. An uncorrelated
 * `GROUP BY em.entity_pk` derived table would have to aggregate the whole
 * mentions table before the join could discard everything but one group.
 *
 * @param entityClass - Matched against `entities.entity_class`.
 * @param entityId - Matched against `entities.entity_id`.
 * @returns The matching entity row, or `null` when no entity matches.
 */
export async function getEntityCore(
  entityClass: string,
  entityId: string,
): Promise<EntityCore | null> {
  const rows = await pgQuery<EntityCore>(
    `SELECT
       e.entity_pk, e.entity_class, e.entity_id, e.canonical_name, e.aliases,
       COALESCE(stats.mention_count, 0) AS total_mentions,
       COALESCE(stats.doc_count, 0) AS documents_count,
       e.enrichment_status
     FROM public.entities e
     LEFT JOIN LATERAL (
       SELECT COUNT(*)::INT AS mention_count,
              COUNT(DISTINCT c.doc_id)::INT AS doc_count
       FROM public.entity_mentions em
       JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
       WHERE em.entity_pk = e.entity_pk
     ) stats ON TRUE
     WHERE e.entity_class = $1 AND e.entity_id = $2
     LIMIT 1`,
    [entityClass, entityId],
  );
  // An ungrouped aggregate always yields exactly one row (with zero counts
  // when nothing matches); COALESCE is kept purely as a defensive default.
  return rows[0] ?? null;
}
|
|
|
|
/**
 * Group an entity's mentions per document so the sidebar can list
 * "appears in N docs".
 *
 * Each result row carries the document's metadata (LEFT JOINed, so it may be
 * `null`), the number of mentions inside that document, and the distinct
 * pages on which the mentioning chunks sit.
 *
 * Rows are ordered by mention count, highest first. `doc_id` is used as a
 * secondary key so that documents with equal counts come back in a stable
 * order — without it, which of the tied documents survive the `LIMIT` is
 * left to the query planner.
 *
 * @param entityPk - `entity_mentions.entity_pk` of the entity to look up.
 * @param limit - Maximum number of documents to return (default 50).
 * @returns One group per document, most-mentioned first.
 */
export async function getEntityMentionsByDoc(
  entityPk: number,
  limit: number = 50,
): Promise<EntityMentionGroup[]> {
  return pgQuery<EntityMentionGroup>(
    `SELECT
       c.doc_id,
       d.canonical_title,
       d.collection,
       d.page_count,
       d.classification,
       COUNT(*)::INT AS mention_count,
       array_agg(DISTINCT c.page ORDER BY c.page) AS pages
     FROM public.entity_mentions em
     JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
     LEFT JOIN public.documents d ON d.doc_id = c.doc_id
     WHERE em.entity_pk = $1
     GROUP BY c.doc_id, d.canonical_title, d.collection, d.page_count, d.classification
     ORDER BY mention_count DESC, c.doc_id
     LIMIT $2`,
    [entityPk, limit],
  );
}
|
|
|
|
/**
 * Row shape returned by `getEntityChunks`: a subset of `public.chunks`
 * columns for a chunk in which the entity is mentioned.
 */
export interface EntityChunkPreview {
  /** `chunks.chunk_pk`; the key `entity_mentions` rows point at. */
  chunk_pk: number;
  /** `chunks.doc_id` of the document the chunk belongs to. */
  doc_id: string;
  /** `chunks.chunk_id`. */
  chunk_id: string;
  /** `chunks.page`. */
  page: number;
  /** `chunks.type`. */
  type: string;
  /** `chunks.bbox` as an x/y/w/h rectangle, or `null`. */
  bbox: { x: number; y: number; w: number; h: number } | null;
  /** `chunks.classification`, or `null`. */
  classification: string | null;
  /** `chunks.content_pt`, or `null`. Presumably the Portuguese text — confirm against the schema. */
  content_pt: string | null;
  /** `chunks.content_en`, or `null`. Presumably the English text — confirm against the schema. */
  content_en: string | null;
  /** `chunks.ufo_anomaly`, or `null`; `getEntityChunks` sorts `true` rows first. */
  ufo_anomaly: boolean | null;
  /** `chunks.ufo_anomaly_type`, or `null`. */
  ufo_anomaly_type: string | null;
}
|
|
|
|
/**
 * Fetch preview rows for the chunks in which an entity is mentioned.
 *
 * Rows are ordered with `ufo_anomaly = true` first, then `false`, then
 * `NULL`, and within that by `doc_id` and `order_global`.
 *
 * NOTE(review): one row is produced per `entity_mentions` row, so a chunk
 * would repeat if the table allows several mentions of the same entity in
 * one chunk — confirm against the schema.
 *
 * @param entityPk - `entity_mentions.entity_pk` of the entity to look up.
 * @param limit - Maximum number of rows to return (default 30).
 * @returns The matching chunk previews in the order described above.
 */
export async function getEntityChunks(
  entityPk: number,
  limit: number = 30,
): Promise<EntityChunkPreview[]> {
  const sql = `SELECT
       c.chunk_pk, c.doc_id, c.chunk_id, c.page, c.type, c.bbox, c.classification,
       c.content_pt, c.content_en, c.ufo_anomaly, c.ufo_anomaly_type
     FROM public.entity_mentions em
     JOIN public.chunks c ON c.chunk_pk = em.chunk_pk
     WHERE em.entity_pk = $1
     ORDER BY c.ufo_anomaly DESC NULLS LAST, c.doc_id, c.order_global
     LIMIT $2`;
  const params = [entityPk, limit];

  return pgQuery<EntityChunkPreview>(sql, params);
}
|
|
|
|
// Re-export `findEntity` (imported from "./graph") so callers can get it from this module.
export { findEntity };
|