disclosure-bureau/web/lib/fm-types.ts

266 lines
7.2 KiB
TypeScript
Raw Normal View History

/**
* Frontmatter type contracts derived from CLAUDE-schema-full.md.
*
* Used by the UI primitives in `components/fm/*` to render each field with
* the correct semantic UI (chip / link / badge / bbox / etc.).
*
* NOTE: every field is optional because the schema is permissive — earlier
* extraction passes left gaps that later phases fill. The UI must be robust
* to missing data.
*/
/**
 * Confidence labels shared by the `confidence_band`-style fields declared
 * in this file (e.g. `Coords.confidence_band`, `SignatureObserved.confidence_band`).
 */
export type ConfidenceBand =
  | "high"
  | "medium"
  | "low"
  | "speculation";
/** Allowed values for the `enrichment_status` frontmatter field. */
export type EnrichmentStatus =
  | "deep"
  | "shallow"
  | "none";
/**
 * Classification level labels, used by `ClassificationMarking.level` and
 * `AnyFrontmatter.highest_classification`.
 */
export type ClassificationLevel =
  | "UNCLASSIFIED"
  | "CUI"
  | "CONFIDENTIAL"
  | "SECRET"
  | "TOP SECRET";
/**
 * Content tags for a page; `AnyFrontmatter.content_classification` holds an
 * array of these.
 */
export type ContentClass =
  | "text-only"
  | "contains-photos"
  | "contains-sketches"
  | "contains-diagrams"
  | "contains-maps"
  | "contains-tables"
  | "contains-signatures"
  | "contains-stamps"
  | "redaction-heavy"
  | "mixed"
  | "blank";
/**
 * Bounding box with four required numeric members.
 *
 * NOTE(review): units and origin are not stated in this file — presumably
 * `x`/`y` locate a corner and `w`/`h` are width/height; confirm against the
 * schema.
 */
export interface BBox {
  x: number;
  y: number;
  w: number;
  h: number;
}
/**
 * Coordinate pair with optional provenance. Every member is optional and
 * `lat`/`lon` may additionally be `null`.
 */
export interface Coords {
  lat?: number | null;
  lon?: number | null;
  /** NOTE(review): presumably the coordinate text as found in the source — confirm. */
  raw_text?: string;
  confidence_band?: ConfidenceBand;
}
/**
 * Element type of `AnyFrontmatter.classification_markings`.
 * Every member is optional.
 */
export interface ClassificationMarking {
  level?: ClassificationLevel;
  caveats?: string[];
  /** Which of the four listed positions the marking was found in. */
  location?:
    | "header"
    | "footer"
    | "banner"
    | "stamp";
  bbox?: BBox;
}
/**
 * Element type of `AnyFrontmatter.redactions`. Every member is optional.
 */
export interface Redaction {
  /**
   * Redaction code. The listed literals are the known codes; any other
   * string is still accepted.
   *
   * The open arm is written as `(string & {})` rather than `string` because a
   * bare `string` absorbs the literal members and the whole union collapses
   * to `string`, hiding the known codes from editor completion. Both forms
   * accept exactly the same values.
   */
  code?:
    | "(b)(1) 1.4(a)"
    | "(b)(3)"
    | "(b)(6)"
    | "other"
    | (string & {});
  description?: string;
  bbox?: BBox;
  /** May be `null` as well as absent. */
  text_inferred?: string | null;
}
/**
 * Element type of `AnyFrontmatter.signatures_observed`.
 * Every member is optional.
 */
export interface SignatureObserved {
  /** May be `null` as well as absent. */
  signer_inferred?: string | null;
  confidence_band?: ConfidenceBand;
  bbox?: BBox;
  notes?: string;
}
/**
 * Element type of `AnyFrontmatter.tables_detected`.
 * Every member is optional.
 */
export interface TableDetected {
  local_table_index?: number;
  bbox?: BBox;

  // Multi-page continuation flags.
  spans_multi_page?: boolean;
  continues_from_prev_page?: boolean;
  likely_continues_next_page?: boolean;

  // Size estimates and header summary.
  row_count_estimate?: number;
  col_count_estimate?: number;
  headers_summary?: string;

  /** Populated by Phase 4.8 consolidate-tables. */
  table_id?: string;
}
/**
 * Element type of `AnyFrontmatter.images_detected`.
 * Every member is optional.
 */
export interface ImageDetected {
  local_image_index?: number;
  /** Restricted to the literals listed here; "other" is the fallback member. */
  image_type?:
    | "photo"
    | "sketch"
    | "map"
    | "chart"
    | "stamp"
    | "signature"
    | "redaction"
    | "logo"
    | "seal"
    | "diagram"
    | "other";
  bbox?: BBox;
  caption_ocr?: string;
}
/**
 * Element type of `AnyFrontmatter.external_sources`.
 * Every member is optional.
 */
export interface ExternalSource {
  url?: string;
  title?: string;
  publisher?: string;
  /** Stored as a string; the exact date format is not specified in this file. */
  accessed_at?: string;
  key_facts?: string[];
  /** Reuses the `ConfidenceBand` labels. */
  reliability_band?: ConfidenceBand;
}
/**
 * Entity reference; the element type of every list in `EntitiesExtracted`.
 * Every member is optional.
 *
 * NOTE(review): several members (`shape`, `color`, `size_estimate`, `date`, ...)
 * presumably apply only to particular entity kinds — confirm against the schema.
 */
export interface EntityRef {
  name?: string;
  role_in_page?:
    | "subject"
    | "witness"
    | "author"
    | "signer"
    | "mentioned";
  aliases?: string[];
  type?: string;
  class?: string;
  shape?: string;
  color?: string;
  size_estimate?: string;
  label?: string;
  date?: string;
}
/**
 * Entity references grouped into eight optional lists, one per entity kind.
 * Used for both `AnyFrontmatter.entities_extracted` and
 * `AnyFrontmatter.key_entities`.
 */
export interface EntitiesExtracted {
  people?: EntityRef[];
  organizations?: EntityRef[];
  locations?: EntityRef[];
  events?: EntityRef[];
  uap_objects?: EntityRef[];
  vehicles?: EntityRef[];
  operations?: EntityRef[];
  concepts?: EntityRef[];
}
/**
 * Shape of `AnyFrontmatter.uap_observation_fields`. Every member is
 * optional; the numeric members may also be `null`.
 *
 * NOTE(review): units are presumably those named by each field suffix
 * (`_seconds`, `_ft`, `_kts`, `_deg`, `_nm`) — confirm against the schema.
 */
export interface UapObservation {
  date_time_utc?: string;
  duration_seconds?: number | null;

  // Descriptive attributes.
  shape?: string;
  color?: string;
  size_estimate?: string;

  // Numeric measurements.
  altitude_ft?: number | null;
  speed_kts?: number | null;
  bearing_deg?: number | null;
  distance_nm?: number | null;

  coordinates?: Coords;
}
/**
 * Element type of `AnyFrontmatter.mentioned_in`.
 * Every member is optional.
 */
export interface MentionedIn {
  /** Wiki-link string. */
  page?: string;
  /** Alternative naming. */
  page_ref?: string;
  mention_count?: number;
  /** Plain string here, unlike the literal union on `EntityRef.role_in_page`. */
  role_in_page?: string;
}
/**
 * Universal frontmatter shape — covers document, page, entity, etc.
 *
 * Every declared field is optional, and the trailing index signature admits
 * any other key as `unknown`.
 *
 * Use this with `(fm as AnyFrontmatter).field` for safe access.
 */
export interface AnyFrontmatter {
  schema_version?: string;
  type?: string;
  wiki_version?: string;
  last_ingest?: string;
  last_lint?: string;
  last_enriched?: string;

  // ---- identity ----
  doc_id?: string;
  page_id?: string;
  page_number?: number;
  total_pages?: number;
  person_id?: string;
  organization_id?: string;
  location_id?: string;
  event_id?: string;
  uap_object_id?: string;
  vehicle_id?: string;
  operation_id?: string;
  concept_id?: string;
  table_id?: string;
  image_id?: string;
  entity_class?: string;
  canonical_name?: string;
  canonical_title?: string;
  aliases?: string[];

  // ---- classification ----
  highest_classification?: ClassificationLevel;
  classification_markings?: ClassificationMarking[];
  language_detected?: string;
  languages_detected?: string[];
  content_classification?: ContentClass[];
  page_type?: string;
  document_class?: string;
  collection?: string;
  redactions?: Redaction[];
  signatures_observed?: SignatureObserved[];

  // ---- page-level extraction ----
  tables_detected?: TableDetected[];
  images_detected?: ImageDetected[];
  entities_extracted?: EntitiesExtracted;
  uap_observation_fields?: UapObservation | null;
  vision_description?: string;
  vision_description_pt_br?: string;
  ocr_quality_score?: number;
  vision_quality_score?: number;
  flags?: string[];

  // ---- document-level ----
  page_count?: number;
  total_redactions?: number;
  total_signatures?: number;
  total_tables?: number;
  total_images?: number;
  key_entities?: EntitiesExtracted;

  // ---- entity-level relational ----
  mentioned_in?: MentionedIn[];
  total_mentions?: number;
  documents_count?: number;
  enrichment_status?: EnrichmentStatus;
  external_sources?: ExternalSource[];
  disambiguation_note?: string;

  // ---- location-specific ----
  location_type?: string;
  /** Either a single string or a list of strings. */
  country?: string | string[];
  region?: string;
  parent_location?: string;
  coordinates?: Coords;
  events_here?: string[];

  // ---- event-specific ----
  event_class?: string;
  date_start?: string;
  date_end?: string;
  date_confidence?: ConfidenceBand;
  primary_location?: string | null;
  observers?: string[];
  uap_objects?: string[];
  documented_in?: string[];
  narrative_summary?: string;
  narrative_summary_pt_br?: string;

  // ---- uap-object-specific ----
  observed_in_event?: string;
  secondary_events?: string[];
  shape?: string;
  color?: string;
  /** min/max range with its own confidence band. */
  size_estimate_m?: {
    min?: number | null;
    max?: number | null;
    confidence_band?: ConfidenceBand;
  };
  features?: string[];
  /** min/max range; note `UapObservation.altitude_ft` is a single number instead. */
  altitude_ft?: {
    min?: number | null;
    max?: number | null;
    confidence_band?: ConfidenceBand;
  };
  /** min/max range; note `UapObservation.speed_kts` is a single number instead. */
  speed_kts?: {
    min?: number | null;
    max?: number | null;
    confidence_band?: ConfidenceBand;
  };
  maneuver_descriptors?: string[];
  sensor_observations?: string[];
  visual_records?: string[];
  confidence_band_overall?: ConfidenceBand;

  // ---- person-specific ----
  roles?: string[];
  dates?: {
    born?: string | null;
    died?: string | null;
  };
  primary_role?: string;
  primary_organization?: string;

  // ---- organization-specific ----
  organization_type?: string;
  founded?: string;

  // ---- vehicle-specific ----
  vehicle_class?: string;
  operator?: string;
  model?: string;

  // ---- operation-specific ----
  operation_type?: string;
  status?: string;

  // ---- concept-specific ----
  concept_class?: string;
  domain?: string;
  definition_short?: string;
  definition_short_pt_br?: string;

  // ---- table/image specific ----
  source_doc?: string;
  multi_page?: boolean;
  spans_pages?: Array<{
    page?: string;
    bbox?: BBox;
    role?: "start" | "middle" | "end";
  }>;
  headers_summary?: string;
  total_rows_estimate?: number;
  total_columns_estimate?: number;
  extraction_quality?: number | null;
  csv_path?: string;
  headers?: string[];
  row_count_extracted?: number;
  column_count_extracted?: number;
  extraction_notes?: string;
  extraction_model?: string;
  extracted_at?: string;

  // ---- war.gov enrichment (from 02b script) ----
  war_gov?: Record<string, unknown>;

  // ---- catch-all ----
  [key: string]: unknown;
}