/**
 * Frontmatter type contracts derived from CLAUDE-schema-full.md.
 *
 * Used by the UI primitives in `components/fm/*` to render each field with
 * the correct semantic UI (chip / link / badge / bbox / etc.).
 *
 * NOTE: every field is optional because the schema is permissive — earlier
 * extraction passes leave gaps that later phases fill in. The UI must be
 * robust to missing data.
 */
|
||
|
|
|
||
|
|
/** Confidence label attached to inferred or extracted values. */
export type ConfidenceBand =
  | "high"
  | "medium"
  | "low"
  | "speculation";
|
||
|
|
/** Enrichment state recorded in frontmatter (see `AnyFrontmatter.enrichment_status`). */
export type EnrichmentStatus =
  | "deep"
  | "shallow"
  | "none";
|
||
|
|
/** Classification level strings accepted by the frontmatter schema. */
export type ClassificationLevel =
  | "UNCLASSIFIED"
  | "CUI"
  | "CONFIDENTIAL"
  | "SECRET"
  | "TOP SECRET";
|
||
|
|
/**
 * Content tags that can be attached to a page. Frontmatter stores them as an
 * array (`content_classification?: ContentClass[]`), so a page may carry
 * several tags at once.
 */
export type ContentClass =
  | "text-only"
  | "contains-photos"
  | "contains-sketches"
  | "contains-diagrams"
  | "contains-maps"
  | "contains-tables"
  | "contains-signatures"
  | "contains-stamps"
  | "redaction-heavy"
  | "mixed"
  | "blank";
|
||
|
|
|
||
|
|
/**
 * Rectangle described by an origin (`x`, `y`) and a size (`w`, `h`).
 * NOTE(review): units and coordinate origin are not specified in this file —
 * confirm against the schema before doing arithmetic on these values.
 */
export interface BBox {
  x: number;
  y: number;
  w: number;
  h: number;
}
|
||
|
|
/**
 * Geographic coordinates with optional provenance. `lat` / `lon` may be
 * absent or explicitly `null`; `raw_text` and `confidence_band` are optional.
 */
export interface Coords {
  lat?: number | null;
  lon?: number | null;
  raw_text?: string;
  confidence_band?: ConfidenceBand;
}
|
||
|
|
|
||
|
|
/**
 * A single classification marking. All fields are optional, matching the
 * permissive schema.
 */
export interface ClassificationMarking {
  /** Classification level of the marking. */
  level?: ClassificationLevel;
  /** Caveat strings attached to the marking. */
  caveats?: string[];
  /** Where the marking appears. */
  location?: "header" | "footer" | "banner" | "stamp";
  /** Bounding box of the marking. */
  bbox?: BBox;
}
|
||
|
|
|
||
|
|
/** A redacted region. All fields are optional. */
export interface Redaction {
  /**
   * Redaction code. The listed literals document the known values, but the
   * trailing `| string` makes the union resolve to plain `string`, so any
   * string is accepted and the literals are not enforced by the compiler.
   */
  code?: "(b)(1) 1.4(a)" | "(b)(3)" | "(b)(6)" | "other" | string;
  /** Free-text description of the redaction. */
  description?: string;
  /** Bounding box of the redacted region. */
  bbox?: BBox;
  /** Inferred text for the redacted span; may be `null`. */
  text_inferred?: string | null;
}
|
||
|
|
|
||
|
|
/** A signature observed on a page. All fields are optional. */
export interface SignatureObserved {
  /** Inferred signer; may be `null`. */
  signer_inferred?: string | null;
  /** Confidence band for this signature entry. */
  confidence_band?: ConfidenceBand;
  /** Bounding box of the signature. */
  bbox?: BBox;
  /** Free-text notes. */
  notes?: string;
}
|
||
|
|
|
||
|
|
/** A table detected on a page. All fields are optional. */
export interface TableDetected {
  /** Index of the table (presumably page-local — confirm against the schema). */
  local_table_index?: number;
  /** Bounding box of the table. */
  bbox?: BBox;
  /** Whether the table spans more than one page. */
  spans_multi_page?: boolean;
  /** Whether the table continues from the previous page. */
  continues_from_prev_page?: boolean;
  /** Whether the table likely continues on the next page. */
  likely_continues_next_page?: boolean;
  /** Estimated number of rows. */
  row_count_estimate?: number;
  /** Estimated number of columns. */
  col_count_estimate?: number;
  /** Summary of the table headers. */
  headers_summary?: string;
  /** Populated by Phase 4.8 consolidate-tables. */
  table_id?: string;
}
|
||
|
|
|
||
|
|
/** An image region detected on a page. All fields are optional. */
export interface ImageDetected {
  /** Index of the image (presumably page-local — confirm against the schema). */
  local_image_index?: number;
  /** Kind of image region. */
  image_type?:
    | "photo"
    | "sketch"
    | "map"
    | "chart"
    | "stamp"
    | "signature"
    | "redaction"
    | "logo"
    | "seal"
    | "diagram"
    | "other";
  /** Bounding box of the image region. */
  bbox?: BBox;
  /** Caption text (per the field name, obtained via OCR). */
  caption_ocr?: string;
}
|
||
|
|
|
||
|
|
/** An external source record (see `AnyFrontmatter.external_sources`). All fields are optional. */
export interface ExternalSource {
  /** URL of the source. */
  url?: string;
  /** Title of the source. */
  title?: string;
  /** Publisher of the source. */
  publisher?: string;
  /** When the source was accessed (string; format not specified in this file). */
  accessed_at?: string;
  /** Key facts associated with the source. */
  key_facts?: string[];
  /** Reliability rating, expressed on the shared confidence scale. */
  reliability_band?: ConfidenceBand;
}
|
||
|
|
|
||
|
|
/**
 * Reference to an entity. The same shape is reused for every bucket of
 * `EntitiesExtracted`, so it carries the union of fields those buckets use.
 * All fields are optional.
 */
export interface EntityRef {
  /** Name of the entity. */
  name?: string;
  /** Role the entity plays on the page. */
  role_in_page?: "subject" | "witness" | "author" | "signer" | "mentioned";
  /** Alternative names. */
  aliases?: string[];
  type?: string;
  class?: string;
  shape?: string;
  color?: string;
  size_estimate?: string;
  label?: string;
  /** Date as a string; format not specified in this file. */
  date?: string;
}
|
||
|
|
|
||
|
|
/**
 * Entity references grouped by category. Every bucket is optional and, when
 * present, holds a list of `EntityRef`.
 */
export interface EntitiesExtracted {
  people?: EntityRef[];
  organizations?: EntityRef[];
  locations?: EntityRef[];
  events?: EntityRef[];
  uap_objects?: EntityRef[];
  vehicles?: EntityRef[];
  operations?: EntityRef[];
  concepts?: EntityRef[];
}
|
||
|
|
|
||
|
|
/**
 * Observation fields for a UAP report. All fields are optional; the numeric
 * measurements additionally accept `null`.
 */
export interface UapObservation {
  /** Date/time of the observation (UTC per the field name; string format not specified here). */
  date_time_utc?: string;
  /** Duration in seconds. */
  duration_seconds?: number | null;
  shape?: string;
  color?: string;
  size_estimate?: string;
  /** Altitude in feet. */
  altitude_ft?: number | null;
  /** Speed in knots. */
  speed_kts?: number | null;
  /** Bearing in degrees. */
  bearing_deg?: number | null;
  /** Distance (nautical miles per the `_nm` suffix — confirm against the schema). */
  distance_nm?: number | null;
  /** Coordinates of the observation. */
  coordinates?: Coords;
}
|
||
|
|
|
||
|
|
/** A back-reference from an entity to a page. All fields are optional. */
export interface MentionedIn {
  /** Wiki-link string. */
  page?: string;
  /** Alternative naming for `page`. */
  page_ref?: string;
  /** Number of mentions. */
  mention_count?: number;
  /** Role on the page; a free-form string here, unlike `EntityRef.role_in_page`. */
  role_in_page?: string;
}
|
||
|
|
|
||
|
|
/**
 * Universal frontmatter shape — covers document / page / entity / etc.
 *
 * Use this with `(fm as AnyFrontmatter).field` for safe access. Every declared
 * field is optional, and the trailing index signature types any undeclared
 * key as `unknown`, so such values must be narrowed before use.
 */
export interface AnyFrontmatter {
  schema_version?: string;
  type?: string;
  wiki_version?: string;
  last_ingest?: string;
  last_lint?: string;
  last_enriched?: string;

  // identity
  doc_id?: string;
  page_id?: string;
  page_number?: number;
  total_pages?: number;
  person_id?: string;
  organization_id?: string;
  location_id?: string;
  event_id?: string;
  uap_object_id?: string;
  vehicle_id?: string;
  operation_id?: string;
  concept_id?: string;
  table_id?: string;
  image_id?: string;
  entity_class?: string;
  canonical_name?: string;
  canonical_title?: string;
  aliases?: string[];

  // classification
  highest_classification?: ClassificationLevel;
  classification_markings?: ClassificationMarking[];
  language_detected?: string;
  languages_detected?: string[];
  content_classification?: ContentClass[];
  page_type?: string;
  document_class?: string;
  collection?: string;
  redactions?: Redaction[];
  signatures_observed?: SignatureObserved[];

  // page-level extraction
  tables_detected?: TableDetected[];
  images_detected?: ImageDetected[];
  entities_extracted?: EntitiesExtracted;
  uap_observation_fields?: UapObservation | null;
  vision_description?: string;
  vision_description_pt_br?: string;
  ocr_quality_score?: number;
  vision_quality_score?: number;
  flags?: string[];

  // document-level
  page_count?: number;
  total_redactions?: number;
  total_signatures?: number;
  total_tables?: number;
  total_images?: number;
  key_entities?: EntitiesExtracted;

  // entity-level relational
  mentioned_in?: MentionedIn[];
  total_mentions?: number;
  documents_count?: number;
  enrichment_status?: EnrichmentStatus;
  external_sources?: ExternalSource[];
  disambiguation_note?: string;

  // location-specific
  location_type?: string;
  country?: string | string[];
  region?: string;
  parent_location?: string;
  coordinates?: Coords;
  events_here?: string[];

  // event-specific
  event_class?: string;
  date_start?: string;
  date_end?: string;
  date_confidence?: ConfidenceBand;
  primary_location?: string | null;
  observers?: string[];
  uap_objects?: string[];
  documented_in?: string[];
  narrative_summary?: string;
  narrative_summary_pt_br?: string;

  // uap-object-specific
  // NOTE: `altitude_ft` / `speed_kts` are min/max ranges here, whereas
  // `UapObservation` declares fields with the same names as plain numbers.
  observed_in_event?: string;
  secondary_events?: string[];
  shape?: string;
  color?: string;
  size_estimate_m?: { min?: number | null; max?: number | null; confidence_band?: ConfidenceBand };
  features?: string[];
  altitude_ft?: { min?: number | null; max?: number | null; confidence_band?: ConfidenceBand };
  speed_kts?: { min?: number | null; max?: number | null; confidence_band?: ConfidenceBand };
  maneuver_descriptors?: string[];
  sensor_observations?: string[];
  visual_records?: string[];
  confidence_band_overall?: ConfidenceBand;

  // person-specific
  roles?: string[];
  dates?: { born?: string | null; died?: string | null };
  primary_role?: string;
  primary_organization?: string;

  // organization-specific
  organization_type?: string;
  founded?: string;

  // vehicle-specific
  vehicle_class?: string;
  operator?: string;
  model?: string;

  // operation-specific
  operation_type?: string;
  status?: string;

  // concept-specific
  concept_class?: string;
  domain?: string;
  definition_short?: string;
  definition_short_pt_br?: string;

  // table/image specific
  source_doc?: string;
  multi_page?: boolean;
  spans_pages?: Array<{ page?: string; bbox?: BBox; role?: "start" | "middle" | "end" }>;
  headers_summary?: string;
  total_rows_estimate?: number;
  total_columns_estimate?: number;
  extraction_quality?: number | null;
  csv_path?: string;
  headers?: string[];
  row_count_extracted?: number;
  column_count_extracted?: number;
  extraction_notes?: string;
  extraction_model?: string;
  extracted_at?: string;

  // war.gov enrichment (from 02b script)
  war_gov?: Record<string, unknown>;

  // catch-all: any key not declared above is allowed and typed as `unknown`
  [k: string]: unknown;
}
|