disclosure-bureau/scripts/write_chunks_doc65.py

205 lines
56 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Write all chunk files for doc-65 based on vision analysis."""
import json
from pathlib import Path
from datetime import datetime, timezone
# Document identifier; it is also the name of this document's folder under
# the raw-data root below.
DOC_ID = "doc-65-hs1-834228961-62-hq-83894-serial-130"

# Root folder for this document and the three sub-folders derived from it.
RAW_DIR = Path(f"/Users/guto/ufo/raw/{DOC_ID}")
CHUNKS_DIR = RAW_DIR.joinpath("chunks")
IMAGES_DIR = RAW_DIR.joinpath("images")
TABLES_DIR = RAW_DIR.joinpath("tables")

# Create the sub-folders up front, together with any missing parent folders.
# A folder that already exists is not treated as an error.
for d in (CHUNKS_DIR, IMAGES_DIR, TABLES_DIR):
    d.mkdir(exist_ok=True, parents=True)
# Each entry: (page_index, png_filename, list_of_chunks)
# Each chunk: dict with all required fields
# Pages in order: p-000 to p-063, p-100 to p-126 (91 total)
PAGE_DATA = [
# Page 1: p-000.png - Continuation of Arnold sighting narrative (CONFIDENTIAL banner)
(1, "p-000.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.03, "w": 0.7, "h": 0.05}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95, "ufo_anomaly_detected": False},
{"order_in_page": 2, "type": "body_paragraph", "content_en": "airplanes flying so close to the mountain tops, flying directly south to southeast down the back of a mountain range. I could estimate their elevation could have varied a thousand feet one way or another up or down, but they were pretty much on the horizon to me which would indicate they were near the same elevation as I was.", "content_pt_br": "aviões voando tão perto dos topos das montanhas, voando diretamente ao sul para sudeste pelo dorso de uma cordilheira de montanhas. Eu poderia estimar que sua elevação poderia ter variado mil pés para cima ou para baixo, mas estavam praticamente no horizonte para mim, o que indicaria que estavam na mesma elevação que eu.", "bbox": {"x": 0.07, "y": 0.08, "w": 0.88, "h": 0.12}, "classification": None, "formatting": [], "cross_page_hint": "continues_from_prev", "ocr_confidence": 0.88, "ufo_anomaly_detected": True, "ufo_anomaly_type": "formation_flight", "ufo_anomaly_rationale": "Witness describes unidentified objects flying in formation near mountain tops."},
{"order_in_page": 3, "type": "body_paragraph", "content_en": "They flew like many times I have observed geese to fly in a rather diagonal chain-like line as if they were linked together. They seemed to hold a definite direction but rather swerved in and out of the high mountain peaks. Their speed at the time did not impress me particularly, because I knew that our army and air forces had planes that went very fast.", "content_pt_br": "Voavam como tantas vezes eu observei gansos voando em uma linha diagonal em cadeia, como se estivessem ligados. Pareciam manter uma direção definida, mas desviavam para dentro e para fora dos altos picos de montanhas. Sua velocidade na época não me impressionou particularmente, porque eu sabia que nosso exército e forças aéreas tinham aviões que iam muito rápido.", "bbox": {"x": 0.07, "y": 0.20, "w": 0.88, "h": 0.12}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88, "ufo_anomaly_detected": True, "ufo_anomaly_type": "formation_flight", "ufo_anomaly_rationale": "Objects flying in formation like geese, swerving around mountain peaks."},
{"order_in_page": 4, "type": "body_paragraph", "content_en": "What kept bothering me as I watched them flip and flash in the sun right along their path was the fact that I couldn't make out any tail on them, and I am sure that any pilot would justify more than a second look at such a plane.", "content_pt_br": "O que continuava me perturbando enquanto os observava reluzirem e piscarem ao sol exatamente ao longo de seu caminho era o fato de que eu não conseguia identificar nenhuma cauda neles, e tenho certeza de que qualquer piloto justificaria mais do que um segundo olhar para tal avião.", "bbox": {"x": 0.07, "y": 0.32, "w": 0.88, "h": 0.10}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "no_tail_visible", "ufo_anomaly_rationale": "Objects had no visible tail, unlike any known aircraft."},
{"order_in_page": 5, "type": "body_paragraph", "content_en": "I observed them quite plainly, and I estimate my distance from them, which was almost at right angles, to be between twenty to twenty-five miles. I know they must be very large to observe their shape at that distance, even on as clear a day as it was that Tuesday. In fact I compared a news fastener or sewing tool I had in my pocket with them - holding it up on them and holding it up on the DC-4 - that I could observe at quite a distance to my left, and they seemed smaller than the DC-4; but, I should judge their span would have been as wide as the fuselage engines on each side of the fuselage of the DC-4.", "content_pt_br": "Os observei bastante claramente, e estimo minha distância deles, que era quase em ângulo reto, entre vinte e vinte e cinco milhas. Sei que devem ser muito grandes para observar sua forma nessa distância, mesmo em um dia tão claro quanto aquela terça-feira. De fato, comparei um grampo de papel ou ferramenta de costura que tinha no bolso com eles - segurando-o na frente deles e em seguida na frente do DC-4 - que eu podia observar a uma boa distância à minha esquerda, e pareciam menores que o DC-4; mas devo julgar que sua envergadura teria sido tão larga quanto as fuselagens de motores em cada lado da fuselagem do DC-4.", "bbox": {"x": 0.07, "y": 0.42, "w": 0.88, "h": 0.15}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "unknown_craft_size_estimate", "ufo_anomaly_rationale": "Kenneth Arnold estimating size/distance of unidentified objects compared to DC-4."},
{"order_in_page": 6, "type": "body_paragraph", "content_en": "The more I observed these objects, the more upset I became, as I am accustomed and familiar with most all objects flying whether I am close to the ground or at higher altitudes. I observed the chain of these objects passing another high snow-covered ridge in between Mt. Rainier and Mt. Adams,", "content_pt_br": "Quanto mais observava esses objetos, mais perturbado ficava, pois estou acostumado e familiarizado com praticamente todos os objetos que voam, quer eu esteja perto do solo ou em altitudes maiores. Observei a cadeia desses objetos passando por outro cume coberto de neve entre o Monte Rainier e o Monte Adams,", "bbox": {"x": 0.07, "y": 0.57, "w": 0.88, "h": 0.10}, "classification": None, "formatting": [], "cross_page_hint": "continues_to_next", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "chain_formation", "ufo_anomaly_rationale": "Chain of unidentified objects passing between Mt. Rainier and Mt. Adams."},
{"order_in_page": 7, "type": "page_number_marker", "content_en": "2d/16", "content_pt_br": "2d/16", "bbox": {"x": 0.8, "y": 0.92, "w": 0.12, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.7},
{"order_in_page": 8, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.96, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 2: p-001.png - HQ Air Defense Command letter, Alpheus Powell interview
(2, "p-001.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.1, "y": 0.02, "w": 0.8, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "stamp", "content_en": "DECLASSIFIED", "content_pt_br": "DESCLASSIFICADO", "bbox": {"x": 0.03, "y": 0.06, "w": 0.18, "h": 0.06}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 3, "type": "header", "content_en": "62-93994-130", "content_pt_br": "62-93994-130", "bbox": {"x": 0.55, "y": 0.06, "w": 0.4, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 4, "type": "letterhead", "content_en": "HEADQUARTERS\nAIR DEFENSE COMMAND\nMITCHEL FIELD, NEW YORK", "content_pt_br": "QUARTEL GENERAL\nCOMANDO DE DEFESA AÉREA\nMITCHEL FIELD, NEW YORK", "bbox": {"x": 0.25, "y": 0.08, "w": 0.5, "h": 0.08}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.92},
{"order_in_page": 5, "type": "date_line", "content_en": "12 September 1947", "content_pt_br": "12 de setembro de 1947", "bbox": {"x": 0.6, "y": 0.17, "w": 0.35, "h": 0.025}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 6, "type": "subject_line", "content_en": "SUBJECT: Unidentified Flying Object\n(Interview - Alpheus O. Powell)", "content_pt_br": "ASSUNTO: Objeto Voador Não Identificado\n(Entrevista - Alpheus O. Powell)", "bbox": {"x": 0.07, "y": 0.20, "w": 0.85, "h": 0.04}, "classification": None, "formatting": ["bold"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 7, "type": "header", "content_en": "SUMMARY OF INFORMATION:", "content_pt_br": "RESUMO DE INFORMAÇÕES:", "bbox": {"x": 0.07, "y": 0.26, "w": 0.5, "h": 0.02}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 8, "type": "body_paragraph", "content_en": "The following information was received 12 August 1947 from Mr. Alpheus O. Powell, 28 Redwood Road, New Hyde Park, Long Island, relative to the sighting of a possible flying disc 4 August 1947.\n\nOn 4 August 1947, Mr. Powell, an Airlines Captain with Pan American Airways, Inc., was the first pilot of a Constellation type aircraft on a flight from Gander, Newfoundland, to La Guardia Field, New York. Mr. Powell took over the aircraft at Gander, Newfoundland and departed at approximately 1220 P.M., Eastern Daylight Saving Time for La Guardia Field, New York. At 1600 P.M., at a position approximately midway between the Everett (Mass.) Fan Marker and the Bedford Radio Beacon (Everett is 3 miles NW of Boston, Mass., and Bedford is 16 miles NW of the same city) both Mr Powell and Mr E. White, navigator on this trip, sighted unidentifiable flying objects. To the best of Mr. Powell's knowledge, the following weather conditions existed at that time: visibility good; clear; no clouds. Mr. E. White, the co-pilot and navigator on this trip, sighted unidentifiable flying objects. To the best of Mr. Powell's knowledge, the following weather conditions existed at that time: visibility good; clear; no clouds. Mr. White, who was sitting in the co-pilots seat (the right side of the cockpit) first called Mr. Powell's attention to a bright orange object.", "content_pt_br": "As seguintes informações foram recebidas em 12 de agosto de 1947 do Sr. Alpheus O. Powell, 28 Redwood Road, New Hyde Park, Long Island, relativas ao avistamento de um possível disco voador em 4 de agosto de 1947.\n\nEm 4 de agosto de 1947, o Sr. Powell, Capitão de Companhia Aérea da Pan American Airways, Inc., era o primeiro piloto de uma aeronave tipo Constellation em um voo de Gander, Newfoundland, para La Guardia Field, Nova York. O Sr. 
Powell assumiu a aeronave em Gander, Newfoundland e partiu por volta das 12h20 (horário de verão oriental) para La Guardia Field, Nova York. Às 16h00, em uma posição aproximadamente entre o Fan Marker Everett (Mass.) e o Radio Beacon de Bedford (Everett fica a 3 milhas NW de Boston, Mass., e Bedford fica a 16 milhas NW da mesma cidade), tanto o Sr. Powell quanto o Sr. E. White, navegador nesta viagem, avistaram objetos voadores não identificáveis.", "bbox": {"x": 0.07, "y": 0.29, "w": 0.87, "h": 0.55}, "classification": None, "formatting": [], "cross_page_hint": "continues_to_next", "ocr_confidence": 0.88, "ufo_anomaly_detected": True, "ufo_anomaly_type": "aerial_unknown_object", "ufo_anomaly_rationale": "Airline Captain Powell and navigator White report sighting unidentifiable flying objects from a Constellation aircraft over Massachusetts."},
{"order_in_page": 9, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.1, "y": 0.95, "w": 0.8, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 3: p-002.png - HQ Air Defense Command, Walter I. White interview
(3, "p-002.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.02, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "letterhead", "content_en": "HEADQUARTERS\nAIR DEFENSE COMMAND\nMITCHEL FIELD, NEW YORK", "content_pt_br": "QUARTEL GENERAL\nCOMANDO DE DEFESA AÉREA\nMITCHEL FIELD, NEW YORK", "bbox": {"x": 0.25, "y": 0.06, "w": 0.5, "h": 0.08}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.92},
{"order_in_page": 3, "type": "date_line", "content_en": "18 September 1947", "content_pt_br": "18 de setembro de 1947", "bbox": {"x": 0.6, "y": 0.16, "w": 0.35, "h": 0.025}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.93},
{"order_in_page": 4, "type": "subject_line", "content_en": "SUBJECT: Unidentified Flying Objects\n(Interview - Walter I. White)", "content_pt_br": "ASSUNTO: Objetos Voadores Não Identificados\n(Entrevista - Walter I. White)", "bbox": {"x": 0.07, "y": 0.20, "w": 0.85, "h": 0.04}, "classification": None, "formatting": ["bold"], "cross_page_hint": "self_contained", "ocr_confidence": 0.93},
{"order_in_page": 5, "type": "header", "content_en": "SUMMARY OF INFORMATION:", "content_pt_br": "RESUMO DE INFORMAÇÕES:", "bbox": {"x": 0.07, "y": 0.25, "w": 0.5, "h": 0.02}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.93},
{"order_in_page": 6, "type": "body_paragraph", "content_en": "The following information relative to the sighting of a possible flying disc 4 August 1947, was received 10 September 1947 from Mr. Walter I. White, 19-07 78th Street, Jackson Heights, New York.\n\nOn 4 August 1947, Mr. White, from Pan American Airways Inc., was the navigator of a Constellation type aircraft on a flight from Gander, Newfoundland to La Guardia Field, New York. At 1600, at a position approximately 10 miles NW of Boston, Mass., Mr. White sighted a flying object which he was unable to identify. At this time Mr. White was sitting in the co-pilots seat, and looking out the right side of the aircraft he sighted what appeared to be in the vicinity. It appeared to be about 5 miles away, and at least 1,000 feet below the level of the Constellation. Mr. White believes that he observed the object for almost 30 seconds before he called Mr. Powell's attention to the object. When he first called Mr. Powell's attention to it, it was too \"lit up\" and traveling at the upper right, and a cloud passed between the object and the aircraft. The object appeared to have a sharp, definite shape, and appeared cylindrical in shape, on having a bright orange hue. Mr. Powell stated that the object had a definite shape, and there was no appearance of exhaust from a rocket, or a jet aircraft. Mr. Powell estimated the course of the object. It was flying, at a terrific and quite rapid speed. Mr. Powell lost sight of the object, when a cloud came between the aircraft and the object. The pursuit of the object was not continued, inasmuch as it would have necessitated a departure from the established airways.", "content_pt_br": "As seguintes informações relativas ao avistamento de um possível disco voador em 4 de agosto de 1947 foram recebidas em 10 de setembro de 1947 do Sr. Walter I. White, 19-07 78th Street, Jackson Heights, Nova York.\n\nEm 4 de agosto de 1947, o Sr. 
White, da Pan American Airways Inc., era o navegador de uma aeronave tipo Constellation em um voo de Gander, Newfoundland, para La Guardia Field, Nova York. Às 16h00, em uma posição aproximadamente 10 milhas a noroeste de Boston, Mass., o Sr. White avistou um objeto voador que não conseguiu identificar. O objeto estava a cerca de 5 milhas de distância, pelo menos 1.000 pés abaixo do nível da Constellation. O Sr. White acredita ter observado o objeto por quase 30 segundos antes de chamar a atenção do Sr. Powell. O objeto parecia ter uma forma nítida e definida, aparentemente cilíndrica, com uma tonalidade laranja brilhante.", "bbox": {"x": 0.07, "y": 0.28, "w": 0.87, "h": 0.57}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "aerial_unknown_object", "ufo_anomaly_rationale": "Navigator White reports bright orange cylindrical unidentified object observed for 30 seconds from Constellation aircraft."},
{"order_in_page": 7, "type": "header", "content_en": "AGENTS NOTES: Mr. Walter I. White has been employed by Pan American Airways for the past five years as a Navigator, and during the war worked with TIA", "content_pt_br": "NOTAS DO AGENTE: O Sr. Walter I. White está empregado pela Pan American Airways nos últimos cinco anos como Navegador, e durante a guerra trabalhou com a TIA", "bbox": {"x": 0.07, "y": 0.86, "w": 0.87, "h": 0.05}, "classification": None, "formatting": [], "cross_page_hint": "continues_to_next", "ocr_confidence": 0.85},
{"order_in_page": 8, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.95, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 4: p-003.png - Continuation of White interview, distribution/evaluation
(4, "p-003.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.02, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "body_paragraph", "content_en": "in conjunction with contract flying for the AAF. Mr White states that he has flown with Mr Powell on a number of occasions, and he considers him to be a very stable person; completely reliable, and not given to \"flights of fancy\".", "content_pt_br": "em conjunto com voos contratados para as Forças Aéreas do Exército. O Sr. White afirma que voou com o Sr. Powell em várias ocasiões, e o considera uma pessoa muito estável; completamente confiável e não dado a \"voos de fantasia\".", "bbox": {"x": 0.07, "y": 0.07, "w": 0.87, "h": 0.08}, "classification": None, "formatting": [], "cross_page_hint": "continues_from_prev", "ocr_confidence": 0.88},
{"order_in_page": 3, "type": "body_paragraph", "content_en": "Related Report: See Summary of Information, 12 September 1947, Hq ADC, subject, \"Unidentified Flying Objects\" (Interview - Alpheus O. Powell).", "content_pt_br": "Relatório Relacionado: Ver Resumo de Informações, 12 de setembro de 1947, QG ADC, assunto, \"Objetos Voadores Não Identificados\" (Entrevista - Alpheus O. Powell).", "bbox": {"x": 0.07, "y": 0.16, "w": 0.87, "h": 0.05}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88},
{"order_in_page": 4, "type": "body_paragraph", "content_en": "Previous Distribution:\n- None\n\nDistribution\n- AAF (2 copies)\n- ADC (2 copies)\n\nEvaluation\nof source of information\nC B", "content_pt_br": "Distribuição Anterior:\n- Nenhuma\n\nDistribuição\n- AAF (2 cópias)\n- ADC (2 cópias)\n\nAvaliação\nda fonte da informação\nC B", "bbox": {"x": 0.07, "y": 0.25, "w": 0.87, "h": 0.2}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87},
{"order_in_page": 5, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.93, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 5: p-004.png - Agent's notes on Powell background
(5, "p-004.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.02, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "body_paragraph", "content_en": "AGENTS NOTES: Mr. A. O. Powell is a graduate of the Aviation Cadet Flying Training Program, having graduated from Maxwell Field, Alabama, with the class of 41-C. Since graduation, Mr. Powell has flown for Pan American Airways and, at this date, has over 4,000 command pilot hours to his credit. Mr. Powell appears to be a calm, intelligent individual, not given to flights of fancy, or easily swayed by what he has previously read in the newspapers as regards reports of this type. Mr. Powell has a fear of publicity and seemed hesitant to even tell his story lest he become the object of ridicule. Mr. Powell was questioned as to the possibility that what he sighted might have been a tow target, a pilot balloon, or a radiosonde device used for meteorological purposes. Mr. Powell stated that he has seen numerous pilot balloons, radiosonde devices and tow targets, while on flights; the object observed on this flight definitely was not one of them.", "content_pt_br": "NOTAS DO AGENTE: O Sr. A. O. Powell é formado pelo Programa de Treinamento de Voo de Cadetes de Aviação, tendo se formado em Maxwell Field, Alabama, com a turma de 41-C. Desde a formatura, o Sr. Powell voou pela Pan American Airways e, nesta data, tem mais de 4.000 horas de voo como piloto comandante em seu crédito. O Sr. Powell parece ser um indivíduo calmo e inteligente, não dado a fantasias, ou facilmente influenciado pelo que leu anteriormente nos jornais sobre relatórios deste tipo. O Sr. Powell tem medo de publicidade e pareceu hesitante em contar sua história por medo de se tornar objeto de ridículo. O Sr. Powell foi questionado sobre a possibilidade de que o que avistou pudesse ter sido um alvo rebocado, um balão piloto ou um radiobalão usado para fins meteorológicos. O Sr. 
Powell afirmou que já viu numerosos balões piloto, radiobalões e alvos rebocados durante voos; o objeto observado neste voo definitivamente não era nenhum deles.", "bbox": {"x": 0.07, "y": 0.07, "w": 0.87, "h": 0.5}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88},
{"order_in_page": 3, "type": "body_paragraph", "content_en": "Previous Distribution:\n- None\n\nDistribution\n- AAF (2 copies)\n- ADC (2 copies)", "content_pt_br": "Distribuição Anterior:\n- Nenhuma\n\nDistribuição\n- AAF (2 cópias)\n- ADC (2 cópias)", "bbox": {"x": 0.07, "y": 0.62, "w": 0.5, "h": 0.1}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88},
{"order_in_page": 4, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.15, "y": 0.93, "w": 0.7, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 6: p-005.png - RESTRICTED memo, 14th Air Force forwarding flying disc report
(6, "p-005.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.02, "w": 0.5, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "body_paragraph", "content_en": "BASIC: Ltr, Hq, BARTD, Birmingham AAF, Birmingham, Ala., dtd 8 July 47, subj: Report on Local \"Flying Disc.\"\n\n319.1/624\n\n1st Ind.\nHEADQUARTERS, FOURTEENTH AIR FORCE, Orlando, Florida, [date] 1947\nTO: Commanding General, Air Defense Command, Mitchel Field, New York.\n\n1. Forwarded for information of your Headquarters.\n\n2. This Headquarters has made no investigation of \"Flying Disc\" reports because this is an isolated case.\n\nFOR THE COMMANDING GENERAL:", "content_pt_br": "BÁSICO: Carta, QG, BARTD, Birmingham AAF, Birmingham, Ala., datada de 8 de julho de 47, assunto: Relatório sobre \"Disco Voador\" Local.\n\n319.1/624\n\n1ª Indorsação.\nQUARTEL GENERAL, DÉCIMA QUARTA FORÇA AÉREA, Orlando, Flórida, [data] 1947\nPARA: Comandante Geral, Comando de Defesa Aérea, Mitchel Field, Nova York.\n\n1. Encaminhado para informação do seu Quartel General.\n\n2. Este Quartel General não fez nenhuma investigação sobre relatórios de \"Disco Voador\" porque este é um caso isolado.\n\nPELO COMANDANTE GERAL:", "bbox": {"x": 0.07, "y": 0.07, "w": 0.87, "h": 0.55}, "classification": "RESTRICTED", "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88},
{"order_in_page": 3, "type": "signature_block", "content_en": "P.V.Murphy\n[signature]\nP.V. Murphy\nBrig. Gen., AAF\nAsst. Adj. Gen.", "content_pt_br": "P.V.Murphy\n[assinatura]\nP.V. Murphy\nGen. de Brig., AAF\nAssistente do Adj. Geral", "bbox": {"x": 0.5, "y": 0.63, "w": 0.4, "h": 0.1}, "classification": None, "formatting": ["handwritten"], "cross_page_hint": "self_contained", "ocr_confidence": 0.8},
{"order_in_page": 4, "type": "body_paragraph", "content_en": "3 Incls: n/c", "content_pt_br": "3 Anexos: s/c", "bbox": {"x": 0.07, "y": 0.65, "w": 0.2, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 5, "type": "stamp", "content_en": "32715", "content_pt_br": "32715", "bbox": {"x": 0.07, "y": 0.9, "w": 0.15, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.85},
{"order_in_page": 6, "type": "page_number_marker", "content_en": "24-6\n1947", "content_pt_br": "24-6\n1947", "bbox": {"x": 0.82, "y": 0.9, "w": 0.12, "h": 0.04}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.8},
{"order_in_page": 7, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.95, "w": 0.5, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 7: p-006.png - Blank page with file number notation
(7, "p-006.png", [
{"order_in_page": 1, "type": "header", "content_en": "62-83894-130", "content_pt_br": "62-83894-130", "bbox": {"x": 0.3, "y": 0.02, "w": 0.4, "h": 0.03}, "classification": None, "formatting": ["handwritten"], "cross_page_hint": "self_contained", "ocr_confidence": 0.85},
{"order_in_page": 2, "type": "blank", "content_en": "[Blank page]", "content_pt_br": "[Página em branco]", "bbox": {"x": 0.0, "y": 0.05, "w": 1.0, "h": 0.95}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 1.0},
]),
# Page 8: p-007.png - RESTRICTED Birmingham AAF report on flying disc with photo
(8, "p-007.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.02, "w": 0.5, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "letterhead", "content_en": "HEADQUARTERS\nBIRMINGHAM MILITARY TRAINING DETACHMENT\nBIRMINGHAM ARMY AIR FIELD\nBirmingham, Alabama", "content_pt_br": "QUARTEL GENERAL\nDETACHMENTO DE TREINAMENTO MILITAR DE BIRMINGHAM\nCAMPO DE AVIAÇÃO DO EXÉRCITO DE BIRMINGHAM\nBirmingham, Alabama", "bbox": {"x": 0.2, "y": 0.06, "w": 0.6, "h": 0.08}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 3, "type": "date_line", "content_en": "8 July 1947", "content_pt_br": "8 de julho de 1947", "bbox": {"x": 0.7, "y": 0.15, "w": 0.25, "h": 0.025}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 4, "type": "header", "content_en": "7795", "content_pt_br": "7795", "bbox": {"x": 0.75, "y": 0.18, "w": 0.15, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 5, "type": "subject_line", "content_en": "SUBJECT: Report on Local \"Flying Disc\"", "content_pt_br": "ASSUNTO: Relatório sobre \"Disco Voador\" Local", "bbox": {"x": 0.07, "y": 0.22, "w": 0.85, "h": 0.025}, "classification": None, "formatting": ["bold"], "cross_page_hint": "self_contained", "ocr_confidence": 0.93},
{"order_in_page": 6, "type": "address_block", "content_en": "TO: Commanding General\nFourteenth Air Force, ADC\nOrlando, Florida\nAttn: A-2", "content_pt_br": "PARA: Comandante Geral\nDécima Quarta Força Aérea, ADC\nOrlando, Flórida\nAtenção: A-2", "bbox": {"x": 0.07, "y": 0.25, "w": 0.5, "h": 0.07}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 7, "type": "body_paragraph", "content_en": "1. Inclosed herewith is a photograph of the \"unidentified flying disc,\" which one reportedly witnessed over Birmingham on the night of Sunday, 6 July, 1947.\n\n2. Attention is invited to the two light spots on the print and the light trail following the two discs. The image at the top of the photograph indicates the discs directly. This has been examined by professional photographers in Birmingham and their general opinion is that the two spots observed in the photograph are not a photographic negative, but instead, an actual photograph of some mysterious disc.\n\n3. The undersigned officer did not personally witness the flight of any disc, however, in view of the numerous reports received from the citizens of Birmingham, I am of the general opinion in Birmingham that \"something resembling a disc\" is a statement made by the only military personnel of this organization who personally witness the craft. The following is a statement made by additional information. Attached as inclosure number three are clippings from one of the local newspapers giving an account of the day, several citizens, and two reported owing the many mysterious objects.\n\n4. The intelligence officer of the 1856 Military District has forwarded a similar report through ground force channels to the Commanding General, Third Army, Atlanta, Georgia.", "content_pt_br": "1. Incluso neste documento há uma fotografia do \"disco voador não identificado,\" que supostamente foi testemunhado sobre Birmingham na noite de domingo, 6 de julho de 1947.\n\n2. Chama-se atenção para os dois pontos de luz na fotografia e o rastro de luz seguindo os dois discos. A imagem no topo da fotografia indica os discos diretamente. Isso foi examinado por fotógrafos profissionais em Birmingham e a opinião geral é que os dois pontos observados na fotografia não são um negativo fotográfico, mas sim uma fotografia real de algum disco misterioso.\n\n3. 
O oficial abaixo assinado não testemunhou pessoalmente o voo de qualquer disco, porém, em vista dos numerosos relatórios recebidos dos cidadãos de Birmingham, sou da opinião geral de que \"algo semelhante a um disco\" é uma declaração feita pelo único pessoal militar desta organização que testemunhou pessoalmente a aeronave.\n\n4. O oficial de inteligência do 1856º Distrito Militar encaminhou um relatório semelhante através dos canais das forças terrestres ao Comandante Geral, Terceiro Exército, Atlanta, Geórgia.", "bbox": {"x": 0.07, "y": 0.32, "w": 0.87, "h": 0.5}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.85, "ufo_anomaly_detected": True, "ufo_anomaly_type": "disc_photograph", "ufo_anomaly_rationale": "Photograph of alleged flying disc over Birmingham, Alabama examined by professional photographers."},
{"order_in_page": 8, "type": "signature_block", "content_en": "Jack C. White\n[signature]\nJack C. White\nMajor, Air Corps\nCommanding", "content_pt_br": "Jack C. White\n[assinatura]\nJack C. White\nMajor, Corpo Aéreo\nComandante", "bbox": {"x": 0.5, "y": 0.83, "w": 0.4, "h": 0.08}, "classification": None, "formatting": ["handwritten"], "cross_page_hint": "self_contained", "ocr_confidence": 0.8},
{"order_in_page": 9, "type": "body_paragraph", "content_en": "2 Incls:\n1. Photograph\n2. Statement\n3. Clippings", "content_pt_br": "2 Anexos:\n1. Fotografia\n2. Declaração\n3. Recortes", "bbox": {"x": 0.07, "y": 0.84, "w": 0.3, "h": 0.07}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 10, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.95, "w": 0.5, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 9: p-008.png - Newspaper clipping about mysterious flying saucers in Birmingham
(9, "p-008.png", [
{"order_in_page": 1, "type": "image", "content_en": "Newspaper clipping: 'Mysterious Flying Saucers Reported In Birmingham Skies' - The Birmingham News article about multiple sightings of flying discs over Birmingham, with accounts from various witnesses including Mrs. James Rain (2135 South 28th Street), J.L. Kardon (2100 Clanton Street), C.H. Zohn, J.R. Kauke, C.C. Rockwood, Nancy Rockwood, and others. The article describes lights, sounds, and movements of the objects.", "content_pt_br": "Recorte de jornal: 'Discos Voadores Misteriosos Relatados nos Céus de Birmingham' - Artigo do The Birmingham News sobre múltiplos avistamentos de discos voadores sobre Birmingham, com relatos de várias testemunhas incluindo Sra. James Rain, J.L. Kardon, C.H. Zohn, J.R. Kauke, C.C. Rockwood, Nancy Rockwood e outros. O artigo descreve luzes, sons e movimentos dos objetos.", "bbox": {"x": 0.05, "y": 0.02, "w": 0.9, "h": 0.96}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.82, "image_type": "newspaper_clipping", "ufo_anomaly_detected": True, "ufo_anomaly_type": "multiple_witness_sighting", "ufo_anomaly_rationale": "Newspaper reports multiple civilian witnesses to flying saucer sightings over Birmingham, Alabama.", "image_description_en": "Newspaper clipping from The Birmingham News reporting multiple sightings of mysterious flying saucers over Birmingham. Contains witness accounts, descriptions of the objects as lights in the sky, and mentions of police reports.", "image_description_pt_br": "Recorte de jornal do The Birmingham News relatando múltiplos avistamentos de discos voadores misteriosos sobre Birmingham. Contém relatos de testemunhas, descrições dos objetos como luzes no céu, e menções de relatórios policiais."},
]),
# Page 10: p-009.png - Newspaper clipping continued, radio-guided theory
(10, "p-009.png", [
{"order_in_page": 1, "type": "image", "content_en": "Newspaper clipping: 'RADIO-GUIDED, SAYS LAD' from Monday, July 7, 1947. Article about a grammar school youth Michael Rieman who gave his opinion that flying saucers 'are new radio-guided missiles from another country that is planning war on the United States.' Includes other witness accounts from Birmingham area residents about sightings on July 6-7, 1947.", "content_pt_br": "Recorte de jornal: 'GUIADO POR RÁDIO, DIZ RAPAZ' de segunda-feira, 7 de julho de 1947. Artigo sobre o jovem escolar Michael Rieman que deu sua opinião de que os discos voadores 'são novos mísseis guiados por rádio de outro país que está planejando guerra contra os Estados Unidos.' Inclui relatos de outras testemunhas de moradores da área de Birmingham sobre avistamentos em 6-7 de julho de 1947.", "bbox": {"x": 0.05, "y": 0.02, "w": 0.9, "h": 0.96}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.8, "image_type": "newspaper_clipping", "ufo_anomaly_detected": True, "ufo_anomaly_type": "multiple_witness_sighting", "ufo_anomaly_rationale": "Newspaper article with multiple civilian witness accounts of flying saucer sightings.", "image_description_en": "Newspaper clipping from July 7, 1947 containing witness accounts of flying saucer sightings in Birmingham area and a youth's theory that they are radio-guided missiles.", "image_description_pt_br": "Recorte de jornal de 7 de julho de 1947 contendo relatos de testemunhas de avistamentos de discos voadores na área de Birmingham e a teoria de um jovem de que são mísseis guiados por rádio."},
]),
# Page 11: p-010.png - Another Birmingham newspaper clipping about flying saucers
(11, "p-010.png", [
{"order_in_page": 1, "type": "image", "content_en": "Newspaper clipping with multiple witness accounts of flying saucer sightings in Birmingham area. Witnesses include Charles F. Bradley (weather man), Mrs. James Rain, J.L. Kardon, C.M. Cadenhead, and others. Accounts describe round, shiny objects traveling in formation. 'Scores of People Report Seeing Mysterious Discs' headline. Mentions searchlight reflection theory being advanced.", "content_pt_br": "Recorte de jornal com múltiplos relatos de testemunhas de avistamentos de discos voadores na área de Birmingham. Testemunhas incluem Charles F. Bradley (meteorologista), Sra. James Rain, J.L. Kardon, C.M. Cadenhead e outros. Os relatos descrevem objetos redondos e brilhantes viajando em formação. Manchete: 'Dezenas de Pessoas Relatam Ter Visto Discos Misteriosos'. Menciona teoria de reflexo de holofote sendo avançada.", "bbox": {"x": 0.05, "y": 0.02, "w": 0.9, "h": 0.96}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.78, "image_type": "newspaper_clipping", "ufo_anomaly_detected": True, "ufo_anomaly_type": "multiple_witness_sighting", "ufo_anomaly_rationale": "Dozens of Birmingham residents report seeing mysterious discs in formation.", "image_description_en": "Newspaper clipping with dozens of witness accounts of flying saucer sightings in Birmingham, Alabama, 1947.", "image_description_pt_br": "Recorte de jornal com dezenas de relatos de testemunhas de avistamentos de discos voadores em Birmingham, Alabama, 1947."},
]),
# Page 12: p-011.png - More Birmingham witnesses, searchlight theory
(12, "p-011.png", [
{"order_in_page": 1, "type": "image", "content_en": "Newspaper clipping continuing Birmingham flying saucer accounts. Mentions Searchlight Reflection Theory being advanced. Multiple witnesses describe lights appearing and disappearing rapidly. References to E.H. Vaughn III (anti-aircraft gunner in Europe), S.S. Lovejoy (Tennessee Coal Iron and Railroad), and Jimmy Dewberry. 'Searchlight Reflection Theory Is Advanced' subheadline.", "content_pt_br": "Recorte de jornal continuando relatos de discos voadores em Birmingham. Menciona teoria de reflexo de holofote sendo avançada. Múltiplas testemunhas descrevem luzes aparecendo e desaparecendo rapidamente. Referências a E.H. Vaughn III (artilheiro anti-aéreo na Europa), S.S. Lovejoy e Jimmy Dewberry. Sub-manchete: 'Teoria de Reflexo de Holofote é Avançada'.", "bbox": {"x": 0.05, "y": 0.02, "w": 0.9, "h": 0.96}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.78, "image_type": "newspaper_clipping", "ufo_anomaly_detected": True, "ufo_anomaly_type": "multiple_witness_sighting", "ufo_anomaly_rationale": "Continued witness accounts of flying saucer sightings in Birmingham.", "image_description_en": "Newspaper clipping with more Birmingham flying saucer witness accounts and analysis of the searchlight reflection theory.", "image_description_pt_br": "Recorte de jornal com mais relatos de testemunhas de discos voadores em Birmingham e análise da teoria de reflexo de holofote."},
]),
# Page 13: p-012.png - RESTRICTED statement of Sgt. Ira L. Livingston
(13, "p-012.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.02, "w": 0.5, "h": 0.03}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
{"order_in_page": 2, "type": "header", "content_en": "S-T-A-T-E-M-E-N-T", "content_pt_br": "D-E-C-L-A-R-A-Ç-Ã-O", "bbox": {"x": 0.35, "y": 0.06, "w": 0.3, "h": 0.025}, "classification": None, "formatting": ["all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 3, "type": "body_paragraph", "content_en": "I, Staff Sergeant Ira L. Livingston, MA 14 153 972, Air Corps, have approximately 250 hours flying time as pilot and Armorer Gunner have the following statement to make concerning the appearance of \"Flying Discs\" in the vicinity of Birmingham, Alabama.\n\nAt 2045 hours, 6 July 1947, while I was eating supper at my residence at 1354 Meadow Lane, Green Acres, Birmingham, Alabama, my next door neighbor, Mr. Herman M. Rockwell, called for me to come to the front door that there were some \"Flying Discs\" outside. Immediately I went out the front door to observe the objects. The objects appeared to the West of Birmingham traveling in a South Western direction. They appeared to be approximately 2000 feet above the horizon at a 45 degree angle from where I was standing at an undetermined distance away. The objects appeared to be approximately two (2) feet in diameter, round in shape, producing a dim glow of light and traveling at an estimated speed of five (5) to six (6) hundred miles per hour. The objects or object appeared to be traveling in a definite are rather than straight and as soon as one was out of sight another would appear behind it, but not always in the same path. The view of where it came from was obstructed by a nearby house; and when it reached the altitude of approximately 2000feet, it started off in the same direction as the others. I did not at any time see any more than one at the time and even though there could have been only one, my personal belief is that there were seven (7) to ten (10). The Discs were silent and appeared to be composed of a single light.", "content_pt_br": "Eu, Sargento Técnico Ira L. 
Livingston, MA 14 153 972, Corpo Aéreo, tenho aproximadamente 250 horas de tempo de voo como piloto e Artilheiro Armador e faço a seguinte declaração sobre o aparecimento de \"Discos Voadores\" nas proximidades de Birmingham, Alabama.\n\nÀs 20h45, em 6 de julho de 1947, enquanto eu jantava em minha residência na 1354 Meadow Lane, Green Acres, Birmingham, Alabama, meu vizinho próximo, Sr. Herman M. Rockwell, me chamou para vir à porta da frente pois havia alguns \"Discos Voadores\" lá fora. Imediatamente saí pela porta da frente para observar os objetos. Os objetos pareciam estar a Oeste de Birmingham viajando em direção sudoeste. Pareciam estar aproximadamente 2000 pés acima do horizonte em um ângulo de 45 graus de onde eu estava. Os objetos pareciam ter aproximadamente dois (2) pés de diâmetro, forma redonda, produzindo um brilho fraco de luz e viajando a uma velocidade estimada de quinhentas (500) a seiscentas (600) milhas por hora. Os objetos eram silenciosos e pareciam ser compostos de uma única luz.", "bbox": {"x": 0.07, "y": 0.09, "w": 0.87, "h": 0.72}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "disc_sighting_silent", "ufo_anomaly_rationale": "Military sergeant reports silent disc-shaped objects traveling 500-600 mph over Birmingham, Alabama."},
{"order_in_page": 4, "type": "signature_block", "content_en": "Ira L. Livingston\nIra L. Livingston\nStaff Sergeant, MA 14 153 972\n\nSubscribed and sworn to before me this 7th day of July 1947.\n\nJames L. MacFarlane\n1st Lt, AC\nAsst. AAI", "content_pt_br": "Ira L. Livingston\nIra L. Livingston\nSargento Técnico, MA 14 153 972\n\nAssinado e jurado perante mim neste 7º dia de julho de 1947.\n\nJames L. MacFarlane\n1º Ten., Corpo Aéreo\nAss. AAI", "bbox": {"x": 0.07, "y": 0.82, "w": 0.87, "h": 0.1}, "classification": None, "formatting": ["handwritten"], "cross_page_hint": "self_contained", "ocr_confidence": 0.82},
{"order_in_page": 5, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.95, "w": 0.5, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
]
# Continue with remaining pages
# Second batch of hand-written page entries (pages 14 and 15). Each entry uses
# the same (page_index, png_filename, list_of_chunks) tuple layout as PAGE_DATA,
# and the two lists are concatenated into ALL_PAGE_DATA further down.
# NOTE(review): every bbox value here lies within 0-1, so they look like
# page-relative fractions — confirm against whatever consumes these chunks.
PAGE_DATA_2 = [
# Page 14: p-013.png - CONFIDENTIAL Newfoundland Base Command letter of transmittal
(14, "p-013.png", [
# NOTE(review): content_pt_br keeps the English word "CONFIDENTIAL" here, while
# the RESTRICTED banners are translated to "RESTRITO" — confirm this is intended.
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.25, "y": 0.02, "w": 0.5, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 2, "type": "stamp", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.03, "y": 0.08, "w": 0.2, "h": 0.04}, "classification": "RESTRICTED", "formatting": ["bold"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 3, "type": "letterhead", "content_en": "HEADQUARTERS, NEWFOUNDLAND BASE COMMAND\nATLANTIC DIVISION, AIR TRANSPORT COMMAND\nFORT PEPPERRELL, NEWFOUNDLAND\nAPO 862, S POSTMASTER, NEW YORK, N.Y.", "content_pt_br": "QUARTEL GENERAL, COMANDO DA BASE DA TERRA NOVA\nDIVISÃO DO ATLÂNTICO, COMANDO DE TRANSPORTE AÉREO\nFORT PEPPERRELL, TERRA NOVA\nAPO 862, S POSTMASTER, NOVA YORK, N.Y.", "bbox": {"x": 0.2, "y": 0.06, "w": 0.6, "h": 0.1}, "classification": None, "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
# NOTE(review): content_pt_br omits the trailing "[Rank]" / "[Date]" placeholders
# that are present in content_en.
{"order_in_page": 4, "type": "header", "content_en": "AMT-T-101\n3133\n/oth\nClassification: Restricted\n[date stamp] 6 Aug 47\n[Rank]\n[Date]", "content_pt_br": "AMT-T-101\n3133\n/oth\nClassificação: Restrito\n[carimbo de data] 6 Ago 47", "bbox": {"x": 0.55, "y": 0.06, "w": 0.4, "h": 0.1}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.75},
{"order_in_page": 5, "type": "date_line", "content_en": "30 July 1947", "content_pt_br": "30 de julho de 1947", "bbox": {"x": 0.65, "y": 0.17, "w": 0.3, "h": 0.025}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.93},
{"order_in_page": 6, "type": "subject_line", "content_en": "SUBJECT: Letter of Transmittal.\n\nTO: Commanding General,\nAtlantic Division, ATC,\nFort Totten, Long Island, N.Y.\n(ATTENTION: AC/S, Intelligence)", "content_pt_br": "ASSUNTO: Carta de Transmissão.\n\nPARA: Comandante Geral,\nDivisão do Atlântico, ATC,\nFort Totten, Long Island, N.Y.\n(ATENÇÃO: AC/S, Inteligência)", "bbox": {"x": 0.07, "y": 0.20, "w": 0.87, "h": 0.1}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
# NOTE(review): content_pt_br translates only the opening paragraph of
# content_en; the signature lines, the inclosure list and the "1st Ind."
# endorsement have no Portuguese counterpart.
{"order_in_page": 7, "type": "body_paragraph", "content_en": "Reference Letter of Transmittal, this office, dated 28 July 1947, with four (4) inclosures (Inclosures 1, 2 and 3, Final Reports of Sightings of \"flying saucers\"; and Inclosure 4, Signed Statement - Constable KEARSEY), transmitted herewith is Final Report of Sighting of \"flying saucers\" in Newfoundland, which occurred at Harmon Field, Stephenville, at 03452, 23 July 1947.\n\nFOR THE COMMANDING GENERAL:\n\nMARION C. MILLER\nCaptain, Air Corps\nAC/S, Intelligence.\n\n1 Incl:\nFinal Rpt of Sighting, 23 Jul 47\n\n1st Ind.\nHQ, ATLANTIC DIVISION, ATC, FORT TOTTEN, L.I., NEW YORK 6 Aug 47\nTO: Commanding General, Air Transport Command, Washington 25, D.C.\nATTN: Chief of Staff\n\nForwarded in accordance with instructions outlined in TGL CS-95, your Headquarters.\n\nJAMES H. HEMPSTER, JR.\nLt Col, GSC\nAC/S, Intelligence", "content_pt_br": "Referência Carta de Transmissão, este escritório, datada de 28 de julho de 1947, com quatro (4) anexos (Anexos 1, 2 e 3, Relatórios Finais de Avistamentos de \"discos voadores\"; e Anexo 4, Declaração Assinada - Policial KEARSEY), transmitido neste é o Relatório Final de Avistamento de \"discos voadores\" em Newfoundland, que ocorreu em Harmon Field, Stephenville, em 03452, 23 de julho de 1947.", "bbox": {"x": 0.07, "y": 0.30, "w": 0.87, "h": 0.55}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "saucer_sighting_newfoundland", "ufo_anomaly_rationale": "Official military letter transmitting flying saucer sighting reports from Newfoundland."},
{"order_in_page": 8, "type": "classification_banner", "content_en": "RESTRICTED", "content_pt_br": "RESTRITO", "bbox": {"x": 0.25, "y": 0.93, "w": 0.5, "h": 0.03}, "classification": "RESTRICTED", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.95},
]),
# Page 15: p-014.png - CONFIDENTIAL Final Report of Sighting, Harmon Field Newfoundland
(15, "p-014.png", [
{"order_in_page": 1, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.25, "y": 0.02, "w": 0.5, "h": 0.03}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
{"order_in_page": 2, "type": "stamp", "content_en": "RESTRICTED\nFINAL REPORT OF SIGHTING", "content_pt_br": "RESTRITO\nRELATÓRIO FINAL DE AVISTAMENTO", "bbox": {"x": 0.03, "y": 0.07, "w": 0.4, "h": 0.06}, "classification": "RESTRICTED", "formatting": ["bold"], "cross_page_hint": "self_contained", "ocr_confidence": 0.85},
# NOTE(review): this chunk is tagged "CONFIDENTIAL" while the stamp directly
# above it is tagged "RESTRICTED" — confirm which marking applies to the body.
# Item 11 in content_pt_br is shorter than in content_en (the "noted in Par.10"
# and "were walking" details are dropped). "N.T." and "03452" are presumably
# OCR readings of "N.Y." and "0345Z" — verify against the scan before changing.
{"order_in_page": 3, "type": "table_marker", "content_en": "1. Organization: 1388th AAF Base Unit, APO 862, c/o Postmaster, New York, N.T.\n2. Sighting: Strange intermittent flashes that may tie in with \"Flying Discs\".\n3. Place: Harmon Field, Stephenville, Newfoundland.\n4. Time: 03452, 23 July 1947.\n5. Altitude: Approximately 10,000 feet high.\n6. Weather: High scattered condition; visibility better than fifteen (15) miles.\n7. Heading: From South, heading NNE (approximately 30 degrees).\n8. Speed: High velocity; stated to be faster than a conventional airplane.\n9. Description: The observers saw a light which at first appeared to be a shooting star or airplane. It appeared again, and a number of intermittant flashes were seen for a period of approximately three (3) minutes. The flashes were reddish in color. Observers said it was not a falling star because it did not appear as such; nor was it an airplane, because manoeuvres were too abrupt and there was no noise of a motor.\n10. Reported by: Miss Patricia Abbott,(Newfoundland National) Government Employee and Lt. Hammakor, Navigator and Public Relations Officer.\n11. General: The informants (noted in Par.10) were walking when they noticed a peculiar reddish light.", "content_pt_br": "1. Organização: 1388ª Unidade de Base AAF, APO 862, c/o Postmaster, Nova York, N.T.\n2. Avistamento: Flashes intermitentes estranhos que podem estar ligados a \"Discos Voadores\".\n3. Local: Harmon Field, Stephenville, Newfoundland.\n4. Hora: 03452, 23 de julho de 1947.\n5. Altitude: Aproximadamente 10.000 pés de altura.\n6. Clima: Condição espalhada alta; visibilidade melhor que quinze (15) milhas.\n7. Rumo: Do Sul, rumando para NNE (aproximadamente 30 graus).\n8. Velocidade: Alta velocidade; dito ser mais rápido que um avião convencional.\n9. Descrição: Os observadores viram uma luz que a princípio parecia ser uma estrela cadente ou avião. 
Apareceu novamente, e vários flashes intermitentes foram vistos por um período de aproximadamente três (3) minutos. Os flashes eram avermelhados. Os observadores disseram que não era uma estrela cadente porque não apareceu como tal; nem era um avião, porque as manobras eram muito abruptas e não havia barulho de motor.\n10. Relatado por: Senhorita Patricia Abbott (funcionária do Governo Nacional da Newfoundland) e Ten. Hammakor, Navegador e Oficial de Relações Públicas.\n11. Geral: Os informantes notaram uma luz avermelhada peculiar.", "bbox": {"x": 0.07, "y": 0.13, "w": 0.87, "h": 0.72}, "classification": "CONFIDENTIAL", "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.87, "ufo_anomaly_detected": True, "ufo_anomaly_type": "reddish_flashing_light", "ufo_anomaly_rationale": "Official sighting report of reddish intermittent lights making abrupt maneuvers at high speed over Newfoundland."},
{"order_in_page": 4, "type": "signature_block", "content_en": "WILLIAM H. SMITH\nCaptain, Air Corps,\nIntelligence Officer.", "content_pt_br": "WILLIAM H. SMITH\nCapitão, Corpo Aéreo,\nOficial de Inteligência.", "bbox": {"x": 0.5, "y": 0.86, "w": 0.4, "h": 0.06}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.88},
{"order_in_page": 5, "type": "stamp", "content_en": "Level 1", "content_pt_br": "Nível 1", "bbox": {"x": 0.05, "y": 0.92, "w": 0.12, "h": 0.03}, "classification": None, "formatting": [], "cross_page_hint": "self_contained", "ocr_confidence": 0.8},
{"order_in_page": 6, "type": "classification_banner", "content_en": "CONFIDENTIAL", "content_pt_br": "CONFIDENTIAL", "bbox": {"x": 0.25, "y": 0.95, "w": 0.5, "h": 0.04}, "classification": "CONFIDENTIAL", "formatting": ["bold", "all_caps"], "cross_page_hint": "self_contained", "ocr_confidence": 0.9},
]),
]
# Merge both hand-written page batches into one ordered list of
# (page_index, png_filename, list_of_chunks) entries.
ALL_PAGE_DATA = [*PAGE_DATA, *PAGE_DATA_2]
print(f"Defined {len(ALL_PAGE_DATA)} pages with detailed chunk data")
print("Writing chunk files...")

# Only the pages defined above are covered here; the rest is expected to be
# handled programmatically.
all_chunks = []
global_order = 0
for page_number, image_name, page_chunks in ALL_PAGE_DATA:
    for record in page_chunks:
        # Chunks are numbered once across the whole document, not per page.
        global_order += 1
        # The chunk dicts are enriched in place; key order below is the order
        # in which new keys end up in the serialized output.
        record.update({
            "chunk_id": f"c{global_order:04d}",
            "page": page_number,
            "order_global": global_order,
            "source_png": f"../../processing/png/{DOC_ID}/{image_name}",
            "png_filename": image_name,
        })
        # Anomaly flags default to False when the page data did not set them.
        record.setdefault("ufo_anomaly_detected", False)
        record.setdefault("cryptid_anomaly_detected", False)
        # Remaining optional fields are always present, defaulting to None.
        for optional_field in (
            "ufo_anomaly_type",
            "ufo_anomaly_rationale",
            "cryptid_anomaly_type",
            "cryptid_anomaly_rationale",
            "image_type",
            "image_description_en",
            "image_description_pt_br",
            "extracted_text",
            "redaction_code",
            "redaction_inferred_content_type",
            "related_image",
            "related_table",
        ):
            record.setdefault(optional_field, None)
        all_chunks.append(record)
# Link every chunk to its neighbours in global order; the first chunk has no
# predecessor and the last has no successor, so those ends are None.
chunk_ids = [entry["chunk_id"] for entry in all_chunks]
preceding_ids = [None, *chunk_ids[:-1]]
following_ids = [*chunk_ids[1:], None]
for entry, before, after in zip(all_chunks, preceding_ids, following_ids):
    entry["prev_chunk"] = before
    entry["next_chunk"] = after
print(f"Prepared {len(all_chunks)} chunks from {len(ALL_PAGE_DATA)} pages")
# Persist the chunks prepared so far, plus the last page index covered, as an
# intermediate state file for a later continuation step.
last_page = ALL_PAGE_DATA[-1][0]
partial_state = {"chunks": all_chunks, "last_page": last_page}
with open("/tmp/doc65_chunks_partial.json", "w", encoding="utf-8") as state_file:
    # ensure_ascii=False keeps the accented Portuguese text readable in the file.
    state_file.write(json.dumps(partial_state, ensure_ascii=False, indent=2))
print("Saved partial state to /tmp/doc65_chunks_partial.json")
print(f"Last page processed: {last_page}")