124 lines
4.2 KiB
TypeScript
124 lines
4.2 KiB
TypeScript
|
|
/**
 * Extract a short summary from a wiki/documents/<id>.md body.
 *
 * Priority:
 *   1. `enthusiast_pitch_pt_br` / `enthusiast_pitch_en` in frontmatter (Johnny Harris-style,
 *      generated by scripts/34-generate-doc-pitches.py) — preferred when present;
 *      read with pickPitch(), summarize() itself only looks at the body
 *   2. `## Sumário Executivo (PT-BR)` (5 docs have this, synthesized by Sonnet 4.6)
 *   3. `## Executive Summary (EN)` (same 5 docs)
 *   4. `## Narrative Arc (EN)`, when the document has one
 *   5. First substantial paragraph in the body (skipping headings, blockquotes, callouts)
 *
 * The order above is for lang "pt". For lang "en" the body sections are tried as
 * Executive Summary (EN) → Narrative Arc (EN) → Sumário Executivo (PT-BR) before
 * falling back to the first paragraph.
 *
 * Strips markdown formatting (asterisks, backticks, wiki-links) and returns
 * a plain-text snippet capped at ~280 chars (≈3 lines of card width).
 *
 * pickPitch(): returns the Johnny Harris-style pitch directly (preserves markdown
 * for rich rendering in cards/wikis).
 */
|
||
|
|
/**
 * Maximum length (in UTF-16 code units) of the text summarize() keeps before it
 * truncates and appends an ellipsis.
 */
const MAX_CHARS = 280;
|
||
|
|
|
||
|
|
/**
 * Flatten a markdown fragment into a single line of plain text.
 *
 * Wiki-links, embeds, images and inline links are replaced by their display
 * text; emphasis/code markers and leading heading, list and blockquote markers
 * are removed; finally every whitespace run is collapsed to one space.
 *
 * @param s - Markdown source; may span multiple lines.
 * @returns The plain text, trimmed ("" when nothing but markup/whitespace remains).
 */
function stripMd(s: string): string {
  return (
    s
      // wiki-links and embeds → display text; the optional leading "!" of an
      // embed (![[file]]) is consumed so it does not leak into the output
      .replace(/!?\[\[([^\]|]+?)(?:\|([^\]]+))?\]\]/g, (_full, target: string, alias?: string) =>
        (alias ?? target).trim(),
      )
      // images ![alt](url) → alt (must run before the link rule, which would
      // otherwise leave the "!" behind)
      .replace(/!\[([^\]]*)\]\([^)]*\)/g, "$1")
      // markdown links [text](url) → text
      .replace(/\[([^\]]+?)\]\([^)]*?\)/g, "$1")
      // emphasis, strikethrough & code markers
      .replace(/[*`~]+/g, "")
      // underscore emphasis only: underscores sitting between word characters
      // (snake_case, file_names) are literal text, not markup, and are kept
      .replace(/(?<![\p{L}\p{N}_])_+|_+(?![\p{L}\p{N}_])/gu, "")
      // heading hash
      .replace(/^#+\s*/gm, "")
      // bullets
      .replace(/^\s*[-*+]\s+/gm, "")
      // ordered list markers
      .replace(/^\s*\d+\.\s+/gm, "")
      // blockquote markers, including indented and nested ("> > ") ones
      .replace(/^\s*(?:>\s*)+/gm, "")
      // collapse whitespace
      .replace(/\s+/g, " ")
      .trim()
  );
}
|
||
|
|
|
||
|
|
/**
 * Collect the plain text of the section introduced by a matching heading line.
 *
 * Lines are gathered from just after the first line matching `headingRe` until
 * the next level 1–3 heading (or the end of the body); the heading line itself
 * is not included. The gathered lines are then passed through stripMd().
 *
 * @param body - Markdown document body.
 * @param headingRe - Pattern tested against each line; it is re-tested on every
 *   line, so it should not carry the `g` or `y` flag.
 * @returns The stripped section text, or null when it is shorter than 40 chars
 *   (which includes the case where no heading matched).
 */
function findSection(body: string, headingRe: RegExp): string | null {
  const collected: string[] = [];
  let started = false;

  for (const row of body.split("\n")) {
    if (headingRe.test(row)) {
      started = true;
      continue;
    }
    if (!started) continue;
    // any following heading of depth 1–3 closes the section
    if (/^#{1,3}\s/.test(row)) break;
    collected.push(row);
  }

  const plain = stripMd(collected.join("\n"));
  return plain.length < 40 ? null : plain;
}
|
||
|
|
|
||
|
|
/**
 * Return the first paragraph of the body that is long enough to serve as a summary.
 *
 * Heading, blockquote/callout, table and bullet-start lines are ignored. The
 * remaining lines are grouped into paragraphs at blank lines; the first group
 * whose stripped text reaches 80 chars wins. A final group not followed by a
 * blank line is accepted from 40 chars.
 *
 * @param body - Markdown document body.
 * @returns The stripped paragraph text, or "" when nothing qualifies.
 */
function firstParagraph(body: string): string {
  const pending: string[] = [];
  const render = (): string => stripMd(pending.join(" "));
  const isSkipped = (t: string): boolean =>
    /^#{1,6}\s/.test(t) || // headings
    t.startsWith(">") || // blockquotes / callouts
    t.startsWith("|") || // skip tables
    /^[-*+]\s/.test(t); // skip bullet starts

  for (const raw of body.split("\n")) {
    const trimmed = raw.trim();

    if (trimmed === "") {
      // a blank line closes the paragraph collected so far, if any
      if (pending.length === 0) continue;
      const candidate = render();
      if (candidate.length >= 80) return candidate;
      pending.length = 0;
      continue;
    }

    if (!isSkipped(trimmed)) pending.push(trimmed);
  }

  // trailing paragraph with no blank line after it: lower acceptance threshold
  if (pending.length === 0) return "";
  const tail = render();
  return tail.length >= 40 ? tail : "";
}
|
||
|
|
|
||
|
|
/**
 * Return the Johnny Harris pitch stored in frontmatter, or null if there is none.
 *
 * The pitch for the requested language is tried first, then the other language
 * (any pitch is preferred over a heuristic summary). A value counts only when it
 * is a string whose trimmed length exceeds 20 chars; it is returned trimmed.
 *
 * @param fm - Parsed frontmatter, if any.
 * @param lang - Preferred pitch language; defaults to "pt".
 * @returns The trimmed pitch text, or null when no usable pitch exists.
 */
export function pickPitch(
  fm: Record<string, unknown> | undefined,
  lang: "pt" | "en" = "pt",
): string | null {
  if (!fm) return null;

  const lookupOrder =
    lang === "en"
      ? ["enthusiast_pitch_en", "enthusiast_pitch_pt_br"]
      : ["enthusiast_pitch_pt_br", "enthusiast_pitch_en"];

  for (const field of lookupOrder) {
    const raw = fm[field];
    if (typeof raw !== "string") continue;
    const pitch = raw.trim();
    if (pitch.length > 20) return pitch;
  }
  return null;
}
|
||
|
|
|
||
|
|
/**
 * Build a short plain-text summary for a document body.
 *
 * Candidate sources are the `## Sumário Executivo (PT-BR)`, `## Executive Summary (EN)`
 * and `## Narrative Arc (EN)` sections, ranked by `lang`, with the first
 * substantial paragraph as the last resort. Text longer than MAX_CHARS is cut
 * and suffixed with an ellipsis.
 *
 * @param body - Markdown document body.
 * @param lang - Preferred summary language; defaults to "pt".
 * @returns The summary, or "" when the body is empty or yields no usable text.
 */
export function summarize(body: string, lang: "pt" | "en" = "pt"): string {
  if (!body) return "";

  const sections = {
    pt: findSection(body, /^##\s+Sum[áa]rio Executivo\s*\(PT-BR\)/i),
    en: findSection(body, /^##\s+Executive Summary\s*\(EN\)/i),
    narrative: findSection(body, /^##\s+Narrative Arc\s*\(EN\)/i),
  };

  const ranked =
    lang === "pt"
      ? [sections.pt, sections.en, sections.narrative]
      : [sections.en, sections.narrative, sections.pt];

  const summary = ranked.find((candidate) => candidate != null) ?? firstParagraph(body);
  if (summary === "") return "";
  if (summary.length <= MAX_CHARS) return summary;

  // Cut at the last space before the limit, unless that would drop the text
  // back to 200 chars or fewer — then cut hard at the limit instead.
  const head = summary.slice(0, MAX_CHARS);
  const boundary = head.lastIndexOf(" ");
  const clipped = boundary > 200 ? head.slice(0, boundary) : head;
  return clipped + "…";
}
|