/**
 * Extract a short summary from a wiki/documents Markdown body.
 *
 * Priority:
 *   1. `enthusiast_pitch_pt_br` / `enthusiast_pitch_en` in frontmatter (Johnny Harris-style,
 *      generated by scripts/34-generate-doc-pitches.py) — preferred when present.
 *      This is exposed through pickPitch(); summarize() itself only looks at the body.
 *   2. `## Sumário Executivo (PT-BR)` (5 docs have this, synthesized by Sonnet 4.6)
 *   3. `## Executive Summary (EN)` (same 5 docs)
 *   4. `## Narrative Arc (EN)`
 *   5. First substantial paragraph in the body (skipping headings, blockquotes,
 *      tables and bullet lines)
 *
 * For `lang === "en"` the English sections (3, 4) are tried before the
 * Portuguese one (2).
 *
 * summarize(): strips markdown formatting (asterisks, backticks, wiki-links) and
 * returns a plain-text snippet capped at ~280 chars (≈3 lines of card width).
 *
 * pickPitch(): returns the Johnny Harris-style pitch directly (preserves markdown
 * for rich rendering in cards/wikis).
 */

/** Maximum summary length in UTF-16 code units, not counting the trailing ellipsis. */
const MAX_CHARS = 280;

/** A named section must contain at least this much plain text to be used. */
const MIN_SECTION_CHARS = 40;

/** A paragraph that ends inside the body must be at least this long to be picked. */
const MIN_PARAGRAPH_CHARS = 80;

/** The last paragraph of the body is accepted with this lower threshold. */
const MIN_TRAILING_CHARS = 40;

/** A frontmatter pitch must be longer than this (after trimming) to count as present. */
const MIN_PITCH_CHARS = 20;

/** Truncation backs up to a space only if that space lies beyond this index. */
const MIN_WORD_BOUNDARY_INDEX = 200;

/** Any H1–H3 heading terminates the section currently being captured. */
const SECTION_END_RE = /^#{1,3}\s/;

/**
 * Reduce a markdown fragment to single-line plain text.
 *
 * @param s Markdown source (may span several lines).
 * @returns The text with links reduced to their labels, emphasis/code markers,
 *   heading hashes, list markers and blockquote markers removed, and all
 *   whitespace runs collapsed to a single space.
 */
function stripMd(s: string): string {
  return (
    s
      // wiki-links [[target|alias]] → alias, or target when no alias is given
      .replace(
        /\[\[([^\]|]+?)(?:\|([^\]]+))?\]\]/g,
        (_full, target: string, alias?: string) => (alias ?? target).trim(),
      )
      // markdown links [text](url) and images ![alt](url) → text / alt
      // (the optional "!" is consumed so images do not leave a stray "!")
      .replace(/!?\[([^\]]+?)\]\([^)]*?\)/g, "$1")
      // emphasis & code markers
      .replace(/[*_`~]+/g, "")
      // heading hashes
      .replace(/^#+\s*/gm, "")
      // bullets
      .replace(/^\s*[-*+]\s+/gm, "")
      // ordered list markers
      .replace(/^\s*\d+\.\s+/gm, "")
      // blockquote markers
      .replace(/^>\s*/gm, "")
      // collapse whitespace (including newlines)
      .replace(/\s+/g, " ")
      .trim()
  );
}

/**
 * Return the plain text of the section introduced by a matching heading line.
 *
 * @param body Markdown body to scan.
 * @param headingRe Pattern tested against each line; expected to be a
 *   non-global regex (a `g`/`y` flag would make `test` stateful).
 * @returns The stripped section text, or null when no heading matches or the
 *   section holds fewer than MIN_SECTION_CHARS characters of plain text.
 */
function findSection(body: string, headingRe: RegExp): string | null {
  const captured: string[] = [];
  let inSection = false;

  // Split on CRLF as well so a `$`-anchored heading pattern still matches.
  for (const line of body.split(/\r?\n/)) {
    if (headingRe.test(line)) {
      inSection = true;
      continue;
    }
    if (!inSection) continue;
    // The next H1/H2/H3 heading ends this section.
    if (SECTION_END_RE.test(line)) break;
    captured.push(line);
  }

  const text = stripMd(captured.join("\n"));
  return text.length >= MIN_SECTION_CHARS ? text : null;
}

/**
 * Pick the first substantial prose paragraph of a markdown body.
 *
 * Blank lines, headings, blockquote lines, table rows and bullet lines are
 * never part of a paragraph; each of them also ends the paragraph collected so
 * far, so text on either side of a heading is not glued together.
 *
 * @param body Markdown body to scan.
 * @returns The first paragraph with at least MIN_PARAGRAPH_CHARS characters of
 *   plain text; failing that, the final paragraph when it has at least
 *   MIN_TRAILING_CHARS; otherwise "".
 */
function firstParagraph(body: string): string {
  const buffer: string[] = [];

  // Empties the buffer and returns its text when long enough, "" otherwise.
  const flush = (minChars: number): string => {
    if (buffer.length === 0) return "";
    const text = stripMd(buffer.join(" "));
    buffer.length = 0;
    return text.length >= minChars ? text : "";
  };

  for (const line of body.split(/\r?\n/)) {
    const t = line.trim();
    const endsParagraph =
      t === "" ||
      /^#{1,6}\s/.test(t) || // headings
      t.startsWith(">") || // blockquotes / callouts
      t.startsWith("|") || // table rows
      /^[-*+]\s/.test(t); // bullet starts

    if (endsParagraph) {
      const text = flush(MIN_PARAGRAPH_CHARS);
      if (text) return text;
      continue;
    }
    buffer.push(t);
  }

  // Trailing paragraph: accepted with the lower threshold.
  return flush(MIN_TRAILING_CHARS);
}

/**
 * Return the Johnny Harris pitch from frontmatter if present, else null.
 *
 * @param fm Parsed frontmatter, or undefined when the document has none.
 * @param lang Preferred language; the other language's pitch is used as a
 *   fallback so that ANY pitch wins over a heuristic summary.
 * @returns The trimmed pitch (markdown preserved), or null when neither key
 *   holds a string longer than MIN_PITCH_CHARS after trimming.
 */
export function pickPitch(
  fm: Record<string, unknown> | undefined,
  lang: "pt" | "en" = "pt",
): string | null {
  if (!fm) return null;

  const usable = (value: unknown): string | null => {
    if (typeof value !== "string") return null;
    const trimmed = value.trim();
    return trimmed.length > MIN_PITCH_CHARS ? trimmed : null;
  };

  const [preferred, other] =
    lang === "en"
      ? ["enthusiast_pitch_en", "enthusiast_pitch_pt_br"]
      : ["enthusiast_pitch_pt_br", "enthusiast_pitch_en"];

  return usable(fm[preferred]) ?? usable(fm[other]);
}

/**
 * Build a plain-text summary of a markdown body.
 *
 * @param body Markdown body (frontmatter is not consulted here).
 * @param lang Which language's summary section to try first.
 * @returns The chosen text, truncated to at most MAX_CHARS code units plus a
 *   trailing "…" when it was longer; "" when the body is empty or nothing
 *   suitable is found.
 */
export function summarize(body: string, lang: "pt" | "en" = "pt"): string {
  if (!body) return "";

  const ptSection = findSection(body, /^##\s+Sum[áa]rio Executivo\s*\(PT-BR\)/i);
  const enSection = findSection(body, /^##\s+Executive Summary\s*\(EN\)/i);
  const narrativeSection = findSection(body, /^##\s+Narrative Arc\s*\(EN\)/i);

  const chosen =
    lang === "pt"
      ? ptSection ?? enSection ?? narrativeSection ?? firstParagraph(body)
      : enSection ?? narrativeSection ?? ptSection ?? firstParagraph(body);

  if (!chosen) return "";
  if (chosen.length <= MAX_CHARS) return chosen;

  let cut = chosen.slice(0, MAX_CHARS);

  // slice() counts UTF-16 code units, so the cut may land between the two
  // halves of a surrogate pair; drop a dangling high surrogate rather than
  // emit an unpaired one.
  const lastUnit = cut.charCodeAt(cut.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut = cut.slice(0, -1);

  // Truncate at the last word boundary, unless that would discard too much.
  const lastSpace = cut.lastIndexOf(" ");
  return (lastSpace > MIN_WORD_BOUNDARY_INDEX ? cut.slice(0, lastSpace) : cut) + "…";
}