All files / scripts/lib/parseReadme normalize.js

93.33% Statements 28/30
63.63% Branches 14/22
100% Functions 2/2
92.3% Lines 24/26

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52                    15x 9x 9x 9x 9x   9x 9x 9x 9x 9x 9x                         54x 45x 45x 45x 45x 45x 45x 45x   45x 45x 45x 45x       10x  
/**
 * Reduces a raw markdown heading to a short plain-text title.
 *
 * Processing order:
 *   1. Images and linked badges collapse to their alt text
 *      (`![alt](src)` and `[![alt](src)](href)` both become `alt`).
 *   2. Inline links collapse to their link text; bare http(s) URLs are dropped.
 *   3. Every UTF-16 surrogate code unit is removed, paired or lone, together
 *      with any zero-width joiner / variation selector-16 that directly
 *      follows it. This strips emoji (and any other character outside the
 *      Basic Multilingual Plane) so the result never contains an ill-formed
 *      surrogate and no invisible emoji "glue" is left behind.
 *   4. `:shortcode:` style emoji codes and the markdown punctuation
 *      characters * _ ` > # ~ are removed.
 *   5. Whitespace runs collapse to a single space and the ends are trimmed.
 *   6. Text longer than maxLen is cut to maxLen UTF-16 units, re-trimmed and
 *      suffixed with an ellipsis (so the result may be maxLen + 1 long).
 *
 * @param {string} t - Raw heading text
 * @param {number} [maxLen=120] - Maximum length before the ellipsis is added
 * @returns {string|null} The cleaned title, or null when the input is falsy
 *   or nothing is left after cleaning
 */
function normalizeTitle(t, maxLen = 120) {
  if (!t) return null;
  try {
    let s = String(t);
    // Images must be handled before links: the link pattern would otherwise
    // match the inner `[alt](src)` of an image and leave `!alt` (or, for a
    // linked badge, a dangling `![alt](`) in the output.
    s = s.replace(/\[!\[([^\]]*)\]\([^)]*\)\]\([^)]*\)/g, '$1');
    s = s.replace(/!\[([^\]]*)\]\([^)]*\)/g, '$1');
    s = s.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
    s = s.replace(/https?:\/\/\S+/g, '');
    // Strip paired AND lone surrogates, plus the ZWJ / VS16 code units that
    // glue emoji sequences together; those are not whitespace, so trim()
    // would otherwise leave them behind as invisible characters.
    s = s.replace(/[\uD800-\uDFFF]+[\u200D\uFE0F]*/g, '');
    s = s.replace(/:[a-z0-9_+-]+:/gi, '');
    s = s.replace(/[*_`>#~]/g, '');
    s = s.replace(/\s+/g, ' ').trim();
    // Surrogates are already gone, so slicing cannot split a pair.
    if (s.length > maxLen) s = s.slice(0, maxLen).trim() + '…';
    return s || null;
  } catch (e) {
    // Best-effort fallback: hand back the raw text, truncated, rather than
    // failing the caller outright.
    return (t && String(t).slice(0, maxLen)) || null;
  }
}
 
/**
 * Reduces a raw markdown summary paragraph to plain text.
 *
 * Processing order:
 *   1. Fenced code blocks are removed entirely; inline code keeps its text
 *      but loses the backticks.
 *   2. Images and linked badges collapse to their alt text
 *      (`![alt](src)` and `[![alt](src)](href)` both become `alt`).
 *   3. Inline links collapse to their link text.
 *   4. Anything of the form `<...>` is removed (HTML tags), then bare
 *      http(s) URLs are dropped.
 *   5. Every UTF-16 surrogate code unit is removed, paired or lone, together
 *      with any zero-width joiner / variation selector-16 that directly
 *      follows it. This strips emoji (and any other character outside the
 *      Basic Multilingual Plane) so the result never contains an ill-formed
 *      surrogate and no invisible emoji "glue" is left behind.
 *   6. Whitespace runs collapse to a single space and the ends are trimmed.
 *   7. Text longer than maxLen is cut to maxLen UTF-16 units, re-trimmed and
 *      suffixed with an ellipsis (so the result may be maxLen + 1 long).
 *
 * @param {string} t - Raw summary text
 * @param {number} [maxLen=400] - Maximum length before the ellipsis is added
 * @returns {string} The cleaned summary; '' when the input is falsy
 */
function normalizeSummary(t, maxLen = 400) {
  if (!t) return '';
  try {
    let s = String(t);
    s = s.replace(/```[\s\S]*?```/g, '');
    s = s.replace(/`([^`]+)`/g, '$1');
    // Images must be handled before links: the link pattern would otherwise
    // match the inner `[alt](src)` of an image and leave `!alt` (or, for a
    // linked badge, a dangling `![alt](`) in the output.
    s = s.replace(/\[!\[([^\]]*)\]\([^)]*\)\]\([^)]*\)/g, '$1');
    s = s.replace(/!\[([^\]]*)\]\([^)]*\)/g, '$1');
    s = s.replace(/\[([^\]]+)\]\([^)]+\)/g, '$1');
    s = s.replace(/<[^>]+>/g, '');
    s = s.replace(/https?:\/\/\S+/g, '');
    // Strip paired AND lone surrogates, plus the ZWJ / VS16 code units that
    // glue emoji sequences together; those are not whitespace, so trim()
    // would otherwise leave them behind as invisible characters.
    s = s.replace(/[\uD800-\uDFFF]+[\u200D\uFE0F]*/g, '');
    s = s.replace(/\s+/g, ' ').trim();
    // Surrogates are already gone, so slicing cannot split a pair.
    if (s.length > maxLen) s = s.slice(0, maxLen).trim() + '…';
    return s;
  } catch (e) {
    // Best-effort fallback: hand back the raw text, truncated, rather than
    // failing the caller outright.
    return (t && String(t).slice(0, maxLen)) || '';
  }
}
 
module.exports = { normalizeTitle, normalizeSummary };