All files / scripts/lib/docs docsHeuristics.js

93.75% Statements 60/64
79% Branches 79/100
100% Functions 8/8
97.95% Lines 48/49

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100                      4x 4x 4x 3x 3x 2x 2x 1x 1x       2x                       3x 3x 2x 2x 2x 2x 2x     2x                           9x 9x 30x 9x 9x 7x 3x 1x 1x 2x 2x 2x     7x 1x 1x   7x 3x   3x 3x 3x 3x 3x 3x 3x 2x 2x 1x 3x           9x     1x  
#!/usr/bin/env node
 
/**
 * Best-effort backfill of docsLink/docsTitle from the raw README text.
 *
 * Runs only when the node is missing docsLink or docsTitle (or both). Two
 * patterns are tried in order and the first hit wins:
 *   1. a markdown link with an http(s) target whose label contains "doc"
 *      (case-insensitive): the label becomes docsTitle, the target docsLink;
 *   2. a line of the form "documentation: <http(s) url>": docsTitle becomes
 *      the literal 'Documentation' and the URL becomes docsLink.
 * On a hit BOTH fields are written, so a node that had only one of the two
 * gets that value replaced by the README-derived pair.
 *
 * @param {object} node - Repository node; README text is read from
 *   `node.object.text`. Mutated in place.
 * @returns {object} The same node (a falsy node is returned as-is)
 */
function backfillDocsFromText(node) {
  try {
    if (!node) return node;
    // Nothing to backfill when both fields are already populated.
    if (node.docsLink && node.docsTitle) return node;

    const readme = node.object && node.object.text;
    if (!readme) return node;

    const mdLink = readme.match(/\[([^\]]*doc[^\]]*)\]\((https?:\/\/[^)\s]+)\)/i);
    if (mdLink) {
      node.docsTitle = mdLink[1].trim();
      node.docsLink = mdLink[2].trim();
      return node;
    }

    const plainLine = readme.match(/(?:^|\n)\s*documentation[:\s]+(https?:\/\/[^\s]+)/i);
    if (plainLine) {
      node.docsTitle = 'Documentation';
      node.docsLink = plainLine[1].trim();
    }
  } catch (e) {
    // Deliberately best-effort: an unexpected node shape (for example a
    // non-string `text`) must not abort the caller, so the error is dropped
    // and the node is handed back.
  }
  return node;
}
 
/**
 * Falls back to the first README heading whose text mentions "doc", "api" or
 * "architectur" (case-insensitive). The heading text fills docsTitle and the
 * repository README URL fills docsLink. Values already present on the node
 * are kept; only missing fields are filled.
 *
 * The heading text is taken from the `value` of the heading's direct children
 * only; nested inline nodes contribute nothing.
 *
 * @param {object} node - Repository node with optional `_ast` (an object with
 *   a `children` array) and `name`. Mutated in place.
 * @returns {object} The same node
 */
function backfillFromAstHeading(node) {
  try {
    if (!node || !node._ast || !Array.isArray(node._ast.children)) return node;

    // Text fragments of a heading's direct children; null/valueless children
    // yield '' so one odd child cannot throw and hide later headings.
    const fragments = (h) => (h.children || []).map((ch) => (ch && ch.value) || '');

    const heading = node._ast.children.find(
      (c) => c && c.type === 'heading' && /doc|api|architectur/i.test(fragments(c).join(''))
    );
    if (!heading) return node;

    node.docsTitle = node.docsTitle || fragments(heading).join(' ').trim() || 'Documentation';

    // Only synthesize the README link when the repository name is known;
    // otherwise the URL would contain the literal text "undefined".
    if (!node.docsLink && typeof node.name === 'string' && node.name !== '') {
      node.docsLink = `https://github.com/keglev/${node.name}/blob/main/README.md`;
    }
  } catch (e) {
    // Best-effort heuristic: never let a malformed AST abort the caller.
  }
  return node;
}
 
/**
 * Replaces a docsLink that points to a GitHub issue or pull request with a
 * better candidate. Does nothing unless the current docsLink looks like an
 * issue/PR URL. Candidates are tried in this order, first usable one wins:
 *   1. node.repoDocs.apiDocumentation
 *   2. node.repoDocs.architectureOverview
 *   3. node.docs.documentation
 *   4. the first markdown link in the README text (node.object.text) whose
 *      target or label looks documentation-like and is not itself an issue/PR.
 * Site-relative ("/x") and "./x" README targets are rewritten to a
 * raw.githubusercontent.com URL for the repository's main branch.
 *
 * Issue URLs end up as docsLink when READMEs contain "open an issue" links
 * near their documentation sections.
 *
 * @param {object} node - Repository node; mutated in place
 * @param {boolean} [DEBUG_FETCH] - When true, logs heuristic failures
 * @returns {object} The same node
 */
function postProcessDocsLinkCandidates(node, DEBUG_FETCH) {
  try {
    const readme = (node.object && node.object.text) || '';
    const looksLikeIssue = (u) => !!(u && /github\.com\/.+\/(issues|pulls?)\b/i.test(u));
    const isDocsCandidate = (u) => !!(u && /(?:\/docs\b|\bdocs\/|redoc|openapi|swagger|\/api\/|\.md\b|api\b|documentation)/i.test(u));

    // Only an issue/PR link needs repairing.
    if (!node.docsLink || !looksLikeIssue(node.docsLink)) return node;

    // Structured extraction results, in priority order.
    const structured = [
      node.repoDocs && node.repoDocs.apiDocumentation,
      node.repoDocs && node.repoDocs.architectureOverview,
      node.docs && node.docs.documentation,
    ].find((candidate) => candidate && candidate.link && !looksLikeIssue(candidate.link));
    if (structured) {
      node.docsTitle = structured.title || node.docsTitle;
      node.docsLink = structured.link;
      return node;
    }

    if (!readme) return node;

    // Last resort: scan README markdown links for something doc-like.
    const hasRepoName = typeof node.name === 'string' && node.name !== '';
    const linkRe = /\[([^\]]+)\]\((https?:\/\/[^)]+|\/.+?|\.\/[^)]+)\)/ig;
    let match;
    while ((match = linkRe.exec(readme)) !== null) {
      const label = match[1] || '';
      const href = match[2] || '';
      if (looksLikeIssue(href)) continue;
      if (!isDocsCandidate(href) && !isDocsCandidate(label)) continue;

      let target = href;
      if (/^[./]/.test(href)) {
        // Repository-relative target. Without a repository name the rewritten
        // URL would contain the literal "undefined", so keep looking instead.
        if (!hasRepoName) continue;
        const cleaned = href.replace(/^\.\//, '').replace(/^\//, '');
        target = `https://raw.githubusercontent.com/keglev/${node.name}/main/${cleaned}`;
      }

      // NOTE(review): an existing docsTitle wins here, whereas the structured
      // candidates above prefer the candidate's own title — confirm whether
      // keeping the title that accompanied the issue link is intended.
      node.docsTitle = node.docsTitle || label.trim();
      node.docsLink = target;
      break;
    }
  } catch (e) {
    if (DEBUG_FETCH) console.log('post-docsLink heuristics failed', e && e.message);
  }
  return node;
}
 
module.exports = { backfillDocsFromText, backfillFromAstHeading, postProcessDocsLinkCandidates };