parseReadme techs.js

93.33% Statements 70/75
81.48% Branches 66/81
100% Functions 11/11
98.03% Lines 50/51
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88 10x
 
 
21x
21x
21x
21x
21x
21x
21x
21x
 
 
 
13x
11x
21x
21x
7x
 
 
4x
 
 
 
7x
7x
10x
10x
9x
6x
12x
3x
3x
 
 
 
9x
3x
2x
2x
 
 
 
7x
 
 
 
4x
4x
6x
6x
6x
4x
10x
10x
8x
 
 
6x
2x
3x
1x
 
 
16x
 
 
 
 
 
 
 
 
 
 
 
13x
13x
13x
7x
7x
4x
 
 
 
10x
  const { flattenNodeText, extractTextFromListItem } = require('./helpers');
 
/**
 * Cleans a raw technology label down to a bare name.
 *
 * Steps, in order: stringify and trim, drop a leading "Also" prefix (followed by
 * colons/whitespace), cut everything from the first "(" onward, then peel whitespace
 * and punctuation off both ends.
 *
 * Note that the "(" cut happens before the punctuation peel, so a label that *starts*
 * with "(" normalizes to null.
 *
 * @param {*} raw - Raw label; any falsy value (null, undefined, '', 0) yields null
 * @returns {string|null} The cleaned name, or null when nothing is left
 */
function normalize(raw) {
  if (!raw) return null;
  let token = String(raw).trim().replace(/^Also[:\s]+/i, '').trim();

  // Discard a trailing parenthetical such as "Express (web framework)".
  const parenIdx = token.indexOf('(');
  if (parenIdx !== -1) token = token.slice(0, parenIdx).trim();

  const strip = new Set(['-', ':', '(', ')', '[', ']', '"', "'", ',', '.', ';']);
  const isJunk = (ch) => ch.trim() === '' || strip.has(ch);

  // Scan inward from both ends with indices instead of re-slicing the string per character.
  let start = 0;
  let end = token.length;
  while (start < end && isJunk(token[start])) start++;
  while (end > start && isJunk(token[end - 1])) end--;

  return token.slice(start, end).trim() || null;
}
 
/**
 * Locates the first heading whose text mentions technologies / tech / stack.
 *
 * The heading text is built by concatenating the `value` of the heading's direct
 * children only (children without a `value` contribute an empty string).
 *
 * @param {object} ast - Root node; must expose a `children` array to be searched
 * @returns {{startIdx: number, depth: number}|null} Index of the node right after the
 *   heading plus the heading depth (2 when the heading has no truthy `depth`), or null
 *   when `ast` is unusable or no heading matches
 */
function findTechSectionBounds(ast) {
  if (!ast || !Array.isArray(ast.children)) return null;

  const nodes = ast.children;
  const headingIdx = nodes.findIndex((node) => {
    if (node.type !== 'heading') return false;
    const title = (node.children || []).map((child) => child.value || '').join('');
    return /technolog|tech|stack/i.test(title);
  });

  if (headingIdx === -1) return null;
  return { startIdx: headingIdx + 1, depth: nodes[headingIdx].depth || 2 };
}
 
/**
 * Gathers `**bold**`-delimited names from the lists and paragraphs of a section.
 *
 * Walks `ast.children` from `startIdx` and stops at the first heading whose numeric
 * depth is less than or equal to `depth`. Headings with a larger depth are walked past.
 * Every `**...**` span found in a list item or paragraph is passed through `normalize`,
 * and non-empty results are kept in document order.
 *
 * NOTE(review): this relies on `extractTextFromListItem` / `flattenNodeText` returning
 * text that still contains the literal `**` markers — confirm against ./helpers.
 *
 * @param {object} ast - Root node with a `children` array
 * @param {number} startIdx - Index of the first node inside the section
 * @param {number} depth - Depth of the section heading
 * @returns {string[]} Normalized bold tokens (possibly empty)
 */
function collectBoldTokens(ast, startIdx, depth) {
  const techs = [];

  // Shared by the list and paragraph branches: pull every **bold** span out of `text`.
  const pushBoldTokens = (text) => {
    for (const match of String(text).matchAll(/\*\*([^*]+?)\*\*/g)) {
      const token = normalize(match[1]);
      if (token) techs.push(token);
    }
  };

  for (let j = startIdx; j < ast.children.length; j++) {
    const node = ast.children[j];
    // Skip holes/null entries instead of throwing on `.type`, matching the guard the
    // heading check already applied.
    if (!node) continue;
    if (node.type === 'heading' && typeof node.depth === 'number' && node.depth <= depth) break;

    if (node.type === 'list' && node.children) {
      for (const li of node.children) pushBoldTokens(extractTextFromListItem(li));
    } else if (node.type === 'paragraph') {
      pushBoldTokens(flattenNodeText(node).trim());
    }
  }
  return techs;
}
 
/**
 * Gathers technology names from the legacy plain-text format of a section.
 *
 * Walks `ast.children` from `startIdx` and stops at the first heading whose numeric
 * depth is less than or equal to `depth`. For each list item and paragraph:
 *   - text containing a comma is split on commas into separate entries;
 *   - other list-item text is taken whole;
 *   - other paragraph text is taken whole unless it starts with `<`, `!` or `#`,
 *     with a leading "Also" prefix removed.
 * Every entry is then passed through `normalize`, and empty results are dropped.
 *
 * @param {object} ast - Root node with a `children` array
 * @param {number} startIdx - Index of the first node inside the section
 * @param {number} depth - Depth of the section heading
 * @returns {string[]} Normalized technology names (possibly empty)
 */
function collectLegacyTokens(ast, startIdx, depth) {
  const legacy = [];
  const splitOnCommas = (text) => text.split(',').map((part) => part.trim()).filter(Boolean);

  for (let j = startIdx; j < ast.children.length; j++) {
    const node = ast.children[j];
    // Skip holes/null entries instead of throwing on `.type`, matching the guard the
    // heading check already applied.
    if (!node) continue;
    if (node.type === 'heading' && typeof node.depth === 'number' && node.depth <= depth) break;

    if (node.type === 'list' && node.children) {
      for (const li of node.children) {
        const text = extractTextFromListItem(li);
        if (!text) continue;
        if (text.includes(',')) legacy.push(...splitOnCommas(text));
        else legacy.push(text.trim());
      }
    } else if (node.type === 'paragraph') {
      const text = flattenNodeText(node).trim();
      if (text) {
        if (text.includes(',')) legacy.push(...splitOnCommas(text));
        // Paragraphs opening with <, ! or # are not treated as technology names.
        else if (!/^(<|!|#)/.test(text)) legacy.push(text.replace(/^Also[:\s]+/i, '').trim());
      }
    }
  }
  return legacy.filter(Boolean).map((entry) => normalize(entry)).filter(Boolean);
}
 
/**
 * Returns the technologies listed under a README's technology / tech-stack heading.
 *
 * Resolution order:
 *   1. Find the section via `findTechSectionBounds`; no section means an empty result.
 *   2. If the section contains any `**bold**` tokens, those are the answer.
 *   3. Otherwise fall back to the legacy comma-separated / plain-text entries.
 *
 * This is best-effort: any exception raised while walking the AST is swallowed and
 * reported as an empty list.
 *
 * @param {object} ast - Parsed README AST
 * @returns {string[]} Normalized technology names; empty when nothing is found or on error
 */
function extractTechnologiesFromAst(ast) {
  let result = [];
  try {
    const section = findTechSectionBounds(ast);
    if (section) {
      const boldTokens = collectBoldTokens(ast, section.startIdx, section.depth);
      result = boldTokens.length > 0
        ? boldTokens.filter(Boolean)
        : collectLegacyTokens(ast, section.startIdx, section.depth);
    }
  } catch (err) {
    result = [];
  }
  return result;
}
 
module.exports = { extractTechnologiesFromAst };