volpe/scripts/find-missing-plurals.js

73 lines
2.4 KiB
JavaScript

#!/usr/bin/env node
/**
* Discovery script: reads all recipe .md files, finds count measurements,
* extracts the preceding word(s), and reports items not in the pluralization dict.
*
* Usage: node scripts/find-missing-plurals.js
*/
const fs = require('fs');
const path = require('path');
const { findAllMeasurements } = require('../lib/measurements/matcher');
const { matchPlural } = require('../lib/measurements/plurals');
// The recipes directory is expected one level above this script's own directory.
const recipesDir = path.join(__dirname, '..', 'recipes');

// Nothing to scan without it: report on stderr and exit with a failure status.
const recipesDirExists = fs.existsSync(recipesDir);
if (recipesDirExists === false) {
  console.error('recipes/ directory not found');
  process.exit(1);
}
// Whitespace-delimited tokens that are markdown list/checkbox markers rather than
// part of a noun: "-", "*", a lone "[" or "]" (as produced by "[ ]"), a lone "x",
// and whole checkbox tokens such as "[x]", "[X]" or "[]".
const MARKER_TOKEN = /^(?:[-*\[\]x]|\[[xX]?\])$/;

/**
 * Derives a candidate noun from the text that precedes a count measurement.
 *
 * @param {string} textBefore - The part of the line before the measurement.
 * @returns {string|null} The last one or two non-marker words, lowercased and
 *   joined by a space, or null when no words remain or the candidate starts
 *   with a digit.
 */
function extractCandidateNoun(textBefore) {
  const words = textBefore
    .trimEnd()
    .split(/\s+/)
    .filter(Boolean)
    .filter(word => !MARKER_TOKEN.test(word));
  if (words.length === 0) return null;

  const candidate = words.slice(-2).join(' ').toLowerCase();
  if (!candidate || /^\d/.test(candidate)) return null;
  return candidate;
}

const files = fs.readdirSync(recipesDir).filter(f => f.endsWith('.md'));
const missing = new Map(); // candidate noun -> [{ file, line }]

for (const file of files) {
  const content = fs.readFileSync(path.join(recipesDir, file), 'utf8');
  // Accept both LF and CRLF line endings so no stray "\r" reaches the matcher.
  const lines = content.split(/\r?\n/);

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];

    for (const m of findAllMeasurements(line)) {
      if (m.type !== 'count') continue;

      const textBefore = line.slice(0, m.index);
      // Only measurements for which matchPlural finds nothing are reported.
      if (matchPlural(textBefore)) continue;

      const candidate = extractCandidateNoun(textBefore);
      if (candidate === null) continue;

      if (!missing.has(candidate)) {
        missing.set(candidate, []);
      }
      // Line numbers are reported 1-based.
      missing.get(candidate).push({ file, line: i + 1 });
    }
  }
}
// Final report: list every unrecognized noun with its locations, or confirm
// that nothing is missing.
if (missing.size > 0) {
  console.log('Count measurements with unrecognized preceding nouns:\n');

  // Nouns with the most occurrences are printed first.
  const byFrequency = Array.from(missing);
  byFrequency.sort(([, left], [, right]) => right.length - left.length);

  byFrequency.forEach(([noun, locations]) => {
    const pluralSuffix = locations.length > 1 ? 's' : '';
    console.log(` "${noun}" (${locations.length} occurrence${pluralSuffix})`);
    locations.forEach((loc) => {
      console.log(` - ${loc.file}:${loc.line}`);
    });
  });

  console.log(`\nTotal: ${missing.size} unrecognized noun(s)`);
} else {
  console.log('All count nouns are covered by the pluralization dictionary.');
}