/** * Measurement matcher for recipe markdown text. * * Finds and parses measurement strings including weights, volumes, * temperatures, dimensions, and times. Handles tricky patterns like: * - Dual units: "200°C (400°F)", "227g (8 oz)" * - Ranges: "28 to 32 minutes", "180-240g" * - Fractions: "1/2 cup", "1 1/2 teaspoon" * - Dimensions: "9x13", "8 x 8 inch", "8x6x2 inch" * - Approximate: "~250g", "(~220 °C)" * - Bare temp units: "170C", "100-110C" */ // ─── Amount Building Blocks ─────────────────────────────── const NUM = '\\d+(?:\\.\\d+)?'; const FRAC = '\\d+\\s*/\\s*\\d+'; const MIXED = '\\d+\\s+\\d+\\s*/\\s*\\d+'; const UNICODE_FRAC = '[\u00BC-\u00BE\u2150-\u215E]'; const MIXED_UNI = `\\d+\\s*${UNICODE_FRAC}`; const APPROX_PREFIX = '~\\s*'; const SINGLE_AMOUNT = `(?:${APPROX_PREFIX})?(?:${MIXED}|${FRAC}|${MIXED_UNI}|${UNICODE_FRAC}|${NUM})`; const RANGE_SEP = '\\s*(?:-|to)\\s*'; const AMOUNT = `(?:${SINGLE_AMOUNT}${RANGE_SEP}${SINGLE_AMOUNT}|${SINGLE_AMOUNT})`; // ─── Unit Patterns ──────────────────────────────────────── const TEMP_UNIT_DEG = '°\\s*[FfCc]'; const TEMP_UNIT_BARE = '[FfCc]'; const WEIGHT_UNIT = '(?:kg|g|oz|lbs?|ounces?|pounds?)'; const VOLUME_UNIT = '(?:cups?|tablespoons?|table\\s+spoons?|tbsp|teaspoons?|tsp|ml|mL|L|liters?|litres?|quarts?|gallons?|pints?|fl\\.?\\s*oz|fluid\\s+ounces?|parts\\s+by\\s+(?:volume|weight))'; const TIME_UNIT = '(?:minutes?|mins?|hours?|hrs?|days?|seconds?|secs?)'; const DIM_UNIT = '(?:inch(?:es)?|in\\.?|cm|mm)'; // ─── Amount Parsing ─────────────────────────────────────── /** * Parse a single numeric amount string (not a range). * Handles integers, decimals, fractions, mixed numbers, and approximate markers. * * @param {string} str — e.g. "200", "1.5", "1/2", "1 1/2", "~250" * @returns {{ value: number, approximate: boolean }} */ const UNICODE_FRAC_MAP = { '\u00BC': 0.25, // ¼ '\u00BD': 0.5, // ½ '\u00BE': 0.75, // ¾ '\u2150': 1/7, // ⅐ '\u2151': 1/9, // ⅑ '\u2152': 1/10, // ⅒ '\u2153': 1/3, // ⅓ '\u2154': 2/3, // ⅔ '\u2155': 0.2, // ⅕ '\u2156': 0.4, // ⅖ '\u2157': 0.6, // ⅗ '\u2158': 0.8, // ⅘ '\u2159': 1/6, // ⅙ '\u215A': 5/6, // ⅚ '\u215B': 0.125, // ⅛ '\u215C': 0.375, // ⅜ '\u215D': 0.625, // ⅝ '\u215E': 0.875, // ⅞ }; function parseSingleAmount(str) { str = str.trim(); const approximate = str.startsWith('~'); if (approximate) { str = str.replace(/^~\s*/, ''); } // Unicode mixed number: "1½", "1 ½" const uniMixedMatch = str.match(/^(\d+)\s*([\u00BC-\u00BE\u2150-\u215E])$/); if (uniMixedMatch) { const whole = parseInt(uniMixedMatch[1], 10); const frac = UNICODE_FRAC_MAP[uniMixedMatch[2]] || 0; return { value: whole + frac, approximate }; } // Standalone Unicode fraction: "½", "¾" const uniFracMatch = str.match(/^([\u00BC-\u00BE\u2150-\u215E])$/); if (uniFracMatch) { return { value: UNICODE_FRAC_MAP[uniFracMatch[1]] || 0, approximate }; } // Mixed number: "1 1/2" const mixedMatch = str.match(/^(\d+)\s+(\d+)\s*\/\s*(\d+)$/); if (mixedMatch) { return { value: parseInt(mixedMatch[1], 10) + parseInt(mixedMatch[2], 10) / parseInt(mixedMatch[3], 10), approximate, }; } // Fraction: "1/2", "3/4" const fracMatch = str.match(/^(\d+)\s*\/\s*(\d+)$/); if (fracMatch) { return { value: parseInt(fracMatch[1], 10) / parseInt(fracMatch[2], 10), approximate, }; } // Plain number return { value: parseFloat(str), approximate, }; } /** * Parse an amount string that may be a single value or a range. * * @param {string} str — e.g. "200", "180-240", "28 to 32", "1 1/2" * @returns { * { value: number, approximate: boolean } | * { min: { value: number, approximate: boolean }, max: { value: number, approximate: boolean } } * } */ function parseAmount(str) { str = str.trim(); // Try range with "to" first (word boundary matters to avoid "1 1/2 to 2" mis-parse) const rangeToMatch = str.match( new RegExp(`^(${SINGLE_AMOUNT})\\s+to\\s+(${SINGLE_AMOUNT})$`) ); if (rangeToMatch) { return { min: parseSingleAmount(rangeToMatch[1]), max: parseSingleAmount(rangeToMatch[2]), }; } // Try range with dash, but only if it doesn't look like a negative or // a fraction. Need to be careful: "180-240" is a range, "1/2" is not. // Strategy: split on dash that is surrounded by digits (not inside fraction). const rangeDashMatch = str.match( new RegExp(`^(${SINGLE_AMOUNT})-(${SINGLE_AMOUNT})$`) ); if (rangeDashMatch) { return { min: parseSingleAmount(rangeDashMatch[1]), max: parseSingleAmount(rangeDashMatch[2]), }; } return parseSingleAmount(str); } // ─── Unit Normalization ─────────────────────────────────── /** Normalize a unit string for consistent comparison. */ function normalizeUnit(unit) { if (!unit) return null; let u = unit.trim().toLowerCase().replace(/\s+/g, ' ').replace(/\.$/, ''); // Temperature if (/^°\s*f$/.test(u)) return '°F'; if (/^°\s*c$/.test(u)) return '°C'; if (u === 'f') return '°F'; if (u === 'c') return '°C'; // Weight if (u === 'g') return 'g'; if (u === 'kg') return 'kg'; if (u === 'oz') return 'oz'; if (u === 'lb' || u === 'lbs') return 'lb'; if (u === 'ounce' || u === 'ounces') return 'oz'; if (u === 'pound' || u === 'pounds') return 'lb'; // Volume if (u === 'cup' || u === 'cups') return 'cup'; if (/^table\s*spoons?$/.test(u)) return 'tablespoon'; if (u === 'tbsp') return 'tablespoon'; if (/^tea\s*spoons?$/.test(u)) return 'teaspoon'; if (u === 'tsp') return 'teaspoon'; if (u === 'ml') return 'ml'; if (u === 'l') return 'L'; if (/^liters?$/.test(u) || /^litres?$/.test(u)) return 'L'; if (/^quarts?$/.test(u)) return 'quart'; if (/^gallons?$/.test(u)) return 'gallon'; if (/^pints?$/.test(u)) return 'pint'; if (/^fl\.?\s*oz$/.test(u)) return 'fl oz'; if (/^fluid\s+ounces?$/.test(u)) return 'fl oz'; if (/^parts\s+by\s+volume$/.test(u)) return 'parts by volume'; if (/^parts\s+by\s+weight$/.test(u)) return 'parts by weight'; // Time if (/^minutes?$/.test(u) || /^mins?$/.test(u)) return 'minute'; if (/^hours?$/.test(u) || /^hrs?$/.test(u)) return 'hour'; if (/^days?$/.test(u)) return 'day'; if (/^seconds?$/.test(u) || /^secs?$/.test(u)) return 'second'; // Dimension units if (/^inch(es)?$/.test(u) || u === 'in') return 'inch'; if (u === 'cm') return 'cm'; if (u === 'mm') return 'mm'; if (/^f(oo|ee)t$/.test(u) || u === 'ft') return 'ft'; return u; } /** Determine measurement type from a normalized unit. */ function unitType(normalizedUnit) { if (!normalizedUnit) return null; if (['°F', '°C'].includes(normalizedUnit)) return 'temperature'; if (['g', 'kg', 'oz', 'lb'].includes(normalizedUnit)) return 'weight'; if (['cup', 'tablespoon', 'teaspoon', 'ml', 'L', 'quart', 'gallon', 'pint', 'fl oz', 'parts by volume', 'parts by weight'].includes(normalizedUnit)) return 'volume'; if (['minute', 'hour', 'day', 'second'].includes(normalizedUnit)) return 'time'; if (['inch', 'cm', 'mm', 'ft'].includes(normalizedUnit)) return 'dimension'; return null; } // ─── Matchers ───────────────────────────────────────────── /** * @typedef {Object} Measurement * @property {string} match — full matched string from source * @property {number} index — start position in source text * @property {string} type — "temperature"|"weight"|"volume"|"time"|"dimension" * @property {number|number[]|{min:object,max:object}} amount — parsed amount * @property {string} unit — normalized unit * @property {boolean} approximate — had ~ prefix * @property {object|null} alt — alternative measurement in parentheses */ /** * Find temperature measurements in text. * * Handles: 350°F, 200°C (400°F), 165-175 °F, 170C, 100-110C, * 32°c (90°f), ~220 °C */ function findTemperatures(text) { const results = []; // Pattern with degree symbol: AMOUNT °F/C (optional alt) const degRe = new RegExp( `(${AMOUNT})\\s*(${TEMP_UNIT_DEG})` + `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${TEMP_UNIT_DEG}|${TEMP_UNIT_BARE})\\s*\\))?`, 'gi' ); let m; while ((m = degRe.exec(text)) !== null) { const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); let alt = null; if (m[3] && m[4]) { alt = { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]), }; } results.push({ match: m[0], index: m.index, type: 'temperature', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt, }); } // Pattern with bare C/F (no degree symbol): number directly followed by C or F // Only match if not already captured by degree pattern above const bareRe = new RegExp( `(${AMOUNT})(${TEMP_UNIT_BARE})(?=\\s|\\)|$|,|\\/)` + `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${TEMP_UNIT_DEG}|${TEMP_UNIT_BARE})\\s*\\))?`, 'gi' ); while ((m = bareRe.exec(text)) !== null) { // Skip if this position was already matched by the degree pattern const alreadyMatched = results.some( r => m.index >= r.index && m.index < r.index + r.match.length ); if (alreadyMatched) continue; // Only match bare C/F if the character directly before the letter is a digit const beforeUnit = m[0].match(new RegExp(`(${AMOUNT})[FfCc]`)); if (!beforeUnit) continue; const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); let alt = null; if (m[3] && m[4]) { alt = { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]), }; } results.push({ match: m[0], index: m.index, type: 'temperature', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt, }); } return results; } /** * Find dimension measurements in text. * * Handles: 9x13, 9 x 13, 8x6x2, 9 x 13 inch, 8x6x2 inch, 5-8mm (as dimension when mm) */ function findDimensions(text) { const results = []; // NxN or NxNxN with optional unit const dimRe = new RegExp( `(${NUM})\\s*x\\s*(${NUM})(?:\\s*x\\s*(${NUM}))?(?:\\s+(${DIM_UNIT}))?`, 'gi' ); let m; while ((m = dimRe.exec(text)) !== null) { const dims = [parseFloat(m[1]), parseFloat(m[2])]; if (m[3]) dims.push(parseFloat(m[3])); const rawUnit = m[4] || null; const unit = normalizeUnit(rawUnit); results.push({ match: m[0], index: m.index, type: 'dimension', amount: dims, unit, approximate: false, alt: null, }); } // Standalone AMOUNT + dimension unit (e.g. "1 inch", "0.5-1cm") const standaloneDimRe = new RegExp( `(${AMOUNT})\\s*(${DIM_UNIT})\\b`, 'gi' ); while ((m = standaloneDimRe.exec(text)) !== null) { // Skip if overlapping with an NxN match already found const alreadyMatched = results.some( r => m.index >= r.index && m.index < r.index + r.match.length ); if (alreadyMatched) continue; const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); results.push({ match: m[0], index: m.index, type: 'dimension', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt: null, }); } return results; } /** * Find weight measurements in text. * * Handles: 200g, 550 g, ~250g, 180-240g, 1kg, 227g (8 oz) */ function findWeights(text) { const results = []; const weightRe = new RegExp( `(${AMOUNT})\\s*(${WEIGHT_UNIT})\\b` + `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${WEIGHT_UNIT}|${VOLUME_UNIT})\\s*\\))?`, 'gi' ); let m; while ((m = weightRe.exec(text)) !== null) { // Avoid matching dimension patterns (e.g., the "g" in "9x13 glass") // Check if this match overlaps with any dimension const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); let alt = null; if (m[3] && m[4]) { alt = { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]), }; } results.push({ match: m[0], index: m.index, type: 'weight', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt, }); } return results; } /** * Find volume measurements in text. * * Handles: 2 quarts, 1/2 cups, 1 cup, 6 tablespoons, 6 table spoons, * 1 1/2 tablespoon, 3/4 teaspoon, 6 parts by volume */ function findVolumes(text) { const results = []; const volumeRe = new RegExp( `(${AMOUNT})\\s*(${VOLUME_UNIT})\\b`, 'gi' ); let m; while ((m = volumeRe.exec(text)) !== null) { const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); results.push({ match: m[0], index: m.index, type: 'volume', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt: null, }); } return results; } /** * Find time measurements in text. * * Handles: 10 minutes, 28 to 32 minutes, 8 to 10 minutes, an hour, * five days, for hour */ function findTimes(text) { const results = []; const timeRe = new RegExp( `(${AMOUNT})\\s+(${TIME_UNIT})\\b`, 'gi' ); let m; while ((m = timeRe.exec(text)) !== null) { const amount = parseAmount(m[1]); const unit = normalizeUnit(m[2]); results.push({ match: m[0], index: m.index, type: 'time', amount, unit, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt: null, }); } return results; } // ─── Count Matcher ──────────────────────────────────────── /** * Find bare numeric counts in text (no unit attached). * These represent ingredient quantities like "eggs 3" or "biscuits 20-24". * Overlap with unit-bearing matches is resolved by deduplication (longer wins). */ function findCounts(text) { const results = []; const countRe = new RegExp( `(${AMOUNT})(?=\\s*$|\\s*,|\\s*\\)|\\s*\\]|\\s*;|\\s+[^\\dxX~])`, 'g' ); let m; while ((m = countRe.exec(text)) !== null) { const amount = parseAmount(m[1]); results.push({ match: m[1], index: m.index, type: 'count', amount, unit: null, approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false, alt: null, }); } return results; } // ─── Main Matcher ───────────────────────────────────────── /** * Find all measurement strings in the given text. * Returns an array of Measurement objects sorted by position, * with overlapping matches resolved (longer match wins). * * @param {string} text — markdown or plain text to scan * @returns {Measurement[]} */ function findAllMeasurements(text) { const all = [ ...findTemperatures(text), ...findDimensions(text), ...findWeights(text), ...findVolumes(text), ...findTimes(text), ...findCounts(text), ]; // Sort by position all.sort((a, b) => a.index - b.index); // Remove overlapping matches: if two matches overlap, keep the longer one. // If same length, prefer the one that appeared first in the type-specific // matcher (temperatures > dimensions > weights > volumes > times). const deduped = []; for (const measurement of all) { const end = measurement.index + measurement.match.length; const overlapping = deduped.findIndex(existing => { const existingEnd = existing.index + existing.match.length; return measurement.index < existingEnd && end > existing.index; }); if (overlapping === -1) { deduped.push(measurement); } else { // Keep the longer match const existing = deduped[overlapping]; if (measurement.match.length > existing.match.length) { deduped[overlapping] = measurement; } } } return deduped; } // ─── Exports ────────────────────────────────────────────── module.exports = { // Main API findAllMeasurements, // Individual matchers (exported for testing) findTemperatures, findDimensions, findWeights, findVolumes, findTimes, findCounts, // Parsing utilities (exported for testing) parseAmount, parseSingleAmount, normalizeUnit, unitType, };