688 lines
No EOL
20 KiB
JavaScript
688 lines
No EOL
20 KiB
JavaScript
/**
|
|
* Measurement matcher for recipe markdown text.
|
|
*
|
|
* Finds and parses measurement strings including weights, volumes,
|
|
* temperatures, dimensions, and times. Handles tricky patterns like:
|
|
* - Dual units: "200°C (400°F)", "227g (8 oz)"
|
|
* - Ranges: "28 to 32 minutes", "180-240g"
|
|
* - Fractions: "1/2 cup", "1 1/2 teaspoon"
|
|
* - Dimensions: "9x13", "8 x 8 inch", "8x6x2 inch"
|
|
* - Approximate: "~250g", "(~220 °C)"
|
|
* - Bare temp units: "170C", "100-110C"
|
|
*/
|
|
|
|
// ─── Amount Building Blocks ───────────────────────────────
|
|
|
|
// Regex source fragments (strings, not RegExp objects) that are spliced
// into the matcher patterns further down.

// Non-negative integer or decimal: "200", "1.5"
const NUM = '\\d+(?:\\.\\d+)?';
// ASCII fraction, whitespace allowed around the slash: "1/2", "3 / 4"
const FRAC = '\\d+\\s*/\\s*\\d+';
// Whole number, whitespace, then an ASCII fraction: "1 1/2"
const MIXED = '\\d+\\s+\\d+\\s*/\\s*\\d+';
// One precomposed fraction character in U+00BC–U+00BE or U+2150–U+215E
const UNICODE_FRAC = '[\u00BC-\u00BE\u2150-\u215E]';
// Whole number followed by a precomposed fraction character: "1½", "1 ½"
const MIXED_UNI = `\\d+\\s*${UNICODE_FRAC}`;
// Leading "approximately" marker: "~", "~ "
const APPROX_PREFIX = '~\\s*';

// Alternatives are ordered so the longer forms (mixed numbers, fractions) are
// tried before a plain number can match just their leading digits.
const SINGLE_AMOUNT_FORMS = [MIXED, FRAC, MIXED_UNI, UNICODE_FRAC, NUM];
const SINGLE_AMOUNT = `(?:${APPROX_PREFIX})?(?:${SINGLE_AMOUNT_FORMS.join('|')})`;

// Separator between the two ends of a range: "-" or "to"
const RANGE_SEP = '\\s*(?:-|to)\\s*';
// A range is tried first; otherwise a single amount.
const AMOUNT = `(?:${SINGLE_AMOUNT}${RANGE_SEP}${SINGLE_AMOUNT}|${SINGLE_AMOUNT})`;
|
|
|
|
// ─── Unit Patterns ────────────────────────────────────────
|
|
|
|
// Regex source fragments for unit spellings. Each is a non-capturing
// alternation so callers can wrap it in their own capture group.

// Degree sign followed by F or C, whitespace allowed in between: "°F", "° c"
const TEMP_UNIT_DEG = '°\\s*[FfCc]';
// F or C on its own, no degree sign: "170C"
const TEMP_UNIT_BARE = '[FfCc]';

const WEIGHT_UNIT = `(?:${['kg', 'g', 'oz', 'lbs?', 'ounces?', 'pounds?'].join('|')})`;

const VOLUME_UNIT = `(?:${[
  'cups?',
  'tablespoons?',
  'table\\s+spoons?',
  'tbsp',
  'teaspoons?',
  'tsp',
  'ml',
  'mL',
  'L',
  'liters?',
  'litres?',
  'quarts?',
  'gallons?',
  'pints?',
  'fl\\.?\\s*oz',
  'fluid\\s+ounces?',
  'parts\\s+by\\s+(?:volume|weight)',
].join('|')})`;

const TIME_UNIT = `(?:${[
  'minutes?',
  'mins?',
  'hours?',
  'hrs?',
  'days?',
  'weeks?',
  'months?',
  'years?',
  'seconds?',
  'secs?',
].join('|')})`;

const DIM_UNIT = `(?:${['inch(?:es)?', 'in\\.?', 'cm', 'mm'].join('|')})`;

const PRESSURE_UNIT = `(?:${['PSI', 'psi', 'kPa', 'bar'].join('|')})`;

const PH_UNIT = '(?:pH)';
|
|
|
|
// ─── Amount Parsing ───────────────────────────────────────
|
|
|
|
/**
 * Numeric value of each precomposed Unicode fraction character
 * (U+00BC–U+00BE and U+2150–U+215E), keyed by the character itself.
 */
const UNICODE_FRAC_MAP = {
  '\u00BC': 0.25, // ¼
  '\u00BD': 0.5, // ½
  '\u00BE': 0.75, // ¾
  '\u2150': 1 / 7, // ⅐
  '\u2151': 1 / 9, // ⅑
  '\u2152': 1 / 10, // ⅒
  '\u2153': 1 / 3, // ⅓
  '\u2154': 2 / 3, // ⅔
  '\u2155': 0.2, // ⅕
  '\u2156': 0.4, // ⅖
  '\u2157': 0.6, // ⅗
  '\u2158': 0.8, // ⅘
  '\u2159': 1 / 6, // ⅙
  '\u215A': 5 / 6, // ⅚
  '\u215B': 0.125, // ⅛
  '\u215C': 0.375, // ⅜
  '\u215D': 0.625, // ⅝
  '\u215E': 0.875, // ⅞
};

/**
 * Parse one numeric amount (never a range) into a number.
 *
 * Recognised forms, tried in this order after stripping surrounding
 * whitespace and an optional leading "~":
 *   1. whole number + Unicode fraction   "1½", "1 ½"
 *   2. Unicode fraction alone            "½"
 *   3. mixed number                      "1 1/2"
 *   4. ASCII fraction                    "1/2"
 *   5. anything else goes to parseFloat  "200", "1.5" (NaN if not numeric)
 *
 * @param {string} str — e.g. "200", "1.5", "1/2", "1 1/2", "~250", "1½"
 * @returns {{ value: number, approximate: boolean }} `approximate` is true
 *   when the input started with "~".
 */
function parseSingleAmount(str) {
  const trimmed = str.trim();
  const approximate = trimmed.startsWith('~');
  const body = approximate ? trimmed.replace(/^~\s*/, '') : trimmed;

  const wrap = (value) => ({ value, approximate });
  const toInt = (digits) => parseInt(digits, 10);
  // The character class below only admits keys present in the map; the
  // `|| 0` fallback mirrors a defensive default.
  const fractionValue = (glyph) => UNICODE_FRAC_MAP[glyph] || 0;

  const uniMixed = /^(\d+)\s*([\u00BC-\u00BE\u2150-\u215E])$/.exec(body);
  if (uniMixed !== null) {
    const [, whole, glyph] = uniMixed;
    return wrap(toInt(whole) + fractionValue(glyph));
  }

  const uniAlone = /^([\u00BC-\u00BE\u2150-\u215E])$/.exec(body);
  if (uniAlone !== null) {
    return wrap(fractionValue(uniAlone[1]));
  }

  const mixed = /^(\d+)\s+(\d+)\s*\/\s*(\d+)$/.exec(body);
  if (mixed !== null) {
    const [, whole, numerator, denominator] = mixed;
    return wrap(toInt(whole) + toInt(numerator) / toInt(denominator));
  }

  const fraction = /^(\d+)\s*\/\s*(\d+)$/.exec(body);
  if (fraction !== null) {
    const [, numerator, denominator] = fraction;
    return wrap(toInt(numerator) / toInt(denominator));
  }

  return wrap(parseFloat(body));
}
|
|
|
|
// Anchored "single <sep> single" pattern. It reuses RANGE_SEP, the same
// separator the matcher patterns are built from, so any range a matcher
// captures is also recognised here. Case-insensitive because the matchers run
// with the `i` flag and can therefore capture "TO". No `g` flag, so the
// shared instance carries no lastIndex state between calls.
const RANGE_AMOUNT_RE = new RegExp(
  `^(${SINGLE_AMOUNT})${RANGE_SEP}(${SINGLE_AMOUNT})$`,
  'i'
);

/**
 * Parse an amount string that may be a single value or a range.
 *
 * A range is two single amounts joined by "-" or "to", with optional
 * whitespace on either side of the separator ("180-240", "180 - 240",
 * "28 to 32", "1 1/2 to 2"). Previously only "<ws>to<ws>" and a bare "-" were
 * recognised, so a matcher capture such as "180 - 240" fell through to
 * parseFloat and came back as the single value 180.
 *
 * @param {string} str — e.g. "200", "180-240", "28 to 32", "1 1/2"
 * @returns {
 *   { value: number, approximate: boolean } |
 *   { min: { value: number, approximate: boolean }, max: { value: number, approximate: boolean } }
 * } A single parsed amount, or `{ min, max }` for a range.
 */
function parseAmount(str) {
  const text = str.trim();

  const range = RANGE_AMOUNT_RE.exec(text);
  if (range !== null) {
    return {
      min: parseSingleAmount(range[1]),
      max: parseSingleAmount(range[2]),
    };
  }

  return parseSingleAmount(text);
}
|
|
|
|
// ─── Unit Normalization ───────────────────────────────────
|
|
|
|
// Cleaned-up spellings that map to a canonical unit by exact comparison.
// A Map (not a plain object) so inherited keys such as "constructor" miss.
const EXACT_UNIT_NAMES = new Map([
  // Temperature without a degree sign
  ['f', '°F'],
  ['c', '°C'],
  // Weight
  ['g', 'g'],
  ['kg', 'kg'],
  ['oz', 'oz'],
  ['lb', 'lb'],
  ['lbs', 'lb'],
  ['ounce', 'oz'],
  ['ounces', 'oz'],
  ['pound', 'lb'],
  ['pounds', 'lb'],
  // Volume
  ['cup', 'cup'],
  ['cups', 'cup'],
  ['tbsp', 'tablespoon'],
  ['tsp', 'teaspoon'],
  ['ml', 'ml'],
  ['l', 'L'],
  // Dimension
  ['in', 'inch'],
  ['cm', 'cm'],
  ['mm', 'mm'],
  ['ft', 'ft'],
  // Pressure
  ['psi', 'psi'],
  ['kpa', 'kPa'],
  ['bar', 'bar'],
  // pH (input is lowercased before lookup, so "pH" arrives as "ph")
  ['ph', 'pH'],
]);

// Spellings with optional plural / whitespace, matched against the whole
// cleaned-up string. Patterns are mutually exclusive, so order is irrelevant.
const UNIT_NAME_PATTERNS = [
  [/^°\s*f$/, '°F'],
  [/^°\s*c$/, '°C'],
  [/^table\s*spoons?$/, 'tablespoon'],
  [/^tea\s*spoons?$/, 'teaspoon'],
  [/^liters?$/, 'L'],
  [/^litres?$/, 'L'],
  [/^quarts?$/, 'quart'],
  [/^gallons?$/, 'gallon'],
  [/^pints?$/, 'pint'],
  [/^fl\.?\s*oz$/, 'fl oz'],
  [/^fluid\s+ounces?$/, 'fl oz'],
  [/^parts\s+by\s+volume$/, 'parts by volume'],
  [/^parts\s+by\s+weight$/, 'parts by weight'],
  [/^minutes?$/, 'minute'],
  [/^mins?$/, 'minute'],
  [/^hours?$/, 'hour'],
  [/^hrs?$/, 'hour'],
  [/^days?$/, 'day'],
  [/^weeks?$/, 'week'],
  [/^months?$/, 'month'],
  [/^seconds?$/, 'second'],
  [/^secs?$/, 'second'],
  [/^years?$/, 'year'],
  [/^inch(es)?$/, 'inch'],
  [/^f(oo|ee)t$/, 'ft'],
];

/**
 * Map a raw unit spelling to its canonical form for consistent comparison.
 *
 * The input is trimmed, lowercased, has runs of whitespace collapsed to one
 * space, and loses a single trailing "." before lookup.
 *
 * @param {string|null|undefined} unit — raw unit text, e.g. "Tbsp.", "° F"
 * @returns {string|null} Canonical unit (e.g. "tablespoon", "°F"); the
 *   cleaned-up input itself when the unit is not recognised; null when
 *   `unit` is falsy.
 */
function normalizeUnit(unit) {
  if (!unit) return null;

  const cleaned = unit.trim().toLowerCase().replace(/\s+/g, ' ').replace(/\.$/, '');

  if (EXACT_UNIT_NAMES.has(cleaned)) {
    return EXACT_UNIT_NAMES.get(cleaned);
  }

  const hit = UNIT_NAME_PATTERNS.find(([pattern]) => pattern.test(cleaned));
  return hit === undefined ? cleaned : hit[1];
}
|
|
|
|
// Measurement category → canonical units belonging to it. Checked in order.
const UNIT_TYPE_GROUPS = [
  ['temperature', ['°F', '°C']],
  ['weight', ['g', 'kg', 'oz', 'lb']],
  [
    'volume',
    [
      'cup',
      'tablespoon',
      'teaspoon',
      'ml',
      'L',
      'quart',
      'gallon',
      'pint',
      'fl oz',
      'parts by volume',
      'parts by weight',
    ],
  ],
  ['time', ['minute', 'hour', 'day', 'week', 'month', 'year', 'second']],
  ['dimension', ['inch', 'cm', 'mm', 'ft']],
  ['pressure', ['psi', 'kPa', 'bar']],
  ['pH', ['pH']],
];

/**
 * Classify a canonical unit string (as produced by normalizeUnit).
 *
 * @param {string|null} normalizedUnit — e.g. "°F", "cup", "minute"
 * @returns {string|null} "temperature", "weight", "volume", "time",
 *   "dimension", "pressure" or "pH"; null for a falsy or unknown unit.
 */
function unitType(normalizedUnit) {
  if (!normalizedUnit) return null;

  for (const [type, units] of UNIT_TYPE_GROUPS) {
    if (units.includes(normalizedUnit)) return type;
  }
  return null;
}
|
|
|
|
// ─── Matchers ─────────────────────────────────────────────
|
|
|
|
/**
|
|
* @typedef {Object} Measurement
|
|
* @property {string} match — full matched string from source
|
|
* @property {number} index — start position in source text
|
|
* @property {string} type — "temperature"|"weight"|"volume"|"time"|"dimension"|"pressure"|"pH"|"count"
|
|
* @property {number|number[]|{min:object,max:object}} amount — parsed amount
|
|
* @property {string|null} unit — normalized unit; null for counts and for dimensions written without a unit
|
|
* @property {boolean} approximate — had ~ prefix
|
|
* @property {object|null} alt — alternative measurement in parentheses
|
|
*/
|
|
|
|
/**
 * Find temperature measurements in text.
 *
 * Handles: 350°F, 200°C (400°F), 165-175 °F, 170C, 100-110C,
 * 32°c (90°f), ~220 °C
 *
 * Two passes are made over the text:
 *   1. amounts followed by a degree sign and F/C;
 *   2. amounts immediately followed by a bare F/C (no whitespace, no degree
 *      sign), where the letter must be followed by whitespace, ")", ",",
 *      "/" or the end of the text. A bare match is dropped when its start
 *      falls inside a result already collected.
 * Either form may be followed by a parenthesised alternative temperature.
 *
 * Results are in discovery order (all degree-sign matches, then bare
 * matches), not sorted by position.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} temperature measurements; `alt` holds the
 *   parenthesised alternative as `{ amount, unit }`, or null.
 */
function findTemperatures(text) {
  // Optional parenthesised alternative, shared by both patterns.
  const altPattern =
    `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${TEMP_UNIT_DEG}|${TEMP_UNIT_BARE})\\s*\\))?`;

  const degRe = new RegExp(`(${AMOUNT})\\s*(${TEMP_UNIT_DEG})${altPattern}`, 'gi');
  const bareRe = new RegExp(
    `(${AMOUNT})(${TEMP_UNIT_BARE})(?=\\s|\\)|$|,|\\/)${altPattern}`,
    'gi'
  );

  // Both patterns use the same capture layout:
  // 1 = amount, 2 = unit, 3 = alt amount, 4 = alt unit.
  const toMeasurement = (m) => {
    const amount = parseAmount(m[1]);
    const unit = normalizeUnit(m[2]);
    const alt = m[3] && m[4]
      ? { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]) }
      : null;
    return {
      match: m[0],
      index: m.index,
      type: 'temperature',
      amount,
      unit,
      // Range amounts have no top-level `approximate`; report false for them.
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt,
    };
  };

  const results = [];
  let m;

  while ((m = degRe.exec(text)) !== null) {
    results.push(toMeasurement(m));
  }

  while ((m = bareRe.exec(text)) !== null) {
    const start = m.index;
    const insideEarlier = results.some(
      (r) => start >= r.index && start < r.index + r.match.length
    );
    if (insideEarlier) continue;

    // The former re-test of m[0] against `(AMOUNT)[FfCc]` was removed: m[0]
    // always begins with exactly that, so the check could never fail.
    results.push(toMeasurement(m));
  }

  return results;
}
|
|
|
|
/**
 * Find dimension measurements in text.
 *
 * Handles: 9x13, 9 x 13, 8x6x2, 9 x 13 inch, 8x6x2 inch, 20x30cm,
 * and standalone lengths such as 1 inch, 5-8mm, 0.5-1cm.
 *
 * Two passes are made:
 *   1. "N x N" / "N x N x N" with an optional trailing unit. The unit may
 *      follow with or without whitespace, and must not run straight into
 *      another letter (so "9x13 inside" does not pick up "in" as a unit).
 *   2. A single amount or range followed by a dimension unit. A match is
 *      dropped when its start falls inside a result already collected.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} dimension measurements. For "N x N" matches
 *   `amount` is an array of 2 or 3 numbers and `unit` may be null; for
 *   standalone matches `amount` is whatever parseAmount returns.
 */
function findDimensions(text) {
  const results = [];

  // Previously the unit required leading whitespace (`\s+`) and had no
  // trailing guard: "20x30cm" lost its unit, and any word starting with
  // "in"/"cm"/"mm" after the numbers was taken as a unit.
  const dimRe = new RegExp(
    `(${NUM})\\s*x\\s*(${NUM})(?:\\s*x\\s*(${NUM}))?(?:\\s*(${DIM_UNIT})(?![a-z]))?`,
    'gi'
  );

  let m;
  while ((m = dimRe.exec(text)) !== null) {
    const dims = [parseFloat(m[1]), parseFloat(m[2])];
    if (m[3]) dims.push(parseFloat(m[3]));

    results.push({
      match: m[0],
      index: m.index,
      type: 'dimension',
      amount: dims,
      unit: normalizeUnit(m[4] || null), // null when no unit was written
      approximate: false,
      alt: null,
    });
  }

  // Standalone AMOUNT + dimension unit (e.g. "1 inch", "0.5-1cm")
  const standaloneDimRe = new RegExp(`(${AMOUNT})\\s*(${DIM_UNIT})\\b`, 'gi');

  while ((m = standaloneDimRe.exec(text)) !== null) {
    const start = m.index;
    const insideEarlier = results.some(
      (r) => start >= r.index && start < r.index + r.match.length
    );
    if (insideEarlier) continue;

    const amount = parseAmount(m[1]);
    results.push({
      match: m[0],
      index: m.index,
      type: 'dimension',
      amount,
      unit: normalizeUnit(m[2]),
      // Range amounts have no top-level `approximate`; report false for them.
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt: null,
    });
  }

  return results;
}
|
|
|
|
/**
 * Find weight measurements in text.
 *
 * Handles: 200g, 550 g, ~250g, 180-240g, 1kg, 227g (8 oz),
 * 80g (1/3 cups), 860g (800mL (3 1/3 cups))
 *
 * A weight may carry one parenthesised alternative, which may itself carry
 * a nested one:
 *   227g (8 oz)                → alt = 8 oz
 *   80g (1/3 cups)             → alt = 1/3 cups
 *   860g (800mL (3 1/3 cups))  → alt = 3 1/3 cups, intermediate = 800mL
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} weight measurements; each also has an
 *   `intermediate` field (`{ amount, unit }` or null) holding the outer
 *   alternative when a nested one is present.
 */
function findWeights(text) {
  const anyUnit = `${WEIGHT_UNIT}|${VOLUME_UNIT}`;
  const innerAlt = `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${anyUnit})\\s*\\))?`;
  const outerAlt = `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${anyUnit})\\b${innerAlt}\\s*\\))?`;
  const weightRe = new RegExp(`(${AMOUNT})\\s*(${WEIGHT_UNIT})\\b${outerAlt}`, 'gi');

  const toPair = (rawAmount, rawUnit) => ({
    amount: parseAmount(rawAmount),
    unit: normalizeUnit(rawUnit),
  });

  const found = [];
  for (let hit = weightRe.exec(text); hit !== null; hit = weightRe.exec(text)) {
    const [whole, rawAmount, rawUnit, outerAmount, outerUnit, innerAmount, innerUnit] = hit;
    const amount = parseAmount(rawAmount);
    const unit = normalizeUnit(rawUnit);

    let alt = null;
    let intermediate = null;
    if (innerAmount && innerUnit) {
      // Nested form: the innermost value is the displayed alternative and
      // the outer parenthesised value is kept as the intermediate.
      alt = toPair(innerAmount, innerUnit);
      intermediate = toPair(outerAmount, outerUnit);
    } else if (outerAmount && outerUnit) {
      alt = toPair(outerAmount, outerUnit);
    }

    found.push({
      match: whole,
      index: hit.index,
      type: 'weight',
      amount,
      unit,
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt,
      intermediate,
    });
  }

  return found;
}
|
|
|
|
/**
 * Find volume measurements in text.
 *
 * Handles: 2 quarts, 1/2 cups, 1 cup, 6 tablespoons, 6 table spoons,
 * 1 1/2 tablespoon, 3/4 teaspoon, 6 parts by volume,
 * 800mL (3 1/3 cups)
 *
 * A volume may be followed by one parenthesised alternative expressed in
 * either a volume or a weight unit.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} volume measurements; `alt` is `{ amount, unit }`
 *   for the parenthesised alternative, or null.
 */
function findVolumes(text) {
  const primary = `(${AMOUNT})\\s*(${VOLUME_UNIT})\\b`;
  const optionalAlt = `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${VOLUME_UNIT}|${WEIGHT_UNIT})\\s*\\))?`;
  const volumeRe = new RegExp(primary + optionalAlt, 'gi');

  const found = [];
  for (let hit = volumeRe.exec(text); hit !== null; hit = volumeRe.exec(text)) {
    const [whole, rawAmount, rawUnit, altAmount, altUnit] = hit;
    const amount = parseAmount(rawAmount);
    const unit = normalizeUnit(rawUnit);
    const alt = altAmount && altUnit
      ? { amount: parseAmount(altAmount), unit: normalizeUnit(altUnit) }
      : null;

    found.push({
      match: whole,
      index: hit.index,
      type: 'volume',
      amount,
      unit,
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt,
    });
  }

  return found;
}
|
|
|
|
/**
 * Find time measurements in text.
 *
 * Handles: 10 minutes, 28 to 32 minutes, 8 to 10 minutes, 1 1/2 hours,
 * 30min, 2hrs
 *
 * Only numeric amounts are matched. Durations written in words ("an hour",
 * "five days") are NOT recognised, because the pattern requires AMOUNT
 * before the unit.
 *
 * Whitespace between the amount and the unit is optional, as in the other
 * matchers; previously it was required, so compact forms such as "30min"
 * were missed.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} time measurements (`alt` is always null).
 */
function findTimes(text) {
  const results = [];

  const timeRe = new RegExp(`(${AMOUNT})\\s*(${TIME_UNIT})\\b`, 'gi');

  let m;
  while ((m = timeRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);
    const unit = normalizeUnit(m[2]);
    results.push({
      match: m[0],
      index: m.index,
      type: 'time',
      amount,
      unit,
      // Range amounts have no top-level `approximate`; report false for them.
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt: null,
    });
  }

  return results;
}
|
|
|
|
// ─── Count Matcher ────────────────────────────────────────
|
|
|
|
/**
 * Find bare numeric counts in text (no unit attached).
 * These represent ingredient quantities like "eggs 3" or "biscuits 20-24".
 * Overlap with unit-bearing matches is resolved by deduplication (longer wins).
 *
 * An amount counts as bare when it is followed by one of:
 *   - the end of a line (or of the text),
 *   - optional whitespace then "," ")" "]" or ";",
 *   - whitespace then a character that is not a digit, "x"/"X" or "~".
 *
 * The pattern runs in multiline mode so `$` matches at every line end.
 * Without it, a count ending one line was dropped whenever the next line
 * began with a digit (e.g. "eggs 3\n2 cups flour").
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} count measurements (`unit` and `alt` are null;
 *   `match` is the amount text only).
 */
function findCounts(text) {
  const results = [];

  const countRe = new RegExp(
    `(${AMOUNT})(?=\\s*$|\\s*,|\\s*\\)|\\s*\\]|\\s*;|\\s+[^\\dxX~])`,
    'gm'
  );

  let m;
  while ((m = countRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);
    results.push({
      match: m[1],
      index: m.index,
      type: 'count',
      amount,
      unit: null,
      // Range amounts have no top-level `approximate`; report false for them.
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt: null,
    });
  }

  return results;
}
|
|
|
|
// ─── Pressure Matcher ─────────────────────────────────────
|
|
|
|
/**
 * Find pressure measurements in text.
 *
 * Handles: 11 PSI, 15 psi, 100 kPa
 *
 * Matching is case-insensitive, and whitespace between the amount and the
 * unit is optional.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} pressure measurements (`alt` is always null).
 */
function findPressures(text) {
  const pressureRe = new RegExp(`(${AMOUNT})\\s*(${PRESSURE_UNIT})\\b`, 'gi');

  const found = [];
  for (let hit = pressureRe.exec(text); hit !== null; hit = pressureRe.exec(text)) {
    const [whole, rawAmount, rawUnit] = hit;
    const amount = parseAmount(rawAmount);
    const unit = normalizeUnit(rawUnit);

    found.push({
      match: whole,
      index: hit.index,
      type: 'pressure',
      amount,
      unit,
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt: null,
    });
  }

  return found;
}
|
|
|
|
// ─── pH Matcher ───────────────────────────────────────────
|
|
|
|
/**
 * Find pH measurements in text.
 *
 * Handles: 4.0 pH, 4.0 ph, 3.5 pH
 *
 * Only the "amount then unit" order is matched. Matching is
 * case-insensitive: the pattern was previously case-sensitive, so the
 * lowercase "ph" spelling listed above never matched.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} pH measurements (`alt` is always null).
 */
function findPH(text) {
  const results = [];

  const phRe = new RegExp(`(${AMOUNT})\\s*(${PH_UNIT})\\b`, 'gi');

  let m;
  while ((m = phRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);
    const unit = normalizeUnit(m[2]);
    results.push({
      match: m[0],
      index: m.index,
      type: 'pH',
      amount,
      unit,
      // Range amounts have no top-level `approximate`; report false for them.
      approximate: typeof amount.approximate === 'boolean' ? amount.approximate : false,
      alt: null,
    });
  }

  return results;
}
|
|
|
|
// ─── Main Matcher ─────────────────────────────────────────
|
|
|
|
/**
 * Find all measurement strings in the given text.
 *
 * Runs every type-specific matcher, orders the combined results by start
 * position, and resolves overlaps so that the longer match wins.
 *
 * @param {string} text — markdown or plain text to scan
 * @returns {Measurement[]} non-overlapping measurements in position order
 */
function findAllMeasurements(text) {
  // Matcher order doubles as the tie-break priority (see below).
  const matchers = [
    findTemperatures,
    findDimensions,
    findWeights,
    findVolumes,
    findTimes,
    findPressures,
    findPH,
    findCounts,
  ];

  const candidates = matchers.reduce((acc, find) => acc.concat(find(text)), []);
  candidates.sort((a, b) => a.index - b.index);

  const endOf = (item) => item.index + item.match.length;

  // When two matches overlap, the longer one is kept. On equal length the
  // one already kept stays, i.e. the one that sorted first: for equal start
  // positions that is the earlier matcher in the list above (temperatures >
  // dimensions > weights > volumes > times > pressures > pH > counts).
  const kept = [];
  for (const candidate of candidates) {
    const candidateEnd = endOf(candidate);
    const clash = kept.findIndex(
      (prev) => candidate.index < endOf(prev) && candidateEnd > prev.index
    );

    if (clash === -1) {
      kept.push(candidate);
      continue;
    }
    if (candidate.match.length > kept[clash].match.length) {
      kept[clash] = candidate;
    }
  }

  return kept;
}
|
|
|
|
// ─── Exports ──────────────────────────────────────────────
|
|
|
|
module.exports = {
|
|
// Main API
|
|
findAllMeasurements,
|
|
|
|
// Individual matchers (exported for testing)
|
|
findTemperatures,
|
|
findDimensions,
|
|
findWeights,
|
|
findVolumes,
|
|
findTimes,
|
|
findPressures,
|
|
findPH,
|
|
findCounts,
|
|
|
|
// Parsing utilities (exported for testing)
|
|
parseAmount,
|
|
parseSingleAmount,
|
|
normalizeUnit,
|
|
unitType,
|
|
}; |