feat: added unit conversion buttons
This commit is contained in:
parent
12df111c5e
commit
a96734c394
10 changed files with 2624 additions and 1 deletions
475
lib/measurements/matcher.js
Normal file
475
lib/measurements/matcher.js
Normal file
|
|
@ -0,0 +1,475 @@
|
|||
/**
|
||||
* Measurement matcher for recipe markdown text.
|
||||
*
|
||||
* Finds and parses measurement strings including weights, volumes,
|
||||
* temperatures, dimensions, and times. Handles tricky patterns like:
|
||||
* - Dual units: "200°C (400°F)", "227g (8 oz)"
|
||||
* - Ranges: "28 to 32 minutes", "180-240g"
|
||||
* - Fractions: "1/2 cup", "1 1/2 teaspoon"
|
||||
* - Dimensions: "9x13", "8 x 8 inch", "8x6x2 inch"
|
||||
* - Approximate: "~250g", "(~220 °C)"
|
||||
* - Bare temp units: "170C", "100-110C"
|
||||
*/
|
||||
|
||||
// ─── Amount Building Blocks ───────────────────────────────
|
||||
|
||||
// Regex source fragments for numeric amounts. They are plain strings so the
// matchers can splice them into larger patterns with template literals.

// Integer or decimal: "200", "1.5".
const NUM = String.raw`\d+(?:\.\d+)?`;

// Simple fraction, whitespace allowed around the slash: "1/2", "3 / 4".
const FRAC = String.raw`\d+\s*/\s*\d+`;

// Whole number followed by a fraction: "1 1/2".
const MIXED = String.raw`\d+\s+\d+\s*/\s*\d+`;

// Tilde that marks a value as approximate: "~250", "~ 250".
const APPROX_PREFIX = String.raw`~\s*`;

// One value with an optional "~" prefix. MIXED and FRAC come before NUM so the
// longer spellings are attempted first.
const SINGLE_AMOUNT = `(?:${APPROX_PREFIX})?(?:${[MIXED, FRAC, NUM].join('|')})`;

// Separator between the two ends of a range: "-" or "to", whitespace optional.
const RANGE_SEP = String.raw`\s*(?:-|to)\s*`;

// A range ("180-240", "28 to 32") or a single value; the range alternative is
// listed first so it wins when both could match.
const AMOUNT = `(?:${[SINGLE_AMOUNT + RANGE_SEP + SINGLE_AMOUNT, SINGLE_AMOUNT].join('|')})`;
|
||||
|
||||
// ─── Unit Patterns ────────────────────────────────────────
|
||||
|
||||
// Regex source fragments for units. Every matcher in this file compiles them
// with the `i` flag, so letter case is not significant when they are used.

// Temperature unit written with a degree sign: "°F", "° c".
const TEMP_UNIT_DEG = '°\\s*[FfCc]';

// Temperature unit written as a bare letter: "170C".
const TEMP_UNIT_BARE = '[FfCc]';

// Weight units.
const WEIGHT_UNIT = '(?:kg|g|oz|lbs?|ounces?|pounds?)';

// Volume units (plus the "parts by volume/weight" ratio phrases).
// "tea spoon(s)" is accepted alongside "table spoon(s)"; normalizeUnit() maps
// both two-word spellings to their one-word form.
const VOLUME_UNIT = `(?:${[
  'cups?',
  'tablespoons?',
  'table\\s+spoons?',
  'tbsp',
  'teaspoons?',
  'tea\\s+spoons?',
  'tsp',
  'ml',
  'mL',
  'L',
  'liters?',
  'litres?',
  'quarts?',
  'gallons?',
  'pints?',
  'fl\\.?\\s*oz',
  'fluid\\s+ounces?',
  'parts\\s+by\\s+(?:volume|weight)',
].join('|')})`;

// Time units.
const TIME_UNIT = '(?:minutes?|mins?|hours?|hrs?|days?|seconds?|secs?)';

// Length units that may follow an "N x N" dimension.
const DIM_UNIT = '(?:inch(?:es)?|in\\.?|cm|mm)';
|
||||
|
||||
// ─── Amount Parsing ───────────────────────────────────────
|
||||
|
||||
/**
 * Convert one amount string (never a range) into a number.
 *
 * Accepts integers, decimals, simple fractions, mixed numbers, and any of
 * those with a leading "~" that marks the value as approximate.
 *
 * @param {string} str — e.g. "200", "1.5", "1/2", "1 1/2", "~250"
 * @returns {{ value: number, approximate: boolean }} `value` is NaN when the
 *   text is not a number parseFloat() can read.
 */
function parseSingleAmount(str) {
  const trimmed = str.trim();
  const approximate = trimmed.startsWith('~');
  const body = approximate ? trimmed.replace(/^~\s*/, '') : trimmed;

  // One pattern covers both "W N/D" (mixed) and "N/D" (plain fraction):
  // the whole-number part is optional.
  const fraction = /^(?:(\d+)\s+)?(\d+)\s*\/\s*(\d+)$/.exec(body);
  if (fraction === null) {
    // Plain number
    return { value: parseFloat(body), approximate };
  }

  const [, whole, numerator, denominator] = fraction;
  const fractional = parseInt(numerator, 10) / parseInt(denominator, 10);
  const value = whole === undefined ? fractional : parseInt(whole, 10) + fractional;
  return { value, approximate };
}
|
||||
|
||||
/**
 * Parse an amount string that may be a single value or a range.
 *
 * A range is two single amounts joined by RANGE_SEP ("-" or "to", with
 * optional whitespace, any letter case). The same separator pattern is used
 * to build AMOUNT, so every range the matchers capture is also recognised
 * here; that includes spaced dashes ("165 - 175"), unspaced "to" ("8to10")
 * and upper-case "TO", which would otherwise fall through and be read as a
 * single value, dropping the upper bound.
 *
 * @param {string} str — e.g. "200", "180-240", "28 to 32", "1 1/2"
 * @returns {
 *   { value: number, approximate: boolean } |
 *   { min: { value: number, approximate: boolean }, max: { value: number, approximate: boolean } }
 * }
 */
function parseAmount(str) {
  const trimmed = str.trim();

  // SINGLE_AMOUNT cannot contain "-" or letters, so the separator position is
  // unambiguous: "1 1/2 to 2" splits into "1 1/2" and "2", and a lone
  // fraction such as "1/2" never matches as a range.
  const rangeRe = new RegExp(
    `^(${SINGLE_AMOUNT})${RANGE_SEP}(${SINGLE_AMOUNT})$`,
    'i'
  );
  const rangeMatch = trimmed.match(rangeRe);
  if (rangeMatch) {
    return {
      min: parseSingleAmount(rangeMatch[1]),
      max: parseSingleAmount(rangeMatch[2]),
    };
  }

  return parseSingleAmount(trimmed);
}
|
||||
|
||||
// ─── Unit Normalization ───────────────────────────────────
|
||||
|
||||
// Spellings that map to a canonical unit by exact match (after clean-up).
const UNIT_ALIASES = new Map([
  // Temperature
  ['f', '°F'],
  ['c', '°C'],
  // Weight
  ['g', 'g'],
  ['kg', 'kg'],
  ['oz', 'oz'],
  ['lb', 'lb'],
  ['lbs', 'lb'],
  ['ounce', 'oz'],
  ['ounces', 'oz'],
  ['pound', 'lb'],
  ['pounds', 'lb'],
  // Volume
  ['cup', 'cup'],
  ['cups', 'cup'],
  ['tbsp', 'tablespoon'],
  ['tsp', 'teaspoon'],
  ['ml', 'ml'],
  ['l', 'L'],
  // Dimension
  ['in', 'inch'],
  ['cm', 'cm'],
  ['mm', 'mm'],
  ['ft', 'ft'],
]);

// Spellings with optional parts, tested in order. The patterns do not overlap
// with each other or with UNIT_ALIASES, so the order carries no meaning.
const UNIT_PATTERNS = [
  // Temperature
  [/^°\s*f$/, '°F'],
  [/^°\s*c$/, '°C'],
  // Volume
  [/^table\s*spoons?$/, 'tablespoon'],
  [/^tea\s*spoons?$/, 'teaspoon'],
  [/^(?:liters?|litres?)$/, 'L'],
  [/^quarts?$/, 'quart'],
  [/^gallons?$/, 'gallon'],
  [/^pints?$/, 'pint'],
  [/^fl\.?\s*oz$/, 'fl oz'],
  [/^fluid\s+ounces?$/, 'fl oz'],
  [/^parts\s+by\s+volume$/, 'parts by volume'],
  [/^parts\s+by\s+weight$/, 'parts by weight'],
  // Time
  [/^(?:minutes?|mins?)$/, 'minute'],
  [/^(?:hours?|hrs?)$/, 'hour'],
  [/^days?$/, 'day'],
  [/^(?:seconds?|secs?)$/, 'second'],
  // Dimension
  [/^inch(es)?$/, 'inch'],
  [/^f(oo|ee)t$/, 'ft'],
];

/**
 * Normalize a unit string for consistent comparison.
 *
 * The input is trimmed, lower-cased, has whitespace runs collapsed to one
 * space and a single trailing "." removed before it is looked up.
 *
 * @param {string|null|undefined} unit — raw unit text, e.g. "Lbs", "fl. oz", "° f"
 * @returns {string|null} canonical unit, the cleaned-up text when the unit is
 *   not recognised, or null for a falsy input
 */
function normalizeUnit(unit) {
  if (!unit) return null;

  const cleaned = unit.trim().toLowerCase().replace(/\s+/g, ' ').replace(/\.$/, '');

  if (UNIT_ALIASES.has(cleaned)) {
    return UNIT_ALIASES.get(cleaned);
  }

  const hit = UNIT_PATTERNS.find(([pattern]) => pattern.test(cleaned));
  return hit ? hit[1] : cleaned;
}
|
||||
|
||||
/**
 * Determine the measurement type of a normalized unit.
 *
 * @param {string|null} normalizedUnit — a canonical unit such as "°F", "g", "fl oz"
 * @returns {"temperature"|"weight"|"volume"|"time"|"dimension"|null} the
 *   category, or null when the unit is missing or not one of the known ones
 */
function unitType(normalizedUnit) {
  switch (normalizedUnit) {
    case '°F':
    case '°C':
      return 'temperature';

    case 'g':
    case 'kg':
    case 'oz':
    case 'lb':
      return 'weight';

    // The "parts by ..." ratio phrases are grouped with volume.
    case 'cup':
    case 'tablespoon':
    case 'teaspoon':
    case 'ml':
    case 'L':
    case 'quart':
    case 'gallon':
    case 'pint':
    case 'fl oz':
    case 'parts by volume':
    case 'parts by weight':
      return 'volume';

    case 'minute':
    case 'hour':
    case 'day':
    case 'second':
      return 'time';

    case 'inch':
    case 'cm':
    case 'mm':
    case 'ft':
      return 'dimension';

    default:
      return null;
  }
}
|
||||
|
||||
// ─── Matchers ─────────────────────────────────────────────
|
||||
|
||||
/**
|
||||
* @typedef {Object} Measurement
|
||||
* @property {string} match — full matched string from source
|
||||
* @property {number} index — start position in source text
|
||||
* @property {string} type — "temperature"|"weight"|"volume"|"time"|"dimension"
|
||||
* @property {{value:number,approximate:boolean}|number[]|{min:object,max:object}} amount — parsed amount: a single value, a list of dimensions, or a min/max range
|
||||
* @property {string|null} unit — normalized unit (null for a dimension written without a unit)
|
||||
* @property {boolean} approximate — had ~ prefix
|
||||
* @property {object|null} alt — alternative measurement in parentheses
|
||||
*/
|
||||
|
||||
/**
 * Find temperature measurements in text.
 *
 * Handles: 350°F, 200°C (400°F), 165-175 °F, 170C, 100-110C,
 * 32°c (90°f), ~220 °C
 *
 * Two passes are made: first amounts followed by a degree sign, then amounts
 * immediately followed by a bare C/F. A bare match that starts inside a
 * degree-sign match (for example the "400F" in "200°C (400F)") is skipped.
 * Results are returned in that pass order, not sorted by position.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} temperature measurements
 */
function findTemperatures(text) {
  // Optional parenthesised alternative such as " (400°F)"; captures its
  // amount and unit as groups 3 and 4.
  const altGroup =
    `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${TEMP_UNIT_DEG}|${TEMP_UNIT_BARE})\\s*\\))?`;

  // Pattern with degree symbol: AMOUNT °F/C (optional alt)
  const degRe = new RegExp(`(${AMOUNT})\\s*(${TEMP_UNIT_DEG})${altGroup}`, 'gi');

  // Pattern with bare C/F: the letter must touch the number and must not be
  // the start of a longer word ("2cups"). Any non-alphanumeric character may
  // follow, so a temperature that ends a sentence ("Bake at 170C.") is found.
  const bareRe = new RegExp(
    `(${AMOUNT})(${TEMP_UNIT_BARE})(?![A-Za-z0-9])${altGroup}`,
    'gi'
  );

  const results = [];
  let m;

  while ((m = degRe.exec(text)) !== null) {
    results.push(temperatureFromMatch(m));
  }

  while ((m = bareRe.exec(text)) !== null) {
    const start = m.index;
    const insideEarlierMatch = results.some(
      r => start >= r.index && start < r.index + r.match.length
    );
    if (!insideEarlierMatch) {
      results.push(temperatureFromMatch(m));
    }
  }

  return results;
}

/** Build a temperature Measurement from a degRe/bareRe match (groups 1-4). */
function temperatureFromMatch(m) {
  const amount = parseAmount(m[1]);
  const alt = m[3] && m[4]
    ? { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]) }
    : null;

  // A range carries its "~" flags on min/max rather than at the top level;
  // the measurement is approximate when either end is.
  const approximate = amount.min && amount.max
    ? Boolean(amount.min.approximate || amount.max.approximate)
    : amount.approximate === true;

  return {
    match: m[0],
    index: m.index,
    type: 'temperature',
    amount,
    unit: normalizeUnit(m[2]),
    approximate,
    alt,
  };
}
|
||||
|
||||
/**
 * Find dimension measurements in text.
 *
 * Handles "N x N" and "N x N x N" sizes with an optional trailing unit:
 * 9x13, 9 x 13, 8x6x2, 9 x 13 inch, 8x6x2 inch, 9x13-inch, 20x30cm.
 * Single lengths and ranges such as "5-8mm" are not matched here.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} dimension measurements; `amount` is the list of
 *   numbers and `unit` is null when no unit follows
 */
function findDimensions(text) {
  // NxN or NxNxN with optional unit. The unit may follow after whitespace, a
  // single hyphen ("9x13-inch") or directly ("20x30cm"), and must not run on
  // into a longer word, so the "in" of "9 x 13 inside" is not read as inches.
  const dimRe = new RegExp(
    `(${NUM})\\s*x\\s*(${NUM})(?:\\s*x\\s*(${NUM}))?` +
      `(?:(?:\\s+|-)?(${DIM_UNIT})(?![A-Za-z]))?`,
    'gi'
  );

  const results = [];
  let m;
  while ((m = dimRe.exec(text)) !== null) {
    // Groups 1-3 are the sides; the third is undefined for two-sided sizes.
    const sides = [m[1], m[2], m[3]]
      .filter(side => side !== undefined)
      .map(side => parseFloat(side));

    results.push({
      match: m[0],
      index: m.index,
      type: 'dimension',
      amount: sides,
      unit: normalizeUnit(m[4] || null),
      approximate: false,
      alt: null,
    });
  }

  return results;
}
|
||||
|
||||
/**
 * Find weight measurements in text.
 *
 * Handles: 200g, 550 g, ~250g, 180-240g, 1kg, 227g (8 oz)
 *
 * A parenthesised alternative directly after the weight is attached as `alt`;
 * its unit may be a weight or a volume unit.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} weight measurements
 */
function findWeights(text) {
  // The \b after the unit stops the "g" of a word such as "glass" from being
  // read as grams.
  const weightRe = new RegExp(
    `(${AMOUNT})\\s*(${WEIGHT_UNIT})\\b` +
      `(?:\\s*\\(\\s*(${AMOUNT})\\s*(${WEIGHT_UNIT}|${VOLUME_UNIT})\\s*\\))?`,
    'gi'
  );

  const results = [];
  let m;
  while ((m = weightRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);
    const alt = m[3] && m[4]
      ? { amount: parseAmount(m[3]), unit: normalizeUnit(m[4]) }
      : null;

    // A range carries its "~" flags on min/max rather than at the top level;
    // the measurement is approximate when either end is.
    const approximate = amount.min && amount.max
      ? Boolean(amount.min.approximate || amount.max.approximate)
      : amount.approximate === true;

    results.push({
      match: m[0],
      index: m.index,
      type: 'weight',
      amount,
      unit: normalizeUnit(m[2]),
      approximate,
      alt,
    });
  }

  return results;
}
|
||||
|
||||
/**
 * Find volume measurements in text.
 *
 * Handles: 2 quarts, 1/2 cups, 1 cup, 6 tablespoons, 6 table spoons,
 * 1 1/2 tablespoon, 3/4 teaspoon, 6 parts by volume, 250ml, 2L
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} volume measurements (`alt` is always null)
 */
function findVolumes(text) {
  // Whitespace between amount and unit is optional, as it is for weights, so
  // attached units ("250ml") are found. The \b after the unit keeps a unit
  // prefix inside a longer word ("1cupcake") from matching.
  const volumeRe = new RegExp(`(${AMOUNT})\\s*(${VOLUME_UNIT})\\b`, 'gi');

  const results = [];
  let m;
  while ((m = volumeRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);

    // A range carries its "~" flags on min/max rather than at the top level;
    // the measurement is approximate when either end is.
    const approximate = amount.min && amount.max
      ? Boolean(amount.min.approximate || amount.max.approximate)
      : amount.approximate === true;

    results.push({
      match: m[0],
      index: m.index,
      type: 'volume',
      amount,
      unit: normalizeUnit(m[2]),
      approximate,
      alt: null,
    });
  }

  return results;
}
|
||||
|
||||
/**
 * Find time measurements in text.
 *
 * Handles: 10 minutes, 28 to 32 minutes, 8 to 10 minutes, 1 hour, 30min
 *
 * Only numeric amounts are matched; spelled-out quantities such as
 * "an hour" or "five days" are not recognised.
 *
 * @param {string} text — text to scan
 * @returns {Measurement[]} time measurements (`alt` is always null)
 */
function findTimes(text) {
  // Whitespace between amount and unit is optional so attached units
  // ("30min", "2hrs") are found. The \b after the unit keeps a unit prefix
  // inside a longer word ("5 minced") from matching.
  const timeRe = new RegExp(`(${AMOUNT})\\s*(${TIME_UNIT})\\b`, 'gi');

  const results = [];
  let m;
  while ((m = timeRe.exec(text)) !== null) {
    const amount = parseAmount(m[1]);

    // A range carries its "~" flags on min/max rather than at the top level;
    // the measurement is approximate when either end is.
    const approximate = amount.min && amount.max
      ? Boolean(amount.min.approximate || amount.max.approximate)
      : amount.approximate === true;

    results.push({
      match: m[0],
      index: m.index,
      type: 'time',
      amount,
      unit: normalizeUnit(m[2]),
      approximate,
      alt: null,
    });
  }

  return results;
}
|
||||
|
||||
// ─── Main Matcher ─────────────────────────────────────────
|
||||
|
||||
/**
 * Find all measurement strings in the given text.
 *
 * Runs every type-specific matcher, orders the combined results by start
 * position and resolves overlaps: of two overlapping matches the longer one
 * is kept, and on equal length the one that comes first in sorted order is
 * kept. With a stable sort, matches starting at the same position stay in
 * matcher order (temperatures, dimensions, weights, volumes, times).
 *
 * @param {string} text — markdown or plain text to scan
 * @returns {Measurement[]} non-overlapping measurements sorted by position
 */
function findAllMeasurements(text) {
  const candidates = [
    ...findTemperatures(text),
    ...findDimensions(text),
    ...findWeights(text),
    ...findVolumes(text),
    ...findTimes(text),
  ].sort((a, b) => a.index - b.index);

  // The kept entries never overlap each other and candidates arrive in start
  // order, so a candidate can only overlap the most recently kept entry: any
  // older entry ends at or before the start of the last one. Comparing with
  // the last entry alone therefore gives the same result as scanning all.
  const kept = [];
  for (const candidate of candidates) {
    const last = kept[kept.length - 1];
    const overlapsLast =
      last !== undefined && candidate.index < last.index + last.match.length;

    if (!overlapsLast) {
      kept.push(candidate);
    } else if (candidate.match.length > last.match.length) {
      // Keep the longer match
      kept[kept.length - 1] = candidate;
    }
  }

  return kept;
}
|
||||
|
||||
// ─── Exports ──────────────────────────────────────────────
|
||||
|
||||
// Main API
const mainApi = { findAllMeasurements };

// Individual matchers (exported for testing)
const matchers = {
  findTemperatures,
  findDimensions,
  findWeights,
  findVolumes,
  findTimes,
};

// Parsing utilities (exported for testing)
const parsingUtilities = {
  parseAmount,
  parseSingleAmount,
  normalizeUnit,
  unitType,
};

// Merged in this order so the exported keys keep their original sequence.
module.exports = Object.assign({}, mainApi, matchers, parsingUtilities);
|
||||
Loading…
Add table
Add a link
Reference in a new issue