feat: stripped tags from end of document

This commit is contained in:
Leyla Becker 2026-02-18 11:31:33 -06:00
parent 64061c21b9
commit 672a4ee7fb
2 changed files with 91 additions and 40 deletions

View file

@ -40,15 +40,12 @@ const extractTags = (content, mdInstance) => {
return [...new Set(tags)]; return [...new Set(tags)];
} }
const getPostTags = (post, mdInstance) => { const getPostTags = (post, tagMdInstance) => {
const filePath = post.inputPath; const filePath = post.inputPath;
try { try {
const content = fs.readFileSync(filePath, 'utf-8'); const content = fs.readFileSync(filePath, 'utf-8');
const tags = extractTags(content, mdInstance); const tags = extractTags(content, tagMdInstance);
return tags.map(tag => { return tags.map(tag => tag.toLowerCase());
const normalizedTag = tag.toLowerCase();
return normalizedTag
});
} catch (e) { } catch (e) {
// Skip if file can't be read // Skip if file can't be read
return [] return []
@ -60,13 +57,13 @@ const isReleased = (post) => {
} }
const markdownItHashtag = (md) => { const markdownItHashtag = (md) => {
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_\/]*)(?![a-zA-Z0-9_-])/; const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_\/]*)(?![a-zA-Z0-9_-])/;
const HASH_CODE = '#'.charCodeAt(0); const HASH_CODE = '#'.charCodeAt(0);
const SPACE_CODE = ' '.charCodeAt(0); const SPACE_CODE = ' '.charCodeAt(0);
const TAB_CODE = '\t'.charCodeAt(0); const TAB_CODE = '\t'.charCodeAt(0);
const NEWLINE_CODE = '\n'.charCodeAt(0); const NEWLINE_CODE = '\n'.charCodeAt(0);
const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0); const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0);
md.inline.ruler.push('hashtag', function(state, silent) { md.inline.ruler.push('hashtag', function(state, silent) {
const pos = state.pos; const pos = state.pos;
@ -99,42 +96,94 @@ const markdownItHashtag = (md) => {
const slug = tagName.toLowerCase(); const slug = tagName.toLowerCase();
return `<a href="/tags/${slug}/" class="inline-tag">#${md.utils.escapeHtml(tagName)}</a>`; return `<a href="/tags/${slug}/" class="inline-tag">#${md.utils.escapeHtml(tagName)}</a>`;
}; };
} };
const md = markdownIt({ // Plugin: Strip trailing hashtag-only paragraphs from rendered output
html: true, // Must be applied AFTER footnote plugin since footnotes are moved to the end
breaks: true, const markdownItStripTrailingHashtags = (md) => {
linkify: true md.core.ruler.push('strip_trailing_hashtags', function(state) {
const tokens = state.tokens;
const isHashtagOnlyParagraph = (inlineToken) =>
inlineToken?.type === 'inline' &&
inlineToken.children?.every(child =>
child.type === 'hashtag' ||
(child.type === 'text' && child.content.trim() === '') ||
child.type === 'softbreak'
) &&
inlineToken.children?.some(child => child.type === 'hashtag');
const isHashtagParagraphAt = (idx) =>
tokens[idx]?.type === 'paragraph_open' &&
tokens[idx + 1]?.type === 'inline' &&
tokens[idx + 2]?.type === 'paragraph_close' &&
isHashtagOnlyParagraph(tokens[idx + 1]);
const footnoteIdx = tokens.findIndex(t => t.type === 'footnote_block_open');
const footnoteSectionStart = footnoteIdx === -1 ? tokens.length : footnoteIdx;
const hashtagSectionStart = Array.from(
{ length: Math.floor(footnoteSectionStart / 3) },
(_, i) => footnoteSectionStart - 3 * (i + 1)
).reduce(
(start, idx) => isHashtagParagraphAt(idx) ? idx : start,
footnoteSectionStart
);
state.tokens = tokens.filter((_, idx) =>
idx < hashtagSectionStart || idx >= footnoteSectionStart
);
return true;
}); });
};
const markdownItDetails = (md) => {
md.use(markdownItContainer, 'details', { md.use(markdownItContainer, 'details', {
validate: function (params) { validate: (params) => params.trim().match(/^(.*)$/),
return params.trim().match(/^(.*)$/); render: (tokens, idx) => {
},
render: function (tokens, idx) {
const m = tokens[idx].info.trim().match(/^(.*)$/); const m = tokens[idx].info.trim().match(/^(.*)$/);
if (tokens[idx].nesting === 1) { if (tokens[idx].nesting === 1) {
const title = md.utils.escapeHtml(m[1]); const title = md.utils.escapeHtml(m[1]);
return `<details class="expandable">\n<summary>${title}</summary>\n`; return `<details class="expandable">\n<summary>${title}</summary>\n`;
} else {
return '</details>\n';
} }
return '</details>\n';
} }
}); });
};
md.use(markdownItFootnote); const sharedPlugins = [
md.use(markdownItHashtag); markdownItFootnote,
md.use(markdownItMermaid); markdownItHashtag,
md.use(markdownItTaskLists, { enabled: true, label: true, labelAfter: true }); markdownItMermaid,
[markdownItTaskLists, { enabled: true, label: true, labelAfter: true }],
markdownItDetails,
];
const applyPlugins = (md, plugins) =>
plugins.reduce((instance, plugin) => {
const [pluginFn, options] = Array.isArray(plugin) ? plugin : [plugin];
return instance.use(pluginFn, options), instance;
}, md);
const createMarkdownInstance = (extraPlugins = []) => {
const md = markdownIt({ html: true, breaks: true, linkify: true });
applyPlugins(md, [...sharedPlugins, ...extraPlugins]);
return md;
};
const md = createMarkdownInstance([markdownItStripTrailingHashtags]);
const tagExtractorMd = createMarkdownInstance();
module.exports = (eleventyConfig) => { module.exports = (eleventyConfig) => {
eleventyConfig.addPlugin(syntaxHighlight); eleventyConfig.addPlugin(syntaxHighlight);
eleventyConfig.addFilter("extractTags", (content) => extractTags(content, md)); eleventyConfig.addFilter("extractTags", (content) => extractTags(content, tagExtractorMd));
eleventyConfig.addFilter("extractTagsFromFile", (filePath) => { eleventyConfig.addFilter("extractTagsFromFile", (filePath) => {
try { try {
const content = fs.readFileSync(filePath, 'utf-8'); const content = fs.readFileSync(filePath, 'utf-8');
return extractTags(content, md); return extractTags(content, tagExtractorMd);
} catch (e) { } catch (e) {
return []; return [];
} }
@ -216,7 +265,7 @@ module.exports = (eleventyConfig) => {
const tagMap = {}; const tagMap = {};
posts.forEach(post => { posts.forEach(post => {
const tags = getPostTags(post, md) const tags = getPostTags(post, tagExtractorMd)
tags.forEach((tag) => { tags.forEach((tag) => {
tagMap[tag] = { tagMap[tag] = {
name: tag, name: tag,
@ -275,7 +324,7 @@ module.exports = (eleventyConfig) => {
const filePath = recipe.inputPath; const filePath = recipe.inputPath;
try { try {
const content = fs.readFileSync(filePath, 'utf-8'); const content = fs.readFileSync(filePath, 'utf-8');
const tags = extractTags(content, md); const tags = extractTags(content, tagExtractorMd);
return tags.map(tag => tag.toLowerCase()); return tags.map(tag => tag.toLowerCase());
} catch (e) { } catch (e) {
return []; return [];
@ -287,7 +336,7 @@ module.exports = (eleventyConfig) => {
const recipes = collectionApi.getFilteredByGlob("recipes/**/*.md") const recipes = collectionApi.getFilteredByGlob("recipes/**/*.md")
.filter(r => r.data.isNewestVersion && r.data.draft !== true); .filter(r => r.data.isNewestVersion && r.data.draft !== true);
const postTags = posts.flatMap(post => getPostTags(post, md)); const postTags = posts.flatMap(post => getPostTags(post, tagExtractorMd));
const recipeTags = recipes.flatMap(recipe => getRecipeTags(recipe)); const recipeTags = recipes.flatMap(recipe => getRecipeTags(recipe));
return [...new Set([...postTags, ...recipeTags])].sort(); return [...new Set([...postTags, ...recipeTags])].sort();
@ -306,7 +355,7 @@ module.exports = (eleventyConfig) => {
// Build tag map from posts // Build tag map from posts
const postTagMap = posts.reduce((acc, post) => { const postTagMap = posts.reduce((acc, post) => {
const tags = getPostTags(post, md); const tags = getPostTags(post, tagExtractorMd);
return tags.reduce((innerAcc, tag) => ({ return tags.reduce((innerAcc, tag) => ({
...innerAcc, ...innerAcc,
[tag]: { [tag]: {

View file

@ -255,3 +255,5 @@ The HyperLogLog approach trades exactness (~3% error) for dramatic scalability
## References ## References
A working simulation is available at [simulations/hyperloglog-tombstone/simulation.ts](/simulations/hyperloglog-tombstone/simulation.ts). A working simulation is available at [simulations/hyperloglog-tombstone/simulation.ts](/simulations/hyperloglog-tombstone/simulation.ts).
#algorithm #computer #distributed #peer_to_peer