feat: stripped tags from end of document

This commit is contained in:
Leyla Becker 2026-02-18 11:31:33 -06:00
parent 64061c21b9
commit 672a4ee7fb
2 changed files with 91 additions and 40 deletions

View file

@ -40,15 +40,12 @@ const extractTags = (content, mdInstance) => {
return [...new Set(tags)];
}
const getPostTags = (post, mdInstance) => {
const getPostTags = (post, tagMdInstance) => {
const filePath = post.inputPath;
try {
const content = fs.readFileSync(filePath, 'utf-8');
const tags = extractTags(content, mdInstance);
return tags.map(tag => {
const normalizedTag = tag.toLowerCase();
return normalizedTag
});
const tags = extractTags(content, tagMdInstance);
return tags.map(tag => tag.toLowerCase());
} catch (e) {
// Skip if file can't be read
return []
@ -60,13 +57,13 @@ const isReleased = (post) => {
}
const markdownItHashtag = (md) => {
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_\/]*)(?![a-zA-Z0-9_-])/;
// Matches "#tag" at the start of the input: a letter followed by letters,
// digits, "_" or "/". The lookahead rejects a match that would run straight
// into another word character or "-". The slash is escaped once; doubling the
// backslashes would add a literal "\" to the set of allowed tag characters.
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_\/]*)(?![a-zA-Z0-9_-])/;
const HASH_CODE = '#'.charCodeAt(0);
const SPACE_CODE = ' '.charCodeAt(0);
const TAB_CODE = '\t'.charCodeAt(0);
const NEWLINE_CODE = '\n'.charCodeAt(0);
const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0);
// Character codes of the real control characters. The escapes must be single
// ('\t', not '\\t'): with an escaped backslash the first character of each
// literal is "\" and all three constants would equal 92.
const TAB_CODE = '\t'.charCodeAt(0);
const NEWLINE_CODE = '\n'.charCodeAt(0);
const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0);
md.inline.ruler.push('hashtag', function(state, silent) {
const pos = state.pos;
@ -99,42 +96,94 @@ const markdownItHashtag = (md) => {
const slug = tagName.toLowerCase();
return `<a href="/tags/${slug}/" class="inline-tag">#${md.utils.escapeHtml(tagName)}</a>`;
};
}
};
const md = markdownIt({
html: true,
breaks: true,
linkify: true
});
// Plugin: Strip trailing hashtag-only paragraphs from rendered output
// Must be applied AFTER footnote plugin since footnotes are moved to the end
const markdownItStripTrailingHashtags = (md) => {
md.core.ruler.push('strip_trailing_hashtags', function(state) {
const tokens = state.tokens;
md.use(markdownItContainer, 'details', {
validate: function (params) {
return params.trim().match(/^(.*)$/);
},
render: function (tokens, idx) {
const m = tokens[idx].info.trim().match(/^(.*)$/);
if (tokens[idx].nesting === 1) {
const title = md.utils.escapeHtml(m[1]);
return `<details class="expandable">\n<summary>${title}</summary>\n`;
} else {
return '</details>\n';
const isHashtagOnlyParagraph = (inlineToken) =>
inlineToken?.type === 'inline' &&
inlineToken.children?.every(child =>
child.type === 'hashtag' ||
(child.type === 'text' && child.content.trim() === '') ||
child.type === 'softbreak'
) &&
inlineToken.children?.some(child => child.type === 'hashtag');
const isHashtagParagraphAt = (idx) =>
tokens[idx]?.type === 'paragraph_open' &&
tokens[idx + 1]?.type === 'inline' &&
tokens[idx + 2]?.type === 'paragraph_close' &&
isHashtagOnlyParagraph(tokens[idx + 1]);
const footnoteIdx = tokens.findIndex(t => t.type === 'footnote_block_open');
const footnoteSectionStart = footnoteIdx === -1 ? tokens.length : footnoteIdx;
// Walk backwards from the footnote section (or the end of the token list) in
// steps of three tokens (paragraph_open, inline, paragraph_close) and stop at
// the first position that is not a hashtag-only paragraph. Only the contiguous
// trailing run is stripped; a hashtag-only paragraph earlier in the document
// must not cause the content after it to be removed.
// An out-of-range index yields undefined tokens, which isHashtagParagraphAt
// treats as "not a hashtag paragraph", so the loop ends at the document start.
let hashtagSectionStart = footnoteSectionStart;
while (isHashtagParagraphAt(hashtagSectionStart - 3)) {
  hashtagSectionStart -= 3;
}
state.tokens = tokens.filter((_, idx) =>
idx < hashtagSectionStart || idx >= footnoteSectionStart
);
return true;
});
};
const markdownItDetails = (md) => {
md.use(markdownItContainer, 'details', {
validate: (params) => params.trim().match(/^(.*)$/),
// Produces the HTML for a "details" container token: the opening
// <details>/<summary> markup when nesting === 1, otherwise the closing tag.
// The trimmed info string is HTML-escaped and used as the summary title.
render: (tokens, idx) => {
  const m = tokens[idx].info.trim().match(/^(.*)$/);
  if (tokens[idx].nesting === 1) {
    const title = md.utils.escapeHtml(m[1]);
    // Single-escaped "\n" so real newlines are emitted; "\\n" would write a
    // literal backslash-n into the page.
    return `<details class="expandable">\n<summary>${title}</summary>\n`;
  }
  return '</details>\n';
}
}
});
});
};
md.use(markdownItFootnote);
md.use(markdownItHashtag);
md.use(markdownItMermaid);
md.use(markdownItTaskLists, { enabled: true, label: true, labelAfter: true });
// Plugins registered on every instance built by createMarkdownInstance.
// Each entry is either a plugin function or a [plugin, options] pair; entries
// are applied in array order (see applyPlugins).
const sharedPlugins = [
markdownItFootnote,
markdownItHashtag,
markdownItMermaid,
[markdownItTaskLists, { enabled: true, label: true, labelAfter: true }],
markdownItDetails,
];
/**
 * Registers a list of plugins on a markdown-it instance, in order.
 *
 * @param {object} md - Instance exposing `use(plugin, options)`.
 * @param {Array<Function|Array>} plugins - Plugin functions, or
 *   `[plugin, options]` pairs when a plugin takes options.
 * @returns {object} The same `md` instance that was passed in.
 */
const applyPlugins = (md, plugins) => {
  plugins.forEach((entry) => {
    // A bare plugin is normalised to a one-element pair, leaving options undefined.
    const [pluginFn, options] = Array.isArray(entry) ? entry : [entry];
    md.use(pluginFn, options);
  });
  return md;
};
/**
 * Builds a markdown-it instance with the shared plugin set installed.
 *
 * @param {Array} [extraPlugins=[]] - Additional plugin entries, applied after
 *   every entry in `sharedPlugins`.
 * @returns {object} The configured markdown-it instance.
 */
const createMarkdownInstance = (extraPlugins = []) => {
  const baseOptions = { html: true, breaks: true, linkify: true };
  const instance = markdownIt(baseOptions);
  const pluginList = [...sharedPlugins, ...extraPlugins];
  applyPlugins(instance, pluginList);
  return instance;
};
// Rendering instance: shared plugins plus the rule that strips trailing
// hashtag-only paragraphs. The strip plugin is passed as an extra so it is
// registered after the footnote plugin in sharedPlugins.
const md = createMarkdownInstance([markdownItStripTrailingHashtags]);
// Tag-extraction instance: shared plugins only, without the strip rule.
// NOTE(review): presumably kept separate so extractTags still sees trailing
// hashtags — confirm against extractTags.
const tagExtractorMd = createMarkdownInstance();
module.exports = (eleventyConfig) => {
eleventyConfig.addPlugin(syntaxHighlight);
eleventyConfig.addFilter("extractTags", (content) => extractTags(content, md));
eleventyConfig.addFilter("extractTags", (content) => extractTags(content, tagExtractorMd));
eleventyConfig.addFilter("extractTagsFromFile", (filePath) => {
try {
const content = fs.readFileSync(filePath, 'utf-8');
return extractTags(content, md);
return extractTags(content, tagExtractorMd);
} catch (e) {
return [];
}
@ -216,7 +265,7 @@ module.exports = (eleventyConfig) => {
const tagMap = {};
posts.forEach(post => {
const tags = getPostTags(post, md)
const tags = getPostTags(post, tagExtractorMd)
tags.forEach((tag) => {
tagMap[tag] = {
name: tag,
@ -275,7 +324,7 @@ module.exports = (eleventyConfig) => {
const filePath = recipe.inputPath;
try {
const content = fs.readFileSync(filePath, 'utf-8');
const tags = extractTags(content, md);
const tags = extractTags(content, tagExtractorMd);
return tags.map(tag => tag.toLowerCase());
} catch (e) {
return [];
@ -287,7 +336,7 @@ module.exports = (eleventyConfig) => {
const recipes = collectionApi.getFilteredByGlob("recipes/**/*.md")
.filter(r => r.data.isNewestVersion && r.data.draft !== true);
const postTags = posts.flatMap(post => getPostTags(post, md));
const postTags = posts.flatMap(post => getPostTags(post, tagExtractorMd));
const recipeTags = recipes.flatMap(recipe => getRecipeTags(recipe));
return [...new Set([...postTags, ...recipeTags])].sort();
@ -306,7 +355,7 @@ module.exports = (eleventyConfig) => {
// Build tag map from posts
const postTagMap = posts.reduce((acc, post) => {
const tags = getPostTags(post, md);
const tags = getPostTags(post, tagExtractorMd);
return tags.reduce((innerAcc, tag) => ({
...innerAcc,
[tag]: {

View file

@ -255,3 +255,5 @@ The HyperLogLog approach trades exactness (~3% error) for dramatic scalability
## References
A working simulation is available at [simulations/hyperloglog-tombstone/simulation.ts](/simulations/hyperloglog-tombstone/simulation.ts).
#algorithm #computer #distributed #peer_to_peer