diff --git a/eleventy.config.js b/eleventy.config.js index 0d12675..44882a5 100644 --- a/eleventy.config.js +++ b/eleventy.config.js @@ -6,22 +6,25 @@ const syntaxHighlight = require("@11ty/eleventy-plugin-syntaxhighlight"); const fs = require("fs"); const { DateTime } = require("luxon"); -const tagPattern = /(?<=^|\s)#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/g; - -// TODO: is there any reasonable way to make this use real markdown parsing because right now this is sketchy -const extractTags = (content) => { +const extractTags = (content, mdInstance) => { if (!content) return []; - const matches = content.match(tagPattern); - if (!matches) return []; - const tags = [...new Set(matches.map(m => m.slice(1)))]; - return tags; + + const collectHashtags = (tokens) => + tokens.flatMap(token => [ + ...(token.type === 'hashtag' ? [token.content] : []), + ...(token.children ? collectHashtags(token.children) : []) + ]); + + const tokens = mdInstance.parse(content, {}); + const tags = collectHashtags(tokens); + return [...new Set(tags)]; } -const getPostTags = (post) => { +const getPostTags = (post, mdInstance) => { const filePath = post.inputPath; try { const content = fs.readFileSync(filePath, 'utf-8'); - const tags = extractTags(content); + const tags = extractTags(content, mdInstance); return tags.map(tag => { const normalizedTag = tag.toLowerCase(); return normalizedTag @@ -38,16 +41,22 @@ const isReleased = (post) => { const markdownItHashtag = (md) => { const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/; + + const HASH_CODE = '#'.charCodeAt(0); + const SPACE_CODE = ' '.charCodeAt(0); + const TAB_CODE = '\t'.charCodeAt(0); + const NEWLINE_CODE = '\n'.charCodeAt(0); + const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0); md.inline.ruler.push('hashtag', function(state, silent) { const pos = state.pos; const ch = state.src.charCodeAt(pos); - if (ch !== '#') return false; + if (ch !== HASH_CODE) return false; if (pos > 0) { const prevCh = state.src.charCodeAt(pos - 1); - if (prevCh !== ' ' && prevCh !== '\t' && prevCh !== '\n' && prevCh !== '\r') { + if (prevCh !== SPACE_CODE && prevCh !== TAB_CODE && prevCh !== NEWLINE_CODE && prevCh !== CARRIAGE_RETURN_CODE) { return false; } } @@ -99,11 +108,11 @@ md.use(markdownItMermaid); module.exports = (eleventyConfig) => { eleventyConfig.addPlugin(syntaxHighlight); - eleventyConfig.addFilter("extractTags", extractTags); + eleventyConfig.addFilter("extractTags", (content) => extractTags(content, md)); eleventyConfig.addFilter("extractTagsFromFile", (filePath) => { try { const content = fs.readFileSync(filePath, 'utf-8'); - return extractTags(content); + return extractTags(content, md); } catch (e) { return []; } @@ -183,7 +192,7 @@ module.exports = (eleventyConfig) => { eleventyConfig.addCollection("contentTags", (collectionApi) => { const posts = collectionApi.getFilteredByGlob("posts/**/*.md").filter(isReleased); - return [...new Set(posts.flatMap(getPostTags))].sort(); + return [...new Set(posts.flatMap(post => getPostTags(post, md)))].sort(); }); eleventyConfig.addCollection("postsByTag", (collectionApi) => { @@ -191,7 +200,7 @@ module.exports = (eleventyConfig) => { const tagMap = {}; posts.forEach(post => { - const tags = getPostTags(post) + const tags = getPostTags(post, md) tags.forEach((tag) => { tagMap[tag] = { name: tag,