feat: made tag extraction more robust
This commit is contained in:
parent
9517dc4d7a
commit
7b5f3adee9
1 changed file with 25 additions and 16 deletions
|
|
@ -6,22 +6,25 @@ const syntaxHighlight = require("@11ty/eleventy-plugin-syntaxhighlight");
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const { DateTime } = require("luxon");
|
const { DateTime } = require("luxon");
|
||||||
|
|
||||||
const tagPattern = /(?<=^|\s)#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/g;
|
const extractTags = (content, mdInstance) => {
|
||||||
|
|
||||||
// TODO: is there any reasonable way to make this use real markdown parsing because right now this is sketchy
|
|
||||||
const extractTags = (content) => {
|
|
||||||
if (!content) return [];
|
if (!content) return [];
|
||||||
const matches = content.match(tagPattern);
|
|
||||||
if (!matches) return [];
|
const collectHashtags = (tokens) =>
|
||||||
const tags = [...new Set(matches.map(m => m.slice(1)))];
|
tokens.flatMap(token => [
|
||||||
return tags;
|
...(token.type === 'hashtag' ? [token.content] : []),
|
||||||
|
...(token.children ? collectHashtags(token.children) : [])
|
||||||
|
]);
|
||||||
|
|
||||||
|
const tokens = mdInstance.parse(content, {});
|
||||||
|
const tags = collectHashtags(tokens);
|
||||||
|
return [...new Set(tags)];
|
||||||
}
|
}
|
||||||
|
|
||||||
const getPostTags = (post) => {
|
const getPostTags = (post, mdInstance) => {
|
||||||
const filePath = post.inputPath;
|
const filePath = post.inputPath;
|
||||||
try {
|
try {
|
||||||
const content = fs.readFileSync(filePath, 'utf-8');
|
const content = fs.readFileSync(filePath, 'utf-8');
|
||||||
const tags = extractTags(content);
|
const tags = extractTags(content, mdInstance);
|
||||||
return tags.map(tag => {
|
return tags.map(tag => {
|
||||||
const normalizedTag = tag.toLowerCase();
|
const normalizedTag = tag.toLowerCase();
|
||||||
return normalizedTag
|
return normalizedTag
|
||||||
|
|
@ -38,16 +41,22 @@ const isReleased = (post) => {
|
||||||
|
|
||||||
const markdownItHashtag = (md) => {
|
const markdownItHashtag = (md) => {
|
||||||
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/;
|
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/;
|
||||||
|
|
||||||
|
const HASH_CODE = '#'.charCodeAt(0);
|
||||||
|
const SPACE_CODE = ' '.charCodeAt(0);
|
||||||
|
const TAB_CODE = '\t'.charCodeAt(0);
|
||||||
|
const NEWLINE_CODE = '\n'.charCodeAt(0);
|
||||||
|
const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0);
|
||||||
|
|
||||||
md.inline.ruler.push('hashtag', function(state, silent) {
|
md.inline.ruler.push('hashtag', function(state, silent) {
|
||||||
const pos = state.pos;
|
const pos = state.pos;
|
||||||
const ch = state.src.charCodeAt(pos);
|
const ch = state.src.charCodeAt(pos);
|
||||||
|
|
||||||
if (ch !== '#') return false;
|
if (ch !== HASH_CODE) return false;
|
||||||
|
|
||||||
if (pos > 0) {
|
if (pos > 0) {
|
||||||
const prevCh = state.src.charCodeAt(pos - 1);
|
const prevCh = state.src.charCodeAt(pos - 1);
|
||||||
if (prevCh !== ' ' && prevCh !== '\t' && prevCh !== '\n' && prevCh !== '\r') {
|
if (prevCh !== SPACE_CODE && prevCh !== TAB_CODE && prevCh !== NEWLINE_CODE && prevCh !== CARRIAGE_RETURN_CODE) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -99,11 +108,11 @@ md.use(markdownItMermaid);
|
||||||
module.exports = (eleventyConfig) => {
|
module.exports = (eleventyConfig) => {
|
||||||
eleventyConfig.addPlugin(syntaxHighlight);
|
eleventyConfig.addPlugin(syntaxHighlight);
|
||||||
|
|
||||||
eleventyConfig.addFilter("extractTags", extractTags);
|
eleventyConfig.addFilter("extractTags", (content) => extractTags(content, md));
|
||||||
eleventyConfig.addFilter("extractTagsFromFile", (filePath) => {
|
eleventyConfig.addFilter("extractTagsFromFile", (filePath) => {
|
||||||
try {
|
try {
|
||||||
const content = fs.readFileSync(filePath, 'utf-8');
|
const content = fs.readFileSync(filePath, 'utf-8');
|
||||||
return extractTags(content);
|
return extractTags(content, md);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
|
|
@ -183,7 +192,7 @@ module.exports = (eleventyConfig) => {
|
||||||
eleventyConfig.addCollection("contentTags", (collectionApi) => {
|
eleventyConfig.addCollection("contentTags", (collectionApi) => {
|
||||||
const posts = collectionApi.getFilteredByGlob("posts/**/*.md").filter(isReleased);
|
const posts = collectionApi.getFilteredByGlob("posts/**/*.md").filter(isReleased);
|
||||||
|
|
||||||
return [...new Set(posts.flatMap(getPostTags))].sort();
|
return [...new Set(posts.flatMap(post => getPostTags(post, md)))].sort();
|
||||||
});
|
});
|
||||||
|
|
||||||
eleventyConfig.addCollection("postsByTag", (collectionApi) => {
|
eleventyConfig.addCollection("postsByTag", (collectionApi) => {
|
||||||
|
|
@ -191,7 +200,7 @@ module.exports = (eleventyConfig) => {
|
||||||
const tagMap = {};
|
const tagMap = {};
|
||||||
|
|
||||||
posts.forEach(post => {
|
posts.forEach(post => {
|
||||||
const tags = getPostTags(post)
|
const tags = getPostTags(post, md)
|
||||||
tags.forEach((tag) => {
|
tags.forEach((tag) => {
|
||||||
tagMap[tag] = {
|
tagMap[tag] = {
|
||||||
name: tag,
|
name: tag,
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue