feat: make tag extraction more robust
This commit is contained in:
parent
9517dc4d7a
commit
7b5f3adee9
1 changed file with 25 additions and 16 deletions
|
|
@ -6,22 +6,25 @@ const syntaxHighlight = require("@11ty/eleventy-plugin-syntaxhighlight");
|
|||
const fs = require("fs");
|
||||
const { DateTime } = require("luxon");
|
||||
|
||||
/**
 * Extract unique hashtag names from markdown content by walking the
 * token stream of a real markdown-it parse, instead of the old fragile
 * regex scan (the former `tagPattern` and its TODO are now obsolete and
 * removed).
 *
 * @param {string} content - Raw markdown source to scan.
 * @param {object} mdInstance - A markdown-it instance; assumed to have the
 *   hashtag inline rule installed so that `parse()` emits tokens with
 *   `type === 'hashtag'` whose `content` is the tag name — confirm against
 *   the `markdownItHashtag` plugin registered elsewhere in this file.
 * @returns {string[]} Unique tag names (without the leading '#'), in
 *   first-seen order; an empty array for empty/nullish content.
 */
const extractTags = (content, mdInstance) => {
  if (!content) return [];

  // Recursively gather the content of every `hashtag` token, including
  // tokens nested inside inline children.
  const collectHashtags = (tokens) =>
    tokens.flatMap(token => [
      ...(token.type === 'hashtag' ? [token.content] : []),
      ...(token.children ? collectHashtags(token.children) : [])
    ]);

  const tokens = mdInstance.parse(content, {});
  // Set dedupes while preserving first-seen order.
  return [...new Set(collectHashtags(tokens))];
};
|
||||
|
||||
const getPostTags = (post) => {
|
||||
const getPostTags = (post, mdInstance) => {
|
||||
const filePath = post.inputPath;
|
||||
try {
|
||||
const content = fs.readFileSync(filePath, 'utf-8');
|
||||
const tags = extractTags(content);
|
||||
const tags = extractTags(content, mdInstance);
|
||||
return tags.map(tag => {
|
||||
const normalizedTag = tag.toLowerCase();
|
||||
return normalizedTag
|
||||
|
|
@ -39,15 +42,21 @@ const isReleased = (post) => {
|
|||
const markdownItHashtag = (md) => {
|
||||
const hashtagRegex = /^#([a-zA-Z][a-zA-Z0-9_]*)(?![a-zA-Z0-9_-])/;
|
||||
|
||||
const HASH_CODE = '#'.charCodeAt(0);
|
||||
const SPACE_CODE = ' '.charCodeAt(0);
|
||||
const TAB_CODE = '\t'.charCodeAt(0);
|
||||
const NEWLINE_CODE = '\n'.charCodeAt(0);
|
||||
const CARRIAGE_RETURN_CODE = '\r'.charCodeAt(0);
|
||||
|
||||
md.inline.ruler.push('hashtag', function(state, silent) {
|
||||
const pos = state.pos;
|
||||
const ch = state.src.charCodeAt(pos);
|
||||
|
||||
if (ch !== '#') return false;
|
||||
if (ch !== HASH_CODE) return false;
|
||||
|
||||
if (pos > 0) {
|
||||
const prevCh = state.src.charCodeAt(pos - 1);
|
||||
if (prevCh !== ' ' && prevCh !== '\t' && prevCh !== '\n' && prevCh !== '\r') {
|
||||
if (prevCh !== SPACE_CODE && prevCh !== TAB_CODE && prevCh !== NEWLINE_CODE && prevCh !== CARRIAGE_RETURN_CODE) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -99,11 +108,11 @@ md.use(markdownItMermaid);
|
|||
module.exports = (eleventyConfig) => {
|
||||
eleventyConfig.addPlugin(syntaxHighlight);
|
||||
|
||||
eleventyConfig.addFilter("extractTags", extractTags);
|
||||
eleventyConfig.addFilter("extractTags", (content) => extractTags(content, md));
|
||||
eleventyConfig.addFilter("extractTagsFromFile", (filePath) => {
|
||||
try {
|
||||
const content = fs.readFileSync(filePath, 'utf-8');
|
||||
return extractTags(content);
|
||||
return extractTags(content, md);
|
||||
} catch (e) {
|
||||
return [];
|
||||
}
|
||||
|
|
@ -183,7 +192,7 @@ module.exports = (eleventyConfig) => {
|
|||
eleventyConfig.addCollection("contentTags", (collectionApi) => {
|
||||
const posts = collectionApi.getFilteredByGlob("posts/**/*.md").filter(isReleased);
|
||||
|
||||
return [...new Set(posts.flatMap(getPostTags))].sort();
|
||||
return [...new Set(posts.flatMap(post => getPostTags(post, md)))].sort();
|
||||
});
|
||||
|
||||
eleventyConfig.addCollection("postsByTag", (collectionApi) => {
|
||||
|
|
@ -191,7 +200,7 @@ module.exports = (eleventyConfig) => {
|
|||
const tagMap = {};
|
||||
|
||||
posts.forEach(post => {
|
||||
const tags = getPostTags(post)
|
||||
const tags = getPostTags(post, md)
|
||||
tags.forEach((tag) => {
|
||||
tagMap[tag] = {
|
||||
name: tag,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue