refactor: improve content transformer maintainability

unjs · Mar 18, 2024 · e7244af · e7244af
1 parent a4e2121
commit e7244af
Showing 1 changed file with 146 additions and 99 deletions.
diff --git a/app/server/plugins/content.ts b/app/server/plugins/content.ts
@@ -1,124 +1,102 @@
+// Nitro plugin that hooks into `content:file:afterParse` to post-process parsed markdown files:
+// file-level transforms run first, then every top-level body node goes through the node transforms.
+// @ts-ignore
 export default defineNitroPlugin((nitroApp) => {
-  nitroApp.hooks.hook('content:file:afterParse' as any, (file) => {
+  nitroApp.hooks.hook('content:file:afterParse', (file: ContentFile) => {
     // Filter out non-markdown files
-    if (!file._id.endsWith('.md')) {
+    if (!file._id?.endsWith('.md')) {
       return
     }
 
-    // Set the icon for the file if it is not already set
-    if (!file.icon) {
-      file.icon = resolveIcon(file._path)
-    }
-
-    // Remove first h1 from markdown files as it is added to front-matter as title
-    if (file.body?.children?.[0]?.tag === 'h1') {
-      const text = getTextContents(file.body.children[0].children)
-      if (file.title === text) {
-        file.body.children.shift()
-      }
-    }
+    // File-level transforms: both mutate `file` in place
+    transformFile(file)
+    resolveFileIcon(file)
 
-    // Only use the first blockquote as the description
-    const firstChild = file.body.children?.[0]
-    const firstChildText = getTextContents(firstChild?.children)
-    if (firstChild?.tag === 'blockquote' && firstChildText && !firstChildText.startsWith('!')) {
-      file.description = firstChildText
-      file.body.children.shift()
-    } else {
-      file.description = '' // Avoid duplication
-    }
-
-    // Handle GitHub flavoured markdown blockquotes
-    // https://github.com/orgs/community/discussions/16925
     for (const [idx, node] of (file.body?.children || []).entries()) {
-      if (
-        node.tag === 'blockquote' && // blockquote > p x 2 > span > text
-        ['!NOTE', '!TIP', '!IMPORTANT', '!WARNING', '!CAUTION'].includes(
-          node.children?.[0]?.children?.[0]?.children?.[0]?.value,
-        )
-      ) {
-        node.type = 'element'
-        node.tag = node.children?.[0]?.children?.[0]?.children?.[0]?.value.slice(1).toLowerCase()
-        node.children[0].children.shift()
-      }
-
-      // CONVERT OL->LI to Steps
-      // TODO: Find a way to opt out of this transformation if needed within markdown.
-      if (node.tag === 'ol' && node.children.length > 0 && node.children[0].tag === 'li') {
-        const stepsChildren = node.children.map((li) => {
-          const label = li.children?.[0]?.value ?? undefined
-          // Exclude br tags from children to avoid spacing
-          const children = (label ? li.children.slice(1) : []).filter((child) => !['br'].includes(child.tag))
-
-          return {
-            type: 'element',
-            tag: 'div',
-            props: {
-              label,
-            },
-            children,
-          }
-        })
-
-        // For now we only check if there is at least (1) content to generate the steps..
-        const stepsHaveContent = stepsChildren.some((step) => step.children.length > 0)
-        if (stepsHaveContent) {
-          node.type = 'element'
-          node.tag = 'Steps'
-          node.props = {}
-          node.children = stepsChildren
-        }
-      }
-
-      // Generate Code Groups
-      generateCodeGroup(idx, file.body.children)
+      // Node-level transforms; code groups receive the index and the sibling list
+      // because they look at the nodes that follow the current one
+      transformGithubAlert(node)
+      transformStepsList(node)
+      transformCodeGroups(idx, file.body?.children)
     }
   })
 })
 
-function isNamedCodeBlock(children: any): boolean {
-  return children?.tag === 'pre' && children?.children?.[0]?.tag === 'code' && children?.props?.filename
+// --- transform github alerts ---
+
+/**
+ * Handles GitHub flavoured markdown alert blockquotes.
+ * https://github.com/orgs/community/discussions/16925
+ *
+ * A blockquote whose first nested text value is one of `!NOTE`, `!TIP`, `!IMPORTANT`,
+ * `!WARNING` or `!CAUTION` is retagged in place with the lowercased marker name
+ * (without the `!`), and the node carrying the marker is removed from the first child.
+ */
+function transformGithubAlert(node: ContentNode) {
+  if (node.tag !== 'blockquote') {
+    return
+  }
+
+  // blockquote > p x 2 > span > text
+  const firstParagraph = node.children?.[0]
+  const marker = firstParagraph?.children?.[0]?.children?.[0]?.value || ''
+  const isAlertMarker = ['!NOTE', '!TIP', '!IMPORTANT', '!WARNING', '!CAUTION'].includes(marker)
+  if (!isAlertMarker) {
+    return
+  }
+
+  node.type = 'element'
+  node.tag = marker.slice(1).toLowerCase()
+  // Drop the child that carried the marker so it is not rendered
+  firstParagraph?.children?.shift()
 }
 
-function generateCodeGroup(currChildIdx: number, children: any[]) {
-  if (!isNamedCodeBlock(children[currChildIdx])) {
-    return
+// --- transform steps list ---
+
+/**
+ * Converts an ordered list (`ol` whose first child is an `li`) into a `Steps` element, in place.
+ *
+ * Each list item becomes a `div` whose `label` prop is the `value` of the item's first child and
+ * whose children are the remaining item children without `br` nodes. Items without a label get
+ * no children. The list is only rewritten when at least one step ends up with content.
+ */
+function transformStepsList(node: ContentNode) {
+  // TODO: Find a way to opt out of this transformation if needed within markdown.
+  const items = node.children
+  if (node.tag === 'ol' && items?.length && items[0].tag === 'li') {
+    const steps: ContentNode[] = []
+    let anyStepHasContent = false
+
+    for (const item of items) {
+      const [head, ...rest] = item.children ?? []
+      const label = head?.value ?? undefined
+      // Exclude br tags from children to avoid spacing
+      const content = label ? rest.filter((child) => child.tag !== 'br') : []
+      anyStepHasContent = anyStepHasContent || content.length > 0
+      steps.push({ type: 'element', tag: 'div', props: { label }, children: content })
+    }
+
+    // For now we only check if there is at least (1) content to generate the steps..
+    if (anyStepHasContent) {
+      node.type = 'element'
+      node.tag = 'Steps'
+      node.props = {}
+      node.children = steps
+    }
   }
+}
 
-  const group: any[] = []
+// --- transform first h1 and blockquote ---
 
-  for (let i = currChildIdx; i < children.length; i++) {
-    const nextNode = children[i]
-    if (!isNamedCodeBlock(nextNode)) {
-      break
+/**
+ * Applies the file-level clean-ups to a parsed markdown file, mutating it in place.
+ *
+ * - Drops the leading `h1` when its text equals `file.title`.
+ * - If the first remaining node is a blockquote with non-empty text that does not start with `!`,
+ *   its text becomes `file.description` and the node is removed. Otherwise `file.description`
+ *   is set to an empty string, replacing whatever value it had before.
+ *
+ * NOTE(review): blockquotes starting with `!` are presumably skipped because they are alert
+ * markers such as `!NOTE` that `transformGithubAlert` handles later. Confirm.
+ */
+function transformFile(file: ContentFile) {
+  // Remove first h1 from markdown files as it is added to front-matter as title
+  if (file.body?.children?.[0]?.tag === 'h1') {
+    const text = _getTextContents(file.body.children[0].children)
+    if (file.title === text) {
+      file.body.children.shift()
     }
-    group.push(nextNode)
-    children[i] = { type: 'text', value: '' }
   }
 
-  // Replace current children with the new code group if it has two or more code blocks
-  if (group.length > 1) {
-    children[currChildIdx] = {
-      type: 'element',
-      tag: 'CodeGroup',
-      children: [...group],
-    }
+  // Only use the first blockquote as the description
+  const firstChild = file.body?.children?.[0]
+  const firstChildText = _getTextContents(firstChild?.children)
+  if (firstChild?.tag === 'blockquote' && firstChildText && !firstChildText.startsWith('!')) {
+    file.description = firstChildText
+    file.body?.children?.shift()
+  } else {
+    file.description = '' // Avoid duplication
   }
 }
 
-function getTextContents(children: any[]): string {
-  return (children || [])
-    .map((child) => {
-      if (child.type === 'element') {
-        return getTextContents(child.children)
-      }
-      return child.value
-    })
-    .join('')
+// --- resolve icon ---
+
+/**
+ * Sets `file.icon` from `_resolveIcon(file._path)` unless the file already has an icon.
+ */
+function resolveFileIcon(file: ContentFile) {
+  // An icon that is already set on the file is left untouched
+  if (!file.icon) {
+    file.icon = _resolveIcon(file._path)
+  }
 }
 
-// A set of common icons
-const commonIcons = [
+const _commonIcons = [
   {
     pattern: 'guide',
     icon: 'ph:book-open-duotone',
@@ -145,16 +123,85 @@ const commonIcons = [
   },
 ]
 
-function resolveIcon(path: string) {
+/**
+ * Looks up an icon for a content path.
+ *
+ * The first character of `path` is dropped (presumably the leading `/`; confirm against callers),
+ * the rest is split on `/`, and the segments are checked from last to first. The icon of the first
+ * `_commonIcons` entry whose `pattern` is contained in a segment is returned.
+ *
+ * @param path Content path; defaults to an empty string.
+ * @returns The matching icon name, or `undefined` when no segment matches any pattern.
+ */
+function _resolveIcon(path: string = '') {
   // Split the path into parts and reverse it
   const paths = path.slice(1).split('/').reverse()
 
   // Search for icons in reverse order
   for (const p of paths) {
-    for (const icon of commonIcons) {
+    for (const icon of _commonIcons) {
       if (p.includes(icon.pattern)) {
         return icon.icon
       }
     }
   }
 }
+
+// --- transform code groups ---
+
+/**
+ * Groups a run of consecutive named code blocks into a single `CodeGroup` element.
+ *
+ * Starting at `currChildIdx`, the run of adjacent named code blocks is collected. When the run
+ * contains two or more blocks, `children[currChildIdx]` is replaced by a `CodeGroup` element
+ * holding them and the remaining slots of the run are replaced by empty text nodes.
+ * A lone named code block is left untouched (it was previously replaced by an empty text node
+ * and therefore lost).
+ *
+ * @param currChildIdx Index in `children` at which the run may start.
+ * @param children Sibling list, mutated in place; nothing happens when it is empty or missing.
+ */
+function transformCodeGroups(currChildIdx: number, children: ContentNode[] = []) {
+  if (!children?.length || !_isNamedCodeBlock(children[currChildIdx])) {
+    return
+  }
+
+  // Find the end (exclusive) of the run of named code blocks starting at currChildIdx
+  let runEnd = currChildIdx + 1
+  while (runEnd < children.length && _isNamedCodeBlock(children[runEnd])) {
+    runEnd++
+  }
+
+  // A single named code block is not a group: keep the node where it is
+  if (runEnd - currChildIdx < 2) {
+    return
+  }
+
+  const group = children.slice(currChildIdx, runEnd)
+
+  // Grouped siblings are blanked rather than removed, so the list keeps its length
+  // (presumably so that a caller iterating over `children` by index is not disturbed)
+  for (let i = currChildIdx + 1; i < runEnd; i++) {
+    children[i] = { type: 'text', value: '' }
+  }
+
+  children[currChildIdx] = {
+    type: 'element',
+    tag: 'CodeGroup',
+    children: group,
+  }
+}
+
+/**
+ * Tells whether a node is a `pre` element whose first child is a `code` element
+ * and which has a non-empty `filename` prop.
+ */
+function _isNamedCodeBlock(children: ContentNode): boolean {
+  // Coerce explicitly: the last operand is the filename value itself, not a boolean
+  return children?.tag === 'pre' && children?.children?.[0]?.tag === 'code' && Boolean(children?.props?.filename)
+}
+
+// --- internal utils ---
+
+/**
+ * Concatenates the text found in a list of nodes.
+ *
+ * Nodes whose `type` is `'element'` contribute the text of their own children (recursively);
+ * every other node contributes its `value`, or nothing when it has none.
+ *
+ * @param children Nodes to read; a missing list yields an empty string.
+ */
+function _getTextContents(children: ContentNode[] = []): string {
+  return (children || []).reduce<string>((text, child) => {
+    const piece = child.type === 'element' ? _getTextContents(child.children) : child.value
+    // A missing value adds nothing instead of the string "undefined"
+    return text + (piece ?? '')
+  }, '')
+}
+
+// --- types ---
+
+// TODO: the @nuxt/content runtime types seem to be poorly typed and also crash my TS server (or I might be using them wrong).
+
+/** Minimal shape of a parsed content tree node, limited to the fields used by the transformers in this file. */
+interface ContentNode {
+  /** Node kind; this file checks for `'element'` and creates `'element'` and `'text'` nodes. */
+  type?: string
+  /** Tag name of the node, e.g. `'blockquote'`, `'ol'`, `'li'`, `'pre'` or `'code'`. */
+  tag?: string
+  /** Node properties; this file reads `filename` and writes `label`. */
+  props?: Record<string, any>
+  /** Nested nodes. */
+  children?: ContentNode[]
+  /** Text carried by the node; read for nodes whose `type` is not `'element'`. */
+  value?: string
+}
+
+/** Minimal shape of a parsed content file, limited to the fields used by the transformers in this file. */
+interface ContentFile {
+  /** File identifier; only files whose `_id` ends with `.md` are transformed. */
+  _id?: string
+  /** Path handed to `_resolveIcon` when the file has no icon yet. */
+  _path?: string
+  /** Title; compared with the text of a leading `h1` to drop the duplicated heading. */
+  title?: string
+  /** Description; overwritten by `transformFile`. */
+  description?: string
+  /** Icon name; filled in by `resolveFileIcon` when empty. */
+  icon?: string
+  /** Parsed body; `children` holds the top-level nodes. */
+  body?: {
+    children?: ContentNode[]
+  }
+}