Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 42 additions & 72 deletions src/node/plugins/localSearchPlugin.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import _debug from 'debug'
import fs from 'fs-extra'
import MiniSearch from 'minisearch'
import pMap from 'p-map'
import path from 'path'
import type { Plugin, ViteDevServer } from 'vite'
import type { SiteConfig } from '../config'
Expand Down Expand Up @@ -53,15 +54,18 @@ export async function localSearchPlugin(

const options = siteConfig.site.themeConfig.search.options || {}

function render(file: string) {
async function render(file: string) {
if (!fs.existsSync(file)) return ''
const { srcDir, cleanUrls = false } = siteConfig
const relativePath = slash(path.relative(srcDir, file))
const env: MarkdownEnv = { path: file, relativePath, cleanUrls }
let src = fs.readFileSync(file, 'utf-8')
src = processIncludes(srcDir, src, file, [])
if (options._render) return options._render(src, env, md)
const html = md.render(src, env)
return env.frontmatter?.search === false ? '' : html
const md_raw = await fs.promises.readFile(file, 'utf-8')
const md_src = processIncludes(srcDir, md_raw, file, [])
if (options._render) return await options._render(md_src, env, md)
else {
const html = md.render(md_src, env)
return env.frontmatter?.search === false ? '' : html
}
}

const indexByLocales = new Map<string, MiniSearch<IndexObject>>()
Expand All @@ -85,11 +89,6 @@ export async function localSearchPlugin(
return siteData?.localeIndex ?? 'root'
}

function getIndexForPath(file: string) {
const locale = getLocaleForPath(file)
return getIndexByLocale(locale)
}

let server: ViteDevServer | undefined

function onIndexUpdated() {
Expand Down Expand Up @@ -123,43 +122,39 @@ export async function localSearchPlugin(
return id
}

async function indexAllFiles(files: string[]) {
const documentsByLocale = new Map<string, IndexObject[]>()
await Promise.all(
files
.filter((file) => fs.existsSync(file))
.map(async (file) => {
const fileId = getDocId(file)
const sections = splitPageIntoSections(render(file))
if (sections.length === 0) return
const locale = getLocaleForPath(file)
let documents = documentsByLocale.get(locale)
if (!documents) {
documents = []
documentsByLocale.set(locale, documents)
}
documents.push(
...sections.map((section) => ({
id: `${fileId}#${section.anchor}`,
text: section.text,
title: section.titles.at(-1)!,
titles: section.titles.slice(0, -1)
}))
)
})
)
for (const [locale, documents] of documentsByLocale) {
const index = getIndexByLocale(locale)
index.removeAll()
await index.addAllAsync(documents)
async function indexFile(page: string) {
const file = path.join(siteConfig.srcDir, page)
// get file metadata
const fileId = getDocId(file)
const locale = getLocaleForPath(file)
const index = getIndexByLocale(locale)
// retrieve file and split into "sections"
const html = await render(file)
const sections =
// user provided generator
(await options.miniSearch?._splitIntoSections?.(file, html)) ??
// default implementation
splitPageIntoSections(html)
// add sections to the locale index
for await (const section of sections) {
if (!section || !(section.text || section.titles)) break
const { anchor, text, titles } = section
const id = anchor ? [fileId, anchor].join('#') : fileId
index.add({
id,
text,
title: titles.at(-1)!,
titles: titles.slice(0, -1)
})
}
debug(`🔍️ Indexed ${files.length} files`)
}

async function scanForBuild() {
await indexAllFiles(
siteConfig.pages.map((f) => path.join(siteConfig.srcDir, f))
)
debug('🔍️ Indexing files for search...')
await pMap(siteConfig.pages, indexFile, {
concurrency: siteConfig.buildConcurrency
})
debug('✅ Indexing finished...')
}

return {
Expand Down Expand Up @@ -214,25 +209,8 @@ export async function localSearchPlugin(

async handleHotUpdate({ file }) {
if (file.endsWith('.md')) {
const fileId = getDocId(file)
if (!fs.existsSync(file)) return
const index = getIndexForPath(file)
const sections = splitPageIntoSections(render(file))
if (sections.length === 0) return
for (const section of sections) {
const id = `${fileId}#${section.anchor}`
if (index.has(id)) {
index.discard(id)
}
index.add({
id,
text: section.text,
title: section.titles.at(-1)!,
titles: section.titles.slice(0, -1)
})
}
await indexFile(file)
debug('🔍️ Updated', file)

onIndexUpdated()
}
}
Expand All @@ -242,20 +220,13 @@ export async function localSearchPlugin(
const headingRegex = /<h(\d*).*?>(.*?<a.*? href="#.*?".*?>.*?<\/a>)<\/h\1>/gi
const headingContentRegex = /(.*?)<a.*? href="#(.*?)".*?>.*?<\/a>/i

interface PageSection {
anchor: string
titles: string[]
text: string
}

/**
* Splits HTML into sections based on headings
*/
function splitPageIntoSections(html: string) {
function* splitPageIntoSections(html: string) {
const result = html.split(headingRegex)
result.shift()
let parentTitles: string[] = []
const sections: PageSection[] = []
for (let i = 0; i < result.length; i += 3) {
const level = parseInt(result[i]) - 1
const heading = result[i + 1]
Expand All @@ -266,14 +237,13 @@ function splitPageIntoSections(html: string) {
if (!title || !content) continue
const titles = parentTitles.slice(0, level)
titles[level] = title
sections.push({ anchor, titles, text: getSearchableText(content) })
yield { anchor, titles, text: getSearchableText(content) }
if (level === 0) {
parentTitles = [title]
} else {
parentTitles[level] = title
}
}
return sections
}

function getSearchableText(content: string) {
Expand Down
32 changes: 28 additions & 4 deletions types/default-theme.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ import type MarkdownIt from 'markdown-it'
import type { Options as MiniSearchOptions } from 'minisearch'
import type { ComputedRef, Ref } from 'vue'
import type { DocSearchProps } from './docsearch.js'
import type { LocalSearchTranslations } from './local-search.js'
import type { MarkdownEnv, PageData } from './shared.js'
import type {
LocalSearchTranslations,
PageSplitSection
} from './local-search.js'
import type { Awaitable, MarkdownEnv, PageData } from './shared.js'

export namespace DefaultTheme {
export interface Config {
Expand Down Expand Up @@ -422,13 +425,34 @@ export namespace DefaultTheme {
* @see https://lucaong.github.io/minisearch/modules/_minisearch_.html#searchoptions-1
*/
searchOptions?: MiniSearchOptions['searchOptions']
}

/**
* Overrides the default regex-based page splitter.
*
* The splitter may return an array of sections, a promise of such an array,
* or a sync or async generator that yields sections one at a time.
* Async generators make it possible to split pages in true parallel
* (when used along with `node:child_process` or `worker_threads`), which
* is especially useful for scalability on large sites.
*
* @param path - absolute path to the markdown source file
* @param html - the page rendered as HTML
*/
_splitIntoSections?: (
path: string,
html: string
) =>
| AsyncGenerator<PageSplitSection>
| Generator<PageSplitSection>
| Awaitable<PageSplitSection[]>
}
/**
* Allows transformation of content before indexing (Node only).
* The result may be returned directly or as a promise.
* Return an empty string to skip indexing.
*/
_render?: (src: string, env: MarkdownEnv, md: MarkdownIt) => string
_render?: (
src: string,
env: MarkdownEnv,
md: MarkdownIt
) => Awaitable<string>
}

// algolia -------------------------------------------------------------------
Expand Down
6 changes: 6 additions & 0 deletions types/local-search.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,9 @@ export interface FooterTranslations {
closeText?: string
closeKeyAriaLabel?: string
}

/**
* One searchable section of a rendered page, as produced by a page splitter
* and consumed by the local search indexer.
*/
export interface PageSplitSection {
/**
* Heading anchor of the section. When present, the indexer appends it to the
* page's document id as `#anchor`; when absent, the page id alone is used.
*/
anchor?: string
/**
* Heading trail ending with this section's own heading. The last entry is
* indexed as the section title, the preceding entries as its parent titles.
*/
titles: string[]
/** Searchable text content of the section. */
text: string
}