-
-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathplugin-scraper.mjs
143 lines (116 loc) · 3.16 KB
/
plugin-scraper.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
// NOTE(review): the scraped page header names this file `plugin-scraper.mjs`,
// but it uses CommonJS `require` — confirm how the script is actually executed.
// Headless browser driver used to download each plugin README.
const puppeteer = require('puppeteer-extra')
// File-system helpers; `outputFileSync` is used to write the generated pages.
const fse = require('fs-extra')
// Shorthand logger used throughout the script.
const log = console.log
// Set to true to print verbose details from the markdown transformers.
const debug = false
// Site configuration; its `plugins` tree lists the READMEs to scrape.
const config = require('../.vitepress/config')
/**
 * Scrapes the README of every plugin listed in `config.plugins` and writes it
 * to `./<plugin.link>.md`, prefixed with front matter and a link to the
 * plugin repository.
 *
 * Entries with a `category` are treated as groups and their `plugins` are
 * walked recursively; entries with a `readme` URL are fetched. A failure for
 * a single plugin is logged and does not stop the remaining plugins.
 *
 * The process is always terminated explicitly at the end: exit code 0 on
 * completion, 1 when a fatal error (e.g. the browser failing to launch)
 * aborted the run.
 *
 * @returns {Promise<void>}
 */
async function main() {
  let browser
  let exitCode = 0

  try {
    browser = await puppeteer.launch({
      headless: true,
    })
    const promises = []

    const fetchData = async (plugin) => {
      const url = plugin.readme
      log(`Fetching: ${url}`)

      const page = await browser.newPage()
      let data
      try {
        // Go to the plugin page and locate the README.md
        const response = await page.goto(url, {
          waitUntil: 'domcontentloaded',
        })
        // `goto` may resolve with null, and an error page must not be saved
        // as if it were the README.
        if (!response || !response.ok()) {
          const status = response ? response.status() : 'no response'
          throw new Error(`Failed to fetch ${url} (status: ${status})`)
        }
        // this is the README text :)
        data = await response.text()
      } finally {
        // Always release the tab, even when the fetch failed.
        await page.close().catch(log)
      }

      const headerSnippet = [
        `---`,
        `title: ${JSON.stringify(plugin.name)}`,
        `link: ${url}`,
        `---`,
      ].join('\n')

      // adds a link to the actual plugin repo to the MD file
      // (`rel="noopener"` is the valid form; a bare `noopener` attribute has no effect)
      const repoSnippet = [
        `<div style="width: 100%; padding: 1.2em 0em">`,
        `  <img alt="github logo" src="../assets/images/github/GitHub-Mark-32px.png" style="display: inline; margin: 1em 0.5em 1em 0em">`,
        `  <a href="${plugin.repo}" target="_blank" rel="noopener">${plugin.name}</a>`,
        `</div>`,
      ].join('\n')

      // transformers...
      const content = transformNoteStyle(transformImagePaths(data, url))

      // save the file
      fse.outputFileSync(
        `./${plugin.link}.md`,
        `${headerSnippet}\n\n${repoSnippet}\n\n${content}`
      )
      log(`File saved for plugin: ${plugin.name}`)
    }

    const walkPlugin = (plugin) => {
      if (plugin.category) {
        // A category without a `plugins` list is simply empty.
        return walkPlugins(plugin.plugins ?? [])
      }
      if (plugin.readme) {
        promises.push(
          // Best effort: log the failure and carry on with the other plugins.
          fetchData(plugin).catch((error) => {
            log(error)
          })
        )
      }
    }

    const walkPlugins = (plugins) => {
      plugins.forEach(walkPlugin)
    }

    walkPlugins(config.plugins)
    await Promise.all(promises)
    log('Done.')
  } catch (error) {
    log(error)
    // Signal the failure to the caller instead of exiting with status 0.
    exitCode = 1
  } finally {
    if (browser) {
      await browser.close().catch(log)
    }
  }

  process.exit(exitCode)
}
/**
 * Converts blockquote-style notes into VitePress custom containers.
 *
 * A note is a blockquote whose first line starts with a bold title, e.g.
 *
 *     > **Note**: first line
 *     > second line
 *
 * which becomes
 *
 *     :::tip Note
 *
 *     first line
 *     second line
 *
 *     :::
 *
 * Titles "note" and "warning" (case-insensitive) map to the `tip` and
 * `warning` containers; any other title falls back to `tip`.
 *
 * @param {string} content - Markdown source.
 * @returns {string} The markdown with every note rewritten.
 */
function transformNoteStyle(content) {
  // `> **Title**:` followed by the first line and any directly following `>` lines.
  const NOTE_RE = /^>\s+\*{2}([^\*]+)\*{2}:?[^\S\r\n]+(.+(?:\n>.*)*)/gm

  // A Map avoids resolving inherited object keys such as "constructor".
  const containerTypes = new Map([
    ['note', 'tip'],
    ['warning', 'warning'],
  ])

  // A single replace pass with a callback: every match is located in the
  // original text (no stale `lastIndex` after the string changes length), and
  // the returned text is inserted literally, so `$&` or `$$` inside a README
  // is not treated as a replacement pattern.
  return content.replace(NOTE_RE, (match, rawTitle, rawBody) => {
    if (debug) {
      console.log({
        match,
        title: rawTitle,
        cont: rawBody,
      })
    }

    // normalize title
    const title = rawTitle.replace(/:/g, '')

    // strip the "> " quote marker from each line; only horizontal whitespace
    // is removed so that an empty ">" line stays a paragraph break
    const body = rawBody.replace(/^>[^\S\r\n]*/gm, '')

    const containerType = containerTypes.get(title.toLowerCase()) ?? 'tip'
    return `:::${containerType} ${title}\n\n${body}\n\n:::`
  })
}
/**
 * Rewrites markdown image links whose target starts with `.` or `/` into
 * absolute URLs. Image titles (`"..."` after the path) are dropped.
 *
 * - Paths starting with `/` are appended to the repository root, which is
 *   `baseUrl` cut after its `/main` segment. If `baseUrl` has no `/main/`
 *   segment the whole `baseUrl` is used as the root.
 * - Paths starting with `.` (`./x`, `../x`) are resolved against `baseUrl`
 *   like a relative link in the fetched document.
 *
 * Rudimentary approach, but seems fine for the plugin readmes we have for now.
 *
 * @param {string} content - Markdown source.
 * @param {string} baseUrl - URL the markdown was fetched from.
 * @returns {string} The markdown with rewritten image links.
 */
function transformImagePaths(content, baseUrl) {
  // Anchored on the slash so "main" inside another segment is not matched.
  const repoRoot = baseUrl.replace(/(\/main)\/.+/, '$1')

  const toAbsoluteUrl = (path) => {
    if (path.startsWith('/') && !path.startsWith('//')) {
      return `${repoRoot}${path}`
    }
    try {
      // Proper resolution; plain concatenation would yield ".../main./img.png".
      return new URL(path, baseUrl).href
    } catch {
      // baseUrl is not an absolute URL: fall back to plain concatenation.
      return `${repoRoot}${path}`
    }
  }

  // The optional title is matched as `"[^"]*"` so it cannot run on to a later
  // quote on the same line and swallow a second image.
  return content.replaceAll(
    /(!\[[^\]]*\]\()([./].*?)\s*(?:"[^"]*")?\s*(\))/g,
    (_, p1, path, p3) => {
      const url = toAbsoluteUrl(path)
      debug &&
        console.log('Transforming image path', {
          p1,
          path,
          p3,
          baseUrl,
          url,
        })
      return `${p1}${url}${p3}`
    }
  )
}
// Entry point: start the scraper. `main` catches its own errors and ends the
// process itself, so the returned promise is intentionally not awaited.
main()