-
Notifications
You must be signed in to change notification settings - Fork 5.3k
/
Copy pathvalidate-mdx-links.mjs
executable file
·309 lines (269 loc) · 8.94 KB
/
validate-mdx-links.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import { promises as fs } from "fs";
import {
dirname, join, relative, basename,
} from "path";
import chalk from "chalk";
/**
 * Turn the text of a Markdown heading into the anchor slug used to link to it.
 *
 * Steps, in order: Markdown links are reduced to their label, the text is
 * lower-cased, backticks are dropped, every character other than a-z, 0-9,
 * underscore, whitespace and hyphen is removed, surrounding whitespace is
 * trimmed, and each remaining run of whitespace becomes a single hyphen.
 *
 * @param {string} headerText - Heading text without the leading `#` markers.
 * @returns {string} The anchor slug (without a leading `#`).
 */
function headerToAnchor(headerText) {
  // [label](url) -> label, so the URL never leaks into the slug
  const linkLabelsOnly = headerText.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
  const lowered = linkLabelsOnly.toLowerCase();
  // Backticks are inline-code formatting only
  const withoutBackticks = lowered.replace(/`/g, "");
  // Underscores and hyphens survive; all other punctuation is stripped
  const slugCharsOnly = withoutBackticks.replace(/[^a-z0-9_\s-]/g, "");
  const trimmed = slugCharsOnly.trim();
  return trimmed.replace(/\s+/g, "-");
}
/**
 * Convert a link written relative to the current MDX file into a
 * site-absolute path (rooted at the "pages" directory).
 *
 * @param {string} relativeLink - Link target as written in the document;
 *   either a bare "#anchor" or a relative path (optionally with an anchor).
 * @param {string} currentFilePath - Path of the MDX file containing the link.
 * @returns {string} Path beginning with "/", relative to "pages".
 */
function resolveRelativeLink(relativeLink, currentFilePath) {
  const isAnchorOnly = relativeLink.startsWith("#");

  if (!isAnchorOnly) {
    // Resolve against the directory that holds the current file
    const targetPath = join(dirname(currentFilePath), relativeLink);
    return "/" + relative("pages", targetPath);
  }

  // A bare anchor points into the current page: prefix it with that page's
  // own path, minus the .mdx extension
  const pagePath = relative("pages", currentFilePath).replace(/\.mdx$/, "");
  return `/${pagePath}${relativeLink}`;
}
/**
 * Strip a single trailing slash from a path, leaving the root path "/"
 * untouched.
 *
 * @param {string} path - Path to normalize.
 * @returns {string} The path without its trailing slash (or unchanged).
 */
function normalizePath(path) {
  const isRoot = path === "/";
  if (isRoot || !path.endsWith("/")) {
    return path;
  }
  return path.slice(0, -1);
}
/**
 * Recursively collect the paths of all files ending in ".mdx" under a
 * directory.
 *
 * Entries are visited in the order returned by readdir; subdirectories are
 * descended into one at a time, in place.
 *
 * @param {string} dir - Directory to scan.
 * @returns {Promise<string[]>} Paths of the MDX files found, each built by
 *   joining `dir` with the entry names leading to the file.
 */
async function findMdxFiles(dir) {
  const entries = await fs.readdir(dir, {
    withFileTypes: true,
  });
  const collected = [];

  for (const entry of entries) {
    const entryPath = join(dir, entry.name);

    if (entry.isDirectory()) {
      const nested = await findMdxFiles(entryPath);
      collected.push(...nested);
      continue;
    }

    if (entry.name.endsWith(".mdx")) {
      collected.push(entryPath);
    }
  }

  return collected;
}
/**
 * Extract the internal Markdown links of an MDX document together with the
 * line they appear on.
 *
 * A link is skipped as external when its destination:
 *   - begins with a URI scheme ("https:", "mailto:", "tel:", ...),
 *   - is protocol-relative ("//host/path"), or
 *   - is neither site-absolute nor an anchor and contains "://".
 *
 * An optional Markdown link title (`[text](/path "Title")`) is dropped so that
 * only the destination itself is recorded.
 *
 * @param {string} content - Raw MDX source.
 * @param {string} filePath - Path of the MDX file (expected under "pages").
 * @returns {{originalLink: string, link: string, lineNumber: number}[]}
 *   One entry per internal link: the destination as written, its
 *   site-absolute form, and the 1-based line number.
 */
function extractLinks(content, filePath) {
  const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
  // A leading "scheme:" marks an absolute URI, whether or not "//" follows
  const schemeRegex = /^[a-z][a-z0-9+.-]*:/i;
  // Site-absolute path of this page; used to qualify same-file "#anchor" links
  const basePath = "/" + relative("pages", filePath).replace(/\.mdx$/, "");
  const links = [];

  content.split("\n").forEach((line, index) => {
    // matchAll works on a private copy of the regex, so no lastIndex state
    // carries over from one line to the next
    for (const match of line.matchAll(linkRegex)) {
      // Keep only the destination; anything after whitespace is a link title
      const [
        link,
      ] = match[2].trim().split(/\s+/);
      if (!link) continue;

      const isExternal = schemeRegex.test(link)
        || link.startsWith("//")
        || (!link.startsWith("/") && !link.startsWith("#") && link.includes("://"));
      if (isExternal) continue;

      let normalizedLink;
      if (link.startsWith("#")) {
        // Same-file anchor: qualify it with the current page's path
        normalizedLink = `${basePath}${link}`;
      } else if (link.startsWith("/")) {
        normalizedLink = link;
      } else {
        normalizedLink = resolveRelativeLink(link, filePath);
      }

      links.push({
        originalLink: link,
        link: normalizedLink,
        lineNumber: index + 1,
      });
    }
  });

  return links;
}
/**
 * Build the set of link targets that an MDX file makes valid.
 *
 * The set contains, all lower-cased:
 *   - the page path itself (file path relative to "pages", no ".mdx");
 *   - for "index.mdx" files, the containing directory path as well;
 *   - for every heading: "#slug", "<page>#slug" and "<page>/#slug", plus the
 *     same two path forms for the directory when the file is an index file.
 *
 * Headings are ATX-style lines ("#" to "######" followed by whitespace).
 * Lines inside fenced code blocks (``` or ~~~) are ignored, so shell comments
 * such as "# install" do not become anchors. Both LF and CRLF line endings
 * are accepted.
 *
 * @param {string} content - Raw MDX source.
 * @param {string} filePath - Path of the MDX file (expected under "pages").
 * @returns {Set<string>} Lower-cased paths and anchors valid for this file.
 */
function extractAnchors(content, filePath) {
  const anchors = new Set();
  const headerRegex = /^#{1,6}\s+(.+)$/;
  const fenceRegex = /^\s*(`{3,}|~{3,})/;

  // e.g. pages/core/workflows/code/nodejs.mdx -> /core/workflows/code/nodejs
  const basePath = "/" + relative("pages", filePath).replace(/\.mdx$/, "");
  const normalizedBasePath = normalizePath(basePath);
  anchors.add(normalizedBasePath.toLowerCase());

  // Every path under which this page's headings can be addressed
  const pagePaths = [
    normalizedBasePath,
  ];
  if (basename(filePath) === "index.mdx") {
    // basePath always starts with "/", so dirname() returns "/" for a
    // top-level index file and never "."
    const dirPath = dirname(basePath);
    anchors.add(dirPath.toLowerCase());
    pagePaths.push(dirPath);
  }

  // Marker ("```", "~~~~", ...) of the fence currently open, or null
  let openFence = null;

  // Splitting on \r?\n keeps a trailing "\r" from defeating headerRegex
  for (const line of content.split(/\r?\n/)) {
    const fence = line.match(fenceRegex);
    if (fence) {
      const marker = fence[1];
      if (openFence === null) {
        openFence = marker;
        continue;
      }
      // A closing fence uses the same character, is at least as long as the
      // opening one, and carries nothing else on the line
      const closesFence = marker[0] === openFence[0]
        && marker.length >= openFence.length
        && line.trim() === marker;
      if (closesFence) {
        openFence = null;
        continue;
      }
    }
    if (openFence !== null) continue;

    const match = line.match(headerRegex);
    if (!match) continue;

    const anchor = headerToAnchor(match[1].trim());

    // 1. Bare #anchor for same-file references
    anchors.add(`#${anchor}`.toLowerCase());

    // 2. Path-qualified forms for cross-file references, with and without a
    //    slash before the "#"
    for (const pagePath of pagePaths) {
      anchors.add(`${pagePath}#${anchor}`.toLowerCase());
      anchors.add(`${pagePath}/#${anchor}`.toLowerCase());
    }
  }

  if (process.env.DEBUG) {
    console.log(`File: ${filePath}`);
    console.log("Valid anchors:", Array.from(anchors));
  }

  return anchors;
}
/**
 * Locate the MDX file that backs a site path.
 *
 * Two candidates are probed in order: "pages/<path>.mdx", then
 * "pages/<path>/index.mdx". The first one that fs.access accepts wins; any
 * access error simply moves on to the next candidate.
 *
 * @param {string} basePath - Site path (a trailing slash is tolerated).
 * @returns {Promise<string|null>} Path of the matching file, or null when
 *   neither candidate is accessible.
 */
async function findMdxFile(basePath) {
  const cleanPath = normalizePath(basePath);
  const candidates = [
    join("pages", cleanPath + ".mdx"),
    join("pages", cleanPath, "index.mdx"),
  ];

  for (const candidate of candidates) {
    try {
      await fs.access(candidate);
      return candidate;
    } catch (err) {
      // Not accessible: fall through to the next candidate
    }
  }

  return null;
}
/**
 * Entry point: validate anchor links across every MDX file under "pages".
 *
 * Pass 1 reads each file, recording its internal links and the anchors it
 * defines. Pass 2 checks every link:
 *   - a bare "#anchor" must exist among the anchors of its own file;
 *   - a link with an anchor must point at an existing MDX file and at an
 *     anchor known for that path;
 *   - a link without an anchor is not reported, even if no file matches.
 *
 * Each broken link is printed as it is found. The process exits with code 1
 * when any broken link was found or an error was thrown.
 */
async function main() {
  try {
    const mdxFiles = await findMdxFiles("pages");
    const linksByFile = new Map();
    const anchorsByFile = new Map();
    const validAnchors = new Set();

    // Pass 1: gather links and anchors
    console.log("Processing MDX files...");
    for (const filePath of mdxFiles) {
      const content = await fs.readFile(filePath, "utf8");

      const links = extractLinks(content, filePath);
      if (links.length > 0) {
        linksByFile.set(filePath, links);
      }

      const fileAnchors = extractAnchors(content, filePath);
      anchorsByFile.set(filePath, fileAnchors);
      fileAnchors.forEach((anchor) => validAnchors.add(anchor));
    }

    // Pass 2: validate
    let brokenLinksFound = false;

    // Print one broken-link line and remember that the run has failed
    const reportBroken = (file, lineNumber, shownLink, reason) => {
      brokenLinksFound = true;
      console.log(
        chalk.red("✗"),
        `${chalk.yellow(file)}:${chalk.cyan(lineNumber)}`,
        `Broken link: ${chalk.red(shownLink)} (${reason})`,
      );
    };

    for (const [
      file,
      links,
    ] of linksByFile) {
      const ownAnchors = anchorsByFile.get(file);

      for (const entry of links) {
        const {
          originalLink, link, lineNumber,
        } = entry;

        // Same-file anchors are checked against this file's anchors only
        if (originalLink.startsWith("#")) {
          if (!ownAnchors.has(originalLink.toLowerCase())) {
            reportBroken(file, lineNumber, originalLink, "anchor not found");
          }
          continue;
        }

        const parts = link.split("#");
        const anchor = parts[1];
        const normalizedPath = normalizePath(parts[0]);

        const targetFile = await findMdxFile(normalizedPath);

        // Links without an anchor are not validated any further
        if (!anchor) continue;

        if (!targetFile) {
          reportBroken(file, lineNumber, link, "file not found");
          continue;
        }

        // All spellings under which the anchor may have been registered
        const candidates = [
          `${normalizedPath}#${anchor}`,
          `${normalizedPath}/#${anchor}`,
        ];
        if (basename(targetFile) === "index.mdx") {
          // For index files, also try the parent directory spellings
          const parentDir = dirname(normalizedPath);
          candidates.push(
            `${parentDir}#${anchor}`,
            `${parentDir}/#${anchor}`,
          );
        }
        const variations = candidates.map((v) => v.toLowerCase());

        if (process.env.DEBUG) {
          console.log("\nChecking link:", link);
          console.log("Checking variations:", variations);
          console.log("Against anchors:", Array.from(validAnchors));
        }

        if (!variations.some((v) => validAnchors.has(v))) {
          reportBroken(file, lineNumber, originalLink, "anchor not found");
        }
      }
    }

    if (brokenLinksFound) {
      console.log(chalk.red("\n✗ Broken links found!"));
      process.exit(1);
    }
    console.log(chalk.green("\n✓ All links are valid!"));
  } catch (error) {
    console.error(chalk.red("Error:"), error.message);
    process.exit(1);
  }
}
main();