import path from 'path';
import fse from 'fs-extra';
import { toString } from 'mdast-util-to-string';
import { cache } from 'react';
import { remark } from 'remark';
import remarkGfm from 'remark-gfm';
import html from 'remark-html';
import slugify from 'slugify';
import { SKIP, visit } from 'unist-util-visit';

/** A rendered documentation page: HTML body, heading outline and extracted metadata. */
export type DocumentationPage = {
	content: string;
	sections: DocumentationSection[];
	metadata: {
		title: string;
		description?: string;
		order?: number;
	};
};

/** One entry of the nested table of contents built by `remarkTocWithSlugs`. */
export type TableOfContentsItem = {
	title: string;
	url: string;
	items?: TableOfContentsItem[];
	level: number;
};

export type TableOfContents = {
	items: TableOfContentsItem[];
};

/** An h2/h3 heading of a page; carries `url` when the heading is a link, otherwise `slug`. */
export type DocumentationSection = {
	title: string;
	level: number;
	slug?: string;
	url?: string;
};

/** Summary of a page as returned by `getDocumentationPagesWithSections`. */
export type DocumentationPageInfo = {
	title: string;
	slug: string;
	description?: string;
	order?: number;
	sections: DocumentationSection[];
};

/** Minimal structural view of the mdast nodes the plugins below touch. */
type MarkdownNode = {
	type: string;
	value?: string;
	depth?: number;
	children?: MarkdownNode[];
	data?: {
		hProperties?: Record<string, unknown>;
		toc?: TableOfContents;
	};
};

/** Heading text that marks the hand-written table of contents inside the markdown. */
const TABLE_OF_CONTENTS_TITLE = 'Table of Contents';

/** Sort position given to pages that do not declare an `order`. */
const DEFAULT_ORDER = 999;

/** GitHub contents endpoint of the upstream docs directory. */
const DOCS_API_URL = 'https://api.github.com/repos/VSCodium/vscodium/contents/docs';

/**
 * Returns the text of the first level-1 heading (`# ...`), trimmed,
 * or an empty string when the content has none.
 */
function extractTitle(content: string): string {
	const titleMatch = /^#\s+(.*)$/m.exec(content);

	return titleMatch?.[1]?.trim() ?? '';
}

/**
 * Returns the first non-empty line after the first `# ` heading that is
 * neither another heading nor an HTML comment, or `undefined` if none exists.
 */
function extractDescription(content: string): string | undefined {
	const lines = content.split('\n');
	const titleIndex = lines.findIndex((line) => line.startsWith('# '));

	if(titleIndex === -1) {
		return undefined;
	}

	for(const rawLine of lines.slice(titleIndex + 1)) {
		const line = rawLine.trim();

		// Metadata comments (`<!-- order: 1 -->`) are not prose, so they must not become the description.
		if(line && !line.startsWith('#') && !line.startsWith('<!--')) {
			return line;
		}
	}

	return undefined;
}

/**
 * Reads `key: value` pairs stored in HTML comments, e.g. `<!-- order: 1 -->`.
 * Only `order` is recognised; when several are present the last valid one wins.
 * Values that do not parse as an integer are ignored so `order` is never `NaN`.
 */
function extractMetadata(content: string): { order?: number } {
	const metadata: { order?: number } = {};

	// NOTE(review): the pattern was lost in SOURCE (only `//g` remained); rebuilt from the
	// two captures (key, value) the loop destructures. Confirm against the upstream file.
	const metadataRegex = /<!--\s*(\w+):\s*(.*?)\s*-->/g;

	for(const [, key, value] of content.matchAll(metadataRegex)) {
		if(key !== 'order' || value === undefined) {
			continue;
		}

		const order = Number.parseInt(value, 10);

		if(!Number.isNaN(order)) {
			metadata.order = order;
		}
	}

	return metadata;
}

/**
 * Collects the h2 and h3 headings of the raw markdown.
 *
 * - A heading whose text contains a markdown link yields `{ title, level, url }`.
 * - Any other heading yields `{ title, level, slug }`, where the slug is the id of a
 *   leading `<a id="..."></a>` anchor when present, otherwise the slugified title.
 * - The "Table of Contents" heading is skipped.
 */
function extractSections(content: string): DocumentationSection[] {
	const sections: DocumentationSection[] = [];

	// NOTE(review): the optional anchor group was stripped in SOURCE; rebuilt so the three
	// captures are (hashes, anchor id, title). `[ \t]+` keeps the match on a single line.
	const headingRegex = /^(#{2,3})[ \t]+(?:<a id="([^"]*)"><\/a>)?(.*)$/gm;

	for(const [, hashes = '', anchorId, rawTitle = ''] of content.matchAll(headingRegex)) {
		const level = hashes.length;
		const linkMatch = /\[(.*)]\((.*)\)/.exec(rawTitle);

		if(linkMatch) {
			const [, linkTitle = '', url = ''] = linkMatch;

			sections.push({ title: linkTitle, level, url });
			continue;
		}

		const title = rawTitle.trim();

		if(title === TABLE_OF_CONTENTS_TITLE) {
			continue;
		}

		sections.push({
			title,
			level,
			slug: anchorId || slugify(title, { lower: true }),
		});
	}

	return sections;
}

/**
 * Remark plugin that gives every h1-h3 heading an `id` (and the `scroll-mt-16` class)
 * and stores a nested table of contents in `tree.data.toc`.
 *
 * A heading that starts with an `<a id="...">` anchor uses that id as its slug; the
 * leading HTML nodes are removed from the heading. Otherwise the slug is the slugified text.
 */
function remarkTocWithSlugs() {
	return (tree: any) => {
		const toc: TableOfContents = { items: [] };
		let currentLevel1: TableOfContentsItem | null = null;
		let currentLevel2: TableOfContentsItem | null = null;

		const appendChild = (parentItem: TableOfContentsItem, child: TableOfContentsItem): void => {
			(parentItem.items ??= []).push(child);
		};

		visit(tree, 'heading', (node: any) => {
			const heading = node as MarkdownNode;
			const depth = heading.depth ?? 0;

			// Only include h1, h2, h3
			if(depth > 3) {
				return;
			}

			const children = heading.children ?? [];
			let anchorId: string | undefined;

			if(children[0]?.type === 'html') {
				// NOTE(review): pattern reconstructed, the original was stripped in SOURCE.
				anchorId = /<a id="([^"]*)">/.exec(children[0].value ?? '')?.[1];

				// Drop the anchor's opening/closing tags so they do not end up in the title.
				// The optional chaining stops the loop once the heading has no children left.
				while(children[0]?.type === 'html') {
					children.shift();
				}
			}

			const text = toString(heading);
			const slug = anchorId || slugify(text, { lower: true });

			// Add id to the heading node for linking
			const data = (heading.data ??= {});
			const properties = (data.hProperties ??= {});

			properties.id = slug;
			properties.class = 'scroll-mt-16';

			const item: TableOfContentsItem = {
				title: text,
				url: `#${slug}`,
				level: depth,
			};

			if(depth === 1) {
				toc.items.push(item);
				currentLevel1 = item;
				currentLevel2 = null;
			} else if(depth === 2) {
				if(currentLevel1) {
					appendChild(currentLevel1, item);
				} else {
					toc.items.push(item);
				}

				currentLevel2 = item;
			} else if(currentLevel2) {
				appendChild(currentLevel2, item);
			} else if(currentLevel1) {
				appendChild(currentLevel1, item);
			} else {
				toc.items.push(item);
			}
		});

		// Store the TOC in the tree data
		const root = tree as MarkdownNode;

		(root.data ??= {}).toc = toc;
	};
}

/**
 * Remark plugin that removes the "Table of Contents" heading together with everything
 * up to (not including) the next heading of any depth.
 *
 * Nothing is removed when the first node of the parent is itself a heading.
 * NOTE(review): presumably this keeps the TOC on pages that open directly with a
 * heading instead of a metadata comment — confirm the intent.
 */
function remarkRemoveTableOfContents() {
	return (tree: any) => {
		visit(tree, 'heading', (node: any, index: number, parent: any) => {
			if(toString(node) !== TABLE_OF_CONTENTS_TITLE) {
				return undefined;
			}

			const siblings = (parent as MarkdownNode | undefined)?.children;

			if(!siblings || siblings[0]?.type === 'heading') {
				return undefined;
			}

			// Find the next heading to determine where the TOC section ends
			const offset = siblings.slice(index + 1).findIndex((sibling) => sibling.type === 'heading');
			const endIndex = offset === -1 ? siblings.length : index + 1 + offset;

			// Remove the TOC heading and all content until the next heading
			siblings.splice(index, endIndex - index);

			// Do not descend into the removed node; resume at the node that now sits at `index`.
			return [SKIP, index];
		});
	};
}

/** Builds GitHub API request headers, adding the token only when GITHUB_TOKEN is set. */
function buildGithubHeaders(accept: string): Record<string, string> {
	const headers: Record<string, string> = { Accept: accept };

	if(process.env.GITHUB_TOKEN) {
		headers.Authorization = `token ${process.env.GITHUB_TOKEN}`;
	}

	return headers;
}

/** Page returned when the markdown can be neither read from the cache nor fetched. */
function createUnavailablePage(): DocumentationPage {
	return {
		// NOTE(review): the tags of this snippet were stripped in SOURCE; only the visible
		// text is original. Restore the upstream markup/classes if they differ.
		content: '<div><h2>Documentation Unavailable</h2><p>We couldn\'t load the documentation. Please try again later or check the <a href="https://github.com/VSCodium/vscodium">GitHub repository</a>.</p></div>',
		sections: [],
		metadata: {
			title: 'Documentation Unavailable',
			description: 'We couldn\'t load the documentation. Please try again later.',
		},
	};
}

/**
 * Returns the markdown of a doc file: from the local cache when present, otherwise
 * fetched from GitHub and then written to the cache (best effort).
 *
 * @throws when the cache cannot be read or the GitHub request fails.
 */
async function loadMarkdown(markdownFile: string, filePath: string): Promise<string> {
	if(await fse.pathExists(filePath)) {
		return fse.readFile(filePath, 'utf8');
	}

	const response = await fetch(`${DOCS_API_URL}/${markdownFile}`, {
		headers: buildGithubHeaders('application/vnd.github.v3.raw'),
		next: { revalidate: 3600 }, // Revalidate every hour
	});

	if(!response.ok) {
		const errorMessage = response.status === 403
			? 'GitHub API rate limit exceeded. Consider adding a GITHUB_TOKEN environment variable for higher limits.'
			: `GitHub API responded with status: ${response.status}`;

		console.warn(errorMessage);
		throw new Error(errorMessage);
	}

	const content = await response.text();

	// A failed cache write must not discard content that was fetched successfully.
	try {
		await fse.ensureDir(path.dirname(filePath));
		await fse.writeFile(filePath, content);
	} catch (error) {
		console.warn(`Could not cache doc page ${markdownFile}:`, error);
	}

	return content;
}

/**
 * Loads one documentation page and renders it to HTML.
 *
 * The markdown comes from `./cache/<markdownFile>` (relative to the working directory)
 * or, if absent, from the VSCodium repository on GitHub. Links to upstream doc files
 * are rewritten to local `/docs/<name>` routes before rendering.
 *
 * @param markdownFile file name inside the docs directory, e.g. `index.md`
 * @returns the rendered page, or a "Documentation Unavailable" page when loading fails
 */
export const getDocumentationPage = cache(async (markdownFile: string): Promise<DocumentationPage> => {
	const cacheDirectory = path.resolve(process.cwd(), './cache');
	const filePath = path.resolve(cacheDirectory, markdownFile);

	// Names containing `..` could otherwise read or write files outside the cache directory.
	if(!filePath.startsWith(`${cacheDirectory}${path.sep}`)) {
		console.error(`Refusing to load doc page outside the cache directory: ${markdownFile}`);

		return createUnavailablePage();
	}

	let content: string;

	try {
		content = await loadMarkdown(markdownFile, filePath);
	} catch (error) {
		console.error(`Error fetching doc page ${markdownFile}:`, error);

		// Return a fallback page
		return createUnavailablePage();
	}

	content = content.replace(/https:\/\/github\.com\/VSCodium\/vscodium\/blob\/master\/docs\/([\w-]+)\.md/g, '/docs/$1');

	// Extract metadata
	const title = extractTitle(content);
	const description = extractDescription(content);
	const { order } = extractMetadata(content);

	// Process markdown
	const processor = remark()
		.use(remarkGfm) // GitHub Flavored Markdown
		.use(remarkRemoveTableOfContents) // Remove "Table of Contents" section
		.use(remarkTocWithSlugs) // Custom TOC plugin
		.use(html, { sanitize: false }); // Convert to HTML

	const result = await processor.process(content);

	return {
		content: result.toString(),
		// Sections are extracted from the raw markdown, not from the processed tree.
		sections: extractSections(content),
		metadata: {
			title,
			description,
			order,
		},
	};
});

/** Type guard: true when the value is an array containing only strings. */
function isStringArray(value: unknown): value is string[] {
	return Array.isArray(value) && value.every((entry) => typeof entry === 'string');
}

/** Reads the cached page list; returns `undefined` when it is missing, unreadable or malformed. */
async function readCachedPageList(filePath: string): Promise<string[] | undefined> {
	try {
		if(!(await fse.pathExists(filePath))) {
			return undefined;
		}

		const cached: unknown = JSON.parse(await fse.readFile(filePath, 'utf8'));

		if(isStringArray(cached)) {
			return cached;
		}

		console.warn(`Ignoring malformed page list cache: ${filePath}`);
	} catch (error) {
		console.warn(`Could not read page list cache ${filePath}:`, error);
	}

	return undefined;
}

/**
 * Lists the markdown file names of the docs directory.
 *
 * Uses `./cache/pages.json` when it holds a valid list; otherwise queries GitHub and
 * caches the result (best effort).
 *
 * @returns the `.md` file names, or an empty array when the listing cannot be fetched
 */
export const getDocumentationPages = cache(async (): Promise<string[]> => {
	const filePath = path.resolve(process.cwd(), './cache/pages.json');
	const cached = await readCachedPageList(filePath);

	if(cached) {
		return cached;
	}

	try {
		const response = await fetch(DOCS_API_URL, {
			headers: buildGithubHeaders('application/vnd.github.v3+json'),
			next: { revalidate: 3600 },
		});

		if(!response.ok) {
			throw new Error(`GitHub API responded with status: ${response.status}`);
		}

		const files: unknown = await response.json();

		if(!Array.isArray(files)) {
			throw new TypeError('GitHub API returned an unexpected payload for the docs directory');
		}

		const markdowns = files
			.map((file: unknown) => (typeof file === 'object' && file !== null ? (file as { name?: unknown }).name : undefined))
			.filter((name): name is string => typeof name === 'string' && name.endsWith('.md'));

		// A failed cache write must not discard the list that was fetched successfully.
		try {
			await fse.ensureDir(path.dirname(filePath));
			await fse.writeFile(filePath, JSON.stringify(markdowns, null, 2));
		} catch (error) {
			console.warn('Could not cache doc pages list:', error);
		}

		return markdowns;
	} catch (error) {
		console.error('Error fetching doc pages:', error);

		return [];
	}
});

/** Orders pages: `index` first, then ascending `order`, then alphabetically by title. */
function compareDocumentationPages(a: DocumentationPageInfo, b: DocumentationPageInfo): number {
	// Special case for index.md - always first
	if(a.slug === 'index') {
		return -1;
	}

	if(b.slug === 'index') {
		return 1;
	}

	// Sort by order if specified
	const orderA = a.order ?? DEFAULT_ORDER;
	const orderB = b.order ?? DEFAULT_ORDER;

	if(orderA !== orderB) {
		return orderA - orderB;
	}

	// Then alphabetically by title
	return a.title.localeCompare(b.title);
}

/**
 * Loads every documentation page and returns its summary (title, slug, description,
 * order, sections), sorted with `compareDocumentationPages`.
 *
 * Pages whose metadata `order` is exactly 0 are left out of the result; pages without
 * an `order` get 999 so they sort last.
 */
export const getDocumentationPagesWithSections = cache(async (): Promise<DocumentationPageInfo[]> => {
	const documentFiles = await getDocumentationPages();

	// Get info for all pages including their sections
	const pagesInfo = await Promise.all(
		documentFiles.map(async (filename): Promise<DocumentationPageInfo | null> => {
			// Strip only the extension; a plain string replace would hit the first ".md" anywhere in the name.
			const slug = filename.replace(/\.md$/, '');
			const page = await getDocumentationPage(filename);

			if(page.metadata.order === 0) {
				return null;
			}

			return {
				title: page.metadata.title,
				slug,
				description: page.metadata.description,
				order: page.metadata.order ?? DEFAULT_ORDER, // Default to high number for sorting
				sections: page.sections,
			};
		}),
	);

	return pagesInfo
		.filter((page): page is DocumentationPageInfo => page !== null)
		.sort(compareDocumentationPages);
});