From 42ba4e3ae8d7314f5d880bd111d809bb2231d800 Mon Sep 17 00:00:00 2001 From: Darshan Date: Tue, 1 Apr 2025 12:01:43 +0530 Subject: [PATCH] feat: split sitemaps into smaller chunks and groups. --- server/main.js | 7 +- server/sitemap.js | 185 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 144 insertions(+), 48 deletions(-) diff --git a/server/main.js b/server/main.js index 1fb7bf9b1..e3842985a 100644 --- a/server/main.js +++ b/server/main.js @@ -1,12 +1,13 @@ -import { createApp, fromNodeMiddleware, toNodeListener } from 'h3'; +import { sitemaps } from './sitemap.js'; import { createServer } from 'node:http'; import { handler } from '../build/handler.js'; -import { sitemap } from './sitemap.js'; +import { createApp, fromNodeMiddleware, toNodeListener } from 'h3'; async function main() { const port = process.env.PORT || 3000; const app = createApp(); - app.use('/sitemap.xml', await sitemap()); + app.use(['/sitemap.xml', '/sitemaps'], await sitemaps()); + app.use(fromNodeMiddleware(handler)); const server = createServer(toNodeListener(app)).listen(port); server.addListener('listening', () => { diff --git a/server/sitemap.js b/server/sitemap.js index affaef02b..5d4f7686f 100644 --- a/server/sitemap.js +++ b/server/sitemap.js @@ -1,59 +1,154 @@ +import { fileURLToPath } from 'node:url'; import { createRequire } from 'node:module'; -import { defineEventHandler, setResponseHeader } from 'h3'; +import { dirname, join } from 'node:path'; +import { readFile, stat } from 'node:fs/promises'; +import { mkdirSync, writeFileSync } from 'node:fs'; +import { + defineEventHandler, + getRequestURL, + sendRedirect, + serveStatic, + setResponseHeader +} from 'h3'; -/** - * @returns {Promise} - */ -export async function sitemap() { +const MAX_THREADS_PER_FILE = 1000; +const BASE_URL = 'https://appwrite.io'; +const BASE_DIR = dirname(fileURLToPath(import.meta.url)); + +const SITEMAP_DIR = join(BASE_DIR, './sitemaps'); +const THREADS_DIR = join(SITEMAP_DIR, 'threads'); +const NAMED_GROUPS = { + blog: '/blog', + docs: '/docs', + integrations: '/integrations' +}; + +export async function sitemaps() { console.info('Preparing Sitemap...'); const { manifest } = await import('../build/server/manifest.js'); - const sveltekit_routes = manifest._.routes - .filter((route) => route.params.length === 0) - .map((route) => route.id); - const threads = collectThreads(); - const all_routes = [...sveltekit_routes, ...threads]; - const document_routes = all_routes.filter( - (route) => !['.json', '.xml'].some((ext) => route.endsWith(ext)) - ); - const routes = new Set(document_routes); - console.info(`Sitemap loaded with ${routes.length} routes!`); - console.group(); - console.info(`sveltekit: ${sveltekit_routes.length}`); - console.info(`threads: ${threads.length}`); - console.groupEnd(); + const threads = collectThreads().map((id) => `/threads/${id}`); + const otherRoutes = manifest._.routes + .filter((r) => r.params.length === 0) + .map((r) => r.id) + .filter( + (id) => !id.startsWith('/threads/') && !id.endsWith('.json') && !id.endsWith('.xml') + ); - const sitemap = ` - - - ${[...routes] + mkdirSync(SITEMAP_DIR, { recursive: true }); + mkdirSync(THREADS_DIR, { recursive: true }); + + let totalCount = 0; + const sitemapIndexOrder = []; + + const grouped = {}, + fallback = []; + + for (const route of otherRoutes) { + const match = Object.entries(NAMED_GROUPS).find(([, prefix]) => route.startsWith(prefix)); + if (match) { + const [group] = match; + grouped[group] ??= []; + grouped[group].push(route); + } else fallback.push(route); + } + + totalCount += writeSitemap('pages.xml', fallback, SITEMAP_DIR); + sitemapIndexOrder.push('pages.xml'); + + for (const group of ['docs', 'blog', 'integrations']) { + if (grouped[group]?.length) { + const filename = `${group}.xml`; + totalCount += writeSitemap(filename, grouped[group], SITEMAP_DIR); + sitemapIndexOrder.push(filename); + } + } + + const threadChunks = chunkArray(threads, MAX_THREADS_PER_FILE); + threadChunks.forEach((chunk, i) => { + const filename = `${i + 1}.xml`; + totalCount += writeSitemap(filename, chunk, THREADS_DIR); + sitemapIndexOrder.push(`threads/${filename}`); + }); + + const sitemapIndex = ` + + + ${sitemapIndexOrder .map( - (route) => ` - https://appwrite.io${route} - - ` + (name) => ` + + ${BASE_URL}/sitemaps/${name} + ` ) - .join('')} - `.trim(); + .join('\n')} + `.trim(); - return defineEventHandler((event) => { - setResponseHeader(event, 'Content-Type', 'application/xml'); + console.info(`✅ Sitemap generation complete — ${totalCount} URLs in total.\n`); - return sitemap; + return defineEventHandler(async (event) => { + const url = getRequestURL(event); + + if (url.pathname === '/sitemap.xml') { + setResponseHeader(event, 'Content-Type', 'application/xml'); + return sitemapIndex; + } + + if (url.pathname === '/sitemaps') { + return sendRedirect(event, '/sitemap.xml', 307); + } + + if (url.pathname === '/sitemaps/threads') { + return sendRedirect(event, '/sitemaps/threads/1.xml', 307); + } + + const dir = import.meta.resolve('./sitemaps'); + return serveStatic(event, { + fallthrough: true, + indexNames: undefined, + getContents: (id) => readFile(new URL(dir + id)), + getMeta: async (id) => { + const stats = await stat(new URL(dir + id)).catch(() => null); + if (!stats?.isFile()) return; + return { + size: stats.size, + mtime: stats.mtimeMs + }; + } + }); }); } -/** - * @returns {string[]} - */ -function collectThreads() { - const threads = createRequire(import.meta.url)('../build/prerendered/threads/data.json'); +function writeSitemap(filename, routes, dir) { + const body = ` + + +${routes.map((route) => ` \n ${BASE_URL}${route}\n `).join('\n')} +`.trim(); - return threads.map((id) => `/threads/${id}`); + const filepath = join(dir, filename); + writeFileSync(filepath, body); + + const label = filepath.replace(BASE_DIR + '/sitemaps', ''); + console.info(` └── Generated ${label} with ${routes.length} URLs`); + + return routes.length; +} + +function chunkArray(arr, size) { + const chunks = []; + for (let i = 0; i < arr.length; i += size) { + chunks.push(arr.slice(i, i + size)); + } + return chunks; +} + +function collectThreads() { + return createRequire(import.meta.url)('../build/prerendered/threads/data.json'); }