feat: split sitemaps into smaller chunks and groups.

This commit is contained in:
Darshan
2025-04-01 12:01:43 +05:30
parent 2e7e9b8430
commit 42ba4e3ae8
2 changed files with 144 additions and 48 deletions

View File

@@ -1,12 +1,13 @@
import { createApp, fromNodeMiddleware, toNodeListener } from 'h3'; import { sitemaps } from './sitemap.js';
import { createServer } from 'node:http'; import { createServer } from 'node:http';
import { handler } from '../build/handler.js'; import { handler } from '../build/handler.js';
import { sitemap } from './sitemap.js'; import { createApp, fromNodeMiddleware, toNodeListener } from 'h3';
async function main() { async function main() {
const port = process.env.PORT || 3000; const port = process.env.PORT || 3000;
const app = createApp(); const app = createApp();
app.use('/sitemap.xml', await sitemap()); app.use(['/sitemap.xml', '/sitemaps'], await sitemaps());
app.use(fromNodeMiddleware(handler)); app.use(fromNodeMiddleware(handler));
const server = createServer(toNodeListener(app)).listen(port); const server = createServer(toNodeListener(app)).listen(port);
server.addListener('listening', () => { server.addListener('listening', () => {

View File

@@ -1,59 +1,154 @@
import { fileURLToPath } from 'node:url';
import { createRequire } from 'node:module'; import { createRequire } from 'node:module';
import { defineEventHandler, setResponseHeader } from 'h3'; import { dirname, join } from 'node:path';
import { readFile, stat } from 'node:fs/promises';
import { mkdirSync, writeFileSync } from 'node:fs';
import {
defineEventHandler,
getRequestURL,
sendRedirect,
serveStatic,
setResponseHeader
} from 'h3';
/** const MAX_THREADS_PER_FILE = 1000;
* @returns {Promise<import('h3').EventHandler>} const BASE_URL = 'https://appwrite.io';
*/ const BASE_DIR = dirname(fileURLToPath(import.meta.url));
export async function sitemap() {
const SITEMAP_DIR = join(BASE_DIR, './sitemaps');
const THREADS_DIR = join(SITEMAP_DIR, 'threads');
const NAMED_GROUPS = {
blog: '/blog',
docs: '/docs',
integrations: '/integrations'
};
export async function sitemaps() {
console.info('Preparing Sitemap...'); console.info('Preparing Sitemap...');
const { manifest } = await import('../build/server/manifest.js'); const { manifest } = await import('../build/server/manifest.js');
const sveltekit_routes = manifest._.routes const threads = collectThreads().map((id) => `/threads/${id}`);
.filter((route) => route.params.length === 0) const otherRoutes = manifest._.routes
.map((route) => route.id); .filter((r) => r.params.length === 0)
const threads = collectThreads(); .map((r) => r.id)
const all_routes = [...sveltekit_routes, ...threads]; .filter(
const document_routes = all_routes.filter( (id) => !id.startsWith('/threads/') && !id.endsWith('.json') && !id.endsWith('.xml')
(route) => !['.json', '.xml'].some((ext) => route.endsWith(ext)) );
);
const routes = new Set(document_routes);
console.info(`Sitemap loaded with ${routes.length} routes!`);
console.group();
console.info(`sveltekit: ${sveltekit_routes.length}`);
console.info(`threads: ${threads.length}`);
console.groupEnd();
const sitemap = ` mkdirSync(SITEMAP_DIR, { recursive: true });
<?xml version="1.0" encoding="UTF-8" ?> mkdirSync(THREADS_DIR, { recursive: true });
<urlset
xmlns="https://www.sitemaps.org/schemas/sitemap/0.9" let totalCount = 0;
xmlns:xhtml="https://www.w3.org/1999/xhtml" const sitemapIndexOrder = [];
xmlns:mobile="https://www.google.com/schemas/sitemap-mobile/1.0"
xmlns:news="https://www.google.com/schemas/sitemap-news/0.9" const grouped = {},
xmlns:image="https://www.google.com/schemas/sitemap-image/1.1" fallback = [];
xmlns:video="https://www.google.com/schemas/sitemap-video/1.1"
> for (const route of otherRoutes) {
${[...routes] const match = Object.entries(NAMED_GROUPS).find(([, prefix]) => route.startsWith(prefix));
if (match) {
const [group] = match;
grouped[group] ??= [];
grouped[group].push(route);
} else fallback.push(route);
}
totalCount += writeSitemap('pages.xml', fallback, SITEMAP_DIR);
sitemapIndexOrder.push('pages.xml');
for (const group of ['docs', 'blog', 'integrations']) {
if (grouped[group]?.length) {
const filename = `${group}.xml`;
totalCount += writeSitemap(filename, grouped[group], SITEMAP_DIR);
sitemapIndexOrder.push(filename);
}
}
const threadChunks = chunkArray(threads, MAX_THREADS_PER_FILE);
threadChunks.forEach((chunk, i) => {
const filename = `${i + 1}.xml`;
totalCount += writeSitemap(filename, chunk, THREADS_DIR);
sitemapIndexOrder.push(`threads/${filename}`);
});
const sitemapIndex = `
<?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="https://www.sitemaps.org/schemas/sitemap/0.9">
${sitemapIndexOrder
.map( .map(
(route) => `<url> (name) => `
<loc>https://appwrite.io${route}</loc> <sitemap>
</url> <loc>${BASE_URL}/sitemaps/${name}</loc>
` </sitemap>`
) )
.join('')} .join('\n')}
</urlset>`.trim(); </sitemapindex>`.trim();
return defineEventHandler((event) => { console.info(`✅ Sitemap generation complete — ${totalCount} URLs in total.\n`);
setResponseHeader(event, 'Content-Type', 'application/xml');
return sitemap; return defineEventHandler(async (event) => {
const url = getRequestURL(event);
if (url.pathname === '/sitemap.xml') {
setResponseHeader(event, 'Content-Type', 'application/xml');
return sitemapIndex;
}
if (url.pathname === '/sitemaps') {
return sendRedirect(event, '/sitemap.xml', 307);
}
if (url.pathname === '/sitemaps/threads') {
return sendRedirect(event, '/sitemaps/threads/1.xml', 307);
}
const dir = import.meta.resolve('./sitemaps');
return serveStatic(event, {
fallthrough: true,
indexNames: undefined,
getContents: (id) => readFile(new URL(dir + id)),
getMeta: async (id) => {
const stats = await stat(new URL(dir + id)).catch(() => null);
if (!stats?.isFile()) return;
return {
size: stats.size,
mtime: stats.mtimeMs
};
}
});
}); });
} }
/** function writeSitemap(filename, routes, dir) {
* @returns {string[]} const body = `
*/ <?xml version="1.0" encoding="UTF-8" ?>
function collectThreads() { <urlset
const threads = createRequire(import.meta.url)('../build/prerendered/threads/data.json'); xmlns="https://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xhtml="https://www.w3.org/1999/xhtml"
xmlns:mobile="https://www.google.com/schemas/sitemap-mobile/1.0"
xmlns:news="https://www.google.com/schemas/sitemap-news/0.9"
xmlns:image="https://www.google.com/schemas/sitemap-image/1.1"
xmlns:video="https://www.google.com/schemas/sitemap-video/1.1"
>
${routes.map((route) => ` <url>\n <loc>${BASE_URL}${route}</loc>\n </url>`).join('\n')}
</urlset>`.trim();
return threads.map((id) => `/threads/${id}`); const filepath = join(dir, filename);
writeFileSync(filepath, body);
const label = filepath.replace(BASE_DIR + '/sitemaps', '');
console.info(` └── Generated ${label} with ${routes.length} URLs`);
return routes.length;
}
/**
 * Splits an array into consecutive chunks of at most `size` elements.
 *
 * @template T
 * @param {T[]} arr - Items to split; the input array is not modified.
 * @param {number} size - Maximum number of items per chunk; must be a positive integer.
 * @returns {T[][]} Chunks in their original order; an empty array when `arr` is empty.
 * @throws {RangeError} If `size` is not a positive integer.
 */
function chunkArray(arr, size) {
    // A zero or negative step would never advance the loop below and would
    // keep pushing chunks until the process runs out of memory.
    if (!Number.isInteger(size) || size < 1) {
        throw new RangeError(`chunk size must be a positive integer, got ${String(size)}`);
    }
    const chunks = [];
    for (let i = 0; i < arr.length; i += size) {
        chunks.push(arr.slice(i, i + size));
    }
    return chunks;
}
/**
 * Loads the thread data JSON produced by the build.
 *
 * Uses a `require` function created from this module's URL so the JSON file at
 * `../build/prerendered/threads/data.json` is resolved relative to this file
 * and loaded synchronously.
 *
 * @returns {*} The parsed contents of `data.json`. The caller maps over the
 * result and treats each entry as a thread id — presumably an array of ids;
 * verify against the build output.
 */
function collectThreads() {
return createRequire(import.meta.url)('../build/prerendered/threads/data.json');
} }