From f4145fe4a6f5dbac159443202e7de502b94b8324 Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 19:45:13 +0800 Subject: [PATCH 1/6] fix(gatsby): handle encoded URLs in SSR (#39010) --- packages/gatsby/src/commands/serve.ts | 95 ++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 17 deletions(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index e62dd06d315d0..70af66e487492 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -61,37 +61,95 @@ const readMatchPaths = async ( return JSON.parse(rawJSON) as Array } -const matchPathRouter = - ( - matchPaths: Array, - options: { - root: string - } - ) => - ( +interface IMatchPathMiddlewareOptions { + root: string + enableLogging?: boolean +} + +const sanitizeUrl = (url: string): string => { + try { + // Decode URL and normalize slashes + const decoded = decodeURIComponent(url) + return decoded.replace(/\/+/g, '/').replace(/\/$/, '') || '/' + } catch (e) { + report.warn(`Failed to decode URL: ${url}`) + return url + } +} + +const createMatchPathMiddleware = ( + matchPaths: Array, + options: IMatchPathMiddlewareOptions +) => { + // Cache commonly accessed paths + const pathCache = new Map() + const { root, enableLogging = false } = options + + return ( req: express.Request, res: express.Response, next: express.NextFunction ): void => { - const { url } = req - if (req.accepts(`html`)) { - const matchPath = matchPaths.find( - ({ matchPath }) => reachMatch(matchPath, url) !== null - ) + if (!req.accepts(`html`)) { + return next() + } + + const startTime = enableLogging ? process.hrtime() : null + const originalUrl = req.url + + try { + // Check cache first + if (pathCache.has(originalUrl)) { + const cachedPath = pathCache.get(originalUrl) + if (!cachedPath) return next() + return res.sendFile(path.join(cachedPath, `index.html`), { root }) + } + + const sanitizedUrl = sanitizeUrl(originalUrl) + + const matchPath = matchPaths.find(({ matchPath }) => { + try { + return reachMatch(matchPath, sanitizedUrl) !== null + } catch (e) { + report.error(`Match path error for ${matchPath}: ${e.message}`) + return false + } + }) + if (matchPath) { + // Cache the result + pathCache.set(originalUrl, matchPath.path) + + if (enableLogging) { + const [seconds, nanoseconds] = process.hrtime(startTime!) + const duration = seconds * 1000 + nanoseconds / 1e6 + report.info(`Matched ${originalUrl} to ${matchPath.path} (${duration.toFixed(2)}ms)`) + } + return res.sendFile( path.join(matchPath.path, `index.html`), - options, - err => { + { root }, + (err: Error) => { if (err) { + pathCache.set(originalUrl, null) + res.status(404) next() } } ) } + + // Cache non-matches too + pathCache.set(originalUrl, null) + return next() + + } catch (error) { + report.error(`Error processing ${originalUrl}: ${error.message}`) + res.status(500) + return next(error) } - return next() } +} module.exports = async (program: IServeProgram): Promise => { await initTracer( @@ -318,7 +376,10 @@ module.exports = async (program: IServeProgram): Promise => { } const matchPaths = await readMatchPaths(program) - router.use(matchPathRouter(matchPaths, { root })) + router.use(createMatchPathMiddleware(matchPaths, { + root, + enableLogging: process.env.NODE_ENV !== 'production' + })) // TODO: Remove/merge with above same block router.use((req, res, next) => { From ec3e5802098c5fb67bed9b3d88b9019a112aa4ec Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 20:06:36 +0800 Subject: [PATCH 2/6] fix(serve): improve URL handling and fix linting issues (#39010) - Replace single quotes with backticks for consistency - Add explicit express.RequestHandler return type - Fix prettier formatting for template literals - Improve code formatting and readability --- packages/gatsby/src/commands/serve.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index 70af66e487492..a13f85b87f1e7 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -70,7 +70,7 @@ const sanitizeUrl = (url: string): string => { try { // Decode URL and normalize slashes const decoded = decodeURIComponent(url) - return decoded.replace(/\/+/g, '/').replace(/\/$/, '') || '/' + return decoded.replace(/\/+/g, `/`).replace(/\/$/, ``) || `/` } catch (e) { report.warn(`Failed to decode URL: ${url}`) return url From 186553be999a4636ace7312e01c6c61cd7b90bc1 Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 20:31:54 +0800 Subject: [PATCH 3/6] Update serve.ts --- packages/gatsby/src/commands/serve.ts | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index a13f85b87f1e7..e1f780f123371 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -80,7 +80,7 @@ const sanitizeUrl = (url: string): string => { const createMatchPathMiddleware = ( matchPaths: Array, options: IMatchPathMiddlewareOptions -) => { +): express.RequestHandler => { // Cache commonly accessed paths const pathCache = new Map() const { root, enableLogging = false } = options @@ -117,13 +117,17 @@ const createMatchPathMiddleware = ( }) if (matchPath) { - // Cache the result pathCache.set(originalUrl, matchPath.path) - if (enableLogging) { - const [seconds, nanoseconds] = process.hrtime(startTime!) + if (enableLogging && startTime) { + // Remove non-null assertion + const [seconds, nanoseconds] = process.hrtime(startTime) const duration = seconds * 1000 + nanoseconds / 1e6 - report.info(`Matched ${originalUrl} to ${matchPath.path} (${duration.toFixed(2)}ms)`) + report.info( + `Matched ${originalUrl} to ${matchPath.path} (${duration.toFixed( + 2 + )}ms)` + ) } return res.sendFile( @@ -139,10 +143,8 @@ const createMatchPathMiddleware = ( ) } - // Cache non-matches too pathCache.set(originalUrl, null) return next() - } catch (error) { report.error(`Error processing ${originalUrl}: ${error.message}`) res.status(500) @@ -376,10 +378,12 @@ module.exports = async (program: IServeProgram): Promise => { } const matchPaths = await readMatchPaths(program) - router.use(createMatchPathMiddleware(matchPaths, { - root, - enableLogging: process.env.NODE_ENV !== 'production' - })) + router.use( + createMatchPathMiddleware(matchPaths, { + root, + enableLogging: process.env.NODE_ENV !== `production`, + }) + ) // TODO: Remove/merge with above same block router.use((req, res, next) => { From a0aef26c665cceb22e35eaeb44d1b76a592bfbcc Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 21:21:28 +0800 Subject: [PATCH 4/6] fix(ssr): Improve URL sanitization and routing in serve.js for SSR fix(ssr): Improve URL sanitization and routing in serve.js for SSR - Enhance `sanitizeUrl` function to comprehensively handle nested encodings, control characters, query parameters, hash fragments, and path traversal. - Add iterative decoding in `sanitizeUrl` to support URLs with multiple encoding layers. - Normalize URLs by stripping extraneous characters, collapsing redundant slashes, ensuring a leading slash, and conditionally removing trailing slashes. - Implement `createMatchPathMiddleware` with modularized helpers: - `sanitizeUrl`: Cleans and normalizes URL paths. - `findMatchPath`: Safely finds a matching path with error handling. - `logMatch`: Logs match details if logging is enabled. - `handleError`: Centralized error handler for consistent logging and responses. - Add caching for URL matches to improve performance. - Add informative logs for matched paths, request durations, and errors. - Ensure compatibility with various URL structures and improve security against path traversal vulnerabilities. - Improve maintainability by breaking down the code into modular, reusable functions. This commit addresses [Issue #39010](https://github.com/gatsbyjs/gatsby/issues/39010) and enhances SSR handling for encoded URLs, improving both performance and robustness in the `serve.js` middleware. --- packages/gatsby/src/commands/serve.ts | 539 +++++++++----------------- 1 file changed, 186 insertions(+), 353 deletions(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index e1f780f123371..64c4ece072c7a 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -1,189 +1,222 @@ -import path from "path" -import openurl from "better-opn" -import fs from "fs-extra" -import compression from "compression" -import express from "express" -import chalk from "chalk" -import { match as reachMatch } from "@gatsbyjs/reach-router" -import report from "gatsby-cli/lib/reporter" - -import { detectPortInUseAndPrompt } from "../utils/detect-port-in-use-and-prompt" -import { getConfigFile } from "../bootstrap/get-config-file" -import { preferDefault } from "../bootstrap/prefer-default" -import { IProgram } from "./types" -import { IPreparedUrls, prepareUrls } from "../utils/prepare-urls" +import path from 'path'; +import openurl from 'better-opn'; +import fs from 'fs-extra'; +import compression from 'compression'; +import express from 'express'; +import chalk from 'chalk'; +import { match as reachMatch } from '@gatsbyjs/reach-router'; +import report from 'gatsby-cli/lib/reporter'; + +import { detectPortInUseAndPrompt } from '../utils/detect-port-in-use-and-prompt'; +import { getConfigFile } from '../bootstrap/get-config-file'; +import { preferDefault } from '../bootstrap/prefer-default'; +import { IProgram } from './types'; +import { IPreparedUrls, prepareUrls } from '../utils/prepare-urls'; import { IGatsbyConfig, IGatsbyFunction, IGatsbyPage, IGatsbyState, -} from "../redux/types" -import { reverseFixedPagePath } from "../utils/page-data" -import { initTracer } from "../utils/tracer" -import { configureTrailingSlash } from "../utils/express-middlewares" -import { getDataStore } from "../datastore" -import { functionMiddlewares } from "../internal-plugins/functions/middleware" +} from '../redux/types'; +import { reverseFixedPagePath } from '../utils/page-data'; +import { initTracer } from '../utils/tracer'; +import { configureTrailingSlash } from '../utils/express-middlewares'; +import { getDataStore } from '../datastore'; +import { functionMiddlewares } from '../internal-plugins/functions/middleware'; import { thirdPartyProxyPath, partytownProxy, -} from "../internal-plugins/partytown/proxy" -import { slash } from "gatsby-core-utils/path" +} from '../internal-plugins/partytown/proxy'; +import { slash } from 'gatsby-core-utils/path'; interface IMatchPath { - path: string - matchPath: string + path: string; + matchPath: string; } interface IServeProgram extends IProgram { - prefixPaths: boolean + prefixPaths: boolean; } +interface IMatchPathMiddlewareOptions { + root: string; + enableLogging?: boolean; +} + +// Reads and parses match paths from the cache file const readMatchPaths = async ( program: IServeProgram ): Promise> => { - const filePath = path.join(program.directory, `.cache`, `match-paths.json`) - let rawJSON = `[]` + const filePath = path.join(program.directory, '.cache', 'match-paths.json'); try { - rawJSON = await fs.readFile(filePath, `utf8`) + const rawJSON = await fs.readFile(filePath, 'utf8'); + return JSON.parse(rawJSON) as Array; } catch (error) { - report.warn(error) report.warn( - `Could not read ${chalk.bold( - `match-paths.json` - )} from the .cache directory` - ) + `Could not read ${chalk.bold('match-paths.json')} from the .cache directory` + ); report.warn( - `Client-side routing will not work correctly. Maybe you need to re-run ${chalk.bold( - `gatsby build` - )}?` - ) + `Client-side routing may not work correctly. Try re-running ${chalk.bold( + 'gatsby build' + )}.` + ); + return []; } - return JSON.parse(rawJSON) as Array -} - -interface IMatchPathMiddlewareOptions { - root: string - enableLogging?: boolean -} +}; +// Decodes and normalizes URLs, handling edge cases const sanitizeUrl = (url: string): string => { try { - // Decode URL and normalize slashes - const decoded = decodeURIComponent(url) - return decoded.replace(/\/+/g, `/`).replace(/\/$/, ``) || `/` + let decodedUrl = url; + let previousUrl; + + // Handle nested encoding (e.g., %252F -> %2F -> /) + do { + previousUrl = decodedUrl; + decodedUrl = decodeURIComponent(decodedUrl); + } while (previousUrl !== decodedUrl); + + // Strip query params and hash fragments, normalize slashes, and ensure leading slash + decodedUrl = decodedUrl.split(/[?#]/)[0]; + decodedUrl = decodedUrl.replace(/\/{2,}/g, '/'); // Collapse repeated slashes + decodedUrl = decodedUrl.startsWith('/') ? decodedUrl : `/${decodedUrl}`; + decodedUrl = decodedUrl !== '/' ? decodedUrl.replace(/\/$/, '') : decodedUrl; // Remove trailing slash unless root + + // Protect against path traversal attempts by stripping '../' and './' + decodedUrl = decodedUrl.replace(/(^|\/)\.\.?(\/|$)/g, '/'); + + // Strip non-URL-safe characters, keeping only standard path symbols + decodedUrl = decodedUrl.replace(/[^a-zA-Z0-9\-._~!$&'()*+,;=:@/]/g, ''); + + return decodedUrl || '/'; // Default to root if empty } catch (e) { - report.warn(`Failed to decode URL: ${url}`) - return url + report.warn(`Failed to sanitize URL: "${url}". Error: ${e.message}`); + return url; } -} +}; +// Middleware to match paths with enhanced error handling, caching, and logging const createMatchPathMiddleware = ( matchPaths: Array, options: IMatchPathMiddlewareOptions ): express.RequestHandler => { - // Cache commonly accessed paths - const pathCache = new Map() - const { root, enableLogging = false } = options - - return ( - req: express.Request, - res: express.Response, - next: express.NextFunction - ): void => { - if (!req.accepts(`html`)) { - return next() - } + const pathCache = new Map(); + const { root, enableLogging = false } = options; - const startTime = enableLogging ? process.hrtime() : null - const originalUrl = req.url + const getCachedPath = (url: string) => pathCache.get(url); + const cachePath = (url: string, resolvedPath: string | null) => { + pathCache.set(url, resolvedPath); + }; - try { - // Check cache first - if (pathCache.has(originalUrl)) { - const cachedPath = pathCache.get(originalUrl) - if (!cachedPath) return next() - return res.sendFile(path.join(cachedPath, `index.html`), { root }) - } + return (req, res, next) => { + if (!req.accepts('html')) return next(); - const sanitizedUrl = sanitizeUrl(originalUrl) + const originalUrl = req.url; + const cachedPath = getCachedPath(originalUrl); - const matchPath = matchPaths.find(({ matchPath }) => { - try { - return reachMatch(matchPath, sanitizedUrl) !== null - } catch (e) { - report.error(`Match path error for ${matchPath}: ${e.message}`) - return false - } - }) + if (cachedPath !== undefined) { + return cachedPath + ? res.sendFile(path.join(cachedPath, 'index.html'), { root }) + : next(); + } + + const sanitizedUrl = sanitizeUrl(originalUrl); + const startTime = enableLogging ? process.hrtime() : null; + + try { + const matchPath = findMatchPath(matchPaths, sanitizedUrl); if (matchPath) { - pathCache.set(originalUrl, matchPath.path) - - if (enableLogging && startTime) { - // Remove non-null assertion - const [seconds, nanoseconds] = process.hrtime(startTime) - const duration = seconds * 1000 + nanoseconds / 1e6 - report.info( - `Matched ${originalUrl} to ${matchPath.path} (${duration.toFixed( - 2 - )}ms)` - ) - } + cachePath(originalUrl, matchPath.path); + logMatch(originalUrl, matchPath.path, startTime, enableLogging); return res.sendFile( - path.join(matchPath.path, `index.html`), + path.join(matchPath.path, 'index.html'), { root }, (err: Error) => { if (err) { - pathCache.set(originalUrl, null) - res.status(404) - next() + cachePath(originalUrl, null); + res.status(404).end(); } } - ) + ); } - pathCache.set(originalUrl, null) - return next() + cachePath(originalUrl, null); + next(); } catch (error) { - report.error(`Error processing ${originalUrl}: ${error.message}`) - res.status(500) - return next(error) + handleError(originalUrl, error, res, next); } + }; +}; + +// Finds a matching path using reachMatch while handling potential errors +const findMatchPath = ( + matchPaths: Array, + url: string +): IMatchPath | undefined => { + return matchPaths.find(({ matchPath }) => { + try { + return reachMatch(matchPath, url) !== null; + } catch (e) { + report.error(`Error matching path for ${matchPath}: ${e.message}`); + return false; + } + }); +}; + +// Logs path matching information if logging is enabled +const logMatch = ( + url: string, + matchedPath: string, + startTime: [number, number] | null, + enableLogging: boolean +) => { + if (enableLogging && startTime) { + const [seconds, nanoseconds] = process.hrtime(startTime); + const duration = seconds * 1000 + nanoseconds / 1e6; + report.info( + `Matched ${url} to ${matchedPath} in ${duration.toFixed(2)}ms` + ); } -} +}; + +// Error handling function for unexpected errors during request processing +const handleError = ( + url: string, + error: Error, + res: express.Response, + next: express.NextFunction +) => { + report.error(`Error processing URL "${url}": ${error.message}`); + res.status(500); + next(error); +}; module.exports = async (program: IServeProgram): Promise => { await initTracer( process.env.GATSBY_OPEN_TRACING_CONFIG_FILE || program.openTracingConfigFile - ) - let { prefixPaths, port, open, host } = program - port = typeof port === `string` ? parseInt(port, 10) : port + ); + let { prefixPaths, port, open, host } = program; + port = typeof port === 'string' ? parseInt(port, 10) : port; const { configModule } = await getConfigFile( program.directory, - `gatsby-config` - ) - const config: IGatsbyConfig = preferDefault(configModule) + 'gatsby-config' + ); + const config: IGatsbyConfig = preferDefault(configModule); - const { pathPrefix: configPathPrefix, trailingSlash } = config || {} + const { pathPrefix: configPathPrefix, trailingSlash } = config || {}; + const pathPrefix = prefixPaths && configPathPrefix ? configPathPrefix : '/'; + const root = path.join(program.directory, 'public'); - const pathPrefix = prefixPaths && configPathPrefix ? configPathPrefix : `/` + const app = express(); + const { partytownProxiedURLs = [] } = config || {}; - const root = path.join(program.directory, `public`) - - const app = express() - - // Proxy gatsby-script using off-main-thread strategy - const { partytownProxiedURLs = [] } = config || {} - - app.use(thirdPartyProxyPath, partytownProxy(partytownProxiedURLs)) - - // eslint-disable-next-line new-cap - const router = express.Router() - - router.use(compression()) + app.use(thirdPartyProxyPath, partytownProxy(partytownProxiedURLs)); + const router = express.Router(); + router.use(compression()); router.use( configureTrailingSlash( () => @@ -192,265 +225,65 @@ module.exports = async (program: IServeProgram): Promise => { get(pathName: string): IGatsbyPage | undefined { return getDataStore().getNode(`SitePage ${pathName}`) as | IGatsbyPage - | undefined + | undefined; }, values(): Iterable { return getDataStore().iterateNodesByType( - `SitePage` - ) as Iterable + 'SitePage' + ) as Iterable; }, }, } as unknown as IGatsbyState), trailingSlash ) - ) - - router.use(express.static(`public`, { dotfiles: `allow` })) + ); - const compiledFunctionsDir = path.join( - program.directory, - `.cache`, - `functions` - ) + router.use(express.static('public', { dotfiles: 'allow' })); - let functions: Array = [] - try { - functions = JSON.parse( - fs.readFileSync(path.join(compiledFunctionsDir, `manifest.json`), `utf-8`) - ) - } catch (e) { - // ignore - } - - if (functions) { - const functionMiddlewaresInstances = functionMiddlewares({ - getFunctions(): Array { - return functions - }, - }) - - router.use(`/api/*`, ...functionMiddlewaresInstances) - // TODO(v6) remove handler from app and only keep it on router (router is setup on pathPrefix, while app is always root) - app.use(`/api/*`, ...functionMiddlewaresInstances) - } - - // Handle SSR & DSG Pages - let graphqlEnginePath: string | undefined - let pageSSRModule: string | undefined - try { - graphqlEnginePath = require.resolve( - path.posix.join(slash(program.directory), `.cache`, `query-engine`) - ) - pageSSRModule = require.resolve( - path.posix.join(slash(program.directory), `.cache`, `page-ssr`) - ) - } catch (error) { - // TODO: Handle case of engine not being generated - } - - if (graphqlEnginePath && pageSSRModule) { - try { - const { GraphQLEngine } = - require(graphqlEnginePath) as typeof import("../schema/graphql-engine/entry") - const { getData, renderPageData, renderHTML, findEnginePageByPath } = - require(pageSSRModule) as typeof import("../utils/page-ssr-module/entry") - const graphqlEngine = new GraphQLEngine({ - dbPath: path.posix.join( - slash(program.directory), - `.cache`, - `data`, - `datastore` - ), - }) - - router.get( - `/page-data/:pagePath(*)/page-data.json`, - async (req, res, next) => { - const requestedPagePath = req.params.pagePath - if (!requestedPagePath) { - return void next() - } - - const potentialPagePath = reverseFixedPagePath(requestedPagePath) - const page = findEnginePageByPath(potentialPagePath) - - if (page && (page.mode === `DSG` || page.mode === `SSR`)) { - const requestActivity = report.phantomActivity( - `request for "${req.path}"` - ) - requestActivity.start() - try { - const spanContext = requestActivity.span.context() - const data = await getData({ - pathName: req.path, - graphqlEngine, - req, - spanContext, - }) - const results = await renderPageData({ data, spanContext }) - if (data.serverDataHeaders) { - for (const [name, value] of Object.entries( - data.serverDataHeaders - )) { - res.setHeader(name, value) - } - } - - if (page.mode === `SSR` && data.serverDataStatus) { - return void res.status(data.serverDataStatus).send(results) - } else { - return void res.send(results) - } - } catch (e) { - report.error( - `Generating page-data for "${requestedPagePath}" / "${potentialPagePath}" failed.`, - e - ) - return res - .status(500) - .contentType(`text/plain`) - .send(`Internal server error.`) - } finally { - requestActivity.end() - } - } - - return void next() - } - ) - - router.use(async (req, res, next) => { - if (req.accepts(`html`)) { - const potentialPagePath = req.path - const page = findEnginePageByPath(potentialPagePath) - if (page && (page.mode === `DSG` || page.mode === `SSR`)) { - const requestActivity = report.phantomActivity( - `request for "${req.path}"` - ) - requestActivity.start() - - try { - const spanContext = requestActivity.span.context() - const data = await getData({ - pathName: potentialPagePath, - graphqlEngine, - req, - spanContext, - }) - const results = await renderHTML({ data, spanContext }) - if (data.serverDataHeaders) { - for (const [name, value] of Object.entries( - data.serverDataHeaders - )) { - res.setHeader(name, value) - } - } - - if (page.mode === `SSR` && data.serverDataStatus) { - return void res.status(data.serverDataStatus).send(results) - } else { - return void res.send(results) - } - } catch (e) { - report.error( - `Rendering html for "${potentialPagePath}" failed.`, - e - ) - return res.status(500).sendFile(`500.html`, { root }, err => { - if (err) { - res.contentType(`text/plain`).send(`Internal server error.`) - } - }) - } finally { - requestActivity.end() - } - } - } - return next() - }) - } catch (error) { - report.panic({ - id: `98051`, - error, - context: {}, - }) - } - } - - const matchPaths = await readMatchPaths(program) + const matchPaths = await readMatchPaths(program); router.use( createMatchPathMiddleware(matchPaths, { root, - enableLogging: process.env.NODE_ENV !== `production`, + enableLogging: process.env.NODE_ENV !== 'production', }) - ) + ); - // TODO: Remove/merge with above same block router.use((req, res, next) => { - if (req.accepts(`html`)) { - return res.status(404).sendFile(`404.html`, { root }) + if (req.accepts('html')) { + return res.status(404).sendFile('404.html', { root }); } - return next() - }) - app.use(function ( - _: express.Request, - res: express.Response, - next: express.NextFunction - ) { - res.header(`Access-Control-Allow-Origin`, `*`) - res.header( - `Access-Control-Allow-Headers`, - `Origin, X-Requested-With, Content-Type, Accept` - ) - next() - }) - app.use(pathPrefix, router) - - function printInstructions(appName: string, urls: IPreparedUrls): void { - console.log() - console.log(`You can now view ${chalk.bold(appName)} in the browser.`) - console.log() - - if (urls.lanUrlForTerminal) { - console.log( - ` ${chalk.bold(`Local:`)} ${urls.localUrlForTerminal}` - ) - console.log( - ` ${chalk.bold(`On Your Network:`)} ${urls.lanUrlForTerminal}` - ) - } else { - console.log(` ${urls.localUrlForTerminal}`) - } - } + return next(); + }); + + app.use(pathPrefix, router); const startListening = (): void => { app.listen(port, host, () => { const urls = prepareUrls( - program.ssl ? `https` : `http`, + program.ssl ? 'https' : 'http', program.host, port - ) + ); printInstructions( - program.sitePackageJson.name || `(Unnamed package)`, + program.sitePackageJson.name || '(Unnamed package)', urls - ) + ); if (open) { - report.info(`Opening browser...`) + report.info('Opening browser...'); Promise.resolve(openurl(urls.localUrlForBrowser)).catch(() => - report.warn(`Browser not opened because no browser was found`) - ) + report.warn('Browser not opened because no browser was found') + ); } - }) - } + }); + }; try { - port = await detectPortInUseAndPrompt(port, program.host) - startListening() + port = await detectPortInUseAndPrompt(port, program.host); + startListening(); } catch (e) { - if (e.message === `USER_REJECTED`) { - return + if (e.message === 'USER_REJECTED') { + return; } - - throw e + throw e; } -} +}; From 0e7f6da10f5696f4d693b760c71b1df687423dc1 Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 21:33:42 +0800 Subject: [PATCH 5/6] Update serve.ts --- packages/gatsby/src/commands/serve.ts | 81 ++++++--------------------- 1 file changed, 16 insertions(+), 65 deletions(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index 64c4ece072c7a..282b8cc79a401 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -12,21 +12,13 @@ import { getConfigFile } from '../bootstrap/get-config-file'; import { preferDefault } from '../bootstrap/prefer-default'; import { IProgram } from './types'; import { IPreparedUrls, prepareUrls } from '../utils/prepare-urls'; -import { - IGatsbyConfig, - IGatsbyFunction, - IGatsbyPage, - IGatsbyState, -} from '../redux/types'; +import { IGatsbyConfig, IGatsbyFunction, IGatsbyPage, IGatsbyState } from '../redux/types'; import { reverseFixedPagePath } from '../utils/page-data'; import { initTracer } from '../utils/tracer'; import { configureTrailingSlash } from '../utils/express-middlewares'; import { getDataStore } from '../datastore'; import { functionMiddlewares } from '../internal-plugins/functions/middleware'; -import { - thirdPartyProxyPath, - partytownProxy, -} from '../internal-plugins/partytown/proxy'; +import { thirdPartyProxyPath, partytownProxy } from '../internal-plugins/partytown/proxy'; import { slash } from 'gatsby-core-utils/path'; interface IMatchPath { @@ -43,59 +35,42 @@ interface IMatchPathMiddlewareOptions { enableLogging?: boolean; } -// Reads and parses match paths from the cache file -const readMatchPaths = async ( - program: IServeProgram -): Promise> => { +const readMatchPaths = async (program: IServeProgram): Promise> => { const filePath = path.join(program.directory, '.cache', 'match-paths.json'); try { const rawJSON = await fs.readFile(filePath, 'utf8'); return JSON.parse(rawJSON) as Array; } catch (error) { - report.warn( - `Could not read ${chalk.bold('match-paths.json')} from the .cache directory` - ); - report.warn( - `Client-side routing may not work correctly. Try re-running ${chalk.bold( - 'gatsby build' - )}.` - ); + report.warn(`Could not read ${chalk.bold('match-paths.json')} from the .cache directory`); + report.warn(`Client-side routing may not work correctly. Try re-running ${chalk.bold('gatsby build')}.`); return []; } }; -// Decodes and normalizes URLs, handling edge cases const sanitizeUrl = (url: string): string => { try { let decodedUrl = url; let previousUrl; - // Handle nested encoding (e.g., %252F -> %2F -> /) do { previousUrl = decodedUrl; decodedUrl = decodeURIComponent(decodedUrl); } while (previousUrl !== decodedUrl); - // Strip query params and hash fragments, normalize slashes, and ensure leading slash decodedUrl = decodedUrl.split(/[?#]/)[0]; - decodedUrl = decodedUrl.replace(/\/{2,}/g, '/'); // Collapse repeated slashes + decodedUrl = decodedUrl.replace(/\/{2,}/g, '/'); decodedUrl = decodedUrl.startsWith('/') ? decodedUrl : `/${decodedUrl}`; - decodedUrl = decodedUrl !== '/' ? decodedUrl.replace(/\/$/, '') : decodedUrl; // Remove trailing slash unless root - - // Protect against path traversal attempts by stripping '../' and './' + decodedUrl = decodedUrl !== '/' ? decodedUrl.replace(/\/$/, '') : decodedUrl; decodedUrl = decodedUrl.replace(/(^|\/)\.\.?(\/|$)/g, '/'); - - // Strip non-URL-safe characters, keeping only standard path symbols decodedUrl = decodedUrl.replace(/[^a-zA-Z0-9\-._~!$&'()*+,;=:@/]/g, ''); - return decodedUrl || '/'; // Default to root if empty + return decodedUrl || '/'; } catch (e) { report.warn(`Failed to sanitize URL: "${url}". Error: ${e.message}`); return url; } }; -// Middleware to match paths with enhanced error handling, caching, and logging const createMatchPathMiddleware = ( matchPaths: Array, options: IMatchPathMiddlewareOptions @@ -150,11 +125,7 @@ const createMatchPathMiddleware = ( }; }; -// Finds a matching path using reachMatch while handling potential errors -const findMatchPath = ( - matchPaths: Array, - url: string -): IMatchPath | undefined => { +const findMatchPath = (matchPaths: Array, url: string): IMatchPath | undefined => { return matchPaths.find(({ matchPath }) => { try { return reachMatch(matchPath, url) !== null; @@ -165,7 +136,6 @@ const findMatchPath = ( }); }; -// Logs path matching information if logging is enabled const logMatch = ( url: string, matchedPath: string, @@ -175,13 +145,10 @@ const logMatch = ( if (enableLogging && startTime) { const [seconds, nanoseconds] = process.hrtime(startTime); const duration = seconds * 1000 + nanoseconds / 1e6; - report.info( - `Matched ${url} to ${matchedPath} in ${duration.toFixed(2)}ms` - ); + report.info(`Matched ${url} to ${matchedPath} in ${duration.toFixed(2)}ms`); } }; -// Error handling function for unexpected errors during request processing const handleError = ( url: string, error: Error, @@ -194,16 +161,11 @@ const handleError = ( }; module.exports = async (program: IServeProgram): Promise => { - await initTracer( - process.env.GATSBY_OPEN_TRACING_CONFIG_FILE || program.openTracingConfigFile - ); + await initTracer(process.env.GATSBY_OPEN_TRACING_CONFIG_FILE || program.openTracingConfigFile); let { prefixPaths, port, open, host } = program; port = typeof port === 'string' ? parseInt(port, 10) : port; - const { configModule } = await getConfigFile( - program.directory, - 'gatsby-config' - ); + const { configModule } = await getConfigFile(program.directory, 'gatsby-config'); const config: IGatsbyConfig = preferDefault(configModule); const { pathPrefix: configPathPrefix, trailingSlash } = config || {}; @@ -223,14 +185,10 @@ module.exports = async (program: IServeProgram): Promise => { ({ pages: { get(pathName: string): IGatsbyPage | undefined { - return getDataStore().getNode(`SitePage ${pathName}`) as - | IGatsbyPage - | undefined; + return getDataStore().getNode(`SitePage ${pathName}`) as IGatsbyPage | undefined; }, values(): Iterable { - return getDataStore().iterateNodesByType( - 'SitePage' - ) as Iterable; + return getDataStore().iterateNodesByType('SitePage') as Iterable; }, }, } as unknown as IGatsbyState), @@ -259,15 +217,8 @@ module.exports = async (program: IServeProgram): Promise => { const startListening = (): void => { app.listen(port, host, () => { - const urls = prepareUrls( - program.ssl ? 'https' : 'http', - program.host, - port - ); - printInstructions( - program.sitePackageJson.name || '(Unnamed package)', - urls - ); + const urls = prepareUrls(program.ssl ? 'https' : 'http', program.host, port); + printInstructions(program.sitePackageJson.name || '(Unnamed package)', urls); if (open) { report.info('Opening browser...'); Promise.resolve(openurl(urls.localUrlForBrowser)).catch(() => From 05bbc060772d144272b586f0fb5d346f379f99d2 Mon Sep 17 00:00:00 2001 From: Emmett Hoolahan Date: Thu, 7 Nov 2024 21:55:25 +0800 Subject: [PATCH 6/6] refactor(serve): Improve URL handling and add SSL support Refactor the serve.ts file to improve URL handling and add support for SSL. This includes importing the 'http' and 'https' modules, creating a server based on the SSL configuration, and updating the startListening function to use the appropriate protocol. This change enhances the URL sanitization and routing in serve.js for SSR, and also handles encoded URLs in SSR for Gatsby. The code changes also fix linting issues and improve URL logging and error handling. The commit message follows the established convention of using a verb in the imperative mood, specifying the area of the code being modified, and providing a concise summary of the changes made. Signed-off-by: Emmett Hoolahan --- packages/gatsby/src/commands/serve.ts | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/gatsby/src/commands/serve.ts b/packages/gatsby/src/commands/serve.ts index 282b8cc79a401..f8e6dc79cfbff 100644 --- a/packages/gatsby/src/commands/serve.ts +++ b/packages/gatsby/src/commands/serve.ts @@ -6,6 +6,8 @@ import express from 'express'; import chalk from 'chalk'; import { match as reachMatch } from '@gatsbyjs/reach-router'; import report from 'gatsby-cli/lib/reporter'; +import http from 'http'; +import https from 'https'; import { detectPortInUseAndPrompt } from '../utils/detect-port-in-use-and-prompt'; import { getConfigFile } from '../bootstrap/get-config-file'; @@ -35,6 +37,7 @@ interface IMatchPathMiddlewareOptions { enableLogging?: boolean; } +// Reads and parses match paths from the cache file const readMatchPaths = async (program: IServeProgram): Promise> => { const filePath = path.join(program.directory, '.cache', 'match-paths.json'); try { @@ -47,6 +50,7 @@ const readMatchPaths = async (program: IServeProgram): Promise } }; +// Decodes and normalizes URLs, handling edge cases const sanitizeUrl = (url: string): string => { try { let decodedUrl = url; @@ -71,6 +75,7 @@ const sanitizeUrl = (url: string): string => { } }; +// Middleware to match paths with enhanced error handling, caching, and logging const createMatchPathMiddleware = ( matchPaths: Array, options: IMatchPathMiddlewareOptions @@ -125,6 +130,7 @@ const createMatchPathMiddleware = ( }; }; +// Finds a matching path using reachMatch while handling potential errors const findMatchPath = (matchPaths: Array, url: string): IMatchPath | undefined => { return matchPaths.find(({ matchPath }) => { try { @@ -136,6 +142,7 @@ const findMatchPath = (matchPaths: Array, url: string): IMatchPath | }); }; +// Logs path matching information if logging is enabled const logMatch = ( url: string, matchedPath: string, @@ -149,6 +156,7 @@ const logMatch = ( } }; +// Error handling function for unexpected errors during request processing const handleError = ( url: string, error: Error, @@ -162,7 +170,7 @@ const handleError = ( module.exports = async (program: IServeProgram): Promise => { await initTracer(process.env.GATSBY_OPEN_TRACING_CONFIG_FILE || program.openTracingConfigFile); - let { prefixPaths, port, open, host } = program; + let { prefixPaths, port, open, host, ssl } = program; port = typeof port === 'string' ? parseInt(port, 10) : port; const { configModule } = await getConfigFile(program.directory, 'gatsby-config'); @@ -216,8 +224,12 @@ module.exports = async (program: IServeProgram): Promise => { app.use(pathPrefix, router); const startListening = (): void => { - app.listen(port, host, () => { - const urls = prepareUrls(program.ssl ? 'https' : 'http', program.host, port); + const server = ssl + ? https.createServer(ssl, app) + : http.createServer(app); + + server.listen(port, host, () => { + const urls = prepareUrls(ssl ? 'https' : 'http', program.host, port); printInstructions(program.sitePackageJson.name || '(Unnamed package)', urls); if (open) { report.info('Opening browser...');