diff --git a/projects/js-packages/critical-css-gen/changelog/update-css-gen-lib-playwright-dont-open-all-pages-at-once b/projects/js-packages/critical-css-gen/changelog/update-css-gen-lib-playwright-dont-open-all-pages-at-once new file mode 100644 index 0000000000000..1bb2055aca602 --- /dev/null +++ b/projects/js-packages/critical-css-gen/changelog/update-css-gen-lib-playwright-dont-open-all-pages-at-once @@ -0,0 +1,4 @@ +Significance: patch +Type: changed + +Update Playwright interface, to open less pages at the same time. diff --git a/projects/js-packages/critical-css-gen/src/browser-interface-playwright.ts b/projects/js-packages/critical-css-gen/src/browser-interface-playwright.ts index f0da79687a6a7..7b31566189d86 100644 --- a/projects/js-packages/critical-css-gen/src/browser-interface-playwright.ts +++ b/projects/js-packages/critical-css-gen/src/browser-interface-playwright.ts @@ -16,62 +16,14 @@ export class BrowserInterfacePlaywright extends BrowserInterface { * Creates a new BrowserInterfacePlaywright instance. * * @param {BrowserContext} context - The playwright browser context to work with. - * @param {string[]} urls - Array of urls to evaluate. The reason we are taking this as an argument is because we want to load all of them in parallel. + * @param {string[]} urls - Array of urls to evaluate. */ constructor( private context: BrowserContext, private urls: string[] ) { super(); - } - - private async getTabs() { - if ( typeof this.tabs === 'undefined' ) { - await this.openUrls( this.context, this.urls ); - } - - return this.tabs; - } - - /** - * Open an array of urls in a new browser context. - * - * Take a browser instance and an array of urls to open in new tabs. - * - * @param {BrowserContext} context - Browser context to use. - * @param {string[]} urls - Array of urls to open. - * @return {Promise< TabsByUrl >} Promise resolving to the browser context. - */ - private async openUrls( context: BrowserContext, urls: string[] ): Promise< void > { - this.tabs = await objectPromiseAll< Tab >( - urls.reduce( ( set, url ) => { - set[ url ] = this.newTab( context, url ); - return set; - }, {} ) - ); - } - - /** - * Open url in a new tab in a given browserContext. - * - * @param {BrowserContext} browserContext - Browser context to use. - * @param {string} url - Url to open. - * @return {Promise} Promise resolving to the page instance. - */ - private async newTab( browserContext: BrowserContext, url: string ): Promise< Tab > { - const tab = { - page: await browserContext.newPage(), - statusCode: null, - }; - tab.page.on( 'response', async response => { - if ( response.url() === url ) { - tab.statusCode = response.status(); - } - } ); - - await tab.page.goto( url, { timeout: PAGE_GOTO_TIMEOUT_MS } ); - - return tab; + this.tabs = {}; } async runInPage< ReturnType >( @@ -80,11 +32,13 @@ export class BrowserInterfacePlaywright extends BrowserInterface { method: BrowserRunnable< ReturnType >, ...args: unknown[] ): Promise< ReturnType > { - const tabs = await this.getTabs(); - const tab = tabs[ pageUrl ]; + const tab = this.tabs[ pageUrl ]; if ( ! tab || ! tab.page ) { - throw new Error( `Playwright interface does not include URL ${ pageUrl }` ); + if ( ! this.urls.includes( pageUrl ) ) { + throw new Error( `URL not in original URL set: ${ pageUrl }` ); + } + throw new Error( `Page not loaded in current batch: ${ pageUrl }` ); } // Bail early if the page returned a non-200 status code. @@ -120,4 +74,51 @@ export class BrowserInterfacePlaywright extends BrowserInterface { private isOkStatus( statusCode: number ) { return statusCode >= 200 && statusCode < 300; } + + async loadBatch( urls: string[] ): Promise< void > { + // Close existing tabs + await this.closeTabs(); + + // Load new batch of URLs + this.tabs = await objectPromiseAll< Tab >( + urls.reduce( ( set, url ) => { + set[ url ] = this.newTab( this.context, url ); + return set; + }, {} ) + ); + } + + /** + * Open url in a new tab in a given browserContext. + * + * @param {BrowserContext} browserContext - Browser context to use. + * @param {string} url - Url to open. + * @return {Promise} Promise resolving to the tab instance. + */ + private async newTab( browserContext: BrowserContext, url: string ): Promise< Tab > { + const tab = { + page: await browserContext.newPage(), + statusCode: null, + }; + tab.page.on( 'response', async response => { + if ( response.url() === url ) { + tab.statusCode = response.status(); + } + } ); + + await tab.page.goto( url, { timeout: PAGE_GOTO_TIMEOUT_MS } ); + + return tab; + } + + private async closeTabs(): Promise< void > { + for ( const tab of Object.values( this.tabs ) ) { + await tab.page.close(); + } + this.tabs = {}; + } + + async cleanup() { + await this.closeTabs(); + } } diff --git a/projects/js-packages/critical-css-gen/src/generate-critical-css.ts b/projects/js-packages/critical-css-gen/src/generate-critical-css.ts index 1083e4f67db8e..10421b8aed7c7 100644 --- a/projects/js-packages/critical-css-gen/src/generate-critical-css.ts +++ b/projects/js-packages/critical-css-gen/src/generate-critical-css.ts @@ -1,3 +1,4 @@ +import { BrowserInterfacePlaywright } from './browser-interface-playwright.js'; import { BrowserInterface } from './browser-interface.js'; import { CSSFileSet } from './css-file-set.js'; import { SuccessTargetError, EmptyCSSError, UrlError } from './errors.js'; @@ -9,24 +10,23 @@ const noop = () => { // No op. }; +// Add this near the top with other constants +const DEFAULT_BATCH_SIZE = 5; + /** - * Collate and return a CSSFileSet object describing all the CSS files used by - * the set of URLs provided. - * - * Errors that occur during this process are collated, but not thrown yet. - * + * Process a batch of URLs and update the CSS files set * @param {BrowserInterface} browserInterface - interface to access pages * @param {string[]} urls - list of URLs to scan for CSS files - * @param {number} maxPages - number of pages to process at most - * @return {Array} - Two member array; CSSFileSet, and an object containing errors that occurred at each URL. + * @param {CSSFileSet} cssFiles - CSSFileSet object to update + * @param {object} errors - object to store errors + * @return {Promise< number >} - number of successes */ -async function collateCssFiles( +async function processBatch( browserInterface: BrowserInterface, urls: string[], - maxPages: number -): Promise< [ CSSFileSet, { [ url: string ]: UrlError } ] > { - const cssFiles = new CSSFileSet( browserInterface ); - const errors = {}; + cssFiles: CSSFileSet, + errors: { [ url: string ]: UrlError } +): Promise< number > { let successes = 0; for ( const url of urls ) { @@ -56,16 +56,74 @@ async function collateCssFiles( const internalStyles = await browserInterface.getInternalStyles( url ); await cssFiles.addInternalStyles( url, internalStyles ); - // Abort early if we hit the critical mass successes++; - if ( successes >= maxPages ) { - break; - } } catch ( err ) { errors[ url ] = err; } } + return successes; +} + +/** + * Collate and return a CSSFileSet object describing all the CSS files used by + * the set of URLs provided. + * + * Errors that occur during this process are collated, but not thrown yet. + * + * @param {BrowserInterface} browserInterface - interface to access pages + * @param {string[]} urls - list of URLs to scan for CSS files + * @param {number} maxPages - number of pages to process at most + * @param {number} batchSize - number of pages to process in each batch + * @return {Array} - Two member array; CSSFileSet, and an object containing errors that occurred at each URL. + */ +async function collateCssFiles( + browserInterface: BrowserInterface, + urls: string[], + maxPages: number, + batchSize: number +): Promise< [ CSSFileSet, { [ url: string ]: UrlError } ] > { + const cssFiles = new CSSFileSet( browserInterface ); + const errors = {}; + let totalSuccesses = 0; + const failedUrls = new Set< string >(); + + // Process URLs in batches + for ( let i = 0; i < urls.length && totalSuccesses < maxPages; i += batchSize ) { + const batchUrls = urls.slice( i, i + batchSize ).filter( url => ! failedUrls.has( url ) ); + + // If no valid URLs in this batch, continue to next batch + if ( batchUrls.length === 0 ) { + continue; + } + + // For Playwright, load the batch of pages + if ( browserInterface instanceof BrowserInterfacePlaywright ) { + await browserInterface.loadBatch( batchUrls ); + } + + const batchSuccesses = await processBatch( browserInterface, batchUrls, cssFiles, errors ); + totalSuccesses += batchSuccesses; + + // Add failed URLs from this batch to failedUrls set + batchUrls.forEach( url => { + if ( errors[ url ] ) { + failedUrls.add( url ); + // eslint-disable-next-line no-console + console.log( `Failed URL: ${ url }`, errors[ url ] ); + } + } ); + + // If we've tried all URLs and still haven't hit maxPages, break to avoid infinite loop + if ( failedUrls.size === urls.length ) { + break; + } + + if ( totalSuccesses >= maxPages ) { + break; + } + } + return [ cssFiles, errors ]; } @@ -78,6 +136,7 @@ async function collateCssFiles( * @param {string[]} param.validUrls - List of all the valid URLs * @param {Array} param.viewports - Browser viewports * @param {number} param.maxPages - Maximum number of pages to process + * @param {number} param.batchSize - Number of pages to process in each batch * @param {Function} param.updateProgress - Update progress callback function * * @return {Set} - List of above the fold selectors. @@ -88,6 +147,7 @@ async function getAboveFoldSelectors( { validUrls, viewports, maxPages, + batchSize, updateProgress, }: { browserInterface: BrowserInterface; @@ -95,6 +155,7 @@ async function getAboveFoldSelectors( { validUrls: string[]; viewports: Viewport[]; maxPages: number; + batchSize: number; updateProgress: () => void; } ): Promise< Set< string > > { // For each selector string, create a "trimmed" version with the stuff JavaScript can't handle cut out. @@ -104,34 +165,51 @@ async function getAboveFoldSelectors( { }, {} ); // Go through all the URLs looking for above-the-fold selectors, and selectors which may be "dangerous" - // i.e.: may match elements on pages that do not include their CSS file. const aboveFoldSelectors = new Set< string >(); const dangerousSelectors = new Set< string >(); - for ( const url of validUrls.slice( 0, maxPages ) ) { - // Work out which CSS selectors match any element on this page. - const pageSelectors = await browserInterface.runInPage< - ReturnType< typeof BrowserInterface.innerFindMatchingSelectors > - >( url, null, BrowserInterface.innerFindMatchingSelectors, trimmedSelectors ); + // Process URLs in batches + for ( let i = 0; i < validUrls.length && i < maxPages; i += batchSize ) { + const batchUrls = validUrls.slice( i, i + batchSize ); - // Check for selectors which may match this page, but are not included in this page's CSS. - pageSelectors - .filter( s => ! selectorPages[ s ].has( url ) ) - .forEach( s => dangerousSelectors.add( s ) ); + // For Playwright, load the batch of pages + if ( browserInterface instanceof BrowserInterfacePlaywright ) { + await browserInterface.loadBatch( batchUrls ); + } - // Collate all above-fold selectors for all viewport sizes. - for ( const size of viewports ) { - updateProgress(); + // Process each URL in the batch + for ( const url of batchUrls ) { + // Work out which CSS selectors match any element on this page + const pageSelectors = await browserInterface.runInPage< string[] >( + url, + null, + BrowserInterface.innerFindMatchingSelectors, + trimmedSelectors + ); + + // Check for selectors which may match this page, but are not included in this page's CSS + pageSelectors + .filter( s => ! selectorPages[ s ].has( url ) ) + .forEach( s => dangerousSelectors.add( s ) ); + + // Collate all above-fold selectors for all viewport sizes + for ( const size of viewports ) { + updateProgress(); - const pageAboveFold = await browserInterface.runInPage< - ReturnType< typeof BrowserInterface.innerFindAboveFoldSelectors > - >( url, size, BrowserInterface.innerFindAboveFoldSelectors, trimmedSelectors, pageSelectors ); + const pageAboveFold = await browserInterface.runInPage< string[] >( + url, + size, + BrowserInterface.innerFindAboveFoldSelectors, + trimmedSelectors, + pageSelectors + ); - pageAboveFold.forEach( s => aboveFoldSelectors.add( s ) ); + pageAboveFold.forEach( s => aboveFoldSelectors.add( s ) ); + } } } - // Remove dangerous selectors from above fold set. + // Remove dangerous selectors from above fold set for ( const dangerousSelector of dangerousSelectors ) { aboveFoldSelectors.delete( dangerousSelector ); } @@ -150,6 +228,7 @@ async function getAboveFoldSelectors( { * @param {FilterSpec} root0.filters - Optional filters to apply to the CSS * @param {number} root0.successRatio - Ratio of successful URLs required (default: 1) * @param {number} root0.maxPages - Maximum number of pages to process (default: 10) + * @param {number} root0.batchSize - Number of pages to process in each batch (default: 5) * @return {Promise<[string, Error[]]>} A promise that resolves to an array containing the critical CSS string and an array of errors. */ export async function generateCriticalCSS( { @@ -160,6 +239,7 @@ export async function generateCriticalCSS( { filters, successRatio = 1, maxPages = 10, + batchSize = DEFAULT_BATCH_SIZE, }: { browserInterface: BrowserInterface; progressCallback?: ( step: number, total: number ) => void; @@ -168,6 +248,7 @@ export async function generateCriticalCSS( { filters?: FilterSpec; successRatio?: number; maxPages?: number; + batchSize?: number; } ): Promise< [ string, Error[] ] > { // Success threshold is calculated based on the success ratio of "the number of URLs provided", or "maxPages" whichever is lower. // See 268-gh-Automattic/boost-cloud @@ -179,8 +260,13 @@ export async function generateCriticalCSS( { const progressSteps = 1 + urls.length * viewports.length; const updateProgress = () => progressCallback( ++progress, progressSteps ); - // Collate all CSS Files used by all valid URLs. - const [ cssFiles, cssFileErrors ] = await collateCssFiles( browserInterface, urls, maxPages ); + // Collate all CSS Files used by all valid URLs + const [ cssFiles, cssFileErrors ] = await collateCssFiles( + browserInterface, + urls, + maxPages, + batchSize + ); updateProgress(); // Verify there are enough valid URLs to carry on with. @@ -203,6 +289,7 @@ export async function generateCriticalCSS( { validUrls, viewports, maxPages, + batchSize, updateProgress, } );