Update playwright interface to not open all pages at the same time
dilirity committed Feb 7, 2025
1 parent d1d5fcd commit 5fce223
Showing 2 changed files with 177 additions and 89 deletions.
107 changes: 54 additions & 53 deletions projects/js-packages/critical-css-gen/src/browser-interface-playwright.ts
@@ -16,62 +16,14 @@ export class BrowserInterfacePlaywright extends BrowserInterface {
* Creates a new BrowserInterfacePlaywright instance.
*
* @param {BrowserContext} context - The playwright browser context to work with.
* @param {string[]} urls - Array of urls to evaluate. The reason we are taking this as an argument is because we want to load all of them in parallel.
* @param {string[]} urls - Array of urls to evaluate.
*/
constructor(
private context: BrowserContext,
private urls: string[]
) {
super();
}

private async getTabs() {
if ( typeof this.tabs === 'undefined' ) {
await this.openUrls( this.context, this.urls );
}

return this.tabs;
}

/**
* Open an array of urls in a new browser context.
*
* Take a browser instance and an array of urls to open in new tabs.
*
* @param {BrowserContext} context - Browser context to use.
* @param {string[]} urls - Array of urls to open.
* @return {Promise< TabsByUrl >} Promise resolving to the browser context.
*/
private async openUrls( context: BrowserContext, urls: string[] ): Promise< void > {
this.tabs = await objectPromiseAll< Tab >(
urls.reduce( ( set, url ) => {
set[ url ] = this.newTab( context, url );
return set;
}, {} )
);
}

/**
* Open url in a new tab in a given browserContext.
*
* @param {BrowserContext} browserContext - Browser context to use.
* @param {string} url - Url to open.
* @return {Promise<Page>} Promise resolving to the page instance.
*/
private async newTab( browserContext: BrowserContext, url: string ): Promise< Tab > {
const tab = {
page: await browserContext.newPage(),
statusCode: null,
};
tab.page.on( 'response', async response => {
if ( response.url() === url ) {
tab.statusCode = response.status();
}
} );

await tab.page.goto( url, { timeout: PAGE_GOTO_TIMEOUT_MS } );

return tab;
this.tabs = {};
}

async runInPage< ReturnType >(
@@ -80,11 +32,13 @@ export class BrowserInterfacePlaywright extends BrowserInterface {
method: BrowserRunnable< ReturnType >,
...args: unknown[]
): Promise< ReturnType > {
const tabs = await this.getTabs();
const tab = tabs[ pageUrl ];
const tab = this.tabs[ pageUrl ];

if ( ! tab || ! tab.page ) {
throw new Error( `Playwright interface does not include URL ${ pageUrl }` );
if ( ! this.urls.includes( pageUrl ) ) {
throw new Error( `URL not in original URL set: ${ pageUrl }` );
}
throw new Error( `Page not loaded in current batch: ${ pageUrl }` );
}

// Bail early if the page returned a non-200 status code.
@@ -120,4 +74,51 @@ export class BrowserInterfacePlaywright extends BrowserInterface {
private isOkStatus( statusCode: number ) {
return statusCode >= 200 && statusCode < 300;
}

async loadBatch( urls: string[] ): Promise< void > {
// Close existing tabs
await this.closeTabs();

// Load new batch of URLs
this.tabs = await objectPromiseAll< Tab >(
urls.reduce( ( set, url ) => {
set[ url ] = this.newTab( this.context, url );
return set;
}, {} )
);
}
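
loadBatch relies on the objectPromiseAll helper imported at the top of this file (its import line sits outside the visible hunks). Presumably it awaits an object of promises and resolves to an object of values under the same keys; a minimal sketch of such a helper, offered as an assumption rather than the actual implementation:

async function objectPromiseAll< T >( promises: { [ key: string ]: Promise< T > } ): Promise< { [ key: string ]: T } > {
	// Await all values in parallel while remembering which key each one belongs to.
	const keys = Object.keys( promises );
	const values = await Promise.all( keys.map( key => promises[ key ] ) );
	return keys.reduce(
		( result, key, index ) => {
			result[ key ] = values[ index ];
			return result;
		},
		{} as { [ key: string ]: T }
	);
}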

/**
* Open url in a new tab in a given browserContext.
*
* @param {BrowserContext} browserContext - Browser context to use.
* @param {string} url - Url to open.
* @return {Promise<Tab>} Promise resolving to the tab instance.
*/
private async newTab( browserContext: BrowserContext, url: string ): Promise< Tab > {
const tab = {
page: await browserContext.newPage(),
statusCode: null,
};
tab.page.on( 'response', async response => {
if ( response.url() === url ) {
tab.statusCode = response.status();
}
} );

await tab.page.goto( url, { timeout: PAGE_GOTO_TIMEOUT_MS } );

return tab;
}

private async closeTabs(): Promise< void > {
for ( const tab of Object.values( this.tabs ) ) {
await tab.page.close();
}
this.tabs = {};
}

async cleanup() {
await this.closeTabs();
}
}
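
For reference, a minimal sketch of how the batched interface is meant to be driven after this change (the chromium launch, the URL values and the page callback are illustrative assumptions, not part of this diff):

import { chromium } from 'playwright';
import { BrowserInterfacePlaywright } from './browser-interface-playwright.js';

const urls = [ 'https://example.invalid/', 'https://example.invalid/about/' ]; // hypothetical URLs
const browser = await chromium.launch();
const context = await browser.newContext();
const browserInterface = new BrowserInterfacePlaywright( context, urls );

// Pages are no longer opened eagerly in the constructor; callers load them batch by batch.
await browserInterface.loadBatch( urls.slice( 0, 1 ) );
const title = await browserInterface.runInPage< string >( urls[ 0 ], null, () => document.title );

await browserInterface.cleanup();
await browser.close();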
159 changes: 123 additions & 36 deletions projects/js-packages/critical-css-gen/src/generate-critical-css.ts
@@ -1,3 +1,4 @@
import { BrowserInterfacePlaywright } from './browser-interface-playwright.js';
import { BrowserInterface } from './browser-interface.js';
import { CSSFileSet } from './css-file-set.js';
import { SuccessTargetError, EmptyCSSError, UrlError } from './errors.js';
@@ -9,24 +10,23 @@ const noop = () => {
// No op.
};

// Default number of pages to load in each batch.
const DEFAULT_BATCH_SIZE = 5;

/**
* Collate and return a CSSFileSet object describing all the CSS files used by
* the set of URLs provided.
*
* Errors that occur during this process are collated, but not thrown yet.
*
* Process a batch of URLs and update the CSS files set
* @param {BrowserInterface} browserInterface - interface to access pages
* @param {string[]} urls - list of URLs to scan for CSS files
* @param {number} maxPages - number of pages to process at most
* @return {Array} - Two member array; CSSFileSet, and an object containing errors that occurred at each URL.
* @param {CSSFileSet} cssFiles - CSSFileSet object to update
* @param {object} errors - object to store errors
* @return {Promise< number >} - number of successes
*/
async function collateCssFiles(
async function processBatch(
browserInterface: BrowserInterface,
urls: string[],
maxPages: number
): Promise< [ CSSFileSet, { [ url: string ]: UrlError } ] > {
const cssFiles = new CSSFileSet( browserInterface );
const errors = {};
cssFiles: CSSFileSet,
errors: { [ url: string ]: UrlError }
): Promise< number > {
let successes = 0;

for ( const url of urls ) {
Expand Down Expand Up @@ -56,16 +56,74 @@ async function collateCssFiles(
const internalStyles = await browserInterface.getInternalStyles( url );
await cssFiles.addInternalStyles( url, internalStyles );

// Abort early if we hit the critical mass
successes++;
if ( successes >= maxPages ) {
break;
}
} catch ( err ) {
errors[ url ] = err;
}
}

return successes;
}

/**
* Collate and return a CSSFileSet object describing all the CSS files used by
* the set of URLs provided.
*
* Errors that occur during this process are collated, but not thrown yet.
*
* @param {BrowserInterface} browserInterface - interface to access pages
* @param {string[]} urls - list of URLs to scan for CSS files
* @param {number} maxPages - number of pages to process at most
* @param {number} batchSize - number of pages to process in each batch
* @return {Array} - Two member array; CSSFileSet, and an object containing errors that occurred at each URL.
*/
async function collateCssFiles(
browserInterface: BrowserInterface,
urls: string[],
maxPages: number,
batchSize: number
): Promise< [ CSSFileSet, { [ url: string ]: UrlError } ] > {
const cssFiles = new CSSFileSet( browserInterface );
const errors = {};
let totalSuccesses = 0;
const failedUrls = new Set< string >();

// Process URLs in batches
for ( let i = 0; i < urls.length && totalSuccesses < maxPages; i += batchSize ) {
const batchUrls = urls.slice( i, i + batchSize ).filter( url => ! failedUrls.has( url ) );

// If no valid URLs in this batch, continue to next batch
if ( batchUrls.length === 0 ) {
continue;
}

// For Playwright, load the batch of pages
if ( browserInterface instanceof BrowserInterfacePlaywright ) {
await browserInterface.loadBatch( batchUrls );
}

const batchSuccesses = await processBatch( browserInterface, batchUrls, cssFiles, errors );
totalSuccesses += batchSuccesses;

// Add failed URLs from this batch to failedUrls set
batchUrls.forEach( url => {
if ( errors[ url ] ) {
failedUrls.add( url );
// eslint-disable-next-line no-console
console.log( `Failed URL: ${ url }`, errors[ url ] );
}
} );

// If we've tried all URLs and still haven't hit maxPages, break to avoid infinite loop
if ( failedUrls.size === urls.length ) {
break;
}

if ( totalSuccesses >= maxPages ) {
break;
}
}

return [ cssFiles, errors ];
}
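
One consequence of moving the success check out of processBatch and into this loop is that a batch always runs to completion, so totalSuccesses can overshoot maxPages by up to batchSize - 1. A standalone sketch of that arithmetic, with illustrative numbers only:

const exampleUrls = Array.from( { length: 12 }, ( _, n ) => `https://example.invalid/page-${ n }` );
const exampleBatchSize = 5;
const exampleMaxPages = 8;
let exampleSuccesses = 0;
for ( let i = 0; i < exampleUrls.length && exampleSuccesses < exampleMaxPages; i += exampleBatchSize ) {
	// Pretend every URL in the batch succeeds, as processBatch would report in the best case.
	exampleSuccesses += exampleUrls.slice( i, i + exampleBatchSize ).length;
}
// exampleSuccesses === 10: the second batch of 5 completes in full, overshooting exampleMaxPages of 8.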

@@ -78,6 +136,7 @@ async function collateCssFiles(
* @param {string[]} param.validUrls - List of all the valid URLs
* @param {Array} param.viewports - Browser viewports
* @param {number} param.maxPages - Maximum number of pages to process
* @param {number} param.batchSize - Number of pages to process in each batch
* @param {Function} param.updateProgress - Update progress callback function
*
* @return {Set<string>} - List of above the fold selectors.
@@ -88,13 +147,15 @@ async function getAboveFoldSelectors( {
validUrls,
viewports,
maxPages,
batchSize,
updateProgress,
}: {
browserInterface: BrowserInterface;
selectorPages: { [ selector: string ]: Set< string > };
validUrls: string[];
viewports: Viewport[];
maxPages: number;
batchSize: number;
updateProgress: () => void;
} ): Promise< Set< string > > {
// For each selector string, create a "trimmed" version with the stuff JavaScript can't handle cut out.
@@ -104,34 +165,51 @@ async function getAboveFoldSelectors( {
}, {} );

// Go through all the URLs looking for above-the-fold selectors, and selectors which may be "dangerous"
// i.e.: may match elements on pages that do not include their CSS file.
const aboveFoldSelectors = new Set< string >();
const dangerousSelectors = new Set< string >();

for ( const url of validUrls.slice( 0, maxPages ) ) {
// Work out which CSS selectors match any element on this page.
const pageSelectors = await browserInterface.runInPage<
ReturnType< typeof BrowserInterface.innerFindMatchingSelectors >
>( url, null, BrowserInterface.innerFindMatchingSelectors, trimmedSelectors );
// Process URLs in batches
for ( let i = 0; i < validUrls.length && i < maxPages; i += batchSize ) {
const batchUrls = validUrls.slice( i, i + batchSize );

// Check for selectors which may match this page, but are not included in this page's CSS.
pageSelectors
.filter( s => ! selectorPages[ s ].has( url ) )
.forEach( s => dangerousSelectors.add( s ) );
// For Playwright, load the batch of pages
if ( browserInterface instanceof BrowserInterfacePlaywright ) {
await browserInterface.loadBatch( batchUrls );
}

// Collate all above-fold selectors for all viewport sizes.
for ( const size of viewports ) {
updateProgress();
// Process each URL in the batch
for ( const url of batchUrls ) {
// Work out which CSS selectors match any element on this page
const pageSelectors = await browserInterface.runInPage< string[] >(
url,
null,
BrowserInterface.innerFindMatchingSelectors,
trimmedSelectors
);

// Check for selectors which may match this page, but are not included in this page's CSS
pageSelectors
.filter( s => ! selectorPages[ s ].has( url ) )
.forEach( s => dangerousSelectors.add( s ) );

// Collate all above-fold selectors for all viewport sizes
for ( const size of viewports ) {
updateProgress();

const pageAboveFold = await browserInterface.runInPage<
ReturnType< typeof BrowserInterface.innerFindAboveFoldSelectors >
>( url, size, BrowserInterface.innerFindAboveFoldSelectors, trimmedSelectors, pageSelectors );
const pageAboveFold = await browserInterface.runInPage< string[] >(
url,
size,
BrowserInterface.innerFindAboveFoldSelectors,
trimmedSelectors,
pageSelectors
);

pageAboveFold.forEach( s => aboveFoldSelectors.add( s ) );
pageAboveFold.forEach( s => aboveFoldSelectors.add( s ) );
}
}
}

// Remove dangerous selectors from above fold set.
// Remove dangerous selectors from above fold set
for ( const dangerousSelector of dangerousSelectors ) {
aboveFoldSelectors.delete( dangerousSelector );
}
@@ -150,6 +228,7 @@ async function getAboveFoldSelectors( {
* @param {FilterSpec} root0.filters - Optional filters to apply to the CSS
* @param {number} root0.successRatio - Ratio of successful URLs required (default: 1)
* @param {number} root0.maxPages - Maximum number of pages to process (default: 10)
* @param {number} root0.batchSize - Number of pages to process in each batch (default: 5)
* @return {Promise<[string, Error[]]>} A promise that resolves to an array containing the critical CSS string and an array of errors.
*/
export async function generateCriticalCSS( {
@@ -160,6 +239,7 @@ export async function generateCriticalCSS( {
filters,
successRatio = 1,
maxPages = 10,
batchSize = DEFAULT_BATCH_SIZE,
}: {
browserInterface: BrowserInterface;
progressCallback?: ( step: number, total: number ) => void;
@@ -168,6 +248,7 @@ export async function generateCriticalCSS( {
filters?: FilterSpec;
successRatio?: number;
maxPages?: number;
batchSize?: number;
} ): Promise< [ string, Error[] ] > {
// Success threshold is calculated based on the success ratio of "the number of URLs provided", or "maxPages" whichever is lower.
// See 268-gh-Automattic/boost-cloud
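
A worked instance of the rule described in this comment, with purely illustrative values (the actual threshold computation sits in the unchanged lines between these hunks and is not reproduced here):

// Illustrative arithmetic only — not the committed implementation.
const exampleUrlCount = 20;
const exampleMaxPages = 10;
const exampleSuccessRatio = 0.8;
const exampleThreshold = Math.min( exampleUrlCount, exampleMaxPages ) * exampleSuccessRatio; // 8 successful URLs required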
@@ -179,8 +260,13 @@ export async function generateCriticalCSS( {
const progressSteps = 1 + urls.length * viewports.length;
const updateProgress = () => progressCallback( ++progress, progressSteps );

// Collate all CSS Files used by all valid URLs.
const [ cssFiles, cssFileErrors ] = await collateCssFiles( browserInterface, urls, maxPages );
// Collate all CSS Files used by all valid URLs
const [ cssFiles, cssFileErrors ] = await collateCssFiles(
browserInterface,
urls,
maxPages,
batchSize
);
updateProgress();

// Verify there are enough valid URLs to carry on with.
@@ -203,6 +289,7 @@
validUrls,
viewports,
maxPages,
batchSize,
updateProgress,
} );
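
Putting the two files together, a hedged sketch of a call site that opts into the new batching behaviour; context, urls and viewports are assumed to exist as in the earlier sketch, and only option names visible in this diff are used:

const [ criticalCss, criticalCssErrors ] = await generateCriticalCSS( {
	browserInterface: new BrowserInterfacePlaywright( context, urls ),
	urls,
	viewports,
	maxPages: 10,
	batchSize: 5, // new option introduced by this commit; defaults to DEFAULT_BATCH_SIZE when omitted
} );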
