Skip to content

Commit

Permalink
Set 'referrer' and 'referrerPolicy' when fetching images (#173)
Browse files Browse the repository at this point in the history
* Set 'referrer' and 'referrerPolicy' when fetching inline images, re: #172

* Also send referrer when fetching images for EPUB.
  • Loading branch information
danburzo authored May 27, 2024
1 parent 387a94f commit c8d4eb3
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 9 deletions.
29 changes: 23 additions & 6 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import humanDate from './src/util/human-date.js';
import outputPath from './src/util/output-path.js';
import getCssPageFormat from './src/util/get-css-page-format.js';
import { resolveSequence, resolveParallel } from './src/util/promises.js';
import { getUrlOrigin } from './src/util/url-origin.js';
import addExif from './src/exif.js';
import { hyphenateDom } from './src/hyphenate.js';
import { textToIso6391, getLanguageAttribute } from './src/util/language.js';
Expand Down Expand Up @@ -303,6 +304,16 @@ async function cleanup(url, options) {
headers: {
'user-agent': UA
},
/*
Send the referrer as the browser would
when fetching the image to render it.
The referrer policy would take care of
stripping the URL down to its origin,
but just in case, let’s strip it ourselves.
*/
referrer: getUrlOrigin(final_url),
referrerPolicy: 'strict-origin-when-cross-origin',
timeout: 10 * 1000
},
options.debug ? out : undefined
Expand Down Expand Up @@ -880,17 +891,23 @@ async function epubgen(data, output_path, options) {
let entry = remoteResources[i];
try {
if (options.debug) {
err.write(`Fetching: ${entry[0]}\n`);
err.write(`Fetching: ${entry.original}\n`);
}
let stream = (
await fetch(entry[0], {
await fetch(entry.original, {
headers: {
'user-agent': UA
},
/*
Send the referrer as the browser would
when fetching the image to render it.
*/
referrer: entry.origin,
referrerPolicy: 'strict-origin-when-cross-origin',
timeout: 10 * 1000
})
).body;
archive.append(stream, { name: `OEBPS/${entry[1]}` });
archive.append(stream, { name: `OEBPS/${entry.mapped}` });
} catch (err) {
console.error(err);
}
Expand Down Expand Up @@ -945,9 +962,9 @@ async function epubgen(data, output_path, options) {
}
: undefined,
remoteResources: remoteResources.map(entry => ({
id: entry[1].replace(/[^a-z0-9]/gi, ''),
href: entry[1],
mimetype: fileMimetype(entry[1])
id: entry.mapped.replace(/[^a-z0-9]/gi, ''),
href: entry.mapped,
mimetype: fileMimetype(entry.mapped)
}))
});

Expand Down
11 changes: 8 additions & 3 deletions src/remote-resources.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { randomUUID as uuid } from 'node:crypto';
import { parseSrcset, stringifySrcset } from 'srcset';
import { REGEX_IMAGE_URL } from './constants/regex.js';
import { getUrlOrigin } from './util/url-origin.js';

export default function remoteResources(doc) {
let srcs = new Map();
Expand All @@ -21,9 +22,13 @@ export default function remoteResources(doc) {
return src;
}
if (!srcs.has(src)) {
srcs.set(src, `rr-${uuid()}.${match[1]}`);
srcs.set(src, {
original: src,
mapped: `rr-${uuid()}.${match[1]}`,
origin: getUrlOrigin(doc.baseURI)
});
}
return `./${srcs.get(src)}`;
return `./${srcs.get(src).mapped}`;
}

Array.from(doc.querySelectorAll('picture source[src], img[src]')).forEach(
Expand Down Expand Up @@ -63,5 +68,5 @@ export default function remoteResources(doc) {
console.error(err);
}
});
return Array.from(srcs.entries());
return Array.from(srcs.values());
}
9 changes: 9 additions & 0 deletions src/util/url-origin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Extract the origin of a URL string.
 *
 * @param {string} str - The URL to inspect.
 * @returns {string|undefined} The serialized origin, or `undefined` when
 *   `str` cannot be parsed as an absolute URL or when its origin
 *   serializes to the string 'null' (as happens for `file:` URLs).
 */
export function getUrlOrigin(str) {
	let parsed;
	try {
		parsed = new URL(str);
	} catch (err) {
		// Not a parseable absolute URL: there is no origin to report.
		return undefined;
	}
	const { origin } = parsed;
	if (!origin || origin === 'null') {
		return undefined;
	}
	return origin;
}
9 changes: 9 additions & 0 deletions test/url-origin.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import tape from 'tape';
import { getUrlOrigin } from '../src/util/url-origin.js';

// Table-driven check of getUrlOrigin: each row is [input, expected origin].
tape('getUrlOrigin', test => {
	const cases = [
		// Not a URL at all.
		['invalid', undefined],
		// A file: URL is expected to yield no usable origin.
		['file:///Users/myuser/', undefined],
		// Regular http(s) URL: path is stripped, origin is kept.
		['https://github.com/user/repo', 'https://github.com']
	];
	for (const [input, expected] of cases) {
		test.equal(getUrlOrigin(input), expected);
	}
	test.end();
});

0 comments on commit c8d4eb3

Please sign in to comment.