Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Blog import #47

Merged
merged 12 commits into from
Nov 30, 2023
461 changes: 461 additions & 0 deletions tools/blog-us.sorted.txt

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions tools/importer/import.js
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,12 @@ export default {
'div.cmp-page__skiptomaincontent',
'div#mainContent',
'div.page-header',
// Remove navigation from the beginning of blog entries as well as
// readmore-type teasers and blurb about 24petwatch at the end
// Remove navigation from the beginning of blog entries as well as readmore-type
// teasers and blurb about 24petwatch at the end
'nav',
'div.imagelist',
'div.cmp-experiencefragment--blog-page-cta-component',
'div.cmp-layout-manual-articles',
]);

// create the metadata block and append it to the main element
Expand Down
7 changes: 7 additions & 0 deletions tools/importer/transformers/blogArticle.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ function createBlogArticle(main, document) {
const dds = document.querySelectorAll('dd');
if (dts) {
for (let i = 0; i < dts.length; i += 1) {
if (dts[i].textContent === 'Byline') {
const span = document.createElement('em');
const authorText = dds[i].textContent.trim();
span.textContent = authorText;
dts[i].closest('article').appendChild(span);
}

if (dts[i].textContent === 'Text') {
const div = document.createElement('div');
div.innerHTML = dds[i].innerHTML;
Expand Down
4 changes: 2 additions & 2 deletions tools/importer/transformers/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import createFooter from './footer.js';
import createFullLayoutSection from './fullLayoutSection.js';
import createHomepage from './homepage.js';
import createHeader from './header.js';
// import createHero from './hero.js';
import createHero from './hero.js';
import createMetadata from './metadata.js';
import createBold from './bold.js';
import blogBanner from './blogBanner.js';
Expand Down Expand Up @@ -36,5 +36,5 @@ export const preTransformers = [

export const postTransformers = [
createMetadata,
cleanBlog,
cleanBlog
];
20 changes: 20 additions & 0 deletions tools/importer/transformers/makeProxySrcs.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/**
 * Rewrites the `src` of every `<img>` found under `main` so that it points at
 * `http://localhost:3001` (presumably the local import proxy - confirm against
 * the importer setup), keeping the original path and query string and
 * recording the original origin in an extra `host` query parameter.
 *
 * Sources are normalised to absolute URLs first:
 *  - protocol-relative (`//cdn/x.png`) gets an `https:` scheme;
 *  - root-relative (`/x.png`) is prefixed with the origin of `host`.
 *
 * Sources that cannot be parsed as an absolute URL are left as they are and a
 * warning is logged. Sources with a non-HTTP(S) scheme (e.g. `data:` URIs) are
 * left untouched, because they have no origin/path that could be proxied.
 *
 * @param {Element} main - root element whose images are rewritten in place
 * @param {Document} document - unused; kept for the transformer call signature
 * @param {string} [host='https://www.24petwatch.com'] - absolute URL whose
 *   origin is used to resolve root-relative image sources
 * @returns {void}
 */
function makeProxySrcs(main, document, host = 'https://www.24petwatch.com') {
  const proxyOrigin = 'http://localhost:3001';

  main.querySelectorAll('img').forEach((img) => {
    if (img.src.startsWith('//')) {
      img.src = `https:${img.src}`;
    } else if (img.src.startsWith('/')) {
      // make absolute
      const { origin } = new URL(host);
      img.src = `${origin}${img.src}`;
    }

    try {
      const u = new URL(img.src);
      if (u.protocol !== 'http:' && u.protocol !== 'https:') {
        // e.g. data:/blob: URIs - their origin is "null" and their pathname is
        // not a server path, so rewriting them would only corrupt the source.
        return;
      }
      // Remember where the image originally lived so it can still be fetched.
      u.searchParams.append('host', u.origin);
      img.src = `${proxyOrigin}${u.pathname}${u.search}`;
    } catch (error) {
      console.warn(`Unable to make proxy src for ${img.src}: ${error.message}`);
    }
  });
}

export default makeProxySrcs;
22 changes: 18 additions & 4 deletions tools/importer/transformers/metadata.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,17 @@ const createMetadata = (main, document) => {
meta.Description = desc.content;
}

const img = document.querySelector('[property="og:image"]');
if (img && img.content) {
const img = document.querySelector('body img:first-child');
if (img && img.src) {
const el = document.createElement('img');
el.src = img.content;
el.src = img.src.replace('https://www.24petwatch.com', '');
meta.Image = el;
}

const metaImage = document.querySelector('[property="og:image"]');
if (metaImage) {
const el = document.createElement('img');
el.src = metaImage.content.replace('https://www.24petwatch.com', '');
meta.Image = el;
}

Expand All @@ -29,7 +36,9 @@ const createMetadata = (main, document) => {
const blogTags = document.querySelectorAll('div.cmp-contentfragment__element--tag > dd.cmp-contentfragment__element-value');
if (blogTags) {
for (let i = 0; i < blogTags.length; i += 1) {
meta.Tags = blogTags[i].innerHTML.replace('<br>', ' ');
meta.Tags = blogTags[i].innerHTML.trim()
.replaceAll('24petwatch:newletter/topic/', '')
.replaceAll('<br>', ',');
}
}

Expand All @@ -44,6 +53,11 @@ const createMetadata = (main, document) => {
}
}

const author = document.querySelector('div.cmp-contentfragment__element--byline dd');
if (author) {
meta.Author = author.textContent.trim().replace(/^By /, '');
}

const block = WebImporter.Blocks.getMetadataBlock(document, meta);
main.append(block);

Expand Down
Loading