From cabd6821e7e7f46e93dd556b7de8ec262dee3c80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Delval?= Date: Wed, 22 Nov 2023 15:11:15 +0100 Subject: [PATCH] Blog import (#43) * 1st vers of blog article import with basic cleanup * Added tags to meta table, removed inline metadata * Blog import --------- Co-authored-by: Chris Bohnert <38424477+bohnertchris@users.noreply.github.com> --- tools/importer/import.js | 7 ++++++ tools/importer/transformers/blogArticle.js | 19 ++++++++++++++++ tools/importer/transformers/blogBanner.js | 26 ++++++++++++++++++++++ tools/importer/transformers/cleanBlog.js | 13 +++++++++++ tools/importer/transformers/index.js | 8 ++++++- tools/importer/transformers/metadata.js | 18 +++++++++++++++ 6 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 tools/importer/transformers/blogArticle.js create mode 100644 tools/importer/transformers/blogBanner.js create mode 100644 tools/importer/transformers/cleanBlog.js diff --git a/tools/importer/import.js b/tools/importer/import.js index 7c5077bd..c2382d15 100644 --- a/tools/importer/import.js +++ b/tools/importer/import.js @@ -13,6 +13,7 @@ /* eslint-disable class-methods-use-this */ // helix-importer-ui <-> node compatibility: + import { xfTransformers, xfAsyncTransformers, transformers, postTransformers, } from './transformers/index.js'; @@ -57,12 +58,18 @@ export default { 'div.loader-wrapper', 'div.cmp-page__skiptomaincontent', 'div#mainContent', + 'div.page-header', + // Remove navigation from the beginning of blog entries as well as readmore-type teasers and blurb about 24petwatch at the end + 'nav', + 'div.imagelist', + 'div.cmp-experiencefragment--blog-page-cta-component', ]); // create the metadata block and append it to the main element postTransformers.forEach( (fn) => fn.call(this, main, document, params, url), ); + return main; }, diff --git a/tools/importer/transformers/blogArticle.js b/tools/importer/transformers/blogArticle.js new file mode 100644 index 
00000000..7a9f88bf
--- /dev/null
+++ b/tools/importer/transformers/blogArticle.js
@@ -0,0 +1,31 @@
+/**
+ * Moves the content of every definition-list entry labelled "Text" into a
+ * plain div appended to the enclosing article element, then removes the pair.
+ *
+ * dt and dd elements are paired by their position in document order.
+ *
+ * @param {Element} main The main element the importer passes to each
+ *   transformer; used as the container when a dt has no article ancestor.
+ * @param {Document} document Document being imported.
+ */
+function createBlogArticle(main, document) {
+  // querySelectorAll returns static lists, so removing nodes while iterating is safe.
+  const dts = document.querySelectorAll('dt');
+  const dds = document.querySelectorAll('dd');
+
+  for (let i = 0; i < dts.length; i += 1) {
+    const dt = dts[i];
+    const dd = dds[i];
+
+    // Skip labels other than "Text", and dts without a matching dd.
+    if (dd && dt.textContent.trim() === 'Text') {
+      const div = document.createElement('div');
+      div.innerHTML = dd.innerHTML;
+      (dt.closest('article') || main).appendChild(div);
+      dt.remove();
+      dd.remove();
+    }
+  }
+}
+
+export default createBlogArticle;
diff --git a/tools/importer/transformers/blogBanner.js b/tools/importer/transformers/blogBanner.js
new file mode 100644
index 00000000..32bc6620
--- /dev/null
+++ b/tools/importer/transformers/blogBanner.js
@@ -0,0 +1,25 @@
+/**
+ * Copies the banner image found under #mainContent into a new div that is
+ * inserted as the first child of main.
+ *
+ * @param {Element} main The main element the importer passes to each transformer.
+ * @param {Document} document Document being imported.
+ */
+function blogBanner(main, document) {
+  const bannerImage = main.querySelector('#mainContent img');
+  const src = bannerImage ? bannerImage.getAttribute('src') : null;
+
+  // Nothing to do without a banner image, or when the image has no src to copy.
+  if (!src) {
+    return;
+  }
+
+  const img = document.createElement('img');
+  img.setAttribute('src', src);
+
+  const div = document.createElement('div');
+  div.append(img);
+  main.prepend(div);
+}
+
+export default blogBanner;
diff --git a/tools/importer/transformers/cleanBlog.js b/tools/importer/transformers/cleanBlog.js
new file mode 100644
index 00000000..7242ee1b
--- /dev/null
+++ b/tools/importer/transformers/cleanBlog.js
@@ -0,0 +1,13 @@
+/**
+ * Removes every dt and dd element left in the document.
+ *
+ * @param {Element} main The main element the importer passes in (unused).
+ * @param {Document} document Document being imported.
+ */
+function cleanBlog(main, document) {
+  // One combined query, so an unequal number of dts and dds cannot cause a
+  // remove() call on undefined or leave elements behind.
+  document.querySelectorAll('dt, dd').forEach((el) => el.remove());
+}
+
+export default cleanBlog;
diff --git a/tools/importer/transformers/index.js
b/tools/importer/transformers/index.js index af31ed21..89883ccf 100644 --- a/tools/importer/transformers/index.js +++ b/tools/importer/transformers/index.js @@ -7,11 +7,16 @@ import createHeader from './header.js'; import createHero from './hero.js'; import createMetadata from './metadata.js'; import createBold from './bold.js'; +import blogBanner from './blogBanner.js'; +import createBlogArticle from './blogArticle.js'; +import cleanBlog from './cleanBlog.js'; export const transformers = [ createBold, createFullLayoutSection, - createHero, + blogBanner, + createBlogArticle, + // createHero, createHomepage, createCards, createFeatureImage, @@ -31,4 +36,5 @@ export const preTransformers = [ export const postTransformers = [ createMetadata, + cleanBlog ]; diff --git a/tools/importer/transformers/metadata.js b/tools/importer/transformers/metadata.js index 54758f13..fce65176 100644 --- a/tools/importer/transformers/metadata.js +++ b/tools/importer/transformers/metadata.js @@ -25,6 +25,24 @@ const createMetadata = (main, document) => { delete meta.Title; } + // Get blog article tags + const blogTags = document.querySelectorAll('div.cmp-contentfragment__element--tag > dd.cmp-contentfragment__element-value'); + if( blogTags ) { + for( let i = 0; i < blogTags.length; i += 1 ) { + meta.Tags = blogTags[i].innerHTML.replace('
', ' '); + } + } + + // Get blog related articles + // Uses the first ul.cmp-image-list__list on the page, assumed to be the related-articles list; meta.Related ends up holding the href of its last title link + const relatedArticles = document.querySelector('ul.cmp-image-list__list'); + if ( relatedArticles ){ + const articleLinks = relatedArticles.querySelectorAll('a.cmp-image-list__item-title-link'); + for ( let i = 0; i < articleLinks.length; i += 1 ) { + meta.Related = articleLinks[i].getAttribute('href'); + } + } + const block = WebImporter.Blocks.getMetadataBlock(document, meta); main.append(block);