diff --git a/README.md b/README.md index 2037edc6..9801d76a 100644 --- a/README.md +++ b/README.md @@ -106,8 +106,6 @@ Note that the URL argument is still supplied, in order to identify the web site Mercury Parser also ships with a CLI, meaning you can use it from your command line like so: -![Mercury Parser CLI Basic Usage](./assets/parser-basic-usage.gif) - ```bash # Install Mercury Parser globally yarn global add @jocmp/mercury-parser diff --git a/src/extractors/custom/www.versants.com/index.js b/src/extractors/custom/www.versants.com/index.js index 938273ad..b6fce531 100644 --- a/src/extractors/custom/www.versants.com/index.js +++ b/src/extractors/custom/www.versants.com/index.js @@ -17,7 +17,20 @@ export const WwwVersantsComExtractor = { }, content: { - selectors: ['.entry-content'], - clean: ['.adv-link', '.versa-target'], + transforms: { + '.featured-image': $node => { + $node.addClass('mercury-parser-keep'); + const figcaption = $node.find('span'); + $node.find('figure').append(figcaption); + }, + }, + selectors: ['.article-content'], + clean: [ + '.adv-link', + '.versa-target', + 'header', // Clean title + '.author', // Clean author + '.thumbnail-slider', // Remove, the main images will be within the .main-slider div. + ], }, }; diff --git a/src/extractors/custom/www.versants.com/index.test.js b/src/extractors/custom/www.versants.com/index.test.js index 8156bfc9..598021eb 100644 --- a/src/extractors/custom/www.versants.com/index.test.js +++ b/src/extractors/custom/www.versants.com/index.test.js @@ -70,7 +70,7 @@ describe('WwwVersantsComExtractor', () => { assert.equal( first13, - 'La 32e campagne d’Opération Nez rouge de la Vallée-du-Richelieu sera en vigueur durant' + "C'est à Sainte-Julie que les bénévoles de l'ONR VDR se retrouveront dès le" ); }); }); diff --git a/src/utils/dom/clean-tags.js b/src/utils/dom/clean-tags.js index c1a9eb06..a5b307ea 100644 --- a/src/utils/dom/clean-tags.js +++ b/src/utils/dom/clean-tags.js @@ -105,7 +105,7 @@ export default function cleanTags($article, $) { if (weight < 0) { $node.remove(); } else { - // deteremine if node seems like content + // determine if node seems like content removeUnlessContent($node, $, weight); } });