From 8bd1c44101c8971bad92ba6f85c5f3ec50a96c9b Mon Sep 17 00:00:00 2001 From: Josiah Campbell <9521010+jocmp@users.noreply.github.com> Date: Tue, 14 Jan 2025 22:16:03 -0600 Subject: [PATCH] fix: androidauthority.com - Clear polls Remove polls that require JavaScript --- CHANGELOG.md | 2 +- .../custom/www.androidauthority.com/index.js | 14 ++++++++++++-- src/utils/dom/constants.js | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94269f78..1aed55c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ### 2.3.0 - [a45b329e0a] - fix: Update versants.com to parse figures (Josiah Campbell) [#42](https://github.com/jocmp/mercury-parser/pull/42) -- [68e9b88a8d] - fix: androidauthority.com - Retain h3 tags (Josiah Campbell) [#41](https://github.com/jocmp/mercury-parser/pull/41) +- [68e9b88a8d] - fix: androidauthority.com - Retain heading tags (Josiah Campbell) [#41](https://github.com/jocmp/mercury-parser/pull/41) - [c2bc68449f] - bump version v2.2.9 -> v2.2.10 (jocmp) ### v2.2.10 (Jan 11, 2025) diff --git a/src/extractors/custom/www.androidauthority.com/index.js b/src/extractors/custom/www.androidauthority.com/index.js index e67784d3..49158a61 100644 --- a/src/extractors/custom/www.androidauthority.com/index.js +++ b/src/extractors/custom/www.androidauthority.com/index.js @@ -20,16 +20,26 @@ export const WwwAndroidauthorityComExtractor = { // remove if not following a paragraph. Adding this empty paragraph fixes it, and // the empty paragraph will be removed anyway. content: { - selectors: ['.d_Dd', '.e_Ac'], + selectors: ['main'], transforms: { ol: node => { node.attr('class', 'mercury-parser-keep'); }, h2: $node => $node.attr('class', 'mercury-parser-keep'), h3: $node => $node.attr('class', 'mercury-parser-keep'), + p: node => { + if ( + node.text() === + 'Affiliate links on Android Authority may earn us a commission. Learn more.' + ) { + node.remove(); + } + }, }, clean: [ - '.d_f .d_nr', // Lead image + 'h1', // Clean title + 'h1 ~ *', // Clean subtitle + '.e_Oh', // Polls ], }, }; diff --git a/src/utils/dom/constants.js b/src/utils/dom/constants.js index b664a857..abb0a15f 100644 --- a/src/utils/dom/constants.js +++ b/src/utils/dom/constants.js @@ -36,6 +36,7 @@ export const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(','); export const WHITELIST_ATTRS = [ 'src', 'srcset', + 'start', 'sizes', 'type', 'href',