From 30d6f472ee5b80143aa9b7eaf11246c952149bb4 Mon Sep 17 00:00:00 2001
From: Nitin Khanna
Date: Wed, 8 Sep 2021 10:00:28 -0700
Subject: [PATCH] feat: Times of India extractor (#503)

* Adding custom parser for Times of India

* moved transforms to clean

The transforms were just working as cleans. Moved things around as per recommendations.

Co-authored-by: Postlight Bot
---
 .../1570663350014.html                        |   1 +
 src/extractors/custom/index.js                |   1 +
 .../timesofindia.indiatimes.com/index.js      |  31 ++++++
 .../timesofindia.indiatimes.com/index.test.js | 102 ++++++++++++++++++
 4 files changed, 135 insertions(+)
 create mode 100644 fixtures/timesofindia.indiatimes.com/1570663350014.html
 create mode 100644 src/extractors/custom/timesofindia.indiatimes.com/index.js
 create mode 100644 src/extractors/custom/timesofindia.indiatimes.com/index.test.js

diff --git a/fixtures/timesofindia.indiatimes.com/1570663350014.html b/fixtures/timesofindia.indiatimes.com/1570663350014.html
new file mode 100644
index 000000000..6e1a128c6
--- /dev/null
+++ b/fixtures/timesofindia.indiatimes.com/1570663350014.html
@@ -0,0 +1 @@
+Kashmir issue: China snubs Imran Khan, says resolve Jammu and Kashmir bilaterally | India News - Times of India

China snubs Imran Khan, says resolve J&K bilaterally

India, Pakistan should take bilateral route to resolve all disputes: China

Highlights

  • “We call on India and Pakistan to engage in dialogue and consultation on all issues, including the Kashmir issue, and consolidate mutual trust," Chinese foreign ministry spokesperson Geng Shuang said
  • Explaining Beijing’s changed position, observers said supporting Pakistan’s position on Kashmir might make it difficult for Xi to make his India visit successful
File photo of Chinese president Xi Jinping. (Reuters)
BEIJING: In what is being seen as a snub to the visiting Pakistan PM Imran Khan, China on Tuesday significantly modified its position on the Kashmir dispute by omitting its recent references to the UN charter and Security Council resolutions to the issue and stressing that New Delhi and Islamabad should take the bilateral route to resolve all disputes.
“We call on India and Pakistan to engage in dialogue and consultation on all issues, including the Kashmir issue, and consolidate mutual trust. This is in line with interest of both countries and common aspiration of the world,” Chinese foreign ministry spokesperson Geng Shuang said on Tuesday.
The statement, coming ahead of Chinese president Xi Jinping’s visit to India, which is expected to take place between October 11 and 13, is a departure from the position that Beijing took when Pakistan foreign minister Shah Mahmood Qureshi had visited China post-nullification of Articles 370 and 35A. Qureshi’s Chinese counterpart Wang Yi had then said: “It (Kashmir issue) should be properly and peacefully resolved based on the UN charter, relevant UN Security Council resolutions and bilateral agreement.” In fact, China had maintained that position at a meeting of the UN Security Council on Kashmir, which ended without any outcome.
However, on Tuesday, China indicated that it was ready to go back to its pre-August 5 position when it had held that the Kashmir issue should be resolved bilaterally between India and Pakistan. The omission of references to the UN charter and Security Council resolutions particularly assume significance at a time when Imran Khan and Pakistan army chief Gen Qamar Javed Bajwa are in Beijing to meet Xi and other Chinese leaders. By changing its position, China seems to be in agreement with the Indian stand that there should be no third-party mediation on the Kashmir issue. “China’s position on Kashmir issue is clear and consistent,” Geng emphasized.
Explaining Beijing’s changed position, observers said supporting Pakistan’s position on Kashmir might make it difficult for Xi to make his India visit successful. Also, by changing its position, China is trying to soften Pakistan’s rhetoric on the Kashmir issue. Beijing’s show of neutrality comes in the midst of Pakistan asking China to take a tough stand against India on the Kashmir issue.
Meanwhile, Beijing has not yet formally announced Xi’s visit to India. It has called a press conference on Wednesday to discuss the President’s “outbound tour” where the announcement might be made. Referring to Modi’s visit to the Chinese city of Wuhan which hosted an informal summit between him and Xi, Geng said: “Both (India and China) are major developing countries of the world and major emerging markets. Since the Wuhan informal summit, our bilateral relations have gathered good momentum. We have been advancing our cooperation and properly managing our differences.”
“We have a tradition of high-level exchange and our two sides are maintaining communication on high-level exchange in the next phase. We should make a good atmosphere and environment for this,” he added.
Speaking about Imran Khan’s visit, Geng said, “We have a good tradition of close exchange and communication. We have strategic mutual trust and advancing practical cooperation. Our cooperation in the China-Pakistan Economic Cooperation (CPEC) is bringing more outcomes to our peoples.”

Imran Khan is visiting Beijing on a thinly veiled excuse of attending the closing ceremony of a horticultural exhibition. He is due to meet Xi, Chinese premier Li Keqiang and National People’s Congress chief Li Zhansu during his visit.
Read this story in Bengali
In Video:India, Pakistan should take bilateral route to resolve all disputes: China
Get the app
\ No newline at end of file
diff --git a/src/extractors/custom/index.js b/src/extractors/custom/index.js
index e3bc157c4..2c8bfbe3b 100644
--- a/src/extractors/custom/index.js
+++ b/src/extractors/custom/index.js
@@ -132,3 +132,4 @@ export * from './www.phoronix.com';
 export * from './pitchfork.com';
 export * from './biorxiv.org';
 export * from './epaper.zeit.de';
+export * from './timesofindia.indiatimes.com';
diff --git a/src/extractors/custom/timesofindia.indiatimes.com/index.js b/src/extractors/custom/timesofindia.indiatimes.com/index.js
new file mode 100644
index 000000000..6972a5c56
--- /dev/null
+++ b/src/extractors/custom/timesofindia.indiatimes.com/index.js
@@ -0,0 +1,31 @@
+export const TimesofindiaIndiatimesComExtractor = {
+  domain: 'timesofindia.indiatimes.com',
+
+  title: {
+    selectors: ['h1'],
+  },
+
+  extend: {
+    reporter: {
+      selectors: ['div.byline'],
+      transforms: {},
+    },
+  },
+
+  date_published: {
+    selectors: ['.byline'],
+    format: 'MMM D, YYYY, HH:mm z',
+    timezone: 'Asia/Kolkata',
+  },
+
+  lead_image_url: {
+    selectors: [['meta[name="og:image"]', 'value']],
+  },
+
+  content: {
+    selectors: ['div.contentwrapper:has(section)'],
+    defaultCleaner: false,
+
+    clean: ['section', 'h1', '.byline', '.img_cptn'],
+  },
+};
diff --git a/src/extractors/custom/timesofindia.indiatimes.com/index.test.js b/src/extractors/custom/timesofindia.indiatimes.com/index.test.js
new file mode 100644
index 000000000..23482984e
--- /dev/null
+++ b/src/extractors/custom/timesofindia.indiatimes.com/index.test.js
@@ -0,0 +1,102 @@
+import assert from 'assert';
+import URL from 'url';
+import cheerio from 'cheerio';
+
+import Mercury from 'mercury';
+import getExtractor from 'extractors/get-extractor';
+import { excerptContent } from 'utils/text';
+
+const fs = require('fs');
+
+describe('TimesofindiaIndiatimesComExtractor', () => {
+  describe('initial test case', () => {
+    let result;
+    let url;
+    beforeAll(() => {
+      url =
+        'https://timesofindia.indiatimes.com/india/china-snubs-imran-says-resolve-jk-bilaterally/articleshow/71496416.cms';
+      const html = fs.readFileSync(
+        './fixtures/timesofindia.indiatimes.com/1570663350014.html'
+      );
+      result = Mercury.parse(url, { html, fallback: true });
+    });
+
+    it('is selected properly', () => {
+      // This test should be passing by default.
+      // It sanity checks that the correct parser
+      // is being selected for URLs from this domain
+      const extractor = getExtractor(url);
+      assert.equal(extractor.domain, URL.parse(url).hostname);
+    });
+
+    it('returns the title', async () => {
+      // To pass this test, fill out the title selector
+      // in ./src/extractors/custom/timesofindia.indiatimes.com/index.js.
+      const { title } = await result;
+
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        title,
+        `China snubs Imran Khan, says resolve J&K bilaterally`
+      );
+    });
+
+    it('returns the author', async () => {
+      // To pass this test, fill out the author selector
+      // in ./src/extractors/custom/timesofindia.indiatimes.com/index.js.
+      const { author } = await result;
+
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(author, `Saibal Dasgupta`);
+    });
+
+    it('returns the date_published', async () => {
+      // To pass this test, fill out the date_published selector
+      // in ./src/extractors/custom/timesofindia.indiatimes.com/index.js.
+      const { date_published } = await result;
+
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(date_published, '2019-10-09T05:35:00.000Z');
+    });
+
+    it('returns the lead_image_url', async () => {
+      // To pass this test, fill out the lead_image_url selector
+      // in ./src/extractors/custom/timesofindia.indiatimes.com/index.js.
+      const { lead_image_url } = await result;
+
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        lead_image_url,
+        `https://static.toiimg.com/thumb/msid-71496420,width-1070,height-580,imgsize-83878,resizemode-6,overlay-toi_sw,pt-32,y_pad-40/photo.jpg`
+      );
+    });
+
+    it('returns the content', async () => {
+      // To pass this test, fill out the content selector
+      // in ./src/extractors/custom/timesofindia.indiatimes.com/index.js.
+      // You may also want to make use of the clean and transform
+      // options.
+      const { content } = await result;
+
+      const $ = cheerio.load(content || '');
+
+      const first13 = excerptContent(
+        $('*')
+          .first()
+          .text(),
+        13
+      );
+
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        first13,
+        'BEIJING: In what is being seen as a snub to the visiting Pakistan'
+      );
+    });
+  });
+});