-
Notifications
You must be signed in to change notification settings - Fork 7.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
169 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import logger from '@/utils/logger'; | ||
/**
 * Opens `item.link` in a new page of `browser`, clicks the
 * `.item-left .item .title .button` element and returns the page HTML.
 *
 * Navigation is tried up to three times with a five-second pause between
 * attempts (no pause after the last one). Every failure is logged through
 * `logger.error` and reported to the caller as an empty string.
 *
 * @param item - Feed item; only `item.link` is read.
 * @param browser - Object exposing `newPage()`; the returned page must offer
 *   `goto`, `isClosed`, `locator`, `content` and `close`.
 * @returns The HTML captured after the click, or `''` when nothing was captured.
 */
export async function zjzwfwCrawler(item: any, browser: any): Promise<string> {
    const maxAttempts = 3;
    const retryDelayMs = 5000;

    // Declared outside both try blocks so that HTML already captured is still
    // returned when the cleanup in `finally` throws.
    let response = '';

    try {
        const page = await browser.newPage();
        try {
            let navigated = false;
            for (let attempt = 1; attempt <= maxAttempts && !navigated; attempt++) {
                try {
                    await page.goto(item.link, {
                        waitUntil: 'networkidle2',
                        timeout: 60000,
                    });
                    navigated = true;
                } catch {
                    // Only wait when another attempt will actually follow.
                    if (attempt < maxAttempts) {
                        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
                        if (page.isClosed()) {
                            throw new Error('Navigation frame was detached');
                        }
                    }
                }
            }

            if (!navigated) {
                throw new Error('Navigation failed after retries');
            }
            if (page.isClosed()) {
                throw new Error('Page was closed unexpectedly');
            }

            await page.locator('.item-left .item .title .button').click();
            response = await page.content();
        } catch (error) {
            logger.error('Page Error when visiting /gov/hangzhou/zwfw:', error);
        } finally {
            if (!page.isClosed()) {
                await page.close();
            }
        }
    } catch (error) {
        // Reached when the page cannot be created or cannot be closed.
        logger.error('Error when visiting /gov/hangzhou/zwfw:', error);
    }

    return response;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
import type { Namespace } from '@/types'; | ||
|
||
/**
 * Namespace metadata: an English display name, the site's domain, and the
 * Chinese display name under `zh`.
 */
export const namespace: Namespace = {
    name: "Hangzhou People's Government",
    url: 'hangzhou.gov.cn',
    zh: {
        name: '杭州市人民政府',
    },
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import { Route } from '@/types'; | ||
import { load } from 'cheerio'; | ||
import puppeteer from '@/utils/puppeteer'; | ||
import ofetch from '@/utils/ofetch'; | ||
import cache from '@/utils/cache'; | ||
import got from '@/utils/got'; | ||
import { parseDate } from '@/utils/parse-date'; | ||
import { zjzwfwCrawler } from './crawler'; | ||
import timezone from '@/utils/timezone'; | ||
|
||
/**
 * Route definition for `/hangzhou/zwfw`, served by `handler`.
 *
 * The feature flags declare that the route needs Puppeteer and targets a site
 * with anti-crawler measures; the radar rule and `url` both point at the
 * `col1256349` column index page that `handler` fetches.
 */
export const route: Route = {
    path: '/hangzhou/zwfw',
    categories: ['government'],
    example: '/gov/hangzhou/zwfw',
    features: {
        requireConfig: false,
        requirePuppeteer: true,
        antiCrawler: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['hangzhou.gov.cn/col/col1256349/index.html'],
        },
    ],
    name: '政务服务公开',
    maintainers: ['Flynn Cao'],
    handler,
    url: 'hangzhou.gov.cn/col/col1256349/index.html',
};
|
||
/**
 * Builds the feed for the `col1256349` column of www.hangzhou.gov.cn.
 *
 * Steps:
 * 1. Fetch the column index page and strip the `<script type="text/xml">`,
 *    `<recordset>`, `<record>` and CDATA wrappers so the `li.clearfix`
 *    entries can be queried with cheerio.
 * 2. Turn every entry that has a link into `{ title, link, pubDate }`.
 * 3. Resolve each entry's detail page through `cache.tryGet` (keyed by the
 *    entry link): pages on `www.zjzwfw.gov.cn` go through `zjzwfwCrawler`
 *    and the shared browser, everything else through a plain `got` request.
 *
 * The browser is closed in a `finally` block so it is released even when a
 * detail request rejects.
 *
 * @returns The feed object (`title`, `link`, `item`, `allowEmpty`).
 */
async function handler() {
    const host = 'https://www.hangzhou.gov.cn/col/col1256349/index.html';
    const zjzwfwHost = 'www.zjzwfw.gov.cn';
    const policeHost = 'police.hangzhou.gov.cn';

    const response = await ofetch(host);
    const formatted = response
        .replace('<script type="text/xml">', '')
        .replace('</script>', '')
        .replaceAll('<recordset>', '')
        .replaceAll('</recordset>', '')
        .replaceAll('<record>', '')
        .replaceAll('</record>', '')
        .replaceAll('<![CDATA[', '')
        .replaceAll(']]>', '');
    const $ = load(formatted);

    const list = $('li.clearfix')
        .toArray()
        .flatMap((element) => {
            const entry = $(element);
            const anchor = entry.find('a').first();
            const href = anchor.attr('href');
            // An entry without a link cannot be resolved to a detail page; skip it
            // rather than fabricating a ".../undefined" URL.
            if (!href) {
                return [];
            }
            return [
                {
                    title: anchor.text(),
                    link: new URL(href, host).href,
                    pubDate: timezone(parseDate(entry.find('span').first().text(), 'YYYY-MM-DD'), 8),
                },
            ];
        });

    const browser = await puppeteer({ stealth: true });
    try {
        const items = await Promise.all(
            list.map((item) =>
                cache.tryGet(item.link, async () => {
                    const hostname = new URL(item.link).hostname;
                    const fromZjzwfw = hostname === zjzwfwHost;

                    // Zhejiang Government Service Network pages need the browser-based
                    // crawler; every other source is fetched with a plain HTTP request.
                    const html = fromZjzwfw ? await zjzwfwCrawler(item, browser) : (await got(item.link)).data;
                    const $detail = load(html);

                    let descriptionSelector = '.article'; // default: Hangzhou municipal government site
                    if (fromZjzwfw) {
                        descriptionSelector = '.content .item';
                    } else if (hostname === policeHost) {
                        // Hangzhou Public Security Bureau
                        descriptionSelector = '.art-content .wz_con_content';
                    }

                    // Read the publication date from the DETAIL page; the listing
                    // date (day precision) is only the fallback.
                    const pubDateMeta = $detail('meta[name="PubDate"]').attr('content');

                    return {
                        ...item,
                        description: $detail(descriptionSelector).html(),
                        author: fromZjzwfw ? '浙江政务服务网' : $detail('meta[name="ContentSource"]').attr('content'),
                        category: $detail('meta[name="ColumnType"]').attr('content'),
                        pubDate: pubDateMeta ? timezone(parseDate(pubDateMeta, 'YYYY-MM-DD HH:mm'), 8) : item.pubDate,
                    };
                })
            )
        );

        return {
            allowEmpty: true,
            title: '杭州市人民政府-政务服务公开',
            link: host,
            item: items,
        };
    } finally {
        await browser.close();
    }
}