Skip to content

Commit

Permalink
feat(router): add 杭州市人民政府
Browse files Browse the repository at this point in the history
  • Loading branch information
flynncao committed Jan 24, 2025
1 parent ef06603 commit 7645a2c
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 0 deletions.
54 changes: 54 additions & 0 deletions lib/routes/gov/hangzhou/crawler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import logger from '@/utils/logger';
/**
 * Opens `item.link` in a new page of `browser`, clicks the element matching
 * `.item-left .item .title .button`, and returns the page HTML as it is right
 * after that click.
 *
 * The function is best-effort: every failure (page creation, navigation,
 * click, content extraction, page close) is reported through `logger.error`
 * and an empty string is returned instead of throwing.
 *
 * @param item - Object whose `link` property is the URL to visit.
 * @param browser - Object exposing an async `newPage()`; presumably a Puppeteer
 *   browser instance — confirm against the caller.
 * @param options - Optional tuning knobs. `maxAttempts` is the total number of
 *   navigation attempts (default 3), `retryDelayMs` the pause between two
 *   attempts (default 5000) and `navigationTimeoutMs` the per-attempt
 *   navigation timeout (default 60000).
 * @returns The page HTML, or `''` when anything went wrong.
 */
export async function zjzwfwCrawler(item: any, browser: any, options: { maxAttempts?: number; retryDelayMs?: number; navigationTimeoutMs?: number } = {}): Promise<string> {
    const { maxAttempts = 3, retryDelayMs = 5000, navigationTimeoutMs = 60000 } = options;

    try {
        const page = await browser.newPage();
        let response = '';
        try {
            // Recursive retry: resolves to true on the first successful navigation and to
            // false once `maxAttempts` attempts have failed.
            const navigationAttempt = async (attempt: number): Promise<boolean> => {
                if (attempt >= maxAttempts) {
                    return false;
                }
                try {
                    await page.goto(item.link, {
                        waitUntil: 'networkidle2',
                        timeout: navigationTimeoutMs,
                    });
                    return true;
                } catch {
                    // Give up right away after the last attempt; sleeping here would only
                    // delay the failure without another navigation following it.
                    if (attempt + 1 >= maxAttempts) {
                        return false;
                    }
                    await new Promise<void>((resolve) => setTimeout(resolve, retryDelayMs));
                    // A page that got closed while we were waiting cannot be navigated again.
                    if (page.isClosed()) {
                        throw new Error('Navigation frame was detached');
                    }
                    return navigationAttempt(attempt + 1);
                }
            };

            const navigationSuccess = await navigationAttempt(0);
            if (!navigationSuccess) {
                throw new Error('Navigation failed after retries');
            }

            if (page.isClosed()) {
                throw new Error('Page was closed unexpectedly');
            }
            await page.locator('.item-left .item .title .button').click();

            response = await page.content();
        } catch (error) {
            logger.error('Page Error when visiting /gov/hangzhou/zwfw:', error);
        } finally {
            // A failure while closing the page must not discard HTML that was already captured.
            try {
                if (!page.isClosed()) {
                    await page.close();
                }
            } catch (error) {
                logger.error('Error when visiting /gov/hangzhou/zwfw:', error);
            }
        }
        return response;
    } catch (error) {
        logger.error('Error when visiting /gov/hangzhou/zwfw:', error);
    }
    return '';
}
9 changes: 9 additions & 0 deletions lib/routes/gov/hangzhou/namespace.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import type { Namespace } from '@/types';

// Localized (Chinese) display name of this namespace.
const zhName = '杭州市人民政府';

/**
 * Namespace metadata for the routes of this directory: the English display
 * name, the site domain, and the localized name under `zh`.
 */
export const namespace: Namespace = {
    name: "Hangzhou People's Government",
    url: 'hangzhou.gov.cn',
    zh: { name: zhName },
};
106 changes: 106 additions & 0 deletions lib/routes/gov/hangzhou/zwfw.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import { Route } from '@/types';
import { load } from 'cheerio';
import puppeteer from '@/utils/puppeteer';
import ofetch from '@/utils/ofetch';
import cache from '@/utils/cache';
import got from '@/utils/got';
import { parseDate } from '@/utils/parse-date';
import { zjzwfwCrawler } from './crawler';
import timezone from '@/utils/timezone';

/**
 * Route definition for the "政务服务公开" column of hangzhou.gov.cn, served at
 * `/gov/hangzhou/zwfw`. The feed is produced by `handler`, which needs a
 * Puppeteer browser (`requirePuppeteer`) for part of the linked articles.
 */
export const route: Route = {
    path: '/hangzhou/zwfw',
    categories: ['government'],
    example: '/gov/hangzhou/zwfw',
    features: {
        requireConfig: false,
        requirePuppeteer: true,
        antiCrawler: true,
        supportBT: false,
        supportPodcast: false,
        supportScihub: false,
    },
    radar: [
        {
            source: ['hangzhou.gov.cn/col/col1256349/index.html'],
        },
    ],
    name: '政务服务公开',
    // Maintainers are listed by GitHub handle (not display name) so they can be mentioned on issues.
    maintainers: ['flynncao'],
    handler,
    url: 'hangzhou.gov.cn/col/col1256349/index.html',
};

/**
 * Builds the feed for the column page at hangzhou.gov.cn/col/col1256349.
 *
 * Steps:
 * 1. Fetch the column page and unwrap the `<script type="text/xml">` data
 *    block (recordset / record / CDATA wrappers) so the `li.clearfix` entries
 *    inside it become ordinary markup that cheerio can query.
 * 2. Turn every entry that has a link into `{ title, link, pubDate }`.
 * 3. Resolve each entry (through `cache.tryGet`, keyed by link) into a full
 *    item: pages on www.zjzwfw.gov.cn are loaded with `zjzwfwCrawler` in the
 *    Puppeteer browser, everything else is fetched with `got`.
 *
 * The browser is always closed, even when resolving an item fails.
 *
 * @returns The feed object (`title`, `link`, `item`, `allowEmpty`).
 */
async function handler() {
    const host = 'https://www.hangzhou.gov.cn/col/col1256349/index.html';
    const response = await ofetch(host);
    const link = host;

    const formatted = response
        // Unwrap the data block as a pair. Removing the first '</script>' of the whole
        // document instead would hit whichever script element happens to come first.
        .replace(/<script type="text\/xml">([\S\s]*?)<\/script>/, '$1')
        .replaceAll('<recordset>', '')
        .replaceAll('</recordset>', '')
        .replaceAll('<record>', '')
        .replaceAll('</record>', '')
        .replaceAll('<![CDATA[', '')
        .replaceAll(']]>', '');
    const $ = load(formatted);

    const list = $('li.clearfix')
        .toArray()
        .flatMap((element) => {
            const entry = $(element);
            const anchor = entry.find('a').first();
            const href = anchor.attr('href');
            // Without an href there is nothing to fetch and no stable cache key, so skip the entry.
            if (!href) {
                return [];
            }
            return [
                {
                    title: anchor.text(),
                    link: new URL(href, host).href,
                    pubDate: timezone(parseDate(entry.find('span').first().text(), 'YYYY-MM-DD'), 8),
                },
            ];
        });

    const browser = await puppeteer({ stealth: true });
    let items;
    try {
        items = await Promise.all(
            list.map((item) =>
                cache.tryGet(item.link, async () => {
                    const hostname = new URL(item.link).hostname;
                    // Parsed article page; declared here so the metadata lookups below read
                    // the article itself rather than the column page held by the outer `$`.
                    let $article: ReturnType<typeof load>;
                    let description: string | null;
                    let author: string | undefined;

                    if (hostname === 'www.zjzwfw.gov.cn') {
                        // Source: Zhejiang Government Service Network (needs a real browser)
                        $article = load(await zjzwfwCrawler(item, browser));
                        description = $article('.content .item').html();
                        author = '浙江政务服务网';
                    } else {
                        // Everything else can be fetched directly
                        const detail = await got(item.link);
                        $article = load(detail.data);
                        // Hangzhou Public Security Bureau pages use their own content container;
                        // the default is the Hangzhou government site layout.
                        const contentSelector = hostname === 'police.hangzhou.gov.cn' ? '.art-content .wz_con_content' : '.article';
                        description = $article(contentSelector).html();
                        author = $article('meta[name="ContentSource"]').attr('content');
                    }

                    const category = $article('meta[name="ColumnType"]').attr('content');
                    // Prefer the article's own publish time; fall back to the date shown in the list.
                    const publishedAt = $article('meta[name="PubDate"]').attr('content');

                    return {
                        ...item,
                        description,
                        author,
                        category,
                        pubDate: publishedAt ? timezone(parseDate(publishedAt, 'YYYY-MM-DD HH:mm'), 8) : item.pubDate,
                    };
                })
            )
        );
    } finally {
        // Release the browser even when one of the item requests rejected.
        await browser.close();
    }

    return {
        allowEmpty: true,
        title: '杭州市人民政府-政务服务公开',
        link,
        item: items,
    };
}

0 comments on commit 7645a2c

Please sign in to comment.