forked from GiveHenanAHand/henan-rescue-viz-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfetchData.py
47 lines (39 loc) · 1.98 KB
/
fetchData.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from dataFetcher.dataProcessUtils import *
from dataFetcher.baiduApiWrapper import *
from dataFetcher.weiboDataFetcher import *
from dataSyncer.syncWithFeishu import *
import argparse
import shutil
import os
# Command-line interface for the scraper. Every option is optional; the
# credential options default to None when they are not supplied.
parser = argparse.ArgumentParser(description='Scraper for henan flood-related weibo')
# Cache path WITHOUT the ".npy" suffix (backup_if_exist appends it).
parser.add_argument('--cache', type=str, default='latest_data', help='cache numpy array')
# Output file locations.
parser.add_argument('--json_output', type=str, default='final.json', help='output json file')
# The help text used to say "json" here (copy-paste slip); this option is the CSV path.
parser.add_argument('--csv_output', type=str, default='final.csv', help='output csv file')
# Baidu credentials, handed to BaiduAPIWrapper.
parser.add_argument('--api_key', type=str, default=None, help='baidu API key')
parser.add_argument('--api_secret', type=str, default=None, help='baidu API secret')
# Feishu credentials, handed to FeishuSyncer.
parser.add_argument('--feishu_app_id', type=str, default=None, help='Feishu API key')
parser.add_argument('--feishu_app_secret', type=str, default=None, help='Feishu API secret')
def backup_if_exist(path: str, backup_folder: str = 'backup') -> None:
    """Copy ``<path>.npy`` into ``backup_folder`` under a timestamped name.

    The copy is named ``<basename of path>.<YYYYmmddHHMMSS>.old``, using the
    current local time. ``backup_folder`` is created when missing, even if
    there turns out to be nothing to back up.

    Args:
        path: Cache path without the ``.npy`` suffix.
        backup_folder: Directory that receives the backup copy.
    """
    # Imported locally: this file has no explicit ``import datetime`` and
    # would otherwise depend on a wildcard import supplying the module.
    import datetime

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(backup_folder, exist_ok=True)

    source = path + ".npy"
    if not os.path.exists(source):
        return

    stamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    # Use only the file name so a cache path with directory components (or an
    # absolute path) still lands directly inside backup_folder instead of
    # pointing at a sub-directory that does not exist.
    target = os.path.join(backup_folder, os.path.basename(path) + "." + stamp + ".old")
    shutil.copy(source, target)
def fetch_n_export(args):
    """Run one fetch -> process -> export -> sync pass.

    Args:
        args: Parsed command-line namespace; reads ``cache``, ``csv_output``,
            ``json_output``, ``api_key``, ``api_secret``, ``feishu_app_id``
            and ``feishu_app_secret``.
    """
    cache = args.cache
    weibo = WeiboDataFetcher()
    baidu = BaiduAPIWrapper(args.api_key, args.api_secret)

    # Request pages 0..49 with a fixed query string (presumably the Weibo
    # search keyword/topic -- confirm in WeiboDataFetcher).
    for page_no in tqdm(range(50)):
        weibo.fetch_weibo_data(cache, "河南暴雨互助", page=page_no, stop_if_repeat=False)

    baidu.extract_addresses_from_data(cache)

    # Post-processing helpers, each called on the cache path in this order.
    for step in (data_date_valid, data_link_valid, data_content_filter):
        step(cache)

    data_export_csv(cache, args.csv_output)

    # Hand the exported CSV to the Feishu syncer; save_local_path is the
    # JSON output path from the command line.
    syncer = FeishuSyncer(args.feishu_app_id, args.feishu_app_secret)
    syncer.startSync(local_csv=args.csv_output, save_local_path=args.json_output)
if __name__ == "__main__":
    # Parse the options once, then repeat backup + fetch/export passes
    # indefinitely; the loop has no exit condition of its own.
    args = parser.parse_args()
    cache_path = args.cache
    while True:
        # Back up the existing cache before each pass.
        backup_if_exist(cache_path)
        fetch_n_export(args)