
Scheduled Tasks

import schedule
# import time


def run():
    print("I'm doing something...")


schedule.every(3).seconds.do(run)
# schedule.every(1).minutes.do(run)               # run the job once a minute
# schedule.every().hour.do(run)                   # run the job once an hour
# schedule.every().day.at("16:25").do(run)        # run the job every day at 16:25
# schedule.every().monday.do(run)                 # run the job every Monday at this time
# schedule.every().wednesday.at("16:24").do(run)  # run the job every Wednesday at 16:24

while True:
    schedule.run_pending()  # run_pending: run every job that is due
I'm doing something...
I'm doing something...
I'm doing something...
I'm doing something...

KeyboardInterrupt  (raised when the endless run_pending() loop was interrupted by hand)
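As written, the loop calls run_pending() back to back with no pause, so it pins one CPU core until the kernel is interrupted, which is exactly the KeyboardInterrupt shown above. The commented-out import time hints at the usual fix: sleep briefly between checks. A minimal sketch, assuming a one-second polling interval (plenty for jobs scheduled in seconds or minutes):

import time

import schedule


def run():
    print("I'm doing something...")


schedule.every(3).seconds.do(run)

while True:
    schedule.run_pending()  # run every job that is due
    time.sleep(1)           # then idle for a second instead of busy-waiting

The same polling pattern applies to the scraping job below.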
import schedule
import pandas as pd
from datetime import datetime
import logging

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s: %(message)s')
count = 0

def get_content():
    global count   # global counter so the CSV header is written only once
    print('----------- scraping data -------------')
    url = 'https://s.weibo.com/top/summary?cate=realtimehot&sudaref=s.weibo.com&display=0&retcode=6102'
    df = pd.read_html(url)[0][1:11][['序号', '关键词']]   # top 10 hot-search rows (rank, keyword)
    time_ = datetime.now().strftime("%Y/%m/%d %H:%M")     # current timestamp
    df['序号'] = df['序号'].apply(int)
    # the scraped cell is "keyword<two spaces>heat"; split it into two columns
    df['热度'] = df['关键词'].str.split('  ', expand=True)[1]
    df['关键词'] = df['关键词'].str.split('  ', expand=True)[0]
    df['时间'] = [time_] * len(df['序号'])
    if count == 0:
        df.to_csv('datas.csv', mode='a+', index=False)
        count += 1
    else:
        df.to_csv('datas.csv', mode='a+', index=False, header=False)


# scheduled scraper: fetch the hot-search list once per minute
schedule.every(1).minutes.do(get_content)

while True:
    schedule.run_pending()
----------- scraping data -------------

ImportError: html5lib not found, please install it
(raised inside pd.read_html on the first scheduled run of get_content)
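The scheduler itself worked: the job fired after one minute, printed its banner, and then failed inside pd.read_html, which dispatched to the bs4/html5lib parser and found html5lib missing. Installing the parser (for example pip install html5lib beautifulsoup4, or lxml as an alternative flavor) clears the ImportError. Separately, any uncaught exception in a job propagates out of schedule.run_pending() and kills the polling loop, so a long-running scraper is better off wrapping the job. The sketch below is a minimal hardening of the cell above; run_safely is a helper name introduced here, and it assumes get_content from the previous cell is already defined in the session:

import time
import logging

import schedule

logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s: %(message)s')


def run_safely(job):
    """Call a job function; log failures instead of letting them stop the scheduler."""
    try:
        job()
    except Exception:
        logging.exception("job failed; will retry at the next scheduled run")


# get_content is the scraping function defined in the cell above
schedule.every(1).minutes.do(run_safely, get_content)

while True:
    schedule.run_pending()
    time.sleep(1)  # check once per second instead of busy-waiting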