From b5b09e94c55bfe5d6ae7e245306f60515e4203de Mon Sep 17 00:00:00 2001
From: Anish Kanthamneni
Date: Thu, 7 Dec 2023 12:05:31 -0500
Subject: [PATCH] Handle pyppeteer crashing on import. Switch to different library soon.

Guard the pyppeteer import so a failure there no longer breaks importing
pygrab. When pyppeteer is unavailable, pyppeteer_get and
pyppeteer_get_async still return None, but they now emit a RuntimeWarning
that includes the original import error instead of failing silently.
---
 pygrab/js_scraper.py | 31 ++++++++++++++++++++++++++++++-
 setup.py             |  2 +-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/pygrab/js_scraper.py b/pygrab/js_scraper.py
index ceacb48..6e84594 100644
--- a/pygrab/js_scraper.py
+++ b/pygrab/js_scraper.py
@@ -1,6 +1,21 @@
 from .warning import Warning
 from .tor import Tor
-from pyppeteer import launch as _launch
+
+import warnings as _warnings
+
+# pyppeteer is known to crash at import time, and not only with ImportError,
+# so the import is guarded to keep the rest of the package importable.
+# The failure is recorded so callers can be told why JS scraping is disabled.
+# TODO: switch to a more reliable headless-browser library.
+_pyppeteer_import_error = None
+try:
+    from pyppeteer import launch as _launch
+    pyppeteer_working = True
+except Exception as _exc:  # deliberately broad, see comment above
+    pyppeteer_working = False
+    _pyppeteer_import_error = repr(_exc)
+
+
 import asyncio as _asyncio
 import atexit as _atexit
 import nest_asyncio as _nest_asyncio
@@ -60,6 +75,13 @@ async def __pyppeteer_kernel(cls, url, use_tor:bool=None, timeout:int=20):
     @classmethod
     def pyppeteer_get(cls, url, use_tor:bool=None, timeout:int=20):
         # Test it
+        if not pyppeteer_working:
+            _warnings.warn(
+                f"pyppeteer is unavailable (import error: {_pyppeteer_import_error}); returning None",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+            return None
         loop = _asyncio.get_event_loop()
         result = loop.run_until_complete(cls.__pyppeteer_kernel(url, use_tor, timeout))
         return result
@@ -90,4 +112,11 @@ async def scrape_all(cls, urls, use_tor=None, timeout:int=20) -> dict:
 
     @classmethod
     def pyppeteer_get_async(cls, urls, use_tor=None, timeout:int=20) -> dict:
+        if not pyppeteer_working:
+            _warnings.warn(
+                f"pyppeteer is unavailable (import error: {_pyppeteer_import_error}); returning None",
+                RuntimeWarning,
+                stacklevel=2,
+            )
+            return None
         return _asyncio.run(cls.scrape_all(urls, use_tor=use_tor, timeout=timeout))
diff --git a/setup.py b/setup.py
index 5e33779..f4a0ca3 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 
 setup(
     name='pygrab',
-    version='2.2.2',
+    version='2.2.3',
     description='A secure python library for fetching data with async, JS, and Tor support',
     long_description=long_description,
     long_description_content_type='text/markdown',