Skip to content

Commit

Permalink
Handle pyppeteer crashing on import. Switch to different library soon.
Browse files Browse the repository at this point in the history
  • Loading branch information
akneni committed Dec 7, 2023
1 parent b9c87d5 commit b5b09e9
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
15 changes: 14 additions & 1 deletion pygrab/js_scraper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,15 @@
from .warning import Warning
from .tor import Tor
from pyppeteer import launch as _launch

# pyppeteer is a buggy library that often crashes at import time, so the
# import is guarded and the outcome is recorded in `pyppeteer_working`.
# TODO: switch to a different library soon.
try:
    from pyppeteer import launch as _launch
except Exception:
    # Any failure while importing marks pyppeteer as unusable.
    pyppeteer_working = False
else:
    pyppeteer_working = True


import asyncio as _asyncio
import atexit as _atexit
import nest_asyncio as _nest_asyncio
Expand Down Expand Up @@ -60,6 +69,8 @@ async def __pyppeteer_kernel(cls, url, use_tor:bool=None, timeout:int=20):
@classmethod
def pyppeteer_get(cls, url, use_tor:bool=None, timeout:int=20):
    """Run the pyppeteer kernel coroutine for ``url`` and return its result.

    Args:
        url: Passed through to ``__pyppeteer_kernel``.
        use_tor: Passed through to ``__pyppeteer_kernel``.
        timeout: Passed through to ``__pyppeteer_kernel``.

    Returns:
        Whatever ``__pyppeteer_kernel`` returns, or ``None`` when the
        module-level ``pyppeteer_working`` flag is false.
    """
    if pyppeteer_working:
        # Drive the coroutine to completion on the current event loop.
        event_loop = _asyncio.get_event_loop()
        return event_loop.run_until_complete(
            cls.__pyppeteer_kernel(url, use_tor, timeout)
        )
    return None
Expand Down Expand Up @@ -90,4 +101,6 @@ async def scrape_all(cls, urls, use_tor=None, timeout:int=20) -> dict:

@classmethod
def pyppeteer_get_async(cls, urls, use_tor=None, timeout:int=20) -> dict:
    """Run ``scrape_all`` for ``urls`` via ``asyncio.run`` and return its result.

    Args:
        urls: Passed through to ``scrape_all``.
        use_tor: Passed through to ``scrape_all`` as a keyword argument.
        timeout: Passed through to ``scrape_all`` as a keyword argument.

    Returns:
        The result of ``scrape_all``, or ``None`` when the module-level
        ``pyppeteer_working`` flag is false.
    """
    if pyppeteer_working:
        scrape_job = cls.scrape_all(urls, use_tor=use_tor, timeout=timeout)
        return _asyncio.run(scrape_job)
    return None
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

setup(
name='pygrab',
version='2.2.2',
version='2.2.3',
description='A secure python library for fetching data with async, JS, and Tor support',
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down

0 comments on commit b5b09e9

Please sign in to comment.