fix: avoid memory leaks in browsercontext #290

Merged · 1 commit · Nov 2, 2023
src/lootscraper/main.py (38 additions, 41 deletions)
@@ -263,56 +263,53 @@ async def scrape_worker(
     task_queue: asyncio.Queue[Type[Scraper]],
     telegram_queue: asyncio.Queue[int],
 ) -> None:
-    async with AsyncExitStack() as stack:
-        # Check the "global" variable (set on import) to see if we can use a
-        # virtual display
-        if use_virtual_display:
-            stack.enter_context(Xvfb())
-
-        # Use one single browser instance for all scrapers
-        browser_context: BrowserContext = await stack.enter_async_context(
-            get_browser_context(),
-        )
-        run_no = 0
-        while True:
-            # This triggers when the time has come to run a scraper
-            scraper_class = await task_queue.get()
-
-            # Use a single database session for all worker runs
-            db_session = db.Session()
-            run_no += 1
-            logger.debug(f"Executing scheduled task #{run_no}.")
-            try:
-                scraper_instance = scraper_class(context=browser_context)
-                scraped_offers = await scraper_instance.scrape()
-                await process_new_offers(
-                    db,
-                    browser_context,
-                    db_session,
-                    scraped_offers,
-                )
-
-                if Config.get().generate_feed:
-                    await action_generate_feed(db)
-                else:
-                    logging.info("Skipping feed generation because it is disabled.")
-
-                if Config.get().telegram_bot:
-                    await telegram_queue.put(run_no)
-                else:
-                    logging.debug(
-                        "Skipping Telegram notification because it is disabled.",
-                    )
-            except OperationalError:
-                # We handle DB errors on a higher level
-                raise
-            except Exception as e:
-                # This is our catch-all. Something really unexpected occurred.
-                # Log it with the highest priority and continue with the
-                # next scheduled run when it's due.
-                logger.critical(e)
-
-            task_queue.task_done()
+    run_no = 0
+    while True:
+        # This triggers when the time has come to run a scraper
+        scraper_class = await task_queue.get()
+
+        run_no += 1
+        logger.debug(f"Executing scheduled task #{run_no}.")
+        try:
+            async with AsyncExitStack() as stack:
+                # Check the "global" variable (set on import) to see if we can use a
+                # virtual display. If so, we create one and add it to the stack
+                if use_virtual_display:
+                    stack.enter_context(Xvfb())
+
+                # Use one browser instance per scraper (to avoid memory leaks)
+                browser_context: BrowserContext = await stack.enter_async_context(
+                    get_browser_context(),
+                )
+
+                scraper_instance = scraper_class(context=browser_context)
+                scraped_offers = await scraper_instance.scrape()
+                await process_new_offers(
+                    db,
+                    browser_context,
+                    db.Session(),
+                    scraped_offers,
+                )
+
+                if Config.get().generate_feed:
+                    await action_generate_feed(db)
+                else:
+                    logging.info("Skipping feed generation because it is disabled.")
+
+                if Config.get().telegram_bot:
+                    await telegram_queue.put(run_no)
+                else:
+                    logging.debug(
+                        "Skipping Telegram notification because it is disabled.",
+                    )
+        except OperationalError:
+            # We handle DB errors on a higher level
+            raise
+        except Exception as e:
+            # This is our catch-all. Something really unexpected occurred.
+            # Log it with the highest priority and continue with the
+            # next scheduled run when it's due.
+            logger.critical(e)
+
+        task_queue.task_done()
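
The fix comes down to narrowing the lifetime of the browser context: instead of one long-lived context shared by every scheduled run, the AsyncExitStack (and with it Xvfb and the browser context) is now entered and torn down once per run, so anything the browser accumulates is released when the run ends. Below is a minimal sketch of that pattern in isolation; make_context() is a hypothetical stand-in for the repository's get_browser_context() helper, and the queue payload is simplified to a plain string.

import asyncio
from contextlib import AsyncExitStack, asynccontextmanager


@asynccontextmanager
async def make_context():
    # Hypothetical stand-in for get_browser_context(): acquire a
    # resource on entry, release it on exit.
    resource = {"pages": []}
    try:
        yield resource
    finally:
        resource.clear()  # teardown now runs once per loop iteration


async def worker(task_queue: asyncio.Queue) -> None:
    while True:
        task = await task_queue.get()
        try:
            # Entering the stack *inside* the loop scopes the resource to a
            # single run: when the block exits, the stack unwinds and the
            # context is destroyed, so state cannot pile up across runs.
            async with AsyncExitStack() as stack:
                context = await stack.enter_async_context(make_context())
                context["pages"].append(task)  # do the actual work here
        finally:
            task_queue.task_done()


async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    worker_task = asyncio.create_task(worker(queue))
    for name in ("run-1", "run-2", "run-3"):
        await queue.put(name)
    await queue.join()  # wait until every queued run has finished
    worker_task.cancel()


if __name__ == "__main__":
    asyncio.run(main())

The try/finally around task_done() is a simplification of the PR's structure, which calls it after its except blocks instead; the resource-scoping pattern is the part the diff actually changes.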