From 8d9bf913a52375b9d648cd33d10154678822a5f8 Mon Sep 17 00:00:00 2001 From: laggardkernel Date: Tue, 20 Jul 2021 09:39:01 +0800 Subject: [PATCH] Fix info sharing between req and download mw with Request.meta The canonical way to share info between `Request` and downloader middleware is `Request.meta`. Custom attributes `Request.custom_attr` should be avoided, because they may be dropped after possible serialization and de-serialization in `Scheduler`. E.g. `scrapy-redis` converts `Request` into a dict with `scrapy.utils.reqser.request_to_dict()`. Custom attributes on `Request` will be lost. --- scrapy_selenium/http.py | 13 ++++++++----- scrapy_selenium/middlewares.py | 12 ++++++------ 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/scrapy_selenium/http.py b/scrapy_selenium/http.py index cddf7bf..1280c53 100644 --- a/scrapy_selenium/http.py +++ b/scrapy_selenium/http.py @@ -24,9 +24,12 @@ def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=Non """ - self.wait_time = wait_time - self.wait_until = wait_until - self.screenshot = screenshot - self.script = script - + meta = { + "wait_time": wait_time, + "wait_until": wait_until, + "screenshot": screenshot, + "script": script, + } + meta.update(kwargs.pop("meta", {})) + kwargs["meta"] = meta super().__init__(*args, **kwargs) diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index 201db2c..5754e1f 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -110,16 +110,16 @@ def process_request(self, request, spider): } ) - if request.wait_until: - WebDriverWait(self.driver, request.wait_time).until( - request.wait_until + if request.meta.get('wait_until'): + WebDriverWait(self.driver, request.meta.get('wait_time')).until( + request.meta.get('wait_until') ) - if request.screenshot: + if request.meta.get('screenshot'): request.meta['screenshot'] = self.driver.get_screenshot_as_png() - if request.script: - 
self.driver.execute_script(request.script) + if request.meta.get('script'): + self.driver.execute_script(request.meta.get('script')) body = str.encode(self.driver.page_source)