From 2acf2a830bdf666f4dbf0de9bd7d1720b8a20ce4 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 18 Nov 2024 12:59:18 +0500 Subject: [PATCH] Remove custom priorities from the job posting spider. --- tests/test_job_posting.py | 4 ---- zyte_spider_templates/spiders/job_posting.py | 24 +------------------- 2 files changed, 1 insertion(+), 27 deletions(-) diff --git a/tests/test_job_posting.py b/tests/test_job_posting.py index a3ab29e..5ba3135 100644 --- a/tests/test_job_posting.py +++ b/tests/test_job_posting.py @@ -93,7 +93,6 @@ def test_crawl(): assert requests[1].callback == spider.parse_job_posting assert requests[2].url == nextpage_url assert requests[2].callback == spider.parse_navigation - assert [request.priority for request in requests] == [199, 183, 100] # nextpage navigation = JobPostingNavigation.from_dict( @@ -118,7 +117,6 @@ def test_crawl(): assert requests[0].callback == spider.parse_job_posting assert requests[1].url == item_urls[1] assert requests[1].callback == spider.parse_job_posting - assert [request.priority for request in requests] == [199, 183] def test_crawl_strategy_direct_item(): @@ -482,7 +480,6 @@ def test_get_nextpage_request(): scrapy_request = spider.get_nextpage_request(request) assert isinstance(scrapy_request, scrapy.Request) assert scrapy_request.callback == parse_navigation - assert scrapy_request.priority == 100 assert scrapy_request.meta == { "page_params": {}, "crawling_logs": {"name": "", "probability": None, "page_type": "nextPage"}, @@ -501,7 +498,6 @@ def test_get_parse_navigation_request(): scrapy_request = spider.get_parse_navigation_request(request) assert isinstance(scrapy_request, scrapy.Request) assert scrapy_request.callback == parse_navigation - assert scrapy_request.priority == 0 assert scrapy_request.meta == { "page_params": {}, "crawling_logs": { diff --git a/zyte_spider_templates/spiders/job_posting.py b/zyte_spider_templates/spiders/job_posting.py index d469a24..87b4cd4 100644 --- a/zyte_spider_templates/spiders/job_posting.py +++ b/zyte_spider_templates/spiders/job_posting.py @@ -195,31 +195,17 @@ def parse_job_posting( f"less than threshold of 0.1:\n{job_posting}" ) - @staticmethod - def get_parse_navigation_request_priority( - request: Union[ProbabilityRequest, Request] - ) -> int: - if ( - not hasattr(request, "metadata") - or not request.metadata - or request.metadata.probability is None - ): - return 0 - return int(100 * request.metadata.probability) - def get_parse_navigation_request( self, request: Union[ProbabilityRequest, Request], callback: Optional[Callable] = None, page_params: Optional[Dict[str, Any]] = None, - priority: Optional[int] = None, page_type: str = "jobPostingNavigation", ) -> Request: callback = callback or self.parse_navigation return request.to_scrapy( callback=callback, - priority=priority or self.get_parse_navigation_request_priority(request), meta={ "page_params": page_params or {}, "crawling_logs": { @@ -237,26 +223,18 @@ def get_nextpage_request( page_params: Optional[Dict[str, Any]] = None, ): return self.get_parse_navigation_request( - request, callback, page_params, self._NEXT_PAGE_PRIORITY, "nextPage" + request, callback, page_params, "nextPage" ) - def get_parse_job_posting_request_priority( - self, request: ProbabilityRequest - ) -> int: - probability = request.get_probability() or 0 - return int(100 * probability) + self._NEXT_PAGE_PRIORITY - def get_parse_job_posting_request( self, request: ProbabilityRequest, callback: Optional[Callable] = None ) -> Request: callback = callback or self.parse_job_posting - priority = self.get_parse_job_posting_request_priority(request) probability = request.get_probability() scrapy_request = request.to_scrapy( callback=callback, - priority=priority, meta={ "crawling_logs": { "name": request.name,