Skip to content

Commit

Permalink
Fetching pages - Custom browser endpoints should not have default pro…
Browse files Browse the repository at this point in the history
…xy info added
  • Loading branch information
dgtlmoon committed Feb 12, 2024
1 parent 4163030 commit ccb42bc
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 13 deletions.
7 changes: 7 additions & 0 deletions changedetectionio/content_fetchers/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ def __init__(self, msg):
logger.error(f"Browser connection error {msg}")
return

class BrowserFetchTimedOut(Exception):
msg = ''
def __init__(self, msg):
self.msg = msg
logger.error(f"Browser processing took too long - {msg}")
return

class BrowserStepsStepException(Exception):
def __init__(self, step_n, original_e):
self.step_n = step_n
Expand Down
29 changes: 18 additions & 11 deletions changedetectionio/content_fetchers/puppeteer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from loguru import logger
from changedetectionio.content_fetchers.base import Fetcher
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable, BrowserConnectError
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError


class fetcher(Fetcher):
Expand Down Expand Up @@ -221,14 +221,21 @@ async def main(self, **kwargs):
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False):

#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)

# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
asyncio.run(self.main(
url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary
))
try:
asyncio.run(asyncio.wait_for(self.main(
url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary
), timeout=max_time))
except asyncio.TimeoutError:
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))

8 changes: 6 additions & 2 deletions changedetectionio/processors/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,12 @@ def call_browser(self):

proxy_url = None
if preferred_proxy_id:
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
# Custom browser endpoints should not have a proxy added
if not preferred_proxy_id.startswith('ui-'):
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
else:
logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified.")

# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
Expand Down
6 changes: 6 additions & 0 deletions changedetectionio/update_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,12 @@ def run(self):
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserFetchTimedOut as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserStepsStepException as e:

if not self.datastore.data['watching'].get(uuid):
Expand Down

0 comments on commit ccb42bc

Please sign in to comment.