From 01e1295ad57a6a69e84117b1a655736ed7875ff8 Mon Sep 17 00:00:00 2001 From: Alex Bair Date: Thu, 24 Oct 2024 21:01:08 -0400 Subject: [PATCH] source-zendesk-support: add logging & ticket_comments checkpoint For users with a lot of data, the `ticket_comments` stream can take a while to backfill, especially given the 30 requests/60 seconds rate limit for this endpoint. Checkpointing that stream will help that backfill complete across connector restarts, and it'll provide an indicator in the UI that the connector is still processing records. I've also added logging to catch if the `ticket_comments` cursor does not increase & to log the status codes for non-200 responses to help troubleshoot if the connector is stuck making the same failing request. --- .../source_zendesk_support/streams.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/source-zendesk-support/source_zendesk_support/streams.py b/source-zendesk-support/source_zendesk_support/streams.py index 3deffda19..c6908e581 100644 --- a/source-zendesk-support/source_zendesk_support/streams.py +++ b/source-zendesk-support/source_zendesk_support/streams.py @@ -209,6 +209,8 @@ def should_retry(self, response: requests.Response) -> bool: self.logger.error(f"Skipping stream {self.name}: Check permissions, error message: {error}.") setattr(self, "raise_on_http_errors", False) return False + if response.status_code != 200: + self.logger.warning(f"Received a {response.status_code} response.") return super().should_retry(response) @@ -671,6 +673,8 @@ class SourceZendeskSupportTicketEventsExportStream(SourceZendeskIncrementalExpor @ param event_type : specific event_type to check ["Audit", "Change", "Comment", etc] """ + state_checkpoint_interval = 1000 + cursor_field = "created_at" response_list_name: str = "ticket_events" response_target_entity: str = "child_events" @@ -707,6 +711,14 @@ def request_params( Otherwise, returns the start time param from the stream's state/config and the sideload param. 
""" if next_page_token: + # Check if the next_page_token's start time is the the same or earlier than + # the previous request's / checkpointed state's start time. + next_page_start_time = int(next_page_token.get("start_time")) + checkpointed_start_time = self.check_stream_state(stream_state=stream_state) + + if next_page_start_time <= checkpointed_start_time: + self.logger.warning(f"start_time query param {next_page_start_time} is less than or equal to the previous start_time param {checkpointed_start_time}. Check if the stream is stuck in a loop.") + return next_page_token start_time = self.check_stream_state(stream_state=stream_state)