Skip to content

Commit

Permalink
Fix limit in fetch_replies_service to not always limit by 5 (which al…
Browse files Browse the repository at this point in the history
…ways caused us to only do one page). Rename some variables to make purpose clearer. Return the array of all fetched uris instead of just the number we got
  • Loading branch information
sneakers-the-rat committed Oct 13, 2024
1 parent 793ccd0 commit efa50b8
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
4 changes: 3 additions & 1 deletion app/services/activitypub/fetch_all_replies_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def call(collection_or_uri, allow_synchronous_requests: true, request_id: nil)
@allow_synchronous_requests = allow_synchronous_requests
@filter_by_host = false

@items = collection_items(collection_or_uri)
@items = collection_items(collection_or_uri, fetch_all: true)
@items = filtered_replies
return if @items.nil?

Expand All @@ -25,6 +25,8 @@ def filtered_replies
return if @items.nil?

# find all statuses that we *shouldn't* update the replies for, and use that as a filter
# Typically we assume this is smaller than the replies we *should* fetch,
# so we minimize the number of uris we should load here.
uris = @items.map { |item| value_or_id(item) }
dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri)

Expand Down
8 changes: 5 additions & 3 deletions app/services/activitypub/fetch_replies_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ class ActivityPub::FetchRepliesService < BaseService

# Limit of fetched replies
MAX_REPLIES = 5
# Limit when fetching all (to prevent infinite fetch attack)
FETCH_ALL_MAX_REPLIES = 500

def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil, filter_by_host: true)
@account = parent_status.account
Expand All @@ -21,7 +23,7 @@ def call(parent_status, collection_or_uri, allow_synchronous_requests: true, req

private

def collection_items(collection_or_uri)
def collection_items(collection_or_uri, fetch_all: false)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)

Expand All @@ -39,8 +41,8 @@ def collection_items(collection_or_uri)

all_items.concat(as_array(items))

# Quit early if we are not fetching all replies
break if all_items.size >= MAX_REPLIES
# Quit early if we are not fetching all replies or we've reached the absolute max
break if (!fetch_all && all_items.size >= MAX_REPLIES) || (all_items.size >= FETCH_ALL_MAX_REPLIES)

collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
end
Expand Down
22 changes: 11 additions & 11 deletions app/workers/activitypub/fetch_all_replies_worker.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ActivityPub::FetchAllRepliesWorker

sidekiq_options queue: 'pull', retry: 3

# Global max replies to fetch per request
# Global max replies to fetch per request (all replies, recursively)
MAX_REPLIES = 1000

def perform(parent_status_id, current_account_id = nil, options = {})
Expand All @@ -20,21 +20,21 @@ def perform(parent_status_id, current_account_id = nil, options = {})
@current_account = @current_account_id.nil? ? nil : Account.find(@current_account_id)
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: Fetching all replies for status: #{@parent_status}" }

all_replies = get_replies(@parent_status.uri, options)
got_replies = all_replies.length
until all_replies.empty? || got_replies >= MAX_REPLIES
next_reply = all_replies.pop
uris_to_fetch = get_replies(@parent_status.uri, options)
fetched_uris = uris_to_fetch
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
next_reply = uris_to_fetch.pop
next if next_reply.nil?

new_replies = get_replies(next_reply, options)
next if new_replies.nil?
new_reply_uris = get_replies(next_reply, options)
next if new_reply_uris.nil?

got_replies += new_replies.length
all_replies.concat(new_replies)
uris_to_fetch.concat(new_reply_uris)
fetched_uris.concat(new_reply_uris)
end

Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{got_replies} replies" }
got_replies
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
fetched_uris
end

private
Expand Down

0 comments on commit efa50b8

Please sign in to comment.