Skip to content

Commit

Permalink
Add batching to reanalyze_all.
Browse files Browse the repository at this point in the history
It currently times out in staging.
  • Loading branch information
n-rook committed Nov 12, 2023
1 parent 56ece0d commit 81946a4
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,15 @@ <h2>Replays to reanalyze:</h2>
<li>None!</li>
{% endfor %}
</ul>
<form method="POST">
{% if replays %}
<form method="POST" action="{% url 'Replays/ReanalyzePagePost' current_token next_token %}">
{% csrf_token %}
<input type="submit" />
</form>
{% endif %}
{% if more_pages %}
<p><a href="{% url 'Replays/ReanalyzeBatch' next_token %}">Continue to next page</a></p>
{% else %}
<p>No more replays to analyze.</p>
{% endif %}
{% endblock %}
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ <h2>Reanalyzed replays:</h2>
<li>None!</li>
{% endfor %}
</ul>
<a href="{% url 'Replays/ReanalyzeBatch' end_token %}">Continue to next page</a>
{% endblock %}
16 changes: 15 additions & 1 deletion project/thscoreboard/replays/urls/replay_urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,21 @@
path("publish/<str:game_id>", create_replay.publish_replay_no_file),
path("user/<str:username>", user.user_page, name="user_page"),
path("user/<str:username>/json", user.user_page_json),
path("reanalyze_all", reanalyze_all_replays.reanalyze_all),
path(
"reanalyze_all",
reanalyze_all_replays.batch_reanalyze_preview,
name="Replays/ReanalyzeBatch",
),
path(
"reanalyze_all/<int:pagination_token>",
reanalyze_all_replays.batch_reanalyze_preview,
name="Replays/ReanalyzeBatch",
),
path(
"reanalyze_batch/<int:start_token>/<int:end_token>",
reanalyze_all_replays.reanalyze_page,
name="Replays/ReanalyzePagePost",
),
path("<str:game_id>", replay_list.game_scoreboard),
path("<str:game_id>/json", replay_list.game_scoreboard_json),
path("<str:game_id>/d<int:difficulty>", replay_list.game_scoreboard),
Expand Down
80 changes: 58 additions & 22 deletions project/thscoreboard/replays/views/reanalyze_all_replays.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@
from replays import reanalyze_replay


# The maximum number of replays to view and reanalyze at once; it is the
# slice limit applied to the replay query for each page.
# This must be low enough that the request will never take over 30 seconds,
# to avoid timeouts.
_BATCH_SIZE = 5000


def _ShortNameForReplay(r):
return "{id} ({user}, {game})".format(
id=r.id,
Expand All @@ -18,30 +24,32 @@ def _ShortNameForReplay(r):
)


def _select_all_replays_with_files():
return models.Replay.objects.select_related("shot", "shot__game").filter(
shot__game__has_replays=True
def _select_next_replays_with_files(pagination_token, end_token=None):
    """Return the next page of replays, ordered by id, whose game has replays.

    Args:
        pagination_token: Exclusive lower bound; only replays whose id is
            strictly greater than this value are returned.
        end_token: Optional inclusive upper bound; when not None, only replays
            whose id is less than or equal to this value are returned.

    Returns:
        A sliced queryset of at most _BATCH_SIZE replays, with "shot" and
        "shot__game" fetched via select_related.
    """
    q = models.Replay.objects.filter(id__gt=pagination_token)
    if end_token is not None:
        # QuerySet.filter() returns a new queryset instead of modifying q in
        # place, so the result must be reassigned for the bound to apply.
        q = q.filter(id__lte=end_token)
    return (
        q.select_related("shot", "shot__game")
        .filter(shot__game__has_replays=True)
        .order_by("id")[:_BATCH_SIZE]
    )


@http_decorators.require_http_methods(["GET", "HEAD", "POST"])
@http_decorators.require_http_methods(["GET", "HEAD"])
@auth_decorators.permission_required("staff", raise_exception=True)
def reanalyze_all(request):
if request.method == "POST":
return _post_reanalyze_all(request)
else:
return _get_reanalyze_all(request)


def _get_reanalyze_all(request):
# This request is likely to be extremely expensive. At some point,
# we'll want to adjust it to be cheaper.
def batch_reanalyze_preview(request, pagination_token: int = 0):
"""Preview the reanalysis of a page of replays."""

replay_links = []

# Bind r now so we can use it outside of the for loop.
# This way, we can access the last element in the query without having to
# conduct additional queries.
r = None

# To lessen the big load on the database, this view function is not
# performed in one transaction.
for r in _select_all_replays_with_files():
for r in _select_next_replays_with_files(pagination_token):
# Typically in Django we would simply pass the replay instance to the
# template engine, and define its rendering in the template. However,
# since this method could easily return a very large number of replays,
Expand All @@ -57,21 +65,47 @@ def _get_reanalyze_all(request):
}
)

if r:
next_token = r.id
else:
# Since there are no replays, the "next token" is the same as the
# current token.
next_token = pagination_token

more_pages = models.Replay.objects.filter(id__gt=next_token).exists()

context = {
"replays": replay_links,
"current_token": pagination_token,
"next_token": next_token,
"more_pages": more_pages,
}

return render(
request,
"replays/reanalyze_all.html",
{
"replays": replay_links,
},
"replays/reanalyze_batch.html",
context,
)


def _post_reanalyze_all(request):
@http_decorators.require_http_methods(["POST"])
@auth_decorators.permission_required("staff", raise_exception=True)
def reanalyze_page(request, start_token: int, end_token: int):
"""Reanalyze replays, updating their metadata from the replay file.
Args:
start_token: A pagination token; an exclusive lower bound for the set
of replays to be reanalyzed (only ids greater than it are selected).
end_token: A pagination token; an inclusive upper bound for the set of
replays to be reanalyzed (only ids up to and including it are selected).
"""
replay_links = []

# To lessen the big load on the database, the updates are performed in many
# small transactions, not one big one.
for r in _select_all_replays_with_files():
for r in _select_next_replays_with_files(
pagination_token=start_token, end_token=end_token
):
if reanalyze_replay.DoesReplayNeedUpdate(r.id):
with transaction.atomic():
# The replay might have been deleted during this method's
Expand All @@ -88,5 +122,7 @@ def _post_reanalyze_all(request):
)

return render(
request, "replays/successfully_reanalyzed_all.html", {"replays": replay_links}
request,
"replays/successfully_reanalyzed_batch.html",
{"replays": replay_links, "end_token": end_token},
)

0 comments on commit 81946a4

Please sign in to comment.