diff --git a/baidupcsleecher/settings.py b/baidupcsleecher/settings.py index 4401a3e..f878352 100644 --- a/baidupcsleecher/settings.py +++ b/baidupcsleecher/settings.py @@ -58,6 +58,7 @@ RUNNER_SLEEP_SECONDS = int(getenv("RUNNER_SLEEP_SECONDS", "5")) SAMPLE_SIZE = int(getenv("SAMPLE_SIZE", "10240")) FULL_DOWNLOAD_IMMEDIATELY = bool(int(getenv("FULL_DOWNLOAD_IMMEDIATELY", 0))) +RETRY_TIMES_LIMIT = int(getenv("RETRY_TIMES_LIMIT", 5)) # shared link transfer policy: always, if_not_present TRANSFER_POLICY = getenv("TRANSFER_POLICY", "if_not_present") PAN_BAIDU_BDUSS = getenv("PAN_BAIDU_BDUSS", "") diff --git a/entrypoint.sh b/entrypoint.sh index 72662f9..45391bf 100755 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -11,6 +11,7 @@ echo python manage.py runtransfer & python manage.py runsamplingdownloader & python manage.py runleecher & +python manage.py runresume & echo wait -n diff --git a/task/management/commands/runresume.py b/task/management/commands/runresume.py new file mode 100644 index 0000000..eb61d4b --- /dev/null +++ b/task/management/commands/runresume.py @@ -0,0 +1,37 @@ +import logging +from time import sleep + +from django.conf import settings +from django.core.management.base import BaseCommand + +from task.models import Task + +logger = logging.getLogger("runresume") + + +class Command(BaseCommand): + help = "resume failed but recoverable tasks." + + def add_arguments(self, parser): + parser.add_argument( + "--once", + action="store_true", + help="resume all failed tasks once and exit immediately.", + ) + + def resume_once(self): + for task in Task.filter_failed(): + if not task.recoverable: + continue + if task.retry_times >= settings.RETRY_TIMES_LIMIT: + continue + task.schedule_resume() + + def handle(self, *args, **options): + logger.info("auto resume failed but recoverable tasks.") + while True: + self.resume_once() + if options["once"]: + logger.info("auto resume tasks once and exit now.") + return + sleep(settings.RUNNER_SLEEP_SECONDS) diff --git a/task/models.py b/task/models.py index cb979e2..1c293e0 100644 --- a/task/models.py +++ b/task/models.py @@ -222,22 +222,25 @@ def get_resume_method_name(self): "downloading_files": "restart_downloading", } step_name = self.get_current_step() - return resume_methods[step_name] + if step_name: + return resume_methods[step_name] def inc_retry_times(self): self.retry_times += 1 self.save() - def resume(self): + def schedule_resume(self): if not self.failed: return - method = getattr(self, self.get_resume_method_name()) - method() + method_name = self.get_resume_method_name() + if method_name: + method = getattr(self, method_name) + method() @classmethod def schedule_resume_failed(cls): for task in cls.filter_failed(): - task.resume() + task.schedule_resume() @property def done(self): @@ -267,4 +270,5 @@ def recoverable(self): if "error_code: 105," in e or "啊哦,链接错误没找到文件,请打开正确的分享链接" in e: return False + # assume task is not recoverable by default to avoid flood requests return False diff --git a/task/tests/test_commands.py b/task/tests/test_commands.py new file mode 100644 index 0000000..cb61f18 --- /dev/null +++ b/task/tests/test_commands.py @@ -0,0 +1,64 @@ +from django.conf import settings +from django.core.management import call_command +from django.test import TestCase + +from task.management.commands.runresume import Command as ResumeCommand +from task.models import Task + + +class ResumeCommandTest(TestCase): + def setUp(self): + self.task = Task.objects.create(shared_id="foo", shared_password="foo") + self.task.status = Task.Status.SAMPLING_DOWNLOADED + self.task.started_at = self.task.created_at + self.task.transfer_completed_at = self.task.created_at + self.task.sample_downloaded_at = self.task.created_at + self.task.failed = True + self.task.message = "BaiduPCS._request" + self.task.save() + + def test_resume(self): + assert self.task.retry_times == 0 + + ResumeCommand().resume_once() + + task = Task.objects.get(pk=self.task.id) + assert task.status == Task.Status.TRANSFERRED + assert not task.failed + assert task.retry_times == 1 + + def test_resume_not_recoverable_task(self): + self.task.message = "error_code: 105," + self.task.save() + assert self.task.retry_times == 0 + + ResumeCommand().resume_once() + + task = Task.objects.get(pk=self.task.id) + assert task.retry_times == 0 + assert task.failed + assert task.status == Task.Status.SAMPLING_DOWNLOADED + + def test_resume_too_many_times(self): + self.task.retry_times = settings.RETRY_TIMES_LIMIT + 1 + self.task.save() + assert self.task.retry_times == settings.RETRY_TIMES_LIMIT + 1 + assert self.task.get_resume_method_name() == "restart_downloading" + + ResumeCommand().resume_once() + + task = Task.objects.get(pk=self.task.id) + assert task.retry_times == settings.RETRY_TIMES_LIMIT + 1 + assert task.get_resume_method_name() == "restart_downloading" + assert task.failed + assert task.status == Task.Status.SAMPLING_DOWNLOADED + + def test_run_command(self): + assert self.task.retry_times == 0 + + call_command("runresume", "--once") + + task = Task.objects.get(pk=self.task.id) + assert task.status == Task.Status.TRANSFERRED + assert not task.failed + assert task.retry_times == 1 diff --git a/task/tests/test_models.py b/task/tests/test_models.py index d5d7dde..2fbc2b5 100644 --- a/task/tests/test_models.py +++ b/task/tests/test_models.py @@ -89,6 +89,7 @@ def test_steps1_failed(self): ] assert self.task.get_current_step() == "waiting_assign" assert not self.task.done + assert self.task.get_resume_method_name() == "restart" def test_steps2_failed(self): self.task.status = self.task.Status.STARTED @@ -103,6 +104,7 @@ def test_steps2_failed(self): ] assert self.task.get_current_step() == "transferring" assert not self.task.done + assert self.task.get_resume_method_name() == "restart" def test_steps3_failed(self): self.task.status = self.task.Status.TRANSFERRED @@ -118,6 +120,7 @@ def test_steps3_failed(self): ] assert self.task.get_current_step() == "downloading_samplings" assert not self.task.done + assert self.task.get_resume_method_name() == "restart_downloading" def test_steps4_failed(self): self.task.status = self.task.Status.SAMPLING_DOWNLOADED @@ -134,6 +137,25 @@ def test_steps4_failed(self): ] assert self.task.get_current_step() == "downloading_files" assert not self.task.done + assert self.task.get_resume_method_name() == "restart_downloading" + + def test_steps5_failed_but_yes_it_should_not_happend(self): + self.task.status = self.task.Status.SAMPLING_DOWNLOADED + self.task.started_at = self.task.created_at + self.task.transfer_completed_at = self.task.created_at + self.task.sample_downloaded_at = self.task.created_at + self.task.full_downloaded_at = self.task.created_at + self.task.failed = True + + assert list(self.task.get_steps()) == [ + ("waiting_assign", "done"), + ("transferring", "done"), + ("downloading_samplings", "done"), + ("downloading_files", "done"), + ] + assert self.task.get_current_step() is None + assert not self.task.done + assert self.task.get_resume_method_name() is None def test_filter_failed_but_nothing(self): assert list(Task.filter_failed()) == [] diff --git a/task/views.py b/task/views.py index 945d343..c844fb6 100644 --- a/task/views.py +++ b/task/views.py @@ -114,7 +114,7 @@ def restart(self, request, pk=None): @action(methods=["post"], detail=True) def resume(self, request, pk=None): task = self.get_object() - task.resume() + task.schedule_resume() return Response({"status": task.status}) def get_serializer(self, *args, **kwargs):