# Generated by Django 3.2.18 on 2023-12-08 00:45

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the nullable ``last_refreshed`` timestamp column to ``Sample``."""

    # Must run after the migration that added Sample.developmental_stage.
    dependencies = [("data_refinery_common", "0074_sample_developmental_stage")]

    # NOTE(review): when a column is added for an ``auto_now`` field, Django's
    # schema editor appears to use the current time as the effective default,
    # so pre-existing rows may be stamped instead of left NULL. Confirm this,
    # because the refresh_sample_metadata command only picks rows whose
    # last_refreshed IS NULL.
    operations = [
        migrations.AddField(
            field=models.DateTimeField(null=True, auto_now=True),
            model_name="sample",
            name="last_refreshed",
        ),
    ]
import time

from django.core.management.base import BaseCommand

from data_refinery_common.logging import get_and_configure_logger
from data_refinery_common.models import Sample
from data_refinery_foreman.surveyor.sra import SraSurveyor

logger = get_and_configure_logger(__name__)


class Command(BaseCommand):
    """Backfill metadata for samples that have no developmental stage yet.

    Selects samples of the requested source whose ``developmental_stage`` and
    ``last_refreshed`` are both NULL, re-gathers their metadata through
    ``SraSurveyor`` and saves each sample.
    """

    help = "Refresh metadata of samples that have no developmental stage set."

    def add_arguments(self, parser):
        """Register the ``--limit`` and ``--source`` command line options."""
        parser.add_argument(
            "--limit",
            default=1000,
            type=int,
            help="Number of samples to refresh",
        )
        parser.add_argument(
            "--source",
            choices=("SRA",),
            required=True,
            type=str,
            # Keep this text in sync with `choices`: handle() only knows how
            # to refresh samples through SraSurveyor.
            help="Source name (only SRA is supported)",
        )

    def handle(self, *args, **options):
        """Refresh up to ``--limit`` not-yet-refreshed samples, oldest id first.

        Failures for a single sample are logged and do not abort the run.
        """
        samples = Sample.objects.filter(
            developmental_stage__isnull=True,
            last_refreshed__isnull=True,
            source_database=options["source"],
        ).order_by("id")[: options["limit"]]

        for sample in samples:
            logger.info(f"Refreshing metadata for a sample {sample.accession_code}")
            try:
                _, sample_metadata = SraSurveyor.gather_all_metadata(sample.accession_code)
                # NOTE(review): `sample` is not passed to this helper and
                # nothing else here mutates it, so it is unclear how the
                # harmonized values reach the instance saved below. Confirm
                # the helper's signature; if it expects the sample as well,
                # this call fails and is only logged.
                SraSurveyor._apply_harmonized_metadata_to_sample(sample_metadata)
            except Exception as e:
                logger.exception(e)

            # Saved on success and on failure alike: last_refreshed is an
            # auto_now field, so saving stamps it and the sample drops out of
            # the IS NULL filter above instead of being retried on every run.
            # Deliberately not in a `finally` block, so an interrupted run
            # (KeyboardInterrupt / SystemExit) does not mark the in-flight
            # sample as refreshed.
            sample.save()

            # Fixed one-second pause between samples; presumably throttles
            # requests to the upstream metadata service -- TODO confirm.
            time.sleep(1)