Skip to content

Commit

Permalink
osf:storageByteCount supplementary metadata [ENG-6187]
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 25, 2024
1 parent 16d0a72 commit 19be71b
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 20 deletions.
64 changes: 46 additions & 18 deletions api/caching/tasks.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import logging
from urllib.parse import urlparse

from django.apps import apps
from django.contrib.contenttypes.models import ContentType
from django.db import connection
from django.db.models import Sum

import requests
import logging

from django.apps import apps
from api.caching.utils import storage_usage_cache
from framework.postcommit_tasks.handlers import enqueue_postcommit_task

Expand All @@ -16,6 +17,9 @@
logger = logging.getLogger(__name__)


_DEFAULT_FILEVERSION_PAGE_SIZE = 500000


def get_varnish_servers():
    """Return the Varnish servers configured in ``settings.VARNISH_SERVERS``."""
    # TODO: this should get the varnish servers from HAProxy or a setting
    varnish_servers = settings.VARNISH_SERVERS
    return varnish_servers
Expand Down Expand Up @@ -111,35 +115,59 @@ def ban_url(instance):


@app.task(max_retries=5, default_retry_delay=10)
def update_storage_usage_cache(target_id, target_guid, per_page=500000):
def update_storage_usage_cache(target_id, target_guid, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE):
if not settings.ENABLE_STORAGE_USAGE_CACHE:
return
from osf.models import Guid
storage_usage_total = compute_storage_usage_total(Guid.load(target_guid).referent, per_page=per_page)
key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)


def compute_storage_usage_total(target_obj, per_page=_DEFAULT_FILEVERSION_PAGE_SIZE):
sql = """
SELECT count(size), sum(size) from
(SELECT size FROM osf_basefileversionsthrough AS obfnv
LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id
LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id
LEFT JOIN django_content_type type on file.target_content_type_id = type.id
WHERE file.provider = 'osfstorage'
AND type.model = 'abstractnode'
AND file.deleted_on IS NULL
AND file.target_object_id=%s
AND file.target_object_id=%(target_pk)s
AND file.target_content_type_id=%(target_content_type_pk)s
ORDER BY version.id
LIMIT %s OFFSET %s) file_page
LIMIT %(per_page)s OFFSET %(offset)s
) file_page
"""
count = per_page
last_count = 1 # initialize non-zero
offset = 0
storage_usage_total = 0
content_type_pk = ContentType.objects.get_for_model(target_obj).pk
with connection.cursor() as cursor:
while count:
cursor.execute(sql, [target_id, per_page, offset])
result = cursor.fetchall()
storage_usage_total += int(result[0][1]) if result[0][1] else 0
count = int(result[0][0]) if result[0][0] else 0
offset += count

key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_guid)
storage_usage_cache.set(key, storage_usage_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
while last_count:
cursor.execute(
sql, {
'target_pk': target_obj.pk,
'target_content_type_pk': content_type_pk,
'per_page': per_page,
'offset': offset,
},
)
this_count, size_sum = cursor.fetchall()[0]
storage_usage_total += int(size_sum or 0)
last_count = (this_count or 0)
offset += last_count
return storage_usage_total


def get_storage_usage_total(target_obj):
    """Return the storage usage total for ``target_obj``, using the cache when enabled.

    When ``settings.ENABLE_STORAGE_USAGE_CACHE`` is falsy, the total is
    computed directly with ``compute_storage_usage_total`` and nothing is
    cached. Otherwise the value is looked up in ``storage_usage_cache`` under
    the key built from ``target_obj._id``; on a miss (``None``) it is computed
    and stored with ``settings.STORAGE_USAGE_CACHE_TIMEOUT`` before being
    returned.
    """
    if settings.ENABLE_STORAGE_USAGE_CACHE:
        cache_key = cache_settings.STORAGE_USAGE_KEY.format(target_id=target_obj._id)
        cached_total = storage_usage_cache.get(cache_key)
        if cached_total is not None:
            # a cached zero is a real answer; only None means "not cached"
            return cached_total
        fresh_total = compute_storage_usage_total(target_obj)
        storage_usage_cache.set(cache_key, fresh_total, settings.STORAGE_USAGE_CACHE_TIMEOUT)
        return fresh_total
    return compute_storage_usage_total(target_obj)


def update_storage_usage(target):
Expand Down
16 changes: 16 additions & 0 deletions osf/metadata/osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django import db
import rdflib

from api.caching.tasks import get_storage_usage_total
from osf import models as osfdb
from osf.metadata import gather
from osf.metadata.rdfutils import (
Expand Down Expand Up @@ -208,19 +209,24 @@ def pls_get_magic_metadata_basket(osf_item) -> gather.Basket:
OSFMAP_SUPPLEMENT = {
OSF.Project: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.ProjectComponent: {
OSF.hasOsfAddon: None,
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Registration: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.RegistrationComponent: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.Preprint: {
OSF.storageByteCount: None,
OSF.storageRegion: None,
},
OSF.File: {
Expand Down Expand Up @@ -1169,3 +1175,13 @@ def _storage_region_triples(region, *, subject_ref=None):
else:
yield (subject_ref, OSF.storageRegion, _region_ref)
yield (_region_ref, SKOS.prefLabel, rdflib.Literal(region.name, lang='en'))


@gather.er(
    OSF.storageByteCount,
    focustype_iris=[
        OSF.Project,
        OSF.ProjectComponent,
        OSF.Registration,
        OSF.RegistrationComponent,
        OSF.Preprint,
    ],
)
def gather_storage_byte_count(focus):
    """Yield an ``OSF.storageByteCount`` pair for the focus's storage usage total.

    The total comes from ``get_storage_usage_total(focus.dbmodel)``; nothing
    is yielded when that call returns ``None``.
    """
    byte_count = get_storage_usage_total(focus.dbmodel)
    if byte_count is None:
        return
    yield (OSF.storageByteCount, byte_count)
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w4ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w4ibb> osf:storageByteCount 1337 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> skos:prefLabel "United States"@en .
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w2ibb> osf:hasOsfAddon <urn:osf.io:addons:gitlab> ;
osf:storageByteCount 7 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<urn:osf.io:addons:gitlab> a osf:AddonImplementation ;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
@prefix osf: <https://osf.io/vocab/2022/> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .

<http://localhost:5000/w5ibb> osf:storageRegion <http://localhost:8000/v2/regions/us/> .
<http://localhost:5000/w5ibb> osf:storageByteCount 17 ;
osf:storageRegion <http://localhost:8000/v2/regions/us/> .

<http://localhost:8000/v2/regions/us/> skos:prefLabel "United States"@en .
11 changes: 11 additions & 0 deletions osf_tests/metadata/test_osf_gathering.py
Original file line number Diff line number Diff line change
Expand Up @@ -873,3 +873,14 @@ def test_gather_qualified_attributions(self):
(_attribution_readonly, PROV.agent, self.userfocus__readonly),
(_attribution_readonly, DCAT.hadRole, OSF['readonly-contributor']),
})

def test_gather_storage_byte_count(self):
    """Check the storageByteCount triple gathered for project, registration and preprint."""
    _expected_byte_counts = (
        (self.projectfocus, 123456),
        (self.registrationfocus, 0),
        (self.preprintfocus, 1337),
    )
    # same three checks, in the same order, driven from one table
    for _focus, _byte_count in _expected_byte_counts:
        assert_triples(osf_gathering.gather_storage_byte_count(_focus), {
            (_focus.iri, OSF.storageByteCount, Literal(_byte_count)),
        })

0 comments on commit 19be71b

Please sign in to comment.