Skip to content

Commit

Permalink
use rawSQL for file and byte counts
Browse files Browse the repository at this point in the history
  • Loading branch information
John Tordoff committed Aug 22, 2024
1 parent 90bdefe commit 85d0e59
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 70 deletions.
112 changes: 75 additions & 37 deletions api/institutions/views.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
from django.db.models import Count, Q, F, Value, BooleanField, IntegerField
from django.db.models.functions import Coalesce
from django.db.models import Count, Q, F
from rest_framework import generics
from rest_framework import permissions as drf_permissions
from rest_framework import exceptions
from rest_framework import status
from rest_framework.response import Response
from rest_framework.settings import api_settings


from framework.auth.oauth_scopes import CoreScopes

from osf.metrics import InstitutionProjectCounts
Expand Down Expand Up @@ -545,73 +543,75 @@ class InstitutionDashboardUserList(JSONAPIBaseView, generics.ListAPIView, ListFi

def get_default_queryset(self):
institution = self.get_institution()
from django.db.models import OuterRef, Subquery, Count, Q, F, Value, BooleanField, IntegerField
from django.db.models import IntegerField, Value, BooleanField, BigIntegerField
from django.db.models.functions import Coalesce
from django.db.models.expressions import RawSQL
from django.db.models.expressions import RawSQL, F

return institution.get_institution_users().annotate(
email_address=F('username'),
department=F('institutionaffiliation__sso_department'),
# Count of public projects (assuming a related_name 'projects' from OSFUser to Project)
number_of_public_projects=Count(
# Count of public projects
public_projects=Count(
'nodes',
filter=(Q(nodes__is_public=True) & Q(nodes__type='osf.node')),
distinct=True
distinct=True,
),
number_of_private_projects=Count(
private_projects=Count(
'nodes',
filter=(Q(nodes__is_public=False) & Q(nodes__type='osf.node')),
distinct=True
distinct=True,
),
# Count of public and private registrations
number_of_public_registrations=Count(
public_registrations=Count(
'nodes',
filter=(Q(nodes__is_public=True) & Q(nodes__type='osf.registration')),
distinct=True
distinct=True,
),
number_of_private_registrations=Count(
embargoed_registrations=Count(
'nodes',
filter=(Q(nodes__is_public=False) & Q(nodes__type='osf.registration')),
distinct=True
distinct=True,
),
# Count of preprints
number_of_preprints=Count(
published_preprints=Count(
'preprints',
filter=Q(preprints__is_public=True),
distinct=True
distinct=True,
),
# Count files associated with nodes
number_of_node_files=RawSQL(
# Count files associated with nodes using RawSQL
public_node_files=RawSQL(
"""
SELECT COUNT(f.id)
FROM osf_basefilenode f
INNER JOIN osf_abstractnode n ON n.id = f.target_object_id
INNER JOIN django_content_type ct ON ct.id = f.target_content_type_id
WHERE ct.model = 'abstractnode'
AND n.type = 'osf.node'
AND n.is_public = TRUE
AND f.type = 'osf.osfstoragefile'
AND n.creator_id = osf_osfuser.id
""",
[],
output_field=IntegerField()
output_field=IntegerField(),
),
# Count files associated with registrations using RawSQL
number_of_registration_files=RawSQL(
public_registration_files=RawSQL(
"""
SELECT COUNT(f.id)
FROM osf_basefilenode f
INNER JOIN osf_abstractnode r ON r.id = f.target_object_id
INNER JOIN django_content_type ct ON ct.id = f.target_content_type_id
WHERE ct.model = 'abstractnode'
AND r.type = 'osf.registration'
AND r.is_public = TRUE
AND f.type = 'osf.osfstoragefile'
AND r.creator_id = osf_osfuser.id
""",
[],
output_field=IntegerField()
output_field=IntegerField(),
),
# Count files associated with preprints using RawSQL
number_of_preprint_files=RawSQL(
public_preprint_files=RawSQL(
"""
SELECT COUNT(f.id)
FROM osf_basefilenode f
Expand All @@ -623,21 +623,49 @@ def get_default_queryset(self):
AND p.creator_id = osf_osfuser.id
""",
[],
output_field=IntegerField()
output_field=IntegerField(),
),
public_files=Coalesce(
F('public_node_files') +
F('public_registration_files') +
F('public_preprint_files'),
Value(0),
output_field=IntegerField(),
),
storage_byte_count=RawSQL(
"""
SELECT COALESCE(SUM(version.size), 0) FROM osf_basefileversionsthrough AS obfnv
LEFT JOIN osf_basefilenode file ON obfnv.basefilenode_id = file.id
LEFT JOIN osf_fileversion version ON obfnv.fileversion_id = version.id
LEFT JOIN django_content_type type on file.target_content_type_id = type.id
WHERE file.provider = 'osfstorage'
AND type.model = 'abstractnode'
AND file.deleted_on IS NULL
AND file.target_object_id IN (
SELECT id FROM osf_abstractnode
WHERE creator_id = osf_osfuser.id
)
""",
[],
output_field=BigIntegerField(),
),
number_of_files=Coalesce(
F('number_of_node_files') +
F('number_of_registration_files') +
F('number_of_preprint_files'),
total_object_count=Coalesce(
F('public_projects') +
F('private_projects') +
F('public_registrations') +
F('embargoed_registrations') +
F('published_preprints'),
Value(0),
output_field=IntegerField()
output_field=IntegerField(),
),
month_last_login=F('last_login'),
month_last_active=F('last_login'),
has_orcid=Coalesce(
Q(external_identity__has_key='ORCID'),
Value(False),
output_field=BooleanField()
output_field=BooleanField(),
),
account_created_date=F('created')
account_created=F('created'),
)

# overrides RetrieveAPIView
Expand All @@ -659,18 +687,28 @@ def create_csv_response(self):
response['Content-Disposition'] = 'attachment; filename="institution_users.csv"'

writer = csv.writer(response)
writer.writerow(['ID', 'Email', 'Department', 'Public Projects', 'Private Projects', 'Public Registrations',
'Private Registrations', 'Preprints'])
writer.writerow(
[
'ID',
'Email',
'Department',
'Public Projects',
'Private Projects',
'Public Registrations',
'Private Registrations',
'Preprints',
],
)

for user in queryset:
writer.writerow([
user.id,
user.email_address,
user.department,
user.number_of_public_projects,
user.number_of_private_projects,
user.number_of_public_registrations,
user.number_of_private_registrations,
user.number_of_preprints,
user.public_projects,
user.private_projects,
user.public_registrations,
user.embargoed_registrations,
user.published_preprints,
])
return response
40 changes: 21 additions & 19 deletions api/users/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,15 @@ class UserSerializer(JSONAPISerializer):
filterable_fields = frozenset([
'email_address', # For Institutional Dashboard only
'department', # For Institutional Dashboard only
'number_of_public_projects', # For Institutional Dashboard only
'number_of_private_projects', # For Institutional Dashboard only
'number_of_public_registrations', # For Institutional Dashboard only
'number_of_private_registrations', # For Institutional Dashboard only
'number_of_preprints', # For Institutional Dashboard only
'number_of_files', # For Institutional Dashboard only
'public_projects', # For Institutional Dashboard only
'private_projects', # For Institutional Dashboard only
'public_registrations', # For Institutional Dashboard only
'embargoed_registrations', # For Institutional Dashboard only
'published_preprints', # For Institutional Dashboard only
'public_files', # For Institutional Dashboard only
'has_orcid', # For Institutional Dashboard only
'account_created_date', # For Institutional Dashboard only
'last_log', # For Institutional Dashboard only
'account_created', # For Institutional Dashboard only
'last_login', # For Institutional Dashboard only
'full_name',
'given_name',
'middle_names',
Expand Down Expand Up @@ -100,18 +100,20 @@ class UserSerializer(JSONAPISerializer):

email_address = ser.CharField(required=False, allow_blank=True, help_text='For Institutional Dashboard only')
department = ser.CharField(required=False, allow_blank=True, help_text='For Institutional Dashboard only')
number_of_public_projects = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_private_projects = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_public_registrations = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_private_registrations = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_preprints = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_node_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_registration_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_preprint_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
number_of_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_projects = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
private_projects = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_registrations = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
embargoed_registrations = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
published_preprints = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_node_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_registration_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_preprint_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
public_files = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
storage_byte_count = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
total_object_count = ser.IntegerField(required=False, help_text='For Institutional Dashboard only')
has_orcid = ser.BooleanField(required=False, help_text='For Institutional Dashboard only')
account_created_date = VersionedDateTimeField(required=False, help_text='For Institutional Dashboard only')
last_log = VersionedDateTimeField(required=False, help_text='For Institutional Dashboard only')
account_created = VersionedDateTimeField(required=False, help_text='For Institutional Dashboard only')
last_login = VersionedDateTimeField(required=False, help_text='For Institutional Dashboard only')

links = HideIfDisabled(
LinksField(
Expand Down
20 changes: 10 additions & 10 deletions api_tests/institutions/views/test_institution_dashboard_user.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def users(self, institution):
)
user_one.add_or_update_affiliated_institution(
institution,
sso_department="Science Department"
sso_department='Science Department'
)
user_two.add_or_update_affiliated_institution(institution)
user_three.add_or_update_affiliated_institution(institution)
Expand Down Expand Up @@ -123,16 +123,16 @@ def test_return_all_users(self, app, institution, users):
('[full_name]', 'Example', 3), # Multiple users should be returned here
('[email_address]', '[email protected]', 1),
('[department]', 'Science Department', 1),
('[number_of_public_projects][lte]', '1', 2),
('[number_of_private_projects][lt]', '1', 3),
('[number_of_private_projects][gte]', '1', 3),
('[number_of_public_registrations][lte]', '1', 1),
('[number_of_private_registrations][lte]', '1', 3),
('[number_of_preprints][lte]', '2', 1),
('[number_of_files][lte]', '1', 0),
('[public_projects][lte]', '1', 2),
('[private_projects][lte]', '1', 3),
('[private_projects][gte]', '1', 3),
('[public_registrations][lte]', '1', 3),
('[embargoed_registrations][lte]', '1', 3),
('[published_preprints][lte]', '1', 2),
('[public_files][lt]', '1', 1),
('[public_files][lte]', '1', 2),
('[last_login][lte]', '2-11-2018', 0),
('[last_log]', 'account_created', 0),
('[account_created_date]', '2-11-2018', 0),
('[account_created]', '2-11-2018', 0),
('[has_orcid]', 'True', 0),
])
def test_filter_users(self, app, institution, users, attribute, value, expected_count):
Expand Down
4 changes: 0 additions & 4 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,6 @@ class RunningTotal(InnerDoc):
total = metrics.Integer()
total_daily = metrics.Integer()


class FileRunningTotals(InnerDoc):
total = metrics.Integer()
public = metrics.Integer()
Expand All @@ -113,9 +112,6 @@ class NodeRunningTotals(InnerDoc):
public_daily = metrics.Integer()
private_daily = metrics.Integer()




class RegistrationRunningTotals(InnerDoc):
total = metrics.Integer()
public = metrics.Integer()
Expand Down

0 comments on commit 85d0e59

Please sign in to comment.