Skip to content

Commit

Permalink
add benchmarking tests for summary reporter
Browse files Browse the repository at this point in the history
  • Loading branch information
John Tordoff committed Oct 7, 2024
1 parent dd9c7ea commit 88e1593
Show file tree
Hide file tree
Showing 2 changed files with 161 additions and 5 deletions.
6 changes: 5 additions & 1 deletion osf_tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,11 @@ class BaseNodeFactory(DjangoModelFactory):
title = factory.Faker('catch_phrase')
description = factory.Faker('sentence')
created = factory.LazyFunction(timezone.now)
creator = factory.SubFactory(AuthUserFactory)
creator = factory.Maybe(
factory.SelfAttribute('creator'),
yes_declaration=factory.SelfAttribute('creator'),
no_declaration=factory.SubFactory(AuthUserFactory),
)

class Meta:
model = models.Node
Expand Down
160 changes: 156 additions & 4 deletions osf_tests/metrics/reporters/test_institutional_summary_reporter.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import time
import datetime
from django.test import TestCase
import logging
from django.test import TestCase, override_settings
from osf.metrics.reporters import InstitutionalSummaryMonthlyReporter
from osf.metrics.utils import YearMonth
from osf_tests.factories import (
Expand Down Expand Up @@ -97,18 +99,17 @@ def test_report_generation_multiple_institutions(self):
institution3 = InstitutionFactory()

# Set up dates for different months
now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC)
last_month = datetime.datetime(2018, 1, 15, tzinfo=datetime.UTC)
next_month = datetime.datetime(2018, 3, 10, tzinfo=datetime.UTC)

self._create_affiliated_project(institution2, is_public=True, created=now)
self._create_affiliated_project(institution2, is_public=True, created=self._now)
self._create_affiliated_project(institution3, is_public=True, created=last_month)

# Create future projects for self._institution (should not be counted)
self._create_affiliated_project(self._institution, is_public=True, created=next_month)

# Create users affiliated with different institutions
self._create_active_user(institution2, date_confirmed=now)
self._create_active_user(institution2, date_confirmed=self._now)
self._create_active_user(institution3, date_confirmed=last_month)

# Run the reporter for the current month (February 2018)
Expand All @@ -133,3 +134,154 @@ def test_report_generation_multiple_institutions(self):
self.assertEqual(report_institution2.user_count, 1)
self.assertEqual(report_institution2.monthly_active_user_count, 1)
self.assertEqual(report_institution2.monthly_logged_in_user_count, 0) # No logged-in users


class TestSummaryMonthlyReporterBenchmarker(TestCase):
    """Benchmark-style test for ``InstitutionalSummaryMonthlyReporter``.

    Builds a configurable number of institutions, users per institution and
    objects per user, runs the reporter for February 2018, and checks the
    per-institution counts. When ``enable_benchmarking`` is true, the time
    spent in each phase is written to the module logger.
    """

    @classmethod
    def setUpTestData(cls):
        """Create the class-wide logger, report month, institution and clock."""
        cls.logger = logging.getLogger(__name__)
        # Make INFO-level timing messages visible when no handler is configured.
        logging.basicConfig(level=logging.INFO)
        cls._yearmonth = YearMonth(2018, 2)  # February 2018
        cls._institution = InstitutionFactory()
        cls._now = datetime.datetime(2018, 2, 4, tzinfo=datetime.UTC)
        cls.enable_benchmarking = True

    @classmethod
    def _create_affiliated_object(cls, factory_cls, institution, is_public, created, creator=None):
        """Build an object with ``factory_cls`` and affiliate it with ``institution``.

        ``creator`` is only forwarded to the factory when it is not ``None``;
        an explicit ``creator=None`` keyword would replace the factory's own
        creator declaration instead of letting it supply a default user.
        ``created`` is assigned after construction and then saved.
        """
        factory_kwargs = {'is_public': is_public}
        if creator is not None:
            factory_kwargs['creator'] = creator
        obj = factory_cls(**factory_kwargs)
        obj.affiliated_institutions.add(institution)
        obj.created = created
        obj.save()
        return obj

    @classmethod
    def _create_affiliated_preprint(cls, institution, is_public, created, creator=None):
        """Create a preprint affiliated with ``institution`` and return it."""
        return cls._create_affiliated_object(
            PreprintFactory, institution, is_public, created, creator=creator,
        )

    @classmethod
    def _create_affiliated_project(cls, institution, is_public, created, creator=None):
        """Create a project affiliated with ``institution`` and return it."""
        return cls._create_affiliated_object(
            ProjectFactory, institution, is_public, created, creator=creator,
        )

    @classmethod
    def _create_affiliated_registration(cls, institution, is_public, created, creator=None):
        """Create a registration affiliated with ``institution`` and return it."""
        return cls._create_affiliated_object(
            RegistrationFactory, institution, is_public, created, creator=creator,
        )

    @classmethod
    def _create_logged_in_user(cls, institution, date_last_login):
        """Create a user affiliated with ``institution`` with the given last-login date."""
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(institution)
        user.date_last_login = date_last_login
        user.save()
        return user

    @classmethod
    def _create_active_user(cls, institution, date_confirmed):
        """Create an affiliated user who has one log dated ``date_confirmed``."""
        user = AuthUserFactory()
        user.add_or_update_affiliated_institution(institution)
        user.date_confirmed = date_confirmed
        ProjectFactory(creator=user)  # adds log to make active
        log = user.logs.get()
        log.created = date_confirmed
        log.save()
        user.save()
        return user

    def _log_elapsed(self, label, start_time):
        """Log the time elapsed since ``start_time`` if benchmarking is enabled.

        ``start_time`` must be a value previously returned by
        ``time.perf_counter()``.
        """
        if self.enable_benchmarking:
            elapsed = time.perf_counter() - start_time
            self.logger.info(f"{label}: {elapsed:.2f} seconds")

    def test_high_counts_multiple_institutions(self):
        """
        Test the report generation with configurable high counts for institutions, users, and their objects.
        Benchmarking can be enabled by setting the 'enable_benchmarking' attribute to True.
        """
        # Configure counts (adjust these numbers as needed)
        additional_institution_count = 1  # Number of institutions created besides self._institution
        users_per_institution = 3  # Number of users per institution
        objects_per_user = 3  # Number of objects of each kind per user

        # perf_counter is monotonic, so the measured intervals cannot be
        # skewed by system clock adjustments the way time.time() can.
        total_start_time = data_creation_start_time = time.perf_counter()

        # Create institutions
        institutions = [self._institution]
        institutions += [InstitutionFactory() for _ in range(additional_institution_count)]
        self._log_elapsed(
            f"Time taken to create {additional_institution_count + 1} institutions",
            data_creation_start_time,
        )

        # Create users for each institution
        users_creation_start_time = time.perf_counter()
        institution_users = {}
        for institution in institutions:
            users = []
            for _ in range(users_per_institution):
                user = AuthUserFactory()
                user.add_or_update_affiliated_institution(institution)
                user.date_last_login = self._now
                user.date_confirmed = self._now - datetime.timedelta(days=1)
                user.save()
                users.append(user)
            institution_users[institution] = users
        self._log_elapsed("Time taken to create users", users_creation_start_time)

        # Create projects, registrations, and preprints for each user
        objects_creation_start_time = time.perf_counter()
        for institution, users in institution_users.items():
            for user in users:
                for _ in range(objects_per_user):
                    self._create_affiliated_project(institution, is_public=True, created=self._now, creator=user)
                    self._create_affiliated_project(institution, is_public=False, created=self._now, creator=user)
                    self._create_affiliated_registration(institution, is_public=True, created=self._now, creator=user)
                    self._create_affiliated_registration(institution, is_public=False, created=self._now, creator=user)
                    self._create_affiliated_preprint(institution, is_public=True, created=self._now, creator=user)
        self._log_elapsed("Time taken to create objects", objects_creation_start_time)
        self._log_elapsed("Total time taken to create data", data_creation_start_time)

        # Run the reporter
        reporter_start_time = time.perf_counter()
        reporter = InstitutionalSummaryMonthlyReporter()
        reports = list(reporter.report(self._yearmonth))
        self._log_elapsed("Time taken to run the reporter", reporter_start_time)
        self._log_elapsed("Total test execution time", total_start_time)

        self.assertEqual(len(reports), additional_institution_count + 1)

        # Validate counts for each institution
        expected_count = users_per_institution * objects_per_user
        for report_index, report in enumerate(reports):
            # subTest keeps checking the remaining reports after a failure
            # and identifies which report failed.
            with self.subTest(report_index=report_index):
                self.assertEqual(report.public_project_count, expected_count)
                self.assertEqual(report.private_project_count, expected_count)
                self.assertEqual(report.public_registration_count, expected_count)
                self.assertEqual(report.embargoed_registration_count, expected_count)
                self.assertEqual(report.published_preprint_count, expected_count)
                self.assertEqual(report.user_count, users_per_institution)
                self.assertEqual(report.monthly_logged_in_user_count, users_per_institution)

0 comments on commit 88e1593

Please sign in to comment.