Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/develop' into feature/preprints-affiliations
Browse files Browse the repository at this point in the history
  • Loading branch information
cslzchen committed Sep 4, 2024
2 parents d8f16a2 + 63365d8 commit 248c17e
Show file tree
Hide file tree
Showing 23 changed files with 250 additions and 352 deletions.
277 changes: 163 additions & 114 deletions README-docker-compose.md

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions addons/base/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,10 @@ def osfstoragefile_mark_viewed(self, auth, fileversion, file_node):
@file_signals.file_viewed.connect
def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record views by contributors
return
enqueue_update_analytics(
resource,
file_node,
Expand All @@ -707,6 +711,10 @@ def osfstoragefile_update_view_analytics(self, auth, fileversion, file_node):
@file_signals.file_viewed.connect
def osfstoragefile_viewed_update_metrics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record views by contributors
return
if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint):
try:
PreprintView.record_for_preprint(
Expand All @@ -730,6 +738,10 @@ def osfstoragefile_downloaded_update_analytics(self, auth, fileversion, file_nod
@file_signals.file_downloaded.connect
def osfstoragefile_downloaded_update_metrics(self, auth, fileversion, file_node):
resource = file_node.target
user = getattr(auth, 'user', None)
if hasattr(resource, 'is_contributor_or_group_member') and resource.is_contributor_or_group_member(user):
# Don't record downloads by contributors
return
if waffle.switch_is_active(features.ELASTICSEARCH_METRICS) and isinstance(resource, Preprint):
try:
PreprintDownload.record_for_preprint(
Expand Down
2 changes: 0 additions & 2 deletions admin/management/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ def post(self, request, *args, **kwargs):
class DailyReportersGo(ManagementCommandPermissionView):

def post(self, request, *args, **kwargs):
also_keen = bool(request.POST.get('also_send_to_keen', False))
report_date = request.POST.get('report_date', None)
if report_date:
report_date = isoparse(report_date).date()
Expand All @@ -109,7 +108,6 @@ def post(self, request, *args, **kwargs):

daily_reporters_go.apply_async(kwargs={
'report_date': report_date,
'also_send_to_keen': also_keen
})
messages.success(request, 'Daily reporters going!')
return redirect(reverse('management:commands'))
Expand Down
5 changes: 0 additions & 5 deletions admin/templates/management/commands.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,6 @@ <h4><u>Daily Reporters, Go!</u></h4>
<input type="date" name="report_date" id="report_date"/>
(default: yesterday)
<br>
<label for="also_send_to_keen">
Also send to keen?
</label>
<input type="checkbox" name="also_send_to_keen" id="also_send_to_keen"/>
(may result in duplicates)
<nav>
<input class="btn btn-success" type="submit" value="Run" />
</nav>
Expand Down
2 changes: 1 addition & 1 deletion api/base/settings/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@
# django-elasticsearch-metrics
ELASTICSEARCH_DSL = {
'default': {
'hosts': os.environ.get('ELASTIC6_URI', '127.0.0.1:9201'),
'hosts': osf_settings.ELASTIC6_URI,
'retry_on_timeout': True,
},
}
Expand Down
2 changes: 1 addition & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def _test_speedups_disable(request, settings, _test_speedups):

@pytest.fixture(scope='session')
def setup_connections():
connections.create_connection(hosts=['http://localhost:9201'])
connections.create_connection(hosts=[website_settings.ELASTIC6_URI])


@pytest.fixture(scope='function')
Expand Down
41 changes: 2 additions & 39 deletions docker-compose-dist-arm64.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,43 +6,6 @@ services:
# OSF #
#######

requirements:
image: quay.io/centerforopenscience/osf:develop-arm64
elasticsearch6:
image: quay.io/centerforopenscience/elasticsearch:es6-arm-6.3.1
platform: linux/arm64

assets:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

admin_assets:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

worker:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

admin:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

api:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

web:
image: quay.io/centerforopenscience/osf:develop-arm64
platform: linux/arm64
# Need to allocate tty to be able to call invoke for requirements task
tty: true

48 changes: 21 additions & 27 deletions osf/management/commands/daily_reporters_go.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,56 +2,51 @@
import logging

from django.core.management.base import BaseCommand
from django.db.utils import OperationalError
from django.utils import timezone

from framework import sentry
from framework.celery_tasks import app as celery_app
from osf.metrics.reporters import DAILY_REPORTERS
from osf.metrics.reporters import AllDailyReporters
from website.app import init_app


logger = logging.getLogger(__name__)


@celery_app.task(name='management.commands.daily_reporters_go')
def daily_reporters_go(also_send_to_keen=False, report_date=None, reporter_filter=None):
def daily_reporters_go(report_date=None, reporter_filter=None, **kwargs):
init_app() # OSF-specific setup

if report_date is None: # default to yesterday
report_date = (timezone.now() - datetime.timedelta(days=1)).date()

errors = {}
for reporter_class in DAILY_REPORTERS:
if reporter_filter and (reporter_filter.lower() not in reporter_class.__name__.lower()):
for _reporter_key, _reporter_class in AllDailyReporters.__members__.items():
if reporter_filter and (reporter_filter.lower() not in _reporter_class.__name__.lower()):
continue
try:
reporter_class().run_and_record_for_date(
report_date=report_date,
also_send_to_keen=also_send_to_keen,
)
except Exception as e:
errors[reporter_class.__name__] = repr(e)
logger.exception(e)
sentry.log_exception(e)
# continue with the next reporter
return errors
daily_reporter_go.apply_async(kwargs={
'reporter_key': _reporter_key,
'report_date': report_date.isoformat(),
})


def date_fromisoformat(date_str):
return datetime.datetime.strptime(date_str, '%Y-%m-%d').date()
@celery_app.task(
    name='management.commands.daily_reporter_go',
    autoretry_for=(OperationalError,),
    max_retries=5,
    retry_backoff=True,
    bind=True,
)
def daily_reporter_go(task, reporter_key: str, report_date: str):
    """Run a single daily reporter for a single date.

    Registered as a celery task; the decorator configures automatic retry
    (``max_retries=5``, with backoff) when an ``OperationalError`` is raised.

    :param task: the bound celery task instance (``bind=True``); unused here.
    :param reporter_key: member name in ``AllDailyReporters`` selecting the
        reporter class to run.
    :param report_date: the date to report on, as an ISO-format string
        (parsed with ``datetime.date.fromisoformat``).
    :raises KeyError: if ``reporter_key`` is not an ``AllDailyReporters`` member.
    :raises ValueError: if ``report_date`` is not a valid ISO date string.
    """
    # Look up the reporter first, then parse the date, then instantiate --
    # same evaluation order as before, so failures surface identically.
    reporter_cls = AllDailyReporters[reporter_key].value
    date_to_report = datetime.date.fromisoformat(report_date)
    reporter = reporter_cls()
    reporter.run_and_record_for_date(report_date=date_to_report)


class Command(BaseCommand):
def add_arguments(self, parser):
parser.add_argument(
'--keen',
type=bool,
default=False,
help='also send reports to keen',
)
parser.add_argument(
'--date',
type=date_fromisoformat, # in python 3.7+, could pass datetime.date.fromisoformat
type=datetime.date.fromisoformat,
help='run for a specific date (default: yesterday)',
)
parser.add_argument(
Expand All @@ -62,7 +57,6 @@ def add_arguments(self, parser):
def handle(self, *args, **options):
errors = daily_reporters_go(
report_date=options.get('date'),
also_send_to_keen=options['keen'],
reporter_filter=options.get('filter'),
)
for error_key, error_val in errors.items():
Expand Down
9 changes: 9 additions & 0 deletions osf/management/commands/force_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,13 @@
# Ignorable NodeLogs
LOG_WHITELIST = {
'affiliated_institution_added',
'category_updated',
'comment_added',
'comment_removed',
'comment_restored',
'comment_updated',
'confirm_ham',
'confirm_spam',
'contributor_added',
'contributor_removed',
'contributors_reordered',
Expand All @@ -72,14 +75,19 @@
'embargo_completed',
'embargo_initiated',
'embargo_terminated',
'external_ids_added',
'file_tag_added',
'flag_spam',
'guid_metadata_updated',
'license_changed',
'made_contributor_invisible',
'made_private',
'made_public',
'made_wiki_private',
'made_wiki_public',
'node_removed',
'node_access_requests_disabled',
'node_access_requests_enabled',
'permissions_updated',
'pointer_created',
'pointer_removed',
Expand All @@ -92,6 +100,7 @@
'registration_initiated',
'retraction_approved',
'retraction_initiated',
'subjects_updated',
'tag_added',
'tag_removed',
'wiki_deleted',
Expand Down
31 changes: 19 additions & 12 deletions osf/management/commands/monthly_reporters_go.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging

from django.core.management.base import BaseCommand
from django.db.utils import OperationalError
from django.utils import timezone

from framework import sentry
from framework.celery_tasks import app as celery_app
from osf.metrics.reporters import MONTHLY_REPORTERS
from osf.metrics.reporters import AllMonthlyReporters
from osf.metrics.utils import YearMonth
from website.app import init_app

Expand All @@ -28,17 +28,24 @@ def monthly_reporters_go(report_year=None, report_month=None):
year=today.year if today.month > 1 else today.year - 1,
month=today.month - 1 or MAXMONTH,
)
for _reporter_key in AllMonthlyReporters.__members__.keys():
monthly_reporter_go.apply_async(kwargs={
'reporter_key': _reporter_key,
'yearmonth': str(report_yearmonth),
})

errors = {}
for reporter_class in MONTHLY_REPORTERS:
try:
reporter_class().run_and_record_for_month(report_yearmonth)
except Exception as e:
errors[reporter_class.__name__] = str(e)
logger.exception(e)
sentry.log_exception(e)
# continue with the next reporter
return errors

@celery_app.task(
    name='management.commands.monthly_reporter_go',
    autoretry_for=(OperationalError,),
    max_retries=5,
    retry_backoff=True,
    bind=True,
)
def monthly_reporter_go(task, reporter_key: str, yearmonth: str):
    """Run a single monthly reporter for a single year-month.

    Registered as a celery task; the decorator configures automatic retry
    (``max_retries=5``, with backoff) when an ``OperationalError`` is raised.

    :param task: the bound celery task instance (``bind=True``); unused here.
    :param reporter_key: member name in ``AllMonthlyReporters`` selecting the
        reporter class to run.
    :param yearmonth: string form of the month to report on, parsed with
        ``YearMonth.from_str``.
    :raises KeyError: if ``reporter_key`` is not an ``AllMonthlyReporters`` member.
    """
    # Look up the reporter first, then parse the month, then instantiate --
    # same evaluation order as before, so failures surface identically.
    reporter_cls = AllMonthlyReporters[reporter_key].value
    month_to_report = YearMonth.from_str(yearmonth)
    reporter = reporter_cls()
    reporter.run_and_record_for_month(month_to_report)


class Command(BaseCommand):
Expand Down
29 changes: 15 additions & 14 deletions osf/metrics/reporters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import enum

# from .active_users import ActiveUserReporter
from .storage_addon_usage import StorageAddonUsageReporter
from .download_count import DownloadCountReporter
Expand All @@ -10,18 +12,17 @@
from .spam_count import SpamCountReporter


DAILY_REPORTERS = (
# ActiveUserReporter,
DownloadCountReporter,
InstitutionSummaryReporter,
NewUserDomainReporter,
NodeCountReporter,
OsfstorageFileCountReporter,
PreprintCountReporter,
StorageAddonUsageReporter,
UserCountReporter,
)
class AllDailyReporters(enum.Enum):
    """Registry of daily reporters: each member's value is a reporter class.

    Member names serve as string keys for selecting a reporter
    (``AllDailyReporters[key].value`` yields the class to instantiate).
    """
    # Disabled: the ActiveUserReporter import at the top of this module is
    # commented out as well.
    # ACTIVE_USER = ActiveUserReporter
    DOWNLOAD_COUNT = DownloadCountReporter
    INSTITUTION_SUMMARY = InstitutionSummaryReporter
    NEW_USER_DOMAIN = NewUserDomainReporter
    NODE_COUNT = NodeCountReporter
    OSFSTORAGE_FILE_COUNT = OsfstorageFileCountReporter
    PREPRINT_COUNT = PreprintCountReporter
    STORAGE_ADDON_USAGE = StorageAddonUsageReporter
    USER_COUNT = UserCountReporter


MONTHLY_REPORTERS = (
SpamCountReporter,
)
class AllMonthlyReporters(enum.Enum):
    """Registry of monthly reporters: each member's value is a reporter class.

    Member names serve as string keys for selecting a reporter
    (``AllMonthlyReporters[key].value`` yields the class to instantiate).
    """
    SPAM_COUNT = SpamCountReporter
Loading

0 comments on commit 248c17e

Please sign in to comment.