Skip to content

Commit

Permalink
Merge pull request #355 from CDLUC3/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
datadavev authored Nov 3, 2022
2 parents 6a2868e + b62cdf4 commit 2795804
Show file tree
Hide file tree
Showing 4 changed files with 368 additions and 6 deletions.
321 changes: 321 additions & 0 deletions ezidapp/management/commands/diag-identifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,321 @@
# Copyright©2021, Regents of the University of California
# http://creativecommons.org/licenses/BSD

"""Show the current state of one or more identifiers
This command works through the Django ORM and is useful for checking the current state
of an identifier.
This command does not alter any information in the database, and should be safe to run
at any time, including a running production instance.
Note however, that this command MAY alter the information in N2T when the --sync option
is used. Confirmation is requested before any metadata updates are propagated to N2T.
"""

import argparse
import csv
import json
import logging
import datetime
import zlib

import django.apps
import django.conf
import django.core.management
import django.core.serializers
import django.db.models
import django.forms
import django.forms.models

import ezidapp.models.datacenter
import ezidapp.models.group
import ezidapp.models.identifier
import ezidapp.models.user
import impl.noid_egg

# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)

class SplitArgs(argparse.Action):
    """argparse action that turns a comma separated option value into a list.

    Each item is stripped of surrounding whitespace, so a quoted value such as
    ``"a, b ,c"`` yields ``['a', 'b', 'c']``. Empty items (from doubled or
    trailing commas, or an empty value) are dropped so they never end up as
    blank field names downstream.

    Adapted from:
    https://stackoverflow.com/questions/52132076/argparse-action-or-type-for-comma-separated-list
    """

    def __call__(self, parser, namespace, values, option_string=None):
        """Store the parsed list on ``namespace`` under ``self.dest``."""
        stripped = (item.strip() for item in values.split(","))
        setattr(namespace, self.dest, [item for item in stripped if item])

class Command(django.core.management.BaseCommand):
    # Diagnostic management command with two operations:
    #   show - dump one or more identifiers as JSON, optionally with the N2T
    #          record, and optionally push database metadata to N2T (--sync).
    #   list - write a CSV listing of identifiers matching a filter.
    #
    # NOTE: this class deliberately has no docstring. A class docstring would
    # define __doc__ in the class namespace and `help = __doc__` below would
    # then pick it up instead of the module docstring.
    help = __doc__

    # Related objects inlined into the output by `show --expanded`.
    _EXPAND_FIELDS = ('datacenter', 'owner', 'ownergroup', 'profile')

    def __init__(self, *args, **kwargs):
        """Initialise the command.

        Arguments are forwarded unchanged to ``BaseCommand`` so the command
        can also be constructed with explicit ``stdout``/``stderr`` streams.
        """
        super().__init__(*args, **kwargs)
        self.opt = None

    def create_parser(self, *args, **kwargs):
        """Return the command parser, keeping line breaks in the help text."""
        parser = super().create_parser(*args, **kwargs)
        # The help text is the module docstring; keep its layout verbatim.
        parser.formatter_class = argparse.RawTextHelpFormatter
        return parser

    def add_arguments(self, parser: argparse.ArgumentParser):
        """Register the ``show`` and ``list`` sub-commands and their options."""
        subparsers = parser.add_subparsers(
            title="Operations",
            dest="operation",
            required=True,
        )
        _show = subparsers.add_parser("show")
        _list = subparsers.add_parser("list")

        # --- show ---------------------------------------------------------
        _show.add_argument(
            "identifiers",
            nargs="+",
            type=str,
            help="Space delimited list of identifiers to retrieve",
        )
        show_flags = (
            (('-I', '--identifier'), 'Show Identifier instead of SearchIdentifier table entry'),
            (('-y', '--legacy'), 'Show legacy form of identifier record'),
            (('-m', '--cm'), 'Decode the identifier cm zipped json section'),
            (
                ('-e', '--expanded'),
                'Expand related info such as owner, ownergroup, profile, and datacenter',
            ),
            (('-t', '--times'), 'Convert timestamps to textual time representation'),
            (('-N', '--N2T'), 'Retrieve record from N2T if available'),
            (('--sync',), "Synchronize the N2T entry with metadata from the database."),
        )
        for names, help_text in show_flags:
            _show.add_argument(*names, action='store_true', help=help_text)

        # --- list ---------------------------------------------------------
        _list.add_argument(
            "filter",
            nargs="+",
            type=str,
            help="Filter to select identifiers, e.g. 'createTime__gt:1653019200'. Multiple filters are combined with AND.",
        )
        _list.add_argument(
            '-I',
            '--identifier',
            action='store_true',
            help='Show Identifier instead of SearchIdentifier table entry',
        )
        _list.add_argument(
            '-W',
            '--whereclause',
            action='store_true',
            help='Filter is an SQL WHERE clause instead of ORM applied to the identifier or searchIdentifier tables.',
        )
        _list.add_argument(
            '-F',
            '--fields',
            action=SplitArgs,
            default=[],
            help="Comma separated list of fields in addition to identifier to list.",
        )
        _list.add_argument(
            '--compare',
            action='store_true',
            help='Show difference between EZID and N2T metadata.',
        )

    @staticmethod
    def _identifier_class(opts):
        """Return the model selected by the ``--identifier`` option."""
        if opts["identifier"]:
            return ezidapp.models.identifier.Identifier
        return ezidapp.models.identifier.SearchIdentifier

    @staticmethod
    def _jsonable_instance(o):
        """Return a model instance as a JSON-compatible dict (``None`` passes through)."""
        if o is None:
            return o
        # Round-trip through Django's JSON serializer so that every field
        # value ends up as a plain JSON type.
        return json.loads(django.core.serializers.serialize('json', [o]))[0]

    @staticmethod
    def _tstamp_to_text(t):
        """Format a numeric timestamp as an ISO 8601 string in UTC."""
        return datetime.datetime.fromtimestamp(t, tz=datetime.timezone.utc).isoformat()

    @staticmethod
    def _pending_n2t_updates(legacy, current):
        """Compute the element updates needed to make N2T match ``legacy``.

        See proc_binder.update. Elements whose N2T value already equals the
        legacy value are omitted. Elements that exist in N2T but not in
        ``legacy`` are set to an empty string; an empty value results in an
        "rm" (remove) operation for that field being sent to N2T.

        Neither argument is modified.
        """
        updates = {k: v for k, v in legacy.items() if current.get(k) != v}
        for k in current:
            if k not in legacy:
                updates[k] = ""
        return updates

    def diff_n2t(self, identifier: django.db.models.Model):
        """Return EZID and N2T metadata for ``identifier`` side by side.

        The result maps each metadata key to a two item list
        ``[ezid_value, n2t_value]``. ``ezid_value`` comes from
        ``identifier.toLegacy()`` and ``n2t_value`` from N2T; whichever side
        lacks the key is ``None``.
        """
        n2t_meta = impl.noid_egg.getElements(identifier.identifier)
        if n2t_meta is None:
            n2t_meta = {}
        res = {k: [v, None] for k, v in identifier.toLegacy().items()}
        for k, v in n2t_meta.items():
            res.setdefault(k, [None, None])[1] = v
        return res

    def _add_decoded_cm(self, identifier, entry):
        """Decode the zipped JSON ``cm`` value of ``identifier`` into ``entry``.

        Adds the decoded value as ``entry['fields']['cm']`` and a boolean
        ``entry['cm_eq_metadata']`` stating whether it equals the serialized
        ``metadata`` field. A missing or undecompressable ``cm`` is logged
        and otherwise ignored.
        """
        raw_cm = identifier.cm
        if not raw_cm:
            log.info("No cm section in %s", identifier.identifier)
            return
        try:
            decoded = json.loads(zlib.decompress(raw_cm))
        except zlib.error:
            log.info("No cm section in %s", identifier.identifier)
            return
        entry['fields']['cm'] = decoded
        # Whole-value comparison: true only when both hold exactly the same
        # keys and values, and never raises for a key present on one side only.
        entry["cm_eq_metadata"] = decoded == entry['fields'].get('metadata')

    def _sync_to_n2t(self, identifier, entry, n2t_meta):
        """Push database metadata for ``identifier`` to N2T after confirmation.

        ``n2t_meta`` is the N2T record if it was already retrieved, else
        ``None`` (it is then fetched here). On a confirmed update the record
        re-read from N2T is stored in ``entry['n2t_updated']``.
        """
        current = n2t_meta
        if current is None:
            current = impl.noid_egg.getElements(identifier.identifier)
        if current is None:
            current = {}
        # Build the updates in a new dict: `current` may be the very object
        # already stored in entry["n2t"] and must be reported unmodified.
        updates = self._pending_n2t_updates(identifier.toLegacy(), current)
        if not updates:
            log.info("No pending updates for %s", identifier.identifier)
            return
        log.warning("Updating N2T metadata for %s", identifier.identifier)
        log.info("Pending updates for %s:\n%s", identifier.identifier, updates)
        self.stdout.write(f"About to update {identifier.identifier} !")
        response = input("Enter Y to continue, anything else aborts: ")
        if response.strip() != 'Y':
            self.stdout.write("Aborted.")
            return
        impl.noid_egg.setElements(identifier.identifier, updates)
        # Retrieve the updated metadata and add to the entry
        entry["n2t_updated"] = impl.noid_egg.getElements(identifier.identifier)

    def handle_show(self, *args, **opts):
        """Write the requested identifiers to stdout as a JSON list.

        Identifiers that do not exist in the selected table are omitted from
        the output.
        """
        identifier_class = self._identifier_class(opts)
        identifiers = identifier_class.objects.filter(identifier__in=opts["identifiers"])
        if opts['expanded']:
            identifiers = identifiers.select_related(*self._EXPAND_FIELDS)
        entries = []
        for identifier in identifiers:
            # Note, it is far more efficient to just call
            # serialize('json', identifiers, indent=2) but we want to adjust
            # the cm section and other fields for each instance.
            entry = self._jsonable_instance(identifier)
            entry["isAgentPid"] = identifier.isAgentPid
            if opts["legacy"]:
                # The "legacy" format is what gets sent to the N2T binder
                entry["legacy"] = identifier.toLegacy()
            if opts["expanded"]:
                for field_name in self._EXPAND_FIELDS:
                    entry["fields"][field_name] = self._jsonable_instance(
                        getattr(identifier, field_name)
                    )
            if opts["times"]:
                for field_name in ("createTime", "updateTime"):
                    entry["fields"][field_name] = self._tstamp_to_text(
                        entry["fields"][field_name]
                    )
            if opts["cm"]:
                self._add_decoded_cm(identifier, entry)
            n2t_meta = None
            if opts["N2T"]:
                # Retrieve entry from N2T
                n2t_meta = impl.noid_egg.getElements(identifier.identifier)
                entry["n2t"] = n2t_meta
            if opts["sync"]:
                self._sync_to_n2t(identifier, entry, n2t_meta)
            entries.append(entry)
        self.stdout.write(json.dumps(entries, indent=2, sort_keys=True))

    def handle_list_by_where(self, *args, **opts):
        """Write a CSV listing of identifiers selected by a raw SQL WHERE clause.

        The filter strings are joined with AND and used verbatim as the WHERE
        clause.
        """
        fields = ['identifier'] + list(opts.get('fields') or [])
        # The model must match the table being queried, otherwise the rows
        # are mapped onto the wrong model.
        identifier_class = self._identifier_class(opts)
        table = "ezidapp_identifier" if opts["identifier"] else "ezidapp_searchidentifier"
        # NOTE(security): the WHERE clause is operator-supplied SQL inserted
        # as-is; that is the purpose of --whereclause. Never feed it
        # untrusted input.
        sql = f"SELECT * FROM {table} WHERE {' AND '.join(opts['filter'])};"
        log.info("Generated SQL = %s", sql)
        writer = csv.DictWriter(self.stdout, fields, dialect='excel')
        writer.writeheader()
        for identifier in identifier_class.objects.raw(sql):
            writer.writerow(django.forms.models.model_to_dict(identifier, fields=fields))

    def handle_list(self, *args, **opts):
        """Write a CSV listing of identifiers selected by ORM filters.

        Each filter has the form ``lookup:value`` (e.g.
        ``createTime__gt:1653019200``); filters are combined with AND. With
        ``--compare`` an extra ``n2t`` column holds the result of
        ``diff_n2t`` for each row.
        """
        fields = ['identifier'] + list(opts.get('fields') or [])
        _filter = {}
        for filter_string in opts['filter']:
            lookup, sep, value = filter_string.partition(':')
            if sep:
                _filter[lookup.strip()] = value.strip()
            else:
                log.warning("Expecting ':' delimiter between filter and match value, e.g. createTime__gt:1653019200, got %s",filter_string)
        self.stdout.write(f"Provided filter = {_filter}")
        if not _filter:
            log.error("Aborting: Null filter matches all records.")
            return
        identifiers = self._identifier_class(opts).objects.filter(**_filter)
        compare = opts.get("compare", False)
        # Separate list for the CSV columns so the model field list passed to
        # model_to_dict is not modified.
        columns = fields + ['n2t'] if compare else list(fields)
        writer = csv.DictWriter(self.stdout, columns, dialect='excel')
        writer.writeheader()
        for identifier in identifiers:
            row = django.forms.models.model_to_dict(identifier, fields=fields)
            if compare:
                row['n2t'] = self.diff_n2t(identifier)
            writer.writerow(row)

    def handle(self, *args, **opts):
        """Dispatch to the handler for the selected operation."""
        operation = opts['operation']
        if operation == 'show':
            self.handle_show(*args, **opts)
        elif operation == 'list':
            if opts['whereclause']:
                self.handle_list_by_where(*args, **opts)
            else:
                self.handle_list(*args, **opts)

12 changes: 7 additions & 5 deletions ezidapp/management/commands/proc-binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def create(self, task_model):
"""
id_str = task_model.refIdentifier.identifier
self.log.info("CREATE: %s", id_str)
metadata = task_model.refIdentifier.metadata
##metadata = task_model.refIdentifier.metadata
# add the required target metadata:
metadata["_t"] = task_model.refIdentifier.target
##metadata["_t"] = task_model.refIdentifier.target
metadata = task_model.refIdentifier.toLegacy()
impl.noid_egg.setElements(id_str, metadata)

def update(self, task_model):
Expand All @@ -50,9 +51,10 @@ def update(self, task_model):
new fields or fields that have changed values.
'''
id_str = task_model.refIdentifier.identifier
metadata = task_model.refIdentifier.metadata
# add the required target metadata:
metadata["_t"] = task_model.refIdentifier.target
##metadata = task_model.refIdentifier.metadata
### add the required target metadata:
##metadata["_t"] = task_model.refIdentifier.target
metadata = task_model.refIdentifier.toLegacy()
self.log.info("UPDATE: %s", id_str)

# Retrieve the existing metadata from N2T
Expand Down
29 changes: 29 additions & 0 deletions ezidapp/migrations/0002_auto_20221026_1139.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Generated by Django 3.2.10 on 2022-10-26 11:39

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add single-column indexes on createTime and updateTime.

    One index per column is added to both the ``identifier`` and the
    ``searchidentifier`` models.
    """

    dependencies = [('ezidapp', '0001_initial')]

    operations = [
        migrations.AddIndex(
            model_name=model_name,
            index=models.Index(fields=[column], name=index_name),
        )
        # (model, indexed column, index name) in the order they are applied.
        for model_name, column, index_name in (
            ('identifier', 'createTime', 'ezidapp_ide_createT_439579_idx'),
            ('identifier', 'updateTime', 'ezidapp_ide_updateT_88212d_idx'),
            ('searchidentifier', 'createTime', 'ezidapp_sea_createT_0c4dda_idx'),
            ('searchidentifier', 'updateTime', 'ezidapp_sea_updateT_60feca_idx'),
        )
    ]
Loading

0 comments on commit 2795804

Please sign in to comment.