From 7c94331c86efc9527fb99837019a711c5a74a9f7 Mon Sep 17 00:00:00 2001
From: Abram Booth
Date: Wed, 23 Oct 2019 10:49:02 -0400
Subject: [PATCH 1/9] fix: avoid encoding error in assets containers (#9169)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Purpose

`invoke assets -w` fails with a unicode error. Unsure of the root cause (possibly webpack speaking the non-ascii rune `…` aloud?), but @felliott found a workaround.

## Changes

Set `LANG=en_US.UTF-8` for the `assets` and `admin_assets` containers in our docker-compose.yml. This avoids the problem in the common case, for now.

---
 docker-compose.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/docker-compose.yml b/docker-compose.yml
index 62d0afb345d..05a02c16c59 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -383,6 +383,7 @@ services:
     restart: unless-stopped
     environment:
       DJANGO_SETTINGS_MODULE: api.base.settings
+      LANG: en_US.UTF-8
     volumes:
       - ./:/code:cached
       - osf_requirements_vol:/usr/lib/python2.7
@@ -396,6 +397,7 @@ services:
     restart: unless-stopped
     environment:
       DJANGO_SETTINGS_MODULE: admin.base.settings
+      LANG: en_US.UTF-8
     volumes:
       - ./:/code:cached
       - osf_requirements_vol:/usr/lib/python2.7

From affd450aea206f029b3d01c6c2337ce4cfce9f5e Mon Sep 17 00:00:00 2001
From: corbinSanders <50155660+corbinSanders@users.noreply.github.com>
Date: Mon, 28 Oct 2019 12:25:38 -0400
Subject: [PATCH 2/9] [ENG-893] parse user create structure (#9177)

## Purpose

Rather than dumping all the data directly from the EGAP spreadsheets, a better migration plan is to put validated data into JSON, then migrate the data from the JSON into the OSF. This PR creates a management command that reads in data from the EGAP registry spreadsheet and the EGAP author spreadsheet to create an OSFBag directory. (See https://openscience.atlassian.net/browse/ENG-1109 for details.)

This creates the base directory for each registry and generates two JSON files. Project.json contains the ID, post date, title of the project, and a list of contributors' names and emails (if available). Registration-schema.json is also created, which contains the metadata of the registry. This metadata is already validated against the EGAP registration schema and placed in the format accepted by the draft registration model.

## Changes

osf/management/commands/create_EGAP_json.py - The management command to create the directories and json

## QA Notes

Spot check that each registry in the EGAP schema spreadsheet is in the generated directory, and all the data is there and correct.
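For illustration while spot-checking, the two generated files for one registry entry look roughly like this (the keys follow `make_project_dict`/`make_registration_dict` in the diff below; the values here are invented):

```python
# project.json -- one per registry row (illustrative values only)
{
    'id': '20180505AA',
    'title': 'Example study title',
    'post-date': '05/05/2018 - 17:00',
    'contributors': [
        {'name': 'A. Author', 'email': 'a.author@example.com'},
        {'name': 'B. Author'},  # email key omitted when not in the author sheet
    ],
}

# registration-schema.json -- draft-registration-style responses keyed by question id
{
    'q1': {'comments': [], 'extra': [], 'value': 'Example study title'},
    'q35': {'comments': [], 'extra': [], 'value': 'Agree'},
}
```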
## Documentation N/A ## Side Effects N/A - Works outside of OSF ## Ticket https://openscience.atlassian.net/browse/ENG-893 --- scripts/EGAP/EGAP_tests.py | 151 ++++++++++++++++++ scripts/EGAP/__init__.py | 0 scripts/EGAP/create_EGAP_json.py | 258 +++++++++++++++++++++++++++++++ 3 files changed, 409 insertions(+) create mode 100644 scripts/EGAP/EGAP_tests.py create mode 100644 scripts/EGAP/__init__.py create mode 100644 scripts/EGAP/create_EGAP_json.py diff --git a/scripts/EGAP/EGAP_tests.py b/scripts/EGAP/EGAP_tests.py new file mode 100644 index 00000000000..b78f659ea88 --- /dev/null +++ b/scripts/EGAP/EGAP_tests.py @@ -0,0 +1,151 @@ +import unittest +from create_EGAP_json import (schema_to_spreadsheet_mapping, + make_project_dict, + make_registration_dict, + other_mapping, +) + +HEADER_ROW = ['POST DATE', + 'ID', + 'STATUS', + 'TITLE', + 'B2 AUTHORS', + 'EMAIL', + 'B3 ACKNOWLEDGEMENTS', + 'B4 FACULTY MEMBER?', + 'B5 PROSPECTIVE OR RETROSPECTIVE?', + 'B6 EXPERIMENTAL STUDY?', + 'B7 DATE OF START OF STUDY', + 'B8 GATE DATE', + 'B8 FORMERLY GATED UNTIL', + 'B9 PRESENTED AT EGAP MEETING?', + 'B10 PRE-ANALYSIS PLAN WITH REGISTRATION?', + 'C1 BACKGROUND', + 'C2 HYPOTHESES', + 'C3 TESTING PLAN', + 'C4 COUNTRY', + 'C5 SAMPLE SIZE', + 'C6 POWER ANALYSIS?', + 'C7 IRB APPROVAL?', + 'C8 IRB NUMBER', + 'C9 DATE OF IRB APPROVAL', + 'C10 INTERVENTION IMPLEMENTER', + 'C11 REMUNERATION?', + 'C12 PUBLICATION AGREEMENT?', + 'C13 JEL CODES', + 'METHODOLOGY', + 'POLICY'] + +TEST_ROW_WITH_OTHER = ['03/05/2017 - 17:00', + '20170305AA', + 'Status is not saved, so this field doesnt matter', + 'The members of Nsync', + 'Justin Timberlake | Joey Fatone | Lance Bass', + 'doesnt@matter.com', + 'We acknolowledge Chris Kirkpatrick', + 'Justin Timberlake is a faculty Member', + 'This is my other response for prospective', + 'Yes', + '05/01/2017', + '05/01/2020', + '', + 'No', + 'No', + 'Test background', + 'test hypothesis', + 'This is my testing plan', + 'Switzerland', + '3242', + 'This is a power analysis other response', + 'This is an other irb response', + '343434', + '03/06/2017', + 'This is an other intervention response', + 'This is an other renumeration response', + 'This is an other publication agreement response', + 'Jel Code', + 'Survey Methodology', + 'Gender'] + +TEST_ROW_WITH_OTHER_AUTHORS = [ + {'name': 'Justin Timberlake', 'email': 'jt@gmail.com'}, + {'name': 'Joey Fatone'}, + {'name': 'Lance Bass', 'email': 'lBass@gmail.com'}] + +TEST_ROW = ['05/05/2018 - 17:00', + '20180505AA', + 'Status is not saved, so this field doesnt matter', + 'The members of Backstreet boys', + 'Nick Carter | Brian Littrell, Ph.D. | AJ McLean | U.S. Agency Bureau, Department of Agency affairs (DOAA)', + 'doesnt@matter.com', + 'We acknolowledge Chris Kirkpatrick', + 'Yes', + 'Registration prior to any research activities', + 'Yes', + '05/01/2017', + '05/01/2020', + '', + 'No', + 'No', + 'Test background', + 'test hypothesis', + 'This is my testing plan', + 'Switzerland', + '3242', + 'Yes', + 'Yes', + '343434', + '03/06/2017', + 'Researchers', + 'Yes', + 'Yes', + 'Jel Code', + 'Survey Methodology', + 'Gender'] + +TEST_ROW_AUTHORS = [ + {'name': 'Nick Carter', 'email': 'nickc@gmail.com'}, + {'name': 'Brian Littrell, Ph.D.'}, + {'name': 'AJ McLean', 'email': 'AJML@gmail.com'}, + {'name': 'U.S. 
Agency Bureau, Department of Agency affairs (DOAA)', 'email': 'DOAA@UAB.gov'}] + +class TestProjectDict(unittest.TestCase): + + def test_row_with_other(self): + project_dict = make_project_dict(TEST_ROW_WITH_OTHER, TEST_ROW_WITH_OTHER_AUTHORS, HEADER_ROW) + self.assertEqual(project_dict['title'], TEST_ROW_WITH_OTHER[3]) + self.assertEqual(project_dict['contributors'], TEST_ROW_WITH_OTHER_AUTHORS) + self.assertEqual(project_dict['post-date'], TEST_ROW_WITH_OTHER[0]) + self.assertEqual(project_dict['id'], TEST_ROW_WITH_OTHER[1]) + + def test_row(self): + project_dict = make_project_dict(TEST_ROW, TEST_ROW_AUTHORS, HEADER_ROW) + self.assertEqual(project_dict['title'], TEST_ROW[3]) + self.assertEqual(project_dict['contributors'], TEST_ROW_AUTHORS) + self.assertEqual(project_dict['post-date'], TEST_ROW[0]) + self.assertEqual(project_dict['id'], TEST_ROW[1]) + +class TestRegistrationDict(unittest.TestCase): + + def run_registration_test(self, row, header_row): + project_dict = make_registration_dict(row, header_row) + for question_dict in schema_to_spreadsheet_mapping: + question_key = question_dict.keys()[0] + spreadsheet_column = question_dict[question_key] + column_index = header_row.index(spreadsheet_column) + if type(project_dict[question_key]['value']) == list: + field_val = project_dict[question_key]['value'][0] + else: + field_val = project_dict[question_key]['value'] + if row[column_index] != field_val and question_key in other_mapping: + self.assertEqual(project_dict[question_key]['value'], 'Other (describe in text box below)') + field_val = project_dict[other_mapping[question_key]]['value'] + self.assertEqual(row[column_index], field_val) + else: + self.assertEqual(row[column_index], field_val) + + def test_row_with_other(self): + self.run_registration_test(TEST_ROW_WITH_OTHER, HEADER_ROW) + + def test_row(self): + self.run_registration_test(TEST_ROW, HEADER_ROW) diff --git a/scripts/EGAP/__init__.py b/scripts/EGAP/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/scripts/EGAP/create_EGAP_json.py b/scripts/EGAP/create_EGAP_json.py new file mode 100644 index 00000000000..f04197ee79d --- /dev/null +++ b/scripts/EGAP/create_EGAP_json.py @@ -0,0 +1,258 @@ +import logging +import csv +import datetime +import json +import os +import shutil +import re +import jsonschema +import argparse + +from django.core.management.base import BaseCommand +from jsonschema.exceptions import ValidationError + +from website.project.metadata.utils import create_jsonschema_from_metaschema +from website.project.metadata.schemas import ensure_schema_structure, from_json + +logger = logging.getLogger(__name__) +logging.basicConfig(level=logging.INFO) + +parser = argparse.ArgumentParser() +parser.add_argument('-a', '--authorsource', help='Specify the source file for the author csv file') +parser.add_argument('-r', '--registrysource', help='Specify the source file for the registrty csv file') +parser.add_argument('-t', '--target', help='Specify the target directory of the registry directories') +parser.add_argument('-d', '--dry', action='store_true', help='Dry run: Have the script delete the target directory after completion') + +schema_to_spreadsheet_mapping = [ + {'q1': 'TITLE'}, + {'q2': 'B2 AUTHORS'}, + {'q3': 'ID'}, + {'q4': 'POST DATE'}, + {'q5': 'B3 ACKNOWLEDGEMENTS'}, + {'q6': 'B4 FACULTY MEMBER?'}, + {'q8': 'B5 PROSPECTIVE OR RETROSPECTIVE?'}, + {'q10': 'B6 EXPERIMENTAL STUDY?'}, + {'q11': 'B7 DATE OF START OF STUDY'}, + {'q12': 'B8 GATE DATE'}, + {'q13': 'B9 PRESENTED AT EGAP 
MEETING?'}, + {'q14': 'B10 PRE-ANALYSIS PLAN WITH REGISTRATION?'}, + {'q15': 'C1 BACKGROUND'}, + {'q16': 'C2 HYPOTHESES'}, + {'q17': 'C3 TESTING PLAN'}, + {'q18': 'C4 COUNTRY'}, + {'q19': 'C5 SAMPLE SIZE'}, + {'q20': 'C6 POWER ANALYSIS?'}, + {'q22': 'C7 IRB APPROVAL?'}, + {'q24': 'C8 IRB NUMBER'}, + {'q25': 'C9 DATE OF IRB APPROVAL'}, + {'q26': 'C10 INTERVENTION IMPLEMENTER'}, + {'q28': 'C11 REMUNERATION?'}, + {'q30': 'C12 PUBLICATION AGREEMENT?'}, + {'q32': 'C13 JEL CODES'}, + {'q33': 'METHODOLOGY'}, + {'q34': 'POLICY'}, +] + +# Any multiple choice questions where "Other" is a possible response, have subsequent "Other" +# question to log that response. If multiple choice question value is invalid, +# attempt to log the value in the corresponding "Other" question response. +other_mapping = { + 'q6': 'q7', + 'q8': 'q9', + 'q20': 'q21', + 'q22': 'q23', + 'q26': 'q27', + 'q28': 'q29', + 'q30': 'q31' +} + +def create_file_tree_and_json(author_source, registry_source, target): + # Things this function needs to do: + # For each row in the registry function, create a directory. + # Create two JSON files, one project json with ID, Title, Postdate, and authors listed + # with emails. And another with all the key value pairs for the registry meta. + top_dir = target + logger.info('Creating EGAP directory at {}'.format(top_dir)) + os.mkdir(top_dir) + author_list = create_author_dict(author_source) + with open(registry_source) as csv_registry_file: + csv_reader = csv.reader(csv_registry_file, delimiter=',') + header_row = next(csv_reader) + normalized_header_row = [col_header.decode('ascii', 'ignore') for col_header in header_row] + + id_index = normalized_header_row.index('ID') + for line in csv_reader: + row = [cell.decode('ascii', 'ignore') for cell in line] + project_id = row[id_index] + logger.info('Adding project ID: {}'.format(project_id)) + root_directory = os.path.join(top_dir, project_id) + os.mkdir(root_directory) + data_directory = os.path.join(root_directory, 'data') + os.mkdir(data_directory) + os.mkdir(os.path.join(data_directory, 'nonanonymous')) + project_dict = make_project_dict(row, author_list, normalized_header_row) + make_json_file(root_directory, project_dict, 'project') + registration_dict = make_registration_dict(row, normalized_header_row) + make_json_file(root_directory, registration_dict, 'registration') + +def create_author_dict(source): + # Reads in author CSV and returns a list of dicts with names and emails of EGAP Authors + authors = [] + with open(source) as csv_file: + csv_reader = csv.reader(csv_file, delimiter=',') + header_row = next(csv_reader) + normalized_header_row = [col_header.decode('ascii', 'ignore').strip() for col_header in header_row] + + name_index = normalized_header_row.index('Name') + email_index = normalized_header_row.index('Email') + for line in csv_reader: + row = [cell.decode('ascii', 'ignore') for cell in line] + logger.info('Adding user: ' + row[name_index]) + if row[email_index] != '': + author_dict = {'name': row[name_index].strip(), 'email': row[email_index]} + else: + author_dict = {'name': row[name_index].strip()} + authors.append(author_dict) + return authors + +def make_project_dict(row, author_list, normalized_header_row): + project = {} + title_index = normalized_header_row.index('TITLE') + id_index = normalized_header_row.index('ID') + postdate_index = normalized_header_row.index('POST DATE') + contributors_index = normalized_header_row.index('B2 AUTHORS') + project['id'] = row[id_index] + project['title'] = row[title_index] + 
project['post-date'] = row[postdate_index] + + authors = row[contributors_index] + + authors = authors.split('|') + project['contributors'] = [] + author_name_list = [author['name'] for author in author_list] + for author in authors: + author = author.strip() + if author: + if author not in author_name_list: + logger.warning('Author {} not in Author spreadsheet for project {}.'.format(author,row[id_index])) + project['contributors'].append({'name': author}) + else: + author_list_index = author_name_list.index(author) + project['contributors'].append(author_list[author_list_index]) + return project + +def make_registration_dict(row, normalized_header_row): + registration = {} + + for question in schema_to_spreadsheet_mapping: + qid = question.keys()[0] + column_name = question.values()[0] + value = build_question_response(normalized_header_row, row, qid, column_name) + validated_qid, other_response = validate_response(qid, value) + registration[validated_qid] = value + if other_response: + registration[other_response] = build_nested_response('Other (describe in text box below)') + # q35 and q36 are required questions at the end of the schema, certification and + # confirmation questions. Just marking as agree - + registration['q35'] = build_nested_response('Agree') + registration['q36'] = build_nested_response('Agree') + return registration + +def make_json_file(filepath, data, json_type): + if json_type == 'project': + filepath = filepath + '/project.json' + if json_type == 'registration': + filepath = filepath + '/registration-schema.json' + with open(filepath, 'w') as outfile: + json.dump(data, outfile) + +def build_question_response(header_row, row, question_key, column_title): + """Format the question's response to go in the registration_metadata + :param header_row: Header row in spreadsheet + :param row: Row in spreadsheet + :param question_key: string, Official question key as part of schema + :param column_title: string, Corresponding question_key column title in EGAP spreadsheet + """ + index = header_row.index(column_title) + value = clean_value(row[index]) + # Spreadsheet has these as comma-separated values, but looking for array + if question_key in ['q33', 'q34']: + value = value.split(', ') + return build_nested_response(value) + +def clean_value(value): + """Clean spreadsheet values of issues that will affect validation """ + if value == 'n/a': + return 'N/A' + elif value == 'Design was registered before field was added': + return '' + return value + +def build_nested_response(value): + return { + 'comments': [], + 'extra': [], + 'value': value + } + +def validate_response(qid, value): + """Validate question response + + Validating each question response individually. If there is an error, we will + attempt to add the value to the corresponding "Other" block. Return that question id instead. + + For example, q6 is a multiple choice question, with "Other" as a choice. If text is entered + for q6 that does not match one of the multiple choice answers, assuming that this is "other" + text, and this response should go to the corresponding q7 question. 
q6 will be marked + as "Other" + + :param qid: string, question id from schema + :param value: question response + :param draft: DraftRegistration + :return qid: tuple, (qid corresponding to value, optional "Other" qid) + """ + temporary_check = {} + temporary_check[qid] = value + egap_schema = ensure_schema_structure(from_json('egap-registration.json')) + schema = create_jsonschema_from_metaschema(egap_schema, + required_fields=False, + is_reviewer=False) + + try: + json_schema = jsonschema.validate(temporary_check, schema) + except ValidationError as exc: + if qid in other_mapping: + return other_mapping[qid], qid + else: + raise Exception(exc) + return qid, None + +def main(default_args=False): + if default_args: + args = parser.parse_args(['--source', 'default', '--target', 'default']) + else: + args = parser.parse_args() + + author_source = args.authorsource + registry_source = args.registrysource + target_directory = args.target + dry_run = args.dry + + if not author_source: + author_source = 'EGAP_author_emails.csv' + + if not registry_source: + registry_source = 'EGAP_registry_for_OSF.csv' + + if not target_directory: + target_directory = 'EGAP_data_{}'.format(datetime.datetime.now().strftime('%m-%d-%Y')) + + create_file_tree_and_json(author_source, registry_source, target_directory) + + if dry_run: + shutil.rmtree(target_directory) + raise RuntimeError('Dry run, file tree being deleted.') + +if __name__ == '__main__': + + main(default_args=False) From 499e98a688ac598b01af98b697977b3ee46fbae9 Mon Sep 17 00:00:00 2001 From: "Brian J. Geiger" Date: Tue, 29 Oct 2019 11:19:52 -0400 Subject: [PATCH 3/9] Update script to work with Jupyter Notebook and Python 3 (#9182) ## Purpose Create an EGAP Jupyter Notebook capable of running the scripts for the EGAP migration ## Changes 1. Un-DRY the first script to remove the need to have the entire OSF running just to run this migration. 2. Make the script work with Python 3 (which current versions of Jupyter require) 3. Add notebook 4. Copy EGAP Schema to EGAP scripts directory 5. Ignore iPython checkpoint files in git ## QA Notes - Does this change require a data migration? If so, what data will we migrate? _No, this is mostly an aid for testing_ - What is the level of risk? _Low_ - Any permissions code touched? _No_ - Is this an additive or subtractive change, other? _Additive_ - How can QA verify? (Through UI, API, AdminApp or AdminAdminApp?) _By running the notebook_ - If verifying through API, what's the new version? Please include the endpoints in PR notes or Dev docs. _N/A_ - What features or workflows might this change impact? _EGAP Migration_ - How will this impact performance? _It will not_ ## Side Effects No side effects. The reduction of DRYness was to prevent the possibility of side effects. 
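To make the Python 3 changes concrete, here is a minimal standalone sketch of the two recurring fixes (illustrative, not lifted verbatim from the diff; the filename is the script's default):

```python
import csv

question = {'q1': 'TITLE'}

# Python 2 allowed question.keys()[0]; Python 3 dict views are not
# indexable, so the script wraps them in list() first.
qid = list(question.keys())[0]

# Python 2 decoded each CSV cell by hand; Python 3 opens the file in text
# mode instead. 'utf-8-sig' also strips the byte-order mark that
# Excel-exported CSVs often start with, which would otherwise pollute the
# first header column.
with open('EGAP_registry_for_OSF.csv', 'rt', encoding='utf-8-sig') as f:
    header_row = [col.strip() for col in next(csv.reader(f))]
```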
## Ticket https://openscience.atlassian.net/browse/ENG-1177 --- .gitignore | 2 + scripts/EGAP/create_EGAP_json.py | 303 +++++++++++++++++++++- scripts/EGAP/egap-registration.json | 382 ++++++++++++++++++++++++++++ scripts/EGAP/egap_workflow.ipynb | 47 ++++ 4 files changed, 720 insertions(+), 14 deletions(-) create mode 100644 scripts/EGAP/egap-registration.json create mode 100644 scripts/EGAP/egap_workflow.ipynb diff --git a/.gitignore b/.gitignore index 84f32d26566..0f83ae23f86 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ ehthumbs.db Thumbs.db *.swp *~ +.ipynb_checkpoints # R ####################### @@ -202,3 +203,4 @@ ssl/ # pyenv .python-version + diff --git a/scripts/EGAP/create_EGAP_json.py b/scripts/EGAP/create_EGAP_json.py index f04197ee79d..9edb0679b2d 100644 --- a/scripts/EGAP/create_EGAP_json.py +++ b/scripts/EGAP/create_EGAP_json.py @@ -4,16 +4,11 @@ import json import os import shutil -import re import jsonschema import argparse -from django.core.management.base import BaseCommand from jsonschema.exceptions import ValidationError -from website.project.metadata.utils import create_jsonschema_from_metaschema -from website.project.metadata.schemas import ensure_schema_structure, from_json - logger = logging.getLogger(__name__) logging.basicConfig(level=logging.INFO) @@ -66,6 +61,22 @@ 'q30': 'q31' } + +here = os.path.split(os.path.abspath(__file__))[0] + + +def from_json(fname): + with open(os.path.join(here, fname)) as f: + return json.load(f) + + +def ensure_schema_structure(schema): + schema['pages'] = schema.get('pages', []) + schema['title'] = schema['name'] + schema['version'] = schema.get('version', 1) + return schema + + def create_file_tree_and_json(author_source, registry_source, target): # Things this function needs to do: # For each row in the registry function, create a directory. 
@@ -75,14 +86,17 @@ def create_file_tree_and_json(author_source, registry_source, target): logger.info('Creating EGAP directory at {}'.format(top_dir)) os.mkdir(top_dir) author_list = create_author_dict(author_source) - with open(registry_source) as csv_registry_file: + with open(registry_source, 'rt', encoding='utf-8-sig') as csv_registry_file: csv_reader = csv.reader(csv_registry_file, delimiter=',') header_row = next(csv_reader) - normalized_header_row = [col_header.decode('ascii', 'ignore') for col_header in header_row] + normalized_header_row = [col_header.strip() for col_header in header_row] + logger.info('Debug data') + logger.info('Header row: {}'.format(header_row)) + logger.info('Normalized header row: {}'.format(normalized_header_row)) id_index = normalized_header_row.index('ID') for line in csv_reader: - row = [cell.decode('ascii', 'ignore') for cell in line] + row = [cell for cell in line] project_id = row[id_index] logger.info('Adding project ID: {}'.format(project_id)) root_directory = os.path.join(top_dir, project_id) @@ -95,18 +109,21 @@ def create_file_tree_and_json(author_source, registry_source, target): registration_dict = make_registration_dict(row, normalized_header_row) make_json_file(root_directory, registration_dict, 'registration') + def create_author_dict(source): # Reads in author CSV and returns a list of dicts with names and emails of EGAP Authors authors = [] - with open(source) as csv_file: + with open(source, 'rt', encoding='utf-8-sig') as csv_file: csv_reader = csv.reader(csv_file, delimiter=',') header_row = next(csv_reader) - normalized_header_row = [col_header.decode('ascii', 'ignore').strip() for col_header in header_row] - + normalized_header_row = [col_header.strip() for col_header in header_row] + logger.info('Debug data') + logger.info('Header row: {}'.format(header_row)) + logger.info('Normalized header row: {}'.format(normalized_header_row)) name_index = normalized_header_row.index('Name') email_index = normalized_header_row.index('Email') for line in csv_reader: - row = [cell.decode('ascii', 'ignore') for cell in line] + row = [cell for cell in line] logger.info('Adding user: ' + row[name_index]) if row[email_index] != '': author_dict = {'name': row[name_index].strip(), 'email': row[email_index]} @@ -115,6 +132,7 @@ def create_author_dict(source): authors.append(author_dict) return authors + def make_project_dict(row, author_list, normalized_header_row): project = {} title_index = normalized_header_row.index('TITLE') @@ -141,12 +159,13 @@ def make_project_dict(row, author_list, normalized_header_row): project['contributors'].append(author_list[author_list_index]) return project + def make_registration_dict(row, normalized_header_row): registration = {} for question in schema_to_spreadsheet_mapping: - qid = question.keys()[0] - column_name = question.values()[0] + qid = list(question.keys())[0] + column_name = list(question.values())[0] value = build_question_response(normalized_header_row, row, qid, column_name) validated_qid, other_response = validate_response(qid, value) registration[validated_qid] = value @@ -158,6 +177,7 @@ def make_registration_dict(row, normalized_header_row): registration['q36'] = build_nested_response('Agree') return registration + def make_json_file(filepath, data, json_type): if json_type == 'project': filepath = filepath + '/project.json' @@ -166,6 +186,7 @@ def make_json_file(filepath, data, json_type): with open(filepath, 'w') as outfile: json.dump(data, outfile) + def build_question_response(header_row, row, 
question_key, column_title): """Format the question's response to go in the registration_metadata :param header_row: Header row in spreadsheet @@ -180,6 +201,7 @@ def build_question_response(header_row, row, question_key, column_title): value = value.split(', ') return build_nested_response(value) + def clean_value(value): """Clean spreadsheet values of issues that will affect validation """ if value == 'n/a': @@ -195,6 +217,257 @@ def build_nested_response(value): 'value': value } + +def base_metaschema(metaschema): + json_schema = { + 'type': 'object', + 'description': metaschema['description'], + 'title': metaschema['title'], + 'additionalProperties': False, + 'properties': { + } + } + return json_schema + + +def get_required(question): + """ + Returns True if metaschema question is required. + """ + required = question.get('required', False) + if not required: + properties = question.get('properties', False) + if properties and isinstance(properties, list): + for item, property in enumerate(properties): + if isinstance(property, dict) and property.get('required', False): + required = True + break + return required + + +COMMENTS_SCHEMA = { + 'type': 'array', + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'seenBy': { + 'type': 'array', + }, + 'canDelete': {'type': 'boolean'}, + 'created': {'type': 'string'}, + 'lastModified': {'type': 'string'}, + 'author': {'type': 'string'}, + 'value': {'type': 'string'}, + 'isOwner': {'type': 'boolean'}, + 'getAuthor': {'type': 'string'}, + 'user': { + 'type': 'object', + 'additionalProperties': True, + 'properties': { + 'fullname': {'type': 'string'}, + 'id': {'type': 'integer'} + } + }, + 'saved': {'type': 'boolean'}, + 'canEdit': {'type': 'boolean'}, + 'isDeleted': {'type': 'boolean'} + } + } +} + + +def get_options_jsonschema(options, required): + """ + Returns multiple choice options for schema questions + """ + for item, option in enumerate(options): + if isinstance(option, dict) and option.get('text'): + options[item] = option.get('text') + value = {'enum': options} + + if not required and '' not in value['enum']: # Non-required fields need to accept empty strings as a value. 
+ value['enum'].append('') + + return value + + +def get_object_jsonschema(question, required_fields, is_reviewer, is_required): + """ + Returns jsonschema for nested objects within schema + """ + object_jsonschema = { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + + } + } + required = [] + properties = question.get('properties') + if properties: + for property in properties: + if property.get('required', False) and required_fields: + required.append(property['id']) + values = extract_question_values(property, required_fields, is_reviewer, is_required) + object_jsonschema['properties'][property['id']] = { + 'type': 'object', + 'additionalProperties': False, + 'properties': values + } + if required_fields: + object_jsonschema['properties'][property['id']]['required'] = ['value'] + if required_fields and is_required: + object_jsonschema['required'] = required + + return object_jsonschema + + +OSF_UPLOAD_EXTRA_SCHEMA = { + 'type': 'array', + 'items': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'data': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'kind': {'type': 'string'}, + 'contentType': {'type': 'string'}, + 'name': {'type': 'string'}, + 'extra': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'downloads': {'type': 'integer'}, + 'version': {'type': 'integer'}, + 'latestVersionSeen': {'type': 'string'}, + 'guid': {'type': 'string'}, + 'checkout': {'type': 'string'}, + 'hashes': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'sha256': {'type': 'string'}, + 'md5': {'type': 'string'} + } + } + } + }, + 'materialized': {'type': 'string'}, + 'modified': {'type': 'string'}, + 'nodeId': {'type': 'string'}, + 'etag': {'type': 'string'}, + 'provider': {'type': 'string'}, + 'path': {'type': 'string'}, + 'nodeUrl': {'type': 'string'}, + 'waterbutlerURL': {'type': 'string'}, + 'resource': {'type': 'string'}, + 'nodeApiUrl': {'type': 'string'}, + 'type': {'type': 'string'}, + 'accept': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'acceptedFiles': {'type': 'boolean'}, + 'maxSize': {'type': 'integer'}, + } + }, + 'links': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'download': {'type': 'string'}, + 'move': {'type': 'string'}, + 'upload': {'type': 'string'}, + 'delete': {'type': 'string'} + } + }, + 'permissions': { + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'edit': {'type': 'boolean'}, + 'view': {'type': 'boolean'} + } + }, + 'created_utc': {'type': 'string'}, + 'id': {'type': 'string'}, + 'modified_utc': {'type': 'string'}, + 'size': {'type': 'integer'}, + 'sizeInt': {'type': 'integer'}, + } + }, + 'fileId': {'type': ['string', 'object']}, + 'descriptionValue': {'type': 'string'}, + 'sha256': {'type': 'string'}, + 'selectedFileName': {'type': 'string'}, + 'nodeId': {'type': 'string'}, + 'viewUrl': {'type': 'string'} + } + } +} + + +def extract_question_values(question, required_fields, is_reviewer, is_required): + """ + Pulls structure for 'value', 'comments', and 'extra' items + """ + response = { + 'value': {'type': 'string'}, + 'comments': COMMENTS_SCHEMA, + 'extra': {'type': 'array'} + } + if question.get('type') == 'object': + response['value'] = get_object_jsonschema(question, required_fields, is_reviewer, is_required) + elif question.get('type') == 'choose': + options = question.get('options') + if options: + enum_options = get_options_jsonschema(options, is_required) 
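+            # Build the value schema from the enum: 'singleselect' uses it directly; 'multiselect' wraps it in an array whose items come from the same enum.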
+ if question.get('format') == 'singleselect': + response['value'] = enum_options + elif question.get('format') == 'multiselect': + response['value'] = {'type': 'array', 'items': enum_options} + elif question.get('type') == 'osf-upload': + response['extra'] = OSF_UPLOAD_EXTRA_SCHEMA + + if is_reviewer: + del response['extra'] + if not question.get('type') == 'object': + del response['value'] + + return response + + +def create_jsonschema_from_metaschema(metaschema, required_fields=False, is_reviewer=False): + """ + Creates jsonschema from registration metaschema for validation. + + Reviewer schemas only allow comment fields. + """ + json_schema = base_metaschema(metaschema) + required = [] + + for page in metaschema['pages']: + for question in page['questions']: + is_required = get_required(question) + if is_required and required_fields: + required.append(question['qid']) + json_schema['properties'][question['qid']] = { + 'type': 'object', + 'additionalProperties': False, + 'properties': extract_question_values(question, required_fields, is_reviewer, is_required) + } + if required_fields: + json_schema['properties'][question['qid']]['required'] = ['value'] + + if required and required_fields: + json_schema['required'] = required + + return json_schema + + def validate_response(qid, value): """Validate question response @@ -227,6 +500,7 @@ def validate_response(qid, value): raise Exception(exc) return qid, None + def main(default_args=False): if default_args: args = parser.parse_args(['--source', 'default', '--target', 'default']) @@ -253,6 +527,7 @@ def main(default_args=False): shutil.rmtree(target_directory) raise RuntimeError('Dry run, file tree being deleted.') + if __name__ == '__main__': main(default_args=False) diff --git a/scripts/EGAP/egap-registration.json b/scripts/EGAP/egap-registration.json new file mode 100644 index 00000000000..28d3721e8bf --- /dev/null +++ b/scripts/EGAP/egap-registration.json @@ -0,0 +1,382 @@ +{ + "name": "EGAP Registration", + "version": 2, + "description": "The EGAP registry focuses on designs for experiments and observational studies in governance and politics.", + "pages": [{ + "id": "page1", + "title": "General Information About the Project", + "questions": [{ + "qid": "q1", + "nav": "Title", + "type": "string", + "format": "text", + "title": "B1 Title of Study", + "description": "Provide the working title of your study.", + "required": true + }, + { + "qid": "q2", + "nav": "Authors", + "title": "B2 Authors", + "help": "Jimmy Stewart, Ava Gardner, Bob Hope, Greta Garbo", + "format": "textarea", + "required": true + }, + { + "qid": "q3", + "nav": "EGAP Registration ID", + "title": "EGAP Registration ID", + "format": "textarea", + "required": true + }, + { + "qid": "q4", + "nav": "Timestamp", + "title": "Timestamp of original registration", + "format": "textarea", + "required": true + }, + { + "qid": "q5", + "nav": "Acknowledgements", + "title": "B3 Acknowledgements", + "type": "string", + "format": "textarea", + "required": false + }, + { + "qid": "q6", + "title": "B4 Is one of the study authors a university faculty member?", + "nav": "University Faculty Member?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "Yes", + "No", + "Other (describe in text box below)" + ], + "description": "Please choose one" + }, + { + "qid": "q7", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q8", + "title": "B5 Is this Registration Prospective or Retrospective?", + "nav": "Prospective or Retrospective?", + 
"type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "Registration prior to any research activities", + "Registration prior to assignment of treatment", + "Registration prior to realization of outcomes", + "Registration prior to researcher access to outcome data", + "Registration prior to researcher analysis of outcome data", + "Registration after researcher analysis of outcome data", + "Other (describe in text box below)" + ], + "description": "Please choose one" + }, + { + "qid": "q9", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q10", + "title": "B6 Is this an experimental study?", + "nav": "Experimental study?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "Yes", + "No" + ], + "description": "(with random assignment of units to different conditions)" + }, + { + "qid": "q11", + "title": "B7 Date of start of study", + "nav": "Date of start of study", + "type": "string", + "format": "text", + "description": "Understood as first date of treatment assignment or equivalent for observational study", + "help": "E.g., 06/02/2018" + }, + { + "qid": "q12", + "title": "B8 Gate Date", + "nav": "Gate Date?", + "type": "string", + "format": "text", + "description": "Gating is discouraged, but if necessary, EGAP policy limits the gate range to 18 months maximum.", + "help": "E.g., 06/02/2018" + }, + { + "qid": "q13", + "title": "B9 Was this design presented at an EGAP meeting?", + "nav": "Presented at an EGAP meeting?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "No", + "Yes" + ], + "description": "Indicate if the design received feedback from a EGAP design workshop or other special EGAP session prior to registration" + }, + { + "qid": "q14", + "title": "B10 Is there a pre-analysis plan associated with this registration?", + "nav": "Pre-analysis plan associated with this registration?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "No", + "Yes" + ], + "description": "If so, please attach it in the Additional Documentation section on the final screen." + } + ] + }, + { + "id": "page2", + "title": "Registration Data", + "questions": [{ + "qid": "q15", + "nav": "Background and explanation of rationale.", + "title": "C1 Background and explanation of rationale.", + "format": "textarea", + "required": true, + "description": "Brief description of goals of project. If you are also attaching a pre-analysis plan, please refrain from simply copying and pasting a section from your plan here. If possible, please also avoid saying \"see attached pre-analysis plan,\" as it renders the search functionality less useful. Rather, please provide a short (1-2 paragraph) summary of the project background." + }, + { + "qid": "q16", + "nav": "Background and explanation of rationale.", + "title": "C2 What are the hypotheses to be tested/quantities of interest to be estimated?", + "format": "textarea", + "required": true, + "description": "Please list the hypotheses including hypotheses on heterogeneous effects. If you are also attaching a pre-analysis plan, please refrain from simply copying and pasting a section from your plan here. If possible, please also avoid saying \"see attached pre-analysis plan,\" as it renders the search functionality less useful. Rather, please provide a short (1-2 paragraph) summary of project hypotheses." 
+ }, + { + "qid": "q17", + "nav": "How will these hypotheses be tested?", + "title": "C3 How will these hypotheses be tested?", + "format": "textarea", + "required": true, + "description": "Brief description of your methodology. If you are also attaching a pre-analysis plan, please refrain from simply copying and pasting a section from your plan here. If possible, please also avoid saying \"see attached pre-analysis plan,\" as it renders the search functionality less useful. Rather, please provide a short (1-2 paragraph) summary of project methodology." + }, + { + "qid": "q18", + "title": "C4 Country", + "nav": "Country", + "type": "string", + "format": "text", + "help": "comma separated names of countries (e.g. Canada, United States of America, Mexico)" + }, + { + "qid": "q19", + "title": "C5 Sample Size (# of Units)", + "nav": "Sample Size", + "type": "string", + "format": "text" + }, + { + "qid": "q20", + "title": "C6 Was a power analysis conducted prior to data collection?", + "nav": "Power analysis conducted prior to data collection?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "No", + "Yes", + "Other (describe in text box below)" + ] + }, + { + "qid": "q21", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q22", + "title": "C7 Has this research received Institutional Review Board (IRB) or ethics committee approval?", + "nav": "Review Board (IRB) or ethics committee approval?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "No", + "Yes", + "Other (describe in text box below)" + ] + }, + { + "qid": "q23", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q24", + "title": "C8 IRB Number", + "nav": "IRB Number", + "type": "string", + "format": "text" + }, + { + "qid": "q25", + "title": "C9 Date of IRB Approval", + "nav": "IRB Number", + "type": "string", + "format": "text" + }, + { + "qid": "q26", + "title": "C10 Will the intervention be implemented by the researcher or a third party? If a third party, please provide the name.", + "nav": "Review Board (IRB) or ethics committee approval?", + "type": "choose", + "format": "singleselect", + "options": [ + "Researchers", + "Other (describe in text box below)" + ] + }, + { + "qid": "q27", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q28", + "title": "C11 Did any of the research team receive remuneration from the implementing agency for taking part in this research?", + "nav": "Remuneration?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "Yes", + "No", + "Other (describe in text box below)" + ] + }, + { + "qid": "q29", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q30", + "title": "C12 If relevant, is there an advance agreement with the implementation group that all results can be published?", + "nav": "is there an advance agreement with the implementation group that all results can be published?", + "type": "choose", + "format": "singleselect", + "options": [ + "N/A", + "Yes", + "No", + "Other (describe in text box below)" + ] + }, + { + "qid": "q31", + "title": "Other", + "format": "textarea", + "required": false + }, + { + "qid": "q32", + "title": "C13 JEL classification(s)", + "nav": "JEL classification(s)", + "type": "string", + "format": "text", + "description": "Please provide alphanumeric code(s). If multiple classifications, separate by commas (e.g. 
D31, C19, F22)" + } + ] + }, + { + "id": "page3", + "title": "Keywords and Data", + "questions": [{ + "qid": "q33", + "nav": "Keywords", + "type": "choose", + "format": "multiselect", + "title": "Keywords for Methodology", + "description": "Choose one or more categories that describe your study methodology.", + "options": [ + "Experimental Design", + "Field Experiments", + "Lab Experiments", + "Mixed Method", + "Statistics", + "Survey Methodology" + ] + }, { + "qid": "q34", + "nav": "Keywords", + "type": "choose", + "format": "multiselect", + "title": "Keywords for Policy", + "description": "Choose one or more policy categories.", + "options": [ + "Conflict and Violence", + "Corruption", + "Development", + "Elections", + "Ethnic Politics", + "Gender", + "Governance" + ] + }, { + "qid": "q35", + "title": "Certification", + "nav": "Certification", + "type": "choose", + "format": "singleselect", + "description": "By submitting this form and accompanying documents with EGAP, I confirm that I have rights to put this information in the public domain and I understand that this information will remain on the EGAP registry in perpetuity, regardless of whether the research is subsequently implemented or not.", + "options": [ + "Agree" + ], + "required": true + }, { + "qid": "q36", + "title": "Confirmation", + "nav": "Confirmation", + "type": "choose", + "format": "singleselect", + "description": "You should receive a confirmation of your registration within three business days. Your registration is considered complete only when confirmation is received. If you do not receive confirmation within three business days please contact paps@egap.org.", + "options": [ + "Agree" + ], + "required": true + }, { + "qid": "q37", + "nav": "Additional Documentation", + "title": "Additional Documentation", + "type": "osf-upload", + "format": "osf-upload-open", + "description": "Please upload your pre-analysis plan, along with any other supporting documents, such as survey instrument, research protocol, any data, etc." + }, { + "qid": "q38", + "nav": "Anonymous Documentation", + "title": "Anonymous Documentation", + "type": "osf-upload", + "format": "osf-upload-open", + "description": "Please upload your anonymized pre-analysis plan, along with any other supporting documents, such as survey instrument, research protocol, any data, etc." 
+    }]
+  }
+  ]
+}
diff --git a/scripts/EGAP/egap_workflow.ipynb b/scripts/EGAP/egap_workflow.ipynb
new file mode 100644
index 00000000000..750bb02b152
--- /dev/null
+++ b/scripts/EGAP/egap_workflow.ipynb
@@ -0,0 +1,47 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from create_EGAP_json import create_file_tree_and_json\n",
+    "\n",
+    "author_source = '/Users/bgeiger/Desktop/EGAP/20190821_author_emails.csv'\n",
+    "registry_source = '/Users/bgeiger/Desktop/EGAP/20191014_OSF_database.csv'\n",
+    "target_directory = '/Users/bgeiger/Desktop/EGAP/output/'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "create_file_tree_and_json(author_source, registry_source, target_directory)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.1"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}

From 8a454a3b4766da03aed12b6a9f0f0ff7b7619b02 Mon Sep 17 00:00:00 2001
From: John Tordoff
Date: Wed, 30 Oct 2019 13:26:11 -0400
Subject: [PATCH 4/9] [ENG-1108] Add script for importing files into EGAP file structure (#9178)

## Purpose

Take the 52 pick-up that is the EGAP data dump and make it into the freshly minted deck of our file import structure.

## Changes

1. Given a directory of files (a.k.a. files_dir), a directory of metadata generated from the egap spreadsheets (a.k.a. metadata_dir), and an optional list of project ids, copy the appropriate files from the files directory into the appropriate locations in the directory of metadata generated from the egap spreadsheets (see the sketch after the QA notes below).
   * The location to copy to (a.k.a. destination_dir) is "//data/"
   * Do not copy the files if they do not start with the project id
   * If the files contain the word "anonymous" (case insensitive), copy to a "/anonymous" directory
   * If the files contain the phrase "PAP_anon" (case insensitive), copy to the "/anonymous" directory
   * If the files are being copied but aren't to be copied to the "/anonymous" directory, copy them to a "/nonanonymous" directory
   * Ensure that the actions listed above can also be activated by calling a top-level function with the appropriate args or kwargs
   * If a list of project_ids is not specified when the script is run, you can default to the list of folder names contained inside the metadata_dir.
   * A list of ids will always be provided when called by an external script such as a Jupyter Notebook.
2. Have an audit function that will list which files above will not be copied. Save that information to a file called "ignoring.txt". Use the same code in the audit that you use in the script to determine which files will not be copied.
3. Have tests to verify that regular, anonymous, and ignored files will be handled appropriately.

## QA Notes

Determine criteria for the types of files you'd like tested, so we can find directories that match those criteria. Test using the Jupyter Notebook.
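For reference, the sorting rules in the Changes list boil down to a couple of small predicates; this is a condensed sketch of them (the real implementations, `get_item_id`, `check_anon`, and `check_id`, are in the diff below):

```python
import os

def get_item_id(path):
    # '20121001AA Findley' -> '20121001AA'
    return path.split(os.sep)[-1].split(' ')[0]

def is_anonymous(filename):
    # 'PAP_anon' or 'anonymous' anywhere in the name, case-insensitive
    lowered = filename.lower()
    return 'pap_anon' in lowered or 'anonymous' in lowered

def follows_id_convention(project_id, filename):
    # kept files must start with the EGAP id, e.g. '20151016AA_PAP.pdf'
    return filename.startswith(project_id)
```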
## Documentation

Code comments only

## Side Effects

None that I know of

## Ticket

https://openscience.atlassian.net/browse/ENG-1108
---
 scripts/EGAP/files_to_import_structure.py | 140 ++++++++++++++
 .../test_nonanonymous/20151016AA_FORM.pdf | Bin
 .../data/test_nonanonymous/20151016AA_PAP.pdf | Bin
 .../data/test_nonanonymous/justafile.pdf | Bin
 .../tests/test_files_to_import_structure.py | 55 +++++++
 5 files changed, 195 insertions(+)
 create mode 100644 scripts/EGAP/files_to_import_structure.py
 create mode 100644 scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_FORM.pdf
 create mode 100644 scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_PAP.pdf
 create mode 100644 scripts/tests/test_files/20151016AA/data/test_nonanonymous/justafile.pdf
 create mode 100644 scripts/tests/test_files_to_import_structure.py

diff --git a/scripts/EGAP/files_to_import_structure.py b/scripts/EGAP/files_to_import_structure.py
new file mode 100644
index 00000000000..89cbe3934ce
--- /dev/null
+++ b/scripts/EGAP/files_to_import_structure.py
@@ -0,0 +1,140 @@
+import os
+import re
+import shutil
+import argparse
+from distutils.dir_util import copy_tree
+
+from nose.tools import assert_equal
+
+# This takes the item id from the path of the project directory, for example '20121001AA Findley' -> '20121001AA'
+get_item_id = lambda _path: _path.split(os.sep)[-1].split(' ')[0]
+
+
+# Check if a file name starts with the EGAP id, for example '20121001AA_PAP.pdf'
+def check_id(root, item):
+    project_id = get_item_id(root.split('/')[-3])
+    return item.startswith(project_id)
+
+
+# Check if a file follows the anonymous naming convention
+check_anon = lambda item: 'pap_anon' in item.lower() or 'anonymous' in item.lower()
+
+
+def action_files_by_name(root, source, item_name):
+    """
+    Pick out anonymous files and create a new folder to move them into; remove ones that don't follow the id naming convention.
+    :param root:
+    :param source:
+    :param item_name:
+    :return:
+    """
+    if not check_id(root, item_name):
+        path = os.path.join(root, item_name)
+        os.remove(path)
+        return
+
+    if check_anon(item_name):
+        destination_parent = os.path.join('/'.join(root.split('/')[:-1]), 'anonymous')
+
+        if not os.path.exists(destination_parent):
+            os.mkdir(destination_parent)
+
+        destination = os.path.join(destination_parent, item_name)
+        shutil.move(source, destination)
+
+
+def audit_files(source):
+    including = open('including.txt', 'w+')
+    ignoring = open('ignoring.txt', 'w+')
+    for root, dir, files in os.walk(source):
+        for item in files:
+            name = os.path.join(root.split('/')[-1], item)  # get file/folder name after slash
+            if not check_id(root, name):
+                ignoring.writelines(name + '\r')
+            else:
+                including.writelines(name + '\r')
+
+    ignoring.close()
+    including.close()
+
+    projects = set(os.listdir(source))
+    project_ids = set([get_item_id(folders) for folders in list(projects)])
+
+    # check for duplicate ids
+    assert_equal(len(projects), len(project_ids))
+
+
+def main(files_dir, metadata_dir, id_list=None):
+    """
+    This is a script for our EGAP partnership that converts the EGAP-provided dump of files into a directory structure
+    we can easily import into the OSF. Some files in the dump are anonymous and need to be sorted into a special folder;
+    some don't follow an id naming convention and should be ignored and not imported.
+
+    This script copies the whole file tree for a project to preserve file hierarchy, then picks out anonymous files,
+    moves them to the anonymous folder, and deletes those that don't follow the naming convention.
+
+    This script can be safely removed once all EGAP registrations have been imported.
+
+    :param files_dir: the source path we're picking files out of
+    :param metadata_dir: a pre-made directory structure for importing projects that we are packing files into.
+    :return:
+    """
+    project_dirs = os.listdir(files_dir)
+    if id_list:
+        project_dirs = [project for project in project_dirs if get_item_id(project) in id_list]
+
+    # Copy the whole tree to preserve file hierarchy, then sort/remove files below
+    for item in project_dirs:
+        item_id = get_item_id(item)
+        source = os.path.join(files_dir, item)
+        destination = os.path.join(metadata_dir, item_id, 'data', 'nonanonymous')
+        if os.path.isdir(source):
+            copy_tree(source, destination)
+
+    for root, dir, files in os.walk(metadata_dir):
+        for item in files:
+            if item not in ('project.json', 'registration-schema.json'):
+                source = os.path.join(root, item)
+                action_files_by_name(root, source, item)
+
+    # Check all anon files ended up in an /anonymous/ directory
+    for root, dir, files in os.walk(metadata_dir):
+        for item in files:
+            if item not in ('project.json', 'registration-schema.json'):
+                if check_anon(item):
+                    assert '/anonymous' in root
+                else:
+                    assert '/nonanonymous' in root
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-source',
+        '--source',
+        help='This should be the directory for the EGAP data dump, traditionally called "EGAP_REGISTRY_staging/3 Registrations/"'
+    )
+    parser.add_argument(
+        '-destination',
+        '--destination',
+        help='This should be the directory of the import file structure containing the bags of data.'
+    )
+    parser.add_argument(
+        '-list',
+        '--list',
+        help='This is a list of ids to import into the new metadata directory.'
+    )
+    parser.add_argument(
+        '-audit',
+        '--audit',
+        help='This includes all files that don\'t follow the "_PAP" naming convention.'
+ ) + + args = parser.parse_args() + source = args.source + destination = args.destination + audit = args.audit + if audit: + audit_files(source) + else: + main(source, destination) diff --git a/scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_FORM.pdf b/scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_FORM.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_PAP.pdf b/scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_PAP.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/tests/test_files/20151016AA/data/test_nonanonymous/justafile.pdf b/scripts/tests/test_files/20151016AA/data/test_nonanonymous/justafile.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/scripts/tests/test_files_to_import_structure.py b/scripts/tests/test_files_to_import_structure.py new file mode 100644 index 00000000000..0778df7f82f --- /dev/null +++ b/scripts/tests/test_files_to_import_structure.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +import mock +from tests.base import OsfTestCase +from scripts.EGAP.files_to_import_structure import action_files_by_name + + +class TestEGAPFilesToImportStructure(OsfTestCase): + + @mock.patch('scripts.EGAP.files_to_import_structure.os.mkdir') + @mock.patch('scripts.EGAP.files_to_import_structure.shutil.move') + def test_doesnt_move_nonanon_files(self, mock_move, mock_mkdir): + action_files_by_name( + 'scripts/tests/test_files/20151016AA/data/datatest_nonanonymous', + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_PAP.pdf', + '20151016AA_PAP.pdf' + ) + assert not mock_mkdir.called + assert not mock_move.called + + @mock.patch('scripts.EGAP.files_to_import_structure.os.mkdir') + @mock.patch('scripts.EGAP.files_to_import_structure.shutil.move') + def test_moves_anon_files(self, mock_move, mock_mkdir): + action_files_by_name( + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous', + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_anonymous.pdf', + '20151016AA_anonymous.pdf' + ) + + mock_mkdir.assert_called_with('scripts/tests/test_files/20151016AA/data/anonymous') + + mock_move.assert_called_with( + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_anonymous.pdf', + 'scripts/tests/test_files/20151016AA/data/anonymous/20151016AA_anonymous.pdf' + ) + + @mock.patch('scripts.EGAP.files_to_import_structure.os.remove') + def test_removes_no_id(self, mock_remove): + action_files_by_name( + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous', + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous/justafile.pdf', + 'justafile.pdf' + ) + + mock_remove.assert_called_with('scripts/tests/test_files/20151016AA/data/test_nonanonymous/justafile.pdf') + + @mock.patch('scripts.EGAP.files_to_import_structure.os.remove') + def test_removes_form(self, mock_remove): + + action_files_by_name( + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous', + 'scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_FORM.pdf', + '20151016AA_FORM.pdf' + ) + + mock_remove.assert_called_with('scripts/tests/test_files/20151016AA/data/test_nonanonymous/20151016AA_FORM.pdf') From 256b97c35ebfc32073375fd3bbd800599786ee20 Mon Sep 17 00:00:00 2001 From: 
Yuhuai Liu
Date: Mon, 4 Nov 2019 10:44:57 -0500
Subject: [PATCH 5/9] Change submittingAuthor to Author (#9190)

## Purpose

All the remaining authors should be `Author` instead of `submittingAuthor`.

---
 osf/external/chronos.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/osf/external/chronos.py b/osf/external/chronos.py
index caf48790d23..64f19f7b51e 100644
--- a/osf/external/chronos.py
+++ b/osf/external/chronos.py
@@ -105,7 +105,7 @@ def serialize_author(cls, contributor):
         if contributor._order == 0:
             contribution = 'firstAuthor'
         else:
-            contribution = 'submittingAuthor'
+            contribution = 'Author'
         ret.update({
             'CONTRIBUTION': contribution,
             'ORGANIZATION': '',

From dc0e0852f801120081ff784b646ff5b19f3b741f Mon Sep 17 00:00:00 2001
From: John Tordoff
Date: Mon, 4 Nov 2019 15:35:55 -0500
Subject: [PATCH 6/9] [ENG-897] EGAP Ingester (#9183)

## Purpose

Turn those wonderful local file structures into EGAP projects!

## Changes

Adds a management command that creates a node with the proper contributors and uploads files, maintaining directory hierarchy, with tests.

## QA Notes

To set this up you must:
1) log in and create a private project
2) upload a zip of files using the EGAP import structure to the project
3) run `python manage.py import_EGAP -c= -id=`

- This is a low risk change that doesn't involve a migration.
- QA will be able to verify this with an IPython notebook. Once that's set up, they should be able to browse the registrations and see they were imported properly with the correct permissions and bibliographic status.
- This is unlikely to affect any other portion of the site in a substantive way.

This includes some unit tests, but it can easily be snagged on irregular data; if you find any, please report it to me.

## Documentation

Code comments, JIRA

## Side Effects

None that I know of.
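The directory layout the command consumes is the one produced by the earlier EGAP scripts; below is a small sanity check QA could run on each project directory before importing (a hypothetical helper for illustration, not part of import_EGAP itself):

```python
import os

def check_egap_bag(project_dir):
    # Each project directory should carry the two JSON files produced by
    # create_EGAP_json.py (patch 2)...
    for name in ('project.json', 'registration-schema.json'):
        assert os.path.isfile(os.path.join(project_dir, name)), name
    # ...plus the data/nonanonymous tree populated by
    # files_to_import_structure.py (patch 4); data/anonymous exists only
    # when the dump contained anonymous files.
    assert os.path.isdir(os.path.join(project_dir, 'data', 'nonanonymous'))
```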
## Ticket
https://openscience.atlassian.net/browse/ENG-897

---
 egap_assets.zip                               | Bin 0 -> 15691 bytes
 osf/management/commands/import_EGAP.py        | 189 ++++++++++++++++
 .../management_commands/test_EGAP_import.py   | 210 ++++++++++++++++++
 .../20120220AA/data/nonanonymous/test-1.txt   |   0
 .../data/nonanonymous/test_folder/test-2.txt  |   0
 .../EGAP/20120220AA/project.json              |   1 +
 .../EGAP/20120220AA/registration-schema.json  |   1 +
 .../test_directory/EGAP/test-egap.zip         | Bin 0 -> 15691 bytes
 8 files changed, 401 insertions(+)
 create mode 100644 egap_assets.zip
 create mode 100644 osf/management/commands/import_EGAP.py
 create mode 100644 osf_tests/management_commands/test_EGAP_import.py
 create mode 100644 osf_tests/management_commands/test_directory/EGAP/20120220AA/data/nonanonymous/test-1.txt
 create mode 100644 osf_tests/management_commands/test_directory/EGAP/20120220AA/data/nonanonymous/test_folder/test-2.txt
 create mode 100644 osf_tests/management_commands/test_directory/EGAP/20120220AA/project.json
 create mode 100644 osf_tests/management_commands/test_directory/EGAP/20120220AA/registration-schema.json
 create mode 100644 osf_tests/management_commands/test_directory/EGAP/test-egap.zip

diff --git a/egap_assets.zip b/egap_assets.zip
new file mode 100644
index 0000000000000000000000000000000000000000..85b13477ac26a1490e5c535d41920d5ca95cb46b
GIT binary patch
[literal 15691 -- base85-encoded binary data omitted]
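For reference, the test fixtures listed above imply the on-disk layout the ingester consumes: one "bag" directory per registration, holding `project.json`, `registration-schema.json`, and a `data/nonanonymous` tree. A minimal sketch of walking that layout (illustrative only; this is not the `import_EGAP` command itself, and the root path is a placeholder):

```python
import json
import os

# Illustrative root; the test fixtures above live under
# osf_tests/management_commands/test_directory/EGAP/.
egap_root = 'osf_tests/management_commands/test_directory/EGAP'

for bag_name in sorted(os.listdir(egap_root)):
    bag_path = os.path.join(egap_root, bag_name)
    if not os.path.isdir(bag_path):
        continue  # skip stray archives such as test-egap.zip
    # Each bag carries node metadata plus validated registration metadata.
    with open(os.path.join(bag_path, 'project.json')) as fp:
        project = json.load(fp)
    with open(os.path.join(bag_path, 'registration-schema.json')) as fp:
        registration = json.load(fp)
    # Files to upload live under data/nonanonymous (and data/anonymous, if
    # present), preserving the original directory hierarchy.
    data_dir = os.path.join(bag_path, 'data', 'nonanonymous')
    for root, _dirs, files in os.walk(data_dir):
        for file_name in files:
            rel_path = os.path.relpath(os.path.join(root, file_name), bag_path)
            print('{}: {}'.format(bag_name, rel_path))
```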
Date: Tue, 5 Nov 2019 15:49:25 -0500
Subject: [PATCH 7/9] Refactor audit, add requirements.txt, and update Jupyter notebook (#9192)

## Purpose
Integrate EGAP file processing with the Jupyter notebook.

## Changes
1. Add notebook integration of EGAP file processing
2. Add requirements.txt for Jupyter users
3. Refactor audit and file checking for more consistency between uses
4. Update variable names in the file script to avoid shadowing the outer scope

## QA Notes
- Does this change require a data migration? If so, what data will we migrate? _No data migration_
- What is the level of risk? _Low_
- Any permissions code touched? _No_
- Is this an additive or subtractive change, other? _Mostly additive, but with some changes to a recently added, not-yet-QA'd PR_
- How can QA verify? (Through UI, API, AdminApp or AdminAdminApp?) _Using the Jupyter notebook_
- If verifying through API, what's the new version? Please include the endpoints in PR notes or Dev docs.
- What features or workflows might this change impact? _EGAP migration_
- How will this impact performance? _It shouldn't_

### How to use:
- Create a virtual env for this and activate it
- Run `pip3 install -r requirements.txt` from within the `scripts/EGAP/` directory
- Run `jupyter lab` from the OSF base directory
- Open the ipynb file from within jupyter lab
- The first code block does setup. Set the variables in that block to point to the inputs and outputs described here:
  - `author_source` is the csv of authors from EGAP
  - `registry_source` is the csv of projects from EGAP
  - `raw_files_directory` is the path to the raw files from EGAP
  - `metadata_directory` is the path of the directory that will be created to hold all of the project data
  - `directories_to_process` is the list of project ids that should be processed in the file-moving step
- Run the first code block to import and set up everything
- Run the `create_file_tree_and_json` code block to create the metadata directory
- Run the `audit_files` code block to generate the ignoring and including text files inside the `scripts/EGAP` directory
- Run the `convert_files` block to move the files for the projects specified in `directories_to_process` from the raw data directory into the metadata directory (a condensed sketch of these steps follows below)

## Documentation
No external documentation

## Side Effects
There shouldn't be any.
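Condensed, the notebook workflow above boils down to a handful of calls, visible in the notebook diff that follows. A minimal sketch (the paths are placeholders for your local copies, and `directories_to_process` is truncated to two ids here; the full notebook lists every id to migrate):

```python
from create_EGAP_json import create_file_tree_and_json
from files_to_import_structure import audit_files, main as convert_files

# Placeholder paths -- point these at your local EGAP inputs and outputs.
author_source = '/path/to/20190821_author_emails.csv'    # csv of authors from EGAP
registry_source = '/path/to/20191014_OSF_database.csv'   # csv of projects from EGAP
raw_files_directory = '/path/to/raw_files/'              # raw files from EGAP
metadata_directory = '/path/to/metadata/'                # created to hold the project data
directories_to_process = ['20110302AA', '20110307AA']    # truncated for this sketch

# 1. Create the metadata directory: one bag per registry, containing
#    project.json and registration-schema.json.
create_file_tree_and_json(author_source, registry_source, metadata_directory)

# 2. Write the including.txt / ignoring.txt audit lists inside scripts/EGAP.
audit_files(raw_files_directory)

# 3. Move files for the listed projects from the raw data directory into the
#    metadata directory, splitting anonymous files into their own folder.
convert_files(raw_files_directory, metadata_directory, directories_to_process)
print('Done converting files.')
```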
## Ticket https://openscience.atlassian.net/browse/ENG-1194 --- scripts/EGAP/egap_workflow.ipynb | 1499 ++++++++++++++++++++- scripts/EGAP/files_to_import_structure.py | 71 +- scripts/EGAP/requirements.txt | 68 + 3 files changed, 1610 insertions(+), 28 deletions(-) create mode 100644 scripts/EGAP/requirements.txt diff --git a/scripts/EGAP/egap_workflow.ipynb b/scripts/EGAP/egap_workflow.ipynb index 750bb02b152..96f5af42703 100644 --- a/scripts/EGAP/egap_workflow.ipynb +++ b/scripts/EGAP/egap_workflow.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the `scripts/EGAP` folder, with your virtualenv active, `pip install -r requirements.txt`" + ] + }, { "cell_type": "code", "execution_count": null, @@ -7,10 +14,1481 @@ "outputs": [], "source": [ "from create_EGAP_json import create_file_tree_and_json\n", + "from files_to_import_structure import audit_files, main as convert_files\n", "\n", "author_source = '/Users/bgeiger/Desktop/EGAP/20190821_author_emails.csv'\n", "registry_source = '/Users/bgeiger/Desktop/EGAP/20191014_OSF_database.csv'\n", - "target_directory= '/Users/bgeiger/Desktop/EGAP/output/'" + "metadata_directory = '/Users/bgeiger/Desktop/EGAP/metadata/'\n", + "raw_files_directory = '/Users/bgeiger/Desktop/EGAP/raw_files/'\n", + "directories_to_process = [\n", + " '20110302AA',\n", + " '20110307AA',\n", + " '20120117AA',\n", + " '20120220AA',\n", + " '20120727AA',\n", + " '20120925AA',\n", + " '20120926AA',\n", + " '20121001AA',\n", + " '20121002AA',\n", + " '20121012AA',\n", + " '20121026AA',\n", + " '20121031AA',\n", + " '20121101AA',\n", + " '20121104AA',\n", + " '20121106AA',\n", + " '20121107AA',\n", + " '20121123AA',\n", + " '20121212AA',\n", + " '20130122AA',\n", + " '20130403AA',\n", + " '20130406AA',\n", + " '20130410AA',\n", + " '20130426AA',\n", + " '20130518AA',\n", + " '20130607AA',\n", + " '20130616AA',\n", + " '20130704AA',\n", + " '20130729AA',\n", + " '20130731AA',\n", + " '20130803AA',\n", + " '20130813AA',\n", + " '20130819AA',\n", + " '20130913AA',\n", + " '20130921AA',\n", + " '20131012AA',\n", + " '20131024AA',\n", + " '20131101AA',\n", + " '20131105AA',\n", + " '20131110AA',\n", + " '20131117AA',\n", + " '20131118AA',\n", + " '20131130AA',\n", + " '20131203AA',\n", + " '20131206AA',\n", + " '20131210AA',\n", + " '20131211AA',\n", + " '20131216AA',\n", + " '20131220AA',\n", + " '20140110AA',\n", + " '20140112AA',\n", + " '20140113AA',\n", + " '20140120AA',\n", + " '20140124AA',\n", + " '20140126AA',\n", + " '20140131AA',\n", + " '20140203AA',\n", + " '20140203AB',\n", + " '20140222AA',\n", + " '20140222AB',\n", + " '20140228AA',\n", + " '20140303AA',\n", + " '20140308AA',\n", + " '20140316AA',\n", + " '20140320AA',\n", + " '20140417AA',\n", + " '20140502AA',\n", + " '20140503AA',\n", + " '20140506AA',\n", + " '20140509AA',\n", + " '20140509AB',\n", + " '20140512AA',\n", + " '20140521AA',\n", + " '20140523AA',\n", + " '20140529AA',\n", + " '20140610AA',\n", + " '20140611AA',\n", + " '20140611AB',\n", + " '20140613AB',\n", + " '20140627AA',\n", + " '20140627AB',\n", + " '20140627AC',\n", + " '20140701AA',\n", + " '20140701AB',\n", + " '20140707AA',\n", + " '20140708AA',\n", + " '20140715AA',\n", + " '20140722AA',\n", + " '20140723AA',\n", + " '20140723AB',\n", + " '20140806AA',\n", + " '20140812AA',\n", + " '20140820AA',\n", + " '20140912AA',\n", + " '20140915AA',\n", + " '20140918AA',\n", + " '20140922AA',\n", + " '20141002AA',\n", + " '20141006AA',\n", + " '20141023AA',\n", + " '20141025AA',\n", + " 
'20141027AA',\n", + " '20141031AA',\n", + " '20141031AB',\n", + " '20141101AA',\n", + " '20141103AA',\n", + " '20141107AA',\n", + " '20141117AA',\n", + " '20141202AA',\n", + " '20141208AA',\n", + " '20141213AA',\n", + " '20141223AA',\n", + " '20141225AA',\n", + " '20141227AA',\n", + " '20141231AA',\n", + " '20150110AA',\n", + " '20150111AA',\n", + " '20150118AA',\n", + " '20150122AA',\n", + " '20150122AB',\n", + " '20150127AA',\n", + " '20150131AA',\n", + " '20150202AA',\n", + " '20150204AA',\n", + " '20150206AA',\n", + " '20150211AA',\n", + " '20150216AA',\n", + " '20150304AA',\n", + " '20150308AA',\n", + " '20150309AA',\n", + " '20150310AA',\n", + " '20150311AA',\n", + " '20150313AA',\n", + " '20150320AA',\n", + " '20150323AA',\n", + " '20150324AA',\n", + " '20150326AA',\n", + " '20150330AA',\n", + " '20150420AA',\n", + " '20150423AA',\n", + " '20150428AA',\n", + " '20150429AA',\n", + " '20150508AA',\n", + " '20150513AA',\n", + " '20150513AB',\n", + " '20150513AC',\n", + " '20150513AD',\n", + " '20150513AE',\n", + " '20150513AF',\n", + " '20150513AG',\n", + " '20150513AH',\n", + " '20150513AI',\n", + " '20150514AA',\n", + " '20150517AA',\n", + " '20150518AA',\n", + " '20150520AA',\n", + " '20150522AA',\n", + " '20150526AA',\n", + " '20150527AA',\n", + " '20150602AA',\n", + " '20150602AB',\n", + " '20150603AA',\n", + " '20150604AA',\n", + " '20150605AA',\n", + " '20150605AB',\n", + " '20150616AA',\n", + " '20150617AA',\n", + " '20150619AA',\n", + " '20150622AA',\n", + " '20150623AA',\n", + " '20150701AA',\n", + " '20150702AA',\n", + " '20150703AA',\n", + " '20150707AA',\n", + " '20150708AA',\n", + " '20150709AA',\n", + " '20150709AB',\n", + " '20150710AA',\n", + " '20150713AA',\n", + " '20150716AA',\n", + " '20150716AB',\n", + " '20150717AA',\n", + " '20150718AA',\n", + " '20150720AA',\n", + " '20150723AA',\n", + " '20150724AA',\n", + " '20150727AA',\n", + " '20150731AA',\n", + " '20150731AB',\n", + " '20150803AA',\n", + " '20150803AB',\n", + " '20150812AA',\n", + " '20150813AA',\n", + " '20150813AB',\n", + " '20150819AA',\n", + " '20150819AB',\n", + " '20150820AA',\n", + " '20150824AA',\n", + " '20150824AB',\n", + " '20150825AA',\n", + " '20150827AA',\n", + " '20150903AA',\n", + " '20150903AB',\n", + " '20150914AA',\n", + " '20150915AA',\n", + " '20150917AA',\n", + " '20150921AA',\n", + " '20150922AA',\n", + " '20150924AA',\n", + " '20150925AA',\n", + " '20150927AA',\n", + " '20150928AA',\n", + " '20150928AB',\n", + " '20150929AA',\n", + " '20150929AB',\n", + " '20150930AA',\n", + " '20150930AB',\n", + " '20151003AA',\n", + " '20151006AA',\n", + " '20151006AB',\n", + " '20151012AA',\n", + " '20151013AA',\n", + " '20151013AB',\n", + " '20151014AA',\n", + " '20151014AB',\n", + " '20151016AA',\n", + " '20151016AB',\n", + " '20151016AC',\n", + " '20151017AA',\n", + " '20151019AA',\n", + " '20151023AA',\n", + " '20151027AA',\n", + " '20151030AA',\n", + " '20151102AA',\n", + " '20151102AB',\n", + " '20151102AC',\n", + " '20151103AA',\n", + " '20151107AA',\n", + " '20151112AA',\n", + " '20151112AB',\n", + " '20151114AA',\n", + " '20151116AA',\n", + " '20151116AB',\n", + " '20151118AA',\n", + " '20151119AA',\n", + " '20151119AB',\n", + " '20151120AA',\n", + " '20151120AB',\n", + " '20151123AA',\n", + " '20151125AA',\n", + " '20151125AB',\n", + " '20151128AA',\n", + " '20151201AA',\n", + " '20151201AB',\n", + " '20151202AA',\n", + " '20151204AA',\n", + " '20151206AA',\n", + " '20151207AA',\n", + " '20151209AA',\n", + " '20151218AA',\n", + " '20160105AA',\n", + " '20160106AA',\n", + " 
'20160112AA',\n", + " '20160112AB',\n", + " '20160113AA',\n", + " '20160113AB',\n", + " '20160119AA',\n", + " '20160121AA',\n", + " '20160202AA',\n", + " '20160208AA',\n", + " '20160208AB',\n", + " '20160208AC',\n", + " '20160216AA',\n", + " '20160217AA',\n", + " '20160219AA',\n", + " '20160222AA',\n", + " '20160224AA',\n", + " '20160224AB',\n", + " '20160225AA',\n", + " '20160308AA',\n", + " '20160308AB',\n", + " '20160309AA',\n", + " '20160313AA',\n", + " '20160315AA',\n", + " '20160318AA',\n", + " '20160321AA',\n", + " '20160323AA',\n", + " '20160324AA',\n", + " '20160327AA',\n", + " '20160330AA',\n", + " '20160401AA',\n", + " '20160404AA',\n", + " '20160404AB',\n", + " '20160405AA',\n", + " '20160405AB',\n", + " '20160406AA',\n", + " '20160408AA',\n", + " '20160409AA',\n", + " '20160409AB',\n", + " '20160410AA',\n", + " '20160411AA',\n", + " '20160411AB',\n", + " '20160411AC',\n", + " '20160411AD',\n", + " '20160413AA',\n", + " '20160414AA',\n", + " '20160415AA',\n", + " '20160416AA',\n", + " '20160416AB',\n", + " '20160421AA',\n", + " '20160426AA',\n", + " '20160427AA',\n", + " '20160427AB',\n", + " '20160429AA',\n", + " '20160429AB',\n", + " '20160507AA',\n", + " '20160514AA',\n", + " '20160515AA',\n", + " '20160516AA',\n", + " '20160517AA',\n", + " '20160517AB',\n", + " '20160517AC',\n", + " '20160517AD',\n", + " '20160517AE',\n", + " '20160519AA',\n", + " '20160520AA',\n", + " '20160524AA',\n", + " '20160531AA',\n", + " '20160601AA',\n", + " '20160601AB',\n", + " '20160601AC',\n", + " '20160601AD',\n", + " '20160602AA',\n", + " '20160603AA',\n", + " '20160605AA',\n", + " '20160607AA',\n", + " '20160607AB',\n", + " '20160609AA',\n", + " '20160609AB',\n", + " '20160609AC',\n", + " '20160611AA',\n", + " '20160612AA',\n", + " '20160613AA',\n", + " '20160613AB',\n", + " '20160615AA',\n", + " '20160615AB',\n", + " '20160616AA',\n", + " '20160617AA',\n", + " '20160617AB',\n", + " '20160618AA',\n", + " '20160621AA',\n", + " '20160621AB',\n", + " '20160621AC',\n", + " '20160621AD',\n", + " '20160622AA',\n", + " '20160622AB',\n", + " '20160624AA',\n", + " '20160624AB',\n", + " '20160625AA',\n", + " '20160628AA',\n", + " '20160629AA',\n", + " '20160630AA',\n", + " '20160702AA',\n", + " '20160708AA',\n", + " '20160708AB',\n", + " '20160712AA',\n", + " '20160717AA',\n", + " '20160719AA',\n", + " '20160719AB',\n", + " '20160721AA',\n", + " '20160726AA',\n", + " '20160726AB',\n", + " '20160727AA',\n", + " '20160729AA',\n", + " '20160730AA',\n", + " '20160801AA',\n", + " '20160801AB',\n", + " '20160803AA',\n", + " '20160803AB',\n", + " '20160809AA',\n", + " '20160809AB',\n", + " '20160812AA',\n", + " '20160812AB',\n", + " '20160813AA',\n", + " '20160813AB',\n", + " '20160813AC',\n", + " '20160819AA',\n", + " '20160819AB',\n", + " '20160820AA',\n", + " '20160822AA',\n", + " '20160824AA',\n", + " '20160825AA',\n", + " '20160829AA',\n", + " '20160831AA',\n", + " '20160905AA',\n", + " '20160907AA',\n", + " '20160911AA',\n", + " '20160912AB',\n", + " '20160913AA',\n", + " '20160913AB',\n", + " '20160916AA',\n", + " '20160916AB',\n", + " '20160918AA',\n", + " '20160919AA',\n", + " '20160921AA',\n", + " '20160921AB',\n", + " '20160926AA',\n", + " '20160926AB',\n", + " '20160926AC',\n", + " '20161001AA',\n", + " '20161004AA',\n", + " '20161006AA',\n", + " '20161011AA',\n", + " '20161016AA',\n", + " '20161017AA',\n", + " '20161019AA',\n", + " '20161019AB',\n", + " '20161020AA',\n", + " '20161026AA',\n", + " '20161028AA',\n", + " '20161030AA',\n", + " '20161101AA',\n", + " '20161101AB',\n", + " 
'20161103AA',\n", + " '20161103AB',\n", + " '20161104AA',\n", + " '20161104AB',\n", + " '20161109AA',\n", + " '20161109AB',\n", + " '20161109AC',\n", + " '20161110AA',\n", + " '20161110AB',\n", + " '20161110AC',\n", + " '20161110AD',\n", + " '20161110AE',\n", + " '20161112AA',\n", + " '20161118AA',\n", + " '20161121AA',\n", + " '20161122AA',\n", + " '20161122AB',\n", + " '20161123AA',\n", + " '20161125AA',\n", + " '20161127AA',\n", + " '20161128AA',\n", + " '20161129AA',\n", + " '20161204AA',\n", + " '20161204AB',\n", + " '20161205AA',\n", + " '20161206AA',\n", + " '20161207AA',\n", + " '20161208AA',\n", + " '20161212AA',\n", + " '20161216AA',\n", + " '20161216AB',\n", + " '20161227AA',\n", + " '20161227AB',\n", + " '20170103AA',\n", + " '20170109AA',\n", + " '20170112AA',\n", + " '20170115AA',\n", + " '20170117AA',\n", + " '20170118AA',\n", + " '20170123AA',\n", + " '20170124AA',\n", + " '20170130AA',\n", + " '20170131AA',\n", + " '20170203AA',\n", + " '20170203AB',\n", + " '20170203AC',\n", + " '20170203AD',\n", + " '20170205AA',\n", + " '20170207AA',\n", + " '20170208AA',\n", + " '20170209AA',\n", + " '20170210AA',\n", + " '20170212AA',\n", + " '20170214AA',\n", + " '20170214AB',\n", + " '20170215AA',\n", + " '20170216AA',\n", + " '20170216AB',\n", + " '20170220AA',\n", + " '20170222AA',\n", + " '20170223AA',\n", + " '20170223AB',\n", + " '20170223AC',\n", + " '20170224AA',\n", + " '20170225AA',\n", + " '20170227AA',\n", + " '20170227AB',\n", + " '20170227AC',\n", + " '20170301AA',\n", + " '20170302AA',\n", + " '20170307AA',\n", + " '20170308AA',\n", + " '20170308AB',\n", + " '20170309AA',\n", + " '20170310AA',\n", + " '20170312AA',\n", + " '20170317AA',\n", + " '20170320AA',\n", + " '20170320AB',\n", + " '20170320AC',\n", + " '20170321AA',\n", + " '20170321AB',\n", + " '20170322AA',\n", + " '20170322AB',\n", + " '20170323AA',\n", + " '20170324AA',\n", + " '20170325AA',\n", + " '20170328AA',\n", + " '20170329AA',\n", + " '20170330AA',\n", + " '20170403AA',\n", + " '20170412AA',\n", + " '20170412AB',\n", + " '20170413AA',\n", + " '20170413AB',\n", + " '20170413AC',\n", + " '20170414AA',\n", + " '20170416AA',\n", + " '20170417AA',\n", + " '20170417AB',\n", + " '20170420AA',\n", + " '20170421AA',\n", + " '20170422AA',\n", + " '20170423AA',\n", + " '20170423AB',\n", + " '20170426AA',\n", + " '20170427AA',\n", + " '20170428AA',\n", + " '20170501AA',\n", + " '20170501AB',\n", + " '20170501AC',\n", + " '20170501AD',\n", + " '20170503AA',\n", + " '20170503AB',\n", + " '20170503AC',\n", + " '20170504AA',\n", + " '20170504AB',\n", + " '20170505AA',\n", + " '20170505AB',\n", + " '20170505AC',\n", + " '20170506AA',\n", + " '20170507AA',\n", + " '20170508AA',\n", + " '20170508AB',\n", + " '20170509AA',\n", + " '20170509AB',\n", + " '20170510AA',\n", + " '20170511AA',\n", + " '20170515AA',\n", + " '20170515AB',\n", + " '20170516AA',\n", + " '20170517AA',\n", + " '20170519AA',\n", + " '20170520AA',\n", + " '20170522AA',\n", + " '20170523AA',\n", + " '20170524AA',\n", + " '20170525AA',\n", + " '20170525AB',\n", + " '20170527AA',\n", + " '20170531AA',\n", + " '20170602AA',\n", + " '20170603AA',\n", + " '20170606AA',\n", + " '20170608AA',\n", + " '20170609AA',\n", + " '20170609AB',\n", + " '20170609AC',\n", + " '20170611AA',\n", + " '20170611AB',\n", + " '20170612AA',\n", + " '20170613AA',\n", + " '20170614AA',\n", + " '20170615AA',\n", + " '20170615AB',\n", + " '20170616AA',\n", + " '20170617AA',\n", + " '20170618AA',\n", + " '20170619AA',\n", + " '20170626AA',\n", + " '20170626AB',\n", + " 
'20170629AA',\n", + " '20170705AA',\n", + " '20170706AA',\n", + " '20170706AB',\n", + " '20170706AC',\n", + " '20170711AA',\n", + " '20170712AA',\n", + " '20170714AA',\n", + " '20170716AA',\n", + " '20170716AB',\n", + " '20170717AA',\n", + " '20170720AA',\n", + " '20170720AB',\n", + " '20170720AC',\n", + " '20170721AA',\n", + " '20170721AB',\n", + " '20170724AA',\n", + " '20170725AA',\n", + " '20170725AB',\n", + " '20170727AA',\n", + " '20170728AA',\n", + " '20170728AB',\n", + " '20170729AA',\n", + " '20170731AA',\n", + " '20170803AA',\n", + " '20170804AA',\n", + " '20170805AA',\n", + " '20170807AA',\n", + " '20170808AA',\n", + " '20170809AA',\n", + " '20170810AA',\n", + " '20170811AA',\n", + " '20170811AB',\n", + " '20170814AA',\n", + " '20170815AA',\n", + " '20170816AA',\n", + " '20170819AA',\n", + " '20170821AA',\n", + " '20170821AB',\n", + " '20170822AA',\n", + " '20170823AA',\n", + " '20170828AA',\n", + " '20170829AA',\n", + " '20170831AA',\n", + " '20170901AA',\n", + " '20170905AA',\n", + " '20170906AA',\n", + " '20170907AA',\n", + " '20170908AA',\n", + " '20170908AB',\n", + " '20170908AC',\n", + " '20170910AA',\n", + " '20170911AA',\n", + " '20170913AA',\n", + " '20170913AB',\n", + " '20170913AC',\n", + " '20170914AA',\n", + " '20170914AB',\n", + " '20170915AA',\n", + " '20170915AB',\n", + " '20170918AA',\n", + " '20170919AA',\n", + " '20170920AA',\n", + " '20170920AB',\n", + " '20170920AC',\n", + " '20170921AA',\n", + " '20170922AA',\n", + " '20170922AB',\n", + " '20170922AC',\n", + " '20170925AA',\n", + " '20170926AA',\n", + " '20170926AB',\n", + " '20170927AA',\n", + " '20170927AB',\n", + " '20170928AA',\n", + " '20170929AA',\n", + " '20170930AA',\n", + " '20171001AA',\n", + " '20171001AB',\n", + " '20171002AA',\n", + " '20171003AA',\n", + " '20171003AB',\n", + " '20171004AA',\n", + " '20171009AA',\n", + " '20171010AA',\n", + " '20171010AB',\n", + " '20171012AA',\n", + " '20171013AA',\n", + " '20171015AA',\n", + " '20171016AA',\n", + " '20171017AA',\n", + " '20171018AA',\n", + " '20171019AA',\n", + " '20171020AA',\n", + " '20171022AA',\n", + " '20171023AA',\n", + " '20171024AA',\n", + " '20171024AB',\n", + " '20171024AC',\n", + " '20171025AA',\n", + " '20171027AA',\n", + " '20171101AA',\n", + " '20171103AA',\n", + " '20171104AA',\n", + " '20171104AB',\n", + " '20171105AA',\n", + " '20171106AA',\n", + " '20171106AB',\n", + " '20171106AC',\n", + " '20171107AA',\n", + " '20171109AA',\n", + " '20171109AB',\n", + " '20171113AA',\n", + " '20171113AB',\n", + " '20171113AC',\n", + " '20171114AA',\n", + " '20171115AA',\n", + " '20171117AA',\n", + " '20171117AB',\n", + " '20171117AC',\n", + " '20171119AA',\n", + " '20171120AA',\n", + " '20171120AB',\n", + " '20171121AA',\n", + " '20171121AB',\n", + " '20171122AA',\n", + " '20171122AB',\n", + " '20171122AC',\n", + " '20171124AA',\n", + " '20171127AA',\n", + " '20171127AB',\n", + " '20171128AA',\n", + " '20171129AA',\n", + " '20171205AA',\n", + " '20171205AB',\n", + " '20171206AA',\n", + " '20171208AA',\n", + " '20171210AA',\n", + " '20171210AB',\n", + " '20171211AA',\n", + " '20171211AB',\n", + " '20171211AC',\n", + " '20171213AA',\n", + " '20171218AA',\n", + " '20171218AB',\n", + " '20171218AC',\n", + " '20171221AA',\n", + " '20171222AA',\n", + " '20171223AA',\n", + " '20171228AA',\n", + " '20171229AA',\n", + " '20171230AA',\n", + " '20180105AA',\n", + " '20180105AB',\n", + " '20180105AC',\n", + " '20180105AD',\n", + " '20180108AA',\n", + " '20180108AB',\n", + " '20180109AA',\n", + " '20180109AB',\n", + " '20180110AA',\n", + " 
'20180110AB',\n", + " '20180113AA',\n", + " '20180119AA',\n", + " '20180120AA',\n", + " '20180121AA',\n", + " '20180123AA',\n", + " '20180124AA',\n", + " '20180125AA',\n", + " '20180126AA',\n", + " '20180126AB',\n", + " '20180127AA',\n", + " '20180128AA',\n", + " '20180130AA',\n", + " '20180201AA',\n", + " '20180201AB',\n", + " '20180201AC',\n", + " '20180202AA',\n", + " '20180202AB',\n", + " '20180202AC',\n", + " '20180204AA',\n", + " '20180204AB',\n", + " '20180205AA',\n", + " '20180205AB',\n", + " '20180205AC',\n", + " '20180206AA',\n", + " '20180208AA',\n", + " '20180208AB',\n", + " '20180209AA',\n", + " '20180211AA',\n", + " '20180213AA',\n", + " '20180213AB',\n", + " '20180214AA',\n", + " '20180215AA',\n", + " '20180215AB',\n", + " '20180215AC',\n", + " '20180219AA',\n", + " '20180219AB',\n", + " '20180220AA',\n", + " '20180221AA',\n", + " '20180221AB',\n", + " '20180222AA',\n", + " '20180222AB',\n", + " '20180227AA',\n", + " '20180228AA',\n", + " '20180228AB',\n", + " '20180302AA',\n", + " '20180303AA',\n", + " '20180304AA',\n", + " '20180304AB',\n", + " '20180304AC',\n", + " '20180304AD',\n", + " '20180305AA',\n", + " '20180306AA',\n", + " '20180308AA',\n", + " '20180310AA',\n", + " '20180313AA',\n", + " '20180315AA',\n", + " '20180315AB',\n", + " '20180315AC',\n", + " '20180316AA',\n", + " '20180316AB',\n", + " '20180318AA',\n", + " '20180319AA',\n", + " '20180319AB',\n", + " '20180319AC',\n", + " '20180320AA',\n", + " '20180321AA',\n", + " '20180323AA',\n", + " '20180323AB',\n", + " '20180324AA',\n", + " '20180325AA',\n", + " '20180327AA',\n", + " '20180328AA',\n", + " '20180329AA',\n", + " '20180331AA',\n", + " '20180401AA',\n", + " '20180402AA',\n", + " '20180402AB',\n", + " '20180403AA',\n", + " '20180404AA',\n", + " '20180409AA',\n", + " '20180409AB',\n", + " '20180409AC',\n", + " '20180413AA',\n", + " '20180413AB',\n", + " '20180416AA',\n", + " '20180417AA',\n", + " '20180418AA',\n", + " '20180418AB',\n", + " '20180423AA',\n", + " '20180424AA',\n", + " '20180425AA',\n", + " '20180425AB',\n", + " '20180425AC',\n", + " '20180425AD',\n", + " '20180426AA',\n", + " '20180426AB',\n", + " '20180426AC',\n", + " '20180427AA',\n", + " '20180430AA',\n", + " '20180430AB',\n", + " '20180430AC',\n", + " '20180502AA',\n", + " '20180503AA',\n", + " '20180503AB',\n", + " '20180504AA',\n", + " '20180507AA',\n", + " '20180508AA',\n", + " '20180509AA',\n", + " '20180509AB',\n", + " '20180514AA',\n", + " '20180515AA',\n", + " '20180515AB',\n", + " '20180516AA',\n", + " '20180516AB',\n", + " '20180518AA',\n", + " '20180521AA',\n", + " '20180521AB',\n", + " '20180523AA',\n", + " '20180528AA',\n", + " '20180529AA',\n", + " '20180529AB',\n", + " '20180529AC',\n", + " '20180530AA',\n", + " '20180601AA',\n", + " '20180602AA',\n", + " '20180605AA',\n", + " '20180605AB',\n", + " '20180605AC',\n", + " '20180605AD',\n", + " '20180607AA',\n", + " '20180608AA',\n", + " '20180608AB',\n", + " '20180610AA',\n", + " '20180610AB',\n", + " '20180611AA',\n", + " '20180611AB',\n", + " '20180612AA',\n", + " '20180613AA',\n", + " '20180614AA',\n", + " '20180614AB',\n", + " '20180615AA',\n", + " '20180616AA',\n", + " '20180619AA',\n", + " '20180619AB',\n", + " '20180620AA',\n", + " '20180625AA',\n", + " '20180626AA',\n", + " '20180628AA',\n", + " '20180628AB',\n", + " '20180701AA',\n", + " '20180703AA',\n", + " '20180703AB',\n", + " '20180703AC',\n", + " '20180707AA',\n", + " '20180709AA',\n", + " '20180709AB',\n", + " '20180710AA',\n", + " '20180710AB',\n", + " '20180710AC',\n", + " '20180711AA',\n", + " 
'20180711AB',\n", + " '20180712AA',\n", + " '20180713AA',\n", + " '20180716AA',\n", + " '20180719AA',\n", + " '20180720AA',\n", + " '20180722AA',\n", + " '20180723AA',\n", + " '20180723AB',\n", + " '20180724AA',\n", + " '20180724AB',\n", + " '20180725AA',\n", + " '20180725AB',\n", + " '20180725AC',\n", + " '20180730AA',\n", + " '20180731AA',\n", + " '20180801AA',\n", + " '20180801AB',\n", + " '20180802AA',\n", + " '20180802AB',\n", + " '20180803AA',\n", + " '20180804AA',\n", + " '20180807AA',\n", + " '20180807AB',\n", + " '20180808AA',\n", + " '20180809AA',\n", + " '20180809AB',\n", + " '20180809AC',\n", + " '20180810AA',\n", + " '20180811AA',\n", + " '20180812AA',\n", + " '20180814AA',\n", + " '20180814AB',\n", + " '20180815AA',\n", + " '20180816AA',\n", + " '20180816AB',\n", + " '20180817AA',\n", + " '20180819AA',\n", + " '20180819AB',\n", + " '20180821AA',\n", + " '20180821AB',\n", + " '20180822AA',\n", + " '20180826AA',\n", + " '20180827AA',\n", + " '20180827AB',\n", + " '20180829AA',\n", + " '20180831AA',\n", + " '20180903AA',\n", + " '20180904AA',\n", + " '20180904AB',\n", + " '20180905AA',\n", + " '20180906AA',\n", + " '20180910AA',\n", + " '20180910AB',\n", + " '20180912AA',\n", + " '20180914AA',\n", + " '20180918AA',\n", + " '20180918AB',\n", + " '20180919AA',\n", + " '20180920AA',\n", + " '20180920AB',\n", + " '20180925AA',\n", + " '20180925AB',\n", + " '20180927AA',\n", + " '20180927AB',\n", + " '20181001AA',\n", + " '20181003AA',\n", + " '20181005AA',\n", + " '20181006AA',\n", + " '20181010AA',\n", + " '20181010AB',\n", + " '20181012AA',\n", + " '20181013AA',\n", + " '20181015AA',\n", + " '20181016AA',\n", + " '20181017AA',\n", + " '20181017AB',\n", + " '20181018AA',\n", + " '20181019AA',\n", + " '20181022AA',\n", + " '20181023AA',\n", + " '20181023AB',\n", + " '20181024AA',\n", + " '20181024AB',\n", + " '20181024AC',\n", + " '20181025AA',\n", + " '20181026AA',\n", + " '20181029AA',\n", + " '20181030AA',\n", + " '20181030AB',\n", + " '20181031AA',\n", + " '20181101AA',\n", + " '20181101AB',\n", + " '20181101AC',\n", + " '20181101AD',\n", + " '20181102AA',\n", + " '20181102AB',\n", + " '20181102AC',\n", + " '20181102AD',\n", + " '20181105AA',\n", + " '20181105AB',\n", + " '20181105AC',\n", + " '20181105AD',\n", + " '20181106AA',\n", + " '20181106AB',\n", + " '20181106AC',\n", + " '20181106AD',\n", + " '20181106AE',\n", + " '20181106AF',\n", + " '20181107AA',\n", + " '20181108AA',\n", + " '20181108AB',\n", + " '20181108AC',\n", + " '20181110AA',\n", + " '20181111AA',\n", + " '20181112AA',\n", + " '20181112AB',\n", + " '20181112AC',\n", + " '20181112AD',\n", + " '20181113AA',\n", + " '20181114AA',\n", + " '20181115AA',\n", + " '20181115AB',\n", + " '20181115AC',\n", + " '20181115AD',\n", + " '20181120AA',\n", + " '20181120AB',\n", + " '20181120AC',\n", + " '20181123AA',\n", + " '20181125AA',\n", + " '20181126AA',\n", + " '20181126AB',\n", + " '20181127AA',\n", + " '20181127AB',\n", + " '20181127AC',\n", + " '20181128AA',\n", + " '20181129AA',\n", + " '20181129AB',\n", + " '20181130AA',\n", + " '20181130AB',\n", + " '20181201AA',\n", + " '20181201AB',\n", + " '20181204AA',\n", + " '20181204AB',\n", + " '20181204AC',\n", + " '20181205AA',\n", + " '20181205AB',\n", + " '20181206AA',\n", + " '20181206AB',\n", + " '20181206AC',\n", + " '20181206AD',\n", + " '20181206AE',\n", + " '20181206AF',\n", + " '20181206AG',\n", + " '20181207AA',\n", + " '20181208AA',\n", + " '20181210AA',\n", + " '20181210AB',\n", + " '20181211AA',\n", + " '20181211AB',\n", + " '20181211AC',\n", + " 
'20181212AA',\n", + " '20181214AA',\n", + " '20181216AA',\n", + " '20181216AB',\n", + " '20181219AA',\n", + " '20181219AB',\n", + " '20181221AA',\n", + " '20181221AB',\n", + " '20181221AC',\n", + " '20181222AA',\n", + " '20181228AA',\n", + " '20181230AA',\n", + " '20190103AA',\n", + " '20190108AA',\n", + " '20190109AA',\n", + " '20190110AA',\n", + " '20190110AB',\n", + " '20190110AC',\n", + " '20190111AA',\n", + " '20190113AA',\n", + " '20190113AB',\n", + " '20190114AA',\n", + " '20190115AA',\n", + " '20190116AA',\n", + " '20190116AB',\n", + " '20190116AC',\n", + " '20190119AA',\n", + " '20190121AA',\n", + " '20190122AA',\n", + " '20190125AA',\n", + " '20190128AA',\n", + " '20190128AB',\n", + " '20190128AC',\n", + " '20190129AA',\n", + " '20190129AB',\n", + " '20190130AA',\n", + " '20190131AA',\n", + " '20190131AB',\n", + " '20190131AC',\n", + " '20190201AA',\n", + " '20190201AB',\n", + " '20190201AC',\n", + " '20190202AA',\n", + " '20190204AA',\n", + " '20190205AA',\n", + " '20190205AB',\n", + " '20190205AC',\n", + " '20190206AA',\n", + " '20190206AB',\n", + " '20190206AC',\n", + " '20190208AA',\n", + " '20190212AA',\n", + " '20190212AB',\n", + " '20190213AA',\n", + " '20190213AB',\n", + " '20190213AC',\n", + " '20190213AD',\n", + " '20190213AE',\n", + " '20190215AA',\n", + " '20190215AB',\n", + " '20190215AC',\n", + " '20190215AD',\n", + " '20190215AE',\n", + " '20190215AF',\n", + " '20190219AA',\n", + " '20190220AA',\n", + " '20190220AB',\n", + " '20190220AC',\n", + " '20190221AA',\n", + " '20190223AA',\n", + " '20190223AB',\n", + " '20190223AC',\n", + " '20190225AA',\n", + " '20190227AA',\n", + " '20190227AB',\n", + " '20190301AA',\n", + " '20190301AB',\n", + " '20190301AC',\n", + " '20190301AD',\n", + " '20190304AA',\n", + " '20190304AB',\n", + " '20190304AC',\n", + " '20190304AD',\n", + " '20190305AA',\n", + " '20190306AA',\n", + " '20190306AB',\n", + " '20190307AA',\n", + " '20190309AA',\n", + " '20190310AA',\n", + " '20190311AA',\n", + " '20190311AB',\n", + " '20190311AC',\n", + " '20190312AA',\n", + " '20190313AA',\n", + " '20190313AB',\n", + " '20190313AC',\n", + " '20190313AD',\n", + " '20190313AE',\n", + " '20190314AA',\n", + " '20190314AB',\n", + " '20190314AC',\n", + " '20190314AD',\n", + " '20190314AE',\n", + " '20190314AF',\n", + " '20190314AG',\n", + " '20190314AH',\n", + " '20190314AI',\n", + " '20190314AJ',\n", + " '20190314AK',\n", + " '20190314AL',\n", + " '20190315AA',\n", + " '20190315AB',\n", + " '20190315AC',\n", + " '20190315AD',\n", + " '20190320AA',\n", + " '20190320AB',\n", + " '20190320AC',\n", + " '20190320AD',\n", + " '20190325AA',\n", + " '20190326AA',\n", + " '20190326AB',\n", + " '20190327AA',\n", + " '20190327AB',\n", + " '20190327AC',\n", + " '20190328AA',\n", + " '20190329AA',\n", + " '20190329AB',\n", + " '20190401AA',\n", + " '20190401AB',\n", + " '20190402AA',\n", + " '20190404AA',\n", + " '20190405AA',\n", + " '20190406AA',\n", + " '20190410AA',\n", + " '20190410AB',\n", + " '20190410AC',\n", + " '20190411AA',\n", + " '20190411AB',\n", + " '20190411AC',\n", + " '20190412AA',\n", + " '20190416AA',\n", + " '20190417AA',\n", + " '20190417AB',\n", + " '20190418AA',\n", + " '20190420AA',\n", + " '20190422AA',\n", + " '20190423AA',\n", + " '20190424AA',\n", + " '20190424AB',\n", + " '20190426AA',\n", + " '20190427AA',\n", + " '20190427AB',\n", + " '20190429AA',\n", + " '20190430AA',\n", + " '20190430AB',\n", + " '20190430AC',\n", + " '20190503AA',\n", + " '20190503AB',\n", + " '20190506AA',\n", + " '20190507AA',\n", + " '20190507AB',\n", + " 
'20190512AA',\n", + " '20190513AA',\n", + " '20190513AB',\n", + " '20190513AC',\n", + " '20190514AA',\n", + " '20190515AA',\n", + " '20190515AB',\n", + " '20190515AC',\n", + " '20190515AD',\n", + " '20190515AE',\n", + " '20190516AA',\n", + " '20190517AA',\n", + " '20190520AA',\n", + " '20190522AA',\n", + " '20190522AB',\n", + " '20190522AC',\n", + " '20190522AD',\n", + " '20190522AE',\n", + " '20190522AF',\n", + " '20190524AA',\n", + " '20190524AB',\n", + " '20190526AA',\n", + " '20190526AB',\n", + " '20190527AA',\n", + " '20190527AB',\n", + " '20190528AA',\n", + " '20190528AB',\n", + " '20190528AC',\n", + " '20190529AA',\n", + " '20190529AB',\n", + " '20190529AC',\n", + " '20190530AA',\n", + " '20190530AB',\n", + " '20190530AC',\n", + " '20190531AA',\n", + " '20190603AA',\n", + " '20190604AA',\n", + " '20190604AB',\n", + " '20190604AC',\n", + " '20190604AD',\n", + " '20190604AE',\n", + " '20190605AA',\n", + " '20190605AB',\n", + " '20190605AC',\n", + " '20190606AA',\n", + " '20190606AB',\n", + " '20190606AC',\n", + " '20190606AD',\n", + " '20190607AA',\n", + " '20190608AA',\n", + " '20190609AA',\n", + " '20190611AA',\n", + " '20190612AA',\n", + " '20190613AA',\n", + " '20190613AB',\n", + " '20190614AA',\n", + " '20190615AA',\n", + " '20190616AA',\n", + " '20190616AB',\n", + " '20190616AC',\n", + " '20190617AA',\n", + " '20190618AA',\n", + " '20190620AA',\n", + " '20190620AB',\n", + " '20190620AC',\n", + " '20190620AD',\n", + " '20190621AA',\n", + " '20190621AB',\n", + " '20190624AA',\n", + " '20190625AA',\n", + " '20190625AB',\n", + " '20190625AC',\n", + " '20190625AD',\n", + " '20190625AE',\n", + " '20190626AA',\n", + " '20190701AA',\n", + " '20190703AA',\n", + " '20190707AA',\n", + " '20190707AB',\n", + " '20190708AA',\n", + " '20190708AB',\n", + " '20190709AA',\n", + " '20190709AB',\n", + " '20190710AA',\n", + " '20190711AA',\n", + " '20190711AB',\n", + " '20190711AC',\n", + " '20190712AA',\n", + " '20190713AA',\n", + " '20190714AA',\n", + " '20190716AA',\n", + " '20190716AB',\n", + " '20190717AA',\n", + " '20190717AB',\n", + " '20190717AC',\n", + " '20190718AA',\n", + " '20190718AB',\n", + " '20190718AC',\n", + " '20190719AA',\n", + " '20190719AB',\n", + " '20190722AA',\n", + " '20190722AB',\n", + " '20190722AC',\n", + " '20190723AA',\n", + " '20190724AA',\n", + " '20190724AB',\n", + " '20190724AC',\n", + " '20190724AD',\n", + " '20190725AA',\n", + " '20190726AA',\n", + " '20190729AA',\n", + " '20190729AB',\n", + " '20190730AA',\n", + " '20190731AA',\n", + " '20190731AB',\n", + " '20190731AC',\n", + " '20190731AD',\n", + " '20190731AE',\n", + " '20190802AA',\n", + " '20190804AA',\n", + " '20190806AA',\n", + " '20190807AA',\n", + " '20190807AB',\n", + " '20190808AA',\n", + " '20190808AB',\n", + " '20190810AA',\n", + " '20190810AB',\n", + " '20190812AA',\n", + " '20190812AB',\n", + " '20190813AA',\n", + " '20190813AB',\n", + " '20190814AA',\n", + " '20190814AB',\n", + " '20190814AC',\n", + " '20190815AA',\n", + " '20190816AA',\n", + " '20190819AA',\n", + " '20190819AB',\n", + " '20190821AA',\n", + " '20190822AA',\n", + " '20190822AB',\n", + " '20190822AC',\n", + " '20190823AA',\n", + " '20190825AA',\n", + " '20190826AA',\n", + " '20190826AB',\n", + " '20190827AA',\n", + " '20190828AA',\n", + " '20190828AB',\n", + " '20190828AC',\n", + " '20190829AA',\n", + " '20190901AA',\n", + " '20190901AB',\n", + " '20190901AC',\n", + " '20190903AA',\n", + " '20190903AB',\n", + " '20190904AA',\n", + " '20190905AA',\n", + " '20190905AB',\n", + " '20190906AA',\n", + " '20190906AB',\n", + " 
'20190906AC',\n", + " '20190909AA',\n", + " '20190909AB',\n", + " '20190909AC',\n", + " '20190911AA',\n", + " '20190912AA',\n", + " '20190912AB',\n", + " '20190912AC',\n", + " '20190912AD',\n", + " '20190912AE',\n", + " '20190912AF',\n", + " '20190913AA',\n", + " '20190914AA',\n", + " '20190914AB',\n", + " '20190915AA',\n", + " '20190916AA',\n", + " '20190916AB',\n", + " '20190916AC',\n", + " '20190917AA',\n", + " '20190917AB',\n", + " '20190917AC',\n", + " '20190917AD',\n", + " '20190919AA',\n", + " '20190920AA',\n", + " '20190920AB',\n", + " '20190922AA',\n", + " '20190922AB',\n", + " '20190923AA',\n", + " '20190924AA',\n", + " '20190924AB',\n", + " '20190924AC',\n", + " '20190924AD',\n", + " '20190925AA',\n", + " '20190925AB',\n", + " '20190925AC',\n", + " '20190926AA',\n", + " '20190926AB',\n", + " '20190926AC',\n", + " '20190927AA',\n", + " '20190930AA',\n", + " '20190930AB',\n", + " '20190930AC',\n", + " '20191001AA',\n", + " '20191003AA',\n", + " '20191003AB',\n", + " '20191003AC',\n", + " '20191004AA',\n", + " '20191007AA',\n", + " '20191008AA',\n", + " '20191008AB',\n", + " '20191009AA',\n", + " '20191009AB',\n", + " '20191010AA',\n", + " '20191011AA',\n", + " '20191012AA',\n", + " '20191013AA',\n", + " '20191014AA',\n", + " '20191017AA',\n", + " '20191017AB',\n", + " '20191017AC',\n", + " '20191020AA',\n", + " '20191021AA',\n", + " '20191021AB',\n", + " '20191022AA',\n", + " '20191023AA',\n", + " '20191023AB',\n", + " '20191023AC',\n", + " '20191023AD',\n", + " '20191024AA',\n", + " '20191024AB',\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "create_file_tree_and_json(author_source, registry_source, metadata_directory)" ] }, { @@ -19,8 +1497,25 @@ "metadata": {}, "outputs": [], "source": [ - "create_file_tree_and_json(author_source, registry_source, target_directory)" + "audit_files(raw_files_directory)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "convert_files(raw_files_directory, metadata_directory, directories_to_process)\n", + "print('Done converting files.')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/scripts/EGAP/files_to_import_structure.py b/scripts/EGAP/files_to_import_structure.py index 89cbe3934ce..0a3d78860f0 100644 --- a/scripts/EGAP/files_to_import_structure.py +++ b/scripts/EGAP/files_to_import_structure.py @@ -3,16 +3,28 @@ import shutil import argparse from distutils.dir_util import copy_tree +import logging from nose.tools import assert_equal +logger = logging.getLogger(__name__) + + # This takes the item id from the path of the project directory for example '20121001AA Findley' -> '20121001AA' get_item_id = lambda _path: _path.split(os.sep)[-1].split(' ')[0] +def get_project_id(root, source_dir): + project_id_base = root.split(source_dir)[-1] + if ' ' in project_id_base: + project_id = project_id_base.split(' ')[0].split('/')[-1] + else: + project_id = project_id_base.split('/')[0] + return project_id + + # Check if file name starts with EGAP id for example '20121001AA_PAP.pdf' -def check_id(root, item): - project_id = get_item_id(root.split('/')[-3]) +def check_id(project_id, item): return item.startswith(project_id) @@ -20,16 +32,17 @@ def check_id(root, item): check_anon = lambda item: 'pap_anon' in item.lower() or 'anonymous' in item.lower() -def action_files_by_name(root, source, item_name): +def 
action_files_by_name(root, source_item, item_name):
     """ Pick out anonymous files and create a new folder to move them into;
     remove files that don't follow the id naming convention.
     :param root:
-    :param source:
+    :param source_item:
     :param item_name:
     :return:
     """
-    if not check_id(root, item_name):
-        path = os.path.join(root, item_name)
+    project_id = get_project_id(root, source_item)
+    path = os.path.join(root, item_name)
+    if not check_id(project_id, item_name):
         os.remove(path)
         return
@@ -38,18 +51,20 @@ def action_files_by_name(root, source, item_name):
     if not os.path.exists(destination_parent):
         os.mkdir(destination_parent)
+    destination_item = os.path.join(destination_parent, item_name)
+    shutil.move(path, destination_item)
-    destination = os.path.join(destination_parent, item_name)
-    shutil.move(source, destination)

+def audit_files(source_directory):
+    logger.info("Running audit. Source: {}".format(source_directory))
-def audit_files(source):
     including = open('including.txt', 'w+')
     ignoring = open('ignoring.txt', 'w+')
-    for root, dir, files in os.walk(source):
+    for root, directory, files in os.walk(source_directory):
         for item in files:
-            name = os.path.join(root.split('/')[-1], item)  # get file/folder name after slash
-            if not check_id(root, name):
+            project_id = get_project_id(root, source_directory)
+            name = '{}/{}'.format(root.split(source_directory)[-1], item)  # get file/folder name from just under source
+            if not check_id(project_id, item):
                 ignoring.writelines(name + '\r')
             else:
                 including.writelines(name + '\r')
@@ -57,7 +72,7 @@
     ignoring.close()
     including.close()

-    projects = set(os.listdir(source))
+    projects = set(os.listdir(source_directory))
     project_ids = set([get_item_id(folders) for folders in list(projects)])

     # check for duplicate ids
@@ -77,30 +92,34 @@ def main(files_dir, metadata_dir, id_list=None):
     :param files_dir: the source path we're picking files out of
     :param metadata_dir: a pre-made directory structure for importing projects
         that we are packing files into.
+    :param id_list: an optional list of project ids to limit what gets processed
     :return:
     """
+    logger.info("Processing files. 
Source: {} Destination: {}".format(files_dir, metadata_dir))
+
     project_dirs = os.listdir(files_dir)
     if id_list:
         project_dirs = [project for project in project_dirs if get_item_id(project) in id_list]

+    logger.info('Processing directories: {}'.format(project_dirs))
+
     # Copy whole tree to preserve file hierarchy then
     for item in project_dirs:
         item_id = get_item_id(item)
-        source = os.path.join(files_dir, item)
-        destination = os.path.join(metadata_dir, item_id, 'data', 'nonanonymous')
-        if os.path.isdir(source):
-            copy_tree(source, destination)
+        source_item = os.path.join(files_dir, item)
+        destination_item = os.path.join(metadata_dir, item_id, 'data', 'nonanonymous')
+        if os.path.isdir(source_item):
+            copy_tree(source_item, destination_item)

-    for root, dir, files in os.walk(metadata_dir):
+    for root, directory, files in os.walk(metadata_dir):
         for item in files:
             if item not in ('project.json', 'registration-schema.json'):
-                source = os.path.join(root, item)
-                action_files_by_name(root, source, item)
+                action_files_by_name(root, metadata_dir, item)

     # Check All anon files in /anonymous/ directory
-    for root, dir, files in os.walk(metadata_dir):
+    for root, directory, files in os.walk(metadata_dir):
         for item in files:
-            if item not in ('project.json', 'registration-schema.json'):
+            if item not in ('project.json', 'registration-schema.json', '.DS_Store'):
                 if check_anon(item):
                     assert '/anonymous' in root
                 else:
@@ -112,22 +131,22 @@
     parser.add_argument(
         '-source',
         '--source',
-        help='This should be the directory for the EGAP data dump, traditionally called "EGAP_REGISTRY_staging/3 Registrations/"'
+        help='The directory for the EGAP data files, traditionally called "EGAP_REGISTRY_staging/3 Registrations/"'
     )
     parser.add_argument(
         '-destination',
         '--destination',
-        help='This should be the directory of the import file structure containing the bags of data.'
+        help='The directory of the import file structure containing the bags of data.'
     )
     parser.add_argument(
         '-list',
         '--list',
-        help='This is a list of ids to import into a the new metadata directory.'
+        help='An optional list of ids to import into the new metadata directory.'
     )
     parser.add_argument(
         '-audit',
         '--audit',
-        help='This includes all files that don\'t follow the "_PAP" naming convention.'
+        help='Boolean to generate two lists of all files that should and should not be included. Needs "source".'
) args = parser.parse_args() diff --git a/scripts/EGAP/requirements.txt b/scripts/EGAP/requirements.txt new file mode 100644 index 00000000000..7e65d67ac4f --- /dev/null +++ b/scripts/EGAP/requirements.txt @@ -0,0 +1,68 @@ +appnope==0.1.0 +attrs==19.3.0 +backcall==0.1.0 +bcrypt==3.1.7 +bleach==3.1.0 +blinker==1.4 +bson==0.5.8 +cffi==1.13.1 +Click==7.0 +decorator==4.4.0 +defusedxml==0.6.0 +Django==2.2.6 +django-rest-framework==0.1.0 +djangorestframework==3.10.3 +entrypoints==0.3 +Flask==1.1.1 +furl==2.1.0 +future==0.18.1 +importlib-metadata==0.23 +ipykernel==5.1.3 +ipython==7.8.0 +ipython-genutils==0.2.0 +ipywidgets==7.5.1 +itsdangerous==1.1.0 +jedi==0.15.1 +Jinja2==2.10.3 +json5==0.8.5 +jsonschema==3.1.1 +jupyter==1.0.0 +jupyter-client==5.3.4 +jupyter-console==6.0.0 +jupyter-core==4.6.1 +jupyterlab==1.1.4 +jupyterlab-server==1.0.6 +MarkupSafe==1.1.1 +mistune==0.8.4 +more-itertools==7.2.0 +nbconvert==5.6.1 +nbformat==4.4.0 +nose==1.3.7 +notebook==6.0.1 +orderedmultidict==1.0.1 +pandocfilters==1.4.2 +parso==0.5.1 +pexpect==4.7.0 +pickleshare==0.7.5 +prometheus-client==0.7.1 +prompt-toolkit==2.0.10 +ptyprocess==0.6.0 +pycparser==2.19 +Pygments==2.4.2 +pyrsistent==0.15.4 +python-dateutil==2.8.0 +pytz==2019.3 +pyzmq==18.1.0 +qtconsole==4.5.5 +Send2Trash==1.5.0 +six==1.12.0 +sqlparse==0.3.0 +terminado==0.8.2 +testpath==0.4.2 +tornado==6.0.3 +traitlets==4.3.3 +wcwidth==0.1.7 +webencodings==0.5.1 +Werkzeug==0.16.0 +widgetsnbextension==3.5.1 +zipp==0.6.0 From c9e3449e7e91a9bc58e94a3d2f99a32a1b8b1f28 Mon Sep 17 00:00:00 2001 From: John Tordoff Date: Wed, 6 Nov 2019 13:08:14 -0500 Subject: [PATCH 8/9] fix double quotes for travis (#9194) --- scripts/EGAP/files_to_import_structure.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/EGAP/files_to_import_structure.py b/scripts/EGAP/files_to_import_structure.py index 0a3d78860f0..43b9dbfa3f1 100644 --- a/scripts/EGAP/files_to_import_structure.py +++ b/scripts/EGAP/files_to_import_structure.py @@ -56,7 +56,7 @@ def action_files_by_name(root, source_item, item_name): def audit_files(source_directory): - logger.info("Running audit. Source: {}".format(source_directory)) + logger.info('Running audit. Source: {}'.format(source_directory)) including = open('including.txt', 'w+') ignoring = open('ignoring.txt', 'w+') @@ -95,7 +95,7 @@ def main(files_dir, metadata_dir, id_list=None): :param id_list: an optional list of project ids to limit what gets processed :return: """ - logger.info("Processing files. Source: {} Destination: {}".format(files_dir, metadata_dir)) + logger.info('Processing files. Source: {} Destination: {}'.format(files_dir, metadata_dir)) project_dirs = os.listdir(files_dir) if id_list: From 5b58c5803b0f666d1f5d8be69d5ee574a1732efd Mon Sep 17 00:00:00 2001 From: "Brian J. Geiger" Date: Thu, 7 Nov 2019 09:40:18 -0500 Subject: [PATCH 9/9] Update changelog and package.json for release --- CHANGELOG | 5 +++++ package.json | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG b/CHANGELOG index da406c46bf7..f0b26e1e32d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,11 @@ We follow the CalVer (https://calver.org/) versioning scheme: YY.MINOR.MICRO. 
+19.31.0 (2019-11-07)
+===================
+- EGAP: Parse project structure, add contributors, add files, ingest the draft registration, and add a Jupyter notebook
+- Modify a Chronos field for proper contributor classification
+
 19.30.0 (2019-10-16)
 ===================
 - Fix weirdness around deleted nodes by not deleting OSF Storage
diff --git a/package.json b/package.json
index 8a756331088..f884cb97f9d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "OSF",
-  "version": "19.30.0",
+  "version": "19.31.0",
   "description": "Facilitating Open Science",
   "repository": "https://github.com/CenterForOpenScience/osf.io",
   "author": "Center for Open Science",
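For completeness, the path-parsing helpers that PATCH 7 adds to scripts/EGAP/files_to_import_structure.py can be sanity-checked on their own. A minimal sketch reusing the definitions from the diff above; the example paths and filenames are illustrative (POSIX separators assumed):

```python
import os

# Helper definitions as they appear in files_to_import_structure.py above.
get_item_id = lambda _path: _path.split(os.sep)[-1].split(' ')[0]
check_anon = lambda item: 'pap_anon' in item.lower() or 'anonymous' in item.lower()

def check_id(project_id, item):
    # A file belongs to a project when its name starts with the EGAP id.
    return item.startswith(project_id)

# '20121001AA Findley' style folder names yield the bare EGAP id.
assert get_item_id('3 Registrations/20121001AA Findley') == '20121001AA'
assert check_id('20151016AA', '20151016AA_PAP.pdf')   # kept in place
assert not check_id('20151016AA', 'justafile.pdf')    # removed by the cleanup pass
assert check_anon('20151016AA_anonymous.pdf')         # moved to the anonymous folder
```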