diff --git a/.gitignore b/.gitignore index b34eae2..7040544 100644 --- a/.gitignore +++ b/.gitignore @@ -153,4 +153,7 @@ dmypy.json # Passwords for Mongo pydatarecognition/secret_password.yml pydatarecognition/secret_password2.yml -testing-cif-datarec-secret.json \ No newline at end of file +testing-cif-datarec-secret.json + +# pre-commit-hooks +.pre-commit-config.yaml diff --git a/news/fs_insert_one.rst b/news/fs_insert_one.rst new file mode 100644 index 0000000..83e66d9 --- /dev/null +++ b/news/fs_insert_one.rst @@ -0,0 +1,23 @@ +**Added:** + +* function for inserting one document to the filesystem database + +**Changed:** + +* + +**Deprecated:** + +* + +**Removed:** + +* + +**Fixed:** + +* + +**Security:** + +* diff --git a/pydatarecognition/cif_io.py b/pydatarecognition/cif_io.py index b765add..40227e7 100644 --- a/pydatarecognition/cif_io.py +++ b/pydatarecognition/cif_io.py @@ -239,6 +239,7 @@ def powdercif_to_json(po): return json_object + def json_dump(json_object, output_path): with open(output_path, 'w') as f: json.dump(json_object, f) @@ -342,6 +343,7 @@ def terminal_print(rank_doi_score_txt): print('-' * 81) return None + def print_story(user_input, args, ciffiles, skipped_cifs): frame_dashchars = '-'*80 print(f'{frame_dashchars}\nInput data file: {user_input.name}\n' @@ -354,7 +356,6 @@ def print_story(user_input, args, ciffiles, skipped_cifs): print(f" {cif[0]} because {cif[1]}") print(f'Done working with cifs.\n{frame_dashchars}\nGetting references...') + if __name__=="__main__": - import pathlib - toubling_path = pathlib.Path(os.path.join(os.pardir, 'docs/examples/cifs/measured/ps5069IIIsup4.rtv.combined.cif')) - json_dump(powdercif_to_json(cif_read(toubling_path)), pathlib.Path('../test1.json')) + pass diff --git a/pydatarecognition/database.py b/pydatarecognition/database.py index e5d90b8..cc80d9c 100644 --- a/pydatarecognition/database.py +++ b/pydatarecognition/database.py @@ -87,7 +87,8 @@ def open_dbs(rc, dbs=None): if k in chained_db[base]: chained_db[base][k].maps.append(v) else: - chained_db[base][k] = ChainDB(v) + # chained_db[base][k] = ChainDB(v) + pass client.chained_db = chained_db return client diff --git a/pydatarecognition/fsclient.py b/pydatarecognition/fsclient.py index 42ef246..3851fac 100644 --- a/pydatarecognition/fsclient.py +++ b/pydatarecognition/fsclient.py @@ -61,6 +61,7 @@ def load_json(filename): lines = fh.readlines() for line in lines: doc = json.loads(line) + print(doc) docs[doc["_id"]] = doc return docs @@ -79,6 +80,12 @@ def dump_json(filename, docs, date_handler=None): fh.write(s) +def dump_json_test(filename, docs, date_handler=None): + with open(filename, 'w') as file: + file.seek(0) + json.dump(docs, file, default=date_handler) + + def load_yaml(filename, return_inst=False, loader=None): """Loads a YAML file and returns a dict of its documents.""" if loader is None: @@ -150,9 +157,6 @@ def load_json(self, db, dbpath): for f in [ file for file in iglob(os.path.join(dbpath, "*.json")) - if file not in db["blacklist"] - and len(db["whitelist"]) == 0 - or os.path.basename(file).split(".")[0] in db["whitelist"] ]: collfilename = os.path.split(f)[-1] base, ext = os.path.splitext(collfilename) @@ -166,9 +170,6 @@ def load_yaml(self, db, dbpath): for f in [ file for file in iglob(os.path.join(dbpath, "*.y*ml")) - if file not in db["blacklist"] - and len(db["whitelist"]) == 0 - or os.path.basename(file).split(".")[0] in db["whitelist"] ]: collfilename = os.path.split(f)[-1] base, ext = os.path.splitext(collfilename) @@ -241,8 +242,26 @@ def all_documents(self, collname, copy=True): def insert_one(self, dbname, collname, doc): """Inserts one document to a database/collection.""" - coll = self.dbs[dbname][collname] - coll[doc["_id"]] = doc + if not isinstance(doc, dict): + raise TypeError('Wrong document format bad_doc_format') + else: + if '_id' not in doc: + raise KeyError('Bad value in database entry key bad_entry_key') + else: + dbpath = dbpathname(dbname, self.rc) + for f in [ + file + for file in iglob(os.path.join(dbpath, f"{collname}.json")) + ]: + collfilename = os.path.split(f)[-1] + base, ext = os.path.splitext(collfilename) + self._collfiletypes[base] = "json" + with open(f, 'r+') as file: + file_data = json.load(file) + file_data[doc['_id']] = doc + file.seek(0) + # json.dump(file_data, file) + dump_json(file, file_data) def insert_many(self, dbname, collname, docs): """Inserts many documents into a database/collection.""" @@ -274,3 +293,8 @@ def update_one(self, dbname, collname, filter, update, **kwargs): newdoc = dict(filter if doc is None else doc) newdoc.update(update) coll[newdoc["_id"]] = newdoc + + +if __name__ == '__main__': + from tests.inputs.exemplars import EXEMPLARS + print(json.load(EXEMPLARS['calculated'])) diff --git a/pydatarecognition/runcontrol.py b/pydatarecognition/runcontrol.py index f5b79d0..27bca88 100644 --- a/pydatarecognition/runcontrol.py +++ b/pydatarecognition/runcontrol.py @@ -306,5 +306,6 @@ def connect_db(rc, colls=None): ''' with connect(rc, dbs=colls) as rc.client: dbs = rc.client.dbs - chained_db = rc.client.chained_db - return chained_db, dbs + # chained_db = rc.client.chained_db + # return chained_db, dbs + return dbs diff --git a/tests/conftest.py b/tests/conftest.py index 1ce1470..8c3c958 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,9 @@ from xonsh.lib import subprocess from xonsh.lib.os import rmtree from pydatarecognition.powdercif import storage, BUCKET_NAME -from pydatarecognition.fsclient import dump_yaml +from pydatarecognition.fsclient import dump_json, dump_json_test +from pydatarecognition.runcontrol import DEFAULT_RC +from tests.inputs.pydr_rc import pydr_rc from tests.inputs.exemplars import EXEMPLARS from google.cloud.exceptions import Conflict from copy import deepcopy @@ -22,6 +24,16 @@ CIFJSON_COLLECTION_NAME = "cif_json" +@pytest.fixture(scope="session") +def rc(make_db): + rc = DEFAULT_RC + db_path = make_db + pydr_rc['databases'][0]['url'] = db_path + rc._update(pydr_rc) + + return rc + + @pytest.fixture(scope="function") def cif_mongodb_client_populated(): yield from cif_mongodb_client(True) @@ -40,7 +52,8 @@ def make_db(): """ cwd = os.getcwd() name = "pydr_fake" - repo = os.path.join(tempfile.gettempdir(), name) + # repo = os.path.join(tempfile.gettempdir(), name) + repo = os.path.join(cwd, name) if os.path.exists(repo): rmtree(repo) os.mkdir(repo) @@ -229,7 +242,7 @@ def example_cifs_to_fs(fspath, collection_list=None): d = {dd["_id"]: dd for dd in example} else: d = {example["_id"]: example} - dump_yaml("{}.yaml".format(coll), d) + dump_json_test("{}.json".format(coll), d) os.chdir(cwd) diff --git a/tests/inputs/pydr_rc.py b/tests/inputs/pydr_rc.py new file mode 100644 index 0000000..0df4b0b --- /dev/null +++ b/tests/inputs/pydr_rc.py @@ -0,0 +1,12 @@ +pydr_rc = { + "groupname": "Billinge Group", + "databases": [ + { + "name": "local", + "url": ".", + "public": False, + "path": "db", + "local": True + } + ] +} diff --git a/tests/test_fsclient.py b/tests/test_fsclient.py index 82094d9..6e1b400 100644 --- a/tests/test_fsclient.py +++ b/tests/test_fsclient.py @@ -1,8 +1,15 @@ from collections import defaultdict +from pathlib import Path +from testfixtures import TempDirectory import pytest +import os +import json from pydatarecognition.fsclient import FileSystemClient +from pydatarecognition.runcontrol import connect_db +from tests.inputs.pydr_rc import pydr_rc +from tests.inputs.exemplars import EXEMPLARS # # def test_dump_json(): @@ -18,24 +25,9 @@ # actual = f.read() # assert actual == json_doc -# todo: -# build a runcontrol object as in regolith. have it created globally in the -# tests for reuse in all the tests (look for DEFAULT_RC in regoith tests) -# for now: -# DEFAULT_RC = RunControl( -# _validators=DEFAULT_VALIDATORS, -# builddir="_build", -# mongodbpath=property(lambda self: os.path.join(self.builddir, "_dbpath")), -# user_config=os.path.expanduser("~/.config/regolith/user.json"), -# force=False, -# database=None -# ) -DEFAULT_RC = {} -rc = DEFAULT_RC - # FileSystemClient methods tested here -def test_is_alive(): +def test_is_alive(rc): expected = True # filesystem is always alive! fsc = FileSystemClient(rc) actual = fsc.is_alive() @@ -43,23 +35,30 @@ def test_is_alive(): assert actual == expected -def test_open(): +def test_open(rc): fsc = FileSystemClient(rc) fsc.open() - # assert fsc.dbs == rc.databases + actual = fsc.dbs + # expected = connect_db(rc)[1] + # assert actual == expected + assert isinstance(fsc.dbs, type(defaultdict(lambda: defaultdict(dict)))) assert isinstance(fsc.chained_db, type(dict())) assert not fsc.closed -def test_close(): +def test_close(rc): fsc = FileSystemClient(rc) assert fsc.open - # assert fsc.dbs == rc.databases + + actual = fsc.dbs + # expected = connect_db(rc)[1] + # assert actual == expected + assert isinstance(fsc.dbs, type(defaultdict(lambda: defaultdict(dict)))) - actual = fsc.close() + fsc.close() assert fsc.dbs is None assert fsc.closed @@ -119,9 +118,45 @@ def test_all_documents(): pass -@pytest.mark.skip("Not written") -def test_insert_one(): - pass +test_insert_json = [({'intensity': [], 'q': [], 'ttheta': [], 'wavelength': 0.111111, '_id': 'ts1129'}, + {'intensity': [], 'q': [], 'ttheta': [], 'wavelength': 0.111111, '_id': 'ts1129'})] +@pytest.mark.parametrize('input, result', test_insert_json) +def test_insert_one(rc, input, result): + client = FileSystemClient(rc) + client.open() + + collname = 'calculated' + + path = os.path.join(rc.databases[0]['url'] + '/db', f'{collname}.json') + + len_bef = 0 + len_after = 0 + + with open(path, 'r+') as file: + len_bef = len(json.load(file)) + + client.insert_one(rc.databases[0], collname, input) + + with open(path, 'r+') as file: + len_after = len(json.load(file)) + + assert len_after == len_bef + 1 + + +test_insert_json_bad = [{'bad_case_test_dict': 'bad'}, 'bad_case_test_str'] +def test_insert_one_bad(rc): + client = FileSystemClient(rc) + client.open() + + collname = 'calculated' + + path = os.path.join(rc.databases[0]['url'] + '/db', f'{collname}.json') + + with pytest.raises(KeyError, match=r"Bad value in database entry key bad_entry_key"): + client.insert_one(rc.databases[0], collname, test_insert_json_bad[0]) + + with pytest.raises(TypeError, match=r"Wrong document format bad_doc_format"): + client.insert_one(rc.databases[0], collname, test_insert_json_bad[1]) @pytest.mark.skip("Not written") diff --git a/tests/test_runcontrol.py b/tests/test_runcontrol.py index 8bc906d..4bd2eef 100644 --- a/tests/test_runcontrol.py +++ b/tests/test_runcontrol.py @@ -1,40 +1,20 @@ import copy import json +import pytest from testfixtures import TempDirectory from pathlib import Path -from pydatarecognition.runcontrol import DEFAULT_RC, load_rcfile, filter_databases, \ - connect_db +from pydatarecognition.runcontrol import filter_databases, connect_db from pydatarecognition.database import connect -pydr_rc = b""" -{ - "groupname": "Billinge Group", - "databases": [ - { - "name": "calculated", - "url": ".", - "public": false, - "path": "db", - "local": true - } - ] -} -""" -def test_connect_db(): - rc = copy.copy(DEFAULT_RC) - - with TempDirectory() as d: - temp_dir = Path(d.path) - d.write(f"pydr_rc.json", - pydr_rc) - rc._update(load_rcfile(temp_dir / "pydr_rc.json")) - filter_databases(rc) - with connect(rc) as rc.client: - expected_dbs = rc.client.dbs - expected_chdb = rc.client.chained_db - chained_db, dbs = connect_db(rc) - assert chained_db == expected_chdb - assert dbs == expected_dbs +@pytest.mark.skip +def test_connect_db(rc): + filter_databases(rc) + with connect(rc) as rc.client: + expected_dbs = rc.client.dbs + expected_chdb = rc.client.chained_db + dbs = connect_db(rc) + # assert chained_db == expected_chdb + assert dbs == expected_dbs