diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py index de0ed534..9074dd76 100644 --- a/abcd/backends/atoms_opensearch.py +++ b/abcd/backends/atoms_opensearch.py @@ -9,8 +9,6 @@ from ase import Atoms from ase.io import iread -from luqum.parser import parser -from luqum.elasticsearch import SchemaAnalyzer, ElasticsearchQueryBuilder from opensearchpy import OpenSearch, helpers, AuthenticationException, ConnectionTimeout from abcd.backends import utils @@ -34,55 +32,18 @@ class OpenSearchQuery(AbstractQuerySet): - """ - Class to parse and build queries for OpenSearch. - - Attributes - ---------- - query_builder: ElasticsearchQueryBuilder - Query builder to convert a Tree in an OpenSearch query. - """ - - def __init__( - self, - client: Union[OpenSearch, None] = None, - index_name: Union[str, None] = None, - analyse_schema: bool = False, - ): - """ " - Initialises class. - - Parameters - ---------- - client: Union[OpenSearch, None] - OpenSearch client, used for if analyse_schema is `True` to - characterise the schema. Default is `None`. - index_name: Union[str, None] - Name of OpenSearch index to be analysed, used if analyse_schema - is `True` to characterise the schema. Default is `None`. - analyse_schema: bool, optional - Whether to analyse the schema, as defined by the index_name and client. - Default is `False`. - """ - if analyse_schema and client is not None and index_name is not None: - schema = client.indices.get_mapping()[index_name] - schema_analizer = SchemaAnalyzer(schema) - self.query_builder = ElasticsearchQueryBuilder( - **schema_analizer.query_builder_options() - ) - else: - self.query_builder = ElasticsearchQueryBuilder() + """Class to parse and build queries for OpenSearch.""" def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: """ - Parses and builds queries from strings using ElasticsearchQueryBuilder. + Parses and builds queries for OpenSearch. Parameters ---------- query: Union[dict, str, list, None] - Query to be parsed for OpenSearch. If given as a dictionary, - the query is left unchanged. If given as a string, the - ElasticsearchQueryBuilder is used to build the query. + Query to be parsed for OpenSearch. If passed as a dictionary, the query is + left unchanged. If passed a string or list, the query is treated as a query + string, based on Lucene query syntax. Returns ------- @@ -91,13 +52,9 @@ def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: """ if not query: query = self.get_default_query() - logger.info("parsed query: %s", query) - if isinstance(query, dict): - return query if isinstance(query, str): - tree = parser.parse(query) - return self.query_builder(tree) + return self.build_query_string(query) if isinstance(query, list): if len(query) == 0: return None @@ -105,11 +62,28 @@ def __call__(self, query: Union[dict, str, list, None]) -> Union[dict, None]: return None separator = " AND " joined_query = separator.join(query) - tree = parser.parse(joined_query) - return self.query_builder(tree) + return self.build_query_string(joined_query) + logger.info("parsed query: %s", query) return query if query else None + @staticmethod + def build_query_string(query: str) -> dict: + """ + Build query_string (Lucene syntax) query. + + Parameters + ---------- + query : str + Query with Lucene syntax. + + Returns + ------- + dict + Parsed query for query_string query. + """ + return {"query_string": {"query": query}} + @staticmethod def get_default_query() -> dict: """ @@ -254,7 +228,6 @@ def __init__( index_name: str = "atoms", username: str = "admin", password: str = "admin", - analyse_schema: bool = True, **kwargs, ): """ @@ -275,9 +248,6 @@ def __init__( OpenSearch username. Default is `admin`. password: str, optional OpenSearch password. Default is `admin`. - analyse_schema: bool, optional - Whether to analyse the OpenSearch schema when building queries. - Default is `True`. """ super().__init__() @@ -314,7 +284,7 @@ def __init__( self.db = db_name self.index_name = index_name self.create() - self.parser = OpenSearchQuery(self.client, self.index_name, analyse_schema) + self.parser = OpenSearchQuery() def info(self): """ diff --git a/poetry.lock b/poetry.lock index 5d734094..e4f1bcdb 100644 --- a/poetry.lock +++ b/poetry.lock @@ -481,14 +481,14 @@ files = [ [[package]] name = "comm" -version = "0.2.1" +version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "comm-0.2.1-py3-none-any.whl", hash = "sha256:87928485c0dfc0e7976fd89fc1e187023cf587e7c353e4a9b417555b44adf021"}, - {file = "comm-0.2.1.tar.gz", hash = "sha256:0bc91edae1344d39d3661dcbc36937181fdaddb304790458f8b044dbc064b89a"}, + {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, + {file = "comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e"}, ] [package.dependencies] @@ -1166,14 +1166,14 @@ test = ["codecov", "coverage", "ipykernel (>=6.12)", "ipython", "mypy", "pre-com [[package]] name = "jupyter-core" -version = "5.7.1" +version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_core-5.7.1-py3-none-any.whl", hash = "sha256:c65c82126453a723a2804aa52409930434598fd9d35091d63dfb919d2b765bb7"}, - {file = "jupyter_core-5.7.1.tar.gz", hash = "sha256:de61a9d7fc71240f688b2fb5ab659fbb56979458dc66a71decd098e03c79e218"}, + {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, + {file = "jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9"}, ] [package.dependencies] @@ -1183,18 +1183,18 @@ traitlets = ">=5.3" [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling", "traitlets"] -test = ["ipykernel", "pre-commit", "pytest", "pytest-cov", "pytest-timeout"] +test = ["ipykernel", "pre-commit", "pytest (<8)", "pytest-cov", "pytest-timeout"] [[package]] name = "jupyter-events" -version = "0.9.0" +version = "0.9.1" description = "Jupyter Event System library" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_events-0.9.0-py3-none-any.whl", hash = "sha256:d853b3c10273ff9bc8bb8b30076d65e2c9685579db736873de6c2232dde148bf"}, - {file = "jupyter_events-0.9.0.tar.gz", hash = "sha256:81ad2e4bc710881ec274d31c6c50669d71bbaa5dd9d01e600b56faa85700d399"}, + {file = "jupyter_events-0.9.1-py3-none-any.whl", hash = "sha256:e51f43d2c25c2ddf02d7f7a5045f71fc1d5cb5ad04ef6db20da961c077654b9b"}, + {file = "jupyter_events-0.9.1.tar.gz", hash = "sha256:a52e86f59eb317ee71ff2d7500c94b963b8a24f0b7a1517e2e653e24258e15c7"}, ] [package.dependencies] @@ -1250,14 +1250,14 @@ test = ["flaky", "ipykernel", "pre-commit", "pytest (>=7.0)", "pytest-console-sc [[package]] name = "jupyter-server-terminals" -version = "0.5.2" +version = "0.5.3" description = "A Jupyter Server Extension Providing Terminals." category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "jupyter_server_terminals-0.5.2-py3-none-any.whl", hash = "sha256:1b80c12765da979513c42c90215481bbc39bd8ae7c0350b4f85bc3eb58d0fa80"}, - {file = "jupyter_server_terminals-0.5.2.tar.gz", hash = "sha256:396b5ccc0881e550bf0ee7012c6ef1b53edbde69e67cab1d56e89711b46052e8"}, + {file = "jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa"}, + {file = "jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269"}, ] [package.dependencies] @@ -1412,21 +1412,6 @@ interegular = ["interegular (>=0.3.1,<0.4.0)"] nearley = ["js2py"] regex = ["regex"] -[[package]] -name = "luqum" -version = "0.13.0" -description = "A Lucene query parser generating ElasticSearch queries and more !" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "luqum-0.13.0-py3-none-any.whl", hash = "sha256:bf0ac6eb3ca8a6a579ff6dd4bd9d88fbba5d9f559b4f5d864f99c4a6b5061853"}, - {file = "luqum-0.13.0.linux-x86_64.tar.gz", hash = "sha256:1af57bc37637014460858a2ae4737760015ed0b9d8b23d61f198de4736c174f5"}, -] - -[package.dependencies] -ply = ">=3.11" - [[package]] name = "markupsafe" version = "2.1.5" @@ -1671,14 +1656,14 @@ test = ["coverage", "nbval", "pytest", "pytest-cov", "pytest-jupyter", "pytest-p [[package]] name = "nbclient" -version = "0.9.0" +version = "0.9.1" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." category = "main" optional = false python-versions = ">=3.8.0" files = [ - {file = "nbclient-0.9.0-py3-none-any.whl", hash = "sha256:a3a1ddfb34d4a9d17fc744d655962714a866639acd30130e9be84191cd97cd15"}, - {file = "nbclient-0.9.0.tar.gz", hash = "sha256:4b28c207877cf33ef3a9838cdc7a54c5ceff981194a82eac59d558f05487295e"}, + {file = "nbclient-0.9.1-py3-none-any.whl", hash = "sha256:2c50a866e8dd6c5f655de47d2e252c82d2ebe978574e760ac229f5950593a434"}, + {file = "nbclient-0.9.1.tar.gz", hash = "sha256:4f7b78c6c2a380e228f8a3bb469b847cb24e5b8ad6fda410691b5621e05ce5a2"}, ] [package.dependencies] @@ -1690,7 +1675,7 @@ traitlets = ">=5.4" [package.extras] dev = ["pre-commit"] docs = ["autodoc-traits", "mock", "moto", "myst-parser", "nbclient[test]", "sphinx (>=1.7)", "sphinx-book-theme", "sphinxcontrib-spelling"] -test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"] +test = ["flaky", "ipykernel (>=6.19.3)", "ipython", "ipywidgets", "nbconvert (>=7.0.0)", "pytest (>=7.0,<8)", "pytest-asyncio", "pytest-cov (>=4.0)", "testpath", "xmltodict"] [[package]] name = "nbconvert" @@ -1733,14 +1718,14 @@ webpdf = ["playwright"] [[package]] name = "nbformat" -version = "5.9.2" +version = "5.10.2" description = "The Jupyter Notebook format" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "nbformat-5.9.2-py3-none-any.whl", hash = "sha256:1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9"}, - {file = "nbformat-5.9.2.tar.gz", hash = "sha256:5f98b5ba1997dff175e77e0c17d5c10a96eaed2cbd1de3533d1fc35d5e111192"}, + {file = "nbformat-5.10.2-py3-none-any.whl", hash = "sha256:7381189a0d537586b3f18bae5dbad347d7dd0a7cf0276b09cdcd5c24d38edd99"}, + {file = "nbformat-5.10.2.tar.gz", hash = "sha256:c535b20a0d4310167bf4d12ad31eccfb0dc61e6392d6f8c570ab5b45a06a49a3"}, ] [package.dependencies] @@ -2186,18 +2171,6 @@ files = [ docs = ["furo (>=2023.9.10)", "proselint (>=0.13)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1.25.2)"] test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)"] -[[package]] -name = "ply" -version = "3.11" -description = "Python Lex & Yacc" -category = "main" -optional = false -python-versions = "*" -files = [ - {file = "ply-3.11-py2.py3-none-any.whl", hash = "sha256:096f9b8350b65ebd2fd1346b12452efe5b9607f7482813ffca50c22722a807ce"}, - {file = "ply-3.11.tar.gz", hash = "sha256:00c7c1aaa88358b9c765b6d3000c6eec0ba42abca5351b095321aef446081da3"}, -] - [[package]] name = "prometheus-client" version = "0.20.0" @@ -3006,14 +2979,14 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "terminado" -version = "0.18.0" +version = "0.18.1" description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "terminado-0.18.0-py3-none-any.whl", hash = "sha256:87b0d96642d0fe5f5abd7783857b9cab167f221a39ff98e3b9619a788a3c0f2e"}, - {file = "terminado-0.18.0.tar.gz", hash = "sha256:1ea08a89b835dd1b8c0c900d92848147cef2537243361b2e3f4dc15df9b6fded"}, + {file = "terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0"}, + {file = "terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e"}, ] [package.dependencies] @@ -3101,19 +3074,19 @@ telegram = ["requests"] [[package]] name = "traitlets" -version = "5.14.1" +version = "5.14.2" description = "Traitlets Python configuration system" category = "main" optional = false python-versions = ">=3.8" files = [ - {file = "traitlets-5.14.1-py3-none-any.whl", hash = "sha256:2e5a030e6eff91737c643231bfcf04a65b0132078dad75e4936700b213652e74"}, - {file = "traitlets-5.14.1.tar.gz", hash = "sha256:8585105b371a04b8316a43d5ce29c098575c2e477850b62b848b964f1444527e"}, + {file = "traitlets-5.14.2-py3-none-any.whl", hash = "sha256:fcdf85684a772ddeba87db2f398ce00b40ff550d1528c03c14dbf6a02003cd80"}, + {file = "traitlets-5.14.2.tar.gz", hash = "sha256:8cdd83c040dab7d1dee822678e5f5d100b514f7b72b01615b26fc5718916fdf9"}, ] [package.extras] docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"] -test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<7.5)", "pytest-mock", "pytest-mypy-testing"] +test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.1)", "pytest-mock", "pytest-mypy-testing"] [[package]] name = "types-python-dateutil" @@ -3267,4 +3240,4 @@ tests = [] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "6302cf2d220d3a322f51e6e03bf8d745c011d22a4abe1a5d9b9023c26ecac810" +content-hash = "173000fccc184ef138c41a3ec3edae0a4e9871bbd99ceeaff97eeebbd785f019" diff --git a/pyproject.toml b/pyproject.toml index 69cbfe79..0de6f9c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,6 @@ documentation = "https://libatoms.github.io/abcd/" [tool.poetry.dependencies] ase = "^3.22.1" lark = "^1.1.5" -luqum = "^0.13.0" matplotlib = "^3.7.1" notebook = "^6.5.4" numpy = "^1.24.3" diff --git a/tests/cli.py b/tests/cli.py index c20e25b5..117cb8a5 100644 --- a/tests/cli.py +++ b/tests/cli.py @@ -42,11 +42,16 @@ def test_summary(self): """ class_path = os.path.normpath(os.path.abspath(__file__)) data_file = os.path.dirname(class_path) + "/data/example.xyz" - subprocess.run(f"abcd upload {data_file}", shell=True, check=True) + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + subprocess.run( + f"abcd upload {data_file} -i -e 'test_data'", shell=True, check=True + ) summary = subprocess.run( "abcd summary", shell=True, check=True, capture_output=True, text=True ) - assert "Total number of configurations:" in summary.stdout + assert "Total number of configurations: 1" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) def test_query(self): """ @@ -55,13 +60,62 @@ def test_query(self): class_path = os.path.normpath(os.path.abspath(__file__)) data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" - subprocess.run(f"abcd upload {data_file_1}", shell=True, check=True) - subprocess.run(f"abcd upload {data_file_2}", shell=True, check=True) + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) summary = subprocess.run( - "abcd show -p n_atoms -q 'n_atoms : 2'", shell=True, check=True, capture_output=True, text=True + "abcd show -p n_atoms -q 'n_atoms : 2'", + shell=True, + check=True, + capture_output=True, + text=True, ) assert "2" in summary.stdout and "3" not in summary.stdout summary = subprocess.run( - "abcd show -p n_atoms -q 'n_atoms : 3'", shell=True, check=True, capture_output=True, text=True + "abcd show -p n_atoms -q 'n_atoms : 3'", + shell=True, + check=True, + capture_output=True, + text=True, ) assert "3" in summary.stdout and "2" not in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + def test_range_query(self): + """ + Test lucene-style ranged query. + """ + class_path = os.path.normpath(os.path.abspath(__file__)) + data_file_1 = os.path.dirname(class_path) + "/data/example.xyz" + data_file_2 = os.path.dirname(class_path) + "/data/example_2.xyz" + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-100 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 1" in summary.stdout + + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-102 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 2" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) diff --git a/tests/data/example.xyz b/tests/data/example.xyz index 452b9d67..2a81c26d 100644 --- a/tests/data/example.xyz +++ b/tests/data/example.xyz @@ -1,4 +1,4 @@ 2 -Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-101.9 Si 0.00000000 1.00000000 2.00000000 Si 4.00000000 5.00000000 6.00000000 diff --git a/tests/data/example_2.xyz b/tests/data/example_2.xyz index 9582ab40..13315d57 100644 --- a/tests/data/example_2.xyz +++ b/tests/data/example_2.xyz @@ -1,5 +1,5 @@ 3 -Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-99.33 Si 0.00000000 1.00000000 2.00000000 Si 3.00000000 4.00000000 5.00000000 Si 6.00000000 7.00000000 7.00000000 diff --git a/tests/opensearch.py b/tests/opensearch.py index 1bb8eb3b..259536dc 100644 --- a/tests/opensearch.py +++ b/tests/opensearch.py @@ -39,7 +39,6 @@ def setUpClass(cls): abcd = ABCD.from_url( url, index_name="test_index", - analyse_schema=False, use_ssl=cls.security_enabled, ) except (ConnectionError, ConnectionResetError): @@ -47,7 +46,6 @@ def setUpClass(cls): abcd = ABCD.from_url( url, index_name="test_index", - analyse_schema=False, use_ssl=cls.security_enabled, )