diff --git a/.ci/opensearch/Dockerfile b/.ci/opensearch/Dockerfile new file mode 100644 index 00000000..235c3b02 --- /dev/null +++ b/.ci/opensearch/Dockerfile @@ -0,0 +1,9 @@ +# Only the docker CLI and bash are needed here: the entrypoint script drives containers through docker commands. +FROM docker:27-cli + +# --no-cache avoids storing the apk package index in the image layer. +RUN apk add --no-cache bash + +COPY run-opensearch.sh /run-opensearch.sh + +ENTRYPOINT ["/run-opensearch.sh"] diff --git a/.ci/opensearch/action.yml b/.ci/opensearch/action.yml new file mode 100644 index 00000000..f917a612 --- /dev/null +++ b/.ci/opensearch/action.yml @@ -0,0 +1,33 @@ +name: 'Run OpenSearch' +description: 'This action spins up an OpenSearch instance that can be accessed and used in your subsequent steps.' + +inputs: + opensearch-version: + description: 'The version of the OpenSearch you want to run' + required: true + security-enabled: + description: 'Enable or disable the security plugin (HTTPS and authentication), disabled by default' + default: 'false' + required: false + nodes: + description: 'Number of nodes in the cluster' + required: false + default: 1 + port: + description: 'Port where you want to run OpenSearch' + required: false + default: 9200 + opensearch-initial-admin-password: + description: 'The password for the user admin in your cluster' + required: false + default: 'myStrongPassword123!' 
+ +runs: + using: 'docker' + image: 'Dockerfile' + env: + OPENSEARCH_VERSION: ${{ inputs.opensearch-version }} + NODES: ${{ inputs.nodes }} + PORT: ${{ inputs.port }} + SECURITY_ENABLED: ${{ inputs.security-enabled }} + OPENSEARCH_INITIAL_ADMIN_PASSWORD: ${{ inputs.opensearch-initial-admin-password }} diff --git a/.ci/opensearch/functions/imports.sh b/.ci/opensearch/functions/imports.sh new file mode 100755 index 00000000..a3ece964 --- /dev/null +++ b/.ci/opensearch/functions/imports.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# +# Sets up all the common variables and imports relevant functions +# +# Version 1.0.1 +# - Initial version after refactor +# From https://github.com/opensearch-project/opensearch-py/blob/main/.ci/functions/imports.sh + +source ./.ci/opensearch/functions/wait-for-container.sh diff --git a/.ci/opensearch/functions/wait-for-container.sh b/.ci/opensearch/functions/wait-for-container.sh new file mode 100755 index 00000000..c1b6d5e1 --- /dev/null +++ b/.ci/opensearch/functions/wait-for-container.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash +# +# Exposes a routine scripts can call to wait for a container if that container set up a health command +# +# Please source .ci/functions/imports.sh as a whole not just this file +# +# Version 1.0.1 +# - Initial version after refactor +# - Make sure wait_for_contiainer is silent +# From https://github.com/opensearch-project/opensearch-py/blob/main/.ci/functions/wait-for-container.sh + +function container_running { + if [[ "$(docker ps -q -f name=$1)" ]]; then + return 0; + else return 1; + fi +} + +function wait_for_container { + set +x + until ! 
container_running "$1" || (container_running "$1" && [[ "$(docker inspect -f "{{.State.Health.Status}}" "${1}")" != "starting" ]]); do + echo "" + docker inspect -f "{{range .State.Health.Log}}{{.Output}}{{end}}" "${1}" + echo -e "\033[34;1mINFO:\033[0m waiting for node $1 to be up\033[0m" + sleep 4; + done; + + # Always show logs if the container is running, this is very useful both on CI as well as while developing + if container_running "$1"; then + docker logs "$1" + fi + + if ! container_running "$1" || [[ "$(docker inspect -f "{{.State.Health.Status}}" "${1}")" != "healthy" ]]; then + echo -e "\033[31;1mERROR:\033[0m Failed to start $1 in detached mode beyond health checks\033[0m" + echo -e "\033[31;1mERROR:\033[0m dumped the docker log before shutting the node down\033[0m" + return 1 + else + echo + echo -e "\033[32;1mSUCCESS:\033[0m Detached and healthy: ${1}\033[0m" + return 0 + fi +} diff --git a/.ci/opensearch/run-opensearch.sh b/.ci/opensearch/run-opensearch.sh new file mode 100755 index 00000000..4f3678b4 --- /dev/null +++ b/.ci/opensearch/run-opensearch.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +source ./.ci/opensearch/functions/imports.sh +set -euxo pipefail + +# ${VAR:-} keeps "set -u" from aborting before the friendly error below is printed +if [[ -z "${OPENSEARCH_VERSION:-}" ]]; then + echo -e "\033[31;1mERROR:\033[0m Required environment variable [OPENSEARCH_VERSION] not set\033[0m" + exit 1 +fi + +OPENSEARCH_REQUIRED_VERSION="latest" +# Starting in 2.12.0, security demo configuration script requires an initial admin password +if [ "$OPENSEARCH_VERSION" != "$OPENSEARCH_REQUIRED_VERSION" ]; then + OPENSEARCH_INITIAL_ADMIN_PASSWORD="admin" +fi + +for (( node=1; node<=${NODES-1}; node++ )) +do + port=$((PORT + $node - 1)) + + if [[ "$SECURITY_ENABLED" == "true" ]]; then + healthcmd="curl -vvv -s --insecure -u admin:$OPENSEARCH_INITIAL_ADMIN_PASSWORD --fail https://localhost:$port/_cluster/health || exit 1" + security=($(cat <<-END + +END + )) + elif [[ "$SECURITY_ENABLED" == "false" ]]; then + healthcmd="curl -vvv -s --fail 
http://localhost:$port/_cluster/health || exit 1" + security=($(cat <<-END + --env plugins.security.disabled=true +END + )) + fi + + docker run \ + --rm \ + --detach \ + --name="os${node}" \ + --env "cluster.name=docker-opensearch" \ + --env "http.port=${port}" \ + --env discovery.type=single-node \ + --env bootstrap.memory_lock=true \ + --env "OPENSEARCH_JAVA_OPTS=-Xms4g -Xmx4g" \ + --env "OPENSEARCH_INITIAL_ADMIN_PASSWORD=${OPENSEARCH_INITIAL_ADMIN_PASSWORD}" \ + "${security[@]}" \ + --publish "${port}:${port}" \ + --ulimit nofile=65536:65536 \ + --ulimit memlock=-1:-1 \ + --health-cmd="$(echo $healthcmd)" \ + --health-interval=2s \ + --health-retries=20 \ + --health-timeout=2s \ + opensearchproject/opensearch:${OPENSEARCH_VERSION} + + # Abort when the node never becomes healthy; a bare "if" without "else" would let the action pass + wait_for_container "os$node" || exit 1 + echo -e "\033[32;1mSUCCESS:\033[0m OpenSearch up and running\033[0m" +done diff --git a/.ci/opensearch/test.sh b/.ci/opensearch/test.sh new file mode 100755 index 00000000..c4eb9fd4 --- /dev/null +++ b/.ci/opensearch/test.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +script_path=$(dirname $(realpath -s $0)) +source $script_path/functions/imports.sh +set -euxo pipefail + +echo $script_path/functions/imports.sh diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..94f58d25 --- /dev/null +++ b/.flake8 @@ -0,0 +1,17 @@ +[flake8] +select = B,BLK,C,E,F,I,S,W +max-complexity = 30 +max-line-length = 88 +# E203 and W503 conflict with black; flake8 does not allow inline comments in option values +ignore = E203,W503 +application-import-names = abcd,tests +import-order-style = google +exclude = + abcd/backends/atoms_pymongo.py, + abcd/frontends, + abcd/model.py, + abcd/parsers/queries_new.py, + abcd/parsers/queries.py, + abcd/parsers/extras.py, + abcd/server, + tests/__init__.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc1cdc84..2a673481 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,12 +7,29 @@ jobs: build: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: python-version: [ "3.9", 
"3.10", "3.11", "3.12" ] + opensearch: ['1.0.1', '2.0.1', 'latest'] + security-enabled: ["true", "false"] steps: - uses: actions/checkout@v4 + - name: Configure sysctl limits + run: | + sudo swapoff -a + sudo sysctl -w vm.swappiness=1 + sudo sysctl -w fs.file-max=262144 + sudo sysctl -w vm.max_map_count=262144 + + - name: Start OpenSearch + uses: ./.ci/opensearch + with: + port: 9250 + opensearch-version: ${{ matrix.opensearch }} + security-enabled: ${{ matrix.security-enabled }} + - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -29,6 +46,10 @@ jobs: - name: Run unit tests run: | poetry run pytest --cov=abcd --cov-report xml --cov-report term:skip-covered + env: + port: 9250 + security_enabled: ${{ matrix.security-enabled }} + opensearch-version: ${{ matrix.opensearch }} - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.4.1 diff --git a/README.md b/README.md index b6ab1c5d..9fe129c4 100644 --- a/README.md +++ b/README.md @@ -3,86 +3,156 @@ [![Doc](https://img.shields.io/badge/docs-master-green.svg)](https://libatoms.github.io/abcd/) [![Build Status](https://travis-ci.org/libAtoms/abcd.svg?branch=master)](https://travis-ci.org/libAtoms/abcd) -Database storage and discovery of atomistic data. +Database storage and discovery of atomistic data. Take a look at the `examples.md` file for.. examples! Main features: -- Configurations that consist of atom positions, elements, forces, and various metadata are stored as a dictionary by a MongoDB backend. -- There is no predefined schema, any combination of keys are allowed for all configurations. -- Two modes: "discovery" and "download". Both use filter-type queries, but in "discovery" mode, summary statistics of the configurations that pass the filter are reported. In "download" mode, the matching configurations are downloaded and exported to a file. 
-- The "discovery" mode can be used to learn what keys exist in the set of configurations that have passed the current quiery filter. The user can use this to refine the query. -- Complex queries on dictionary key-value pairs are allowed, and their logical combinations. +- Configurations that consist of atom positions, elements, forces, and various metadata are stored as a dictionary by a MongoDB backend. +- There is no predefined schema, any combination of keys are allowed for all configurations. +- Two modes: "discovery" and "download". Both use filter-type queries, but in "discovery" mode, summary statistics of the configurations that pass the filter are reported. In "download" mode, the matching configurations are downloaded and exported to a file. +- The "discovery" mode can be used to learn what keys exist in the set of configurations that have passed the current query filter. The user can use this to refine the query. +- Complex queries on dictionary key-value pairs are allowed, and their logical combinations. ## Installation +### General Setup + creating tables and views -``` + +```sh $ pip install git+https://github.com/libAtoms/abcd.git ``` -## Setup +Example Docker installation on Ubuntu: + +```sh +sudo apt-get update +sudo apt upgrade +sudo apt install docker.io +sudo groupadd docker +sudo usermod -aG docker $USER +newgrp docker # or exit and log in +``` + +Docker can be tested by running: + +```sh +docker run hello-world +``` + +Example Python setup on Ubuntu (pip must be updated for poetry to be used successfully): + +```sh +sudo apt install software-properties-common +sudo add-apt-repository ppa:deadsnakes/ppa +sudo apt install python3.10 +sudo apt-get install python3.10-distutils +sudo apt install python3-virtualenv +virtualenv -p /usr/bin/python3.10 venv_10 +source venv_10/bin/activate +curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10 +``` -If you have an already running mongo server, or install your own, they you are ready to go. 
Alternatively, +Building and installing ABCD dependencies via poetry: +```sh +git clone https://github.com/libAtoms/abcd.git +curl -sSL https://install.python-poetry.org | python3 - +export PATH="$HOME/.local/bin:$PATH" +cd abcd +poetry install +poetry build ``` + +### MongoDB + +If you have an already running MongoDB server, or install your own, then you are ready to go. Alternatively, + +```sh docker run -d --rm --name abcd-mongodb -v :/data/db -p 27017:27017 mongo ``` -will download and install a docker and run a database in it. +will download the MongoDB image and run a database in a Docker container. To connect to a mongodb that is already running, use -``` + +```sh abcd login mongodb://localhost ``` If you are running `abcd` inside a docker, and want to connect to a mongodb outside that docker use something like this (example is for Mac OS): -``` +```sh abcd login mongodb://docker.for.mac.localhost ``` The above login command will place create an `~/.abcd` file with the following contents: -``` +```json {"url": "mongodb://localhost"} ``` -# Remote access +### OpenSearch +If you have an already running OpenSearch server, or install your own, then you are ready to go. Alternatively, -You can set up an `abcd` user on your machine where the database is running, and then access it remotely for discovering data. Make sure you have the `~/.abcd` file created for this user, then put this in the `.ssh/authorized_keys` file (substituting your public key for the last part): +```sh +sudo swapoff -a # optional +sudo sysctl -w vm.swappiness=1 # optional +sudo sysctl -w fs.file-max=262144 # optional +sudo sysctl -w vm.max_map_count=262144 +docker run -d --rm --name abcd-opensearch -v :/data/db -p 9200:9200 --env discovery.type=single-node -it opensearchproject/opensearch:latest ``` -command="/path/to/abcd --remote ${SSH_ORIGINAL_COMMAND}",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-rsa your@email + +will download and install an OpenSearch image and run it. 
The connection can be tested with: + +```sh +curl -vvv -s --insecure -u admin:admin --fail https://localhost:9200 ``` -Then you'll be able to access the database remotely using, e.g. +To connect to an OpenSearch database that is already running, use + +```sh +abcd login opensearch://username:password@localhost +``` + +## Remote access + +You can set up an `abcd` user on your machine where the database is running, and then access it remotely for discovering data. Make sure you have the `~/.abcd` file created for this user, then put this in the `.ssh/authorized_keys` file (substituting your public key for the last part): + +```sh +command="/path/to/abcd --remote ${SSH_ORIGINAL_COMMAND}",no-port-forwarding,no-X11-forwarding,no-agent-forwarding,no-pty ssh-rsa your@email ``` + +Then you'll be able to access the database remotely using, e.g. + +```sh ssh abcd@your.machine summary ``` -# GUI through a browser + visualisation +## GUI through a browser + visualisation The database has a simple GUI, coupled with a visualiser. Data for now needs to be uploaded on the command line, but query can be done through the browsers. Instructions below (they include running `abcd` from a docker too, but of course you can run it outside the docker as well. ) -#### Usage in docker +## Usage in docker Currently a manual uploaded image is available, that was built on 7/2/2020 by Tamas K. Stenczel. To access it: 1. pull the image - ``` + ```sh docker pull stenczelt/projection-abcd:latest ``` 2. create a docker network, which enables the containers to communicate with each other and the outside world as well - ``` + ```sh docker network create --driver bridge abcd-network ``` 3. 
run the mongo (ABCD) and the visualiser as well - ``` + ```sh docker run -d --rm --name abcd-mongodb-net -v :/data/db -p 27017:27017 --network abcd-network mongo - + docker run -it --rm --name visualiser-dev -p 9999:9999 --network abcd-network stenczelt/projection-abcd ``` NB: You need a a directory where the database files are kept locally and you need to connect this to the mongo @@ -91,8 +161,17 @@ To access it: This will start the visualiser with ABCD integration! Have fun! After usage, for cleanup: -``` + +```sh docker stop visualiser-dev abcd-mongodb-net # stop the containers docker rm visualiser-dev abcd-mongodb-net # remove them if --rm did not docker network rm abcd-network # remove the docker network ``` + +## Testing + +Unit tests are automatically run on push and creation of pull requests. Unit testing using mock databases can also be run in the command line using: + +```sh +python -m unittest tests +``` diff --git a/abcd/__init__.py b/abcd/__init__.py index b8008379..70f8cdc9 100644 --- a/abcd/__init__.py +++ b/abcd/__init__.py @@ -8,6 +8,7 @@ class ConnectionType(Enum): mongodb = 1 http = 2 + opensearch = 3 class ABCD(object): @@ -23,8 +24,10 @@ def from_url(cls, url, **kwargs): r = parse.urlparse(url) logger.info(r) - if r.scheme == "mongodb": + db = r.path.split("/")[1] if r.path else None + db = db if db else "abcd" + if r.scheme == ConnectionType.mongodb.name: conn_settings = { "host": r.hostname, "port": r.port, @@ -33,13 +36,22 @@ def from_url(cls, url, **kwargs): "authSource": "admin", } - db = r.path.split("/")[1] if r.path else None - db = db if db else "abcd" - from abcd.backends.atoms_pymongo import MongoDatabase return MongoDatabase(db_name=db, **conn_settings, **kwargs) + if r.scheme == ConnectionType.opensearch.name: + conn_settings = { + "host": r.hostname, + "port": r.port, + "username": r.username, + "password": r.password, + } + + from abcd.backends.atoms_opensearch import OpenSearchDatabase + + return 
OpenSearchDatabase(db_name=db, **conn_settings, **kwargs) + elif r.scheme == "http" or r.scheme == "https": raise NotImplementedError("http not yet supported! soon...") elif r.scheme == "ssh": @@ -57,11 +69,3 @@ def from_url(cls, url, **kwargs): url = "mongodb://mongoadmin:secret@localhost:27017/abcd_new" abcd = ABCD.from_url(url) abcd.print_info() - - # from ase.io import iread - # for atoms in iread('../tutorials/data/bcc_bulk_54_expanded_2_high.xyz', index=slice(1)): - # # Hack to fix the representation of forces - # atoms.calc.results['forces'] = atoms.arrays['force'] - # - # abcd.push(atoms) - # print(atoms) diff --git a/abcd/backends/atoms_opensearch.py b/abcd/backends/atoms_opensearch.py new file mode 100644 index 00000000..9a795cee --- /dev/null +++ b/abcd/backends/atoms_opensearch.py @@ -0,0 +1,1026 @@ +from __future__ import annotations + +from collections.abc import Iterator +from datetime import datetime +from typing import Iterable, Optional, Union +import logging +from os import linesep +from pathlib import Path + +from ase import Atoms +from ase.io import iread +from opensearchpy import ( + OpenSearch, + helpers, + AuthenticationException, + ConnectionTimeout, + RequestError, +) + +from abcd.backends import utils +from abcd.database import AbstractABCD +import abcd.errors +from abcd.model import AbstractModel +from abcd.parsers import extras +from abcd.queryset import AbstractQuerySet + + +logger = logging.getLogger(__name__) + +map_types = { + bool: "bool", + float: "float", + int: "int", + str: "str", + datetime: "date", + dict: "dict", +} + + +class OpenSearchQuery(AbstractQuerySet): + """Class to parse and build queries for OpenSearch.""" + + def __call__(self, query: Optional[Union[dict, str, list]]) -> Optional[dict]: + """ + Parses and builds queries for OpenSearch. + + Parameters + ---------- + query: Optional[Union[dict, str, list]] + Query to be parsed for OpenSearch. If passed as a dictionary, the query is + left unchanged. 
If passed a string or list, the query is treated as a query + string, based on Lucene query syntax. + + Returns + ------- + Optional[dict] + The parsed query for OpenSearch. + """ + if not query: + query = self.get_default_query() + + if isinstance(query, str): + return self.build_query_string(query) + if isinstance(query, list): + if len(query) == 0: + return None + if query[0] is None: + return None + separator = " AND " + joined_query = separator.join(query) + return self.build_query_string(joined_query) + + logger.info("parsed query: %s", query) + return query if query else None + + @staticmethod + def build_query_string(query: str) -> dict: + """ + Build query_string (Lucene syntax) query. + + Parameters + ---------- + query : str + Query with Lucene syntax. + + Returns + ------- + dict + Parsed query for query_string query. + """ + return {"query_string": {"query": query}} + + @staticmethod + def get_default_query() -> dict: + """ + Defines a default OpenSearch query. Currently, matches all documents. + + Returns + ------- + The default query for OpenSearch. + """ + return {"match_all": {}} + + +class AtomsModel(AbstractModel): + """ + Class to interface between Atoms data and OpenSearch. + + Attributes + ---------- + _client: Optional[OpenSearch] + OpenSearch client. + _index_name: Optional[str] + OpenSearch index name. + """ + + def __init__( + self, + client: Optional[OpenSearch] = None, + index_name: Optional[str] = None, + dict: Optional[dict] = None, + ): + """ + Initialises class. + + Parameters + ---------- + client: Optional[OpenSearch] + OpenSearch client. Default is `None`. + index_name: Optional[str] + OpenSearch index name. Default is `None`. + dict: Optional[dict] + Dictionary of atoms data. Default is `None`. 
+ """ + super().__init__(dict) + + self._client = client + self._index_name = index_name + + @classmethod + def from_atoms( + cls, + client: OpenSearch, + index_name: str, + atoms: Atoms, + extra_info: Optional[dict] = None, + store_calc: bool = True, + ) -> AtomsModel: + """ + Reads and prepares atoms data and extra information for OpenSearch. + + Parameters + ---------- + client: OpenSearch + OpenSearch client. + index_name: str + OpenSearch index name. + atoms: Atoms + Atoms data to be stored. + extra_info: Optional[dict] + Extra information to store in the document with the atoms data. + Default is `None`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. + + Returns + ------- + Data from atoms and extra information to be saved in OpenSearch. + """ + obj = super().from_atoms(atoms, extra_info, store_calc) + obj._client = client + obj._index_name = index_name + return obj + + @property + def _id(self) -> Optional[str]: + """ + Get the OpenSearch document ID stored in data. + + Returns + ------- + Optional[str] + Current document ID. + """ + return self.get("_id", None) + + def save(self): + """ + Saves data in OpenSearch. If the data being saved includes a document + ID, updates the matching document in OpenSearch with the current data. + """ + body = {} + body.update(self.data) + body["derived"] = self.derived + if self._client is not None: + if not self._id: + self._client.index(index=self._index_name, body=body) + else: + del body["_id"] + body = {"doc": body} + self._client.update(index=self._index_name, id=self._id, body=body) + + def remove(self): + """ + If current data includes a document ID, deletes the matching document + OpenSearch. + """ + if self._client is not None and self._id: + self._client.delete(index=self._index_name, id=self._id) + self.clear() + + +class OpenSearchDatabase(AbstractABCD): + """ + Wrapper to make OpenSearch operations easy. 
+ + Attributes + ---------- + client: OpenSearch + OpenSearch client. + db: str + Database label, taken from the `db_name` argument. + index_name: str + OpenSearch index name. + parser: OpenSearchQuery + Query parser and builder for OpenSearch queries. + """ + + def __init__( + self, + host: str = "localhost", + port: int = 9200, + db_name: str = "abcd", + index_name: str = "atoms", + username: str = "admin", + password: str = "admin", + **kwargs, + ): + """ + Initialises class. + + Parameters + ---------- + host: str, optional + Name of OpenSearch host. Default is `localhost`. + port: int, optional + OpenSearch port. Default is `9200`. + db_name: str, optional + Label for OpenSearch database. Used only when printing information. + Default is `abcd`. + index_name: str, optional + Name of OpenSearch index. Default is `atoms`. + username: str, optional + OpenSearch username. Default is `admin`. + password: str, optional + OpenSearch password. Default is `admin`. + """ + super().__init__() + + logger.info((host, port, index_name, username, kwargs)) + + client_settings = { + "verify_certs": False, + "ca_certs": None, + "use_ssl": True, + "ssl_assert_hostname": False, + "ssl_show_warn": False, + } + + for key in client_settings: + if key in kwargs: + client_settings[key] = kwargs[key] + + self.client = OpenSearch( + hosts=[{"host": host, "port": port}], + http_auth=(username, password), + **client_settings, + ) + + try: + info = self.client.info() + logger.info("DB info: %s", info) + + except AuthenticationException as err: + raise abcd.errors.AuthenticationError() from err + + except ConnectionTimeout as err: + raise abcd.errors.TimeoutError() from err + + self.db = db_name + self.index_name = index_name + self.create() + self.parser = OpenSearchQuery() + + def info(self): + """ + Gets information from OpenSearch client about the database. + + Returns + ------- + Dictionary of database information. 
+ """ + if self.client.transport.hosts is not None: + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + else: + host, port = None, None + + self.refresh() + return { + "host": host, + "port": port, + "db": self.db, + "index": self.index_name, + "number of confs": self.client.count(index=self.index_name)["count"], + "type": "opensearch", + } + + def delete(self, query: Optional[Union[dict, str]] = None): + """ + Deletes documents from the database. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to be deleted. Default is `None`. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + body = {"query": query} + + self.client.delete_by_query( + index=self.index_name, + body=body, + ) + + def destroy(self): + """ + Deletes the current index in OpenSearch. + Ignores errors if the index does not exist. + """ + self.client.indices.delete(index=self.index_name, ignore=404) + + def create(self): + """ + Creates a new index in OpenSearch. + Ignores errors if the index already exists. + """ + self.client.indices.create(index=self.index_name, ignore=400) + + def refresh(self): + """ + Refresh index to ensure recent operations performed are available for search. + """ + self.client.indices.refresh(index=self.index_name) + + def save_bulk(self, actions: Iterable[dict], **kwargs): + """ + Save a collection of documents in bulk. + + Parameters + ---------- + actions: Iterable + Documents to be saved. + """ + request_timeout = kwargs.get("request_timeout", 30) + chunk_size = kwargs.get("chunk_size", 500) + helpers.bulk( + client=self.client, + actions=actions, + index=self.index_name, + chunk_size=chunk_size, + request_timeout=request_timeout, + ) + + def push( + self, + atoms: Union[Atoms, Iterable], + extra_info: Optional[Union[dict, str, list]] = None, + store_calc: bool = True, + **kwargs, + ): + """ + Save data from atoms object(s) to database. 
+ + Parameters + ---------- + atoms: Union[Atoms, Iterable] + extra_info: Optional[Union[dict, str, list]] + Extra information to store in the document with the atoms data. + Default is `None`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. + """ + if extra_info and isinstance(extra_info, str): + extra_info = extras.parser.parse(extra_info) # type: ignore + if extra_info and isinstance(extra_info, list): + for i, info in enumerate(extra_info): + if isinstance(info, str): + extra_info[i] = extras.parser.parse(info) + + if isinstance(atoms, Atoms): + data = AtomsModel.from_atoms( + self.client, + self.index_name, + atoms, + extra_info=extra_info, # type: ignore + store_calc=store_calc, + ) + data.save() + + elif isinstance(atoms, Iterator) or isinstance(atoms, list): + actions = [] + for i, item in enumerate(atoms): + if isinstance(extra_info, list): + info = extra_info[i] + else: + info = extra_info + data = AtomsModel.from_atoms( + self.client, + self.index_name, + item, + extra_info=info, # type: ignore + store_calc=store_calc, + ) + actions.append(data.data) + actions[-1]["derived"] = data.derived + self.save_bulk(actions, **kwargs) + + def upload( + self, + file: Path, + extra_infos: Union[Iterable, dict] = (), + store_calc: bool = True, + ): + """ + Upload data from a file to the database. + + Parameters + ---------- + file: Path + Path to file to be uploaded + extra_infos: Union[Iterable, dict] + Extra information to store in the document with the atoms data. + Default is `()`. + store_calc: bool, optional + Whether to store data from the calculator attached to atoms. + Default is `True`. 
+ """ + + if isinstance(file, str): + file = Path(file) + + extra_info = {} + for info in extra_infos: + extra_info.update(extras.parser.parse(info)) + + extra_info["filename"] = str(file) + + data = iread(str(file)) + self.push(data, extra_info, store_calc=store_calc) + + def get_items(self, query: Optional[Union[dict, str]] = None) -> Iterator[dict]: + """ + Get data as a dictionary from documents in the database. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to get data from. Default is `None`. + + Returns + ------- + Iterator[dict] + Iterator for dictionary of data. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + query = { + "query": query, + } + + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + ): + yield {"_id": hit["_id"], **hit["_source"]} + + def get_atoms(self, query: Optional[Union[dict, str]] = None) -> Iterator[Atoms]: + """ + Get data as Atoms object from documents in the database. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to get data from. Default is `None`. + + Returns + ------- + Iterator[Atoms] + Generator for AtomsModel object of data. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + query = { + "query": query, + } + + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + ): + yield AtomsModel(dict=hit["_source"]).to_ase() + + def count(self, query: Optional[Union[dict, str]] = None, timeout=30.0) -> int: + """ + Counts number of documents in the database. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to be counted. Default is `None`. + timeout: float + Timeout for request in seconds. + + Returns + ------- + Count of number of documents. 
+ """ + query = self.parser(query) + logger.info("parsed query: %s", query) + body = { + "query": query, + } + + return self.client.count(index=self.index_name, body=body, timeout=timeout)[ + "count" + ] + + def _get_props_from_source( + self, + names: Union[str, list[str]], + query: Optional[Union[dict, str]] = None, + ) -> dict: + """ + Gets all values of specified properties using the original data from _source. + + Parameters + ---------- + names: Union[str, list[str]] + Name or list of names of properties to return. + query: Optional[Union[dict, str]] + Query to filter documents to get properties from. Default is `None`. + + Returns + ------- + dict + Dictionary of lists of values for the specified properties. + """ + props = {} + hits = [ + dict(hit["_source"].items()) + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + stored_fields=names, + _source=names, + ) + if "_source" in hit and all(name in hit["_source"] for name in names) + ] + for name in names: + props[name] = [hit[name] for hit in hits] + return props + + def property( + self, + names: Union[str, list[str]], + allow_flatten: bool = True, + query: Optional[Union[dict, str]] = None, + ) -> Union[dict, list]: + """ + Gets all values of specified properties for matching documents in the database. + + Parameters + ---------- + names: Union[str, list[str]] + Name or list of names of properties to return. + allow_flatten: bool = True + Whether to allow arrays to be returned flattened. There is no guarantee + for the order of returned values. Default is `True`. + query: Optional[Union[dict, str]] + Query to filter documents to get properties from. Default is `None`. + + Returns + ------- + Union[dict, list] + Dictionary of lists of values for the specified properties, or list + if only one property is given. 
+ """ + query = self.parser(query) + logger.info("parsed query: %s", query) + query = { + "query": query, + } + + if isinstance(names, str): + names = [names] + names = [format(name) for name in names] + + # Try to use docvalue_fields to avoid loading entire document + # But not all datatypes supported by default + if allow_flatten: + props = {} + try: + hits = [ + dict(hit["fields"].items()) + for hit in helpers.scan( + self.client, + index=self.index_name, + query=query, + _source=False, + stored_fields="_none_", + docvalue_fields=names, + ) + if "fields" in hit and all(name in hit["fields"] for name in names) + ] + for name in names: + props[name] = [ + hit[name][0] if len(hit[name]) == 1 else hit[name] + for hit in hits + ] + + except RequestError: + props = self._get_props_from_source(names, query) + + # Use _source to ensure arrays are not flattened + else: + props = self._get_props_from_source(names, query) + + if len(names) == 1: + return props[names[0]] + return props + + def count_property(self, name, query: Optional[Union[dict, str]] = None) -> dict: + """ + Counts values of a specified property for matching documents in the + database. This method much faster than performing a Count on the list + returned by self.property, so this method should be used preferentially. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to count properties from. Default is `None`. + + Returns + ------- + Dictionary of values and counts for the specified property for all + matching documents. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + + body = { + "size": 0, + "query": query, + "aggs": { + format(name): { + "terms": { + "field": format(name), + "size": 10000, # Use composite for all results? 
+ }, + }, + }, + } + + prop = {} + + for val in self.client.search(index=self.index_name, body=body)["aggregations"][ + format(name) + ]["buckets"]: + prop[val["key"]] = val["doc_count"] + + return prop + + def properties(self, query: Optional[Union[dict, str]] = None) -> dict: + """ + Gets lists of all properties from matching documents, separated into + info, derived, and array properties. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to get properties from. Default is `None`. + + Returns + ------- + Dictionary of properties, with keys corresponding to info, derived, + and arrays of properties, and values corresponding to a list of + the properties of that type. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + + properties = {} + + for prop in self.client.indices.get_mapping(index=self.index_name)[ + self.index_name + ]["mappings"]["properties"].keys(): + body = { + "size": 0, + "query": query, + "aggs": { + "info_keys": { + "filter": {"term": {"derived.info_keys.keyword": prop}}, + }, + "derived_keys": { + "filter": {"term": {"derived.derived_keys.keyword": prop}}, + }, + "arrays_keys": { + "filter": {"term": {"derived.arrays_keys.keyword": prop}}, + }, + }, + } + + res = self.client.search( + index=self.index_name, + body=body, + ) + + for label in ("info_keys", "derived_keys", "arrays_keys"): + count = res["aggregations"][label]["doc_count"] + if count > 0: + key = label.split("_", maxsplit=1)[0] + if key in properties: + properties[key].append(prop) + else: + properties[key] = [prop] + + return properties + + def get_type_of_property(self, prop: str, category: str) -> str: + """ + Gets type of a property, given its category. + + Parameters + ---------- + prop: str + Name of the property. + category: str + Name of property's category. Current options are `info`, `derived`, + and `arrays`. + + Returns + ------- + Type of the property.
+ """ + atoms = self.client.search( + index=self.index_name, + body={"size": 1, "query": {"exists": {"field": prop}}}, + ) + + data = atoms["hits"]["hits"][0]["_source"][prop] + + if category == "arrays": + if isinstance(data[0], list): + return "array({}, N x {})".format( + map_types[type(data[0][0])], len(data[0]) + ) + return "vector({}, N)".format(map_types[type(data[0])]) + + if isinstance(data, list): + if isinstance(data[0], list): + if isinstance(data[0][0], list): + return "list(list(...)" + return "array({})".format(map_types[type(data[0][0])]) + return "vector({})".format(map_types[type(data[0])]) + return "scalar({})".format(map_types[type(data)]) + + def count_properties(self, query: Optional[Union[dict, str]] = None) -> dict: + """ + Counts all properties from matching documents. + + Parameters + ---------- + query: Optional[Union[dict, str]] + Query to filter documents to count properties from. Default is `None`. + + Returns + ------- + Dictionary of properties, with keys property names, and values + corresponding to their counts, categories and data types. 
+ """ + query = self.parser(query) + logger.info("parsed query: %s", query) + properties = {} + + try: + keys = self.client.indices.get_mapping(index=self.index_name)[ + self.index_name + ]["mappings"]["properties"].keys() + except KeyError: + return properties + + for key in keys: + body = { + "size": 0, + "query": query, + "aggs": { + "info_keys": { + "filter": {"term": {"derived.info_keys.keyword": key}}, + }, + "derived_keys": { + "filter": {"term": {"derived.derived_keys.keyword": key}}, + }, + "arrays_keys": { + "filter": {"term": {"derived.arrays_keys.keyword": key}}, + }, + }, + } + + res = self.client.search( + index=self.index_name, + body=body, + ) + + for label in ("info_keys", "derived_keys", "arrays_keys"): + count = res["aggregations"][label]["doc_count"] + if count > 0: + properties[key] = { + "count": count, + "category": label.split("_", maxsplit=1)[0], + "dtype": self.get_type_of_property( + key, label.split("_", maxsplit=1)[0] + ), + } + + return properties + + def add_property(self, data: dict, query: Optional[Union[dict, str]] = None): + """ + Adds properties to matching documents. + + Parameters + ---------- + data: dict + Property key-value pairs to be added to matching documents. + query: Optional[Union[dict, str]] + Query to filter documents to add properties to. Default is `None`. + """ + query = self.parser(query) + logger.info("add: data=%s, query=%s", data, query) + + script_txt = "ctx._source.derived.info_keys.addAll(params.keys);" + for key, val in data.items(): + script_txt += f"ctx._source.{key} = '{val}';" + + body = { + "script": { + "source": script_txt, + "lang": "painless", + "params": {"keys": list(data.keys())}, + }, + "query": query, + } + + self.client.update_by_query( + index=self.index_name, + body=body, + ) + + def rename_property( + self, name: str, new_name: str, query: Optional[Union[dict, str]] = None + ): + """ + Renames property for all matching documents. 
+ + Parameters + ---------- + name: str + Current name of property to be renamed. + new_name: str + New name of property to be renamed. + query: Optional[Union[dict, str]] + Query to filter documents to rename property. Default is `None`. + """ + query = self.parser(query) + logger.info("rename: query=%s, old=%s, new=%s", query, name, new_name) + + script_txt = "if (!ctx._source.containsKey(params.new_name)) { " + script_txt += ( + f"ctx._source.{new_name} = ctx._source.{name};" + " ctx._source.remove(params.name);" + " for (int i=0; i Optional[dict]: + """ + Calculate histogram statistics for a property from all matching documents. + + Parameters + ---------- + name: str + Name of property. + query: Optional[Union[dict, str]] + Query to filter documents. Default is `None`. + + Returns + ------- + Optional[dict] + Dictionary containing histogram statistics, including the number of + bins, edges, counts, min, max, and standard deviation. + """ + query = self.parser(query) + logger.info("parsed query: %s", query) + + data = self.property(name, query=query) + return utils.histogram(name, data, **kwargs) + + def __repr__(self): + """ + OpenSearch class representation. + + Returns + ------- + String for OpenSearch class representation, containing the connected + database host, port, and index name. + """ + if self.client.transport.hosts is not None: + host = self.client.transport.hosts[0]["host"] + port = self.client.transport.hosts[0]["port"] + else: + host, port = None, None + + return ( + f"{self.__class__.__name__}(" + f"url={host}:{port}, " + f"index={self.index_name}) " + ) + + def _repr_html_(self): + """ + Jupyter notebook representation of OpenSearch class. + + Returns + ------- + String for HTML representation. + """ + return "ABCD OpenSearch database" + + def print_info(self): + """ + Show basic information about the connected OpenSearch database. 
+ """ + out = linesep.join( + [ + "{:=^50}".format(" ABCD OpenSearch "), + "{:>10}: {}".format("type", "opensearch"), + linesep.join("{:>10}: {}".format(k, v) for k, v in self.info().items()), + ] + ) + + print(out) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + pass + + +if __name__ == "__main__": + db = OpenSearchDatabase(username="admin", password="admin") + print(db.info()) diff --git a/abcd/backends/atoms_properties.py b/abcd/backends/atoms_properties.py new file mode 100644 index 00000000..7372e797 --- /dev/null +++ b/abcd/backends/atoms_properties.py @@ -0,0 +1,201 @@ +from __future__ import annotations +import pandas as pd +import numpy as np +from typing import Union +from pathlib import Path +import chardet + + +class Properties: + """ + Wrapper to identify and manipulate properties to be passed + as extra_info to the database. + + Attributes + ---------- + data_file: Union[str, Path] + Name or path to data file containing properties. Treated as a csv file + by default, but Excel spreadsheets may also be read. + store_struct_file: bool + Whether to construct a filename for each structure. + struct_file_template: str + Template string for path to files containing structure. + struct_name_label: str + Field name in data file containing values for `struct_name`. + df: pd.Dataframe + Dataframe containing loaded property data from data file. + units: Union[dict, None], optional + Units. + struct_files: list[str] + List containing a filename for each structure in the dataframe. + encoding: str, optional + Encoding of csv file to be read. Default is `utf-8`. + """ + + def __init__( + self, + data_file: Union[str, Path], + store_struct_file: bool = False, + struct_file_template: Union[str, None] = None, + struct_name_label: Union[str, None] = None, + units: Union[dict, None] = None, + infer_units: bool = False, + encoding: str = "utf-8", + ): + """ + Initialises class. 
+ + Parameters + ---------- + data_file: Union[str, Path] + Path or filename of data file containing properties to be loaded. + Assumed to be a csv file by default, but Excel spreadsheets may + also be read. + store_struct_file: bool, optional + If true, use struct_file_template and struct_name_label to + construct filename for each structure. Default is `False`. + struct_file_template: Union[str, None], optional + Template string for path to files containing structure. + Required only if store_struct_file is True. + Template must contain `{struct_name}`, to ensure a unique file + for each structure. Default is `None`. + struct_name_label: Union[str, None], optional + Field name in data file containing values for `struct_name`. + Required only if store_struct_file is True. Default is `None`. + units: Union[dict, None], optional + Units for fields in data file. If unspecified and infer_units is + True, _separate_units() identifies units in field names. Default is `None`. + infer_units: bool, optional + Whether to attempt to infer units from field names in the + dataframe. Unused if units is not `None`. Default is `False`. + encoding: str, optional + Encoding of file to be read. Default is `utf-8`. + For pandas==1.2, setting this to `None` means `errors='replace'` + is passed to `open()`, which replaces invalid characters with + the replacement character. Otherwise, `errors='strict'` is passed + to `open()`, which means a UnicodeDecodeError is thrown if the + encoding is wrong. + For pandas==1.3, `encoding` no longer defines how errors are + handled. `encoding_errors` instead defaults to `strict`, which has + the same effect as non-None values of `encoding` for pandas==1.2. + """ + self.data_file = data_file + self.encoding = encoding + try: + self.df = pd.read_csv(self.data_file, encoding=self.encoding) + except UnicodeDecodeError: + detected = chardet.detect(Path(self.data_file).read_bytes()) + raise ValueError( + f"File cannot be decoded using encoding: {self.encoding}."
+ f" Detected encoding: {detected}." + ) + except pd.errors.ParserError: + self.df = pd.read_excel(self.data_file, header=0) + + self.df.replace({np.nan: None}, inplace=True) + + if units is not None: + for key in units: + if key not in self.df.columns.values: + raise ValueError( + f"Invalid field name: {key}. Keys in `units` must " + "correspond to field names in the loaded data." + ) + self.units = units + elif infer_units: + self._separate_units() + else: + self.units = None + + self.store_struct_file = store_struct_file + if self.store_struct_file: + if struct_file_template is None: + raise ValueError( + "`struct_file_template` must be specified if " + "store_struct_file is True." + ) + self.struct_file_template = struct_file_template + + if struct_name_label is None: + raise ValueError( + "`struct_name_label` must be specified if store_struct_file is" + " True." + ) + self.struct_name_label = struct_name_label + self.set_struct_files() + + def _separate_units(self): + """ + Parse field names to determine units. + """ + columns = [] + self.units = {} + for column in list(self.df.columns.values): + if "," in column: + column_name = column.split(",")[0].strip() + self.units[column_name] = column.split(",")[1].strip() + elif "(" in column: + column_name = column.split("(")[0].strip() + self.units[column_name] = column.split("(")[1].strip()[:-1] + else: + column_name = column + + columns.append(column_name) + + self.df.columns = columns + + def set_struct_files(self): + """ + Sets a list containing a filename for each structure in the dataframe. + """ + self.struct_files = [] + + for i in range(len(self.df)): + try: + struct_name = self.df.iloc[i][self.struct_name_label] + except KeyError: + raise ValueError( + f"{self.struct_name_label} is not a valid column in " + "the data loaded." 
+ ) + struct_file = self.get_struct_file(struct_name) + self.struct_files.append(struct_file) + + def get_struct_file(self, struct_name: str) -> str: + """ + Evaluate struct_file_template to determine structure filename + for current structure. + + Parameters + ---------- + struct_name: str + Name of current structure. + + Returns + ------- + Filename for the current structure. + """ + if "{struct_name}" not in self.struct_file_template: + raise ValueError( + "'struct_name' must be a variable in the template file: " + f"{self.struct_file_template}" + ) + return eval(f"f'{self.struct_file_template}'") + + def to_list(self) -> list[dict]: + """ + Convert dataframe into list of properties for each structure. + + Returns + ------- + List of property dictionaries for each structure in the dataframe. + """ + properties_list = [] + for i in range(len(self.df)): + properties = self.df.iloc[i].to_dict() + if self.units is not None: + properties["units"] = self.units + properties_list.append( + {key: value for key, value in properties.items() if value is not None} + ) + return properties_list diff --git a/abcd/backends/atoms_pymongo.py b/abcd/backends/atoms_pymongo.py index 993c6eb0..f51311f6 100644 --- a/abcd/backends/atoms_pymongo.py +++ b/abcd/backends/atoms_pymongo.py @@ -1,27 +1,23 @@ -import types +from datetime import datetime import logging -import numpy as np - -from typing import Union, Iterable from os import linesep -from operator import itemgetter -from collections import Counter -from datetime import datetime +from pathlib import Path +import types +from typing import Union, Iterable from ase import Atoms from ase.io import iread +from bson import ObjectId +from pymongo import MongoClient +import pymongo.errors +from abcd.backends import utils +from abcd.database import AbstractABCD import abcd.errors from abcd.model import AbstractModel -from abcd.database import AbstractABCD -from abcd.queryset import AbstractQuerySet from abcd.parsers import extras +from 
abcd.queryset import AbstractQuerySet -import pymongo.errors -from pymongo import MongoClient -from bson import ObjectId - -from pathlib import Path logger = logging.getLogger(__name__) @@ -135,6 +131,15 @@ def __call__(self, ast): p = parser(ast) return self.visit(p) + elif isinstance(ast, list): + from abcd.parsers.queries import parser + + if len(ast) == 0: + return {} + else: + ast = ("AND", *[parser(q) for q in ast]) + return self.visit(ast) + return self.visit(ast) if ast else {} @@ -222,7 +227,6 @@ def destroy(self): self.collection.drop() def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): - if extra_info and isinstance(extra_info, str): extra_info = extras.parser.parse(extra_info) @@ -234,15 +238,17 @@ def push(self, atoms: Union[Atoms, Iterable], extra_info=None, store_calc=True): # self.collection.insert_one(data) elif isinstance(atoms, types.GeneratorType) or isinstance(atoms, list): - - for item in atoms: + for i, item in enumerate(atoms): + if isinstance(extra_info, list): + info = extra_info[i] + else: + info = extra_info data = AtomsModel.from_atoms( - self.collection, item, extra_info=extra_info, store_calc=store_calc + self.collection, item, extra_info=info, store_calc=store_calc ) data.save() def upload(self, file: Path, extra_infos=None, store_calc=True): - if isinstance(file, str): file = Path(file) @@ -435,9 +441,8 @@ def delete_property(self, name, query=None): ) def hist(self, name, query=None, **kwargs): - data = self.property(name, query) - return histogram(name, data, **kwargs) + return utils.histogram(name, data, **kwargs) def exec(self, code, query=None): # TODO: Separate python environment with its own packages loaded @@ -480,139 +485,6 @@ def __exit__(self, exc_type, exc_val, exc_tb): pass -def histogram(name, data, **kwargs): - if not data: - return None - - elif data and isinstance(data, list): - - ptype = type(data[0]) - - if not all(isinstance(x, ptype) for x in data): - print("Mixed type error of the 
{} property!".format(name)) - return None - - if ptype == float: - bins = kwargs.get("bins", 10) - return _hist_float(name, data, bins) - - elif ptype == int: - bins = kwargs.get("bins", 10) - return _hist_int(name, data, bins) - - elif ptype == str: - return _hist_str(name, data, **kwargs) - - elif ptype == datetime: - bins = kwargs.get("bins", 10) - return _hist_date(name, data, bins) - - else: - print( - "{}: Histogram for list of {} types are not supported!".format( - name, type(data[0]) - ) - ) - logger.info( - "{}: Histogram for list of {} types are not supported!".format( - name, type(data[0]) - ) - ) - - else: - logger.info( - "{}: Histogram for {} types are not supported!".format(name, type(data)) - ) - return None - - -def _hist_float(name, data, bins=10): - data = np.array(data) - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_float", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), - "var": data.var(), - } - - -def _hist_date(name, data, bins=10): - hist_data = np.array([t.timestamp() for t in data]) - hist, bin_edges = np.histogram(hist_data, bins=bins) - - fromtimestamp = datetime.fromtimestamp - - return { - "type": "hist_date", - "name": name, - "bins": bins, - "edges": [fromtimestamp(d) for d in bin_edges], - "counts": hist, - "min": fromtimestamp(hist_data.min()), - "max": fromtimestamp(hist_data.max()), - "median": fromtimestamp(hist_data.mean()), - "std": fromtimestamp(hist_data.std()), - "var": fromtimestamp(hist_data.var()), - } - - -def _hist_int(name, data, bins=10): - data = np.array(data) - delta = max(data) - min(data) + 1 - - if bins > delta: - bins = delta - - hist, bin_edges = np.histogram(data, bins=bins) - - return { - "type": "hist_int", - "name": name, - "bins": bins, - "edges": bin_edges, - "counts": hist, - "min": data.min(), - "max": data.max(), - "median": data.mean(), - "std": data.std(), 
- "var": data.var(), - } - - -def _hist_str(name, data, bins=10, truncate=20): - n_unique = len(set(data)) - - if truncate: - # data = (item[:truncate] for item in data) - data = ( - item[:truncate] + "..." if len(item) > truncate else item for item in data - ) - - data = Counter(data) - - if bins: - labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) - else: - labels, counts = zip(*data.items()) - - return { - "type": "hist_str", - "name": name, - "total": sum(data.values()), - "unique": n_unique, - "labels": labels[:bins], - "counts": counts[:bins], - } - - if __name__ == "__main__": # import json # from ase.io import iread diff --git a/abcd/backends/utils.py b/abcd/backends/utils.py new file mode 100644 index 00000000..e55471eb --- /dev/null +++ b/abcd/backends/utils.py @@ -0,0 +1,131 @@ +from collections import Counter +from datetime import datetime +import logging +from operator import itemgetter + +import numpy as np + +logger = logging.getLogger(__name__) + + +def histogram(name, data, **kwargs): + if not data: + return None + + if isinstance(data, list): + ptype = type(data[0]) + + if not all(isinstance(x, ptype) for x in data): + print("Mixed type error of the %s property!", name) + return None + + if ptype == float: + bins = kwargs.get("bins", 10) + return _hist_float(name, data, bins) + + if ptype == int: + bins = kwargs.get("bins", 10) + return _hist_int(name, data, bins) + + if ptype == str: + return _hist_str(name, data, **kwargs) + + if ptype == datetime: + bins = kwargs.get("bins", 10) + return _hist_date(name, data, bins) + + print( + "%s: Histogram for list of %s types are not supported!", name, type(data[0]) + ) + logger.info( + "%s: Histogram for list of %s types are not supported!", name, type(data[0]) + ) + + logger.info("%s: Histogram for %s types are not supported!", name, type(data)) + return None + + +def _hist_float(name, data, bins=10): + data = np.array(data) + hist, bin_edges = np.histogram(data, bins=bins) 
+ + return { + "type": "hist_float", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + "median": data.mean(), + "std": data.std(), + "var": data.var(), + } + + +def _hist_date(name, data, bins=10): + hist_data = np.array([t.timestamp() for t in data]) + hist, bin_edges = np.histogram(hist_data, bins=bins) + + fromtimestamp = datetime.fromtimestamp + + return { + "type": "hist_date", + "name": name, + "bins": bins, + "edges": [fromtimestamp(d) for d in bin_edges], + "counts": hist, + "min": fromtimestamp(hist_data.min()), + "max": fromtimestamp(hist_data.max()), + "median": fromtimestamp(hist_data.mean()), + "std": fromtimestamp(hist_data.std()), + "var": fromtimestamp(hist_data.var()), + } + + +def _hist_int(name, data, bins=10): + data = np.array(data) + delta = max(data) - min(data) + 1 + + bins = min(bins, delta) + + hist, bin_edges = np.histogram(data, bins=bins) + + return { + "type": "hist_int", + "name": name, + "bins": bins, + "edges": bin_edges, + "counts": hist, + "min": data.min(), + "max": data.max(), + "median": data.mean(), + "std": data.std(), + "var": data.var(), + } + + +def _hist_str(name, data, bins=10, truncate=20): + n_unique = len(set(data)) + + if truncate: + # data = (item[:truncate] for item in data) + data = ( + item[:truncate] + "..." 
if len(item) > truncate else item for item in data + ) + + data = Counter(data) + + if bins: + labels, counts = zip(*sorted(data.items(), key=itemgetter(1, 0), reverse=True)) + else: + labels, counts = zip(*data.items()) + + return { + "type": "hist_str", + "name": name, + "total": sum(data.values()), + "unique": n_unique, + "labels": labels[:bins], + "counts": counts[:bins], + } diff --git a/abcd/frontends/commandline/commands.py b/abcd/frontends/commandline/commands.py index de158a5c..6c3e0edf 100644 --- a/abcd/frontends/commandline/commands.py +++ b/abcd/frontends/commandline/commands.py @@ -4,23 +4,22 @@ import numpy as np from abcd.frontends.commandline.decorators import check_remote, init_config, init_db +from abcd.backends.atoms_opensearch import OpenSearchDatabase logger = logging.getLogger(__name__) @init_config -def login(*, config, name, url, **kwargs): +def login(*, config, name, url, disable_ssl=False, **kwargs): logger.info( - "login args: \nconfig:{}, name:{}, url:{}, kwargs:{}".format( - config, name, url, kwargs - ) + "login args: \nconfig:%s, name:%s, url:%s, kwargs:%s", config, name, url, kwargs ) from abcd import ABCD - db = ABCD.from_url(url=url) + db = ABCD.from_url(url=url, use_ssl=(not disable_ssl)) info = db.info() - config["url"] = url + config.update(url=url, use_ssl=not disable_ssl) config.save() print("Successfully connected to the database!") @@ -36,7 +35,7 @@ def login(*, config, name, url, **kwargs): @init_config @init_db def download(*, db, query, fileformat, filename, **kwargs): - logger.info("download\n kwargs: {}".format(kwargs)) + logger.info("download\n kwargs: %s", kwargs) from ase.io import write @@ -51,7 +50,7 @@ def download(*, db, query, fileformat, filename, **kwargs): @init_db @check_remote def delete(*, db, query, yes, **kwargs): - logger.info("delete\n kwargs: {}".format(kwargs)) + logger.info("delete\n kwargs: %s", kwargs) if not yes: print( @@ -79,10 +78,10 @@ def upload(*, db, path, extra_infos, ignore_calc_results, 
**kwargs): elif path.is_dir(): for file in path.glob(".xyz"): - logger.info("Uploaded file: {}".format(file)) + logger.info("Uploaded file: %s", file) db.upload(file, extra_infos, store_calc=calculator) else: - logger.info("No file found: {}".format(path)) + logger.info("No file found: %s", path) raise FileNotFoundError() else: @@ -92,8 +91,8 @@ def upload(*, db, path, extra_infos, ignore_calc_results, **kwargs): @init_config @init_db def summary(*, db, query, print_all, bins, truncate, props, **kwargs): - logger.info("summary\n kwargs: {}".format(kwargs)) - logger.info("query: {}".format(query)) + logger.info("summary\n kwargs: %s", kwargs) + logger.info("query: %s", query) if print_all: truncate = None @@ -121,7 +120,6 @@ def summary(*, db, query, print_all, bins, truncate, props, **kwargs): f = Formater() if props_list is None: - props = db.count_properties(query=query) labels, categories, dtypes, counts = [], [], [], [] @@ -158,8 +156,8 @@ def summary(*, db, query, print_all, bins, truncate, props, **kwargs): @init_config @init_db def show(*, db, query, print_all, props, **kwargs): - logger.info("show\n kwargs: {}".format(kwargs)) - logger.info("query: {}".format(query)) + logger.info("show\n kwargs: %s", kwargs) + logger.info("query: %s", query) if not props: print("Please define at least on property by using the -p option!") @@ -184,14 +182,22 @@ def key_add(*, db, query, keys, **kwargs): data = parser.parse(keys) if query: - test = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + if isinstance(db, OpenSearchDatabase): + test = [ + f"{query} AND ({' OR '.join(f'{key}:*' for key in data)})" + for query in query + ] + else: + test = ("AND", query, ("OR", *(("NAME", key) for key in data))) + elif isinstance(db, OpenSearchDatabase): + test = " OR ".join(f"{key}:*" for key in data) else: - test = ("OR", *(("NAME", key) for key in data.keys())) + test = ("OR", *(("NAME", key) for key in data)) if db.count(query=test): print( - "The new key already 
exist for the given query! " - "Please make sure that the target key name don't exist" + "The new key already exists for the given query! " + "Please make sure that the target key name doesn't exist" ) exit(1) @@ -221,7 +227,13 @@ def key_delete(*, db, query, yes, keys, **kwargs): keys = " ".join(keys) data = parser.parse(keys) - query = ("AND", query, ("OR", *(("NAME", key) for key in data.keys()))) + if isinstance(db, OpenSearchDatabase): + query = [ + f"{query} AND ({' OR '.join(f'{key}:*' for key in data)})" + for query in query + ] + else: + query = ("AND", query, ("OR", *(("NAME", key) for key in data))) if not yes: print( @@ -231,7 +243,7 @@ def key_delete(*, db, query, yes, keys, **kwargs): ) exit(1) - for k in keys: + for k in data: db.delete_property(k, query=query) @@ -255,9 +267,7 @@ def server(*, abcd_url, url, api_only, **kwargs): from urllib.parse import urlparse from abcd.server.app import create_app - logger.info( - "SERVER - abcd: {}, url: {}, api_only:{}".format(abcd_url, url, api_only) - ) + logger.info("SERVER - abcd: %s, url: %s, api_only: %s", abcd_url, url, api_only) if api_only: print("Not implemented yet!") @@ -269,6 +279,12 @@ def server(*, abcd_url, url, api_only, **kwargs): app.run(host=o.hostname, port=o.port) +@init_config +@init_db +def refresh(*, db, **kwargs): + db.refresh() + + class Formater(object): partialBlocks = ["▏", "▎", "▍", "▌", "▋", "▊", "▉", "█"] # char=pb @@ -278,8 +294,8 @@ def title(self, title): def describe(self, data): if data["type"] == "hist_float": print( - "{} count: {} min: {:11.4e} med: {:11.4e} max: {:11.4e} std: {:11.4e} var:{" - ":11.4e}".format( + "{} count: {} min: {:11.4e} med: {:11.4e} max: {:11.4e} std: {:11.4e}" + " var:{:11.4e}".format( data["name"], sum(data["counts"]), data["min"], @@ -321,7 +337,6 @@ def hist_float(self, bin_edges, counts, width_hist=40): ) def hist_int(self, bin_edges, counts, width_hist=40): - ratio = width_hist / max(counts) width_count = len(str(max(counts))) @@ -373,7 
+388,6 @@ def hist_str(self, total, counts, labels, width_hist=40): ) def hist_labels(self, counts, categories, dtypes, labels, width_hist=40): - width_count = len(str(max(counts))) ratio = width_hist / max(counts) for label, count, dtype in zip(labels, counts, dtypes): diff --git a/abcd/frontends/commandline/config.py b/abcd/frontends/commandline/config.py index 3aa21bea..4a2573e5 100644 --- a/abcd/frontends/commandline/config.py +++ b/abcd/frontends/commandline/config.py @@ -20,7 +20,6 @@ def from_json(cls, filename): @classmethod def load(cls): - if ( os.environ.get("ABCD_CONFIG") and Path(os.environ.get("ABCD_CONFIG")).is_file() diff --git a/abcd/frontends/commandline/decorators.py b/abcd/frontends/commandline/decorators.py index c2439be7..ce509004 100644 --- a/abcd/frontends/commandline/decorators.py +++ b/abcd/frontends/commandline/decorators.py @@ -1,9 +1,7 @@ import logging - +import functools from abcd import ABCD - from abcd.frontends.commandline.config import Config -from abcd.parsers.queries import parser logger = logging.getLogger(__name__) @@ -11,6 +9,7 @@ def init_config(func): config = Config.load() + @functools.wraps(func) def wrapper(*args, **kwargs): func(*args, config=config, **kwargs) @@ -18,39 +17,38 @@ def wrapper(*args, **kwargs): def init_db(func): + @functools.wraps(func) def wrapper(*args, config, **kwargs): url = config.get("url", None) + use_ssl = config.get("use_ssl", None) if url is None: print("Please use abcd login first!") exit(1) - db = ABCD.from_url(url=url) + if use_ssl is None: + print("use_ssl has not been saved. Please login again") + exit(1) + + db = ABCD.from_url(url=url, use_ssl=use_ssl) # TODO: AST.from_string() ?! 
- # TODO: parser should accept list # TODO: better ast optimisation query_list = [] for q in kwargs.pop("default_query", []): - query_list.append(parser(q)) + query_list.append(q) for q in kwargs.pop("query", []): - query_list.append(parser(q)) - - if not query_list: - query = None - elif len(query_list) == 1: - query = query_list[0] - else: - query = ("AND", *query_list) + query_list.append(q) - func(*args, db=db, query=query, **kwargs) + func(*args, db=db, query=query_list, **kwargs) return wrapper def check_remote(func): + @functools.wraps(func) def wrapper(*args, **kwargs): if kwargs.pop("remote"): print("In read only mode, you can't modify the data in the database") diff --git a/abcd/frontends/commandline/parser.py b/abcd/frontends/commandline/parser.py index 9b2c1af2..e3f4b1f1 100644 --- a/abcd/frontends/commandline/parser.py +++ b/abcd/frontends/commandline/parser.py @@ -36,6 +36,11 @@ help="url of abcd api (default: http://localhost)", default="http://localhost", ) +login_parser.add_argument( + "--disable_ssl", + action="store_true", + help="Disable SSL encryption", +) download_parser = subparsers.add_parser( "download", help="download data from the database" @@ -198,6 +203,9 @@ "-u", "--url", help="Url to run the server.", default="http://localhost:5000" ) +refresh_parser = subparsers.add_parser("refresh", help="refresh database") +refresh_parser.set_defaults(callback_func=commands.refresh) + def main(args=None): kwargs = parser.parse_args(args).__dict__ diff --git a/abcd/model.py b/abcd/model.py index f4c87b61..18c47456 100644 --- a/abcd/model.py +++ b/abcd/model.py @@ -4,6 +4,7 @@ from hashlib import md5 from collections import Counter, UserDict from ase.calculators.singlepoint import SinglePointCalculator +from ase.spacegroup.spacegroup import Spacegroup import numpy as np from ase import Atoms @@ -16,7 +17,6 @@ def __init__(self, method=md5()): self.method = method def update(self, value): - if isinstance(value, int): 
self.update(str(value).encode("ascii")) @@ -44,7 +44,7 @@ def update(self, value): else: raise ValueError( - "The {} type cannot be hashed! (Value: {})", format(type(value), value) + f"The {type(value)} type cannot be hashed! (Value: {value})" ) def __call__(self): @@ -80,14 +80,12 @@ def derived(self): } def __getitem__(self, key): - if key == "derived": return self.derived return super().__getitem__(key) def __setitem__(self, key, value): - if key == "derived": # raise KeyError('Please do not use "derived" as key because it is protected!') # Silent return to avoid raising error in pymongo package @@ -107,7 +105,6 @@ def convert(self, value): return value def update_key_category(self, key, value): - if key == "_id": # raise KeyError('Please do not use "derived" as key because it is protected!') return @@ -190,6 +187,8 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): for key, value in atoms.info.items(): if isinstance(value, np.ndarray): dct[key] = value.tolist() + elif isinstance(value, Spacegroup): + dct[key] = value.todict() else: dct[key] = value @@ -199,7 +198,6 @@ def from_atoms(cls, atoms: Atoms, extra_info=None, store_calc=True): info_keys.update({"calculator_name", "calculator_parameters"}) for key, value in atoms.calc.results.items(): - if isinstance(value, np.ndarray): if value.shape[0] == n_atoms: arrays_keys.update(key) diff --git a/abcd/parsers/extras.py b/abcd/parsers/extras.py index c007acf6..31cbc069 100644 --- a/abcd/parsers/extras.py +++ b/abcd/parsers/extras.py @@ -6,7 +6,7 @@ start: ( key | key_value )* key: NAME - key_value: NAME "=" value + key_value: NAME ("="|":") value NAME: ("_"|LETTER|DIGIT) ("_"|"-"|LETTER|DIGIT)* @@ -97,7 +97,7 @@ def string(self, s): if __name__ == "__main__": test_string = " ".join( [ - " " "flag", # start with a separator + " flag", # start with a separator 'quotedd_string="quoteddd value"', r'quotedddd_string_escaped="esc\"aped"', "false_value = F", @@ -108,8 +108,9 @@ def string(self, s): 
"scientific_float_2=5e-6", 'scientific_float_array="1.2 2.2e3 4e1 3.3e-1 2e-2"', 'not_array="1.2 3.4 text"', - "array_nested=[[1,2],[3,4]] " # gets flattented if not 3x3 - "array_many_other_quotes=({[4 8 12]})", + ( # gets flattented if not 3x3 + "array_nested=[[1,2],[3,4]] array_many_other_quotes=({[4 8 12]})" + ), "array_boolean={T F T F}", 'array_boolean_2=" T, F, T " ' # leading spaces # 'not_bool_array=[T F S]', diff --git a/abcd/server/app/db.py b/abcd/server/app/db.py index 65bc9c70..b61a6a04 100644 --- a/abcd/server/app/db.py +++ b/abcd/server/app/db.py @@ -5,7 +5,9 @@ class Database(ABCD): - """Wrapper for the ABCD factory method for registering a the database for the Flask application.""" + """ + Wrapper for the ABCD factory method for registering a the database for the Flask application. + """ def __init__(self): super().__init__() diff --git a/abcd/server/app/nav.py b/abcd/server/app/nav.py index ec8ea2d1..354714cf 100644 --- a/abcd/server/app/nav.py +++ b/abcd/server/app/nav.py @@ -164,7 +164,6 @@ def visit_View(self, node): return item def visit_Subgroup(self, node): - if self._in_dropdown: raise RuntimeError("Cannot render nested Subgroups") diff --git a/abcd/server/app/views/database.py b/abcd/server/app/views/database.py index 8569dcd2..432efafc 100644 --- a/abcd/server/app/views/database.py +++ b/abcd/server/app/views/database.py @@ -21,7 +21,10 @@ def database(database_name): info = { "name": database_name, - "description": "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor. Duis mollis, est non commodo luctus.", + "description": ( + "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor." + " Duis mollis, est non commodo luctus." 
+ ), "columns": [ {"slug": "formula", "name": "Formula"}, {"slug": "energy", "name": "Energy"}, @@ -43,7 +46,10 @@ def database(database_name): def settings(database_name): info = { "name": database_name, - "description": "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor. Duis mollis, est non commodo luctus.", + "description": ( + "Vivamus sagittis lacus vel augue laoreet rutrum faucibus dolor auctor." + " Duis mollis, est non commodo luctus." + ), "columns": [ {"slug": "formula", "name": "Formula"}, {"slug": "energy", "name": "Energy"}, diff --git a/pyproject.toml b/pyproject.toml index b2d5c162..2c299702 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,26 +10,33 @@ repository = "https://github.com/libatoms/abcd" documentation = "https://libatoms.github.io/abcd/" [tool.poetry.dependencies] -python = "^3.9" +ase = "3.22.1" +chardet = "^5.2.0" +lark = "^1.1.9" +matplotlib = "^3.9" notebook = "^7.2" numpy = "^1.26" -tqdm = "^4.66" +openpyxl = "^3.1.2" +opensearch-py = "^2.4.0" +pandas = "^2.2" pymongo = "^4.7.3" -matplotlib = "^3.9" -ase = "3.22.1" -lark = "^1.1.9" +python = "^3.9" +tqdm = "^4.66" [tool.poetry.group.dev.dependencies] +black = "^22.3.0" +flake8 = "^3.7.9" mongomock = "^4.1.2" +openmock = "^2.2" pytest = "^8.2.2" pytest-cov = "^5.0.0" [tool.poetry.extras] -tests = ["mongomock", "pytest", "pytest-cov"] -mongo = ["pymongo"] http = ["requests"] +mongo = ["pymongo"] server-api = ["flask"] server-app = ["flask", "Flask-Nav", "Flask-MongoEngine", "gunicorn", "flask-paginate"] +tests = ["mongomock", "pytest", "pytest-cov"] [build-system] requires = ["poetry-core"] @@ -37,3 +44,6 @@ build-backend = "poetry.core.masonry.api" [tool.poetry.plugins."console_scripts"] "abcd" = "abcd.frontends.commandline:main" + +[tool.black] +line-length = 88 diff --git a/tests/data/example.xyz b/tests/data/example.xyz new file mode 100644 index 00000000..2a81c26d --- /dev/null +++ b/tests/data/example.xyz @@ -0,0 +1,4 @@ +2 
+Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-101.9 +Si 0.00000000 1.00000000 2.00000000 +Si 4.00000000 5.00000000 6.00000000 diff --git a/tests/data/example_2.xyz b/tests/data/example_2.xyz new file mode 100644 index 00000000..13315d57 --- /dev/null +++ b/tests/data/example_2.xyz @@ -0,0 +1,5 @@ +3 +Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" energy=-99.33 +Si 0.00000000 1.00000000 2.00000000 +Si 3.00000000 4.00000000 5.00000000 +Si 6.00000000 7.00000000 7.00000000 diff --git a/tests/data/examples.csv b/tests/data/examples.csv new file mode 100755 index 00000000..6473a822 --- /dev/null +++ b/tests/data/examples.csv @@ -0,0 +1,4 @@ +Text,Integers,Floating,Boolean,Missing data,"Comma units, m",Bracket units (s) +Some,1,0.01,TRUE,Missing,0,0 +test,2,0.1,FALSE,,1,1 +data,3,1,FALSE,data,2,2 diff --git a/tests/test_cli.py b/tests/test_cli.py new file mode 100644 index 00000000..3089affc --- /dev/null +++ b/tests/test_cli.py @@ -0,0 +1,125 @@ +import logging +import os +from pathlib import Path +import subprocess +from time import sleep + +import pytest + + +DATA_PATH = Path(__file__).parent / "data" + +NOT_GTHUB_ACTIONS = True +if os.getenv("GITHUB_ACTIONS") == "true": + NOT_GTHUB_ACTIONS = False + + +@pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") +class TestCli: + """Testing OpenSearch database CLI integration.""" + + @pytest.fixture(autouse=True) + def abcd(self): + """Set up OpenSearch database connection and login with CLI.""" + security_enabled = os.getenv("security_enabled") == "true" + port = int(os.environ["port"]) + host = "localhost" + if os.environ["opensearch-version"] == "latest": + credential = "admin:myStrongPassword123!" 
+ else: + credential = "admin:admin" + + logging.basicConfig(level=logging.INFO) + + url = f"opensearch://{credential}@{host}:{port}" + if not security_enabled: + url += " --disable_ssl" + try: + subprocess.run(f"abcd login {url}", shell=True, check=True) + except subprocess.CalledProcessError: + sleep(10) + subprocess.run(f"abcd login {url}", shell=True, check=True) + + def test_summary(self, abcd): + """ + Test summary output of uploaded data file. + """ + data_file = DATA_PATH / "example.xyz" + + subprocess.run( + f"abcd upload {data_file} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + + summary = subprocess.run( + "abcd summary", shell=True, check=True, capture_output=True, text=True + ) + assert "Total number of configurations" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + def test_query(self, abcd): + """ + Test lucene-style query. + """ + data_file_1 = DATA_PATH / "example.xyz" + data_file_2 = DATA_PATH / "example_2.xyz" + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + + summary = subprocess.run( + "abcd show -p n_atoms -q 'n_atoms : 2'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "2" in summary.stdout and "3" not in summary.stdout + summary = subprocess.run( + "abcd show -p n_atoms -q 'n_atoms : 3'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "3" in summary.stdout and "2" not in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) + + def test_range_query(self, abcd): + """ + Test lucene-style ranged query. 
+ """ + data_file_1 = DATA_PATH / "example.xyz" + data_file_2 = DATA_PATH / "example_2.xyz" + + subprocess.run( + f"abcd upload {data_file_1} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run( + f"abcd upload {data_file_2} -i -e 'test_data'", shell=True, check=True + ) + subprocess.run(f"abcd refresh", shell=True, check=True) + + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-100 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 1" in summary.stdout + + summary = subprocess.run( + "abcd summary -p energy -q 'energy:[-102 TO -99]'", + shell=True, + check=True, + capture_output=True, + text=True, + ) + assert "Total number of configurations: 2" in summary.stdout + subprocess.run(f"abcd delete -q 'test_data' -y", shell=True) diff --git a/tests/test_mongomock.py b/tests/test_mongomock.py new file mode 100644 index 00000000..5444fe59 --- /dev/null +++ b/tests/test_mongomock.py @@ -0,0 +1,44 @@ +from io import StringIO +import logging +import unittest + +from ase.io import read +from ase.atoms import Atoms +import mongomock +import pytest + +from abcd import ABCD + + +class TestMongoMock: + @pytest.fixture(autouse=True) + @mongomock.patch(servers=(("localhost", 27017),)) + def abcd(self): + logging.basicConfig(level=logging.INFO) + url = "mongodb://localhost" + mongo_abcd = ABCD.from_url(url) + mongo_abcd.print_info() + return mongo_abcd + + def test_info(self, abcd): + print(abcd.info()) + + def test_push(self, abcd): + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + + abcd.destroy() + abcd.push(atoms) + new = list(abcd.get_atoms())[0] + + assert atoms == new + abcd.destroy() diff --git a/tests/test_opensearch.py 
b/tests/test_opensearch.py new file mode 100644 index 00000000..1735bca9 --- /dev/null +++ b/tests/test_opensearch.py @@ -0,0 +1,533 @@ +from io import StringIO +import logging +import os +from time import sleep + +from ase.atoms import Atoms +from ase.io import read +from opensearchpy.exceptions import ConnectionError +import pytest + +from abcd import ABCD +from abcd.backends.atoms_opensearch import AtomsModel, OpenSearchDatabase + +NOT_GTHUB_ACTIONS = True +if os.getenv("GITHUB_ACTIONS") == "true": + NOT_GTHUB_ACTIONS = False + + +@pytest.mark.skipif(NOT_GTHUB_ACTIONS, reason="Not running via GitHub Actions") +class TestOpenSearch: + """Testing live OpenSearch database functions.""" + + @pytest.fixture(autouse=True) + def abcd(self): + """Set up OpenSearch database connection.""" + security_enabled = os.getenv("security_enabled") == "true" + self.port = int(os.environ["port"]) + self.host = "localhost" + if os.environ["opensearch-version"] == "latest": + credential = "admin:myStrongPassword123!" 
+ else: + credential = "admin:admin" + + logging.basicConfig(level=logging.INFO) + + url = f"opensearch://{credential}@{self.host}:{self.port}" + try: + abcd_opensearch = ABCD.from_url( + url, + index_name="test_index", + use_ssl=security_enabled, + ) + except (ConnectionError, ConnectionResetError): + sleep(10) + abcd_opensearch = ABCD.from_url( + url, + index_name="test_index", + use_ssl=security_enabled, + ) + + assert isinstance(abcd_opensearch, OpenSearchDatabase) + return abcd_opensearch + + def push_data(self, abcd): + """Helper function to upload an example xyz file to the database.""" + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + abcd.push(atoms) + abcd.refresh() + + def test_info(self, abcd): + """Test printing database info.""" + abcd.destroy() + abcd.create() + abcd.refresh() + abcd.print_info() + + info = { + "host": self.host, + "port": self.port, + "db": "abcd", + "index": "test_index", + "number of confs": 0, + "type": "opensearch", + } + assert abcd.info() == info + + def test_destroy(self, abcd): + """Test destroying database index.""" + abcd.destroy() + abcd.create() + abcd.refresh() + assert abcd.client.indices.exists("test_index") is True + + abcd.destroy() + assert abcd.client.indices.exists("test_index") is False + + def test_create(self, abcd): + """Test creating database index.""" + abcd.destroy() + abcd.create() + abcd.refresh() + assert abcd.client.indices.exists("test_index") is True + assert abcd.client.indices.exists("fake_index") is False + + def test_push(self, abcd): + """Test pushing atoms objects to database individually.""" + abcd.destroy() + abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 
0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + W 0.00000000 0.00000000 0.00000000 + W 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + abcd.refresh() + result = AtomsModel( + None, + None, + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + assert atoms_1 == result + assert atoms_2 != result + + def test_delete(self, abcd): + """Test deleting all documents from database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + self.push_data(abcd) + abcd.refresh() + + assert abcd.count() == 2 + abcd.delete() + assert abcd.client.indices.exists("test_index") is True + abcd.refresh() + assert abcd.count() == 0 + + def test_bulk(self, abcd): + """Test pushing atoms object to database together.""" + abcd.destroy() + abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + + xyz_2 = StringIO( + """1 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + atoms_list = [] + atoms_list.append(atoms_1) + atoms_list.append(atoms_2) + abcd.push(atoms_list) + + abcd.refresh() + assert abcd.count() == 2 + result_1 = AtomsModel( + None, + None, + abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + result_2 = AtomsModel( + None, + None, + 
abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + ).to_ase() + assert atoms_1 == result_1 + assert atoms_2 == result_2 + + def test_count(self, abcd): + """Test counting the number of documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + self.push_data(abcd) + assert abcd.count() == 2 + + def test_property(self, abcd): + """Test getting values of a property from the database.""" + abcd.destroy() + abcd.create() + + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 energy=-5.0 prop_1="test_1" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1, store_calc=False) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 energy=-10.0 prop_2="test_2" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + abcd.push(atoms_2, store_calc=False) + + abcd.refresh() + prop = abcd.property("prop_1") + expected_prop = ["test_1"] + assert prop == expected_prop + + prop = abcd.property("energy") + expected_prop = [-5.0, -10.0] + assert prop[0] == expected_prop[0] + assert prop[1] == expected_prop[1] + + def test_properties(self, abcd): + """Test getting all properties from the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + props = abcd.properties() + expected_props = { + "info": ["_vtk_test", "cell", "formula", "n_atoms", "pbc", "s", "volume"], + "derived": [ + "elements", + "hash", + "hash_structure", + "modified", + "uploaded", + "username", + "volume", + ], + "arrays": ["numbers", "positions"], + } + assert props == expected_props + + def test_count_property(self, abcd): + """Test counting values of specified properties from the database.""" + abcd.destroy() + abcd.create() + + xyz_1 = 
StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_1="1" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1) + + xyz_2 = StringIO( + """1 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + abcd.push(atoms_2) + + abcd.refresh() + assert abcd.count_property("prop_1") == {1: 1} + assert abcd.count_property("n_atoms") == {1: 1, 2: 1} + assert abcd.count_property("volume") == {1.0: 2} + + def test_count_properties(self, abcd): + """Test counting appearences of each property in documents in the database.""" + abcd.destroy() + abcd.create() + + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + abcd.push(atoms_2) + + abcd.refresh() + props = abcd.count_properties() + expected_counts = { + "prop_1": {"count": 1, "category": "info", "dtype": "scalar(str)"}, + "prop_2": {"count": 1, "category": "info", "dtype": "scalar(str)"}, + "cell": {"count": 2, "category": "info", "dtype": "array(float)"}, + "elements": {"count": 2, "category": "derived", "dtype": "scalar(dict)"}, + "formula": {"count": 2, "category": "info", "dtype": "scalar(str)"}, + "hash": {"count": 2, "category": 
"derived", "dtype": "scalar(str)"}, + "hash_structure": { + "count": 2, + "category": "derived", + "dtype": "scalar(str)", + }, + "modified": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "n_atoms": {"count": 2, "category": "info", "dtype": "scalar(int)"}, + "numbers": {"count": 2, "category": "arrays", "dtype": "vector(int, N)"}, + "pbc": {"count": 2, "category": "info", "dtype": "vector(bool)"}, + "positions": { + "count": 2, + "category": "arrays", + "dtype": "array(float, N x 3)", + }, + "s": {"count": 2, "category": "info", "dtype": "scalar(str)"}, + "uploaded": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "username": {"count": 2, "category": "derived", "dtype": "scalar(str)"}, + "volume": {"count": 2, "category": "derived", "dtype": "scalar(float)"}, + } + + assert props == expected_counts + + def test_add_property(self, abcd): + """Test adding a property to documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + + abcd.refresh() + data = abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] == "TEST_VALUE" + assert ( + "TEST_PROPERTY" + in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + ) + + def test_rename_property(self, abcd): + """Test renaming a property for documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + abcd.refresh() + abcd.rename_property("TEST_PROPERTY", "NEW_PROPERTY") + abcd.refresh() + + data = abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["NEW_PROPERTY"] == "TEST_VALUE" + + def test_delete_property(self, abcd): + """Test deleting a property from documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + + abcd.add_property({"TEST_PROPERTY": "TEST_VALUE"}) + abcd.refresh() + data = 
abcd.client.search(index="test_index") + assert data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] == "TEST_VALUE" + + abcd.delete_property("TEST_PROPERTY") + abcd.refresh() + data = abcd.client.search(index="test_index") + with pytest.raises(KeyError): + data["hits"]["hits"][0]["_source"]["TEST_PROPERTY"] + assert ( + "TEST_PROPERTY" + not in data["hits"]["hits"][0]["_source"]["derived"]["info_keys"] + ) + + def test_get_items(self, abcd): + """Test getting a dictionary of values from documents in the database.""" + abcd.destroy() + abcd.create() + self.push_data(abcd) + + expected_items = { + "_id": None, + "n_atoms": 2, + "numbers": [14, 14], + "_vtk_test": "t _ e s t", + "positions": [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0]], + "cell": [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]], + "pbc": [False, False, False], + "volume": 1.0, + "hash_structure": None, + "s": "sadf", + "elements": {"14": 2}, + "uploaded": None, + "formula": "Si2", + "modified": None, + "derived": { + "info_keys": [ + "s", + "n_atoms", + "_vtk_test", + "cell", + "pbc", + "formula", + "volume", + ], + "derived_keys": [ + "elements", + "username", + "uploaded", + "modified", + "volume", + "hash_structure", + "hash", + ], + "arrays_keys": ["numbers", "positions"], + "results_keys": [], + }, + "hash": None, + "username": None, + } + + abcd.refresh() + items = list(abcd.get_items())[0] + + for key in expected_items: + if key not in [ + "_id", + "hash_structure", + "uploaded", + "modified", + "hash", + "username", + ]: + if isinstance(expected_items[key], dict): + for dict_key in expected_items[key]: + if isinstance(expected_items[key][dict_key], list): + assert set(expected_items[key][dict_key]) == set( + items[key][dict_key] + ) + else: + assert expected_items[key][dict_key] == items[key][dict_key] + else: + assert expected_items[key] == items[key] + + def test_get_atoms(self, abcd): + """Test getting values from documents in the database as Atoms objects.""" + abcd.destroy() + abcd.create() + 
self.push_data(abcd) + expected_atoms = Atoms(symbols="Si2", pbc=False, cell=[1.0, 1.0, 1.0]) + assert expected_atoms == list(abcd.get_atoms())[0] + + def test_query(self, abcd): + """Test querying documents in the database.""" + abcd.destroy() + abcd.create() + + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_1="test_1" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" prop_2="test_2" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + abcd.push(atoms_2) + abcd.refresh() + + query_dict = {"match": {"n_atoms": 2}} + query_all = "volume: [0 TO 10]" + query_1 = "prop_1: *" + query_2 = "prop_2: *" + assert abcd.count(query_dict) == 2 + assert abcd.count(query_all) == 2 + assert abcd.count(query_1) == 1 + assert abcd.count(query_2) == 1 diff --git a/tests/test_opensearch_mock.py b/tests/test_opensearch_mock.py new file mode 100644 index 00000000..94dd6711 --- /dev/null +++ b/tests/test_opensearch_mock.py @@ -0,0 +1,160 @@ +from io import StringIO +import logging +import os + +from ase.atoms import Atoms +from ase.io import read +from openmock import openmock +import pytest + +from abcd import ABCD +from abcd.backends.atoms_opensearch import AtomsModel, OpenSearchDatabase + + +class TestOpenSearchMock: + """Testing mock OpenSearch database functions.""" + + @pytest.fixture(autouse=True) + @openmock + def abcd(self): + """Set up database connection.""" + + if "port" in os.environ: + port = int(os.environ["port"]) + else: + port = 9200 + host = "localhost" + security_enabled = os.getenv("security_enabled") == "true" + if 
os.environ["opensearch-version"] == "latest": + credential = "admin:myStrongPassword123!" + else: + credential = "admin:admin" + + logging.basicConfig(level=logging.INFO) + + url = f"opensearch://{credential}@{host}:{port}" + opensearch_abcd = ABCD.from_url( + url, + index_name="test_index", + use_ssl=security_enabled, + ) + assert isinstance(opensearch_abcd, OpenSearchDatabase) + return opensearch_abcd + + def test_destroy(self, abcd): + """ + Test destroying database index. + """ + assert abcd.client.indices.exists("test_index") is True + abcd.destroy() + assert abcd.client.indices.exists("test_index") is False + + def test_create(self, abcd): + """ + Test creating database index. + """ + abcd.destroy() + abcd.create() + assert abcd.client.indices.exists("test_index") is True + abcd.client.indices.exists("fake_index") is False + + def test_push(self, abcd): + """ + Test pushing atoms objects to database individually. + """ + abcd.destroy() + abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + abcd.push(atoms_1) + + xyz_2 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + W 0.00000000 0.00000000 0.00000000 + W 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + abcd.refresh() + result = AtomsModel( + dict=abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + assert atoms_1 == result + assert atoms_2 != result + + def test_bulk(self, abcd): + """ + Test pushing atoms object to database together. 
+ """ + abcd.destroy() + abcd.create() + xyz_1 = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_1 = read(xyz_1, format="extxyz") + assert isinstance(atoms_1, Atoms) + atoms_1.set_cell([1, 1, 1]) + + xyz_2 = StringIO( + """1 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + """ + ) + atoms_2 = read(xyz_2, format="extxyz") + assert isinstance(atoms_2, Atoms) + atoms_2.set_cell([1, 1, 1]) + + atoms_list = [] + atoms_list.append(atoms_1) + atoms_list.append(atoms_2) + abcd.push(atoms_list) + abcd.refresh() + assert abcd.count() == 2 + + result_1 = AtomsModel( + dict=abcd.client.search(index="test_index")["hits"]["hits"][0]["_source"], + ).to_ase() + result_2 = AtomsModel( + dict=abcd.client.search(index="test_index")["hits"]["hits"][1]["_source"], + ).to_ase() + assert atoms_1 == result_1 + assert atoms_2 == result_2 + + def test_count(self, abcd): + """ + Test counting the number of documents in the database. 
+ """ + abcd.destroy() + abcd.create() + xyz = StringIO( + """2 + Properties=species:S:1:pos:R:3 s="sadf" _vtk_test="t _ e s t" pbc="F F F" + Si 0.00000000 0.00000000 0.00000000 + Si 0.00000000 0.00000000 0.00000000 + """ + ) + + atoms = read(xyz, format="extxyz") + assert isinstance(atoms, Atoms) + atoms.set_cell([1, 1, 1]) + abcd.push(atoms) + abcd.push(atoms) + abcd.refresh() + assert abcd.count() == 2 diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 5a2211e0..708809e1 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -52,6 +52,7 @@ def test_string(self, parser, string, expected): ("true_value", {"true_value": True}), ("true_value_long = true", {"true_value_long": True}), ("false_value = F", {"false_value": False}), + ("false_value_colon: F", {"false_value_colon": False}), ], ) def test_boolean(self, parser, string, expected): @@ -65,6 +66,7 @@ def test_boolean(self, parser, string, expected): ("floating=1.1", {"floating": 1.1}), ("scientific_float=1.2e7", {"scientific_float": 1.2e7}), ("scientific_float_2=5e-6", {"scientific_float_2": 5e-6}), + ("floating_colon: 3.14", {"floating_colon": 3.14}), ], ) def test_numbers(self, parser, string, expected): @@ -86,6 +88,7 @@ def test_numbers(self, parser, string, expected): "array_bool_commas=[T, T, F, T]", {"array_bool_commas": [True, True, False, True]}, ), + ("int_array_colon: {4 2}", {"int_array_colon": [4, 2]}), ], ) def test_arrays(self, parser, string, expected): @@ -124,6 +127,17 @@ def test_composite(self, parser): out = parser.parse(composite_string) assert out == composite_expected + @pytest.mark.parametrize( + "string, expected", + [ + ('colon_string:"astring"', {"colon_string": "astring"}), + ('colon_string_spaces : "astring"', {"colon_string_spaces": "astring"}), + ], + ) + def test_colon_key_value_pairs(self, parser, string, expected): + """Key value pairs separated by colons""" + assert expected == parser.parse(string) + @pytest.mark.skip @pytest.mark.parametrize( 
"string", diff --git a/tests/test_properties.py b/tests/test_properties.py new file mode 100644 index 00000000..1578fc64 --- /dev/null +++ b/tests/test_properties.py @@ -0,0 +1,155 @@ +import os + +from pandas import DataFrame +import pytest + +from abcd.backends.atoms_properties import Properties + + +class TestProperties: + """Testing properties data reader""" + + @pytest.fixture(autouse=True) + def property(self): + """Load example data file.""" + class_path = os.path.normpath(os.path.abspath(__file__)) + data_file = os.path.dirname(class_path) + "/data/examples.csv" + return Properties(data_file) + + def test_dataframe(self, property): + """ + Test data correctly stored in pandas DataFrame. + """ + assert isinstance(property.df, DataFrame) + assert len(property.df) == 3 + + def test_specify_units(self, property): + """ + Test units can be specified manually, if they match existing fields. + """ + input_units_1 = {"Integers": "items", "Floating": "seconds"} + properties_1 = Properties( + data_file=property.data_file, + units=input_units_1, + ) + assert properties_1.units == input_units_1 + + input_units_2 = {"Fake": "m"} + with pytest.raises(ValueError): + properties_1 = Properties( + data_file=property.data_file, + units=input_units_2, + ) + + def test_infer_units(self, property): + """ + Test units can be inferred from field names. + """ + properties = Properties( + data_file=property.data_file, + infer_units=True, + ) + expected_units = {"Comma units": "m", "Bracket units": "s"} + expected_fields = [ + "Text", + "Integers", + "Floating", + "Boolean", + "Missing data", + "Comma units", + "Bracket units", + ] + assert properties.units == expected_units + assert list(properties.df.columns.values) == expected_fields + + def test_struct_file(self, property): + """ + Test structure file names can be inferred from a field. 
+ """ + struct_file_template = "test_{struct_name}_file.txt" + struct_name_label = "Text" + properties_1 = Properties( + data_file=property.data_file, + store_struct_file=True, + struct_file_template=struct_file_template, + struct_name_label=struct_name_label, + ) + expected_struct_files = [ + "test_Some_file.txt", + "test_test_file.txt", + "test_data_file.txt", + ] + assert isinstance(properties_1.struct_files, list) + for i, file in enumerate(expected_struct_files): + assert properties_1.struct_files[i] == file + + invalid_template = "invalid_template" + with pytest.raises(ValueError): + Properties( + data_file=property.data_file, + store_struct_file=True, + struct_file_template=invalid_template, + struct_name_label=struct_name_label, + ) + + invalid_label = "label" + with pytest.raises(ValueError): + Properties( + data_file=property.data_file, + store_struct_file=True, + struct_file_template=struct_file_template, + struct_name_label=invalid_label, + ) + + def test_to_list(self, property): + """ + Test dataframe can be converted into a list of properties. + """ + assert len(property.to_list()) == 3 + assert isinstance(property.to_list(), list) + assert isinstance(property.to_list()[0], dict) + expected_property = { + "Text": "Some", + "Integers": 1, + "Floating": 0.01, + "Boolean": True, + "Missing data": "Missing", + "Comma units, m": 0, + "Bracket units (s)": 0, + } + assert property.to_list()[0] == expected_property + + def test_missing_data(self, property): + """ + Test missing data is not included in properties. + """ + expected_property = { + "Text": "test", + "Integers": 2, + "Floating": 0.1, + "Boolean": False, + "Comma units, m": 1, + "Bracket units (s)": 1, + } + assert property.to_list()[1] == expected_property + + def test_to_list_units(self, property): + """ + Test units are included in properties when converting to a list. 
+ """ + properties_1 = Properties( + data_file=property.data_file, + infer_units=True, + ) + expected_units = {"Comma units": "m", "Bracket units": "s"} + expected_property = { + "Text": "Some", + "Integers": 1, + "Floating": 0.01, + "Boolean": True, + "Missing data": "Missing", + "Comma units": 0, + "Bracket units": 0, + "units": expected_units, + } + assert properties_1.to_list()[0] == expected_property diff --git a/tutorials/abcd_benchmarking.ipynb b/tutorials/abcd_benchmarking.ipynb new file mode 100644 index 00000000..aa0fdc3f --- /dev/null +++ b/tutorials/abcd_benchmarking.ipynb @@ -0,0 +1,1147 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking OpenSearch performance against MongoDB " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "from abcd import ABCD" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "os_url = 'opensearch://admin:admin@localhost:9200'\n", + "os_abcd = ABCD.from_url(os_url)\n", + "\n", + "print(os_abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MongoDatabase(url=localhost:27017, db=abcd, collection=atoms)\n" + ] + } + ], + "source": [ + "mongo_url = 'mongodb://localhost:27017'\n", + "mongo_abcd = ABCD.from_url(mongo_url)\n", + "\n", + "print(mongo_abcd)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data in each database consists of ~400,000 structures, made up of 2055 structures repeated 192 times. Of these, half were uploaded only as atoms objects, and half were uploaded with extra information added from a csv file." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 394560\n", + " type: opensearch\n", + "CPU times: user 0 ns, sys: 2.34 ms, total: 2.34 ms\n", + "Wall time: 7.86 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================== ABCD MongoDB ==================\n", + " type: mongodb\n", + " host: localhost\n", + " port: 27017\n", + " db: abcd\n", + "collection: atoms\n", + "number of confs: 394560\n", + " type: mongodb\n", + "CPU times: user 0 ns, sys: 2.73 ms, total: 2.73 ms\n", + "Wall time: 323 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 67.1 ms, sys: 8.17 ms, total: 75.3 ms\n", + "Wall time: 207 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{'info': ['1aromatico-up',\n", + " '2D',\n", + " '2aromatici-up',\n", + " '5-m-rings',\n", + " '5m-ring-leg2met',\n", + " '6m-rings',\n", + " 'Accessible Surface Area',\n", + " 'Band_gap',\n", + " 'CN-M',\n", + " 'COOM',\n", + " 'Cell volume',\n", + " 'Crit: metal',\n", + " 'Crit: pi-pi stacking',\n", + " 'Crit: redox active linker',\n", + " 'Crit: redox match',\n", + " 'Criteria#',\n", + " 'Density',\n", + " 'Dos at CBM',\n", + " 'Dos at Fermi energy',\n", + " 'Dos at VBM',\n", + " 'HSE band gap',\n", + " 'LCD',\n", + " 'M-C-C-TRIANG',\n", + " 'M-H2O-M',\n", + " 'M-N-NM-N-M',\n", + " 'M-h2o',\n", + " 'MOF_name',\n", + " 'Metal',\n", + " 'Metal 2',\n", 
+ " 'Metal 3',\n", + " 'Metal density',\n", + " 'Metals number',\n", + " 'Multiplier_Sum',\n", + " 'N3--NCN up',\n", + " 'PLD',\n", + " 'Space_group',\n", + " 'Space_group#',\n", + " 'Temp',\n", + " 'Volume Fraction',\n", + " 'Year',\n", + " 'Zprime',\n", + " 'author',\n", + " 'benzene',\n", + " 'cell',\n", + " 'energy',\n", + " 'formula',\n", + " 'metal-N',\n", + " 'metal-O',\n", + " 'metal-S',\n", + " 'metal-halogen',\n", + " 'n_atoms',\n", + " 'pbc',\n", + " 'pyridine',\n", + " 'pyrimidine',\n", + " 'units',\n", + " 'volume',\n", + " 'without ions'],\n", + " 'derived': ['elements',\n", + " 'hash',\n", + " 'hash_structure',\n", + " 'modified',\n", + " 'uploaded',\n", + " 'username',\n", + " 'volume'],\n", + " 'arrays': ['forces', 'numbers', 'positions']}" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4.59 ms, sys: 3.13 ms, total: 7.72 ms\n", + "Wall time: 6.13 s\n" + ] + }, + { + "data": { + "text/plain": [ + "{'info': ['Dos at Fermi energy',\n", + " 'pbc',\n", + " 'Metal 3',\n", + " 'Multiplier_Sum',\n", + " '1aromatico-up',\n", + " 'Crit: redox match',\n", + " 'volume',\n", + " 'without ions',\n", + " 'Density',\n", + " 'metal-halogen',\n", + " 'MOF_name',\n", + " 'pyrimidine',\n", + " 'Dos at VBM',\n", + " 'COOM',\n", + " 'Metals number',\n", + " 'Crit: redox active linker',\n", + " 'Accessible Surface Area',\n", + " 'Metal density',\n", + " 'M-C-C-TRIANG',\n", + " 'HSE band gap',\n", + " 'metal-O',\n", + " 'M-h2o',\n", + " 'Dos at CBM',\n", + " 'PLD',\n", + " 'metal-S',\n", + " '2D',\n", + " 'energy',\n", + " 'Band_gap',\n", + " 'M-N-NM-N-M',\n", + " 'N3--NCN up',\n", + " 'Space_group',\n", + " 'cell',\n", + " 'Crit: pi-pi stacking',\n", + " '5m-ring-leg2met',\n", + " 'LCD',\n", + " 
'Volume Fraction',\n", + " 'Criteria#',\n", + " 'formula',\n", + " 'Zprime',\n", + " 'Crit: metal',\n", + " '5-m-rings',\n", + " 'M-H2O-M',\n", + " 'Cell volume',\n", + " 'Metal 2',\n", + " 'author',\n", + " '2aromatici-up',\n", + " 'benzene',\n", + " 'metal-N',\n", + " 'CN-M',\n", + " '6m-rings',\n", + " 'units',\n", + " 'n_atoms',\n", + " 'Year',\n", + " 'Space_group#',\n", + " 'pyridine',\n", + " 'Temp',\n", + " 'Metal'],\n", + " 'arrays': ['forces', 'numbers', 'positions'],\n", + " 'derived': ['username',\n", + " 'volume',\n", + " 'uploaded',\n", + " 'hash',\n", + " 'modified',\n", + " 'elements',\n", + " 'hash_structure']}" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_abcd.properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 98.5 ms, sys: 22.1 ms, total: 121 ms\n", + "Wall time: 446 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{'1aromatico-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '2D': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2aromatici-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '5-m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '5m-ring-leg2met': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '6m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Accessible Surface Area': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Band_gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'CN-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'COOM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Cell 
volume': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: pi-pi stacking': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox active linker': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox match': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Criteria#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'Density': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at CBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'Dos at Fermi energy': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(float)'},\n", + " 'Dos at VBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'HSE band gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'LCD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-C-C-TRIANG': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-H2O-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-N-NM-N-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-h2o': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'MOF_name': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 2': {'count': 9034, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 3': {'count': 409, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal density': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Metals number': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Multiplier_Sum': 
{'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'N3--NCN up': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'PLD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Space_group': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Space_group#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Temp': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Volume Fraction': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Year': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Zprime': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'author': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'benzene': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'cell': {'count': 394560, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'elements': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(dict)'},\n", + " 'energy': {'count': 394560, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'forces': {'count': 394560,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'formula': {'count': 394560, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash_structure': {'count': 394560,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'metal-N': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-O': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-S': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-halogen': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'modified': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 
'n_atoms': {'count': 394560, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'numbers': {'count': 394560, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'pbc': {'count': 394560, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'positions': {'count': 394560,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'pyridine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyrimidine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'units': {'count': 197280, 'category': 'info', 'dtype': 'scalar(dict)'},\n", + " 'uploaded': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(float)'},\n", + " 'without ions': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'}}" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.count_properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 78.6 ms, sys: 10 ms, total: 88.6 ms\n", + "Wall time: 21.7 s\n" + ] + }, + { + "data": { + "text/plain": [ + "{'Dos at Fermi energy': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(float)'},\n", + " 'pbc': {'count': 394560, 'category': 'info', 'dtype': 'vector(bool)'},\n", + " 'Metal 3': {'count': 409, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Multiplier_Sum': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '1aromatico-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox match': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'volume': {'count': 394560, 
'category': 'derived', 'dtype': 'scalar(float)'},\n", + " 'without ions': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Density': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-halogen': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'MOF_name': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyrimidine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at VBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'COOM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metals number': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Crit: redox active linker': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Accessible Surface Area': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Metal density': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'M-C-C-TRIANG': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'HSE band gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-O': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-h2o': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Dos at CBM': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'PLD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-S': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2D': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'energy': {'count': 394560, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'Space_group': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-N-NM-N-M': {'count': 197280, 'category': 'info', 'dtype': 
'scalar(str)'},\n", + " 'N3--NCN up': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Band_gap': {'count': 197280, 'category': 'info', 'dtype': 'scalar(float)'},\n", + " 'cell': {'count': 394560, 'category': 'info', 'dtype': 'array(float)'},\n", + " 'Crit: pi-pi stacking': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " '5m-ring-leg2met': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'LCD': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Volume Fraction': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'Criteria#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'formula': {'count': 394560, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Zprime': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Crit: metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '5-m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'M-H2O-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Cell volume': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal 2': {'count': 9034, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'author': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '2aromatici-up': {'count': 197280,\n", + " 'category': 'info',\n", + " 'dtype': 'scalar(str)'},\n", + " 'benzene': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'metal-N': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'CN-M': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " '6m-rings': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'units': {'count': 197280, 'category': 'info', 'dtype': 'scalar(dict)'},\n", + " 'Year': {'count': 197280, 'category': 'info', 'dtype': 
'scalar(str)'},\n", + " 'n_atoms': {'count': 394560, 'category': 'info', 'dtype': 'scalar(int)'},\n", + " 'Space_group#': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'pyridine': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Temp': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'Metal': {'count': 197280, 'category': 'info', 'dtype': 'scalar(str)'},\n", + " 'positions': {'count': 394560,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'forces': {'count': 394560,\n", + " 'category': 'arrays',\n", + " 'dtype': 'array(float, N x 3)'},\n", + " 'numbers': {'count': 394560, 'category': 'arrays', 'dtype': 'vector(int, N)'},\n", + " 'modified': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'uploaded': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(date)'},\n", + " 'hash_structure': {'count': 394560,\n", + " 'category': 'derived',\n", + " 'dtype': 'scalar(str)'},\n", + " 'username': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'hash': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(str)'},\n", + " 'elements': {'count': 394560, 'category': 'derived', 'dtype': 'scalar(dict)'}}" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_abcd.count_properties()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.18 ms, sys: 630 µs, total: 3.81 ms\n", + "Wall time: 8.16 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "{306: 67200,\n", + " 210: 64128,\n", + " 114: 60672,\n", + " 222: 15168,\n", + " 126: 10752,\n", + " 252: 7104,\n", + " 177: 6912,\n", + " 237: 6528,\n", + " 141: 6144,\n", + " 138: 5184,\n", + " 249: 5184,\n", + " 195: 4992,\n", + " 147: 4800,\n", + " 180: 4800,\n", + " 144: 4608,\n", 
+ " 198: 4416,\n", + " 258: 4416,\n", + " 174: 4224,\n", + " 135: 4032,\n", + " 231: 4032,\n", + " 243: 4032,\n", + " 276: 3840,\n", + " 300: 3840,\n", + " 150: 3648,\n", + " 225: 3648,\n", + " 279: 3648,\n", + " 129: 3456,\n", + " 291: 3456,\n", + " 207: 3264,\n", + " 255: 3264,\n", + " 261: 3264,\n", + " 228: 3072,\n", + " 303: 3072,\n", + " 162: 2880,\n", + " 183: 2880,\n", + " 201: 2880,\n", + " 282: 2880,\n", + " 168: 2688,\n", + " 171: 2688,\n", + " 186: 2496,\n", + " 204: 2496,\n", + " 246: 2496,\n", + " 270: 2496,\n", + " 153: 2304,\n", + " 132: 2112,\n", + " 159: 2112,\n", + " 189: 1920,\n", + " 267: 1920,\n", + " 273: 1920,\n", + " 288: 1920,\n", + " 165: 1728,\n", + " 234: 1536,\n", + " 240: 1536,\n", + " 264: 1536,\n", + " 294: 1536,\n", + " 297: 1536,\n", + " 156: 1152,\n", + " 192: 1152,\n", + " 285: 960}" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_abcd.count_property(\"n_atoms\")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "from collections import Counter" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 328 ms, sys: 57.3 ms, total: 385 ms\n", + "Wall time: 1.01 s\n" + ] + }, + { + "data": { + "text/plain": [ + "Counter({114: 60672,\n", + " 210: 64128,\n", + " 306: 67200,\n", + " 141: 6144,\n", + " 180: 4800,\n", + " 144: 4608,\n", + " 138: 5184,\n", + " 171: 2688,\n", + " 207: 3264,\n", + " 195: 4992,\n", + " 150: 3648,\n", + " 129: 3456,\n", + " 204: 2496,\n", + " 177: 6912,\n", + " 168: 2688,\n", + " 132: 2112,\n", + " 192: 1152,\n", + " 126: 10752,\n", + " 147: 4800,\n", + " 189: 1920,\n", + " 135: 4032,\n", + " 174: 4224,\n", + " 165: 1728,\n", + " 186: 2496,\n", + " 201: 2880,\n", + " 153: 2304,\n", + " 198: 4416,\n", + " 183: 2880,\n", + " 162: 2880,\n", + " 156: 
1152,\n", + " 159: 2112,\n", + " 252: 7104,\n", + " 279: 3648,\n", + " 222: 15168,\n", + " 273: 1920,\n", + " 300: 3840,\n", + " 240: 1536,\n", + " 303: 3072,\n", + " 291: 3456,\n", + " 288: 1920,\n", + " 246: 2496,\n", + " 249: 5184,\n", + " 243: 4032,\n", + " 231: 4032,\n", + " 234: 1536,\n", + " 237: 6528,\n", + " 270: 2496,\n", + " 264: 1536,\n", + " 267: 1920,\n", + " 255: 3264,\n", + " 258: 4416,\n", + " 282: 2880,\n", + " 276: 3840,\n", + " 297: 1536,\n", + " 261: 3264,\n", + " 225: 3648,\n", + " 228: 3072,\n", + " 285: 960,\n", + " 294: 1536})" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "Counter(mongo_abcd.property(\"n_atoms\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 37.5 ms, sys: 86 µs, total: 37.5 ms\n", + "Wall time: 52.8 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "(array([374., 82., 137., 105., 308., 185., 120., 80., 147., 363.]),\n", + " array([-61192.4609375 , -58157.79023438, -55123.11953125, -52088.44882813,\n", + " -49053.778125 , -46019.10742188, -42984.43671875, -39949.76601563,\n", + " -36915.0953125 , -33880.42460937, -30845.75390625]),\n", + " )" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "plt.hist(os_abcd.count_property(\"energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 1.16 s, sys: 46.4 ms, total: 1.2 s\n", + "Wall time: 2.03 s\n" + ] + }, + { + "data": { + "text/plain": [ + "(array([82560., 15744., 26688., 20160., 67776., 37440., 23232., 15552.,\n", + " 28416., 76992.]),\n", + " array([-61192.46163388, -58157.79081243, -55123.11999097, -52088.44916952,\n", + " -49053.77834806, -46019.10752661, -42984.43670515, -39949.7658837 ,\n", + " -36915.09506224, -33880.42424079, -30845.75341933]),\n", + " )" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAksAAAGdCAYAAAACMjetAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4h0lEQVR4nO3df1xW9f3/8SegXKB2XfgLkESl2VKmiWLi5arbXHy8crTNRZs6Zmak04FLKX+wHDq3pdOp+Zu1Srx9yqXu9skKDHOYuunlL9RSFNeWhs0utI/BpXwUFM73j76cvAJPXIoh+bjfbue2Xef9Ou/zPu8d5LnDOecKMAzDEAAAAOoV2NQDAAAAuJkRlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACwQlgAAACy0aOoBNKWamhqdOnVKt912mwICApp6OAAAoAEMw9C5c+cUFRWlwMAbf93nlg5Lp06dUnR0dFMPAwAAXIOTJ0+qc+fON3w/t3RYuu222yR9Ntl2u72JRwMAABrC6/UqOjra/D1+o93SYan2T292u52wBABAM/NV3ULDDd4AAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWWjT1AL6uuk3Pa+oh+O3E3KSmHgIAADcdriwBAABYICwBAABYICwBAABYICwBAABYICwBAABYICwBAABY8CssVVdX69e//rViYmIUGhqqb3zjG/rtb38rwzDMGsMwlJWVpU6dOik0NFSJiYl6//33ffo5e/asUlJSZLfbFRYWptTUVJ0/f96n5r333tN9992nkJAQRUdHa968eXXGs379evXo0UMhISHq3bu3Nm7c6M/hAAAAfCm/wtIf/vAHrVy5UsuWLdPRo0f1hz/8QfPmzdPSpUvNmnnz5mnJkiXKzs7W7t271bp1a7lcLl28eNGsSUlJUVFRkTZv3qzc3Fxt375d48aNM9u9Xq+GDBmirl27qrCwUPPnz9esWbP0/PPPmzU7d+7UyJEjlZqaqgMHDmjYsGEaNmyYDh8+fD3zAQAA
4CPAuPKy0Jd46KGHFBERoRdffNFcl5ycrNDQUL388ssyDENRUVF66qmn9PTTT0uSysvLFRERoZycHI0YMUJHjx5VbGys9u7dq/79+0uS8vPz9b3vfU8fffSRoqKitHLlSj3zzDPyeDwKDg6WJE2fPl0bNmxQcXGxJGn48OGqqKhQbm6uOZaBAwcqLi5O2dnZDToer9crh8Oh8vJy2e32hk5Dg/BSSgAAbowb+fu7Pn5dWRo0aJAKCgr0z3/+U5L07rvv6h//+IeGDh0qSTp+/Lg8Ho8SExPNbRwOhxISEuR2uyVJbrdbYWFhZlCSpMTERAUGBmr37t1mzf33328GJUlyuVw6duyYPv30U7Pmyv3U1tTupz6VlZXyer0+CwAAgBW/vu5k+vTp8nq96tGjh4KCglRdXa3f//73SklJkSR5PB5JUkREhM92ERERZpvH41F4eLjvIFq0ULt27XxqYmJi6vRR29a2bVt5PB7L/dRnzpw5+s1vfuPPIQMAgFucX1eW1q1bp1deeUVr1qzR/v37tXr1av3xj3/U6tWrb9T4GlVmZqbKy8vN5eTJk009JAAAcJPz68rSlClTNH36dI0YMUKS1Lt3b3344YeaM2eORo8ercjISElSaWmpOnXqZG5XWlqquLg4SVJkZKROnz7t0+/ly5d19uxZc/vIyEiVlpb61NR+/rKa2vb62Gw22Ww2fw4ZAADc4vy6svR///d/Cgz03SQoKEg1NTWSpJiYGEVGRqqgoMBs93q92r17t5xOpyTJ6XSqrKxMhYWFZs2WLVtUU1OjhIQEs2b79u26dOmSWbN582bdddddatu2rVlz5X5qa2r3AwAA0Bj8Ckvf//739fvf/155eXk6ceKEXnvtNS1cuFA/+tGPJEkBAQGaNGmSfve73+mNN97QoUOH9OijjyoqKkrDhg2TJPXs2VMPPvigxo4dqz179mjHjh1KT0/XiBEjFBUVJUn66U9/quDgYKWmpqqoqEhr167V4sWLlZGRYY7lySefVH5+vhYsWKDi4mLNmjVL+/btU3p6eiNNDQAAgJ9/hlu6dKl+/etf6xe/+IVOnz6tqKgo/fznP1dWVpZZM3XqVFVUVGjcuHEqKyvTvffeq/z8fIWEhJg1r7zyitLT0/XAAw8oMDBQycnJWrJkidnucDj09ttvKy0tTfHx8erQoYOysrJ83sU0aNAgrVmzRjNmzNCvfvUr3XnnndqwYYN69ep1PfMBAADgw6/3LH3d8J4lX7xnCQDQHNzU71kCAAC41RCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALPj1niUAANA0eCVN0+HKEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAW/wlK3bt0UEBBQZ0lLS5MkXbx4UWlpaWrfvr3atGmj5ORklZaW+vRRUlKipKQktWrVSuHh4ZoyZYouX77sU7N161b169dPNptN3bt3V05OTp2xLF++XN26dVNISIgSEhK0Z88ePw8dAADgy/kVlvbu3auPP/7YXDZv3ixJ+vGPfyxJmjx5st58802tX79e27Zt06lTp/Twww+b21dXVyspKUlVVVXauXOnVq9erZycHGVlZZk1x48fV1JSkgYPHqyDBw9q0qRJeuKJJ7Rp0yazZu3atcrIyNDMmTO1f/9+9enTRy6XS6dPn76uyQAAAPiiAMMwjGvdeNKkScrNzdX7778vr9erjh07as2aNXrkkUckScXFxerZs6fcbrcGDhyot956Sw899JBOnTqliIgISVJ2dramTZumM2fOKDg4WNOmTVNeXp4OHz5s7mfEiBEqKytTfn6+JCkhIUH3
3HOPli1bJkmqqalRdHS0Jk6cqOnTpzd4/F6vVw6HQ+Xl5bLb7dc6DfXqNj2vUfv7KpyYm9TUQwAAXAW/Vz53I39/1+ea71mqqqrSyy+/rMcff1wBAQEqLCzUpUuXlJiYaNb06NFDXbp0kdvtliS53W717t3bDEqS5HK55PV6VVRUZNZc2UdtTW0fVVVVKiws9KkJDAxUYmKiWXM1lZWV8nq9PgsAAICVaw5LGzZsUFlZmR577DFJksfjUXBwsMLCwnzqIiIi5PF4zJorg1Jte22bVY3X69WFCxf0ySefqLq6ut6a2j6uZs6cOXI4HOYSHR3t1zEDAIBbzzWHpRdffFFDhw5VVFRUY47nhsrMzFR5ebm5nDx5sqmHBAAAbnItrmWjDz/8UH/729/0P//zP+a6yMhIVVVVqayszOfqUmlpqSIjI82aLz61Vvu03JU1X3yCrrS0VHa7XaGhoQoKClJQUFC9NbV9XI3NZpPNZvPvYAEAwC3tmq4srVq1SuHh4UpK+vzGrfj4eLVs2VIFBQXmumPHjqmkpEROp1OS5HQ6dejQIZ+n1jZv3iy73a7Y2Fiz5so+amtq+wgODlZ8fLxPTU1NjQoKCswaAACAxuL3laWamhqtWrVKo0ePVosWn2/ucDiUmpqqjIwMtWvXTna7XRMnTpTT6dTAgQMlSUOGDFFsbKxGjRqlefPmyePxaMaMGUpLSzOv+IwfP17Lli3T1KlT9fjjj2vLli1at26d8vI+fwogIyNDo0ePVv/+/TVgwAA999xzqqio0JgxY653PgAAAHz4HZb+9re/qaSkRI8//nidtkWLFikwMFDJycmqrKyUy+XSihUrzPagoCDl5uZqwoQJcjqdat26tUaPHq3Zs2ebNTExMcrLy9PkyZO1ePFide7cWS+88IJcLpdZM3z4cJ05c0ZZWVnyeDyKi4tTfn5+nZu+AQAArtd1vWepueM9S754zxIA3Lz4vfK5ZvOeJQAAgFsBYQkAAMDCNb06AMCthz8BALhVcWUJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAAmEJAADAgt9h6T//+Y9+9rOfqX379goNDVXv3r21b98+s90wDGVlZalTp04KDQ1VYmKi3n//fZ8+zp49q5SUFNntdoWFhSk1NVXnz5/3qXnvvfd03333KSQkRNHR0Zo3b16dsaxfv149evRQSEiIevfurY0bN/p7OAAAAJb8Ckuffvqpvv3tb6tly5Z66623dOTIES1YsEBt27Y1a+bNm6clS5YoOztbu3fvVuvWreVyuXTx4kWzJiUlRUVFRdq8ebNyc3O1fft2jRs3zmz3er0aMmSIunbtqsLCQs2fP1+zZs3S888/b9bs3LlTI0eOVGpqqg4cOKBhw4Zp2LBhOnz48PXMBwAAgI8AwzCMhhZPnz5dO3bs0N///vd62w3DUFRUlJ566ik9/fTTkqTy8nJFREQoJydHI0aM0NGjRxUbG6u9e/eqf//+kqT8/Hx973vf00cffaSoqCitXLlSzzzzjDwej4KDg819b9iwQcXFxZKk4cOHq6KiQrm5ueb+Bw4cqLi4OGVnZzfoeLxerxwOh8rLy2W32xs6DQ3SbXpeo/b3VTgxN6mph4CbGOc00LT4Gfzcjfz9XR+/riy98cYb6t+/v3784x8rPDxcffv21Z///Gez/fjx4/J4PEpMTDTXORwOJSQkyO12S5LcbrfCwsLMoCRJiYmJCgwM1O7du82a+++/3wxKkuRyuXTs2DF9+umnZs2V+6mtqd1PfSorK+X1en0WAAAAK36FpQ8++EArV67UnXfeqU2b
NmnChAn65S9/qdWrV0uSPB6PJCkiIsJnu4iICLPN4/EoPDzcp71FixZq166dT019fVy5j6vV1LbXZ86cOXI4HOYSHR3tz+EDAIBbkF9hqaamRv369dOzzz6rvn37aty4cRo7dmyD/+zV1DIzM1VeXm4uJ0+ebOohAQCAm5xfYalTp06KjY31WdezZ0+VlJRIkiIjIyVJpaWlPjWlpaVmW2RkpE6fPu3TfvnyZZ09e9anpr4+rtzH1Wpq2+tjs9lkt9t9FgAAACt+haVvf/vbOnbsmM+6f/7zn+rataskKSYmRpGRkSooKDDbvV6vdu/eLafTKUlyOp0qKytTYWGhWbNlyxbV1NQoISHBrNm+fbsuXbpk1mzevFl33XWX+eSd0+n02U9tTe1+AAAAGoNfYWny5MnatWuXnn32Wf3rX//SmjVr9PzzzystLU2SFBAQoEmTJul3v/ud3njjDR06dEiPPvqooqKiNGzYMEmfXYl68MEHNXbsWO3Zs0c7duxQenq6RowYoaioKEnST3/6UwUHBys1NVVFRUVau3atFi9erIyMDHMsTz75pPLz87VgwQIVFxdr1qxZ2rdvn9LT0xtpagAAAKQW/hTfc889eu2115SZmanZs2crJiZGzz33nFJSUsyaqVOnqqKiQuPGjVNZWZnuvfde5efnKyQkxKx55ZVXlJ6ergceeECBgYFKTk7WkiVLzHaHw6G3335baWlpio+PV4cOHZSVleXzLqZBgwZpzZo1mjFjhn71q1/pzjvv1IYNG9SrV6/rmQ8AAAAffr1n6euG9yz54p00sMI5DTQtfgY/d1O/ZwkAAOBWQ1gCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACw4FdYmjVrlgICAnyWHj16mO0XL15UWlqa2rdvrzZt2ig5OVmlpaU+fZSUlCgpKUmtWrVSeHi4pkyZosuXL/vUbN26Vf369ZPNZlP37t2Vk5NTZyzLly9Xt27dFBISooSEBO3Zs8efQwEAAGgQv68sfetb39LHH39sLv/4xz/MtsmTJ+vNN9/U+vXrtW3bNp06dUoPP/yw2V5dXa2kpCRVVVVp586dWr16tXJycpSVlWXWHD9+XElJSRo8eLAOHjyoSZMm6YknntCmTZvMmrVr1yojI0MzZ87U/v371adPH7lcLp0+ffpa5wEAAKBefoelFi1aKDIy0lw6dOggSSovL9eLL76ohQsX6rvf/a7i4+O1atUq7dy5U7t27ZIkvf322zpy5IhefvllxcXFaejQofrtb3+r5cuXq6qqSpKUnZ2tmJgYLViwQD179lR6eroeeeQRLVq0yBzDwoULNXbsWI0ZM0axsbHKzs5Wq1at9NJLLzXGnAAAAJj8Dkvvv/++oqKidMcddyglJUUlJSWSpMLCQl26dEmJiYlmbY8ePdSlSxe53W5JktvtVu/evRUREWHWuFwueb1eFRUVmTVX9lFbU9tHVVWVCgsLfWoCAwOVmJho1lxNZWWlvF6vzwIAAGDFr7CUkJCgnJwc5efna+XKlTp+/Ljuu+8+nTt3Th6PR8HBwQoLC/PZJiIiQh6PR5Lk8Xh8glJte22bVY3X69WFCxf0ySefqLq6ut6a2j6uZs6cOXI4HOYSHR3tz+EDAIBbUAt/iocOHWr+97vvvlsJCQnq2rWr1q1bp9DQ0EYfXGPLzMxURkaG+dnr9RKYAACApet6dUBYWJi++c1v6l//+pciIyNVVVWlsrIyn5rS0lJFRkZKkiIjI+s8HVf7+ctq7Ha7QkND1aFDBwUFBdVb
U9vH1dhsNtntdp8FAADAynWFpfPnz+vf//63OnXqpPj4eLVs2VIFBQVm+7Fjx1RSUiKn0ylJcjqdOnTokM9Ta5s3b5bdbldsbKxZc2UftTW1fQQHBys+Pt6npqamRgUFBWYNAABAY/ErLD399NPatm2bTpw4oZ07d+pHP/qRgoKCNHLkSDkcDqWmpiojI0PvvPOOCgsLNWbMGDmdTg0cOFCSNGTIEMXGxmrUqFF69913tWnTJs2YMUNpaWmy2WySpPHjx+uDDz7Q1KlTVVxcrBUrVmjdunWaPHmyOY6MjAz9+c9/1urVq3X06FFNmDBBFRUVGjNmTCNODQAAgJ/3LH300UcaOXKk/vd//1cdO3bUvffeq127dqljx46SpEWLFikwMFDJycmqrKyUy+XSihUrzO2DgoKUm5urCRMmyOl0qnXr1ho9erRmz55t1sTExCgvL0+TJ0/W4sWL1blzZ73wwgtyuVxmzfDhw3XmzBllZWXJ4/EoLi5O+fn5dW76BgAAuF4BhmEYTT2IpuL1euVwOFReXt7o9y91m57XqP19FU7MTWrqIeAmxjkNNC1+Bj93I39/14fvhgMAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBAWAIAALBwXWFp7ty5CggI0KRJk8x1Fy9eVFpamtq3b682bdooOTlZpaWlPtuVlJQoKSlJrVq1Unh4uKZMmaLLly/71GzdulX9+vWTzWZT9+7dlZOTU2f/y5cvV7du3RQSEqKEhATt2bPneg4HAACgjmsOS3v37tWf/vQn3X333T7rJ0+erDfffFPr16/Xtm3bdOrUKT388MNme3V1tZKSklRVVaWdO3dq9erVysnJUVZWlllz/PhxJSUlafDgwTp48KAmTZqkJ554Qps2bTJr1q5dq4yMDM2cOVP79+9Xnz595HK5dPr06Ws9JAAAgDquKSydP39eKSkp+vOf/6y2bdua68vLy/Xiiy9q4cKF+u53v6v4+HitWrVKO3fu1K5duyRJb7/9to4cOaKXX35ZcXFxGjp0qH77299q+fLlqqqqkiRlZ2crJiZGCxYsUM+ePZWenq5HHnlEixYtMve1cOFCjR07VmPGjFFsbKyys7PVqlUrvfTSS9czHwAAAD6uKSylpaUpKSlJiYmJPusLCwt16dIln/U9evRQly5d5Ha7JUlut1u9e/dWRESEWeNyueT1elVUVGTWfLFvl8tl9lFVVaXCwkKfmsDAQCUmJpo19amsrJTX6/VZAAAArLTwd4NXX31V+/fv1969e+u0eTweBQcHKywszGd9RESEPB6PWXNlUKptr22zqvF6vbpw4YI+/fRTVVdX11tTXFx81bHPmTNHv/nNbxp2oAAAAPLzytLJkyf15JNP6pVXXlFISMiNGtMNk5mZqfLycnM5efJkUw8JAADc5PwKS4WFhTp9+rT69eunFi1aqEWLFtq2bZuWLFmiFi1aKCIiQlVVVSorK/PZrrS0VJGRkZKkyMjIOk/H1X7+shq73a7Q0FB16NBBQUFB9dbU9lEfm80mu93uswAAAFjxKyw98MADOnTokA4ePGgu/fv3V0pKivnfW7ZsqYKCAnObY8eOqaSkRE6nU5LkdDp16NAhn6fWNm/eLLvdrtjYWLPmyj5qa2r7CA4OVnx8vE9NTU2NCgoKzBoAAIDG4Nc9S7fddpt69erls65169Zq3769uT41NVUZGRlq166d7Ha7Jk6cKKfTqYEDB0qShgwZotjYWI0aNUrz5s2Tx+PRjBkzlJaWJpvNJkkaP368li1b
pqlTp+rxxx/Xli1btG7dOuXl5Zn7zcjI0OjRo9W/f38NGDBAzz33nCoqKjRmzJjrmhAAAIAr+X2D95dZtGiRAgMDlZycrMrKSrlcLq1YscJsDwoKUm5uriZMmCCn06nWrVtr9OjRmj17tlkTExOjvLw8TZ48WYsXL1bnzp31wgsvyOVymTXDhw/XmTNnlJWVJY/Ho7i4OOXn59e56RsAAOB6BBiGYTT1IJqK1+uVw+FQeXl5o9+/1G163pcX3WROzE1q6iHgJsY5DTQtfgY/dyN/f9eH74YDAACwQFgCAACwQFgCAACwQFgCAACw0OhPwwHAzYIbYgE0Bq4sAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWCAsAQAAWPArLK1cuVJ333237Ha77Ha7nE6n3nrrLbP94sWLSktLU/v27dWmTRslJyertLTUp4+SkhIlJSWpVatWCg8P15QpU3T58mWfmq1bt6pfv36y2Wzq3r27cnJy6oxl+fLl6tatm0JCQpSQkKA9e/b4cygAAAAN4ldY6ty5s+bOnavCwkLt27dP3/3ud/XDH/5QRUVFkqTJkyfrzTff1Pr167Vt2zadOnVKDz/8sLl9dXW1kpKSVFVVpZ07d2r16tXKyclRVlaWWXP8+HElJSVp8ODBOnjwoCZNmqQnnnhCmzZtMmvWrl2rjIwMzZw5U/v371efPn3kcrl0+vTp650PAAAAHwGGYRjX00G7du00f/58PfLII+rYsaPWrFmjRx55RJJUXFysnj17yu12a+DAgXrrrbf00EMP6dSpU4qIiJAkZWdna9q0aTpz5oyCg4M1bdo05eXl6fDhw+Y+RowYobKyMuXn50uSEhISdM8992jZsmWSpJqaGkVHR2vixImaPn16g8fu9XrlcDhUXl4uu91+PdNQR7fpeY3a31fhxNykph4CbmLN8Zxujvg5xNU0x5/BG3U+38jf3/W55nuWqqur9eqrr6qiokJOp1OFhYW6dOmSEhMTzZoePXqoS5cucrvdkiS3263evXubQUmSXC6XvF6veXXK7Xb79FFbU9tHVVWVCgsLfWoCAwOVmJho1lxNZWWlvF6vzwIAAGDF77B06NAhtWnTRjabTePHj9drr72m2NhYeTweBQcHKywszKc+IiJCHo9HkuTxeHyCUm17bZtVjdfr1YULF/TJJ5+ourq63praPq5mzpw5cjgc5hIdHe3v4QMAgFuM32Hprrvu0sGDB7V7925NmDBBo0eP1pEjR27E2BpdZmamysvLzeXkyZNNPSQAAHCTa+HvBsHBwerevbskKT4+Xnv37tXixYs1fPhwVVVVqayszOfqUmlpqSIjIyVJkZGRdZ5aq31a7sqaLz5BV1paKrvdrtDQUAUFBSkoKKjemto+rsZms8lms/l7yAAA4BZ23e9ZqqmpUWVlpeLj49WyZUsVFBSYbceOHVNJSYmcTqckyel06tChQz5PrW3evFl2u12xsbFmzZV91NbU9hEcHKz4+HifmpqaGhUUFJg1AAAAjcWvK0uZmZkaOnSounTponPnzmnNmjXaunWrNm3aJIfDodTUVGVkZKhdu3ay2+2aOHGinE6nBg4cKEkaMmSIYmNjNWrUKM2bN08ej0czZsxQWlqaecVn/PjxWrZsmaZOnarHH39cW7Zs0bp165SX9/lTABkZGRo9erT69++vAQMG6LnnnlNFRYXGjBnTiFMDAADgZ1g6ffq0Hn30UX388cdyOBy6++67tWnTJv3Xf/2XJGnRokUKDAxUcnKyKisr5XK5tGLFCnP7oKAg5ebmasKECXI6nWrdurVGjx6t2bNnmzUxMTHKy8vT
5MmTtXjxYnXu3FkvvPCCXC6XWTN8+HCdOXNGWVlZ8ng8iouLU35+fp2bvgEAAK7Xdb9nqTnjPUu+eL8LrDTHc7o54ucQV9Mcfwa/Lu9Z8vsGbwAAmrvmGDzQdPgiXQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAu8OgDNWnN8/Jf36ABA88KVJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAuEJQAAAAstmnoAAIDPdZue19RD8NuJuUlNPQTghuLKEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAXCEgAAgAW/wtKcOXN0zz336LbbblN4eLiGDRumY8eO+dRcvHhRaWlpat++vdq0aaPk5GSVlpb61JSUlCgpKUmtWrVSeHi4pkyZosuXL/vUbN26Vf369ZPNZlP37t2Vk5NTZzzLly9Xt27dFBISooSEBO3Zs8efwwEAAPhSfoWlbdu2KS0tTbt27dLmzZt16dIlDRkyRBUVFWbN5MmT9eabb2r9+vXatm2bTp06pYcffthsr66uVlJSkqqqqrRz506tXr1aOTk5ysrKMmuOHz+upKQkDR48WAcPHtSkSZP0xBNPaNOmTWbN2rVrlZGRoZkzZ2r//v3q06ePXC6XTp8+fT3zAQAA4CPAMAzjWjc+c+aMwsPDtW3bNt1///0qLy9Xx44dtWbNGj3yyCOSpOLiYvXs2VNut1sDBw7UW2+9pYceekinTp1SRESEJCk7O1vTpk3TmTNnFBwcrGnTpikvL0+HDx829zVixAiVlZUpPz9fkpSQkKB77rlHy5YtkyTV1NQoOjpaEydO1PTp0xs0fq/XK4fDofLyctnt9mudhnrx/U5fDeb5q9Mc5xpfjeZ4TnM+fzVu1LlxI39/1+e6vki3vLxcktSuXTtJUmFhoS5duqTExESzpkePHurSpYsZltxut3r37m0GJUlyuVyaMGGCioqK1LdvX7ndbp8+amsmTZokSaqqqlJhYaEyMzPN9sDAQCUmJsrtdl91vJWVlaqsrDQ/e73eaz944BrxjzQANC/XfIN3TU2NJk2apG9/+9vq1auXJMnj8Sg4OFhhYWE+tREREfJ4PGbNlUGptr22zarG6/XqwoUL+uSTT1RdXV1vTW0f9ZkzZ44cDoe5REdH+3/gAADglnLNYSktLU2HDx/Wq6++2pjjuaEyMzNVXl5uLidPnmzqIQEAgJvcNf0ZLj09Xbm5udq+fbs6d+5sro+MjFRVVZXKysp8ri6VlpYqMjLSrPniU2u1T8tdWfPFJ+hKS0tlt9sVGhqqoKAgBQUF1VtT20d9bDabbDab/wcMAABuWX5dWTIMQ+np6Xrttde0ZcsWxcTE+LTHx8erZcuWKigoMNcdO3ZMJSUlcjqdkiSn06lDhw75PLW2efNm2e12xcbGmjVX9lFbU9tHcHCw4uPjfWpqampUUFBg1gAAADQGv64spaWlac2aNXr99dd12223mfcHORwOhYaGyuFwKDU1VRkZGWrXrp3sdrsmTpwop9OpgQMHSpKGDBmi2NhYjRo1SvPmzZPH49GMGTOUlpZmXvUZP368li1bpqlTp+rxxx/Xli1btG7dOuXlfX5jbEZGhkaPHq3+/ftrwIABeu6551RRUaExY8Y01twAAAD4F5ZWrlwpSfrOd77js37VqlV67LHHJEmLFi1SYGCgkpOTVVlZKZfLpRUrVpi1QUFBys3N1YQJE+R0OtW6dWuNHj1as2fPNmtiYmKUl5enyZMna/HixercubNeeOEFuVwus2b48OE6c+aMsrKy5PF4FBcXp/z8/Do3fQMAAFyP63rPUnPHe5Z88a4UANeCfztwNV+X9yzx3XAAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAW
CEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAWCEsAAAAW/PoiXXy98V1JAADURVgCAFwX/o8Wvu74MxwAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFwhIAAIAFv8PS9u3b9f3vf19RUVEKCAjQhg0bfNoNw1BWVpY6deqk0NBQJSYm6v333/epOXv2rFJSUmS32xUWFqbU1FSdP3/ep+a9997Tfffdp5CQEEVHR2vevHl1xrJ+/Xr16NFDISEh6t27tzZu3Ojv4QAAAFjyOyxVVFSoT58+Wr58eb3t8+bN05IlS5Sdna3du3erdevWcrlcunjxolmTkpKioqIibd68Wbm5udq+fbvGjRtntnu9Xg0ZMkRdu3ZVYWGh5s+fr1mzZun55583a3bu3KmRI0cqNTVVBw4c0LBhwzRs2DAdPnzY30MCAAC4qgDDMIxr3jggQK+99pqGDRsm6bOrSlFRUXrqqaf09NNPS5LKy8sVERGhnJwcjRgxQkePHlVsbKz27t2r/v37S5Ly8/P1ve99Tx999JGioqK0cuVKPfPMM/J4PAoODpYkTZ8+XRs2bFBxcbEkafjw4aqoqFBubq45noEDByouLk7Z2dkNGr/X65XD4VB5ebnsdvu1TkO9uk3Pa9T+AABobk7MTboh/d7I39/1adR7lo4fPy6Px6PExERzncPhUEJCgtxutyTJ7XYrLCzMDEqSlJiYqMDAQO3evdusuf/++82gJEkul0vHjh3Tp59+atZcuZ/amtr9AAAANIYWjdmZx+ORJEVERPisj4iIMNs8Ho/Cw8N9B9Gihdq1a+dTExMTU6eP2ra2bdvK4/FY7qc+lZWVqqysND97vV5/Dg8AANyCbqmn4ebMmSOHw2Eu0dHRTT0kAABwk2vUsBQZGSlJKi0t9VlfWlpqtkVGRur06dM+7ZcvX9bZs2d9aurr48p9XK2mtr0+mZmZKi8vN5eTJ0/6e4gAAOAW06hhKSYmRpGRkSooKDDXeb1e7d69W06nU5LkdDpVVlamwsJCs2bLli2qqalRQkKCWbN9+3ZdunTJrNm8ebPuuusutW3b1qy5cj+1NbX7qY/NZpPdbvdZAAAArPgdls6fP6+DBw/q4MGDkj67qfvgwYMqKSlRQECAJk2apN/97nd64403dOjQIT366KOKiooyn5jr2bOnHnzwQY0dO1Z79uzRjh07lJ6erhEjRigqKkqS9NOf/lTBwcFKTU1VUVGR1q5dq8WLFysjI8Mcx5NPPqn8/HwtWLBAxcXFmjVrlvbt26f09PTrnxUAAID/z+8bvPft26fBgwebn2sDzOjRo5WTk6OpU6eqoqJC48aNU1lZme69917l5+crJCTE3OaVV15Renq6HnjgAQUGBio5OVlLliwx2x0Oh95++22lpaUpPj5eHTp0UFZWls+7mAYNGqQ1a9ZoxowZ+tWvfqU777xTGzZsUK9eva5pIgAAAOpzXe9Zau54zxIAADcO71kCAAC4BRCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALBCWAAAALDT7sLR8+XJ169ZNISEhSkhI0J49e5p6SAAA4GukWYeltWvXKiMjQzNnztT+/fvVp08fuVwunT59uqmHBgAAviaadVhauHChxo4dqzFjxig2NlbZ2dlq1aqVXnrppaYeGgAA+Jpo0dQDuFZVVVUqLCxUZmamuS4w
MFCJiYlyu931blNZWanKykrzc3l5uSTJ6/U2+vhqKv+v0fsEAKA5uRG/X6/s1zCMG9L/FzXbsPTJJ5+ourpaERERPusjIiJUXFxc7zZz5szRb37zmzrro6Ojb8gYAQC4lTmeu7H9nzt3Tg6H48buRM04LF2LzMxMZWRkmJ9ramp09uxZtW/fXgEBAU04svp5vV5FR0fr5MmTstvtTT2cmxbz1DDMU8MxVw3DPDUM89RwDZ0rwzB07tw5RUVFfSXjarZhqUOHDgoKClJpaanP+tLSUkVGRta7jc1mk81m81kXFhZ2o4bYaOx2Oz9gDcA8NQzz1HDMVcMwTw3DPDVcQ+bqq7iiVKvZ3uAdHBys+Ph4FRQUmOtqampUUFAgp9PZhCMDAABfJ832ypIkZWRkaPTo0erfv78GDBig5557ThUVFRozZkxTDw0AAHxNNOuwNHz4cJ05c0ZZWVnyeDyKi4tTfn5+nZu+myubzaaZM2fW+dMhfDFPDcM8NRxz1TDMU8MwTw13s85VgPFVPXcHAADQDDXbe5YAAAC+CoQlAAAAC4QlAAAAC4QlAAAAC4Slr0heXp4SEhIUGhqqtm3batiwYT7tJSUlSkpKUqtWrRQeHq4pU6bo8uXLPjVbt25Vv379ZLPZ1L17d+Xk5NTZz/Lly9WtWzeFhIQoISFBe/bs8Wm/ePGi0tLS1L59e7Vp00bJycl1XuzZVLp166aAgACfZe7cuWb7iRMn6rQHBARo165dPv2sX79ePXr0UEhIiHr37q2NGzf6tBuGoaysLHXq1EmhoaFKTEzU+++/71Nz9uxZpaSkyG63KywsTKmpqTp//vyNO3g/fNk8SdJ7772n++67TyEhIYqOjta8efPq9PN1n6crVVZWKi4uTgEBATp48KC5nnPK19XmSeKckqQf/OAH6tKli0JCQtSpUyeNGjVKp06dMts5nz7zZfMkNcPzycAN99e//tVo27atsXLlSuPYsWNGUVGRsXbtWrP98uXLRq9evYzExETjwIEDxsaNG40OHToYmZmZZs0HH3xgtGrVysjIyDCOHDliLF261AgKCjLy8/PNmldffdUIDg42XnrpJaOoqMgYO3asERYWZpSWlpo148ePN6Kjo42CggJj3759xsCBA41BgwZ9NRPxJbp27WrMnj3b+Pjjj83l/PnzZvvx48cNScbf/vY3n5qqqiqzZseOHUZQUJAxb94848iRI8aMGTOMli1bGocOHTJr5s6dazgcDmPDhg3Gu+++a/zgBz8wYmJijAsXLpg1Dz74oNGnTx9j165dxt///neje/fuxsiRI7+aifgSXzZP5eXlRkREhJGSkmIcPnzY+Mtf/mKEhoYaf/rTn8yaW2GervTLX/7SGDp0qCHJOHDggLmec8rX1eaJc+ozCxcuNNxut3HixAljx44dhtPpNJxOp9nO+fSZL5un5ng+EZZusEuXLhm333678cILL1y1ZuPGjUZgYKDh8XjMdStXrjTsdrtRWVlpGIZhTJ061fjWt77ls93w4cMNl8tlfh4wYICRlpZmfq6urjaioqKMOXPmGIZhGGVlZUbLli2N9evXmzVHjx41JBlut/v6DrQRdO3a1Vi0aNFV22v/IbryH/Ev+slPfmIkJSX5rEtISDB+/vOfG4ZhGDU1NUZkZKQxf/58s72srMyw2WzGX/7yF8MwDOPIkSOGJGPv3r1mzVtvvWUEBAQY//nPf67hyBrXl83TihUrjLZt25rnjmEYxrRp04y77rrL/HwrzFOtjRs3Gj169DCKioquGpZu9XPKMKzniXOqfq+//roREBBghiHOp/p9cZ6a4/nEn+FusP379+s///mPAgMD1bdvX3Xq1ElDhw7V4cOHzRq3263evXv7vEzT5XLJ6/WqqKjIrElMTPTp2+Vyye12S5KqqqpUWFjoUxMYGKjExESzprCwUJcuXfKp6dGjh7p06WLWNLW5c+eqffv26tu3r+bPn1/nT5HSZ5d4w8PD
de+99+qNN97wafuyeTp+/Lg8Ho9PjcPhUEJCglnjdrsVFham/v37mzWJiYkKDAzU7t27G+1Yr4fVPLndbt1///0KDg4217lcLh07dkyffvqpWXMrzFNpaanGjh2r//7v/1arVq2uWnern1NfNk+cU3WdPXtWr7zyigYNGqSWLVv6tN3q59OV6pun5ng+EZZusA8++ECSNGvWLM2YMUO5ublq27atvvOd7+js2bOSJI/HU+et47WfPR6PZY3X69WFCxf0ySefqLq6ut6aK/sIDg6u8+XBV9Y0pV/+8pd69dVX9c477+jnP/+5nn32WU2dOtVsb9OmjRYsWKD169crLy9P9957r4YNG+bzj9HV5unKOahdZ1UTHh7u096iRQu1a9euWczT9ZxPX6d5MgxDjz32mMaPH+/zj+WVOKcaNk+cU5+bNm2aWrdurfbt26ukpESvv/662cb59DmreWqO5xNh6RpNnz693hv5rlyKi4tVU1MjSXrmmWeUnJys+Ph4rVq1SgEBAVq/fn0TH8WN19B5kj77rr/vfOc7uvvuuzV+/HgtWLBAS5cuVWVlpSSpQ4cOysjIUEJCgu655x7NnTtXP/vZzzR//vymPMRG0Zjz9HXX0LlaunSpzp07p8zMzKv2xTnVsHn6OvPnZ0+SpkyZogMHDujtt99WUFCQHn30URn//4swOJ8aNk/NUbP+brim9NRTT+mxxx6zrLnjjjv08ccfS5JiY2PN9TabTXfccYdKSkokSZGRkXWeWqt9Qi0yMtL8zy8+tVZaWiq73a7Q0FAFBQUpKCio3por+6iqqlJZWZnP1aUraxpbQ+epPgkJCbp8+bJOnDihu+6666o1mzdvNj9fbZ6unIPadZ06dfKpiYuLM2tOnz7t08fly5d19uzZZjFPV5sD6cvPp5t9nqSGz9WWLVvkdrvrfMdU//79lZKSotWrV9e77a12TjVknr7O55S/P3sdOnRQhw4d9M1vflM9e/ZUdHS0du3aJafTWe+2t9r5VMtqnprl+eTXHU7wW3l5uWGz2Xxu8K6qqjLCw8PNO/9rb/C+8qm1P/3pT4bdbjcuXrxoGMZnN3j36tXLp++RI0fWucE7PT3d/FxdXW3cfvvtdW7w/utf/2rWFBcX3zQ3eH/Ryy+/bAQGBhpnz569as0TTzxh9O3b1/z8k5/8xHjooYd8apxOZ52bAv/4xz+a7bX/G33xpsB9+/aZNZs2bbppb5784jzV3jx55RM4mZmZdW6e/LrP04cffmgcOnTIXDZt2mRIMv76178aJ0+evOp2t9o51ZB54pyq34cffmhIMt55552r1txq51N9vjhPzfF8Iix9BZ588knj9ttvNzZt2mQUFxcbqampRnh4uPnLrfbVAUOGDDEOHjxo5OfnGx07dqz31QFTpkwxjh49aixfvrzeVwfYbDYjJyfHOHLkiDFu3DgjLCzM5ym78ePHG126dDG2bNli7Nu3r84jnU1l586dxqJFi4yDBw8a//73v42XX37Z6Nixo/Hoo4+aNTk5OcaaNWuMo0ePGkePHjV+//vfG4GBgcZLL71k1uzYscNo0aKF8cc//tE4evSoMXPmzHofNw0LCzNef/1147333jN++MMf1vu4ad++fY3du3cb//jHP4w777zzpngstyHzVFZWZkRERBijRo0yDh8+bLz66qtGq1at6jyW+3Wep/rU96QS51Rd9c0T55Rh7Nq1y1i6dKlx4MAB48SJE0ZBQYExaNAg4xvf+Ib5f2o5nxo2T83xfCIsfQWqqqqMp556yggPDzduu+02IzEx0Th8+LBPzYkTJ4yhQ4caoaGhRocOHYynnnrKuHTpkk/NO++8Y8TFxRnBwcHGHXfcYaxatarOvpYuXWp06dLFCA4ONgYMGGDs2rXLp/3ChQvGL37xC6Nt27ZGq1atjB/96EfGxx9/3OjH7K/CwkIjISHBcDgcRkhIiNGzZ0/j2WefNX+4DOOzf4h69uxptGrVyrDb
7caAAQN8XoNQa926dcY3v/lNIzg42PjWt75l5OXl+bTX1NQYv/71r42IiAjDZrMZDzzwgHHs2DGfmv/93/81Ro4cabRp08aw2+3GmDFjjHPnzt2Yg/dDQ+bJMAzj3XffNe69917DZrMZt99+uzF37tw6fX2d56k+VwtLt/o59UVXe/z9Vj+n3nvvPWPw4MFGu3btDJvNZnTr1s0YP3688dFHH5k1nE8NmyfDaH7nU4BhNOM7rgAAAG4wnoYDAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACwQFgCAACw8P8AiF7xM8gGAN8AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%%time\n", + "plt.hist(mongo_abcd.property(\"energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.09 ms, sys: 0 ns, total: 2.09 ms\n", + "Wall time: 5.36 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "169536" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "query = 'n_atoms: [200 TO 300]'\n", + "os_abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 2.8 ms, sys: 0 ns, total: 2.8 ms\n", + "Wall time: 396 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "169536" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'n_atoms': {'$gte': 200, '$lte': 300},\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 3.08 ms, sys: 0 ns, total: 3.08 ms\n", + "Wall time: 6.34 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "60672" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_query = 'formula: C48H28O32Zr6 AND username: ubuntu'\n", + "os_abcd.count(os_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 816 µs, sys: 3.33 ms, total: 4.14 ms\n", + "Wall time: 409 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "60672" + ] + }, + "execution_count": 18, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " \"formula\": \"C48H28O32Zr6\",\n", + " \"username\": \"ubuntu\",\n", + "\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 4.41 ms, sys: 69 µs, total: 4.48 ms\n", + "Wall time: 7.81 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "394560" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "os_query = 'pbc: true'\n", + "os_abcd.count(os_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 5.05 ms, sys: 246 µs, total: 5.3 ms\n", + "Wall time: 425 ms\n" + ] + }, + { + "data": { + "text/plain": [ + "394560" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'pbc': True,\n", + "}\n", + "mongo_abcd.count(mongo_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "152\n", + "114\n", + "CPU times: user 133 ms, sys: 7.48 ms, total: 140 ms\n", + "Wall time: 172 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_query = 'modified: [* TO 2023-09-06T12:30:32.0000001]'\n", + "print(os_abcd.count(os_query))\n", + "print(next(os_abcd.get_items(os_query))[\"n_atoms\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "152\n", + 
"114\n", + "CPU times: user 17.8 ms, sys: 3.97 ms, total: 21.8 ms\n", + "Wall time: 688 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " 'modified': {'$lt': datetime.fromisoformat('2023-09-04T11:19:24.310')}\n", + "}\n", + "print(mongo_abcd.count(mongo_query))\n", + "print(next(mongo_abcd.get_items(mongo_query))[\"n_atoms\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "109\n", + "2350.374085846946\n", + "CPU times: user 141 ms, sys: 22.5 ms, total: 164 ms\n", + "Wall time: 194 ms\n" + ] + } + ], + "source": [ + "%%time\n", + "os_query = 'author: Jan* AND Metal: Ag AND Space_group: Pbca'\n", + "print(os_abcd.count(os_query))\n", + "print(next(os_abcd.get_atoms(os_query)).get_volume())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "109\n", + "2439.0408078794635\n", + "CPU times: user 32.7 ms, sys: 7.99 ms, total: 40.7 ms\n", + "Wall time: 1.25 s\n" + ] + } + ], + "source": [ + "%%time\n", + "mongo_query = {\n", + " \"author\": {'$regex': 'Jan*'},\n", + " \"Metal\": \"Ag\",\n", + " \"Space_group\": \"Pbca\"\n", + "}\n", + "print(mongo_abcd.count(mongo_query))\n", + "print(next(mongo_abcd.get_atoms(mongo_query)).get_volume())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv_9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/abcd_opensearch_properties.ipynb b/tutorials/abcd_opensearch_properties.ipynb new file mode 100644 index 00000000..1862549d --- 
/dev/null +++ b/tutorials/abcd_opensearch_properties.ipynb @@ -0,0 +1,640 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "# Usage of ABCD database with extra information" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from abcd import ABCD\n", + "from abcd.backends.atoms_properties import Properties" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "First of all, we need to define the url of the database. It could be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'\n", + "\n", + "Use a `with` statement to catch the raised exceptions. You can ignore them, but in that case you need to handle all the unexpected events yourself. (cannot connect to db, lost connection, wrong filter, wrong url, etc. 
)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:admin@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Cleanup \n", + "\n", + "WARNING!! Remove all elements from the database.\n", + "Only supported in the case of local access" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.destroy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.create()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Uploading configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "/home/ubuntu/abcd/tutorials\n" + ] + } + ], + "source": [ + "from ase.io import iread, read\n", + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data can be entered into the database as ASE Atoms objects, allowing any format readable by ase.io.read to be used." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "file = directory / 'input.data.2055.xyz'" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 15 s, sys: 181 ms, total: 15.2 s\n", + "Wall time: 25 s\n" + ] + } + ], + "source": [ + "%%time\n", + "with abcd as db:\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " db.push(atoms, store_calc=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Extra information can be added manually via a dictionary, or read in through a csv/Excel file. A template for the structures corresponding to each row in the data file, and units in the form of `field (unit)` or `field / unit`, can also be inferred." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "pycharm": { + "is_executing": false, + "metadata": false, + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "data_file = directory / 'DATA_copy.csv'\n", + "struct_file_template = str(directory) + \"/{struct_name}_FSR-out.cif\"" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": false + }, + "outputs": [], + "source": [ + "properties = Properties(\n", + " data_file=data_file,\n", + " store_struct_file=True,\n", + " struct_file_template=struct_file_template,\n", + " struct_name_label = \"MOF_name\",\n", + " infer_units=True,\n", + " # units={\"Density\": \"g/cm3\"}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Using the inferred structure file, this data can then be uploaded together. The `extra_info`, and properties in general, do not need to match that of existing documents stored." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "for i, data in enumerate(properties.to_list()):\n", + " if data['MOF_name'] == \"EWIKAX03\":\n", + " atoms = read(properties.struct_files[i])\n", + " with abcd as db:\n", + " db.push(\n", + " atoms,\n", + " store_calc=False,\n", + " extra_info=data,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'Dos at Fermi energy': 'eln/cell',\n", + " 'Dos at VBM': 'eln/cell',\n", + " 'Dos at CBM': 'eln/cell',\n", + " 'Density': 'g/cm3',\n", + " 'Accessible Surface Area': 'm2/g'}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[\"units\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 2056\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'spacegroup': Spacegroup(1, setting=1), 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': 
{'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, 
'138': {'Fe': 1.0}, '139': {'Fe': 1.0}}}\n" + ] + } + ], + "source": [ + "print(atoms.info)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "pycharm": {} + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'info': ['1aromatico-up', '2D', '2aromatici-up', '5-m-rings', '5m-ring-leg2met', '6m-rings', 'Accessible Surface Area', 'Band_gap', 'CN-M', 'COOM', 'Cell volume', 'Crit: metal', 'Crit: pi-pi stacking', 'Crit: redox active linker', 'Crit: redox match', 'Criteria#', 'Density', 'Dos at CBM', 'Dos at Fermi energy', 'Dos at VBM', 'HSE band gap', 'LCD', 'M-C-C-TRIANG', 'M-H2O-M', 'M-N-NM-N-M', 'M-h2o', 'MOF_name', 'Metal', 'Metal density', 'Metals number', 'Multiplier_Sum', 'N3--NCN up', 'PLD', 'Space_group', 'Space_group#', 'Temp', 'Volume Fraction', 'Year', 'Zprime', 'benzene', 'cell', 'energy', 'formula', 'metal-N', 'metal-O', 'metal-S', 'metal-halogen', 'n_atoms', 'occupancy', 'pbc', 'pyridine', 'pyrimidine', 'spacegroup', 'unit_cell', 'units', 'volume', 'without ions'], 'derived': ['elements', 'hash', 'hash_structure', 'modified', 'uploaded', 'username', 'volume'], 'arrays': ['forces', 'numbers', 'positions', 'spacegroup_kinds']}\n" + ] + } + ], + "source": [ + "print(abcd.properties())" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{29.868799209594727: 1}\n" + ] + } + ], + "source": [ + "# print(abcd.property(\"6m-rings\"))\n", + "print(abcd.count_property(\"Dos at Fermi energy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 
6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], 
[8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], [8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 
12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], [3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 
84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, '34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, 
'92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at 
CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-09-01T18:13:24.859685', 'modified': '2023-09-01T18:13:24.859694', 'hash_structure': '660d3f56483cf3a6dd94d833d4478fcf', 'hash': '2c1caa8af0562303fc8cfe0eba64b444', 'derived': {'arrays_keys': ['numbers', 'spacegroup_kinds', 'positions'], 'info_keys': ['pbc', 'formula', 'occupancy', 'cell', 'unit_cell', 'spacegroup', 'n_atoms', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n" + ] + } + ], + "source": [ + "query = 'n_atoms: 140'\n", + "print(len(list(abcd.get_items(query))))\n", + "print(list(abcd.get_items(query))[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "query = 'Accessible Surface*'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "query = 'Year: [2006 TO 2009]'\n", + 
"print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2056\n" + ] + } + ], + "source": [ + "query = '*ubuntu'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "316\n" + ] + } + ], + "source": [ + "query = 'username:[ubunta TO ubuntx] AND formula:?48H28O32Zr6'\n", + "print(len(list(abcd.get_items(query))))" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n", + "{'_id': 'J4TzUYoBQvF7oZdWKd8C', 'n_atoms': 140, 'cell': [[11.7598, 0.0, 0.0], [0.0, 11.9363, 0.0], [0.0, 0.0, 13.9234]], 'pbc': [True, True, True], 'formula': 'C48H64Fe4N24', 'numbers': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 26, 26, 26, 26], 'positions': [[1.1533188654, 6.568474272199999, 0.33047189900000007], [0.8348164422, 5.6740515043, 1.8464934612000004], [1.6283559864000001, 7.2832437888, 1.9067817832000002], [6.386829698600001, 4.7772056675, 0.5637027724000001], [5.3133598752, 5.427829507899999, 1.8410076416000005], [6.8133223651999995, 4.549222337500001, 2.290538534], [8.8241070476, 6.8505767864, 3.7879027402000007], [8.8195795246, 6.854754491399999, 6.227811509400001], [4.7265811346, 11.3359757278, 0.33047189900000007], [5.0450835578, 0.2940984957000009, 1.8464934612000004], [4.2515440136, 10.621206211199999, 
1.9067817832000002], [11.252870301399998, 1.1909443325, 0.5637027724000001], [0.5665401247999999, 0.5403204921, 1.8410076416000005], [10.8263776348, 1.4189276624999998, 2.290538534], [8.815592952400001, 11.0538732136, 3.7879027402000007], [8.8201204754, 11.0496955086, 6.227811509400001], [1.1533188654, 11.3359757278, 7.2921718989999995], [0.8348164422, 0.2940984957000009, 8.8081934612], [1.6283559864000001, 10.621206211199999, 8.8684817832], [6.386829698600001, 1.1909443325, 7.5254027724000006], [5.3133598752, 0.5403204921, 8.802707641600001], [6.8133223651999995, 1.4189276624999998, 9.252238534000004], [8.8241070476, 11.0538732136, 10.7496027402], [8.8195795246, 11.0496955086, 13.189511509400003], [4.7265811346, 6.568474272199999, 7.2921718989999995], [5.0450835578, 5.6740515043, 8.8081934612], [4.2515440136, 7.2832437888, 8.8684817832], [11.252870301399998, 4.7772056675, 7.5254027724000006], [0.5665401247999999, 5.427829507899999, 8.802707641600001], [10.8263776348, 4.549222337500001, 9.252238534000004], [8.815592952400001, 6.8505767864, 10.7496027402], [8.8201204754, 6.854754491399999, 13.189511509400003], [10.606481134600001, 0.6003242722, 6.631228101000001], [10.924983557800001, 11.642213440599999, 5.1152065388], [10.1314440136, 1.3150937887999998, 5.0549182168], [5.3729703014, 10.745355667500002, 6.397997227600001], [6.4464401248000005, 11.3959795079, 5.120692358400001], [4.9464776348, 10.5173723375, 4.671161466], [2.9356929524, 0.8824267863999998, 3.1737972598], [2.9402204754, 0.8866044913999999, 0.7338884906000002], [7.033218865399999, 5.3678257278, 6.631228101000001], [6.714716442199999, 6.2622365594, 5.1152065388], [7.508255986400001, 4.6530562112, 5.0549182168], [0.5069296986, 7.159094332499999, 6.397997227600001], [11.1932598752, 6.508470492099999, 5.120692358400001], [0.9334223652, 7.3870776624999985, 4.671161466], [2.9442070476, 5.0857232136, 3.1737972598], [2.9396795246000003, 5.0815455086, 0.7338884906000002], [10.606481134600001, 5.3678257278, 
13.592928101000004], [10.924983557800001, 6.2622365594, 12.076906538800003], [10.1314440136, 4.6530562112, 12.016618216800003], [5.3729703014, 7.159094332499999, 13.3596972276], [6.4464401248000005, 6.508470492099999, 12.0823923584], [4.9464776348, 7.3870776624999985, 11.632861466], [2.9356929524, 5.0857232136, 10.135497259800003], [2.9402204754, 5.0815455086, 7.6955884906000005], [7.033218865399999, 0.6003242722, 13.592928101000004], [6.714716442199999, 11.642213440599999, 12.076906538800003], [7.508255986400001, 1.3150937887999998, 12.016618216800003], [0.5069296986, 10.745355667500002, 13.3596972276], [11.1932598752, 11.3959795079, 12.0823923584], [0.9334223652, 10.5173723375, 11.632861466], [2.9442070476, 0.8824267863999998, 10.135497259800003], [2.9396795246000003, 0.8866044913999999, 7.6955884906000005], [11.3421624628, 7.352343029499999, 1.4700882656000003], [0.8413313713999999, 6.690045487699999, 1.3900565624000003], [7.1397744132, 6.4757292212, 1.541181146], [6.3813143524, 5.266367177799999, 1.5607156762000003], [8.816380858999999, 7.8135974704, 4.3231043128], [8.8125824436, 7.814814973, 5.689045546400001], [6.297537537200001, 10.552106970499999, 1.4700882656000003], [5.0385686286, 11.2144045123, 1.3900565624000003], [10.499925586800002, 11.428720778799999, 1.541181146], [11.2583856476, 0.7017828222, 1.5607156762000003], [8.823319141, 10.0908525296, 4.3231043128], [8.827117556400001, 10.089635027, 5.689045546400001], [11.3421624628, 10.552106970499999, 8.4317882656], [0.8413313713999999, 11.2144045123, 8.3517565624], [7.1397744132, 11.428720778799999, 8.502881146], [6.3813143524, 0.7017828222, 8.522415676200001], [8.816380858999999, 10.0908525296, 11.2848043128], [8.8125824436, 10.089635027, 12.6507455464], [6.297537537200001, 7.352343029499999, 8.4317882656], [5.0385686286, 6.690045487699999, 8.3517565624], [10.499925586800002, 6.4757292212, 8.502881146], [11.2583856476, 5.266367177799999, 8.522415676200001], [8.823319141, 7.8135974704, 11.2848043128], 
[8.827117556400001, 7.814814973, 12.6507455464], [0.41763753719999996, 1.3841930295, 5.491611734400001], [10.9184686286, 0.7218954876999999, 5.5716434376000015], [4.6200255868, 0.5075792212000001, 5.420518854], [5.378485647599999, 11.234517177799999, 5.400984323800001], [2.943419141, 1.8454474704, 2.6385956872000005], [2.9472175564, 1.8466649729999998, 1.2726544536000002], [5.4622624628, 4.583956970500001, 5.491611734400001], [6.7212313714, 5.246254512299999, 5.5716434376000015], [1.2598744132, 5.460570778799999, 5.420518854], [0.5014143524, 6.6699328222, 5.400984323800001], [2.936480859, 4.1227025296, 2.6385956872000005], [2.9326824436, 4.121485026999999, 1.2726544536000002], [0.41763753719999996, 4.583956970500001, 12.453311734400001], [10.9184686286, 5.246254512299999, 12.533343437600001], [4.6200255868, 5.460570778799999, 12.382218854000003], [5.378485647599999, 6.6699328222, 12.3626843238], [2.943419141, 4.1227025296, 9.600295687200003], [2.9472175564, 4.121485026999999, 8.234354453600004], [5.4622624628, 1.3841930295, 12.453311734400001], [6.7212313714, 0.7218954876999999, 12.533343437600001], [1.2598744132, 0.5075792212000001, 12.382218854000003], [0.5014143524, 11.234517177799999, 12.3626843238], [2.936480859, 1.8454474704, 9.600295687200003], [2.9326824436, 1.8466649729999998, 8.234354453600004], [10.3377932242, 7.937329156199999, 1.5133204226], [7.7886801772, 7.4419965787999995, 1.5317967744], [7.301906775799999, 9.9671208438, 1.5133204226], [9.851019822800001, 10.462453421200001, 1.5317967744], [10.3377932242, 9.9671208438, 8.475020422600002], [7.7886801772, 10.462453421200001, 8.4934967744], [7.301906775799999, 7.937329156199999, 8.475020422600002], [9.851019822800001, 7.4419965787999995, 8.4934967744], [8.81985, 8.952224999999999, 3.551246710400001], [8.81985, 8.952224999999999, 6.462337259000002], [8.81985, 8.952224999999999, 10.5129467104], [8.81985, 8.952224999999999, 13.424037259000002], [1.4220067758000001, 1.9691791562, 5.448379577400002], 
[3.9711198228, 1.4738465788, 5.429903225600001], [4.4578932242, 3.9989708437999996, 5.448379577400002], [1.9087801772000001, 4.4943034212, 5.429903225600001], [1.4220067758000001, 3.9989708437999996, 12.4100795774], [3.9711198228, 4.4943034212, 12.391603225600003], [4.4578932242, 1.9691791562, 12.4100795774], [1.9087801772000001, 1.4738465788, 12.391603225600003], [2.93995, 2.984075, 3.4104532896], [2.93995, 2.984075, 0.49936274100000005], [2.93995, 2.984075, 10.372153289600003], [2.93995, 2.984075, 7.461062741000001], [8.81985, 8.952224999999999, 1.5279956862], [8.81985, 8.952224999999999, 8.489695686200001], [2.93995, 2.984075, 5.433704313800002], [2.93995, 2.984075, 12.395404313800002]], 'spacegroup_kinds': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139], 'spacegroup': {'number': 1, 'setting': 1}, 'unit_cell': 'conventional', 'occupancy': {'0': {'H': 1.0}, '1': {'H': 1.0}, '2': {'H': 1.0}, '3': {'H': 1.0}, '4': {'H': 1.0}, '5': {'H': 1.0}, '6': {'H': 1.0}, '7': {'H': 1.0}, '8': {'H': 1.0}, '9': {'H': 1.0}, '10': {'H': 1.0}, '11': {'H': 1.0}, '12': {'H': 1.0}, '13': {'H': 1.0}, '14': {'H': 1.0}, '15': {'H': 1.0}, '16': {'H': 1.0}, '17': {'H': 1.0}, '18': {'H': 1.0}, '19': {'H': 1.0}, '20': {'H': 1.0}, '21': {'H': 1.0}, '22': {'H': 1.0}, '23': {'H': 1.0}, '24': {'H': 1.0}, '25': {'H': 1.0}, '26': {'H': 1.0}, '27': {'H': 1.0}, '28': {'H': 1.0}, '29': {'H': 1.0}, '30': {'H': 1.0}, '31': {'H': 1.0}, '32': {'H': 1.0}, '33': {'H': 1.0}, 
'34': {'H': 1.0}, '35': {'H': 1.0}, '36': {'H': 1.0}, '37': {'H': 1.0}, '38': {'H': 1.0}, '39': {'H': 1.0}, '40': {'H': 1.0}, '41': {'H': 1.0}, '42': {'H': 1.0}, '43': {'H': 1.0}, '44': {'H': 1.0}, '45': {'H': 1.0}, '46': {'H': 1.0}, '47': {'H': 1.0}, '48': {'H': 1.0}, '49': {'H': 1.0}, '50': {'H': 1.0}, '51': {'H': 1.0}, '52': {'H': 1.0}, '53': {'H': 1.0}, '54': {'H': 1.0}, '55': {'H': 1.0}, '56': {'H': 1.0}, '57': {'H': 1.0}, '58': {'H': 1.0}, '59': {'H': 1.0}, '60': {'H': 1.0}, '61': {'H': 1.0}, '62': {'H': 1.0}, '63': {'H': 1.0}, '64': {'C': 1.0}, '65': {'C': 1.0}, '66': {'C': 1.0}, '67': {'C': 1.0}, '68': {'C': 1.0}, '69': {'C': 1.0}, '70': {'C': 1.0}, '71': {'C': 1.0}, '72': {'C': 1.0}, '73': {'C': 1.0}, '74': {'C': 1.0}, '75': {'C': 1.0}, '76': {'C': 1.0}, '77': {'C': 1.0}, '78': {'C': 1.0}, '79': {'C': 1.0}, '80': {'C': 1.0}, '81': {'C': 1.0}, '82': {'C': 1.0}, '83': {'C': 1.0}, '84': {'C': 1.0}, '85': {'C': 1.0}, '86': {'C': 1.0}, '87': {'C': 1.0}, '88': {'C': 1.0}, '89': {'C': 1.0}, '90': {'C': 1.0}, '91': {'C': 1.0}, '92': {'C': 1.0}, '93': {'C': 1.0}, '94': {'C': 1.0}, '95': {'C': 1.0}, '96': {'C': 1.0}, '97': {'C': 1.0}, '98': {'C': 1.0}, '99': {'C': 1.0}, '100': {'C': 1.0}, '101': {'C': 1.0}, '102': {'C': 1.0}, '103': {'C': 1.0}, '104': {'C': 1.0}, '105': {'C': 1.0}, '106': {'C': 1.0}, '107': {'C': 1.0}, '108': {'C': 1.0}, '109': {'C': 1.0}, '110': {'C': 1.0}, '111': {'C': 1.0}, '112': {'N': 1.0}, '113': {'N': 1.0}, '114': {'N': 1.0}, '115': {'N': 1.0}, '116': {'N': 1.0}, '117': {'N': 1.0}, '118': {'N': 1.0}, '119': {'N': 1.0}, '120': {'N': 1.0}, '121': {'N': 1.0}, '122': {'N': 1.0}, '123': {'N': 1.0}, '124': {'N': 1.0}, '125': {'N': 1.0}, '126': {'N': 1.0}, '127': {'N': 1.0}, '128': {'N': 1.0}, '129': {'N': 1.0}, '130': {'N': 1.0}, '131': {'N': 1.0}, '132': {'N': 1.0}, '133': {'N': 1.0}, '134': {'N': 1.0}, '135': {'N': 1.0}, '136': {'Fe': 1.0}, '137': {'Fe': 1.0}, '138': {'Fe': 1.0}, '139': {'Fe': 1.0}}, 'MOF_name': 'EWIKAX03', 'Dos at Fermi energy': 
29.8688, 'Band_gap': 0.004466204, 'Dos at VBM': 29.8688, 'Dos at CBM': 29.8688, 'HSE band gap': 'not found', 'LCD': '4.36078', 'PLD': '1.82778', 'Density': '1.696', 'Accessible Surface Area': '0', 'Volume Fraction': '0.51294', 'Criteria#': 2, 'Multiplier_Sum': '1', 'Space_group#': '14', 'Space_group': 'P21/c', 'Temp': '94', 'Zprime': '1', 'Year': '2008', 'Metal': 'Fe', 'Metals number': '1', 'Cell volume': '1954.407', 'Metal density': '0.000511664', 'Crit: metal': 'yes', 'Crit: redox match': 'yes', 'Crit: redox active linker': 'no', 'Crit: pi-pi stacking': 'no', 'without ions': 'no', '2D': 'no', '1aromatico-up': 'yes', '2aromatici-up': 'no', 'N3--NCN up': 'no', 'benzene': 'no', '6m-rings': 'yes', 'pyridine': 'no', 'pyrimidine': 'no', 'metal-S': 'no', 'metal-O': 'no', 'metal-N': 'yes', 'metal-halogen': 'no', '5m-ring-leg2met': 'no', '5-m-rings': 'no', 'CN-M': 'yes', 'M-h2o': 'no', 'M-H2O-M': 'no', 'M-N-NM-N-M': 'no', 'M-C-C-TRIANG': 'no', 'COOM': 'no', 'units': {'Dos at Fermi energy': 'eln/cell', 'Dos at VBM': 'eln/cell', 'Dos at CBM': 'eln/cell', 'Density': 'g/cm3', 'Accessible Surface Area': 'm2/g'}, 'volume': 1954.406783203316, 'elements': {'1': 64, '6': 48, '7': 24, '26': 4}, 'username': 'ubuntu', 'uploaded': '2023-09-01T18:13:24.859685', 'modified': '2023-09-01T18:13:24.859694', 'hash_structure': '660d3f56483cf3a6dd94d833d4478fcf', 'hash': '2c1caa8af0562303fc8cfe0eba64b444', 'derived': {'arrays_keys': ['numbers', 'spacegroup_kinds', 'positions'], 'info_keys': ['pbc', 'formula', 'occupancy', 'cell', 'unit_cell', 'spacegroup', 'n_atoms', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', 
'1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume'], 'results_keys': [], 'derived_keys': ['elements', 'username', 'uploaded', 'modified', 'volume', 'hash_structure', 'hash']}}\n", + "dict_keys(['_id', 'n_atoms', 'cell', 'pbc', 'formula', 'numbers', 'positions', 'spacegroup_kinds', 'spacegroup', 'unit_cell', 'occupancy', 'MOF_name', 'Dos at Fermi energy', 'Band_gap', 'Dos at VBM', 'Dos at CBM', 'HSE band gap', 'LCD', 'PLD', 'Density', 'Accessible Surface Area', 'Volume Fraction', 'Criteria#', 'Multiplier_Sum', 'Space_group#', 'Space_group', 'Temp', 'Zprime', 'Year', 'Metal', 'Metals number', 'Cell volume', 'Metal density', 'Crit: metal', 'Crit: redox match', 'Crit: redox active linker', 'Crit: pi-pi stacking', 'without ions', '2D', '1aromatico-up', '2aromatici-up', 'N3--NCN up', 'benzene', '6m-rings', 'pyridine', 'pyrimidine', 'metal-S', 'metal-O', 'metal-N', 'metal-halogen', '5m-ring-leg2met', '5-m-rings', 'CN-M', 'M-h2o', 'M-H2O-M', 'M-N-NM-N-M', 'M-C-C-TRIANG', 'COOM', 'units', 'volume', 'elements', 'username', 'uploaded', 'modified', 'hash_structure', 'hash', 'derived'])\n" + ] + } + ], + "source": [ + "query = 'MOF_name: *'\n", + "print(len(list(abcd.get_items(query))))\n", + "print(list(abcd.get_items(query))[0])\n", + "print(list(abcd.get_items(query))[0].keys())" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "query = 'MOF_name: *'\n", + "abcd.add_property(\n", + " data={\"example_property\": \"example_value\"},\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['example_value']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + 
"source": [ + "abcd.refresh()\n", + "abcd.property(\"example_property\", query)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.rename_property(\n", + " name=\"example_property\",\n", + " new_name=\"renamed_property\",\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['example_value']" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.refresh()\n", + "abcd.property(\"renamed_property\", query)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.delete_property(\n", + " name=\"renamed_property\",\n", + " query=query\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.refresh()\n", + "# abcd.property(\"example_property\", query)\n", + "abcd.property(\"renamed_property\", query)" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/abcd_opensearch_queries.ipynb b/tutorials/abcd_opensearch_queries.ipynb new file mode 100644 index 00000000..507289d7 --- /dev/null +++ b/tutorials/abcd_opensearch_queries.ipynb @@ -0,0 +1,1143 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + 
"metadata": { + "pycharm": {} + }, + "source": [ + "# Usage of ABCD database with extra information" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "is_executing": false + }, + "scrolled": true + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from abcd import ABCD\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "First of all, we need to define the URL of the database. It can be local or remote:\n", + "\n", + "- direct access: url = 'opensearch://admin:admin@localhost:9200'\n", + "- api access: url = 'http://localhost/api'\n", + "\n", + "Use a `with` statement to catch the raised exceptions. You can ignore them, but in that case you need to handle all the unexpected events yourself. (cannot connect to db, lost connection, wrong filter, wrong url, etc. 
)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "is_executing": false + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "OpenSearchDatabase(url=localhost:9200, index=atoms) \n" + ] + } + ], + "source": [ + "url = 'opensearch://admin:myStrongPassword123!@localhost:9200'\n", + "abcd = ABCD.from_url(url)\n", + "\n", + "print(abcd)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 2055\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Cleanup \n", + "\n", + "WARNING!! Remove all elements from the database.\n", + "Only supported in the case of local access" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.destroy()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.create()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "================ ABCD OpenSearch =================\n", + " type: opensearch\n", + " host: localhost\n", + " port: 9200\n", + " db: abcd\n", + " index: atoms\n", + "number of confs: 0\n", + " type: opensearch\n" + ] + } + ], + "source": [ + "abcd.print_info()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": { + "pycharm": {} + }, + "source": [ + "## Uploading configurations" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "/home/ubuntu/abcd/tutorials\n" + ] + } + ], + "source": [ + "from ase.io import iread\n", + "!pwd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Data can be entered into the database as ASE Atoms objects, allowing any format readable by ase.io.read to be used." + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "directory = Path('/home/ubuntu/data/')\n", + "file = directory / 'input.data.2055.xyz'" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU times: user 12.6 s, sys: 152 ms, total: 12.7 s\n", + "Wall time: 18.8 s\n" + ] + } + ], + "source": [ + "%%time\n", + "with abcd as db:\n", + " for atoms in iread(file.as_posix(), index=slice(None)):\n", + " db.push(atoms, store_calc=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "abcd.refresh()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Example queries" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Text queries" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "316\n" + ] + } + ], + "source": [ + "# Explicit queries via dictionaries\n", + "\n", + "query = {\"match_all\": {}}\n", + "print(abcd.count(query))\n", + "query = {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + "}\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n" + ] + } + ], + "source": [ + "# Basic text\n", + "\n", + "query = 'ubuntu'\n", + "print(abcd.count(query))" + ] + }, + { + 
"cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "0\n" + ] + } + ], + "source": [ + "# Query specific fields\n", + "\n", + "query = 'username:ubuntu'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'formula:ubuntu'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n" + ] + } + ], + "source": [ + "# Range\n", + "\n", + "query = 'username:[ubunta TO ubuntx]'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "2055\n" + ] + } + ], + "source": [ + "# Wildcards\n", + "\n", + "query = 'ubu?tu'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'username: *'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "316\n", + "2055\n" + ] + } + ], + "source": [ + "# Logical combinations\n", + "\n", + "query = 'username:[ubunta TO ubuntx] AND formula: C48H28O32Zr6'\n", + "print(abcd.count(query))\n", + "\n", + "query = 'username:[ubunta TO ubuntx] OR formula: C48H28O32Zr6'\n", + "print(abcd.count(query))" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2055\n", + "316\n", + "C48H28O32Zr6\n" + ] + } + ], + "source": [ + "# Regex - wrap with `/`\n", + "\n", + "query = '/u.untu/'\n", + "print(abcd.count(query))\n", + "\n", + "# Search for (something like) C48H28O32Zr6\n", + "# Note: anchored by default, so cannot use ^ and $\n", + "query = 'formula: /C.\\d[G-I]28O32Z\\w[^7]/'\n", + "print(abcd.count(query))\n", + "\n", + 
"for prop in abcd.property(\"formula\", query):\n", + " print(prop)\n", + " break" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Numerical queries" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# All energies\n", + "\n", + "data = abcd.property('energy')\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# No lower bound\n", + "\n", + "query = \"energy: [* TO -30000]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Upper and lower bound\n", + "\n", + "query = \"energy: [-50000 TO -30000]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# No upper bound\n", + "\n", + "query = \"energy: [-50000 TO *]\"\n", + "data = abcd.property('energy', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# All values of an array\n", + "\n", + "query = None\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1356\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Range for an array (match if any values of array lie in range)\n", + "\n", + "query = \"forces: [-5 TO -3]\"\n", + "print(abcd.count(query))\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Script queries" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "forces = [atoms.arrays[\"forces\"] for atoms in abcd.get_atoms()]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(114, 3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "forces[0].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGzCAYAAAAFROyYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuNSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/xnp5ZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAllUlEQVR4nO3df3DU9Z3H8dcmMT8I2UCATcpBAJEaM4DYILAtZwFzRBocOaPWHsVAKVpcLBCPmrQ0iNUG0FGEQ0NtCxRlcKjajkGgaVDolIAYyg2g5KxHmhxxEywli7kjv/Z7f3TY6ZooSdjN95PwfMzsjPv9fve77y+Q5Ol3v7txWJZlCQAAwCARdg8AAADwWQQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAj7d27VxMmTFBsbKwcDocuXLhg90gAehCBAlyDtm7dKofDoffee69Hnu/999/X448/rqqqqk5t/9e//lX33Xef4uLitGnTJm3fvl3x8fHhHRKAUaLsHgBA3/f+++9r9erVmjZtmkaOHHnF7Y8ePaqLFy/qJz/5iTIzM8M/IADjcAYFgHHq6+slSQMGDAjZPhsbG0O2LwDhR6AA6FBzc7MKCwuVkZGhxMRExcfH65//+Z/19ttvt9t2586dysjIUEJCgpxOp8aNG6fnn39e0t9fTrr33nslSdOnT5fD4ZDD4dA777zT4fNOmzZNubm5kqRbb71VDodD8+fPD6zftWuXMjIyFBcXp8GDB+vb3/62zp49G7SP+fPnq3///vroo4/0jW98QwkJCZo7d64kye/36/nnn9e4ceMUGxurIUOG6I477mj3ctfLL78ceJ6kpCTdf//9qqmp6dafJYCuI1AAdMjn8+nnP/+5pk2bprVr1+rxxx/XuXPnlJWVpePHjwe2Ky0t1be+9S0NHDhQa9eu1Zo1azRt2jT98Y9/lCTddttt+v73vy9J+uEPf6jt27dr+/btuummmzp83h/96Ed68MEHJUlPPPGEtm/froceekjS32PnvvvuU2RkpIqKirRo0SK9/vrrmjp1aruLaFtbW5WVlSWXy6VnnnlGOTk5kqSFCxdq2bJlGj58uNauXav8/HzFxsbq8OHDgcc+9dRTeuCBBzRmzBg9++yzWrZsmcrKynTbbbdxsS7QUywA15wtW7ZYkqyjR49+7jatra1WU1NT0LK//e1vVnJysvWd73wnsGzp0qWW0+m0WltbP3dfu3btsiRZb7/9drfna25utlwulzV27Fjr//7v/wLLS0pKLElWYWFhYFlubq4lycrPzw/a7/79+y1J1ve///12z+n3+y3LsqyqqiorMjLSeuqpp4LWnzhxwoqKimq3HEB4cAYFQIciIyMVHR0t6e8vi5w/f16tra2aOHGijh07FthuwIABamxsVGlpaVjnee+991RfX6+HH35YsbGxgeXZ2dlKS0vT7t272z1m8eLFQfdfe+01ORwOrVq1qt22DodDkvT666/L7/frvvvu0yeffBK4paSkaMyYMR2+xAUg9AgUAJ9r27ZtGj9+vGJjYzVo0CANGTJEu3fvVkNDQ2Cbhx9+WF/+8pc1a9YsDRs2TN/5zne0d+/ekM/yl7/8RZJ04403tluXlpYWWH9ZVFSUhg0bFrTso48+0tChQ5WUlPS5z/Phhx/KsiyNGTNGQ4YMCbp98MEHgQt4AYQXbzMG0KGXX35Z8+fP15w5c7RixQq5XK7AtR8fffRRYDuXy6Xjx49r37592rNnj/bs2aMtW7bogQce0LZt22ybPyYmRhERXf9/ML/fL4fDoT179igyMrLd+v79+4diPABXQKAA6NCvf/1rXX/99Xr99dcDL39I6vDlkejoaN15552688475f
f79fDDD2vz5s368Y9/rBtuuCHo8d01YsQISVJlZaVmzJgRtK6ysjKw/ouMHj1a+/bt0/nz5z/3LMro0aNlWZZGjRqlL3/5y1c9N4Du4SUeAB26fPbAsqzAsiNHjqi8vDxou7/+9a9B9yMiIjR+/HhJUlNTkyQFPgX2at4BM3HiRLlcLhUXFwf2K0l79uzRBx98oOzs7CvuIycnR5ZlafXq1e3WXT7Ou+++W5GRkVq9enXQsV/e5rPHCyA8OIMCXMN++ctfdni9yNKlSzV79my9/vrr+td//VdlZ2frzJkzKi4uVnp6uj799NPAtt/97nd1/vx5zZgxQ8OGDdNf/vIXbdy4URMmTAi8lXjChAmKjIzU2rVr1dDQoJiYGM2YMUMul6vTs1533XVau3atFixYoK9//ev61re+pbq6Oj3//PMaOXKkli9ffsV9TJ8+XfPmzdOGDRv04Ycf6o477pDf79cf/vAHTZ8+XUuWLNHo0aP15JNPqqCgQFVVVZozZ44SEhJ05swZvfHGG3rwwQf17//+752eG0A32fgOIgA2ufw23s+71dTUWH6/3/rpT39qjRgxwoqJibFuueUWq6SkxMrNzbVGjBgR2Nevf/1ra+bMmZbL5bKio6Ot1NRU66GHHrI+/vjjoOd86aWXrOuvv96KjIy84luOv+ht0K+++qp1yy23WDExMVZSUpI1d+5c63/+53+CtsnNzbXi4+M73Hdra6v19NNPW2lpaVZ0dLQ1ZMgQa9asWVZFRUXQdq+99po1depUKz4+3oqPj7fS0tIsj8djVVZWXuFPF0AoOCzrM+cwAQAAbMY1KAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwTq/8oDa/36/a2lolJCSE5CO0AQBA+FmWpYsXL2ro0KFX/F1ZvTJQamtrNXz4cLvHAAAA3VBTU9Put41/Vq8MlISEBEl/P0Cn02nzNAAAoDN8Pp+GDx8e+Dn+RXploFx+WcfpdBIoAAD0Mp25PIOLZAEAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYJwouwcAAPRuI/N3X3GbqjXZPTAJ+hLOoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADDOVQXKmjVr5HA4tGzZssCyS5cuyePxaNCgQerfv79ycnJUV1cX9Ljq6mplZ2erX79+crlcWrFihVpbW69mFAAA0Id0O1COHj2qzZs3a/z48UHLly9frjfffFO7du3SgQMHVFtbq7vvvjuwvq2tTdnZ2WpubtahQ4e0bds2bd26VYWFhd0/CgAA0Kd0K1A+/fRTzZ07Vy+99JIGDhwYWN7Q0KBf/OIXevbZZzVjxgxlZGRoy5YtOnTokA4fPixJ+t3vfqf3339fL7/8siZMmKBZs2bpJz/5iTZt2qTm5ubQHBUAAOjVuhUoHo9H2dnZyszMDFpeUVGhlpaWoOVpaWlKTU1VeXm5JKm8vFzjxo1TcnJyYJusrCz5fD6dOnWqw+dramqSz+cLugEAgL4rqqsP2Llzp44dO6ajR4+2W+f1ehUdHa0BAwYELU9OTpbX6w1s849xcnn95XUdKSoq0urVq7s6KgAA6KW6dAalpqZGS5cu1SuvvKLY2NhwzdROQUGBGhoaAreampoee24AANDzuhQoFRUVqq+v11e+8hVFRUUpKipKBw4c0IYNGxQVFaXk5G
Q1NzfrwoULQY+rq6tTSkqKJCklJaXdu3ou37+8zWfFxMTI6XQG3QAAQN/VpUC5/fbbdeLECR0/fjxwmzhxoubOnRv47+uuu05lZWWBx1RWVqq6ulput1uS5Ha7deLECdXX1we2KS0tldPpVHp6eogOCwAA9GZdugYlISFBY8eODVoWHx+vQYMGBZYvXLhQeXl5SkpKktPp1COPPCK3260pU6ZIkmbOnKn09HTNmzdP69atk9fr1cqVK+XxeBQTExOiwwIAAL1Zly+SvZLnnntOERERysnJUVNTk7KysvTCCy8E1kdGRqqkpESLFy+W2+1WfHy8cnNz9cQTT4R6FAAA0Es5LMuy7B6iq3w+nxITE9XQ0MD1KABgs5H5u6+4TdWa7B6YBKbrys9vfhcPAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4UXYPAAAw18j83XaPgGsUZ1AAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGCcLgXKiy++qPHjx8vpdMrpdMrtdmvPnj2B9ZcuXZLH49GgQYPUv39/5eTkqK6uLmgf1dXVys7OVr9+/eRyubRixQq1traG5mgAAECf0KVAGTZsmNasWaOKigq99957mjFjhu666y6dOnVKkrR8+XK9+eab2rVrlw4cOKDa2lrdfffdgce3tbUpOztbzc3NOnTokLZt26atW7eqsLAwtEcFAAB6NYdlWdbV7CApKUlPP/207rnnHg0ZMkQ7duzQPffcI0k6ffq0brrpJpWXl2vKlCnas2ePZs+erdraWiUnJ0uSiouL9dhjj+ncuXOKjo7u1HP6fD4lJiaqoaFBTqfzasYHAHyBkfm7Q7KfqjXZIdkPereu/Pzu9jUobW1t2rlzpxobG+V2u1VRUaGWlhZlZmYGtklLS1NqaqrKy8slSeXl5Ro3blwgTiQpKytLPp8vcBamI01NTfL5fEE3AADQd0V19QEnTpyQ2+3WpUuX1L9/f73xxhtKT0/X8ePHFR0drQEDBgRtn5ycLK/XK0nyer1BcXJ5/eV1n6eoqEirV6/u6qgAAEN05kwMZ1nwj7p8BuXGG2/U8ePHdeTIES1evFi5ubl6//33wzFbQEFBgRoaGgK3mpqasD4fAACwV5fPoERHR+uGG26QJGVkZOjo0aN6/vnn9c1vflPNzc26cOFC0FmUuro6paSkSJJSUlL07rvvBu3v8rt8Lm/TkZiYGMXExHR1VAAA0Etd9eeg+P1+NTU1KSMjQ9ddd53KysoC6yorK1VdXS232y1JcrvdOnHihOrr6wPblJaWyul0Kj09/WpHAQAAfUSXzqAUFBRo1qxZSk1N1cWLF7Vjxw6988472rdvnxITE7Vw4ULl5eUpKSlJTqdTjzzyiNxut6ZMmSJJmjlzptLT0zVv3jytW7dOXq9XK1eulMfj4QwJAAAI6FKg1NfX64EHHtDHH3+sxMREjR8/Xvv27dO//Mu/SJKee+45RUREKCcnR01NTcrKytILL7wQeHxkZKRKSkq0ePFiud1uxcfHKzc3V0888URojwoAAPRqV/05KHbgc1AAoGeE6n
NQOoN38fR9PfI5KAAAAOFCoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjBNl9wAAAHuMzN9t9wjA5+IMCgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAME6XAqWoqEi33nqrEhIS5HK5NGfOHFVWVgZtc+nSJXk8Hg0aNEj9+/dXTk6O6urqgraprq5Wdna2+vXrJ5fLpRUrVqi1tfXqjwYAAPQJXQqUAwcOyOPx6PDhwyotLVVLS4tmzpypxsbGwDbLly/Xm2++qV27dunAgQOqra3V3XffHVjf1tam7OxsNTc369ChQ9q2bZu2bt2qwsLC0B0VAADo1RyWZVndffC5c+fkcrl04MAB3XbbbWpoaNCQIUO0Y8cO3XPPPZKk06dP66abblJ5ebmmTJmiPXv2aPbs2aqtrVVycrIkqbi4WI899pjOnTun6OjoKz6vz+dTYmKiGhoa5HQ6uzs+AFzTRubvtnuEIFVrsu0eAWHWlZ/fV3UNSkNDgyQpKSlJklRRUaGWlhZlZmYGtklLS1NqaqrKy8slSeXl5Ro3blwgTiQpKytLPp9Pp06d6vB5mpqa5PP5gm4AAKDv6nag+P1+LVu2TF/72tc0duxYSZLX61V0dLQGDBgQtG1ycrK8Xm9gm3+Mk8vrL6/rSFFRkRITEwO34cOHd3dsAADQC3Q7UDwej06ePKmdO3eGcp4OFRQUqKGhIXCrqakJ+3MCAAD7RHXnQUuWLFFJSYkOHjyoYcOGBZanpKSoublZFy5cCDqLUldXp5SUlMA27777btD+Lr/L5/I2nxUTE6OYmJjujAoAAHqhLp1BsSxLS5Ys0RtvvKH9+/dr1KhRQeszMjJ03XXXqaysLLCssrJS1dXVcrvdkiS3260TJ06ovr4+sE1paamcTqfS09Ov5lgAAEAf0aUzKB6PRzt27NBvf/tbJSQkBK4ZSUxMVFxcnBITE7Vw4ULl5eUpKSlJTqdTjzzyiNxut6ZMmSJJmjlzptLT0zVv3jytW7dOXq9XK1eulMfj4SwJAACQ1MVAefHFFyVJ06ZNC1q+ZcsWzZ8/X5L03HPPKSIiQjk5OWpqalJWVpZeeOGFwLaRkZEqKSnR4sWL5Xa7FR8fr9zcXD3xxBNXdyQAAKDPuKrPQbELn4MCAFePz0FBT+uxz0EBAAAIBwIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMaJsnsAAAAkaWT+7ituU7UmuwcmgQk4gwIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOA
QKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAME6U3QMAAEJvZP5uu0cArgpnUAAAgHEIFAAAYBwCBQAAGIdrUAAAvUZnrq2pWpPdA5Mg3DiDAgAAjEOgAAAA43Q5UA4ePKg777xTQ4cOlcPh0G9+85ug9ZZlqbCwUF/60pcUFxenzMxMffjhh0HbnD9/XnPnzpXT6dSAAQO0cOFCffrpp1d1IAAAoO/ocqA0Njbq5ptv1qZNmzpcv27dOm3YsEHFxcU6cuSI4uPjlZWVpUuXLgW2mTt3rk6dOqXS0lKVlJTo4MGDevDBB7t/FAAAoE/p8kWys2bN0qxZszpcZ1mW1q9fr5UrV+quu+6SJP3qV79ScnKyfvOb3+j+++/XBx98oL179+ro0aOaOHGiJGnjxo36xje+oWeeeUZDhw69isMBAAB9QUivQTlz5oy8Xq8yMzMDyxITEzV58mSVl5dLksrLyzVgwIBAnEhSZmamIiIidOTIkQ7329TUJJ/PF3QDAAB9V0gDxev1SpKSk5ODlicnJwfWeb1euVyuoPVRUVFKSkoKbPNZRUVFSkxMDNyGDx8eyrEBAIBhesW7eAoKCtTQ0BC41dTU2D0SAAAIo5AGSkpKiiSprq4uaHldXV1gXUpKiurr64PWt7a26vz584FtPismJkZOpzPoBgAA+q6QBsqoUaOUkpKisrKywDKfz6cjR47I7XZLktxuty5cuKCKiorANvv375ff79fkyZNDOQ4AAOiluvwunk8//VR//vOfA/fPnDmj48ePKykpSampqVq2bJmefPJJjRkzRqNGjdKPf/xjDR06VHPmzJEk3XTTTbrjjju0aNEiFRcXq6WlRUuWLNH999/PO3gAAICkbgTKe++9p+nTpwfu5+XlSZJyc3O1detW/eAHP1BjY6MefPBBXbhwQVOnTtXevXsVGxsbeMwrr7yiJUuW6Pbbb1dERIRycnK0YcOGEBwOAADoCxyWZVl2D9FVPp9PiYmJamho4HoUAOhAZ36pXl/FLws0V1d+fveKd/EAAIBrC4ECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOFF2DwAA6JqR+bvtHgEIO86gAAAA43AGBQDQp3TmDFPVmuwemARXgzMoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4BAoAADAOgQIAAIxDoAAAAOMQKAAAwDgECgAAMA6BAgAAjEOgAAAA4xAoAADAOAQKAAAwDoECAACMQ6AAAADjECgAAMA4UXYPAABATxuZv/uK21Stye6BSfB5OIMCAACMQ6AAAADjECgAAMA4XIMCAAbpzLURwLWAMygAAMA4BAoAADAOgQIAAIxDoAAAAONwkSwAAB3gw9zsxRkUAABgHAIFAAAYh0ABAADGIVAAAIBxuEgWAHoInxILdB5nUAAAgHEIFAAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHN5mDABAN/H7esKHQAGAEOAzToDQsjVQNm3apKefflper1c333yzNm7cqEmTJtk5EoA+JFT/d0t8AD3PtkB59dVXlZeXp+LiYk2ePFnr169XVlaWKisr5XK57BoLQJgRDbjW8DJQ99gWKM8++6wWLVqkBQsWSJKKi4u1e/du/fKXv1R+fr5dYwEwAPEBwJZAaW5uVkVFhQoKCgLLIiIilJmZqfLy8nbbNzU1qampKXC/oaFBkuTz+cIy39hV+664zcnVWWF5bqArOvNvtTNC9e85VPP0pNTlu+weAejUz7POfn115uvZrp9zl4/TsqwrbmtLoHzyySdqa2tTcnJy0PLk5GSdPn263fZFRUVavX
p1u+XDhw8P24xXkrjetqcGQo5/z4C9Qvk1GKp9hfP7wsWLF5WYmPiF2/SKd/EUFBQoLy8vcN/v9+v8+fMaNGiQHA6HjZN1js/n0/Dhw1VTUyOn02n3OD2KY+fYOfZrB8fOsV/p2C3L0sWLFzV06NAr7teWQBk8eLAiIyNVV1cXtLyurk4pKSntto+JiVFMTEzQsgEDBoRzxLBwOp3X3D/cyzh2jv1aw7Fz7Neazh77lc6cXGbLJ8lGR0crIyNDZWVlgWV+v19lZWVyu912jAQAAAxi20s8eXl5ys3N1cSJEzVp0iStX79ejY2NgXf1AACAa5dtgfLNb35T586dU2FhobxeryZMmKC9e/e2u3C2L4iJidGqVavavUx1LeDYOfZrDcfOsV9rwnXsDqsz7/UBAADoQfw2YwAAYBwCBQAAGIdAAQAAxiFQAACAcQgUAABgHAIlzJ566il99atfVb9+/T7302+rq6uVnZ2tfv36yeVyacWKFWptbe3ZQXvAf/3Xf+muu+7S4MGD5XQ6NXXqVL399tt2j9Vjdu/ercmTJysuLk4DBw7UnDlz7B6pRzU1NWnChAlyOBw6fvy43eOEXVVVlRYuXKhRo0YpLi5Oo0eP1qpVq9Tc3Gz3aGGxadMmjRw5UrGxsZo8ebLeffddu0cKu6KiIt16661KSEiQy+XSnDlzVFlZafdYtlizZo0cDoeWLVsWsn0SKGHW3Nyse++9V4sXL+5wfVtbm7Kzs9Xc3KxDhw5p27Zt2rp1qwoLC3t40vCbPXu2WltbtX//flVUVOjmm2/W7Nmz5fV67R4t7F577TXNmzdPCxYs0H/+53/qj3/8o/7t3/7N7rF61A9+8INO/f6NvuL06dPy+/3avHmzTp06peeee07FxcX64Q9/aPdoIffqq68qLy9Pq1at0rFjx3TzzTcrKytL9fX1do8WVgcOHJDH49Hhw4dVWlqqlpYWzZw5U42NjXaP1qOOHj2qzZs3a/z48aHdsYUesWXLFisxMbHd8rfeesuKiIiwvF5vYNmLL75oOZ1Oq6mpqQcnDK9z585ZkqyDBw8Glvl8PkuSVVpaauNk4dfS0mL90z/9k/Xzn//c7lFs89Zbb1lpaWnWqVOnLEnWn/70J7tHssW6deusUaNG2T1GyE2aNMnyeDyB+21tbdbQoUOtoqIiG6fqefX19ZYk68CBA3aP0mMuXrxojRkzxiotLbW+/vWvW0uXLg3ZvjmDYrPy8nKNGzcu6BN0s7Ky5PP5dOrUKRsnC61Bgwbpxhtv1K9+9Ss1NjaqtbVVmzdvlsvlUkZGht3jhdWxY8d09uxZRURE6JZbbtGXvvQlzZo1SydPnrR7tB5RV1enRYsWafv27erXr5/d49iqoaFBSUlJdo8RUs3NzaqoqFBmZmZgWUREhDIzM1VeXm7jZD2voaFBkvrc3/EX8Xg8ys7ODvr7DxUCxWZer7fdx/tfvt+XXvpwOBz6/e9/rz/96U9KSEhQbGysnn32We3du1cDBw60e7yw+u///m9J0uOPP66VK1eqpKREAwcO1LRp03T+/Hmbpwsvy7I0f/58fe9739PEiRPtHsdWf/7zn7Vx40Y99NBDdo8SUp988ona2to6/D7Wl76HXYnf79eyZcv0ta99TWPHjrV7nB6xc+dOHTt2TEVFRWHZP4HSDfn5+XI4HF94O336tN1j9ojO/llYliWPxyOXy6U//OEPevfddzVnzhzdeeed+vjjj+0+jG7p7LH7/X5J0o9+9CPl5OQoIyNDW7ZskcPh0K5du2w+iu7p7LFv3LhRFy9eVEFBgd0jh0x3vv7Pnj2rO+64Q/fee68WLVpk0+QIJ4/Ho5MnT2rnzp12j9IjampqtHTpUr3yyiuKjY0Ny3PY9ssCe7NHH31U8+fP/8Jtrr/++k7tKyUlpd3V7nV1dYF1puvsn8X+/ftVUlKiv/3tb3I6nZKkF154QaWlpdq2bZvy8/N7YNrQ6uyxXw6w9PT0wP
KYmBhdf/31qq6uDueIYdOVv/fy8vJ2v0Rs4sSJmjt3rrZt2xbGKcOjq1//tbW1mj59ur761a/qZz/7WZin63mDBw9WZGRk4PvWZXV1db3ie1goLFmyRCUlJTp48KCGDRtm9zg9oqKiQvX19frKV74SWNbW1qaDBw/qP/7jP9TU1KTIyMireg4CpRuGDBmiIUOGhGRfbrdbTz31lOrr6+VyuSRJpaWlcjqdQT/QTNXZP4v//d//lfT316b/UUREROAMQ2/T2WPPyMhQTEyMKisrNXXqVElSS0uLqqqqNGLEiHCPGRadPfYNGzboySefDNyvra1VVlaWXn31VU2ePDmcI4ZNV77+z549q+nTpwfOmn32339fEB0drYyMDJWVlQXeOu/3+1VWVqYlS5bYO1yYWZalRx55RG+88YbeeecdjRo1yu6Resztt9+uEydOBC1bsGCB0tLS9Nhjj111nEgESthVV1fr/Pnzqq6uVltbW+DzH2644Qb1799fM2fOVHp6uubNm6d169bJ6/Vq5cqV8ng8ferXdrvdbg0cOFC5ubkqLCxUXFycXnrpJZ05c0bZ2dl2jxdWTqdT3/ve97Rq1SoNHz5cI0aM0NNPPy1Juvfee22eLrxSU1OD7vfv31+SNHr06D7/f5pnz57VtGnTNGLECD3zzDM6d+5cYF1fO7OQl5en3NxcTZw4UZMmTdL69evV2NioBQsW2D1aWHk8Hu3YsUO//e1vlZCQELjmJjExUXFxcTZPF14JCQntrrWJj4/XoEGDQncNTsjeD4QO5ebmWpLa3d5+++3ANlVVVdasWbOsuLg4a/Dgwdajjz5qtbS02Dd0mBw9etSaOXOmlZSUZCUkJFhTpkyx3nrrLbvH6hHNzc3Wo48+arlcLishIcHKzMy0Tp48afdYPe7MmTPXzNuMt2zZ0uHXfl/9trtx40YrNTXVio6OtiZNmmQdPnzY7pHC7vP+frds2WL3aLYI9duMHZZlWaFJHQAAgNDoey+IAgCAXo9AAQAAxiFQAACAcQgUAABgHAIFAAAYh0ABAADGIVAAAIBxCBQAAGAcAgUAABiHQAEAAMYhUAAAgHH+H+Y/bXR71PodAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "\n", + "data = [np.mean(force) for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Mean\")\n", + "plt.show()\n", + "\n", + "data = [force.min() for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Min\")\n", + "plt.show()\n", + "\n", + "data = [force.max() for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Max\")\n", + "plt.show()\n", + "\n", + "data = [force[0][0] for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"First force\")\n", + "plt.show()\n", + "\n", + "data = [force[-1][-1] for force in forces]\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.title(\"Last force\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Visualise forces\n", + "\n", + "query = None\n", + "data = abcd.property('forces', query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.show()\n", + "\n", + "data = abcd.count_property(\"forces\", query=query)\n", + "hist, bins, ax = plt.hist(data, bins=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "366\n" + ] + } + ], + "source": [ + "count = 0\n", + "for force in forces:\n", + " if np.sum(np.logical_and((force < 3), force > -3)) == force.size:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "366\n" + ] + } + ], + "source": [ + "# Check all forces for each structure are between -3 and 3\n", + "\n", + "script = \"\"\"\n", + "int count = 0;\n", + "for (int i=0; i 3 || doc.forces[i] < -3) {\n", + " count += 1; \n", + " }\n", + "}\n", + "return count == 0;\n", + "\"\"\"\n", + "\n", + "query = {\n", + " \"bool\": {\n", + " \"filter\": {\n", + " \"script\": {\n", + " \"script\": script\n", + " }\n", + " }\n", + " },\n", + "}\n", + "\n", + "\n", + "print(abcd.count(query=query, timeout=90))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1321\n" + ] + } + ], + "source": [ + "# Check forces no greater than 3\n", + "\n", + "count = 0\n", + "for force in forces:\n", + " if force.max() > 3:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1321\n" + ] + } + ], + "source": [ + "# Check forces no greater than 3\n", + "\n", + "script = \"\"\"\n", + 
"double max = doc.forces[0];\n", + "for (int i=1; i 3.0;\n", + "\"\"\"\n", + "\n", + "\n", + "query = {\n", + " \"bool\": {\n", + " \"filter\": {\n", + " \"script\": {\n", + " \"script\": script\n", + " }\n", + " }\n", + " },\n", + "}\n", + "\n", + "\n", + "print(abcd.count(query=query, timeout=60))" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1010\n" + ] + } + ], + "source": [ + "# Check average force less than 0\n", + "\n", + "count = 0\n", + "for force in forces:\n", + " if np.mean(force) < 0.0:\n", + " count +=1\n", + "print(count)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1005\n" + ] + } + ], + "source": [ + "# Check average force less than 0\n", + "\n", + "script = \"\"\"\n", + "double force = 0;\n", + "for (int i=0; i" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(-30849.977784286697, -30845.753419331242)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = abcd.property('energy', query)\n", + "hist, bins, ax = plt.hist(data, bins=50)\n", + "plt.show()\n", + "min(data), max(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"bool\": {\n", + " \"must\": [\n", + " {\n", + " \"range\" : {\n", + " \"energy\" : {\n", + " \"gte\" : -30850,\n", + " \"lte\" : -30848,\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'count': 968, 'category': 'info', 'dtype': 'scalar(float)'}" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "abcd.count_properties(query)['energy']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fetching the data" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "query = {\n", + " \"bool\": {\n", + " \"must\": [\n", + " {\n", + " \"range\" : {\n", + " \"energy\" : {\n", + " \"gte\" : -30850,\n", + " \"lte\" : -30848,\n", + " }\n", + " }\n", + " },\n", + " {\n", + " \"match\": {\n", + " \"n_atoms\": 114\n", + " }\n", + " }\n", + " ]\n", + " }\n", + "}\n", + "abcd.count(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "968" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traj = list(abcd.get_atoms(query=query))\n", + "len(traj)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Atoms(symbols='C48H28O32Zr6', pbc=True, cell=[[14.759483662029265, 0.0, 0.0], [7.380258413807584, 12.781786651387147, 0.0], [7.380243655055182, 4.260782501715179, 12.050631347394049]], forces=...)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "traj[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "242" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = 
list(abcd.get_items(query=query))\n", + "len(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'_id': '2UDM2okBtksDlC5r8IGq',\n", + " 'n_atoms': 114,\n", + " 'cell': [[14.759483662029265, 0.0, 0.0],\n", + " [7.380258413807584, 12.781786651387147, 0.0],\n", + " [7.380243655055182, 4.260782501715179, 12.050631347394049]],\n", + " 'pbc': [True, True, True],\n", + " 'formula': 'C48H28O32Zr6',\n", + " 'numbers': [1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 1,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 6,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 8,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40,\n", + " 40],\n", + " 'positions': [[17.166810638040264, 11.566799628342661, 2.3959431306453296],\n", + " [10.391931260040497, 9.232075241735581, 8.799170748954813],\n", + " [15.152442318761134, 3.2144705981189303, 0.6236271192356346],\n", + " 
[15.428455018627362, 13.198368239182761, 6.757442369774353],\n", + " [20.968952462595865, 8.354501228588285, 5.937790321351722],\n", + " [12.821718368988067, 11.860905590260213, 0.764468940894911],\n", + " [20.164574198879585, 13.449131931085539, 8.500504258460039],\n", + " [5.203325638335655, 4.037525599970674, 3.6535544413570706],\n", + " [6.476452578322519, 9.882112891744764, 0.7336632917566172],\n", + " [14.332783438660714, 4.5739237510789845, 5.763830060388294],\n", + " [12.20845295758527, 7.975607890442319, 2.7181563401019804],\n", + " [26.126453831046035, 15.25865575215541, 8.681035572143871],\n", + " [7.431639790543854, 7.68880010777489, 3.739705967641281],\n", + " [21.52510600020679, 15.432405681052952, 8.675468268048236],\n", + " [11.49107468172553, 9.60164215963523, 0.7009214784567679],\n", + " [18.70674083756121, 4.607625571215378, 5.677858158016438],\n", + " [17.34676875755316, 10.130528920703508, 4.483049170020872],\n", + " [2.9330861621787743, 3.3990818416373494, 0.720770788622487],\n", + " [22.67189915206641, 9.23882668038352, 8.661796350384211],\n", + " [15.54501705742674, 14.931708899088871, 4.905010140501105],\n", + " [20.180891240581246, 11.991517760259551, 0.36399634878062614],\n", + " [13.537900990107627, 8.71222318139275, 4.81955270950513],\n", + " [13.02082403030889, 3.7798366294145125, 3.8744821907763676],\n", + " [12.738608267554484, 13.15125952920471, 8.605595280531846],\n", + " [9.30512423974256, 4.003262597986021, 2.08391144947309],\n", + " [13.66172535110934, 6.786427797477926, 9.451058899918706],\n", + " [19.297496722626608, 6.66303528741421, 9.65403361924748],\n", + " [16.364750768476505, 11.479160632545504, 9.561987519221761],\n", + " [16.965060879285595, 12.039276627942046, 3.4048076978088133],\n", + " [9.872075532499599, 8.868306222192839, 9.697764141289875],\n", + " [23.093789800187132, 7.8655671087878325, 11.819108411864843],\n", + " [15.963054660441902, 12.957563889928995, 5.796919712452191],\n", + " [20.789096532494103, 
7.4060484208004835, 5.418655862348822],\n", + " [20.271838371834924, 15.445983071791856, 11.893743962525676],\n", + " [20.235785459686173, 13.946169611391733, 9.533785527794883],\n", + " [5.468638782470736, 4.963675408702813, 4.207234520017469],\n", + " [13.30165782031905, 14.29182491089219, 11.848895929341783],\n", + " [13.860085269446175, 5.316906102226919, 5.113333629612867],\n", + " [12.690982408563865, 7.210540402496312, 3.452020777408105],\n", + " [26.695527891724396, 14.913992347710204, 9.505586002333807],\n", + " [6.71805210100351, 7.061748291218562, 4.294338782243351],\n", + " [20.991757417986378, 15.139143269943215, 9.631524849633491],\n", + " [19.51051107137078, 14.241804768445284, 11.863723702327984],\n", + " [19.467115888926717, 5.338714780974876, 5.342773829476735],\n", + " [17.04442989892783, 11.179573262577135, 4.5377135674632525],\n", + " [9.75214492091448, 7.909665520118783, 11.856922309351365],\n", + " [23.22825354947115, 8.805217731024735, 9.58442235219589],\n", + " [16.00517946832365, 13.88422938185283, 4.718010810191537],\n", + " [27.480789782170447, 15.480087960917762, 11.617172959535212],\n", + " [13.389347998136012, 7.596486021826197, 4.594226402069221],\n", + " [13.123855701056296, 4.872440299211145, 4.040302647886903],\n", + " [12.64753966896389, 13.717491998222464, 9.492966517597923],\n", + " [20.97530038245366, 8.335251317448973, 10.600423519640026],\n", + " [16.502636637988203, 10.85079294806207, 6.900496977690235],\n", + " [9.019607067373974, 1.3080109208445687, 2.1730394493971033],\n", + " [12.07299373071628, 8.404520741314311, 10.81235772773731],\n", + " [18.747600350981866, 12.258841111031488, 10.56065092033455],\n", + " [7.015172452108002, 5.4262894076561405, 2.347590240376888],\n", + " [18.78251160034123, 7.0857013283180255, 6.946566060828641],\n", + " [14.224469007695566, 4.466485050187827, 10.705760127563483],\n", + " [11.603451262170989, 5.349451807744306, 2.0915175410667244],\n", + " [18.543838401175798, 4.375079148751448, 
10.585231910862703],\n", + " [14.160667831243819, 12.098710870007285, 10.795787760496086],\n", + " [14.45018544044642, 7.011509947840652, 6.832073401261807],\n", + " [16.477007379156557, 13.350335720646678, 3.479800617715256],\n", + " [10.582432902092235, 8.377782469927169, 10.803729958995012],\n", + " [22.44315777833504, 8.370354822180708, 10.648120637633818],\n", + " [16.567659589367246, 11.689973911016514, 5.702162918040675],\n", + " [19.752709342190204, 6.556676208576273, 5.912505297587319],\n", + " [20.937902942140084, 15.937436540790538, 10.817725992408736],\n", + " [19.488886710337034, 13.540165709815675, 10.668618978356523],\n", + " [6.371529009087222, 5.8630806488390075, 3.652784530849877],\n", + " [13.287113670442396, 13.309066380643525, 10.810269250499239],\n", + " [13.887372022428522, 6.66488149144997, 5.501186858702026],\n", + " [12.488596287754628, 5.853559681882998, 3.200679859004045],\n", + " [26.74856971012843, 15.90075025114669, 10.529236108850316],\n", + " [10.08315746894864, 1.501050200458931, 1.573941101482483],\n", + " [12.62721678223712, 8.955703669760464, 9.832552292079287],\n", + " [20.39919235012287, 7.4641321602901884, 11.35141521129136],\n", + " [15.34144808796582, 10.482235049392207, 7.284668181164747],\n", + " [19.016384691239136, 8.259448068479994, 7.407999661220768],\n", + " [13.965549620748263, 5.310837048175544, 11.629395661906845],\n", + " [18.964757008970206, 11.57454425853002, 9.498296919639932],\n", + " [6.724169659438005, 4.266531959190234, 1.8722045706136907],\n", + " [14.989309002358754, 11.848517668179902, 11.724244051601735],\n", + " [15.200359309885917, 6.086405481062033, 7.400205812261053],\n", + " [10.683892382055284, 6.184519761996729, 1.7395703404079805],\n", + " [17.662435367568307, 4.527759521878119, 9.64210249881064],\n", + " [7.857986041984485, 6.202923889200398, 1.7950802519056281],\n", + " [14.981497208999828, 4.615121609824532, 9.745747168438765],\n", + " [17.887354642477394, 12.0129465321045, 
11.494403179921116],\n", + " [17.806453279094754, 6.309886859779258, 7.194991281595532],\n", + " [17.65884499015476, 10.488278242553303, 7.416379002429795],\n", + " [12.548166448986402, 7.693791677648976, 11.776895469518676],\n", + " [20.34393939120336, 9.206817729894354, 9.922408032378584],\n", + " [7.857463468905509, 1.74909233925955, 1.8274196418121598],\n", + " [18.857763430781485, 5.182306608404846, 11.489619301518633],\n", + " [14.163844810128705, 8.142657329873755, 7.360230728587728],\n", + " [11.792894920225645, 4.155462409412462, 1.7001921686281716],\n", + " [13.835964747759567, 11.264146337910587, 9.884038668157677],\n", + " [18.442882359243793, 7.121400257955315, 9.800715117804442],\n", + " [16.392129968717537, 10.582139510371382, 9.857218178507331],\n", + " [9.302433061412287, 3.9652007015997683, 1.0673996577713942],\n", + " [14.478398449342821, 7.162072897735833, 9.816748197723141],\n", + " [15.11113884153613, 9.160319431120458, 11.201390619831347],\n", + " [16.44457564782678, 6.551160139418165, 11.181147839950713],\n", + " [16.50639593935091, 8.350138210113135, 8.738223430491407],\n", + " [18.033642210935266, 9.122422759734661, 11.23846663261546],\n", + " [10.893906920444666, 2.90658841770915, -0.019051977695474183],\n", + " [14.757549238860731, 9.308555112561303, 9.214183379238824],\n", + " [18.225880348003468, 9.344784918068, 9.180851999025142],\n", + " [7.388359137923477, 3.030521667505481, 0.14692076074094484],\n", + " [9.220983449272417, 6.075785767588413, 0.00657883162539968],\n", + " [16.440728831136994, 6.2934328881825925, 9.187973671216682]],\n", + " 'forces': [[0.03505596579759337, 0.7596797943958487, 0.9211044616269563],\n", + " [0.16925367694563342, -0.01943702713953078, 0.38893903196958485],\n", + " [0.01574595116377608, 0.17132290092535438, -0.1999552221020049],\n", + " [0.6313750521363777, 0.05251478601615336, -0.8064430222079316],\n", + " [-0.09833287623511343, -0.138000887230052, -0.08874934559146055],\n", + " [0.19781246456634455, 
-0.9287673780647797, -1.0439826331463689],\n", + " [0.07987955323902354, 0.3227860853196942, 1.7840037712935266],\n", + " [0.3716884711227413, 0.4696845328184121, 0.370453313071228],\n", + " [-0.14715298673081575, -0.28619517081945, -0.2515490388965677],\n", + " [-0.019879256916508915, 0.1586797572898179, 0.03203954734206577],\n", + " [0.9120773177492224, -1.200046035662623, 1.3240873743396222],\n", + " [-0.5694124897336902, -0.12506360937075797, -1.3829512429794373],\n", + " [-0.0348132536409263, 0.07054439117941769, 0.5526864339711696],\n", + " [-0.41634750794948194, 0.12174380071939654, 1.3701661744387312],\n", + " [1.1617998448227365, 0.7015918847484289, -1.3710321220485349],\n", + " [0.19070902021846559, 0.8070636865577138, 0.18004202662121627],\n", + " [0.22735084256512936, 0.16909786808234928, -0.1733360748519467],\n", + " [-0.4446707825029187, -0.3087082660123333, -0.10186968600959667],\n", + " [1.0559542045857038, -0.7522446777152361, 1.3649098107407422],\n", + " [0.7762546692811232, -1.6189836731314526, -0.7746117342375823],\n", + " [0.028996903632322036, -0.4549212573567351, -0.15349589870642655],\n", + " [-0.30669869163043734, -1.474741689618629, -0.22373535702799768],\n", + " [-0.0288004713360364, 0.7124475973319003, -0.205686211479239],\n", + " [-0.009475030082091964, -0.9498180296696098, -0.7324728929189461],\n", + " [0.024624999488289372, -0.06313755663570486, -1.801891965772292],\n", + " [-0.06351139506345264, -0.159376012078223, -0.050473844173332186],\n", + " [-0.27379062557521333, 0.16125086064434194, -0.24263708044911655],\n", + " [0.16200265126520474, 1.5060489867253262, -0.5510645819751359],\n", + " [0.11499568285511277, -1.536591123473717, -0.46194088373525655],\n", + " [-0.3656006125998993, -0.17292778363924827, -1.3654302020596978],\n", + " [1.3289642431824835, 0.5107142001499065, -0.8944297785444403],\n", + " [-0.025697149587125892, 1.0899791579381328, 0.0022805686754628165],\n", + " [-0.7954947499036974, 0.11066234526139797, 
1.0548332035231311],\n", + " [-1.8548078439117515, 0.38714131650410166, 2.989239836187592],\n", + " [0.4506861359111631, 1.1364487657471294, -0.6025277867192889],\n", + " [-1.2287668027946692, -0.09796263735206197, 0.2020090194615987],\n", + " [1.0444392611021323, -2.684139342402327, -1.0437898003947796],\n", + " [-0.07301933526848252, -0.6708481734970433, 0.9851917838418224],\n", + " [-0.8675355232354481, 0.9083975546282277, -1.1112478391074265],\n", + " [0.1290364782741685, 1.0164497157641128, 0.008965437397225301],\n", + " [0.2684093062541945, -0.321198686108611, -1.4872871313266933],\n", + " [-0.643392389215423, -0.898736890883939, -1.098486424717582],\n", + " [-0.03591728542135892, 0.6514399427150831, 0.9283400606872579],\n", + " [0.8447910287406576, -1.5317872739661245, -1.3569157361908561],\n", + " [0.17489879147082638, 0.8299521628403577, -1.920810638125738],\n", + " [0.3438521635446514, 0.034021868028403804, 0.7196574853586566],\n", + " [-0.49089716393457716, 1.32019318119886, -0.94368029174169],\n", + " [-0.8017672136473943, -0.4383576953262203, 0.7052387377464858],\n", + " [-0.5808395014825368, 3.6184937280564875, 2.299079591174048],\n", + " [-0.046264433761305436, 2.3374147421900013, 0.42705255387637825],\n", + " [-0.40195909935689456, 0.5967109224900137, -0.9082941962733758],\n", + " [0.545840099958743, 0.6951430433146117, 4.287385291900738],\n", + " [-0.6422225371891145, 0.3670378593750782, 0.6300133957998216],\n", + " [1.0342078124131395, -1.450620626387826, 0.9064692271123347],\n", + " [-1.6097744951362183, 0.26370058757071924, 1.6795418988818989],\n", + " [-1.71670514098886, -1.4817345765222634, 0.997617412133566],\n", + " [-0.36574665127043626, 0.3369549216858052, -0.5080099136416182],\n", + " [0.25302742332541905, -0.05521598720041631, 0.07509935788229247],\n", + " [0.4694140527455162, -0.5513201496485756, -0.6341384740218201],\n", + " [-0.9458055657745397, -0.43950337898099623, 2.0890692980731704],\n", + " [-0.10420990428288447, 
1.1152443621616988, 0.8470916520222858],\n", + " [0.303680730512897, -1.6201972339147879, -2.1682659662612322],\n", + " [-1.573935371256837, -0.7716241121396255, 0.4766830761494265],\n", + " [0.8572325978731995, -0.38723439044553537, 0.7948401469896356],\n", + " [-0.2134458014031925, 0.03674209537749023, 0.28632115488382165],\n", + " [-2.3274044083894974, 0.6730330971277177, -0.3481623612081755],\n", + " [0.898834592811411, -1.0161931196493315, 0.970241846276607],\n", + " [-1.0154469854558483, 0.8937227251219462, 1.1335331345430992],\n", + " [-0.24175827732254024, 1.3876321937466838, 0.534561183763117],\n", + " [0.6898959355886991, -1.1971756558776325, -2.6977317373963534],\n", + " [0.3469128049778768, -0.8706239325849023, -0.7535286867304852],\n", + " [0.5164133078455445, -1.1737287359463862, -0.8774512404325054],\n", + " [0.07070174270471455, 2.0437350895055317, -2.2527981881306025],\n", + " [0.3288256070994712, -0.3856912142121501, -0.4543299035851946],\n", + " [0.46220673582211436, -1.5513682828935087, 0.2903063650833339],\n", + " [-0.38469619721394926, -2.2885375531998284, -0.3758814264732991],\n", + " [1.9752635218505312, 0.3340079230208498, -1.0452476159967947],\n", + " [0.8562020996487061, 0.4789574741768384, -0.4695076409076208],\n", + " [-0.07918329845061843, -0.20133898992740973, 0.4222739011816598],\n", + " [0.4557039012178522, 0.3706605440016026, 0.8159874720805957],\n", + " [-0.05972518826358055, -0.10986736010417913, 0.6435188875004655],\n", + " [0.0024615743516213087, 0.5677505285253259, 0.5866964749241426],\n", + " [-0.5868764521589592, -0.8929930459899325, 1.0299773189536057],\n", + " [-0.19385399384171936, 0.13704546522351088, 0.0855992297614977],\n", + " [0.5114222820137425, 0.9816344052405028, 0.021354041800663897],\n", + " [-1.066992979728017, 1.3317744591489784, -0.48617301863097473],\n", + " [0.22686850357582064, -0.6558709822362698, -0.6488395287823175],\n", + " [0.7797446849745541, 0.08798264257114148, 0.9232755012995165],\n", + " 
[0.23828317402856555, 0.1996127511351823, 0.16316684686413324],\n", + " [1.8341906803324939, 0.7465810512458905, -1.8839816394548647],\n", + " [0.25763432631605526, -0.3878936213456354, -0.15371958469827013],\n", + " [-0.09513905164812367, 0.4332483987401102, 1.2322542473847458],\n", + " [-1.7633197590270517, 0.7325978485419761, -0.6597173528546378],\n", + " [1.5710907025053915, 0.6496761658138797, -0.2693559865093303],\n", + " [0.13329885341529846, -0.16092278785630468, -0.8911017423623445],\n", + " [0.4560890525003599, -0.12238914766138208, 0.02436686071149516],\n", + " [0.04387896406897804, 0.4783136698968656, 0.7284398601970398],\n", + " [-0.19204033753543812, -0.5498325092476479, -0.4268478940493581],\n", + " [-0.04005727604281352, -0.4718303756780979, -0.6626314413966554],\n", + " [0.5963838781433183, -0.31271095971465634, -1.0048920919648563],\n", + " [0.2392545368759047, -0.07763909377589129, -0.3403374652590171],\n", + " [1.0458302280169298, -0.7758582051438556, -0.16412124042933254],\n", + " [-1.0646861857983383, 0.17288613176490497, 1.4279167553260372],\n", + " [0.310471014472195, 0.4872791072940909, 0.18266249516014715],\n", + " [-7.301933526848252e-05, -1.2856617204855898, 1.537077576228393],\n", + " [0.23109436904931635, 0.627365159344662, 1.1682219467816664],\n", + " [0.06169413921247506, -0.7133752514222126, 0.5373045510423942],\n", + " [-0.3402017110018982, -0.024654310066530946, -0.18435993759480393],\n", + " [-0.14411137146241382, 1.2003674235819386, 0.056440860838511554],\n", + " [-1.1119034704628301, 0.02676570014126608, -0.4762907257775261],\n", + " [0.753222725431297, 0.3270011521591009, -0.33560457820400924],\n", + " [-0.7786571082555904, 0.413619053069661, -1.222248027349609],\n", + " [-0.5719985054876705, -0.1103018765710937, 0.8759049788750947],\n", + " [-0.24311736255574165, -0.40464795924505575, -1.3254814265784451]],\n", + " 'energy': -30848.841105643754,\n", + " 'volume': 2273.382588904185,\n", + " 'elements': {'1': 28, '6': 48, '8': 
32, '40': 6},\n", + " 'username': 'ubuntu',\n", + " 'uploaded': '2023-08-09T14:56:51.365526',\n", + " 'modified': '2023-08-09T14:56:51.365533',\n", + " 'hash_structure': '913be2ca3a0e3c584cc728f4c359c850',\n", + " 'hash': '96290c6b21554ece4011f07e63de82d3',\n", + " 'derived': {'arrays_keys': ['forces', 'positions', 'numbers'],\n", + " 'info_keys': ['pbc', 'n_atoms', 'cell', 'formula', 'energy', 'volume'],\n", + " 'results_keys': [],\n", + " 'derived_keys': ['elements',\n", + " 'username',\n", + " 'uploaded',\n", + " 'modified',\n", + " 'volume',\n", + " 'hash_structure',\n", + " 'hash']}}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[0]" + ] + } + ], + "metadata": { + "@webio": { + "lastCommId": null, + "lastKernelId": null + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/abcd_uploading.ipynb b/tutorials/abcd_uploading.ipynb index 80a774c8..c991db7f 100644 --- a/tutorials/abcd_uploading.ipynb +++ b/tutorials/abcd_uploading.ipynb @@ -203,7 +203,7 @@ "pycharm": {} }, "source": [ - "Uploading configurations on-by-one directly from an ase atoms object:" + "Uploading configurations one-by-one directly from an ase atoms object:" ] }, {