-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
302 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,197 @@ | ||
# mBuild paper | ||
@incollection{klein2016hierarchical, | ||
  title={A hierarchical, component based approach to screening properties of soft matter}, | ||
  author={Klein, Christoph and Sallai, J{\'a}nos and Jones, Trevor J and Iacovella, Christopher R and McCabe, Clare and Cummings, Peter T}, | ||
  booktitle={Foundations of molecular modeling and simulation: Select papers from {FOMMS} 2015}, | ||
  pages={79--92}, | ||
  year={2016}, | ||
  publisher={Springer} | ||
} | ||
|
||
# Foyer paper | ||
@article{klein2019formalizing, | ||
  title={Formalizing atom-typing and the dissemination of force fields with foyer}, | ||
  author={Klein, Christoph and Summers, Andrew Z and Thompson, Matthew W and Gilmer, Justin B and McCabe, Clare and Cummings, Peter T and Sallai, J{\'a}nos and Iacovella, Christopher R}, | ||
  journal={Comput. Mater. Sci.}, | ||
  volume={167}, | ||
  pages={215--227}, | ||
  year={2019}, | ||
  publisher={Elsevier} | ||
} | ||
|
||
# TRUE | ||
@article{thompson2020towards, | ||
  title = {Towards molecular simulations that are transparent, reproducible, usable by others, and extensible ({TRUE})}, | ||
  volume = {118}, | ||
  ISSN = {1362-3028}, | ||
  url = {http://dx.doi.org/10.1080/00268976.2020.1742938}, | ||
  DOI = {10.1080/00268976.2020.1742938}, | ||
  number = {9--10}, | ||
  journal = {Molecular Physics}, | ||
  publisher = {Informa UK Limited}, | ||
  author = {Thompson, Matthew W. and Gilmer, Justin B. and Matsumoto, Ray A. and Quach, Co D. and Shamaprasad, Parashara and Yang, Alexander H. and Iacovella, Christopher R. and McCabe, Clare and Cummings, Peter T.}, | ||
  year = {2020}, | ||
  month = apr, | ||
  pages = {e1742938} | ||
} | ||
|
||
# MoSDeF | ||
@article{cummings2021opena, | ||
  title={Open-source molecular modeling software in chemical engineering focusing on the {Molecular Simulation Design Framework}}, | ||
  author={Cummings, Peter T and McCabe, Clare and Iacovella, Christopher R and Ledeczi, Akos and Jankowski, Eric and Jayaraman, Arthi and Palmer, Jeremy C and Maginn, Edward J and Glotzer, Sharon C and Anderson, Joshua A and others}, | ||
  journal={AIChE J.}, | ||
  volume={67}, | ||
  number={3}, | ||
  year={2021} | ||
} | ||
|
||
# Reproducibility issue | ||
@article{baker2016reproducibility, | ||
  author  = {Baker, Monya}, | ||
  title   = {Reproducibility crisis}, | ||
  journal = {Nature}, | ||
  year    = {2016}, | ||
  volume  = {533}, | ||
  number  = {26}, | ||
  pages   = {353--66} | ||
} | ||
|
||
# GROMACS | ||
@article{abraham2015gromacs, | ||
  title={{GROMACS}: High performance molecular simulations through multi-level parallelism from laptops to supercomputers}, | ||
  author={Abraham, Mark James and Murtola, Teemu and Schulz, Roland and P{\'a}ll, Szil{\'a}rd and Smith, Jeremy C and Hess, Berk and Lindahl, Erik}, | ||
  journal={SoftwareX}, | ||
  volume={1}, | ||
  pages={19--25}, | ||
  year={2015}, | ||
  publisher={Elsevier} | ||
} | ||
|
||
# LAMMPS | ||
@article{thompson2022lammps, | ||
  author = {Thompson, Aidan P and Aktulga, H Metin and Berger, Richard and Bolintineanu, Dan S and Brown, W Michael and Crozier, Paul S and {in 't Veld}, Pieter J and Kohlmeyer, Axel and Moore, Stan G and Nguyen, Trung Dac and Shan, Ray and Stevens, Mark J and Tranchida, Julien and Trott, Christian and Plimpton, Steven J}, | ||
  doi = {10.1016/j.cpc.2021.108171}, | ||
  issn = {0010-4655}, | ||
  journal = {Comput. Phys. Commun.}, | ||
  keywords = {LAMMPS,Materials modeling,Molecular dynamics,Parallel algorithms}, | ||
  mendeley-groups = {QE-Paper}, | ||
  pages = {108171}, | ||
  title = {{LAMMPS} - a flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales}, | ||
  url = {https://www.sciencedirect.com/science/article/pii/S0010465521002836}, | ||
  volume = {271}, | ||
  year = {2022} | ||
} | ||
|
||
# GOMC | ||
@article{nejahi2021update, | ||
  title={Update 2.70 to ``{GOMC}: {GPU} Optimized {Monte Carlo} for the Simulation of Phase Equilibria and Physical Properties of Complex Fluids''}, | ||
  author={Nejahi, Younes and Barhaghi, Mohammad Soroush and Schwing, Gregory and Schwiebert, Loren and Potoff, Jeffrey}, | ||
  journal={SoftwareX}, | ||
  volume={13}, | ||
  pages={100627}, | ||
  year={2021}, | ||
  publisher={Elsevier} | ||
} | ||
|
||
# Studies using mbuild | ||
@article{albooyeh2023flowermd, | ||
  doi = {10.21105/joss.05989}, | ||
  url = {https://doi.org/10.21105/joss.05989}, | ||
  year = {2023}, | ||
  publisher = {The Open Journal}, | ||
  volume = {8}, | ||
  number = {92}, | ||
  pages = {5989}, | ||
  author = {Marjan Albooyeh and Chris Jones and Rainier Barrett and Eric Jankowski}, | ||
  title = {{FlowerMD}: Flexible Library of Organic Workflows and Extensible Recipes for Molecular Dynamics}, | ||
  journal = {Journal of Open Source Software} | ||
} | ||
|
||
@article{quach2022high, | ||
  author    = {Co D. Quach and Justin B. Gilmer and Daniel Pert and Akanke Mason-Hogans and Christopher R. Iacovella and Peter T. Cummings and Clare McCabe}, | ||
  title     = {High-throughput screening of tribological properties of monolayer films using molecular dynamics and machine learning}, | ||
  journal   = {J. Chem. Phys.}, | ||
  publisher = {{AIP} Publishing}, | ||
  year      = {2022}, | ||
  month     = apr, | ||
  volume    = {156}, | ||
  number    = {15}, | ||
  pages     = {154902}, | ||
  doi       = {10.1063/5.0080838}, | ||
  url       = {https://doi.org/10.1063/5.0080838} | ||
} | ||
|
||
@article{ma2022dynamics, | ||
  title = {Molecular dynamics simulation of the competitive adsorption behavior of effluent organic matters by heated aluminum oxide particles ({HAOPs})}, | ||
  journal = {Separation and Purification Technology}, | ||
  volume = {292}, | ||
  pages = {120961}, | ||
  year = {2022}, | ||
  issn = {1383-5866}, | ||
  doi = {10.1016/j.seppur.2022.120961}, | ||
  url = {https://www.sciencedirect.com/science/article/pii/S1383586622005184}, | ||
  author = {Yunqiao Ma and Tao Hua and Thien An Trinh and Rong Wang and Jia Wei Chew}, | ||
  keywords = {Dynamic membrane, Heated aluminum oxide particles (HAOPs), Molecular dynamics simulation, Effluent organic matter (EfOM), Membrane fouling}, | ||
} | ||
|
||
# ParmEd | ||
@article{shirts2016lessons, | ||
  title = {Lessons learned from comparing molecular dynamics engines on the {SAMPL5} dataset}, | ||
  volume = {31}, | ||
  ISSN = {1573-4951}, | ||
  url = {http://dx.doi.org/10.1007/s10822-016-9977-1}, | ||
  DOI = {10.1007/s10822-016-9977-1}, | ||
  number = {1}, | ||
  journal = {Journal of Computer-Aided Molecular Design}, | ||
  publisher = {Springer Science and Business Media LLC}, | ||
  author = {Shirts, Michael R. and Klein, Christoph and Swails, Jason M. and Yin, Jian and Gilson, Michael K. and Mobley, David L. and Case, David A. and Zhong, Ellen D.}, | ||
  year = {2016}, | ||
  month = oct, | ||
  pages = {147--161} | ||
} | ||
|
||
# OpenMM | ||
@article{eastman2017openmm, | ||
  title = {{OpenMM} 7: Rapid development of high performance algorithms for molecular dynamics}, | ||
  volume = {13}, | ||
  ISSN = {1553-7358}, | ||
  url = {http://dx.doi.org/10.1371/journal.pcbi.1005659}, | ||
  DOI = {10.1371/journal.pcbi.1005659}, | ||
  number = {7}, | ||
  journal = {PLOS Computational Biology}, | ||
  publisher = {Public Library of Science (PLoS)}, | ||
  author = {Eastman, Peter and Swails, Jason and Chodera, John D. and McGibbon, Robert T. and Zhao, Yutong and Beauchamp, Kyle A. and Wang, Lee-Ping and Simmonett, Andrew C. and Harrigan, Matthew P. and Stern, Chaya D. and Wiewiora, Rafal P. and Brooks, Bernard R. and Pande, Vijay S.}, | ||
  editor = {Gentleman, Robert}, | ||
  year = {2017}, | ||
  month = jul, | ||
  pages = {e1005659} | ||
} | ||
# Sympy | ||
@article{meurer2017sympy, | ||
  title = {{SymPy}: symbolic computing in {Python}}, | ||
  author = {Meurer, Aaron and Smith, Christopher P. and Paprocki, Mateusz and {\v{C}}ert{\'{i}}k, Ond{\v{r}}ej and Kirpichev, Sergey B. and Rocklin, Matthew and Kumar, AMiT and Ivanov, Sergiu and Moore, Jason K. and Singh, Sartaj and Rathnayake, Thilina and Vig, Sean and Granger, Brian E. and Muller, Richard P. and Bonazzi, Francesco and Gupta, Harsh and Vats, Shivam and Johansson, Fredrik and Pedregosa, Fabian and Curry, Matthew J. and Terrel, Andy R. and Rou{\v{c}}ka, {\v{S}}t{\v{e}}p{\'{a}}n and Saboo, Ashutosh and Fernando, Isuru and Kulal, Sumith and Cimrman, Robert and Scopatz, Anthony}, | ||
  year = 2017, | ||
  month = jan, | ||
  keywords = {Python, Computer algebra system, Symbolics}, | ||
  volume = 3, | ||
  pages = {e103}, | ||
  journal = {PeerJ Computer Science}, | ||
  issn = {2376-5992}, | ||
  url = {https://doi.org/10.7717/peerj-cs.103}, | ||
  doi = {10.7717/peerj-cs.103} | ||
} | ||
|
||
# Unyt | ||
@article{goldbaum2018unyt, | ||
  doi = {10.21105/joss.00809}, | ||
  url = {https://doi.org/10.21105/joss.00809}, | ||
  year = {2018}, | ||
  month = aug, | ||
  publisher = {The Open Journal}, | ||
  volume = {3}, | ||
  number = {28}, | ||
  pages = {809}, | ||
  author = {Nathan J. Goldbaum and John A. ZuHone and Matthew J. Turk and Kacper Kowalik and Anna L. Rosen}, | ||
  title = {unyt: Handle, manipulate, and convert data with units in {Python}}, | ||
  journal = {Journal of Open Source Software} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
--- | ||
title: "The General Molecular Simulation Object (GMSO): A Data Structure for the Molecular Simulation Design Framework (MoSDeF)" | ||
|
||
tags: | ||
- python | ||
- molecular-simulations | ||
- data-structure | ||
- MoSDeF | ||
|
||
authors: | ||
- name: Co D. Quach | ||
orcid: 0000-0002-1255-4161 | ||
equal-contrib: true | ||
affiliation: "1, 2" | ||
- name: Nicholas C. Craven | ||
orcid: 0000-0002-4607-4377 | ||
affiliation: "2, 3" | ||
- name: Umesh Timalsina | ||
orcid: 0000-0002-5430-3993 | ||
affiliation: "4" | ||
- name: Justin B. Gilmer | ||
orcid: 0000-0002-6915-5591 | ||
affiliation: "2, 3" | ||
- name: Matthew W. Thompson | ||
orcid: 0000-0002-1460-3983 | ||
affiliation: "1, 2" | ||
- name: Alexander Yang | ||
affiliation: "1, 2" | ||
- name: Ray A. Matsumoto | ||
orcid: 0000-0002-9124-3512 | ||
affiliation: "1, 2" | ||
- name: Parashara Shamaprasad | ||
affiliation: "1, 2" | ||
- name: Chris Jones | ||
affiliation: "5" | ||
- name: Ryan S. DeFever | ||
affiliation: "6" | ||
- name: Brad Crawford | ||
orcid: 0000-0003-0638-7333 | ||
affiliation: "7, 8" | ||
- name: Christopher R. Iacovella | ||
orcid: 0000-0003-0557-0427 | ||
affiliation: "1, 2" | ||
- name: Clare McCabe | ||
orcid: 0000-0002-8552-9135 | ||
affiliation: "1, 2, 9" | ||
- name: Peter T. Cummings | ||
orcid: 0000-0002-9766-2216 | ||
affiliation: "1, 2, 9" | ||
|
||
|
||
|
||
affiliations: | ||
- name: Department of Chemical and Biological Engineering, Vanderbilt University, Nashville, TN, USA | ||
index: 1 | ||
- name: Multiscale Modeling and Simulation (MuMS) Center, Vanderbilt University, Nashville, TN, USA | ||
index: 2 | ||
- name: Interdisciplinary Material Science Program, Vanderbilt University, Nashville, TN, USA | ||
index: 3 | ||
- name: Institute for Software Integrated Systems (ISIS), Vanderbilt University, Nashville, TN, USA | ||
index: 4 | ||
- name: Micron School of Materials Science and Engineering, Boise State University, Boise, ID, USA | ||
index: 5 | ||
- name: Department of Chemical and Biomolecular Engineering, University of Notre Dame, Notre Dame, IN, USA | ||
index: 6 | ||
- name: Atomfold LLC, PA, USA | ||
index: 7 | ||
- name: Department of Chemical Engineering, Wayne State University, Detroit, MI, USA | ||
index: 8 | ||
 - name: School of Engineering and Physical Sciences, Heriot-Watt University, Edinburgh, Scotland, U.K. | ||
index: 9 | ||
|
||
|
||
date: 2 January 2024 | ||
|
||
bibliography: paper.bib | ||
|
||
|
||
# Summary | ||
The General Molecular Simulation Object, or GMSO, stands as an open-source Python data structure, offering a versatile and expandable framework for handling chemical and biomolecular topologies. This library is an integral component of the Molecular Simulation Design Framework (MoSDeF), dedicated to streamlining the creation, parameterization, and representation of chemical systems for molecular simulations. The GMSO library serves as a dynamic repository for storing chemical/biomolecular structures, encompassing metadata, coordinates, and interaction potentials. Moreover, the library includes routines for exporting stored structures into various file formats, facilitating compatibility with other software for visualization (e.g., VMD and OVITO) or conducting molecular simulations (e.g., GROMACS, LAMMPS, GOMC). | ||
|
||
|
||
# Statement of need | ||
|
||
The Molecular Simulation Design Framework (MoSDeF) is a suite of software tailored to facilitate the initialization of chemical and biomolecular systems for computational simulations [@cummings2021opena]. These tools were developed to specifically address a critical aspect of the (ir)reproducibility issue within the molecular simulation community — namely, the insufficient documentation of the structure preparation process [@thompson2020towards]. The initialization step, often performed through Graphical User Interfaces (GUIs) or via the use of ad hoc, unpublished, and unreviewed code, poses the risk of introducing irreproducible and untraceable errors [@baker2016reproducibility]. By providing general-purpose and standardized tools, MoSDeF aims to trivialize the task of describing and disseminating this process, without creating an extra burden for computational simulation researchers [@cummings2021opena]. | ||
|
||
The process of initializing chemical/biomolecular systems involves constructing structures, assigning interaction parameters, and generating output structures in file formats compatible with a multitude of simulation software packages, such as GROMACS, LAMMPS, or GOMC [@abraham2015gromacs; @thompson2022lammps; @nejahi2021update]. Each of these steps necessitates distinct routines and, as such, is addressed by a series of specialized libraries—specifically, mBuild [@klein2016hierarchical], Foyer [@klein2019formalizing], and GMSO, which will be elaborated upon in this work. | ||
|
||
mBuild functions as a molecular builder, meticulously crafted with extensive utilities for creating, loading, and manipulating the positions of atoms and molecules, along with managing their connectivity through bonds [@klein2016hierarchical]. These utilities have been applied in various projects to explore a wide range of structures and applications, and have been integrated into other scientific libraries [@quach2022high; @albooyeh2023flowermd; @ma2022dynamics]. | ||
|
||
Foyer assumes the role of atom typing for the created structures, involving the identification and assignment of interaction parameters to each atom or group of atoms [@klein2019formalizing]. This process entails matching the connectivity (bond graph) of the provided structure with the SMARTS grammar of the corresponding atom type, defining the interaction parameters [@klein2019formalizing]. The use of a graph matching method, departing from the traditional approach of matching via atom indices, allows for a more flexible parameterization. This feature proves particularly advantageous in the study of functionalized polymers, whose structures consistently deviate slightly from the standard polymer [@quach2022high]. | ||
|
||
The parameterization step introduces additional information, requiring a more intricate data structure for representation. Beyond the initial details concerning positions and connectivity established during system construction, the new structure incorporates supplementary metadata and interaction parameters. These encompass not only atoms and bonds but also extend to angles, dihedrals, and improper dihedrals, along with specific interaction parameters associated with each of these objects. This necessitates the development of data structures that are capable of: | ||
- Supporting a variety of models | ||
- Providing flexibility for exotic potentials | ||
- Being compatible with existing community tools | ||
- Being extensible (to support new simulation models/engines/workflows) | ||
|
||
Currently, there are data structures designed to represent these systems, such as ParmEd and OpenMM [@shirts2016lessons; @eastman2017openmm]. However, these data structures are tailored to a specific subset of simulation workflows/ecosystems, sacrificing some generality. This limitation includes, but is not limited to, the hard-coding and assumption of potential (interaction) expressions and units. They lack the generality that MoSDeF seeks, such as the ability to define and store arbitrary potential expressions or unit systems. Integrating these new features would require a major overhaul of these data structures, which could impact existing simulation workflows and may not be appealing to current stakeholders in those projects. Hence, we have developed a new data structure called the General Molecular Simulation Object, or GMSO, specifically catering to the MoSDeF ecosystem. GMSO satisfies our needs for generality as well as extensibility. | ||
|
||
The General Molecular Simulation Object (GMSO) library is a lightweight, extensible data structure encapsulating chemical/biomolecular systems and their associated interaction parameters, i.e., force fields. The library is designed to accommodate a wide range of chemical/biomolecular models, offering the capability to support arbitrary potential expressions and unit systems. In addition to the core data structure, the library includes routines essential for converting to and from other data structures, allowing the reuse of existing parsers (enabling the creation of objects from files on disk). This improves interoperability with other ecosystems, while avoiding the reinvention of the wheel for well-established code. Furthermore, GMSO allows outputting to several molecular simulation engine-specific file formats, currently supporting GROMACS, LAMMPS, HOOMD-blue, GOMC, and Cassandra, with plans for future expansion. | ||
|
||
|
||
# Acknowledgements | ||
This research was partially supported by the National Science Foundation OAC-1835713 and OAC-1835874. Atomfold also donated research and development time and computational resources for this research and software. |