From 4cd4025f2bb17bf3796605aa5b865e0bf8dc2ad4 Mon Sep 17 00:00:00 2001 From: Geordy Jomon <gj82@njit.edu> Date: Wed, 7 Aug 2024 11:47:50 -0400 Subject: [PATCH] feat: more robust parsing, better errors and docs No longer avoiding reading the minimization log as it causes errors when parsing log files without them. Now the minimization log data, if present, can be obtained with run_number set to 0 (default) and the useful logs start with run_number set to 1 and beyond. --- .gitignore | 1 + Cargo.lock | 128 +++++++++++++++++++++++++++----------------------- Cargo.toml | 2 +- README.md | 18 +++++-- src/lib.rs | 29 +++++++----- src/reader.rs | 41 ++++++++-------- 6 files changed, 122 insertions(+), 97 deletions(-) diff --git a/.gitignore b/.gitignore index c8f0442..5aeb17e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /target +/test # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Cargo.lock b/Cargo.lock index c8bb9f7..0e3c010 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,9 +101,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" dependencies = [ "bytemuck_derive", ] @@ -116,24 +116,29 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cc" -version = "1.0.106" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" dependencies = [ "jobserver", "libc", - "once_cell", ] [[package]] @@ -280,7 +285,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -396,9 +401,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown", @@ -424,9 +429,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -470,7 +475,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "log_lammps_reader" -version = "0.1.5" +version = "0.2.0" dependencies = [ "polars", "pyo3", @@ -480,9 +485,9 @@ dependencies = [ [[package]] name = "lz4" -version = "1.25.0" +version = "1.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" +checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" dependencies = [ "libc", "lz4-sys", @@ -490,9 +495,9 
@@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" dependencies = [ "cc", "libc", @@ -1073,15 +1078,18 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro2" @@ -1148,7 +1156,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1161,7 +1169,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1227,9 +1235,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.2" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +checksum = "cb9ee317cfe3fbd54b36a511efc1edd42e216903c9cd575e686dd68a2ba90d8d" dependencies = [ "bitflags", ] @@ -1271,23 +1279,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -1347,16 +1355,17 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1455,7 +1464,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1471,9 +1480,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.70" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -1482,9 +1491,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.30.12" +version = "0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" dependencies = [ "cfg-if", "core-foundation-sys", @@ -1502,28 
+1511,28 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "target-lexicon" -version = "0.12.15" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1561,18 +1570,18 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", ] [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" @@ -1601,7 +1610,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -1623,7 
+1632,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1750,9 +1759,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "xxhash-rust" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" +checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" [[package]] name = "zerocopy" @@ -1760,6 +1769,7 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] @@ -1771,7 +1781,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1785,18 +1795,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 4afac07..e2693ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "log_lammps_reader" -version = "0.1.5" +version = "0.2.0" edition = "2021" # See more 
keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 1609911..edf6af1 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Using pip: pip install log-lammps-reader ``` + ## Build From Source Alternatively, to build the Python module, follow these steps: @@ -36,7 +37,7 @@ Alternatively, to build the Python module, follow these steps: 1. Ensure you have `maturin` installed: ```bash - pip install maturin + pip install maturin # or use conda or micromamba ``` 2. Compile the Rust packages and install the python module. @@ -47,15 +48,22 @@ Alternatively, to build the Python module, follow these steps: ## Usage Examples -### Python +- Note the `run_number = 0` gives the first data output which might include the minimization run. +- To get the useful data start with `run_number = 1`. + + +### Build For Python ```python import log_lammps_reader thermo_number = 0 # Choose the nth number of thermo run df = log_lammps_reader.new('log.lammps') # polars DataFrame for 1st thermo run +# usually the minimization run # Or choose the nth number of thermo run (default n = 0) +# n = 0 might consider the MPI minimization data, so in most cases +# start with n = 1 df = log_lammps_reader.new('log.lammps', n) time = df.get_column('Time') # Get any thermo column time_squared = time ** 2 # use broadcasting operations similar to numpy @@ -73,7 +81,7 @@ Example of a DataFrame for a LAMMPS log file. 
```python >>> import log_lammps_reader ->>> df = log_lammps_reader.new('log.lammps') +>>> df = log_lammps_reader.new('log.lammps', 1) >>> df shape: (10_000_002, 10) ┌──────────────┬───────────┬───────────┬───────────┬───┬───────┬────────────┬───────────┬───────────┐ @@ -125,7 +133,9 @@ use log_lammps_reader::LogLammpsReader; fn main() { let log_file_name = "log.lammps"; - let run_number = Some(0); + // skipping minimization + let run_number = Some(1); + match LogLammpsReader::new(log_file_name.into(), run_number) { Ok(df) => println!("DataFrame read successfully: {:?}", df), diff --git a/src/lib.rs b/src/lib.rs index 1834073..394150b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,17 +4,15 @@ use pyo3_polars::PyDataFrame; mod reader; use reader::LogLammpsReader; -/** This Rust code integrates with Python using PyO3 and PyPolars -to provide a Python interface for reading and processing LAMMPS -log files. The main function `new` serves as a bridge between -Rust and Python, allowing Python code to call Rust functions to -parse log files. It utilizes the LogLammpsReader struct from the -`reader` module to handle the actual parsing and conversion of log -file data into a DataFrame. +/** +### Parameters: +`log_file_name`: File path for the LAMMPS log file +`thermo_run_number`: The index of the run thermo (default = 0) +Note: +The default thermo_run_number includes the MPI minimization data +So usually what you need will start at index 1 -Parameters: -log_file_name: File path for the LAMMPS log file -thermo_run_number: The index of the run thermo (default = 0)*/ +*/ #[pyfunction] fn new(log_file_name: &str, thermo_run_number: Option<u32>) -> PyResult<PyDataFrame> { match LogLammpsReader::new(log_file_name.into(), thermo_run_number) { @@ -26,9 +24,16 @@ fn new(log_file_name: &str, thermo_run_number: Option<u32>) -> PyResult<PyDataFr } } -/// Adds the rust function to the python module. +/** Adds the rust function to the python module. 
+This Rust code integrates with Python using PyO3 and PyPolars +to provide a Python interface for reading and processing LAMMPS +log files. The main function `new` serves as a bridge between +Rust and Python, allowing Python code to call Rust functions to +parse log files. It utilizes the LogLammpsReader struct from the +`reader` module to handle the actual parsing and conversion of log +file data into a DataFrame. */ #[pymodule] -fn log_lammps_reader(m: &Bound<'_, PyModule>) -> PyResult<()> { +fn log_lammps_reader(_py: Python, m: &Bound<PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(new, m)?)?; Ok(()) } diff --git a/src/reader.rs b/src/reader.rs index b151767..7fb55e4 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -5,17 +5,12 @@ use std::io::{BufRead, BufReader}; use std::path::PathBuf; const ERROR_FLAGS: [&str; 2] = ["Loop time", "ERROR"]; -const MPI_FLAGS: [&str; 2] = [ - "MPI task timing breakdown", - "Per MPI rank memory allocation", -]; +const MPI_FLAG: &str = "Per MPI rank memory allocation"; /** This Rust code uses the Polars library to parse log files, particularly from LAMMPS simulations. The goal is to read specific data blocks from the log file and convert them -into a DataFrame format for further analysis. The parsing -logic focuses on extracting data between specific MPI -flags and handling error flags appropriately. */ +into a DataFrame format for further analysis. */ pub struct LogLammpsReader { log_file_name: PathBuf, thermo_run_number: u32, @@ -35,29 +30,30 @@ impl LogLammpsReader { log_file_name, thermo_run_number: run_number.unwrap_or_default(), } - .parse() + .parse_lammps_log() } /// Method to parse the log file and convert the log file into a DataFrame. 
- fn parse(&self) -> Result<DataFrame, Box<dyn std::error::Error>> { + fn parse_lammps_log(&self) -> Result<DataFrame, Box<dyn std::error::Error>> { let mut current_thermo_run_num: u32 = 0; let mut data_flag: bool = false; - let mut minimization_flag: bool = false; let mut log_header: Vec<String> = Vec::new(); let mut log_data: Vec<Vec<f64>> = Vec::new(); - let log_file: File = File::open(&self.log_file_name) - .map_err(|_| format!("Log file '{}' not found...", &self.log_file_name.display()))?; + let log_file: File = File::open(&self.log_file_name).map_err(|_| { + format!( + "Log file at '{}' not found...\nCheck 'log_file_name' parameter", + &self.log_file_name.display() + ) + })?; let log_reader: BufReader<File> = BufReader::new(log_file); for line_result in log_reader.lines() { let line: String = line_result?; - // Check for MPI flags to set minimization and run flags. - if !minimization_flag || !data_flag { - if line.starts_with(MPI_FLAGS[0]) { - minimization_flag = true; - } else if line.starts_with(MPI_FLAGS[1]) && minimization_flag { + // Check for MPI flag to set minimization and data flags. + if !data_flag { + if line.starts_with(MPI_FLAG) { data_flag = true; } continue; @@ -71,7 +67,6 @@ impl LogLammpsReader { // Reset flags and increase run number upon encountering error flags. if line.starts_with(ERROR_FLAGS[0]) || line.starts_with(ERROR_FLAGS[1]) { - minimization_flag = false; data_flag = false; current_thermo_run_num += 1; if current_thermo_run_num > self.thermo_run_number { @@ -86,12 +81,13 @@ impl LogLammpsReader { continue; } - // Parse data rows and filter out invalid rows. + // Parse data rows let row: Vec<f64> = line .split_whitespace() .filter_map(|s: &str| s.parse().ok()) .collect(); + // filter out invalid rows. 
if row.len() != log_header.len() { continue; } @@ -101,8 +97,12 @@ impl LogLammpsReader { if log_data.is_empty() { return Err(format!( - "No data found in the log file for run {}", - self.thermo_run_number + "No data found in the log file for run: {}\nThis may be caused by:\ + \n1. Incorrect 'run_number' parameter (Try 'run_number = {}')\ + \n2. Unusual format of log file", + self.thermo_run_number, + // saturating_sub: thermo_run_number is a u32 defaulting to 0, so `- 1` would underflow + self.thermo_run_number.saturating_sub(1) ) .into()); }