diff --git a/.gitignore b/.gitignore index c8f0442..5aeb17e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ /target +/test # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/Cargo.lock b/Cargo.lock index c8bb9f7..0e3c010 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,9 +101,9 @@ checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.16.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" dependencies = [ "bytemuck_derive", ] @@ -116,24 +116,29 @@ checksum = "1ee891b04274a59bd38b412188e24b849617b2e45a0fd8d057deb63e7403761b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" -version = "1.6.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cc" -version = "1.0.106" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "066fce287b1d4eafef758e89e09d724a24808a9196fe9756b8ca90e86d0719a2" +checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" dependencies = [ "jobserver", "libc", - "once_cell", ] [[package]] @@ -280,7 +285,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -396,9 +401,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.6" +version = "2.3.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" dependencies = [ "equivalent", "hashbrown", @@ -424,9 +429,9 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jobserver" -version = "0.1.31" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -470,7 +475,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "log_lammps_reader" -version = "0.1.5" +version = "0.2.0" dependencies = [ "polars", "pyo3", @@ -480,9 +485,9 @@ dependencies = [ [[package]] name = "lz4" -version = "1.25.0" +version = "1.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6eab492fe7f8651add23237ea56dbf11b3c4ff762ab83d40a47f11433421f91" +checksum = "958b4caa893816eea05507c20cfe47574a43d9a697138a7872990bba8a0ece68" dependencies = [ "libc", "lz4-sys", @@ -490,9 +495,9 @@ dependencies = [ [[package]] name = "lz4-sys" -version = "1.9.5" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9764018d143cc854c9f17f0b907de70f14393b1f502da6375dce70f00514eb3" +checksum = "109de74d5d2353660401699a4174a4ff23fcc649caf553df71933c7fb45ad868" dependencies = [ "cc", "libc", @@ -1073,15 +1078,18 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "da544ee218f0d287a911e9c99a39a8c9bc8fcad3cb8db5959940044ecfc67265" [[package]] name = "ppv-lite86" -version = "0.2.17" 
+version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "proc-macro2" @@ -1148,7 +1156,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1161,7 +1169,7 @@ dependencies = [ "proc-macro2", "pyo3-build-config", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1227,9 +1235,9 @@ dependencies = [ [[package]] name = "raw-cpuid" -version = "11.0.2" +version = "11.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +checksum = "cb9ee317cfe3fbd54b36a511efc1edd42e216903c9cd575e686dd68a2ba90d8d" dependencies = [ "bitflags", ] @@ -1271,23 +1279,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" dependencies = [ "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" dependencies = [ "bitflags", ] [[package]] name = "regex" -version = "1.10.5" +version = "1.10.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91213439dad192326a0d7c6ee3955910425f441d7038e0d6933b0aec5c4517f" +checksum = "4219d74c6b67a3654a9fbebc4b419e22126d13d2f3c4a07ee0cb61ff79a79619" dependencies = [ "aho-corasick", "memchr", @@ -1347,16 +1355,17 @@ checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", 
] [[package]] name = "serde_json" -version = "1.0.120" +version = "1.0.122" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e0d21c9a8cae1235ad58a00c11cb40d4b1e5c784f1ef2c537876ed6ffd8b7c5" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" dependencies = [ "itoa", + "memchr", "ryu", "serde", ] @@ -1455,7 +1464,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1471,9 +1480,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.70" +version = "2.0.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" dependencies = [ "proc-macro2", "quote", @@ -1482,9 +1491,9 @@ dependencies = [ [[package]] name = "sysinfo" -version = "0.30.12" +version = "0.30.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "732ffa00f53e6b2af46208fba5718d9662a421049204e156328b66791ffa15ae" +checksum = "0a5b4ddaee55fb2bea2bf0e5000747e5f5c0de765e5a5ff87f4cd106439f4bb3" dependencies = [ "cfg-if", "core-foundation-sys", @@ -1502,28 +1511,28 @@ checksum = "c1bbb9f3c5c463a01705937a24fdabc5047929ac764b2d5b9cf681c1f5041ed5" [[package]] name = "target-lexicon" -version = "0.12.15" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "thiserror" -version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" 
-version = "1.0.61" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1561,18 +1570,18 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "uuid" -version = "1.9.1" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5de17fd2f7da591098415cff336e12965a28061ddace43b59cb3c430179c9439" +checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" dependencies = [ "getrandom", ] [[package]] name = "version_check" -version = "0.9.4" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "wasi" @@ -1601,7 +1610,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-shared", ] @@ -1623,7 +1632,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -1750,9 +1759,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "xxhash-rust" -version = "0.8.11" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63658493314859b4dfdf3fb8c1defd61587839def09582db50b8a4e93afca6bb" +checksum = "6a5cbf750400958819fb6178eaa83bee5cd9c29a26a40cc241df8c70fdd46984" [[package]] name = "zerocopy" @@ -1760,6 +1769,7 @@ version = "0.7.35" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" dependencies = [ + "byteorder", "zerocopy-derive", ] @@ -1771,7 +1781,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.70", + "syn 2.0.72", ] [[package]] @@ -1785,18 +1795,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/Cargo.toml b/Cargo.toml index 4afac07..e2693ba 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "log_lammps_reader" -version = "0.1.5" +version = "0.2.0" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 1609911..edf6af1 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Using pip: pip install log-lammps-reader ``` + ## Build From Source Alternatively, to build the Python module, follow these steps: @@ -36,7 +37,7 @@ Alternatively, to build the Python module, follow these steps: 1. Ensure you have `maturin` installed: ```bash - pip install maturin + pip install maturin # or use conda or micromamba ``` 2. Compile the Rust packages and install the python module. 
@@ -47,15 +48,22 @@ Alternatively, to build the Python module, follow these steps: ## Usage Examples -### Python +- Note that `run_number = 0` gives the first data output, which might include the minimization run. +- To get the useful data, start with `run_number = 1`. + + +### Build For Python ```python import log_lammps_reader thermo_number = 0 # Choose the nth number of thermo run df = log_lammps_reader.new('log.lammps') # polars DataFrame for 1st thermo run +# usually the minimization run # Or choose the nth number of thermo run (default n = 0) +# n = 0 might include the MPI minimization data, so in most cases +# start with n = 1 df = log_lammps_reader.new('log.lammps', n) time = df.get_column('Time') # Get any thermo column time_squared = time ** 2 # use broadcasting operations similar to numpy @@ -73,7 +81,7 @@ Example of a DataFrame for a LAMMPS log file. ```python >>> import log_lammps_reader ->>> df = log_lammps_reader.new('log.lammps') +>>> df = log_lammps_reader.new('log.lammps', 1) >>> df shape: (10_000_002, 10) ┌──────────────┬───────────┬───────────┬───────────┬───┬───────┬────────────┬───────────┬───────────┐ @@ -125,7 +133,9 @@ use log_lammps_reader::LogLammpsReader; fn main() { let log_file_name = "log.lammps"; - let run_number = Some(0); + // skipping minimization + let run_number = Some(1); + match LogLammpsReader::new(log_file_name.into(), run_number) { Ok(df) => println!("DataFrame read successfully: {:?}", df), diff --git a/src/lib.rs b/src/lib.rs index 1834073..394150b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,17 +4,15 @@ use pyo3_polars::PyDataFrame; mod reader; use reader::LogLammpsReader; -/** This Rust code integrates with Python using PyO3 and PyPolars -to provide a Python interface for reading and processing LAMMPS -log files. The main function `new` serves as a bridge between -Rust and Python, allowing Python code to call Rust functions to -parse log files.
It utilizes the LogLammpsReader struct from the -`reader` module to handle the actual parsing and conversion of log -file data into a DataFrame. +/** +### Parameters: +`log_file_name`: File path for the LAMMPS log file +`thermo_run_number`: The index of the run thermo (default = 0) +Note: +The default thermo_run_number includes the MPI minimization data +So usually what you need will start at index 1 -Parameters: -log_file_name: File path for the LAMMPS log file -thermo_run_number: The index of the run thermo (default = 0)*/ +*/ #[pyfunction] fn new(log_file_name: &str, thermo_run_number: Option) -> PyResult { match LogLammpsReader::new(log_file_name.into(), thermo_run_number) { @@ -26,9 +24,16 @@ fn new(log_file_name: &str, thermo_run_number: Option) -> PyResult) -> PyResult<()> { +fn log_lammps_reader(_py: Python, m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(new, m)?)?; Ok(()) } diff --git a/src/reader.rs b/src/reader.rs index b151767..7fb55e4 100644 --- a/src/reader.rs +++ b/src/reader.rs @@ -5,17 +5,12 @@ use std::io::{BufRead, BufReader}; use std::path::PathBuf; const ERROR_FLAGS: [&str; 2] = ["Loop time", "ERROR"]; -const MPI_FLAGS: [&str; 2] = [ - "MPI task timing breakdown", - "Per MPI rank memory allocation", -]; +const MPI_FLAG: &str = "Per MPI rank memory allocation"; /** This Rust code uses the Polars library to parse log files, particularly from LAMMPS simulations. The goal is to read specific data blocks from the log file and convert them -into a DataFrame format for further analysis. The parsing -logic focuses on extracting data between specific MPI -flags and handling error flags appropriately. */ +into a DataFrame format for further analysis. 
*/ pub struct LogLammpsReader { log_file_name: PathBuf, thermo_run_number: u32, @@ -35,29 +30,30 @@ impl LogLammpsReader { log_file_name, thermo_run_number: run_number.unwrap_or_default(), } - .parse() + .parse_lammps_log() } /// Method to parse the log file and convert the log file into a DataFrame. - fn parse(&self) -> Result> { + fn parse_lammps_log(&self) -> Result> { let mut current_thermo_run_num: u32 = 0; let mut data_flag: bool = false; - let mut minimization_flag: bool = false; let mut log_header: Vec = Vec::new(); let mut log_data: Vec> = Vec::new(); - let log_file: File = File::open(&self.log_file_name) - .map_err(|_| format!("Log file '{}' not found...", &self.log_file_name.display()))?; + let log_file: File = File::open(&self.log_file_name).map_err(|_| { + format!( + "Log file at '{}' not found...\nCheck 'log_file_name' parameter", + &self.log_file_name.display() + ) + })?; let log_reader: BufReader = BufReader::new(log_file); for line_result in log_reader.lines() { let line: String = line_result?; - // Check for MPI flags to set minimization and run flags. - if !minimization_flag || !data_flag { - if line.starts_with(MPI_FLAGS[0]) { - minimization_flag = true; - } else if line.starts_with(MPI_FLAGS[1]) && minimization_flag { + // Check for MPI flag to set minimization and data flags. + if !data_flag { + if line.starts_with(MPI_FLAG) { data_flag = true; } continue; @@ -71,7 +67,6 @@ impl LogLammpsReader { // Reset flags and increase run number upon encountering error flags. if line.starts_with(ERROR_FLAGS[0]) || line.starts_with(ERROR_FLAGS[1]) { - minimization_flag = false; data_flag = false; current_thermo_run_num += 1; if current_thermo_run_num > self.thermo_run_number { @@ -86,12 +81,13 @@ impl LogLammpsReader { continue; } - // Parse data rows and filter out invalid rows. + // Parse data rows let row: Vec = line .split_whitespace() .filter_map(|s: &str| s.parse().ok()) .collect(); + // filter out invalid rows. 
if row.len() != log_header.len() { continue; } @@ -101,8 +97,11 @@ impl LogLammpsReader { if log_data.is_empty() { return Err(format!( - "No data found in the log file for run {}", - self.thermo_run_number + "No data found in the log file for run: {}\nThis may be caused due to + \n1. Incorrect 'run_number' parameter (Try 'run_number = {}') + \n2. Unusual format of log file", + self.thermo_run_number, + self.thermo_run_number.saturating_sub(1) /* run 0 (the default) must not underflow the u32 while building the hint */ ) .into()); }