diff --git a/.gitignore b/.gitignore index f346b22..ef5bf18 100644 --- a/.gitignore +++ b/.gitignore @@ -15,3 +15,6 @@ Cargo.lock # macOS files .DS_Store + +# Test generated files +*.csv diff --git a/CHANGELOG.md b/CHANGELOG.md index 97e0938..6a61f36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Removed - PLACEHOLDER -- + +## [0.3.0] - 2024-08-14 + +### Added + +- `sasa` command to calculate the atom level SASA +- `seq` command to extract protein sequences from PDB files + +### Fixed + +- Only report chains that are part of the ligand or receptor for `contacts` + +### Changed + +- Moved previous top-level command to `contacts` sub-command +- Better path parsing + ## [0.2.0] - 2024-08-08 ### Added @@ -46,6 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release - Detection of common protein-protein interactions in a PDB or mmCIF file -[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.2.0...HEAD -[0.1.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.1.0 +[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.0...HEAD +[0.3.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0 [0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.2.0 +[0.1.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.1.0 diff --git a/Cargo.toml b/Cargo.toml index 4a8a731..ce9615e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,8 +1,9 @@ [package] name = "arpeggia" -version = "0.2.0" +version = "0.3.0" description = "A port of the Arpeggio library for Rust" edition = "2021" +authors = ["Yi Zhou "] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -13,5 +14,6 @@ pdbtbx = { version = "0.11.0", features = ["rayon", "rstar"], git = "https://git polars = { version = "0.39.2", features = ["lazy"] } rayon = "1.10.0" rstar = "0.12.0" +rust-sasa = "0.2.2" tracing = 
"0.1.40" tracing-subscriber = "0.3.18" diff --git a/src/cli/contacts.rs b/src/cli/contacts.rs new file mode 100644 index 0000000..44b2430 --- /dev/null +++ b/src/cli/contacts.rs @@ -0,0 +1,172 @@ +use crate::interactions::{InteractionComplex, Interactions, ResultEntry}; +use crate::utils::{load_model, write_df_to_csv}; +use clap::Parser; +use pdbtbx::*; +use polars::prelude::*; +use std::path::{Path, PathBuf}; +use tracing::{debug, error, info, trace, warn}; + +#[derive(Parser, Debug, Clone)] +#[command(version, about)] +pub(crate) struct Args { + /// Path to the PDB or mmCIF file to be analyzed + #[arg(short, long)] + input: PathBuf, + + /// Output directory + #[arg(short, long)] + output: PathBuf, + + /// Group chains for interactions: + /// e.g. A,B/C,D + /// where chains A and B are the "ligand" and C and D are the "receptor" + #[arg(short, long)] + groups: String, + + /// Compensation factor for VdW radii dependent interaction types + #[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)] + vdw_comp: f64, + + /// Distance cutoff when searching for neighboring atoms + #[arg(short, long, default_value_t = 4.5)] + dist_cutoff: f64, + + /// Number of threads to use for parallel processing + #[arg(short = 'j', long = "num-threads", default_value_t = 0)] + num_threads: usize, +} + +pub(crate) fn run(args: &Args) { + trace!("{args:?}"); + + // Create Rayon thread pool + rayon::ThreadPoolBuilder::new() + .num_threads(args.num_threads) + .build_global() + .unwrap(); + debug!("Using {} thread(s)", rayon::current_num_threads()); + + // Make sure `input` exists + let input_path = Path::new(&args.input).canonicalize().unwrap(); + let input_file: String = input_path.to_str().unwrap().parse().unwrap(); + + // Load file as complex structure + let (pdb, pdb_warnings) = load_model(&input_file); + if !pdb_warnings.is_empty() { + pdb_warnings.iter().for_each(|e| match e.level() { + pdbtbx::ErrorLevel::BreakingError => error!("{e}"), + pdbtbx::ErrorLevel::InvalidatingError 
=> error!("{e}"), + _ => warn!("{e}"), + }); + } + + let (df_atomic, df_ring, i_complex) = + get_contacts(pdb, args.groups.as_str(), args.vdw_comp, args.dist_cutoff); + + // Information on the sequence of the chains in the model + let num_chains = i_complex.ligand.len() + i_complex.receptor.len(); + info!( + "Loaded {} {}", + num_chains, + match num_chains { + 1 => "chain", + _ => "chains", + } + ); + + debug!( + "Parsed ligand chains {lig:?}; receptor chains {receptor:?}", + lig = i_complex.ligand, + receptor = i_complex.receptor + ); + + // Prepare output directory + let output_path = Path::new(&args.output).canonicalize().unwrap(); + let _ = std::fs::create_dir_all(output_path.clone()); + let output_file = output_path.join("contacts.csv"); + + let output_file_str = output_file.to_str().unwrap(); + debug!("Results will be saved to {output_file_str}"); + + // Save results and log the identified interactions + info!( + "Found {} atom-atom contacts\n{}", + df_atomic.shape().0, + df_atomic + ); + let df_clash = df_atomic + .clone() + .lazy() + .filter(col("interaction").eq(lit("StericClash"))) + .collect() + .unwrap(); + if df_clash.height() > 0 { + warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash); + } + info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring); + + // Concate dataframes for saving to CSV + let mut df_contacts = concat( + [df_atomic.clone().lazy(), df_ring.clone().lazy()], + UnionArgs::default(), + ) + .unwrap() + .collect() + .unwrap(); + + // Save res to CSV files + write_df_to_csv(&mut df_contacts, output_file); +} + +pub fn get_contacts( + pdb: PDB, + groups: &str, + vdw_comp: f64, + dist_cutoff: f64, +) -> (DataFrame, DataFrame, InteractionComplex) { + let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff); + + // Find interactions + let atomic_contacts = i_complex.get_atomic_contacts(); + let df_atomic = results_to_df(&atomic_contacts); + + let mut ring_contacts: Vec = Vec::new(); + 
ring_contacts.extend(i_complex.get_ring_atom_contacts()); + ring_contacts.extend(i_complex.get_ring_ring_contacts()); + let df_ring = results_to_df(&ring_contacts) + // .drop_many(&["from_atomn", "from_atomi", "to_atomn", "to_atomi"]) + .sort( + [ + "from_chain", + "from_resi", + "from_altloc", + "to_chain", + "to_resi", + "to_altloc", + ], + Default::default(), + ) + .unwrap(); + + (df_atomic, df_ring, i_complex) +} + +fn results_to_df(res: &[ResultEntry]) -> DataFrame { + df!( + "interaction" => res.iter().map(|x| x.interaction.to_string()).collect::>(), + "distance" => res.iter().map(|x| x.distance).collect::>(), + "from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::>(), + "from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::>(), + "from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::>(), + "from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::>(), + "from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::>(), + "from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::>(), + "to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::>(), + "to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::>(), + "to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::>(), + "to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::>(), + "to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::>(), + "to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::>(), + ) + .unwrap() +} diff --git a/src/cli/mod.rs b/src/cli/mod.rs new file mode 100644 index 0000000..3cd3b4d --- /dev/null +++ b/src/cli/mod.rs @@ -0,0 +1,3 @@ +pub mod contacts; +pub mod pdb2seq; +pub mod sasa; diff --git a/src/cli/pdb2seq.rs b/src/cli/pdb2seq.rs new file mode 100644 index 0000000..905a22b --- /dev/null +++ b/src/cli/pdb2seq.rs @@ -0,0 +1,29 @@ +use crate::chains::ChainExt; +use clap::Parser; +use std::path::PathBuf; + +use 
crate::utils::load_model; +use std::path::Path; + +#[derive(Parser, Debug, Clone)] +#[command(version, about)] +pub(crate) struct Args { + /// Path to the PDB or mmCIF file to be analyzed + input: Vec, +} + +pub(crate) fn run(args: &Args) { + for f in &args.input { + // Make sure `input` exists + let input_path = Path::new(f).canonicalize().unwrap(); + let input_file: String = input_path.to_str().unwrap().parse().unwrap(); + + // Load file and print sequences + let (pdb, _) = load_model(&input_file); + println!("File: {}", input_file); + for chain in pdb.chains() { + println!("{}: {}", chain.id(), chain.pdb_seq().join("")); + } + println!(); + } +} diff --git a/src/cli/sasa.rs b/src/cli/sasa.rs new file mode 100644 index 0000000..37d9408 --- /dev/null +++ b/src/cli/sasa.rs @@ -0,0 +1,143 @@ +use crate::interactions::InteractingEntity; +use crate::residues::ResidueExt; +use crate::utils::{hierarchy_to_entity, load_model, write_df_to_csv}; +use clap::Parser; +use pdbtbx::*; +use polars::prelude::*; +use rust_sasa::calculate_sasa_internal; +use rust_sasa::Atom as SASAAtom; +use std::path::{Path, PathBuf}; +use tracing::{debug, error, info, trace, warn}; + +#[derive(Parser, Debug, Clone)] +#[command(version, about)] +pub(crate) struct Args { + /// Path to the PDB or mmCIF file to be analyzed + #[arg(short, long)] + input: PathBuf, + + /// Output CSV file path + #[arg(short, long)] + output: PathBuf, + + /// Probe radius r (smaller r detects more surface details and reports a larger surface) + #[arg(short = 'r', long = "probe-radius", default_value_t = 1.4)] + probe_radius: f32, + + /// Number of sphere points sampled per atom when computing SASA + #[arg(short = 'n', long = "num-points", default_value_t = 100)] + n_points: usize, + + /// Number of threads to use for parallel processing + #[arg(short = 'j', long = "num-threads", default_value_t = 0)] + num_threads: usize, +} + +pub(crate) fn run(args: &Args) { + trace!("{args:?}"); + + // Create Rayon thread pool + 
rayon::ThreadPoolBuilder::new() + .num_threads(args.num_threads) + .build_global() + .unwrap(); + debug!("Using {} thread(s)", rayon::current_num_threads()); + + // Make sure `input` exists + let input_path = Path::new(&args.input).canonicalize().unwrap(); + let input_file: String = input_path.to_str().unwrap().parse().unwrap(); + + // Load file as complex structure + let (pdb, pdb_warnings) = load_model(&input_file); + if !pdb_warnings.is_empty() { + pdb_warnings.iter().for_each(|e| match e.level() { + pdbtbx::ErrorLevel::BreakingError => error!("{e}"), + pdbtbx::ErrorLevel::InvalidatingError => error!("{e}"), + _ => warn!("{e}"), + }); + } + + let mut df_sasa = get_atom_sasa(&pdb, args.probe_radius, args.n_points); + + // Prepare output directory + // let file_id = input_path.file_stem().unwrap().to_str().unwrap(); + let output_path = Path::new(&args.output).canonicalize().unwrap(); + let _ = std::fs::create_dir_all(output_path.clone()); + let output_file = match output_path.is_dir() { + true => output_path.join("sasa.csv"), + false => output_path, + }; + + let output_file_str = output_file.to_str().unwrap(); + debug!("Results will be saved to {output_file_str}"); + + // Save results and log the identified SASA + let non_zero_sasa_mask = df_sasa.column("sasa").unwrap().not_equal(0.0).unwrap(); + let df_sasa_nonzero = df_sasa.filter(&non_zero_sasa_mask).unwrap(); + info!( + "Found {} atoms with non-zero SASA\n{}", + df_sasa_nonzero.shape().0, + df_sasa_nonzero + ); + + // Save res to CSV files + write_df_to_csv(&mut df_sasa, output_file); +} + +pub fn get_atom_sasa(pdb: &PDB, probe_radius: f32, n_points: usize) -> DataFrame { + // Calculate the SASA for each atom + let atoms = pdb + .atoms_with_hierarchy() + .filter(|x| { + let resn = x.residue().resn().unwrap(); + resn != "O" && resn != "X" + }) + .map(|x| SASAAtom { + position: nalgebra::Point3::new( + x.atom().pos().0 as f32, + x.atom().pos().1 as f32, + x.atom().pos().2 as f32, + ), + radius: x + .atom() + 
.element() + .unwrap() + .atomic_radius() + .van_der_waals + .unwrap() as f32, + id: x.atom().serial_number(), + parent_id: None, + }) + .collect::>(); + let atom_sasa = calculate_sasa_internal(&atoms, Some(probe_radius), Some(n_points)); + + // Create a DataFrame with the results + let atom_annotations = pdb + .atoms_with_hierarchy() + .map(|x| hierarchy_to_entity(&x)) + .collect::>(); + let atom_annot_df = df!( + "chain" => atom_annotations.iter().map(|x| x.chain.to_owned()).collect::>(), + "resn" => atom_annotations.iter().map(|x| x.resn.to_owned()).collect::>(), + "resi" => atom_annotations.iter().map(|x| x.resi as i64).collect::>(), + "altloc" => atom_annotations.iter().map(|x| x.altloc.to_owned()).collect::>(), + "atomn" => atom_annotations.iter().map(|x| x.atomn.to_owned()).collect::>(), + "atomi" => atom_annotations.iter().map(|x| x.atomi as i64).collect::>(), + ) + .unwrap(); + + df!( + "atomi" => atoms.iter().map(|x| x.id as i64).collect::>(), + "sasa" => atom_sasa + ) + .unwrap() + .join( + &atom_annot_df, + ["atomi"], + ["atomi"], + JoinArgs::new(JoinType::Inner), + ) + .unwrap() + .sort(["chain", "resi", "altloc", "atomi"], Default::default()) + .unwrap() +} diff --git a/src/core.rs b/src/core.rs deleted file mode 100644 index ebfe2c0..0000000 --- a/src/core.rs +++ /dev/null @@ -1,67 +0,0 @@ -use crate::interactions::{InteractionComplex, Interactions, ResultEntry}; -use crate::utils::load_model; - -use pdbtbx::*; -use polars::prelude::*; -use std::path::{Path, PathBuf}; - -/// Core function of the library. Argument documentation can be found in [`main::Args`]. 
-pub fn core( - input_file: PathBuf, - groups: &str, - vdw_comp: f64, - dist_cutoff: f64, -) -> Result<(DataFrame, DataFrame, InteractionComplex, Vec), Vec> { - // Make sure `input` exists - let input_path = Path::new(&input_file).canonicalize().unwrap(); - let input_file: String = input_path.to_str().unwrap().parse().unwrap(); - - // Load file as complex structure - let (pdb, errors) = load_model(&input_file)?; - - let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff); - - // Find interactions - let atomic_contacts = i_complex.get_atomic_contacts(); - let df_atomic = results_to_df(&atomic_contacts); - - let mut ring_contacts: Vec = Vec::new(); - ring_contacts.extend(i_complex.get_ring_atom_contacts()); - ring_contacts.extend(i_complex.get_ring_ring_contacts()); - let df_ring = results_to_df(&ring_contacts) - // .drop_many(&["from_atomn", "from_atomi", "to_atomn", "to_atomi"]) - .sort( - [ - "from_chain", - "from_resi", - "from_altloc", - "to_chain", - "to_resi", - "to_altloc", - ], - Default::default(), - ) - .unwrap(); - - Ok((df_atomic, df_ring, i_complex, errors)) -} - -fn results_to_df(res: &[ResultEntry]) -> DataFrame { - df!( - "interaction" => res.iter().map(|x| x.interaction.to_string()).collect::>(), - "distance" => res.iter().map(|x| x.distance).collect::>(), - "from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::>(), - "from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::>(), - "from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::>(), - "from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::>(), - "from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::>(), - "from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::>(), - "to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::>(), - "to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::>(), - "to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::>(), - 
"to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::>(), - "to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::>(), - "to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::>(), - ) - .unwrap() -} diff --git a/src/main.rs b/src/main.rs index 4253aa6..77aba76 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,138 +3,45 @@ #![doc = include_str!("../README.md")] mod chains; -mod core; +mod cli; mod interactions; mod residues; mod utils; -use crate::chains::ChainExt; +use clap::{Parser, Subcommand}; -use clap::Parser; -use polars::prelude::*; -use std::path::{Path, PathBuf}; -use tracing::{debug, error, info, trace, warn}; - -#[derive(Parser, Debug)] -#[command(version, about)] -struct Args { - /// Path to the PDB or mmCIF file to be analyzed - #[arg(short, long)] - input: PathBuf, - - /// Output directory - #[arg(short, long)] - output: PathBuf, - - /// Group chains for interactions: - /// e.g. A,B/C,D - /// where chains A and B are the "ligand" and C and D are the "receptor" - #[arg(short, long)] - groups: String, - - /// Compensation factor for VdW radii dependent interaction types - #[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)] - vdw_comp: f64, - - /// Distance cutoff when searching for neighboring atoms - #[arg(short, long, default_value_t = 4.5)] - dist_cutoff: f64, +#[derive(Parser)] +#[command(version, about, author)] +struct Cli { + #[command(subcommand)] + command: Commands, +} - /// Number of threads to use for parallel processing - #[arg(short = 'j', long = "num-threads", default_value_t = 0)] - num_threads: usize, +#[derive(Subcommand, Clone)] +enum Commands { + /// Analyze atomic and ring contacts in a PDB or mmCIF file + Contacts(crate::cli::contacts::Args), + /// Calculate the solvent accessible surface area (SASA) of each atom in a PDB or mmCIF file + Sasa(crate::cli::sasa::Args), + /// Print the sequences of all chains in a PDB or mmCIF file + Seq(crate::cli::pdb2seq::Args), } /// Entry 
to the CLI tool. Verbosity can be controlled with the `RUST_LOG` environment variable. fn main() { - let args = Args::parse(); tracing_subscriber::fmt::init(); - trace!("{args:?}"); - - // Create Rayon thread pool - rayon::ThreadPoolBuilder::new() - .num_threads(args.num_threads) - .build_global() - .unwrap(); - debug!("Using {} thread(s)", rayon::current_num_threads()); + config_polars_output(); - let run_result = core::core( - args.input, - args.groups.as_str(), - args.vdw_comp, - args.dist_cutoff, - ); - match run_result { - Err(e) => { - e.iter().for_each(|e| match e.level() { - pdbtbx::ErrorLevel::BreakingError => error!("{e}"), - pdbtbx::ErrorLevel::InvalidatingError => error!("{e}"), - _ => warn!("{e}"), - }); - std::process::exit(1); + let cli = Cli::parse(); + match &cli.command { + Commands::Contacts(args) => { + crate::cli::contacts::run(args); } - Ok((df_atomic, df_ring, i_complex, pdb_warnings)) => { - // Notify of any PDB warnings - if !pdb_warnings.is_empty() { - pdb_warnings.iter().for_each(|e| warn!("{e}")); - } - - // Information on the sequence of the chains in the model - info!( - "Loaded {} {}", - i_complex.model.chain_count(), - match i_complex.model.chain_count() { - 1 => "chain", - _ => "chains", - } - ); - for chain in i_complex.model.chains() { - debug!(">{}: {}", chain.id(), chain.pdb_seq().join("")); - } - - debug!( - "Parsed ligand chains {lig:?}; receptor chains {receptor:?}", - lig = i_complex.ligand, - receptor = i_complex.receptor - ); - - // Prepare output directory - // let file_id = input_path.file_stem().unwrap().to_str().unwrap(); - let output_path = Path::new(&args.output).canonicalize().unwrap(); - let _ = std::fs::create_dir_all(output_path.clone()); - let output_dir = output_path.to_str().unwrap(); - - debug!("Results will be saved to {output_dir}/contacts.csv"); - - // Save results and log the identified interactions - config_polars_output(); - info!( - "Found {} atom-atom contacts\n{}", - df_atomic.shape().0, - df_atomic 
- ); - let df_clash = df_atomic - .clone() - .lazy() - .filter(col("interaction").eq(lit("StericClash"))) - .collect() - .unwrap(); - if df_clash.height() > 0 { - warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash); - } - info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring); - - // Concatenate dataframes for saving to CSV - let mut df_contacts = concat( - [df_atomic.clone().lazy(), df_ring.clone().lazy()], - UnionArgs::default(), - ) - .unwrap() - .collect() - .unwrap(); - - // Save results to CSV files - write_df_to_csv(&mut df_contacts, output_path.join("contacts.csv")); + Commands::Sasa(args) => { + crate::cli::sasa::run(args); + } + Commands::Seq(args) => { + crate::cli::pdb2seq::run(args); } } } @@ -145,8 +52,3 @@ fn config_polars_output() { std::env::set_var("POLARS_FMT_TABLE_ROUNDED_CORNERS", "1"); std::env::set_var("POLARS_FMT_MAX_COLS", "14"); } - -fn write_df_to_csv(df: &mut DataFrame, file_path: PathBuf) { - let mut file = std::fs::File::create(file_path).unwrap(); - CsvWriter::new(&mut file).finish(df).unwrap(); -} diff --git a/src/utils.rs b/src/utils.rs index a705408..9e292db 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,20 +1,22 @@ -use std::collections::HashSet; +use std::{collections::HashSet, path::PathBuf}; use crate::{interactions::structs::InteractingEntity, residues::ResidueExt}; use pdbtbx::*; +use polars::prelude::*; /// Open an atomic data file with [`pdbtbx::open`] and remove non-protein residues. 
-pub fn load_model(input_file: &String) -> Result<(PDB, Vec), Vec> { +pub fn load_model(input_file: &String) -> (PDB, Vec) { // Load file as complex structure let (mut pdb, errors) = pdbtbx::ReadOptions::default() .set_only_atomic_coords(true) .set_level(pdbtbx::StrictnessLevel::Loose) - .read(input_file)?; + .read(input_file) + .unwrap(); // Remove non-protein residues from model pdb.remove_residues_by(|res| res.resn().is_none()); - Ok((pdb, errors)) + (pdb, errors) } /// Parse the chain groups from the input string. @@ -76,6 +78,12 @@ pub fn hierarchy_to_entity(hierarchy: &AtomConformerResidueChainModel<'_>) -> In } } +/// Write a DataFrame to a CSV file +pub(crate) fn write_df_to_csv(df: &mut DataFrame, file_path: PathBuf) { + let mut file = std::fs::File::create(file_path).unwrap(); + CsvWriter::new(&mut file).finish(df).unwrap(); +} + #[cfg(test)] mod tests { use super::*;