Skip to content

Commit

Permalink
Feat/sasa (#9) sub-commands contacts, sasa, and seq
Browse files Browse the repository at this point in the history
* feat(sasa): add method to calculate the per-atom SASA

* fix(sasa): ignore waters when calculating SASA

* refactor(cli): BREAKING CHANGE: move contacts and sasa to separate sub-commands

* style(cli): sort imports

* refactor(cli): hardcode output path by contacts

* feat(cli): separate contacts and seq sub-commands

* docs: version bump to v0.3.0
  • Loading branch information
y1zhou authored Aug 14, 2024
1 parent 4146bf7 commit 2dca67f
Show file tree
Hide file tree
Showing 10 changed files with 411 additions and 199 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ Cargo.lock

# macOS files
.DS_Store

# Test generated files
*.csv
23 changes: 20 additions & 3 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Removed

- PLACEHOLDER
-

## [0.3.0] - 2024-08-14

### Added

- `sasa` command to calculate the atom level SASA
- `seq` command to extract protein sequences from PDB files

### Fixed

- Only report chains that are part of the ligand or receptor for `contacts`

### Changed

- Moved previous top-level command to `contacts` sub-command
- Better path parsing

## [0.2.0] - 2024-08-08

### Added
Expand All @@ -46,6 +62,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Initial release
- Detection of common protein-protein interactions in a PDB or mmCIF file

[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.2.0...HEAD
[0.1.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.1.0
[unreleased]: https://github.com/y1zhou/arpeggia/compare/v0.3.0...HEAD
[0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.3.0
[0.2.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.2.0
[0.1.0]: https://github.com/y1zhou/arpeggia/releases/tag/v0.1.0
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
[package]
name = "arpeggia"
version = "0.2.0"
version = "0.3.0"
description = "A port of the Arpeggio library for Rust"
edition = "2021"
authors = ["Yi Zhou <[email protected]>"]

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

Expand All @@ -13,5 +14,6 @@ pdbtbx = { version = "0.11.0", features = ["rayon", "rstar"], git = "https://git
polars = { version = "0.39.2", features = ["lazy"] }
rayon = "1.10.0"
rstar = "0.12.0"
rust-sasa = "0.2.2"
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
172 changes: 172 additions & 0 deletions src/cli/contacts.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
use crate::interactions::{InteractionComplex, Interactions, ResultEntry};
use crate::utils::{load_model, write_df_to_csv};
use clap::Parser;
use pdbtbx::*;
use polars::prelude::*;
use std::path::{Path, PathBuf};
use tracing::{debug, error, info, trace, warn};

#[derive(Parser, Debug, Clone)]
#[command(version, about)]
pub(crate) struct Args {
/// Path to the PDB or mmCIF file to be analyzed
#[arg(short, long)]
input: PathBuf,

/// Output directory
#[arg(short, long)]
output: PathBuf,

/// Group chains for interactions:
/// e.g. A,B/C,D
/// where chains A and B are the "ligand" and C and D are the "receptor"
#[arg(short, long)]
groups: String,

/// Compensation factor for VdW radii dependent interaction types
#[arg(short = 'c', long = "vdw-comp", default_value_t = 0.1)]
vdw_comp: f64,

/// Distance cutoff when searching for neighboring atoms
#[arg(short, long, default_value_t = 4.5)]
dist_cutoff: f64,

/// Number of threads to use for parallel processing
#[arg(short = 'j', long = "num-threads", default_value_t = 0)]
num_threads: usize,
}

pub(crate) fn run(args: &Args) {
trace!("{args:?}");

// Create Rayon thread pool
rayon::ThreadPoolBuilder::new()
.num_threads(args.num_threads)
.build_global()
.unwrap();
debug!("Using {} thread(s)", rayon::current_num_threads());

// Make sure `input` exists
let input_path = Path::new(&args.input).canonicalize().unwrap();
let input_file: String = input_path.to_str().unwrap().parse().unwrap();

// Load file as complex structure
let (pdb, pdb_warnings) = load_model(&input_file);
if !pdb_warnings.is_empty() {
pdb_warnings.iter().for_each(|e| match e.level() {
pdbtbx::ErrorLevel::BreakingError => error!("{e}"),
pdbtbx::ErrorLevel::InvalidatingError => error!("{e}"),
_ => warn!("{e}"),
});
}

let (df_atomic, df_ring, i_complex) =
get_contacts(pdb, args.groups.as_str(), args.vdw_comp, args.dist_cutoff);

// Information on the sequence of the chains in the model
let num_chains = i_complex.ligand.len() + i_complex.receptor.len();
info!(
"Loaded {} {}",
num_chains,
match num_chains {
1 => "chain",
_ => "chains",
}
);

debug!(
"Parsed ligand chains {lig:?}; receptor chains {receptor:?}",
lig = i_complex.ligand,
receptor = i_complex.receptor
);

// Prepare output directory
let output_path = Path::new(&args.output).canonicalize().unwrap();
let _ = std::fs::create_dir_all(output_path.clone());
let output_file = output_path.join("contacts.csv");

let output_file_str = output_file.to_str().unwrap();
debug!("Results will be saved to {output_file_str}");

// Save results and log the identified interactions
info!(
"Found {} atom-atom contacts\n{}",
df_atomic.shape().0,
df_atomic
);
let df_clash = df_atomic
.clone()
.lazy()
.filter(col("interaction").eq(lit("StericClash")))
.collect()
.unwrap();
if df_clash.height() > 0 {
warn!("Found {} steric clashes\n{}", df_clash.shape().0, df_clash);
}
info!("Found {} ring contacts\n{}", df_ring.shape().0, df_ring);

// Concate dataframes for saving to CSV
let mut df_contacts = concat(
[df_atomic.clone().lazy(), df_ring.clone().lazy()],
UnionArgs::default(),
)
.unwrap()
.collect()
.unwrap();

// Save res to CSV files
write_df_to_csv(&mut df_contacts, output_file);
}

pub fn get_contacts(
pdb: PDB,
groups: &str,
vdw_comp: f64,
dist_cutoff: f64,
) -> (DataFrame, DataFrame, InteractionComplex) {
let i_complex = InteractionComplex::new(pdb, groups, vdw_comp, dist_cutoff);

// Find interactions
let atomic_contacts = i_complex.get_atomic_contacts();
let df_atomic = results_to_df(&atomic_contacts);

let mut ring_contacts: Vec<ResultEntry> = Vec::new();
ring_contacts.extend(i_complex.get_ring_atom_contacts());
ring_contacts.extend(i_complex.get_ring_ring_contacts());
let df_ring = results_to_df(&ring_contacts)
// .drop_many(&["from_atomn", "from_atomi", "to_atomn", "to_atomi"])
.sort(
[
"from_chain",
"from_resi",
"from_altloc",
"to_chain",
"to_resi",
"to_altloc",
],
Default::default(),
)
.unwrap();

(df_atomic, df_ring, i_complex)
}

fn results_to_df(res: &[ResultEntry]) -> DataFrame {
df!(
"interaction" => res.iter().map(|x| x.interaction.to_string()).collect::<Vec<String>>(),
"distance" => res.iter().map(|x| x.distance).collect::<Vec<f64>>(),
"from_chain" => res.iter().map(|x| x.ligand.chain.to_owned()).collect::<Vec<String>>(),
"from_resn" => res.iter().map(|x| x.ligand.resn.to_owned()).collect::<Vec<String>>(),
"from_resi" => res.iter().map(|x| x.ligand.resi as i64).collect::<Vec<i64>>(),
"from_altloc" => res.iter().map(|x| x.ligand.altloc.to_owned()).collect::<Vec<String>>(),
"from_atomn" => res.iter().map(|x| x.ligand.atomn.to_owned()).collect::<Vec<String>>(),
"from_atomi" => res.iter().map(|x| x.ligand.atomi as i64).collect::<Vec<i64>>(),
"to_chain" => res.iter().map(|x| x.receptor.chain.to_owned()).collect::<Vec<String>>(),
"to_resn" => res.iter().map(|x| x.receptor.resn.to_owned()).collect::<Vec<String>>(),
"to_resi" => res.iter().map(|x| x.receptor.resi as i64).collect::<Vec<i64>>(),
"to_altloc" => res.iter().map(|x| x.receptor.altloc.to_owned()).collect::<Vec<String>>(),
"to_atomn" => res.iter().map(|x| x.receptor.atomn.to_owned()).collect::<Vec<String>>(),
"to_atomi" => res.iter().map(|x| x.receptor.atomi as i64).collect::<Vec<i64>>(),
)
.unwrap()
}
3 changes: 3 additions & 0 deletions src/cli/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
pub mod contacts;
pub mod pdb2seq;
pub mod sasa;
29 changes: 29 additions & 0 deletions src/cli/pdb2seq.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use crate::chains::ChainExt;
use clap::Parser;
use std::path::PathBuf;

use crate::utils::load_model;
use std::path::Path;

#[derive(Parser, Debug, Clone)]
#[command(version, about)]
pub(crate) struct Args {
/// Path to the PDB or mmCIF file to be analyzed
input: Vec<PathBuf>,
}

pub(crate) fn run(args: &Args) {
for f in &args.input {
// Make sure `input` exists
let input_path = Path::new(f).canonicalize().unwrap();
let input_file: String = input_path.to_str().unwrap().parse().unwrap();

// Load file and print sequences
let (pdb, _) = load_model(&input_file);
println!("File: {}", input_file);
for chain in pdb.chains() {
println!("{}: {}", chain.id(), chain.pdb_seq().join(""));
}
println!();
}
}
Loading

0 comments on commit 2dca67f

Please sign in to comment.