Skip to content

Commit

Permalink
integrate external python programs (3dem#34)
Browse files Browse the repository at this point in the history
* transfer programs from https://github.com/alisterburt/tomo_preprocessing into relion

* fix imports in utils

* remove deprecated matrix generation code

* remove automatically generated completion arguments from CLIs

* hide stub alternative from import CLI
  • Loading branch information
alisterburt authored Jul 20, 2022
1 parent bf4b52b commit d5ea705
Show file tree
Hide file tree
Showing 37 changed files with 1,832 additions and 0 deletions.
2 changes: 2 additions & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
This program is developed in the group of Sjors Scheres at the MRC Laboratory of Molecular Biology, with contributions from the following people (in alphabetical order):

- Tom Burnley (from the CCP-EM team at STFC)
- Alister Burt (from David Barford's group at the MRC-LMB)
- Liyi Dong
- Bjoern Forsberg (from the Lindahl group at SciLifeLabs)
- Shaoda He
Expand All @@ -10,6 +11,7 @@ This program is developed in the group of Sjors Scheres at the MRC Laboratory of
- Takanori Nakane
- Joaquin Oton (from the Briggs group at MRC-LMB)
- Colin Palmer (from the CCP-EM team at STFC)
- Euan Pyle (from Giulia Zanetti's group at Birkbeck)
- Sjors Scheres
- Jasenko Zivanov

Expand Down
10 changes: 10 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
[build-system]
requires = [
"setuptools>=42",
"wheel",
"setuptools_scm[toml]>=3.4"
]
build-backend = "setuptools.build_meta"

[tool.setuptools_scm]
write_to = "src/tomography_python_programs/_version.py"
100 changes: 100 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@

[metadata]
name = tomography_python_programs
url = https://github.com/3dem/relion
author = RELION team
author_email = [email protected]
description = Python programs for cryo-electron tomography processing in RELION.
long_description = file: README.md
long_description_content_type = text/markdown
license = BSD license
classifiers =
Development Status :: 2 - Pre-Alpha
License :: OSI Approved :: BSD License
Natural Language :: English
Programming Language :: Python :: 3
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Programming Language :: Python :: 3.10

project_urls =
Source Code =https://github.com/3dem/relion

[options]
zip_safe = False
package_dir =
=src
packages = find:
python_requires = >=3.8
setup_requires =
setuptools_scm
install_requires =
numpy
pandas
makefun
starfile
mrcfile
mdocfile
typer
rich
einops
lil_aretomo
yet-another-imod-wrapper

[options.extras_require]
testing =
pytest
dev =
ipython
jedi<0.18.0
black
flake8
flake8-docstrings
isort
mypy
pre-commit
pydocstyle
pytest
jupyter-book

[options.entry_points]
console_scripts =
relion_tomo_import = tomography_python_programs.import_tilt_series:cli
relion_tomo_align_tilt_series = tomography_python_programs.tilt_series_alignment:cli
relion_tomo_denoise = tomography_python_programs.denoising:cli

[bdist_wheel]
universal = 1

[flake8]
exclude = docs,_version.py,.eggs,examples
max-line-length = 88
docstring-convention = numpy
ignore = D100, D213, D401, D413, D107, W503

[isort]
profile = black
src_paths = src

[pydocstyle]
match_dir = tomography_python_programs
convention = numpy
add_select = D402,D415,D417
ignore = D100, D213, D401, D413, D107

[tool:pytest]
addopts = -W error

[mypy]
files = src/tomography_python_programs
warn_unused_configs = True
warn_unused_ignores = True
check_untyped_defs = True
implicit_reexport = False
# this is strict!
# disallow_untyped_defs = True
show_column_numbers = True
show_error_codes = True
ignore_missing_imports = True


Empty file.
4 changes: 4 additions & 0 deletions src/tomography_python_programs/denoising/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from .cryoCARE import cryoCARE_train as _cryoCARE_train
from .cryoCARE import cryoCARE_predict as _cryoCARE_predict

from ._cli import cli
4 changes: 4 additions & 0 deletions src/tomography_python_programs/denoising/_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
import typer

# Name passed to the Typer application for the denoising programs.
CLI_NAME = 'relion_tomo_denoise'
# Typer application object for the denoising commands; add_completion=False
# turns off Typer's automatically generated shell-completion options.
cli = typer.Typer(name=CLI_NAME, add_completion=False)
2 changes: 2 additions & 0 deletions src/tomography_python_programs/denoising/cryoCARE/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from .cryoCARE_train import cryoCARE_train
from .cryoCARE_predict import cryoCARE_predict
154 changes: 154 additions & 0 deletions src/tomography_python_programs/denoising/cryoCARE/_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
from pathlib import Path
from typing import Tuple, List, Dict

import pandas as pd
import starfile
import typer
import json
import shutil

def create_denoising_directory_structure(
    output_directory: Path,
    training_job: bool,
) -> Tuple[Path, Path, Path]:
    """Create the sub-directories a denoising job writes into.

    ``external/training`` and ``tilt_series`` are always created below
    ``output_directory``. ``tomograms`` is only created when ``training_job``
    is false, since training a denoising model generates no tomograms; its
    path is returned in either case.

    Parameters
    ----------
    output_directory : Path
        Root directory of the job.
    training_job : bool
        True when the job only trains a denoising model.

    Returns
    -------
    Tuple[Path, Path, Path]
        The training, tomogram and tilt-series directories, in that order.
    """
    training_dir = output_directory / 'external' / 'training'
    tomogram_dir = output_directory / 'tomograms'
    tilt_series_dir = output_directory / 'tilt_series'

    # The tomogram directory is slotted in between the other two when needed.
    directories_to_create = [training_dir, tilt_series_dir]
    if not training_job:
        directories_to_create.insert(1, tomogram_dir)

    for directory in directories_to_create:
        directory.mkdir(parents=True, exist_ok=True)

    return training_dir, tomogram_dir, tilt_series_dir

def parse_training_tomograms(
    training_tomograms: str
) -> List[str]:
    """Split the CLI string naming the tomograms to train on.

    Parameters
    ----------
    training_tomograms : str
        Colon-separated tomogram names (values of ``rlnTomoName``),
        e.g. ``"TS_01:TS_02"``.

    Returns
    -------
    List[str]
        The individual tomogram names. Whitespace around each name is removed
        and empty entries (from an empty string, a doubled ``:`` or a trailing
        ``:``) are dropped, so they cannot silently fail to match a tomogram.
    """
    stripped_names = (name.strip() for name in training_tomograms.split(':'))
    return [name for name in stripped_names if name]

def generate_training_tomograms_star(
    global_star: pd.DataFrame,
    training_tomograms: List,
) -> pd.DataFrame:
    """Select the rows of the global star table chosen for training.

    Parameters
    ----------
    global_star : pd.DataFrame
        Global star table; must contain an ``rlnTomoName`` column.
    training_tomograms : List
        Tomogram names (values of ``rlnTomoName``) to train on.

    Returns
    -------
    pd.DataFrame
        The rows of ``global_star`` whose ``rlnTomoName`` is in
        ``training_tomograms``.

    Raises
    ------
    RuntimeError
        If none of the requested tomograms is present in ``global_star``.
    """
    # One boolean per row, aligned on the table's own index.
    is_training_tomogram = global_star['rlnTomoName'].isin(training_tomograms)
    if not is_training_tomogram.any():
        requested = ', '.join(str(name) for name in training_tomograms)
        raise RuntimeError(
            f"Could not find user specified training tomograms ({requested}) "
            f"in tilt series star file"
        )
    return global_star[is_training_tomogram]

def find_tomogram_halves(
    training_tomograms_star: pd.DataFrame,
) -> Tuple[List, List]:
    """Return the half-tomogram locations of the tomograms to train on.

    Returns
    -------
    Tuple[List, List]
        The values of ``rlnTomoReconstructedTomogramHalf1`` (even) followed by
        the values of ``rlnTomoReconstructedTomogramHalf2`` (odd).
    """
    half1_column = training_tomograms_star['rlnTomoReconstructedTomogramHalf1']
    half2_column = training_tomograms_star['rlnTomoReconstructedTomogramHalf2']
    even_tomos = half1_column.values.tolist()
    odd_tomos = half2_column.values.tolist()
    return even_tomos, odd_tomos

def generate_train_data_config_json(
    even_tomos: List,
    odd_tomos: List,
    training_dir: Path,
    number_training_subvolumes: int,
    subvolume_dimensions: int,
) -> Dict:
    """Build the contents of the ``train_data_config.json`` file.

    The dictionary is assembled directly rather than by formatting and
    re-parsing a JSON string, so paths containing quotes or backslashes are
    carried through unchanged.

    Parameters
    ----------
    even_tomos, odd_tomos : List
        Locations of the even and odd half tomograms.
    training_dir : Path
        Directory stored under the ``"path"`` key.
    number_training_subvolumes : int
        Stored as ``"num_slices"``; a tenth of it (rounded) is stored as
        ``"n_normalization_samples"``.
    subvolume_dimensions : int
        Edge length used for all three entries of ``"patch_shape"``.

    Returns
    -------
    Dict
        A dictionary that can be saved as a json file.
    """
    number_normalisation_subvolumes = round(number_training_subvolumes * 0.1)
    return {
        "even": list(even_tomos),
        "odd": list(odd_tomos),
        "patch_shape": [subvolume_dimensions] * 3,
        "num_slices": number_training_subvolumes,
        "split": 0.9,
        "tilt_axis": "Y",
        "n_normalization_samples": number_normalisation_subvolumes,
        "path": str(training_dir),
    }

def generate_train_config_json(
    training_dir: Path,
    output_directory: Path,
    model_name: str,
) -> Dict:
    """Build the contents of the ``train_config.json`` file.

    The dictionary is assembled directly rather than by formatting and
    re-parsing a JSON string, so paths and model names containing quotes or
    backslashes are carried through unchanged.

    Parameters
    ----------
    training_dir : Path
        Directory stored under the ``"train_data"`` key.
    output_directory : Path
        Directory stored under the ``"path"`` key.
    model_name : str
        Stored under the ``"model_name"`` key.

    Returns
    -------
    Dict
        A dictionary that can be saved as a json file. The remaining training
        settings are fixed values.
    """
    return {
        "train_data": str(training_dir),
        "epochs": 100,
        "steps_per_epoch": 200,
        "batch_size": 16,
        "unet_kern_size": 3,
        "unet_n_depth": 3,
        "unet_n_first": 16,
        "learning_rate": 0.0004,
        "model_name": str(model_name),
        "path": str(output_directory),
    }

def generate_predict_json(
    even_tomos: List,
    odd_tomos: List,
    training_dir: Path,
    model_name: Path,
    output_directory: Path,
    n_tiles: Tuple[int,int,int],
) -> Dict:
    """Build the contents of the ``predict_config.json`` file.

    The dictionary is assembled directly rather than by formatting and
    re-parsing a JSON string, so paths containing quotes or backslashes are
    carried through unchanged.

    Parameters
    ----------
    even_tomos, odd_tomos : List
        Locations of the even and odd half tomograms.
    training_dir : Path
        Not used by this function; kept so existing callers keep working.
    model_name : Path
        Stored under the ``"path"`` key.
    output_directory : Path
        Job directory; its ``tomograms`` sub-directory is stored as
        ``"output"``.
    n_tiles : Tuple[int, int, int]
        Stored as a list under the ``"n_tiles"`` key.

    Returns
    -------
    Dict
        A dictionary that can be saved as a json file.
    """
    return {
        "path": str(model_name),
        "even": list(even_tomos),
        "odd": list(odd_tomos),
        "n_tiles": list(n_tiles),
        "output": str(output_directory / "tomograms"),
    }

def save_json(
    training_dir: Path,
    output_json: Dict,
    json_prefix: str,
):
    """Write ``output_json`` to ``<training_dir>/<json_prefix>.json``.

    Parameters
    ----------
    training_dir : Path
        Directory the file is written into.
    output_json : Dict
        Content to serialise, indented by four spaces.
    json_prefix : str
        File name without the ``.json`` extension.
    """
    destination = f'{training_dir}/{json_prefix}.json'
    with open(destination, 'w') as json_handle:
        json.dump(output_json, json_handle, indent=4)

def save_tilt_series_stars(
    global_star: pd.DataFrame,
    tilt_series_dir: Path,
):
    """Copy each tilt-series star file into ``tilt_series_dir``.

    Every file named in ``rlnTomoTiltSeriesStarFile`` is copied to
    ``<tilt_series_dir>/<rlnTomoName>.star``. ``global_star`` is modified in
    place: its ``rlnTomoTiltSeriesStarFile`` column is overwritten with the
    locations of the copies.
    """
    def copied_star_path(row) -> str:
        # Location of the copy belonging to one row of the global star table.
        return f'{tilt_series_dir}/{row["rlnTomoName"]}.star'

    for _, tilt_series in global_star.iterrows():
        original_star = f"{tilt_series['rlnTomoTiltSeriesStarFile']}"
        shutil.copyfile(original_star, copied_star_path(tilt_series))

    global_star['rlnTomoTiltSeriesStarFile'] = global_star.apply(copied_star_path, axis=1)

def add_denoised_tomo_to_global_star(
    global_star: pd.DataFrame,
    tomogram_dir: Path,
    output_directory: Path,
):
    """Record the location of each denoised tomogram in the global star table.

    A ``rlnTomoReconstructedTomogramDenoised`` column holding
    ``<tomogram_dir>/rec_<rlnTomoName>.mrc`` is written into ``global_star``
    in place; the same table is returned. ``output_directory`` is not used.
    """
    def denoised_tomogram_path(row) -> str:
        # Location of the denoised tomogram for one row of the table.
        return f'{tomogram_dir}/rec_{row["rlnTomoName"]}.mrc'

    denoised_locations = global_star.apply(denoised_tomogram_path, axis=1)
    global_star['rlnTomoReconstructedTomogramDenoised'] = denoised_locations
    return global_star

def save_global_star(
    global_star: pd.DataFrame,
    output_directory: Path,
):
    """Write the global star table to ``<output_directory>/tomograms.star``.

    The table is stored in a single data block named ``global``.
    """
    destination = f'{output_directory}/tomograms.star'
    data_blocks = {'global': global_star}
    starfile.write(data_blocks, destination)

def rename_predicted_tomograms(
    even_tomos: List,
    tomogram_dir: Path,
    even_suffix: str,
):
    """Remove ``even_suffix`` from the names of the files in ``tomogram_dir``.

    For every entry of ``even_tomos`` a file with the same file name is
    expected in ``tomogram_dir``; it is renamed so that ``even_suffix`` no
    longer appears in its stem.

    Parameters
    ----------
    even_tomos : List
        Locations of the even half tomograms; only their file names are used.
    tomogram_dir : Path
        Directory holding the files to rename.
    even_suffix : str
        Text removed from each file stem.
    """
    # Resolve every expected file first, then rename them one by one.
    predicted_tomograms = [
        Path(f"{tomogram_dir}/{Path(even_tomo).name}") for even_tomo in even_tomos
    ]
    for predicted in predicted_tomograms:
        cleaned_stem = predicted.stem.replace(even_suffix, '')
        predicted.rename(Path(f"{tomogram_dir}/{cleaned_stem}{predicted.suffix}"))
13 changes: 13 additions & 0 deletions src/tomography_python_programs/denoising/cryoCARE/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Suffix given to even tomograms during split tomogram generation. It is
# removed from the names of the output tomograms.
EVEN_SUFFIX = '_half1'

# Name (without the .json extension) of the predict_config.json file.
PREDICT_CONFIG_PREFIX = 'predict_config'

# Name (without the .json extension) of the train_data_config.json file.
TRAIN_DATA_CONFIG_PREFIX = 'train_data_config'

# Name of the model to be trained.
MODEL_NAME = 'denoising_model'

# Name (without the .json extension) of the train_config.json file.
TRAIN_CONFIG_PREFIX = 'train_config'




Loading

0 comments on commit d5ea705

Please sign in to comment.