Skip to content

Commit

Permalink
update docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
sophiamaedler committed Jan 17, 2024
1 parent e39e47e commit c7232f0
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 13 deletions.
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1 +1 @@
include src/vipercore/pretrained_models
include src/pretrained_models
2 changes: 1 addition & 1 deletion src/sparcscore/pipeline/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def get_timestamp(self):

class ProcessingStep(Logable):

"""Processing step living in its own subdirectory of the project.
"""Processing step. Can load a configuration file and create a subdirectory under the project class for the processing step.
Args:
config (dict): Config file which is passed by the Project class when called. Is loaded from the project based on the name of the class.
Expand Down
114 changes: 103 additions & 11 deletions src/sparcscore/pipeline/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@

class Project(Logable):
"""
Project base class used to create a SPARCSpy project.
Project base class used to create a SPARCSpy project. This class manages all of the SPARCSpy processing steps. It directly maps
to a directory on the file system which contains all of the project inputs as well as the generated outputs.
Parameters
----------
Expand Down Expand Up @@ -246,17 +247,15 @@ def _load_config_from_file(self, file_path):

def load_input_from_file(self, file_paths, crop=[(0, -1), (0, -1)]):
"""
Load input image from a number of files.
Load input image from a list of files. The channels need to be specified in the following order: nucleus, cytosol other channels.
Parameters
----------
file_paths : list(str)
List containing paths to each channel like
[“path1/img.tiff”, “path2/img.tiff”, “path3/img.tiff”].
Expects a list of file paths with length “input_channel” as
defined in the config.yml. Input data is NOT copied to the
project folder by default. Different segmentation functions
especially tiled segmentations might copy the input.
defined in the config.yml.
crop : list(tuple), optional
When set, it can be used to crop the input image. The first
Expand Down Expand Up @@ -296,7 +295,10 @@ def load_input_from_file(self, file_paths, crop=[(0, -1), (0, -1)]):

def load_input_from_array(self, array, remap=None):
"""
Load input image from an already loaded numpy array.
Load input image from an already loaded numpy array.
The numpy array needs to have the following shape: CXY.
The channels need to be in the following order: nucleus, cellmembrane channel,
other channnels or a remapping needs to be defined.
Parameters
----------
Expand Down Expand Up @@ -385,8 +387,10 @@ def process(self):

class TimecourseProject(Project):
"""
Timecourse Project used to create a SPARCSpy project for datasets that have multiple timepoints
over the same field of view (add additional dimension in comparision to base SPARCSpy project).
TimecourseProject class used to create a SPARCSpy project for datasets that have multiple fields of view that should be processed and analysed together.
It is also capable of handling multiple timepoints for the same field of view or a combiantion of both. Like the base SPARCSpy :func:`Project <sparcscore.pipeline.project.Project>`,
it manages all of the SPARCSpy processing steps. Because the input data has a different dimensionality than the base SPARCSpy :func:`Project <sparcscore.pipeline.project.Project>` class,
it requires the use of specialized processing classes that are able to handle this additional dimensionality.
Parameters
----------
Expand Down Expand Up @@ -431,6 +435,25 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

def load_input_from_array(self, img, label, overwrite=False):
"""
Function to load imaging data from an array into the TimecourseProject.
The provided array needs to fullfill the following conditions:
- shape: NCYX
- all images need to have the same dimensions and the same number of channels
- channels need to be in the following order: nucleus, cytosol other channels
- dtype uint16.
Parameters
----------
img : numpy.ndarray
Numpy array of shape “[num_images, channels, height, width]”.
label : numpy.ndarray
Numpy array of shape “[num_images, num_labels]” containing the labels for each image. The labels need to have the following structure: "image_index", "unique_image_identifier", "..."
overwrite : bool, default False
If set to True, the function will overwrite the existing input image.
"""

# check if already exists if so throw error message
if not os.path.isdir(
os.path.join(self.directory, self.DEFAULT_SEGMENTATION_DIR_NAME)
Expand Down Expand Up @@ -482,7 +505,45 @@ def load_input_from_files(
overwrite=False,
):
"""
Function to load timecourse experiments recorded with opera phenix into .h5 dataformat for further processing.
Function to load timecourse experiments recorded with an opera phenix into the TimecourseProject.
Before being able to use this function the exported images from the opera phenix first need to be parsed, sorted and renamed using the `sparcstools package <https://github.com/MannLabs/SPARCStools>`_.
In addition a plate layout file needs to be created that contains the information on imaged experiment and the experimental conditions for each well. This file needs to be in the following format,
using the well notation ``RowXX_WellXX``:
.. csv-table::
:header: "Well", "Condition1", "Condition2", ...
:widths: auto
"RowXX_WellXX", "A", "B", ...
A tab needs to be used as a seperator and the file saved as a .tsv file.
Parameters
----------
input_dir : str
Path to the directory containing the sorted images from the opera phenix.
channels : list(str)
List containing the names of the channels that should be loaded.
timepoints : list(str)
List containing the names of the timepoints that should be loaded. Will return a warning if you try to load a timepoint that is not found in the data.
plate_layout : str
Path to the plate layout file. For the format please see above.
img_size : int, default 1080
Size of the images that should be loaded. All images will be cropped to this size.
overwrite : bool, default False
If set to True, the function will overwrite the existing input image.
Example
-------
>>> channels = ["DAPI", "Alexa488", "mCherry"]
>>> timepoints = ["Timepoint"+str(x).zfill(3) for x in list(range(1, 3))]
>>> input_dir = "path/to/sorted/outputs/from/sparcstools"
>>> plate_layout = "plate_layout.tsv"
>>> project.load_input_from_files(input_dir = input_dir, channels = channels, timepoints = timepoints, plate_layout = plate_layout, overwrite = True)
"""

# check if already exists if so throw error message
Expand Down Expand Up @@ -701,9 +762,40 @@ def load_input_from_files_and_merge(
cytosol_channel="Alexa488",
):
"""
Function to load timecourse experiments recorded with opera phenix into .h5 dataformat for further processing after merging all the regions from each teampoint in each well.
Function to load timecourse experiments recorded with an opera phenix into a TimecourseProject. In addition to loading the images,
this wrapper function also stitches images acquired in the same well (this assumes that the tiles were aquired with overlap and in a rectangular shape)
using the `sparcstools package <https://github.com/MannLabs/SPARCStools>`_. Implementation of this function is currently still slow for many wells/timepoints as stitching
is handled consecutively and not in parallel. This will be fixed in the future.
Parameters
----------
input_dir : str
Path to the directory containing the sorted images from the opera phenix.
channels : list(str)
List containing the names of the channels that should be loaded.
timepoints : list(str)
List containing the names of the timepoints that should be loaded. Will return a warning if you try to load a timepoint that is not found in the data.
plate_layout : str
Path to the plate layout file. For the format please see above.
img_size : int, default 1080
Size of the images that should be loaded. All images will be cropped to this size.
stitching_channel : str, default "Alexa488"
string indicated on which channel the stitching should be calculated.
overlap : float, default 0.1
float indicating the overlap between the tiles that were aquired.
max_shift : int, default 10
int indicating the maximum shift that is allowed when stitching the tiles. If a calculated shift is larger than this threshold
between two tiles then the position of these tiles is not updated and is set according to the calculated position based on the overlap.
overwrite : bool, default False
If set to True, the function will overwrite the existing input image.
nucleus_channel : str, default "DAPI"
string indicating the channel that should be used for the nucleus channel.
cytosol_channel : str, default "Alexa488"
string indicating the channel that should be used for the cytosol channel.
"""
from vipertools.stitch import generate_stitched

from sparcstools.stitch import generate_stitched

# check if already exists if so throw error message
if not os.path.isdir(
Expand Down

0 comments on commit c7232f0

Please sign in to comment.