update docstrings

MannLabs · Jan 17, 2024 · c7232f0 · c7232f0
1 parent e39e47e
commit c7232f0
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 13 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -1 +1 @@
-include src/vipercore/pretrained_models
+include src/pretrained_models
diff --git a/src/sparcscore/pipeline/base.py b/src/sparcscore/pipeline/base.py
@@ -75,7 +75,7 @@ def get_timestamp(self):
 
 class ProcessingStep(Logable):
 
-    """Processing step living in its own subdirectory of the project.
+    """Processing step. Can load a configuration file and create a subdirectory under the project class for the processing step.
         
     Args:
         config (dict): Config file which is passed by the Project class when called. Is loaded from the project based on the name of the class.

diff --git a/src/sparcscore/pipeline/project.py b/src/sparcscore/pipeline/project.py
@@ -20,7 +20,8 @@
 
 class Project(Logable):
     """
-    Project base class used to create a SPARCSpy project.
+    Project base class used to create a SPARCSpy project. This class manages all of the SPARCSpy processing steps. It directly maps
+    to a directory on the file system which contains all of the project inputs as well as the generated outputs.
 
     Parameters
     ----------
@@ -246,17 +247,15 @@ def _load_config_from_file(self, file_path):
 
     def load_input_from_file(self, file_paths, crop=[(0, -1), (0, -1)]):
         """
-        Load input image from a number of files.
+        Load input image from a list of files. The channels need to be specified in the following order: nucleus, cytosol other channels.
 
         Parameters
         ----------
         file_paths : list(str)
             List containing paths to each channel like
             [“path1/img.tiff”, “path2/img.tiff”, “path3/img.tiff”].
             Expects a list of file paths with length “input_channel” as
-            defined in the config.yml. Input data is NOT copied to the
-            project folder by default. Different segmentation functions
-            especially tiled segmentations might copy the input.
+            defined in the config.yml.
 
         crop : list(tuple), optional
             When set, it can be used to crop the input image. The first
@@ -296,7 +295,10 @@ def load_input_from_file(self, file_paths, crop=[(0, -1), (0, -1)]):
 
     def load_input_from_array(self, array, remap=None):
         """
-        Load input image from an already loaded numpy array.
+        Load input image from an already loaded numpy array. 
+        The numpy array needs to have the following shape: CXY. 
+        The channels need to be in the following order: nucleus, cellmembrane channel, 
+        other channnels or a remapping needs to be defined.
 
         Parameters
         ----------
@@ -385,8 +387,10 @@ def process(self):
 
 class TimecourseProject(Project):
     """
-    Timecourse Project used to create a SPARCSpy project for datasets that have multiple timepoints
-    over the same field of view (add additional dimension in comparision to base SPARCSpy project).
+    TimecourseProject class used to create a SPARCSpy project for datasets that have multiple fields of view that should be processed and analysed together. 
+    It is also capable of handling multiple timepoints for the same field of view or a combiantion of both. Like the base SPARCSpy :func:`Project <sparcscore.pipeline.project.Project>`,
+    it manages all of the SPARCSpy processing steps. Because the input data has a different dimensionality than the base SPARCSpy :func:`Project <sparcscore.pipeline.project.Project>` class,
+    it requires the use of specialized processing classes that are able to handle this additional dimensionality.
 
     Parameters
     ----------
@@ -431,6 +435,25 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def load_input_from_array(self, img, label, overwrite=False):
+        """
+        Function to load imaging data from an array into the TimecourseProject.
+        
+        The provided array needs to fullfill the following conditions:
+        - shape: NCYX
+        - all images need to have the same dimensions and the same number of channels
+        - channels need to be in the following order: nucleus, cytosol other channels
+        - dtype uint16.
+
+        Parameters
+        ----------
+        img : numpy.ndarray
+            Numpy array of shape “[num_images, channels, height, width]”.
+        label : numpy.ndarray
+            Numpy array of shape “[num_images, num_labels]” containing the labels for each image. The labels need to have the following structure: "image_index", "unique_image_identifier", "..."
+        overwrite : bool, default False
+            If set to True, the function will overwrite the existing input image.
+        """
+
         # check if already exists if so throw error message
         if not os.path.isdir(
                 os.path.join(self.directory, self.DEFAULT_SEGMENTATION_DIR_NAME)
@@ -482,7 +505,45 @@ def load_input_from_files(
             overwrite=False,
     ):
         """
-        Function to load timecourse experiments recorded with opera phenix into .h5 dataformat for further processing.
+        Function to load timecourse experiments recorded with an opera phenix into the TimecourseProject.
+    
+        Before being able to use this function the exported images from the opera phenix first need to be parsed, sorted and renamed using the `sparcstools package <https://github.com/MannLabs/SPARCStools>`_.
+
+        In addition a plate layout file needs to be created that contains the information on imaged experiment and the experimental conditions for each well. This file needs to be in the following format,
+        using the well notation ``RowXX_WellXX``:
+
+        .. csv-table::
+            :header: "Well", "Condition1", "Condition2", ...
+            :widths: auto
+
+            "RowXX_WellXX", "A", "B", ...
+
+        A tab needs to be used as a seperator and the file saved as a .tsv file.
+
+        Parameters
+        ----------
+        input_dir : str
+            Path to the directory containing the sorted images from the opera phenix.
+        channels : list(str)
+            List containing the names of the channels that should be loaded.
+        timepoints : list(str)
+            List containing the names of the timepoints that should be loaded. Will return a warning if you try to load a timepoint that is not found in the data.
+        plate_layout : str
+            Path to the plate layout file. For the format please see above.
+        img_size : int, default 1080
+            Size of the images that should be loaded. All images will be cropped to this size.
+        overwrite : bool, default False
+            If set to True, the function will overwrite the existing input image.
+
+        Example
+        -------
+        >>> channels = ["DAPI", "Alexa488", "mCherry"]
+        >>> timepoints = ["Timepoint"+str(x).zfill(3) for x in list(range(1, 3))]
+        >>> input_dir = "path/to/sorted/outputs/from/sparcstools"
+        >>> plate_layout = "plate_layout.tsv"
+
+        >>> project.load_input_from_files(input_dir = input_dir,  channels = channels,  timepoints = timepoints, plate_layout = plate_layout, overwrite = True)
+        
         """
 
         # check if already exists if so throw error message
@@ -701,9 +762,40 @@ def load_input_from_files_and_merge(
             cytosol_channel="Alexa488",
     ):
         """
-        Function to load timecourse experiments recorded with opera phenix into .h5 dataformat for further processing after merging all the regions from each teampoint in each well.
+        Function to load timecourse experiments recorded with an opera phenix into a TimecourseProject. In addition to loading the images, 
+        this wrapper function also stitches images acquired in the same well (this assumes that the tiles were aquired with overlap and in a rectangular shape)
+        using the `sparcstools package <https://github.com/MannLabs/SPARCStools>`_. Implementation of this function is currently still slow for many wells/timepoints as stitching 
+        is handled consecutively and not in parallel. This will be fixed in the future.
+
+        Parameters
+        ----------
+        input_dir : str
+            Path to the directory containing the sorted images from the opera phenix.
+        channels : list(str)
+            List containing the names of the channels that should be loaded.
+        timepoints : list(str)
+            List containing the names of the timepoints that should be loaded. Will return a warning if you try to load a timepoint that is not found in the data.
+        plate_layout : str
+            Path to the plate layout file. For the format please see above.
+        img_size : int, default 1080
+            Size of the images that should be loaded. All images will be cropped to this size.
+        stitching_channel : str, default "Alexa488"
+            string indicated on which channel the stitching should be calculated.
+        overlap : float, default 0.1
+            float indicating the overlap between the tiles that were aquired.
+        max_shift : int, default 10
+            int indicating the maximum shift that is allowed when stitching the tiles. If a calculated shift is larger than this threshold
+            between two tiles then the position of these tiles is not updated and is set according to the calculated position based on the overlap.
+        overwrite : bool, default False
+            If set to True, the function will overwrite the existing input image.
+        nucleus_channel : str, default "DAPI"
+            string indicating the channel that should be used for the nucleus channel.
+        cytosol_channel : str, default "Alexa488"
+            string indicating the channel that should be used for the cytosol channel.
+        
         """
-        from vipertools.stitch import generate_stitched
+
+        from sparcstools.stitch import generate_stitched
 
         # check if already exists if so throw error message
         if not os.path.isdir(
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		include src/vipercore/pretrained_models
		include src/pretrained_models