Skip to content

Commit

Permalink
Improve data cleaning before running through celltypist
Browse files Browse the repository at this point in the history
  • Loading branch information
axdanbol committed Mar 5, 2024
1 parent add5e6d commit e0c6cb8
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions containers/celltypist/context/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def do_run(
"""Annotate data using celltypist."""
data = scanpy.read_h5ad(matrix)
data = set_data_layer(data, options["query_layers_key"])
data = self.clean(data)
data = self.normalize(data)
data, var_names = self.normalize_var_names(data, options)
data = celltypist.annotate(
Expand All @@ -42,6 +43,18 @@ def do_run(
data.var_names = t.cast(t.Any, var_names)

return {"data": data, "organ_level": metadata["model"].replace(".", "_")}

def clean(self, data: scanpy.AnnData) -> scanpy.AnnData:
    """Strip incompatible preprocessing results from the data.

    The only cleanup performed is resetting ``data.obsm``. The object is
    modified in place and the very same instance is handed back so the
    call can be chained with the other preparation steps.

    Args:
        data (scanpy.AnnData): Data to clean (mutated in place)

    Returns:
        scanpy.AnnData: The same object, with ``obsm`` reset
    """
    # NOTE(review): presumably anndata interprets assigning ``None`` as
    # "drop every obsm entry" -- confirm against the anndata version in use.
    data.obsm = None
    return data

def normalize(self, data: scanpy.AnnData) -> scanpy.AnnData:
"""Normalizes data according to celltypist requirements.
Expand Down

0 comments on commit e0c6cb8

Please sign in to comment.