From e0c6cb83965a244e9c125261f225de464184f656 Mon Sep 17 00:00:00 2001
From: Daniel Bolin
Date: Tue, 5 Mar 2024 15:35:08 -0500
Subject: [PATCH] Improve data cleaning before running through celltypist

---
 containers/celltypist/context/main.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/containers/celltypist/context/main.py b/containers/celltypist/context/main.py
index 5084374..c6d01c8 100644
--- a/containers/celltypist/context/main.py
+++ b/containers/celltypist/context/main.py
@@ -34,6 +34,7 @@ def do_run(
         """Annotate data using celltypist."""
         data = scanpy.read_h5ad(matrix)
         data = set_data_layer(data, options["query_layers_key"])
+        data = self.clean(data)
         data = self.normalize(data)
         data, var_names = self.normalize_var_names(data, options)
         data = celltypist.annotate(
@@ -42,6 +43,18 @@ def do_run(
         data.var_names = t.cast(t.Any, var_names)
 
         return {"data": data, "organ_level": metadata["model"].replace(".", "_")}
+
+    def clean(self, data: scanpy.AnnData) -> scanpy.AnnData:
+        """Clean the data by discarding incompatible preprocessing kept in `obsm`.
+
+        Args:
+            data (scanpy.AnnData): Data to clean; its `obsm` is set to None in place
+
+        Returns:
+            scanpy.AnnData: The same object that was passed in, after cleaning
+        """
+        data.obsm = None
+        return data
 
     def normalize(self, data: scanpy.AnnData) -> scanpy.AnnData:
         """Normalizes data according to celltypist requirements.