Skip to content

Commit

Permalink
update scripts
Browse files: browse the repository at this point in the history
  • Loading branch information
rcannood committed Sep 2, 2024
1 parent b5b64c2 commit 3aa0dff
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 65 deletions.
22 changes: 16 additions & 6 deletions src/control_methods/majority_vote/script.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import anndata as ad
import pandas as pd

## VIASH START
par = {
Expand All @@ -16,11 +17,20 @@
input_test = ad.read_h5ad(par['input_test'])

print("Compute majority vote", flush=True)
majority = input_train.obs.label.value_counts().index[0]
label_pred = input_train.obs.label.value_counts().index[0]

print("Create prediction object", flush=True)
input_test.obs["label_pred"] = majority
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par["output"], compression="gzip")
print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
21 changes: 17 additions & 4 deletions src/control_methods/random_labels/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import anndata as ad
import numpy as np
import pandas as pd

## VIASH START
par = {
Expand All @@ -21,13 +22,25 @@
label_distribution = label_distribution / label_distribution.sum()

print("Create prediction object", flush=True)
input_test.obs["label_pred"] = np.random.choice(
label_pred = np.random.choice(
label_distribution.index,
size=input_test.n_obs,
replace=True,
p=label_distribution
)

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par["output"], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
21 changes: 17 additions & 4 deletions src/control_methods/true_labels/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad

## VIASH START
Expand All @@ -18,8 +19,20 @@
input_solution = ad.read_h5ad(par['input_solution'])

print("Create prediction object", flush=True)
input_test.obs["label_pred"] = input_solution.obs["label"]
label_pred = input_solution.obs["label"]

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par["output"], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
23 changes: 18 additions & 5 deletions src/methods/knn/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad
import sklearn.neighbors

Expand All @@ -8,7 +9,7 @@
'output': 'output.h5ad'
}
meta = {
'functionality_name': 'foo',
'name': 'foo',
}
## VIASH END

Expand All @@ -21,8 +22,20 @@
classifier.fit(input_train.obsm["X_pca"], input_train.obs["label"].astype(str))

print("Predict on test data", flush=True)
input_test.obs["label_pred"] = classifier.predict(input_test.obsm["X_pca"])
label_pred = classifier.predict(input_test.obsm["X_pca"])

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par['output'], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
23 changes: 18 additions & 5 deletions src/methods/logistic_regression/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad
import sklearn.linear_model

Expand All @@ -8,7 +9,7 @@
'output': 'output.h5ad'
}
meta = {
'functionality_name': 'foo',
'name': 'foo',
}
## VIASH END

Expand All @@ -21,8 +22,20 @@
classifier.fit(input_train.obsm["X_pca"], input_train.obs["label"].astype(str))

print("Predict on test data", flush=True)
input_test.obs["label_pred"] = classifier.predict(input_test.obsm["X_pca"])
label_pred = classifier.predict(input_test.obsm["X_pca"])

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par['output'], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
21 changes: 17 additions & 4 deletions src/methods/mlp/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad
from sklearn.neural_network import MLPClassifier

Expand All @@ -24,8 +25,20 @@
classifier.fit(input_train.obsm["X_pca"], input_train.obs["label"].astype(str))

print("Predict on test data", flush=True)
input_test.obs["label_pred"] = classifier.predict(input_test.obsm["X_pca"])
label_pred = classifier.predict(input_test.obsm["X_pca"])

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par['output'], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
23 changes: 18 additions & 5 deletions src/methods/naive_bayes/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad
import sklearn.naive_bayes

Expand All @@ -8,7 +9,7 @@
'output': 'output.h5ad'
}
meta = {
'functionality_name': 'foo',
'name': 'foo',
}
## VIASH END

Expand All @@ -21,8 +22,20 @@
classifier.fit(input_train.obsm["X_pca"], input_train.obs["label"].astype(str))

print("Predict on test data", flush=True)
input_test.obs["label_pred"] = classifier.predict(input_test.obsm["X_pca"])
label_pred = classifier.predict(input_test.obsm["X_pca"])

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par['output'], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
25 changes: 18 additions & 7 deletions src/methods/scanvi_scarches/script.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import anndata as ad
import numpy as np
import pandas as pd
import scvi

## VIASH START
Expand Down Expand Up @@ -52,10 +52,21 @@
train_kwargs = dict(max_epochs=par["max_epochs"], early_stopping=True)
query_model.train(plan_kwargs=dict(weight_decay=0.0), **train_kwargs)

print("Generate predictions", flush=True)
input_test.obs["label"] = "Unknown"
input_test.obs["label_pred"] = query_model.predict(input_test)
print("Predict on test data", flush=True)
label_pred = query_model.predict(input_test)

print("Write output AnnData to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par["output"], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
32 changes: 22 additions & 10 deletions src/methods/seurat_transferdata/script.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ par <- list(
input_test = "resources_test/label_projection/pancreas/test.h5ad",
output = "output.h5ad"
)
meta <- list(
name = "seurat_transferdata"
)
## VIASH END

packageVersion("Matrix")
Expand All @@ -18,11 +21,6 @@ cat(">> Load input data\n")
input_train <- read_h5ad(par$input_train)
input_test <- read_h5ad(par$input_test)

# sce_train <- zellkonverter::readH5AD(par$input_train)
# obj_train <- Seurat::as.Seurat(sce_train, data = "normalized")
# sce_test <- zellkonverter::readH5AD(par$input_test)
# obj_test <- Seurat::as.Seurat(sce_test, data = "normalized")

cat(">> Converting AnnData to Seurat\n")
anndataToSeurat <- function(adata) {
# interpreted from https://github.com/satijalab/seurat/blob/v3.1.0/R/objects.R
Expand All @@ -31,7 +29,7 @@ anndataToSeurat <- function(adata) {
counts = as(Matrix::t(adata$layers[["counts"]]), "CsparseMatrix")
) %>%
SeuratObject::SetAssayData(
slot = "data",
layer = "data",
new.data = as(Matrix::t(adata$layers[["normalized"]]), "CsparseMatrix")
) %>%
SeuratObject::AddMetaData(
Expand All @@ -43,8 +41,10 @@ anndataToSeurat <- function(adata) {

# set embedding
# could add loadings and stdev
X_pca <- adata$obsm[["X_pca"]]
dimnames(X_pca) <- list(rownames(adata), paste0("PC_", seq_len(ncol(X_pca))))
embed <- SeuratObject::CreateDimReducObject(
embeddings = adata$obsm[["X_pca"]],
embeddings = X_pca,
key = "PC_"
)
obj[["pca"]] <- embed
Expand Down Expand Up @@ -74,8 +74,20 @@ query <- Seurat::TransferData(
refdata = list(labels = "label"),
verbose = FALSE
)
input_test$obs[["label_pred"]] <- query$predicted.labels[input_test$obs_names]

cat(">> Create output data\n")
output <- anndata::AnnData(
obs = data.frame(
row.names = input_test$obs_names,
label_pred = query$predicted.labels
),
uns = list(
method_id = meta$name,
dataset_id = input_test$uns[["dataset_id"]],
normalization_id = input_test$uns[["normalization_id"]]
),
shape = c(input_test$n_obs, 0L)
)

cat(">> Write output to file\n")
input_test$uns[["method_id"]] <- meta[["functionality_name"]]
input_test$write_h5ad(par$output, compression = "gzip")
output$write_h5ad(par$output, compression = "gzip")
23 changes: 18 additions & 5 deletions src/methods/xgboost/script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import pandas as pd
import anndata as ad
import xgboost as xgb

Expand All @@ -8,7 +9,7 @@
'output': 'output.h5ad'
}
meta = {
'functionality_name': 'foo',
'name': 'foo'
}
## VIASH END

Expand All @@ -32,8 +33,20 @@

print("Predict on test data", flush=True)
pred = xgb_op.predict(xg_test).astype(int)
input_test.obs["label_pred"] = categories[pred]
label_pred = categories[pred]

print("Write output to file", flush=True)
input_test.uns["method_id"] = meta["functionality_name"]
input_test.write_h5ad(par['output'], compression="gzip")
print("Create output data", flush=True)
output = ad.AnnData(
obs=pd.DataFrame(
{ 'label_pred': label_pred },
index=input_test.obs.index
),
uns={
'method_id': meta['name'],
"dataset_id": input_test.uns["dataset_id"],
"normalization_id": input_test.uns["normalization_id"]
}
)

print("Write output data", flush=True)
output.write_h5ad(par['output'], compression="gzip")
Loading

0 comments on commit 3aa0dff

Please sign in to comment.