nf-core · ECM893 · Aug 24, 2025 · Aug 24, 2025 · Aug 26, 2025
diff --git a/modules/local/celltypes/celltypist/templates/celltypist.py b/modules/local/celltypes/celltypist/templates/celltypist.py
@@ -47,7 +47,7 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
 if symbol_col != "index" and symbol_col:
     if symbol_col not in adata_celltypist.var.columns:
         raise ValueError(f"Symbol column {symbol_col} not found in adata.var.columns")
-    adata_celltypist.var_names = adata_celltypist.var[symbol_col]
+    adata_celltypist.var_names = adata_celltypist.var[symbol_col].to_list()  # assign plain values rather than a pandas Series
 
 df_list = []
 

diff --git a/modules/local/liana/rankaggregate/templates/rank_aggregate.py b/modules/local/liana/rankaggregate/templates/rank_aggregate.py
@@ -39,6 +39,11 @@ def format_yaml_like(data: dict, indent: int = 0) -> str:
 prefix = "${prefix}"
 obs_key = "${obs_key}"
 
+# Ensure that var_names are plain strings (not categorical) and unique.
+# NOTE(review): non-string var_names were seen on input coming from scanvi; root cause not yet confirmed.
+adata.var_names = adata.var_names.astype(str)
+adata.var_names_make_unique()
+
 if adata.obs[obs_key].nunique() > 1:
     if (adata.X < 0).nnz == 0:
         sc.pp.log1p(adata)

diff --git a/modules/local/scvitools/scanvi/templates/scanvi.py b/modules/local/scvitools/scanvi/templates/scanvi.py
@@ -25,6 +25,46 @@
 reference_model_path = "reference_model"
 reference_model_type = "${meta2.id}"
 
+# FIXME: This is a hack. CellTypist writes its output to the obs columns
+# f"celltypist:{model_name}" and f"celltypist:{model_name}:conf", but this
+# script expects the labels under "${label_col}" ("label"), and currently no
+# upstream script seems to apply a confidence cutoff and assign the result to
+# that column. When CellTypist columns are present, overwrite the labels with
+# the CellTypist predictions (confidence-filtered); otherwise leave them as is.
+pred_cols = [
+    c
+    for c in adata.obs.columns
+    if c.startswith("celltypist:") and not c.endswith(":conf")
+]
+
+# Only apply the hack when CellTypist was actually run; indexing an empty
+# list here would abort the script with an IndexError.
+if pred_cols:
+    # If multiple models are present, pick the first one
+    pred_col = pred_cols[0]
+    conf_col = pred_col + ":conf"
+
+    print(f"Using CellTypist column: {pred_col}")
+
+    if conf_col in adata.obs:
+        CUTOFF = 0.7
+        # Vectorised cutoff: keep predictions with confidence >= CUTOFF and
+        # set everything else (including missing confidences) to "unknown".
+        # Cast to object first so "unknown" need not be an existing category.
+        adata.obs["${label_col}"] = (
+            adata.obs[pred_col]
+            .astype(object)
+            .where(adata.obs[conf_col] >= CUTOFF, "unknown")
+        )
+    else:
+        adata.obs["${label_col}"] = adata.obs[pred_col]
+
+    # Make sure labels are categorical and contain "unknown"
+    adata.obs["${label_col}"] = adata.obs["${label_col}"].astype("category")
+    if "unknown" not in adata.obs["${label_col}"].cat.categories:
+        adata.obs["${label_col}"] = adata.obs["${label_col}"].cat.add_categories(["unknown"])
+# ---- End Hack ----
+
 if reference_model_type == "scanvi":
     SCANVI.prepare_query_anndata(adata, reference_model_path)
     model = SCANVI.load_query_data(adata, reference_model_path)