Skip to content

Commit

Permalink
consensus calculation using HPCell function
Browse files Browse the repository at this point in the history
  • Loading branch information
stemangiola committed Oct 23, 2024
1 parent f3b59b4 commit edd72cf
Showing 1 changed file with 55 additions and 0 deletions.
55 changes: 55 additions & 0 deletions dev/HCA_cell_type_consensus.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
library(arrow)
library(dplyr)
library(duckdb)
library(HPCell)

# Expose the input Parquet file as a DuckDB-backed table (not an in-memory
# R data frame): the file is queried through read_parquet() on an in-memory
# DuckDB connection.
parquet_file <- "/vast/projects/cellxgene_curated/census_samples/concensus_input.parquet"
con <- dbConnect(duckdb::duckdb(), dbdir = ":memory:")

data_tbl <- tbl(
  con,
  paste0("SELECT * FROM read_parquet('", parquet_file, "')") |> sql()
)

# Keep the per-cell columns carried through to the output together with the
# three reference annotations (azimuth, monaco, blueprint) that feed the
# consensus below.
# Narrower alternative kept for reference:
# select(azimuth_predicted.celltype.l2, monaco_first.labels.fine, blueprint_first.labels.fine)
annotation_combination <- select(
  data_tbl,
  cell_,
  dataset_id,
  cell_type,
  cell_type_ontology_term_id,
  azimuth_predicted.celltype.l2,
  monaco_first.labels.fine,
  blueprint_first.labels.fine
)
#arrange(desc(n)) |>





# Compute the consensus once per distinct combination of the three reference
# annotations rather than once per cell: de-duplicate in the database,
# materialise the combinations in R, then call
# reference_annotation_to_consensus() (presumably provided by HPCell) on the
# three label columns.
annotation_consensus <-
  annotation_combination |>
  distinct(
    azimuth_predicted.celltype.l2,
    monaco_first.labels.fine,
    blueprint_first.labels.fine
  ) |>
  as_tibble() |>
  mutate(
    reannotation_consensus = reference_annotation_to_consensus(
      azimuth_input = azimuth_predicted.celltype.l2,
      monaco_input = monaco_first.labels.fine,
      blueprint_input = blueprint_first.labels.fine
    )
  )


# Attach the consensus label to every cell.
#
# * `by` is spelled out instead of relying on a natural join: this silences
#   the "Joining with `by = ...`" message and keeps the keys stable even if
#   the two tables ever share another column (e.g. when this block is re-run
#   after `reannotation_consensus` has already been added).
# * `copy = TRUE` uploads the local consensus tibble to the database so the
#   join runs there and the result stays a lazy table.
# * `na_matches = "na"`: on a database backend the default is "never", so a
#   combination with a missing label in any reference would not match its own
#   consensus row and the cell would silently end up with a NULL consensus.
annotation_combination <-
  annotation_combination |>
  left_join(
    annotation_consensus,
    by = c(
      "azimuth_predicted.celltype.l2",
      "monaco_first.labels.fine",
      "blueprint_first.labels.fine"
    ),
    copy = TRUE,
    na_matches = "na"
  )

# Destination of the annotated table.
output_parquet <- "/vast/projects/mangiola_immune_map/PostDoc/CuratedAtlasQueryR/dev/consensus_output.parquet"

# Build a DuckDB COPY ... TO statement around the SQL rendered from the lazy
# dplyr pipeline, so that DuckDB writes the Parquet file itself.
copy_query <- paste0(
"
COPY (
SELECT *
FROM (
", dbplyr::sql_render(annotation_combination), "
)
) TO '", output_parquet, "' (FORMAT PARQUET);
"
)

# Run the export as a plain SQL statement on the connection that backs the
# lazy table.
dbExecute(con, copy_query)

# Close the connection used for the computation and shut down its DuckDB
# instance.
dbDisconnect(con, shutdown = TRUE)

# Read back: open a fresh in-memory connection and expose the Parquet file
# that was just written as a table. The connection is left open because the
# table is queried through it.
con <- dbConnect(duckdb::duckdb(), dbdir = ":memory:")
data_consensus <- tbl(
  con,
  paste0("SELECT * FROM read_parquet('", output_parquet, "')") |> sql()
)

0 comments on commit edd72cf

Please sign in to comment.