Skip to content

Commit

Permalink
Update 01-multi-modal-clustering-prepare-data.R
Browse files Browse the repository at this point in the history
  • Loading branch information
aadamk authored Oct 9, 2024
1 parent b8343e3 commit 76bb57f
Showing 1 changed file with 7 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ dir.create(output_dir, showWarnings = F, recursive = T)
source(file.path("utils", "filter_cnv.R"))

# Read the histology table and keep only the rows whose short_histology is
# listed in short_histology_of_interest.
# cat() does not append a newline, so one is written explicitly to keep this
# progress message from running into whatever is printed next.
cat("Reading histology file\n")
# histology_file first holds the input path (from opt) and is then
# overwritten with the filtered table read from that path.
histology_file <- opt$histology_file
histology_file <- readr::read_tsv(file = histology_file) %>%
  dplyr::filter(short_histology %in% short_histology_of_interest)

# read gtf and filter to protein coding
# cat() does not terminate the line itself; the explicit "\n" keeps this
# progress message on its own line.
cat("Reading Gencode file\n")
# Path of the GTF annotation, taken from the parsed options.
gtf_file <- opt$gtf_file
gencode_gtf <- rtracklayer::import(con = gtf_file) %>%
as.data.frame() %>%
Expand All @@ -44,6 +46,7 @@ gencode_gtf <- rtracklayer::import(con = gtf_file) %>%
unique()

# 1) read count data
# Explicit newline: cat() would otherwise leave the cursor on the same line
# and the next printed output would be glued to this message.
cat("Filtering expression data\n")
# Load the serialized count object from the path supplied in opt.
count_file <- opt$count_file
count_mat <- readRDS(file = count_file)
count_mat <- count_mat %>%
Expand Down Expand Up @@ -85,6 +88,7 @@ print(dim(count_mat))

# 2) Methylation
# read beta-values
# Explicit newline so the progress message is not merged with later output
# (cat() never appends one on its own).
cat("Reading beta values and subsetting\n")
# Load the serialized methylation object from the path supplied in opt.
methyl_file <- opt$methyl_file
methyl_data <- readRDS(file = file.path(methyl_file))
methyl_data <- methyl_data %>%
Expand Down Expand Up @@ -122,6 +126,7 @@ print(dim(methyl_data))

# 3) Splice dataset
# read splice data
# Explicit newline: cat() does not add a line terminator by itself.
cat("Reading splice data and filtering\n")
# Load the serialized splice object from the path supplied in opt.
splice_file <- opt$splice_file
splice_mat <- readRDS(splice_file)
splice_mat <- splice_mat %>%
Expand Down Expand Up @@ -162,7 +167,7 @@ samples_of_interest <- intersect(rownames(count_mat), rownames(methyl_data))
samples_of_interest <- intersect(samples_of_interest, rownames(splice_mat))

# now final filter/transformation on samples of interest

# Explicit newline so this progress message ends its own line; cat() does not
# append one.
cat("Performing feature selection\n")
# 1) RNA
# count_mat <- t(count_mat) %>% as.data.frame()
# Keep only the rows named in samples_of_interest, in that order.
count_mat <- count_mat[samples_of_interest, ]
Expand Down Expand Up @@ -219,6 +224,7 @@ write_tsv(
)

# final sample map
# Explicit newline: cat() leaves the line open otherwise, and subsequent
# console output would be appended to this message.
cat("Final step: Creating final sample map\n")
# Restrict count_samples to the samples of interest and rename the biospecimen
# ID column to Kids_First_Biospecimen_ID_RNA.
rna_samples <- count_samples %>%
  dplyr::filter(sample_id %in% samples_of_interest) %>%
  dplyr::rename("Kids_First_Biospecimen_ID_RNA" = "Kids_First_Biospecimen_ID")
Expand Down

0 comments on commit 76bb57f

Please sign in to comment.