Skip to content

Commit ade4e9d

Browse files
authored
Merge pull request #164 from Sage-Bionetworks/gen-1636-add-retraction-form
[GEN-1636] add IDs from retraction form in the retraction steps for creating clinical release files
2 parents d05e927 + 8d9337f commit ade4e9d

File tree

2 files changed

+185
-141
lines changed

2 files changed

+185
-141
lines changed

scripts/release/create_release_files.R

+16-7
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ now <- function(timeOnly = F, tz = "US/Pacific") {
8585
}
8686

8787
filter_for_release <- function(dataset, selected_dataset, selected_cohort,
88-
sor_df_filtered, retracted_patient)
88+
sor_df_filtered, retracted_patient,
89+
retracted_sample = NULL)
8990
{
9091
# get the list of released columns
9192
release_cols <- sor_df_filtered %>%
@@ -94,12 +95,18 @@ filter_for_release <- function(dataset, selected_dataset, selected_cohort,
9495

9596
release_cols <- intersect(release_cols, colnames(dataset))
9697

97-
# filter the data by cohort and release columns
98+
# filter the data by cohort and release columns, and remove retracted patients
9899
release_dat <- dataset %>%
99100
filter(cohort_internal==selected_cohort) %>%
100101
filter(!record_id %in% retracted_patient) %>%
101102
select(all_of(release_cols))
102103

104+
# remove retracted samples
105+
if(!is.null(retracted_sample) && length(retracted_sample) > 0){
106+
release_dat <- release_dat %>%
107+
filter(!cpt_genie_sample_id %in% retracted_sample)
108+
}
109+
103110
return(release_dat)
104111
}
105112

@@ -159,12 +166,13 @@ if (verbose) {
159166
print(glue("{now(timeOnly = T)}: extracting release status for {selected_cohort} {release_version}-{release_type} from SOR column '{clinical_column}'..."))
160167
}
161168

162-
# get the list of retracted patient
169+
# get the lists of retracted patients and samples
163170
if (verbose) {
164-
print(glue("{now(timeOnly = T)}: loading retracted patients ({syn_id_retraction}) for the release..."))
171+
print(glue("{now(timeOnly = T)}: loading retracted patients/samples ({syn_id_retraction}) for the release..."))
165172
}
166-
retracted_table <- synTableQuery(glue("SELECT patient_id FROM {syn_id_retraction} WHERE cohort like '%{selected_cohort}%'"))$asDataFrame()
167-
retracted_pt_list <- retracted_table$patient_id
173+
retracted_table <- synTableQuery(glue("SELECT * FROM {syn_id_retraction} WHERE cohort like '%{selected_cohort}%'"))$asDataFrame()
174+
retracted_pt_list <- na.omit(retracted_table$patient_id)
175+
retracted_sam_list <- na.omit(retracted_table$sample_id)
168176

169177
# main -----------------
170178

@@ -234,7 +242,8 @@ cpt_derived_release <- filter_for_release(cpt_derived_redacted,
234242
'Cancer panel test level dataset',
235243
selected_cohort,
236244
sor_df_filtered = sor_df_filtered,
237-
retracted_patient = retracted_pt_list)
245+
retracted_patient = retracted_pt_list,
246+
retracted_sample = retracted_sam_list)
238247
if('PRISSMM Tumor Marker level dataset' %in% unique(sor_df_filtered$dataset)){
239248
prissmm_tm_derived_release <- filter_for_release(prissmm_tm_derived_redacted,
240249
'PRISSMM Tumor Marker level dataset',

0 commit comments

Comments
 (0)