|
| 1 | +--- |
| 2 | +title: "Evaluating effect of APOE " |
| 3 | +date: "`r Sys.Date()`" |
| 4 | +output: |
| 5 | + html_document: |
| 6 | + theme: |
| 7 | + bg: "#202123" |
| 8 | + fg: "#B8BCC2" |
| 9 | + primary: "#EA80FC" |
| 10 | + base_font: !expr bslib::font_google("Barlow") |
| 11 | + highlight: zenburn |
| 12 | + toc: true |
| 13 | + df_print: paged |
| 14 | + code_download: false |
| 15 | + toc_depth: 3 |
| 16 | +editor_options: |
| 17 | + chunk_output_type: inline |
| 18 | +vignette: > |
| 19 | + %\VignetteIndexEntry{Evaluating effect of APOE} |
| 20 | + %\VignetteEngine{knitr::rmarkdown} |
| 21 | + %\VignetteEncoding{UTF-8} |
| 22 | +--- |
| 23 | + |
| 24 | +```{r setup, include=FALSE} |
| 25 | +knitr::opts_chunk$set(echo = TRUE,warning = FALSE) |
| 26 | +knitr::opts_knit$set(root.dir = "../../") |
| 27 | +``` |
| 28 | + |
| 29 | +# Libs and Paths |
| 30 | +```{r libs, include=T,message=F,warning=F} |
| 31 | +library(readr) |
| 32 | +library(dplyr) |
| 33 | +library(fgsea) |
| 34 | +library(msigdbr) |
| 35 | +library(SummarizedExperiment) |
| 36 | +library(tidyverse) |
| 37 | +library(caret) |
| 38 | +library(factoextra) |
| 39 | +library(pROC) |
| 40 | +library(minfi) |
| 41 | +``` |
| 42 | + |
| 43 | +# Get Cpgs |
| 44 | + |
| 45 | +## AD associated cpgs (AD vs CN) - CpGs |
| 46 | + |
| 47 | +```{r} |
| 48 | +#--------------------------------------------------------------------------------------------------------------- |
| 49 | +# (a) AD-associated CpGs: include both 50 AD-associated CpGs + all CpGs from the 9 DMRs in Table 2 |
| 50 | +#--------------------------------------------------------------------------------------------------------------- |
| 51 | +AD_vs_CN <- readxl::read_xlsx( |
| 52 | + "DRAFT-TABLES_FIGURES_4-17-2021/_Supp Table 2 final_AD_vs_CN-selcted-columns-formatted-V3.xlsx",skip = 3 |
| 53 | +) |
| 54 | +
|
| 55 | +cpgs.ad.cn <- AD_vs_CN$cpg |
| 56 | +length(cpgs.ad.cn) |
| 57 | +``` |
| 58 | + |
| 59 | +## AD associated cpgs (AD vs CN) - DMR |
| 60 | +```{r} |
| 61 | +
|
| 62 | +dmr.ad.cn <- c( |
| 63 | + "chr1:205819345-205819464", |
| 64 | + "chr20:36148672-36148861", |
| 65 | + "chr1:206786170-206786181", |
| 66 | + "chr20:36149081-36149232", |
| 67 | + "chr13:21578684-21578734", |
| 68 | + "chr14:56777451-56777526", |
| 69 | + "chr7:2728841-2728913", |
| 70 | + "chr1:202172848-202172913", |
| 71 | + "chr1:223566643-223566710" |
| 72 | +) |
| 73 | +length(dmr.ad.cn) |
| 74 | +dmr.ad.cn.cpgs.hm450 <- lapply(dmr.ad.cn, function(x) { |
| 75 | + print(x); |
| 76 | + cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "450k")}, error = function(e){return(NULL)}) |
| 77 | + cpgs |
| 78 | +}) %>% unlist %>% unique |
| 79 | +
|
| 80 | +dmr.ad.cn.cpgs.epic <- lapply(dmr.ad.cn, function(x) { |
| 81 | + print(x); |
| 82 | + cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "EPIC")}, error = function(e){return(NULL)}) |
| 83 | + cpgs |
| 84 | +}) %>% unlist %>% unique |
| 85 | +
|
| 86 | +
|
| 87 | +ad.associated.cpgs.with.dmr.cpgs <- c(cpgs.ad.cn,dmr.ad.cn.cpgs.epic,dmr.ad.cn.cpgs.hm450) %>% unique |
| 88 | +length(ad.associated.cpgs.with.dmr.cpgs) |
| 89 | +``` |
| 90 | + |
| 91 | + |
| 92 | +## AD prioritized Cpgs |
| 93 | +```{R} |
| 94 | +#--------------------------------------------------------------------------------------------------------------- |
| 95 | +# (b) cross-tissue (i.e., prioritized CpGs): include both 97 CpGs + all CpGs from the 10 DMRs in Fig 2 |
| 96 | +#--------------------------------------------------------------------------------------------------------------- |
| 97 | +cpgs.prioritized <- readxl::read_xlsx( |
| 98 | + "DRAFT-TABLES_FIGURES_4-17-2021/_Supp Table 3 prioritized-CpGs-crossTissue_brain_blood-clean.xlsx",skip = 3 |
| 99 | +) |
| 100 | +cpgs.prioritized <- cpgs.prioritized$CpG %>% na.omit() %>% as.character |
| 101 | +length(cpgs.prioritized) |
| 102 | +``` |
| 103 | + |
| 104 | + |
| 105 | +## AD prioritized DMRs |
| 106 | +```{R} |
| 107 | +
|
| 108 | +dmr.prioritized <- c( |
| 109 | + "chr16:57405979-57406511", |
| 110 | + "chr6:31554829-31555016", |
| 111 | + "chr6:30853948-30854233", |
| 112 | + "chr15:39871808-39872186", |
| 113 | + "chr6:166876490-166877038", |
| 114 | + "chr1:167090618-167090757", |
| 115 | + "chr17:7832680-7832943", |
| 116 | + "chr17:62009607-62009835", |
| 117 | + "chr12:125028166-125028339", |
| 118 | + "chr10:682693-682871" |
| 119 | +) |
| 120 | +length(dmr.prioritized) |
| 121 | +
|
| 122 | +dmr.prioritized.cpgs.hm450 <- lapply(dmr.prioritized, function(x) { |
| 123 | + print(x); |
| 124 | + cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "450k")}, error = function(e){return(NULL)}) |
| 125 | + cpgs |
| 126 | +}) %>% unlist %>% unique |
| 127 | +
|
| 128 | +dmr.prioritized.cpgs.epic <- lapply(dmr.prioritized, function(x) { |
| 129 | + print(x); |
| 130 | + cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "EPIC")}, error = function(e){return(NULL)}) |
| 131 | + cpgs |
| 132 | +}) %>% unlist %>% unique |
| 133 | +
|
| 134 | +ad.prioritized.cpgs.with.dmr.cpgs <- c(cpgs.prioritized,dmr.prioritized.cpgs.epic,dmr.prioritized.cpgs.hm450) %>% unique |
| 135 | +length(ad.prioritized.cpgs.with.dmr.cpgs) |
| 136 | +``` |
| 137 | + |
| 138 | + |
| 139 | +# Data |
| 140 | + |
| 141 | +```{R data} |
| 142 | +#----------------------------------------------------------------------------- |
| 143 | +# ADNI |
| 144 | +#----------------------------------------------------------------------------- |
| 145 | +cohort <- "ADNI" |
| 146 | +dir.base <- getwd() |
| 147 | +dir.data <- file.path(dir.base,"datasets/",cohort,"/") |
| 148 | +dir.data.pca <- file.path(dir.data,"/data/DNA_methylation/pca_filtering/") |
| 149 | +adni.se <- readRDS( |
| 150 | + file.path(dir.data.pca, "ADNI_QNBMIQ_PCfiltered_min_age_at_visit_65.RDS") |
| 151 | +) |
| 152 | +
|
| 153 | +adni.se <- adni.se[,!adni.se$DX %in% "MCI" ] |
| 154 | +adni.se <- adni.se[,!is.na(adni.se$DX)] |
| 155 | +table(adni.se$DX) |
| 156 | +# Alzheimer's disease healthy control |
| 157 | +# 135 356 |
| 158 | +
|
| 159 | +file.age.adni <- file.path(dir.data,"/data/DNA_methylation/ADNI_Predicted_age.xlsx") |
| 160 | +if(!file.exists(file.age.adni)){ |
| 161 | + age <- age.predictor(assay(adni.se)) |
| 162 | + tab.age <- age$age.pred |
| 163 | + tab.age$Sample <- rownames(tab.age) |
| 164 | + writexl::write_xlsx(tab.age,file.age.adni) |
| 165 | +} else { |
| 166 | + tab.age <- readxl::read_xlsx(file.age.adni) |
| 167 | +} |
| 168 | +tab.age$barcodes <- tab.age$Sample |
| 169 | +
|
| 170 | +
|
| 171 | +pheno <- colData(adni.se) %>% merge(tab.age,sort = FALSE) |
| 172 | +pheno$DIAGNOSIS <- factor( |
| 173 | + ifelse(pheno$DX == "CN", "healthy control", "Alzheimer's disease"), |
| 174 | + levels = c("healthy control", "mild cognitive impairment", "Alzheimer's disease") |
| 175 | +) %>% droplevels() |
| 176 | +pheno$SEX <- pheno$PTGENDER %>% factor() |
| 177 | +pheno$AGE <- pheno$age_at_visit |
| 178 | +pheno$age.pred.Elastic_Net <- pheno$Elastic_Net |
| 179 | +pheno$DIAGNOSIS <- droplevels(pheno$DIAGNOSIS) |
| 180 | +plyr::count(pheno$DIAGNOSIS) |
| 181 | +
|
| 182 | +# Keep only last visit |
| 183 | +pheno <- pheno %>% as.data.frame %>% dplyr::group_by(RID) %>% filter(age_at_visit == max(age_at_visit)) |
| 184 | +adni.se <- adni.se[,adni.se$barcodes %in% pheno$barcodes] |
| 185 | +
|
| 186 | +# Put them in the same order |
| 187 | +adni.se <- adni.se[,pheno$barcodes] |
| 188 | +all(colnames(adni.se) == pheno$barcodes) |
| 189 | +
|
| 190 | +plyr::count(pheno$DIAGNOSIS) |
| 191 | +``` |
| 192 | + |
| 193 | +# MRS |
| 194 | + |
| 195 | +```{R mrs} |
| 196 | +#--------------------------------------------------------------------------------------------------------------- |
| 197 | +# 2. Polygenic methylation risk scores (MRS score) |
| 198 | +# (1) compute these as sum of beta values weighted by meta-analysis effect estimate (column “estimate.bacon”) in meta-analysis results file |
| 199 | +#--------------------------------------------------------------------------------------------------------------- |
| 200 | +get_MRS <- function(cpgs){ |
| 201 | + meta.analysis.results <- readr::read_csv("analysis_results/meta_analysis/Logistic_regression_model/AD_vs_CN/meta_analysis_glm_fixed_effect_ADNI_and_AIBL_AD_vs_CN_single_cpg_annotated.csv") |
| 202 | + MRS.weights <- meta.analysis.results$estimate.bacon[match(c(cpgs),meta.analysis.results$cpg)] |
| 203 | + names(MRS.weights) <- c(cpgs) |
| 204 | + MRS.weights %>% hist %>% plot |
| 205 | + |
| 206 | + # Remove if Cpgs were not evaluated in meta-analysis |
| 207 | + MRS.weights <- MRS.weights[!is.na(MRS.weights)] |
| 208 | + |
| 209 | + # Only keep the ones in ADNI |
| 210 | + common.cpgs <- intersect(rownames(adni.se),names(MRS.weights)) |
| 211 | + |
| 212 | + # Calculate the MRS for each sample beta dot product weights |
| 213 | + # MRS.weights |
| 214 | + MRS <- MRS.weights[common.cpgs] %*% assay(adni.se)[common.cpgs,] |
| 215 | + |
| 216 | + print(all(colnames(MRS) == pheno$barcodes)) |
| 217 | + return(MRS) |
| 218 | +} |
| 219 | +
|
| 220 | +pheno <- cbind(pheno, |
| 221 | + data.frame( |
| 222 | + "MRS_prioritized_cpgs" = get_MRS(cpgs.prioritized) %>% as.numeric(), |
| 223 | + "MRS_associated_cpgs" = get_MRS(cpgs.ad.cn) %>% as.numeric() |
| 224 | + ) |
| 225 | +) |
| 226 | +
|
| 227 | +pheno$MRS_prioritized_cpgs %>% hist |
| 228 | +pheno$MRS_associated_cpgs %>% hist |
| 229 | +``` |
| 230 | + |
| 231 | +# Evaluate AUC |
| 232 | + |
| 233 | +## Aux function to get AUC using the EIC package + glm fitted model |
| 234 | + |
| 235 | +```{R get_auc} |
| 236 | +get_auc <- function(model.formula = model.formula,data.training = NULL ,data.testing = NULL){ |
| 237 | + model <- glm( |
| 238 | + formula = model.formula, |
| 239 | + data = data.training, |
| 240 | + family = binomial |
| 241 | + ) |
| 242 | + #----------------------------------------------------------------------------- |
| 243 | + # Testing model |
| 244 | + #----------------------------------------------------------------------------- |
| 245 | + data.testing$probabilities <- model %>% predict(data.testing, type = "response") |
| 246 | + result <- roc(DIAGNOSIS ~ probabilities, data = data.testing,print.auc=TRUE) |
| 247 | + auc <- result$auc |
| 248 | + |
| 249 | + return(auc) |
| 250 | +} |
| 251 | +``` |
| 252 | + |
| 253 | +## Set models |
| 254 | +```{R} |
| 255 | +#----------------------------------------------------------------------------- |
| 256 | +# Models |
| 257 | +#----------------------------------------------------------------------------- |
| 258 | +set.seed(12345) |
| 259 | +var.predictors.no.apoe <- c("MRS", "AGE", "SEX", "B", "NK" ,"CD4T", "CD8T","Mono", "Neutro") |
| 260 | +var.predictors.with.apoe <- c(var.predictors.no.apoe, "APOE4") |
| 261 | +var.response <- "DIAGNOSIS" |
| 262 | +model1 <- paste0(var.response, " ~ ", paste(var.predictors.no.apoe,collapse = " + ")) |
| 263 | +model2 <- paste0(var.response, " ~ ", paste(var.predictors.with.apoe,collapse = " + ")) |
| 264 | +
|
| 265 | +data <- pheno[,c("DIAGNOSIS","APOE4","MRS_associated_cpgs","MRS_prioritized_cpgs", "AGE", "SEX", "B", "NK" ,"CD4T", "CD8T","Mono", "Neutro")] |
| 266 | +``` |
| 267 | + |
| 268 | +## Split data |
| 269 | +```{R} |
| 270 | +#----------------------------------------------------------------------------- |
| 271 | +# Split data in 10 balanced groups |
| 272 | +#----------------------------------------------------------------------------- |
| 273 | +list.of.partions <- createFolds(data$DIAGNOSIS,k = 10) |
| 274 | +df.foldid <- list.of.partions %>% plyr::ldply(.fun = function(x) data.frame(x)) %>% dplyr::arrange(x) |
| 275 | +print(table(df.foldid$.id,data$DIAGNOSIS[df.foldid$x])) |
| 276 | +``` |
| 277 | + |
| 278 | +## Fit model and test |
| 279 | + |
| 280 | +For each fold We will make the following (10x for each partition): |
| 281 | +- train on 9 partitions |
| 282 | +- test on the one left outside |
| 283 | +- get AUC |
| 284 | +Then we will get the average AUC. |
| 285 | + |
| 286 | +```{R tab} |
| 287 | +df <- list("Model1" = model1,"Model2" = model2) %>% purrr::map_dfr(.f = function(model) { |
| 288 | + print(model) |
| 289 | + list("MRS_associated_cpgs" = "MRS_associated_cpgs","MRS_prioritized_cpgs" = "MRS_prioritized_cpgs") %>% purrr::map_dfr(.f = function(col.mrs ) { |
| 290 | + |
| 291 | + print(col.mrs) |
| 292 | + unique(df.foldid$.id) %>% sort %>% purrr::map_dfr(.f = function(fold){ |
| 293 | + print(fold) |
| 294 | + data$MRS <- data[,col.mrs, drop = T] |
| 295 | + data.testing <- data[df.foldid$x[df.foldid$.id == fold],] |
| 296 | + data.training <- data[df.foldid$x[df.foldid$.id != fold],] |
| 297 | + auc <- get_auc(model.formula = model,data.training = data.training, data.testing = data.testing) |
| 298 | + data.frame("MRS" = col.mrs,"Model" = model,"Fold_ID" = fold, "AUC" = auc %>% as.numeric) |
| 299 | + }) |
| 300 | + }) |
| 301 | +}) |
| 302 | +
|
| 303 | +
|
| 304 | +summary.tab <- df %>% dplyr::group_by(MRS,Model) %>% summarise(max(AUC), min(AUC),mean(AUC),sd(AUC)) |
| 305 | +writexl::write_xlsx(summary.tab,path = "Eval_effect_APOE_summary_tab.xlsx") |
| 306 | +summary.tab %>% gt::gt() |
| 307 | +summary.tab %>% t |
| 308 | +
|
| 309 | +``` |
| 310 | + |
| 311 | +# Session info |
| 312 | +```{R} |
| 313 | +sessioninfo::session_info() |
| 314 | +``` |
0 commit comments