Skip to content

Commit b582b3d

Browse files
committed
Adding APOE evaluation code
1 parent e3514c1 commit b582b3d

File tree

1 file changed

+314
-0
lines changed

1 file changed

+314
-0
lines changed
Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
---
2+
title: "Evaluating effect of APOE "
3+
date: "`r Sys.Date()`"
4+
output:
5+
html_document:
6+
theme:
7+
bg: "#202123"
8+
fg: "#B8BCC2"
9+
primary: "#EA80FC"
10+
base_font: !expr bslib::font_google("Barlow")
11+
highlight: zenburn
12+
toc: true
13+
df_print: paged
14+
code_download: false
15+
toc_depth: 3
16+
editor_options:
17+
chunk_output_type: inline
18+
vignette: >
19+
%\VignetteIndexEntry{Evaluating effect of APOE}
20+
%\VignetteEngine{knitr::rmarkdown}
21+
%\VignetteEncoding{UTF-8}
22+
---
23+
24+
```{r setup, include=FALSE}
25+
knitr::opts_chunk$set(echo = TRUE,warning = FALSE)
26+
knitr::opts_knit$set(root.dir = "../../")
27+
```
28+
29+
# Libs and Paths
30+
```{r libs, include=T,message=F,warning=F}
31+
library(readr)
32+
library(dplyr)
33+
library(fgsea)
34+
library(msigdbr)
35+
library(SummarizedExperiment)
36+
library(tidyverse)
37+
library(caret)
38+
library(factoextra)
39+
library(pROC)
40+
library(minfi)
41+
```
42+
43+
# Get Cpgs
44+
45+
## AD associated cpgs (AD vs CN) - CpGs
46+
47+
```{r}
48+
#---------------------------------------------------------------------------------------------------------------
49+
# (a) AD-associated CpGs: include both 50 AD-associated CpGs + all CpGs from the 9 DMRs in Table 2
50+
#---------------------------------------------------------------------------------------------------------------
51+
AD_vs_CN <- readxl::read_xlsx(
52+
"DRAFT-TABLES_FIGURES_4-17-2021/_Supp Table 2 final_AD_vs_CN-selcted-columns-formatted-V3.xlsx",skip = 3
53+
)
54+
55+
cpgs.ad.cn <- AD_vs_CN$cpg
56+
length(cpgs.ad.cn)
57+
```
58+
59+
## AD associated cpgs (AD vs CN) - DMR
60+
```{r}
61+
62+
dmr.ad.cn <- c(
63+
"chr1:205819345-205819464",
64+
"chr20:36148672-36148861",
65+
"chr1:206786170-206786181",
66+
"chr20:36149081-36149232",
67+
"chr13:21578684-21578734",
68+
"chr14:56777451-56777526",
69+
"chr7:2728841-2728913",
70+
"chr1:202172848-202172913",
71+
"chr1:223566643-223566710"
72+
)
73+
length(dmr.ad.cn)
74+
dmr.ad.cn.cpgs.hm450 <- lapply(dmr.ad.cn, function(x) {
75+
print(x);
76+
cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "450k")}, error = function(e){return(NULL)})
77+
cpgs
78+
}) %>% unlist %>% unique
79+
80+
dmr.ad.cn.cpgs.epic <- lapply(dmr.ad.cn, function(x) {
81+
print(x);
82+
cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "EPIC")}, error = function(e){return(NULL)})
83+
cpgs
84+
}) %>% unlist %>% unique
85+
86+
87+
ad.associated.cpgs.with.dmr.cpgs <- c(cpgs.ad.cn,dmr.ad.cn.cpgs.epic,dmr.ad.cn.cpgs.hm450) %>% unique
88+
length(ad.associated.cpgs.with.dmr.cpgs)
89+
```
90+
91+
92+
## AD prioritized Cpgs
93+
```{R}
94+
#---------------------------------------------------------------------------------------------------------------
95+
# (b) cross-tissue (i.e., prioritized CpGs): include both 97 CpGs + all CpGs from the 10 DMRs in Fig 2
96+
#---------------------------------------------------------------------------------------------------------------
97+
cpgs.prioritized <- readxl::read_xlsx(
98+
"DRAFT-TABLES_FIGURES_4-17-2021/_Supp Table 3 prioritized-CpGs-crossTissue_brain_blood-clean.xlsx",skip = 3
99+
)
100+
cpgs.prioritized <- cpgs.prioritized$CpG %>% na.omit() %>% as.character
101+
length(cpgs.prioritized)
102+
```
103+
104+
105+
## AD prioritized DMRs
106+
```{R}
107+
108+
dmr.prioritized <- c(
109+
"chr16:57405979-57406511",
110+
"chr6:31554829-31555016",
111+
"chr6:30853948-30854233",
112+
"chr15:39871808-39872186",
113+
"chr6:166876490-166877038",
114+
"chr1:167090618-167090757",
115+
"chr17:7832680-7832943",
116+
"chr17:62009607-62009835",
117+
"chr12:125028166-125028339",
118+
"chr10:682693-682871"
119+
)
120+
length(dmr.prioritized)
121+
122+
dmr.prioritized.cpgs.hm450 <- lapply(dmr.prioritized, function(x) {
123+
print(x);
124+
cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "450k")}, error = function(e){return(NULL)})
125+
cpgs
126+
}) %>% unlist %>% unique
127+
128+
dmr.prioritized.cpgs.epic <- lapply(dmr.prioritized, function(x) {
129+
print(x);
130+
cpgs <- tryCatch({coMethDMR::GetCpGsInRegion(x,genome = "hg19",arrayType = "EPIC")}, error = function(e){return(NULL)})
131+
cpgs
132+
}) %>% unlist %>% unique
133+
134+
ad.prioritized.cpgs.with.dmr.cpgs <- c(cpgs.prioritized,dmr.prioritized.cpgs.epic,dmr.prioritized.cpgs.hm450) %>% unique
135+
length(ad.prioritized.cpgs.with.dmr.cpgs)
136+
```
137+
138+
139+
# Data
140+
141+
```{R data}
142+
#-----------------------------------------------------------------------------
143+
# ADNI
144+
#-----------------------------------------------------------------------------
145+
cohort <- "ADNI"
146+
dir.base <- getwd()
147+
dir.data <- file.path(dir.base,"datasets/",cohort,"/")
148+
dir.data.pca <- file.path(dir.data,"/data/DNA_methylation/pca_filtering/")
149+
adni.se <- readRDS(
150+
file.path(dir.data.pca, "ADNI_QNBMIQ_PCfiltered_min_age_at_visit_65.RDS")
151+
)
152+
153+
adni.se <- adni.se[,!adni.se$DX %in% "MCI" ]
154+
adni.se <- adni.se[,!is.na(adni.se$DX)]
155+
table(adni.se$DX)
156+
# Alzheimer's disease healthy control
157+
# 135 356
158+
159+
file.age.adni <- file.path(dir.data,"/data/DNA_methylation/ADNI_Predicted_age.xlsx")
160+
if(!file.exists(file.age.adni)){
161+
age <- age.predictor(assay(adni.se))
162+
tab.age <- age$age.pred
163+
tab.age$Sample <- rownames(tab.age)
164+
writexl::write_xlsx(tab.age,file.age.adni)
165+
} else {
166+
tab.age <- readxl::read_xlsx(file.age.adni)
167+
}
168+
tab.age$barcodes <- tab.age$Sample
169+
170+
171+
pheno <- colData(adni.se) %>% merge(tab.age,sort = FALSE)
172+
pheno$DIAGNOSIS <- factor(
173+
ifelse(pheno$DX == "CN", "healthy control", "Alzheimer's disease"),
174+
levels = c("healthy control", "mild cognitive impairment", "Alzheimer's disease")
175+
) %>% droplevels()
176+
pheno$SEX <- pheno$PTGENDER %>% factor()
177+
pheno$AGE <- pheno$age_at_visit
178+
pheno$age.pred.Elastic_Net <- pheno$Elastic_Net
179+
pheno$DIAGNOSIS <- droplevels(pheno$DIAGNOSIS)
180+
plyr::count(pheno$DIAGNOSIS)
181+
182+
# Keep only last visit
183+
pheno <- pheno %>% as.data.frame %>% dplyr::group_by(RID) %>% filter(age_at_visit == max(age_at_visit))
184+
adni.se <- adni.se[,adni.se$barcodes %in% pheno$barcodes]
185+
186+
# Put them in the same order
187+
adni.se <- adni.se[,pheno$barcodes]
188+
all(colnames(adni.se) == pheno$barcodes)
189+
190+
plyr::count(pheno$DIAGNOSIS)
191+
```
192+
193+
# MRS
194+
195+
```{R mrs}
196+
#---------------------------------------------------------------------------------------------------------------
197+
# 2. Polygenic methylation risk scores (MRS score)
198+
# (1) compute these as sum of beta values weighted by meta-analysis effect estimate (column “estimate.bacon”) in meta-analysis results file
199+
#---------------------------------------------------------------------------------------------------------------
200+
get_MRS <- function(cpgs){
201+
meta.analysis.results <- readr::read_csv("analysis_results/meta_analysis/Logistic_regression_model/AD_vs_CN/meta_analysis_glm_fixed_effect_ADNI_and_AIBL_AD_vs_CN_single_cpg_annotated.csv")
202+
MRS.weights <- meta.analysis.results$estimate.bacon[match(c(cpgs),meta.analysis.results$cpg)]
203+
names(MRS.weights) <- c(cpgs)
204+
MRS.weights %>% hist %>% plot
205+
206+
# Remove if Cpgs were not evaluated in meta-analysis
207+
MRS.weights <- MRS.weights[!is.na(MRS.weights)]
208+
209+
# Only keep the ones in ADNI
210+
common.cpgs <- intersect(rownames(adni.se),names(MRS.weights))
211+
212+
# Calculate the MRS for each sample beta dot product weights
213+
# MRS.weights
214+
MRS <- MRS.weights[common.cpgs] %*% assay(adni.se)[common.cpgs,]
215+
216+
print(all(colnames(MRS) == pheno$barcodes))
217+
return(MRS)
218+
}
219+
220+
pheno <- cbind(pheno,
221+
data.frame(
222+
"MRS_prioritized_cpgs" = get_MRS(cpgs.prioritized) %>% as.numeric(),
223+
"MRS_associated_cpgs" = get_MRS(cpgs.ad.cn) %>% as.numeric()
224+
)
225+
)
226+
227+
pheno$MRS_prioritized_cpgs %>% hist
228+
pheno$MRS_associated_cpgs %>% hist
229+
```
230+
231+
# Evaluate AUC
232+
233+
## Aux function to get AUC using the EIC package + glm fitted model
234+
235+
```{R get_auc}
236+
get_auc <- function(model.formula = model.formula,data.training = NULL ,data.testing = NULL){
237+
model <- glm(
238+
formula = model.formula,
239+
data = data.training,
240+
family = binomial
241+
)
242+
#-----------------------------------------------------------------------------
243+
# Testing model
244+
#-----------------------------------------------------------------------------
245+
data.testing$probabilities <- model %>% predict(data.testing, type = "response")
246+
result <- roc(DIAGNOSIS ~ probabilities, data = data.testing,print.auc=TRUE)
247+
auc <- result$auc
248+
249+
return(auc)
250+
}
251+
```
252+
253+
## Set models
254+
```{R}
255+
#-----------------------------------------------------------------------------
256+
# Models
257+
#-----------------------------------------------------------------------------
258+
set.seed(12345)
259+
var.predictors.no.apoe <- c("MRS", "AGE", "SEX", "B", "NK" ,"CD4T", "CD8T","Mono", "Neutro")
260+
var.predictors.with.apoe <- c(var.predictors.no.apoe, "APOE4")
261+
var.response <- "DIAGNOSIS"
262+
model1 <- paste0(var.response, " ~ ", paste(var.predictors.no.apoe,collapse = " + "))
263+
model2 <- paste0(var.response, " ~ ", paste(var.predictors.with.apoe,collapse = " + "))
264+
265+
data <- pheno[,c("DIAGNOSIS","APOE4","MRS_associated_cpgs","MRS_prioritized_cpgs", "AGE", "SEX", "B", "NK" ,"CD4T", "CD8T","Mono", "Neutro")]
266+
```
267+
268+
## Split data
269+
```{R}
270+
#-----------------------------------------------------------------------------
271+
# Split data in 10 balanced groups
272+
#-----------------------------------------------------------------------------
273+
list.of.partions <- createFolds(data$DIAGNOSIS,k = 10)
274+
df.foldid <- list.of.partions %>% plyr::ldply(.fun = function(x) data.frame(x)) %>% dplyr::arrange(x)
275+
print(table(df.foldid$.id,data$DIAGNOSIS[df.foldid$x]))
276+
```
277+
278+
## Fit model and test
279+
280+
For each fold We will make the following (10x for each partition):
281+
- train on 9 partitions
282+
- test on the one left outside
283+
- get AUC
284+
Then we will get the average AUC.
285+
286+
```{R tab}
287+
df <- list("Model1" = model1,"Model2" = model2) %>% purrr::map_dfr(.f = function(model) {
288+
print(model)
289+
list("MRS_associated_cpgs" = "MRS_associated_cpgs","MRS_prioritized_cpgs" = "MRS_prioritized_cpgs") %>% purrr::map_dfr(.f = function(col.mrs ) {
290+
291+
print(col.mrs)
292+
unique(df.foldid$.id) %>% sort %>% purrr::map_dfr(.f = function(fold){
293+
print(fold)
294+
data$MRS <- data[,col.mrs, drop = T]
295+
data.testing <- data[df.foldid$x[df.foldid$.id == fold],]
296+
data.training <- data[df.foldid$x[df.foldid$.id != fold],]
297+
auc <- get_auc(model.formula = model,data.training = data.training, data.testing = data.testing)
298+
data.frame("MRS" = col.mrs,"Model" = model,"Fold_ID" = fold, "AUC" = auc %>% as.numeric)
299+
})
300+
})
301+
})
302+
303+
304+
summary.tab <- df %>% dplyr::group_by(MRS,Model) %>% summarise(max(AUC), min(AUC),mean(AUC),sd(AUC))
305+
writexl::write_xlsx(summary.tab,path = "Eval_effect_APOE_summary_tab.xlsx")
306+
summary.tab %>% gt::gt()
307+
summary.tab %>% t
308+
309+
```
310+
311+
# Session info
312+
```{R}
313+
sessioninfo::session_info()
314+
```

0 commit comments

Comments
 (0)