val_check.Rmd

---
title: "val_check"
author: "Daniel Petrie"
date: "2024-10-24"
output: html_document
editor_options: 
  chunk_output_type: console
---

```{r Global}
library("ggplot2") #For plotting
library("GGally") #ggpairs()
library("tidyverse") #Wranglin
library("dplyr") #Wranglin
library("interactions")
library("lme4") #MLM
library("lmerTest") #p-vals
library("ggeffects") #For marginal/conditional effects plots
library("marginaleffects") #For hypothesis_test()
library("parameters") #Other useful marginal effects functions
library("gdata") #upperTriangle()
library("mgcv") #GAMM
library("ggpubr") #Combining plots
library("bmlm") #Centering made easy
library("neuroCombat") #Harminization
library("LNCDR") #waterfall plot, lunaize plots 
library("gratia") #mgcv companion package. Using draw among other funcs.
library("psych") #Descriptives
library("ggrain") #Raincloud plot
library("ggseg") #Brain images
library("ggseg3d") #3d brain images
library("see") #Theme modern
library("viridis") #Additional colors
library("viridisLite") #Additional colors
library("ggnewscale") #new_scale()
#library("corrplot") #Corrplot()
# Project data directory (change to something better (OneDrive? Hera?) at
# some point; all files could live in one directory for this project).
# The file is read through an explicit path rather than via setwd(): setwd()
# changes the working directory for the whole session as a side effect, and
# knitr restores the directory after the chunk anyway.
data_dir <- "C:/Users/djpet/OneDrive/Documents/daw_resting_state"
full <- read.csv(file.path(data_dir, "daw_project_081624.csv"), header = TRUE)
```

```{r Cleaning}
# Variable recodes, done in a single pass:
#   sex      - kept as a factor for modelling
#   sex_p    - spelled-out label for plots/analyses ("M" -> "Male",
#              "F" -> "Female", anything else passed through), stored as an
#              ordered factor with levels Male < Female; values outside those
#              two levels become NA
#   visitnum - factor (can be changed as needed)
#   inv_age  - inverse age (1 / age) for linear models
full <- full %>%
  mutate(
    sex = as.character(sex),
    sex_p = ordered(
      case_when(
        sex == "M" ~ "Male",
        sex == "F" ~ "Female",
        TRUE ~ sex
      ),
      levels = c("Male", "Female")
    ),
    sex = as.factor(sex),
    visitnum = as.factor(visitnum),
    inv_age = 1 / age
  )

# Merge sanity check: list rows whose `modelbased` value repeats an earlier
# row, showing columns 1-3, 8 and 20 for inspection.
# Flagged: 11475_2 / 11498_2 / 11589_2. These ids have duplicated Daw visits
# but different rest days; the rows with imaging data are the ones to keep.
full[duplicated(full$modelbased), c(1:3, 8, 20)]

# Drop the visit-2 rows of the three flagged ids.
full <- full %>%
  filter(
    !((id == "11475" | id == "11498" | id == "11589") & visitnum == "2")
  )

# Numeric copy of the visit number.
# `visitnum` is a factor at this point, and as.numeric() on a factor returns
# the internal level codes (1, 2, 3, ...) rather than the labels. Converting
# through as.character() recovers the recorded visit numbers even when the
# levels do not start at 1 or have gaps.
full$visitnum_numeric <- as.numeric(as.character(full$visitnum))

# Factor copy of id, needed for the s(id_fac, bs = "re") random-effect
# smooths in the GAM fits.
full$id_fac <- as.factor(full$id)

# Age bands for plotting. cut() uses right-closed intervals here, so the bins
# are [10, 13], (13, 17], (17, 24] and (24, 34]; ages outside 10-34 become NA.
full$age_cat <- cut(
  full$age,
  breaks = c(10, 13, 17, 24, 34),
  labels = c("10-13 years", "14-17 years", "18-24 years", "25-34 years"),
  include.lowest = TRUE
)
```


```{r}
# Long format: one row per original row and ROI. The four harmonised ROI
# columns are stacked into `value`, with the source column name stored in
# `outcome` and a factor copy of it in `outcome_fac`.
iron_long <- full %>%
  pivot_longer(
    cols = c(
      "harox_pallidum_harm",
      "harox_caudate_harm",
      "harox_nacc_harm",
      "harox_putamen_harm"
    ),
    names_to = "outcome",
    values_to = "value"
  ) %>%
  mutate(outcome_fac = as.factor(outcome))
```

I dislike this approach because it deviates too much from the current design.

```{r}
# Open questions: residualize perhaps? Fit as a plain lm?
#
# Both fits below are stored in `val_2`; the GAM overwrites the mixed model,
# so everything downstream of this block sees the GAM.

# (1) Linear mixed model: first-stage stay on sex, visit number and the
#     three-way interaction age_cw x age_b x putamen (z), with a random
#     intercept per id.
val_2 <- lmer(
  firststagestay ~ 1 + sex + visitnum_numeric +
    age_cw * age_b * harox_putamen_harm_z +
    (1 | id),
  data = full
)
summary(val_2)

# (2) GAM: penalised smooth of age (default basis size), linear terms for the
#     four ROI z-scores, and a random-effect smooth for id.
# Alternatives tried and left out of the formula:
#   s(age, k = 3, fx = FALSE)
#   s(harox_putamen_harm_z, fx = F)   [optionally k = 3]
#   ti(age, harox_putamen_harm_z, fx = F)
#   s(harox_caudate_harm_z, fx = F)   [optionally k = 3]
#   s(harox_nacc_harm_z, fx = F)      [optionally k = 3]
#   s(harox_pallidum_harm_z, fx = F)  [optionally k = 3]
#   s(age, by = harox_putamen_harm, k = 3, fx = F)
#   s(age, by = harox_caudate_harm_z, k = 3, fx = F)
#   s(age, by = harox_nacc_harm_z, k = 3, fx = F)
#   s(age, by = harox_pallidum_harm_z, k = 3, fx = F)
val_2 <- gam(
  firststagestay ~ 1 + sex + visitnum_numeric +
    s(age, fx = FALSE) +
    harox_putamen_harm_z +
    harox_caudate_harm_z +
    harox_nacc_harm_z +
    harox_pallidum_harm_z +
    s(id_fac, bs = "re"),
  data = full,
  method = "REML"
)
summary(val_2)

# Predicted first-stage stay across age at representative putamen values.
plot(ggpredict(val_2, terms = c("age", "harox_putamen_harm_z")))

# Marginal predictions of val_2 along each ROI predictor.
q1 <- ggpredict(val_2, terms = c("harox_putamen_harm_z"))
q2 <- ggpredict(val_2, terms = c("harox_caudate_harm_z"))
q3 <- ggpredict(val_2, terms = c("harox_pallidum_harm_z"))
# The term name previously carried a trailing space ("harox_nacc_harm_z "),
# which does not match the model variable exactly.
q4 <- ggpredict(val_2, terms = c("harox_nacc_harm_z"))

# Convert one ggpredict() result to a plain data frame, tag it with its ROI
# and add `predicted_z`, the mean-centred prediction (centred only, not divided
# by the SD). scale() returns a 1-column matrix, so it is flattened to a
# numeric vector before being stored as a column.
label_margins <- function(marg, roi_label) {
  marg_df <- as.data.frame(marg)
  marg_df$roi <- roi_label
  marg_df$predicted_z <- as.numeric(scale(marg_df$predicted, scale = FALSE))
  marg_df
}

q1_marg <- label_margins(q1, "putamen")
q2_marg <- label_margins(q2, "caudate")
q3_marg <- label_margins(q3, "pallidum")
q4_marg <- label_margins(q4, "nacc")

# Stack the four ROI tables for plotting.
q_full <- rbind(
  q1_marg,
  q2_marg,
  q3_marg,
  q4_marg
)


# Predicted first-stage stay per ROI on a reversed x axis running from 2 to -2.
# The limits and the reverse transform are set in ONE scale: adding xlim()
# after scale_x_continuous() replaces that scale (ggplot2 prints "Scale for x
# is already present ...") rather than combining with it.
ggplot(q_full, aes(x = x, y = predicted, color = roi)) +
  #geom_point(data = iron_long, aes(x = value, y = firststagestay)) +
  #geom_point() +
  geom_smooth(method = "lm", aes(group = roi)) +
  scale_x_continuous(transform = "reverse", limits = c(2, -2)) +
  #facet_wrap(~ roi, nrow = 1) +
  theme_minimal() +
  ylab("FSS") +
  xlab("nT2*w") +
  ggtitle("First-Stage Stay")

# Same predictions (smoothed with a GAM) drawn over the raw observations.
# NOTE(review): scale(value) standardises across all four ROIs pooled, whereas
# the model predictors are the per-ROI *_z columns -- confirm the points and
# the lines are meant to share this axis.
ggplot() +
  geom_point(
    data = iron_long,
    aes(x = scale(value), y = firststagestay, colour = outcome_fac)
  ) +
  #geom_point() +
  geom_smooth(
    method = "gam",
    data = q_full,
    aes(x = x, y = predicted, color = roi, group = roi)
  ) +
  scale_x_continuous(transform = "reverse", limits = c(2, -2)) +
  #facet_wrap(~ roi, nrow = 1) +
  theme_minimal() +
  ylab("FSS") +
  xlab("nT2*w") +
  ggtitle("First-Stage Stay")


#ggplot() +
#  geom_point(data = iron_long, aes(x = scale(value), y = firststagestay, colour = age)) #+
#  #geom_point() +
#  geom_smooth(method = "gam", data = q_full, aes(x = x, y = predicted, color = roi)) +
#  scale_x_continuous(transform = "reverse") +
#  xlim(2,-2) +
#  #facet_wrap(~ roi, nrow = 1) +
#  theme_minimal() +
#  ylab("FSS") +
#  xlab("nT2*w") +
#  ggtitle("First-Stage Stay")
```


Model 3 thingy. I like this one because I do not have to do anything else. The downside is that I have no formal test between the models explicitly.

```{r}
# One GAM per ROI (t1 putamen, t2 caudate, t3 pallidum, t4 nacc), all with the
# same right-hand side: sex and visit number as linear terms, penalised
# smooths (k = 3) of age, mb_resid_z and fss_resid_z, and a random-effect
# smooth for id. Fitted by REML.
# `fx = FALSE` is spelled out in full: the shorthand `F` is an ordinary
# variable that can be reassigned.
t1 <- gam(
  harox_putamen_harm ~ 1 + sex + visitnum_numeric +
    s(age, k = 3, fx = FALSE) +
    s(mb_resid_z, k = 3, fx = FALSE) +
    s(fss_resid_z, k = 3, fx = FALSE) +
    s(id_fac, bs = "re"),
  method = "REML",
  data = full
)
summary(t1)

t2 <- gam(
  harox_caudate_harm ~ 1 + sex + visitnum_numeric +
    s(age, k = 3, fx = FALSE) +
    s(mb_resid_z, k = 3, fx = FALSE) +
    s(fss_resid_z, k = 3, fx = FALSE) +
    s(id_fac, bs = "re"),
  method = "REML",
  data = full
)
summary(t2)

t3 <- gam(
  harox_pallidum_harm ~ 1 + sex + visitnum_numeric +
    s(age, k = 3, fx = FALSE) +
    s(mb_resid_z, k = 3, fx = FALSE) +
    s(fss_resid_z, k = 3, fx = FALSE) +
    s(id_fac, bs = "re"),
  method = "REML",
  data = full
)
summary(t3)

t4 <- gam(
  harox_nacc_harm ~ 1 + sex + visitnum_numeric +
    s(age, k = 3, fx = FALSE) +
    s(mb_resid_z, k = 3, fx = FALSE) +
    s(fss_resid_z, k = 3, fx = FALSE) +
    s(id_fac, bs = "re"),
  method = "REML",
  data = full
)
summary(t4)
```

Extracting smooths

```{r}
# FSS margins: predictions of each ROI model along fss_resid_z, converted to
# a data frame, tagged with the ROI, and mean-centred (`predicted_z`) so the
# four curves can share one axis.
# scale(..., scale = FALSE) only centres (no division by the SD) and returns a
# 1-column matrix; as.numeric() stores it as an ordinary numeric column so the
# y aesthetic below is a plain vector.
t1_marg <- ggpredict(t1, terms = c("fss_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "putamen",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t2_marg <- ggpredict(t2, terms = c("fss_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "caudate",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t3_marg <- ggpredict(t3, terms = c("fss_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "pallidum",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t4_marg <- ggpredict(t4, terms = c("fss_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "nacc",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )

# Stack the four ROI tables for plotting.
t_full <- rbind(t1_marg, t2_marg, t3_marg, t4_marg)

# Centred predictions per ROI against fss_resid_z, y axis reversed.
ggplot(t_full, aes(x = x, y = predicted_z, color = roi)) +
  #geom_point() +
  geom_smooth(method = "gam", aes(group = roi)) +
  scale_y_continuous(transform = "reverse") +
  #facet_wrap(~ roi, nrow = 1) +
  theme_minimal() +
  ylab("nT2*w") +
  xlab("FSS") +
  ggtitle("First-Stage Stay")
```

```{r}
# MB margins: predictions of each ROI model along mb_resid_z (the header here
# previously said "fss margins"), converted to a data frame, tagged with the
# ROI, and mean-centred (`predicted_z`).
# scale(..., scale = FALSE) only centres (no division by the SD) and returns a
# 1-column matrix; as.numeric() stores it as an ordinary numeric column so the
# y aesthetic below is a plain vector.
t1_marg_mb <- ggpredict(t1, terms = c("mb_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "putamen",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t2_marg_mb <- ggpredict(t2, terms = c("mb_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "caudate",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t3_marg_mb <- ggpredict(t3, terms = c("mb_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "pallidum",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )
t4_marg_mb <- ggpredict(t4, terms = c("mb_resid_z")) %>%
  as.data.frame() %>%
  mutate(
    roi = "nacc",
    predicted_z = as.numeric(scale(predicted, scale = FALSE))
  )

# Stack the four ROI tables for plotting.
t_full_mb <- rbind(t1_marg_mb, t2_marg_mb, t3_marg_mb, t4_marg_mb)

# Centred predictions per ROI against mb_resid_z, y axis reversed.
ggplot(t_full_mb, aes(x = x, y = predicted_z, color = roi)) +
  #geom_point() +
  geom_smooth(method = "gam", aes(group = roi)) +
  scale_y_continuous(transform = "reverse") +
  #facet_wrap(~ roi, nrow = 1) +
  theme_minimal() +
  ylab("nT2*w") +
  xlab("MB") +
  ggtitle("Model Based")
```


This might be the most accurate modeling approach. It does not stray too far from the original analyses. But stacking the ROIs multiplies the number of rows, and with it the residual degrees of freedom, which makes the p-values somewhat too optimistic (too small) unless the dependence is modeled. Observations are no longer independent: ROIs are nested within person, and each person is measured at repeated visits.

As I did below, I could also specify random intercepts for the brain-region nesting. Essentially I want [TODO: finish this thought]

```{r}
# Similar to the main-effects models, but fitted to all ROIs at once on the
# long data: a separate fss_resid_z smooth per ROI (by = outcome_fac) plus the
# outcome_fac main effect, a shared age smooth, and a random-effect smooth
# for id.
# Alternatives tried and left out of the formula:
#   fss_resid_z * outcome_fac          (sorta works)
#   w_z * outcome_fac;  w_z;  pi
#   mb_resid_z * outcome_fac
#   firststagestay_z * outcome_fac
#   modelbased_z * outcome_fac
#   s(mb_resid_z, by = outcome_fac, k = 3)
val_3 <- gam(
  value ~ 1 + sex + visitnum_numeric +
    s(age, k = 3) +
    s(fss_resid_z, by = outcome_fac, k = 3) +
    outcome_fac +
    s(id_fac, bs = "re"),
  method = "REML",
  data = iron_long
)
summary(val_3)

# Predicted value along fss_resid_z for each ROI, y axis reversed.
# (This call used to appear twice in the chunk; the identical second copy
# was dropped.)
plot(ggpredict(val_3, terms = c("fss_resid_z", "outcome_fac")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")

# Disabled: firststagestay_z is not a term in val_3 as currently specified,
# so ggpredict() cannot find it. Re-enable together with the
# firststagestay_z * outcome_fac term in the formula above.
#plot(ggpredict(val_3, terms = c("firststagestay_z", "outcome_fac")), show_ci = FALSE) +
#  scale_y_continuous(transform = "reverse")
```

Linear fit instead. Note the inclusion of the extra nesting.

```{r}
# Linear mixed-model versions on the long (ROI-stacked) data. Both fits are
# stored in `r`; the second overwrites the first.
# `show_ci = FALSE` is spelled out in full: the shorthand `F` is an ordinary
# variable that can be reassigned.

# (1) age x fss_resid_z x ROI, random intercept for each id-by-visit cell.
r <- lmer(
  value ~ 1 + sex + visitnum_numeric + age * fss_resid_z * outcome_fac +
    (1 | id:visitnum_numeric),
  data = iron_long
)
summary(r)

plot(ggpredict(r, terms = c("age", "fss_resid_z", "outcome_fac")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")

# (2) age as a covariate, fss_resid_z x ROI, random intercepts for id and for
#     ROI within id.
r <- lmer(
  value ~ 1 + sex + visitnum_numeric + age + fss_resid_z * outcome_fac +
    (1 | id / outcome_fac),
  data = iron_long
)
summary(r)

plot(ggpredict(r, terms = c("fss_resid_z", "outcome_fac")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")
plot(ggpredict(r, terms = c("fss_resid_z")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")

plot(ggpredict(r, terms = c("age", "fss_resid_z")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")

plot(ggpredict(r, terms = c("age", "outcome_fac")), show_ci = FALSE) +
  scale_y_continuous(transform = "reverse")

# Repeated-measures ANOVA alternative, currently disabled. The summary() call
# is commented out together with the fit: on its own it fails because
# `aov_results` is never created.
##aov_results <- aov(value ~ firststagestay*outcome_fac + Error(id), data = iron_long)
##summary(aov_results)
```