# 7_JQAS_revisions.R
source("0_load_stuff.R")
#################
### Load Data ###
#################
# Season-level WAR table read from CSV, sorted by pitcher then year.
df_war_pitSzn <- arrange(
  read_csv("df_FWAR_GWAR_2010_2019_pf_ridge.csv"),
  PIT_NAME, YEAR
)
df_war_pitSzn

# Pitcher-exit table read from CSV (presumably one row per pitcher
# appearance -- confirm against the file).
pit_exits <- read_csv("df_pitcher_exits_2010_2019_pf_ridge.csv")

# Attach the season-level FWAR columns and counts to every exit row.
# The join has no explicit `by`, so it matches on the columns the two
# selections share (PIT_NAME and YEAR). Rows with an NA in any remaining
# column are dropped at the end.
df_war_pitExits <-
  pit_exits %>%
  select(GAME_ID, YEAR, PIT_NAME, GWAR, INNING) %>%
  arrange(PIT_NAME, YEAR, GAME_ID) %>%
  left_join(
    select(df_war_pitSzn, PIT_NAME, YEAR, FWAR_FIP, FWAR_RA9, N_fg, N)
  ) %>%
  mutate(PIT_SZN = paste(YEAR, PIT_NAME)) %>%
  drop_na()
df_war_pitExits
###############################################################################
### R2 Comment 19a: the variability of GWAR and FWAR from season to season? ###
###############################################################################
# For every pitcher-season row, add the WAR values from that same pitcher's
# preceding row (rows sorted by PIT_NAME, YEAR).
#
# The lag is computed within each PIT_NAME group. Without the grouping,
# lag() runs over the whole sorted table, and each pitcher's first season
# inherits the last season of the alphabetically previous pitcher as its
# "previous season". With grouping, those first-season rows get NA instead.
#
# dplyr::mutate / dplyr::lag are namespaced on purpose: if plyr is attached
# in the session, the bare `mutate` resolves to plyr::mutate, which ignores
# group_by().
#
# NOTE(review): "previous" means the pitcher's previous row, which is not
# necessarily YEAR - 1 if a pitcher has a gap between seasons -- confirm
# that this is the intended definition.
df_szn_by_szn_variability <-
  df_war_pitSzn %>%
  arrange(PIT_NAME, YEAR) %>%
  group_by(PIT_NAME) %>%
  dplyr::mutate(
    GWAR_prev = dplyr::lag(GWAR),
    FWAR_FIP_prev = dplyr::lag(FWAR_FIP),
    FWAR_RA9_prev = dplyr::lag(FWAR_RA9)
  ) %>%
  ungroup()
df_szn_by_szn_variability
# Long-format version of the season-to-season table: one row per
# (PIT_NAME, YEAR, metric) holding the current value (WAR) and the lagged
# value (WAR_prev).
#
# The current and lagged columns are pivoted separately and glued together
# by position. This lines up because both pivots start from the same rows
# and list the three metrics in the same order (GWAR, FWAR_FIP, FWAR_RA9).
df_szn_by_szn_variability_1 <- local({
  war_now <-
    df_szn_by_szn_variability %>%
    select(PIT_NAME, YEAR, GWAR, FWAR_FIP, FWAR_RA9) %>%
    pivot_longer(
      c(GWAR, FWAR_FIP, FWAR_RA9),
      names_to = "metric", values_to = "WAR"
    )

  # Only the value column is kept here; the id columns and the metric name
  # already come from `war_now`.
  war_before <-
    df_szn_by_szn_variability %>%
    select(PIT_NAME, YEAR, GWAR_prev, FWAR_FIP_prev, FWAR_RA9_prev) %>%
    pivot_longer(
      c(GWAR_prev, FWAR_FIP_prev, FWAR_RA9_prev),
      names_to = "metric", values_to = "WAR_prev"
    ) %>%
    select(-c(PIT_NAME, YEAR, metric))

  # Display names used as facet titles; any metric not listed keeps its
  # raw column name.
  facet_titles <- c(FWAR_FIP = "FWAR (FIP)", FWAR_RA9 = "FWAR (RA9)")

  bind_cols(war_now, war_before) %>%
    mutate(metric = coalesce(unname(facet_titles[metric]), metric))
})
df_szn_by_szn_variability_1
# plyr is used only for ddply() below and is deliberately NOT attached.
# Attaching plyr after dplyr masks dplyr::mutate / summarise / arrange, and
# plyr::mutate ignores group_by(), which would silently turn the grouped
# per-pitcher-season computations later in this script into whole-table
# ones. Calling plyr::ddply by namespace avoids that.

# Build the plotmath label for one metric's season-to-season regression.
#
# df:      data frame with numeric columns WAR and WAR_prev; rows with NA in
#          either column are dropped by lm()'s default NA handling.
# returns: a length-1 character string holding a plotmath expression of the
#          form italic(slope) ~ "=" ~ "<slope>", where <slope> is the slope
#          of WAR regressed on WAR_prev, formatted to 2 significant digits.
#          Intended for geom_text(parse = TRUE).
lm_eqn <- function(df) {
  fit <- lm(WAR ~ WAR_prev, data = df)
  slope_txt <- format(unname(coef(fit)[2]), digits = 2)
  # Only `b` is substituted; `slope` inside italic() stays a literal symbol
  # so that plotmath renders the word "slope".
  label <- substitute(italic(slope) ~ "=" ~ b, list(b = slope_txt))
  as.character(as.expression(label))
}

# One label per metric. ddply names the unnamed character result column
# `V1`, and the facet plot reads it under that name.
eq <- plyr::ddply(df_szn_by_szn_variability_1, "metric", lm_eqn)
# Scatter of current-season WAR against the previous season's WAR, one facet
# per metric, with a least-squares line and the slope label from `eq`.
#
# The y-range is set with coord_cartesian() rather than ylim(). ylim() sets
# scale limits, which discard out-of-range rows before stat_smooth() fits
# the line; the drawn line could then disagree with the slope label, which
# is computed from all rows. coord_cartesian() only zooms the view.
plot_szn_by_szn_variability <-
  df_szn_by_szn_variability_1 %>%
  drop_na() %>%
  ggplot(aes(x = WAR_prev, y = WAR)) +
  facet_wrap(~ metric) +
  geom_point(shape = 21, size = 1) +
  stat_smooth(
    method = "lm", se = FALSE, color = "dodgerblue2", linewidth = 1.5
  ) +
  # Slope label, placed at a fixed position inside every facet.
  geom_text(
    data = eq, color = "dodgerblue2",
    aes(x = 4, y = 11, label = V1),
    size = 8, parse = TRUE, inherit.aes = FALSE
  ) +
  xlab("previous season's WAR") +
  coord_cartesian(ylim = c(-2, 12))
# plot_szn_by_szn_variability
ggsave(
  paste0(output_folder, "plot_szn_by_szn_WAR_variability.png"),
  plot_szn_by_szn_variability,
  width = 12, height = 4
)
################################################################################
# R1 Comment 3: justification about the variance that exists in some players versus others.
# Maybe a plot where a player’s game-to-game variance is shown against
# the difference between their gWAR and that of the other vendors.
################################################################################
# Threshold applied to the season-level column N (presumably the pitcher's
# number of games that season -- confirm against the season table).
min_num_games <- 30

# One row per (YEAR, PIT_NAME) with:
#   GWAR_szn           - sum of the per-exit GWAR values in that season
#   diff_GWAR_FWARr    - GWAR_szn minus the season's FWAR_RA9
#   gameByGame_sd_GWAR - standard deviation of the per-exit GWAR values
#
# dplyr::mutate is namespaced on purpose: if plyr is attached in the
# session, the bare `mutate` resolves to plyr::mutate, which ignores
# group_by() and would compute the sum and sd over the whole table instead
# of per pitcher-season.
df_plot_var <-
  df_war_pitExits %>%
  filter(N >= min_num_games) %>%
  group_by(YEAR, PIT_NAME) %>%
  dplyr::mutate(
    GWAR_szn = sum(GWAR),
    diff_GWAR_FWARr = GWAR_szn - FWAR_RA9,
    gameByGame_sd_GWAR = sd(GWAR)
  ) %>%
  ungroup() %>%
  # Collapse the per-exit rows down to one row per pitcher-season.
  distinct(YEAR, PIT_NAME, GWAR_szn, diff_GWAR_FWARr, gameByGame_sd_GWAR)
df_plot_var
# Scatter of the seasonal (GWAR - FWAR RA9) difference against the
# game-by-game standard deviation of GWAR, with a least-squares line and its
# slope (rounded to 2 decimals) printed at a fixed position on the panel.
plot_var_vs_diff <-
  ggplot(df_plot_var, aes(x = gameByGame_sd_GWAR, y = diff_GWAR_FWARr)) +
  geom_point(shape = 21, size = 1.5) +
  labs(
    x = "game-by-game s.d. in pitcher GWAR",
    y = "Seasonal GWAR - FWAR (RA9)"
  ) +
  # xlab(paste0("game-by-game s.d. in pitcher GWAR \n(for pitcher-seasons with at least ", min_num_games, " games)")) +
  annotate(
    "text",
    x = 0.18, y = 1.5, size = 7, color = "dodgerblue2", parse = TRUE,
    label = paste0(
      "slope==",
      round(
        coef(lm(diff_GWAR_FWARr ~ gameByGame_sd_GWAR, data = df_plot_var))[[2]],
        2
      )
    )
  ) +
  stat_smooth(method = "lm", se = FALSE, color = "dodgerblue2", linewidth = 2)
plot_var_vs_diff
# ggsave(paste0(output_folder,"plot_var_vs_diff.png"), plot_var_vs_diff, width=6, height=5)
# df_plot_var_stability =
# df_plot_var %>%
# # select(YEAR, PIT_NAME, gameByGame_sd_GWAR) %>%
# arrange(PIT_NAME, YEAR) %>%
# group_by(PIT_NAME) %>%
# mutate(sd_prev = lag(gameByGame_sd_GWAR))
# df_plot_var_stability
#
# df_plot_var_stability %>%
# ggplot(aes(x = sd_prev, y = gameByGame_sd_GWAR)) +
# geom_point(shape=21, size=2) +
# stat_smooth(method="lm",se=F,color="dodgerblue2", linewidth=2)