-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTwinAccountsPrepAnalysis.R
468 lines (382 loc) · 34.4 KB
/
TwinAccountsPrepAnalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
#Twin Accounts analysis
#Data sources: TA client list, TA credit score tracking (done outside of ETO), ETO data
#Create dataset combining TA participation & score information with general FOC population (including TA clients) score info
#Create matched comparison group(s) from non-TA participants in broader FOC population, compare outcomes
library("stringr", lib.loc="C:/Program Files/R/R-3.1.1/library")
#functions####
# Replace missing values in x with FALSE (0 for numeric input) so the result
# can be used as a mask without NA entries; other values pass through ifelse().
# Uses the literal FALSE rather than the reassignable shorthand F.
nF <- function(x) {
  ifelse(is.na(x), FALSE, x)
}
# Number of non-missing entries in x, returned as an integer.
countN <- function(x) {
  present <- !is.na(x)
  as.integer(sum(present))
}
# Mean of x with missing values removed.
# Uses the literal TRUE rather than the reassignable shorthand T.
rmean <- function(x) {
  mean(x, na.rm = TRUE)
}
# Mean of x with missing values removed, rounded to two decimal places.
# Uses the literal TRUE rather than the reassignable shorthand T.
rmeanr <- function(x) {
  round(mean(x, na.rm = TRUE), 2)
}
# Median of x with missing values removed.
# Uses the literal TRUE rather than the reassignable shorthand T.
rmedian <- function(x) {
  median(x, na.rm = TRUE)
}
# Sum of x with missing values removed (0 when every entry is missing).
# Uses the literal TRUE rather than the reassignable shorthand T.
rsum <- function(x) {
  sum(x, na.rm = TRUE)
}
# Build the full path to a file inside the FOCData Dropbox folder.
#   directory: sub-folder of FOCData that holds the file
#   filename:  name of the file inside that sub-folder
# The Mac layout is used when Sys.info() reports "Darwin"; every other system
# gets the Windows layout.
pathFOCData <- function(directory, filename) {
  on_mac <- Sys.info()["sysname"] == "Darwin"
  if (on_mac) {
    return(paste0("/Users/srhrnkn/Dropbox/LISC/FOC/FOCData/", directory, "/", filename))
  }
  paste0("C:\\\\Users\\\\SRankin\\\\Dropbox\\\\LISC\\\\FOC\\\\FOCData\\\\", directory, "\\\\", filename)
}
#get data: TA client list, TA credit score tracking sheet, ETO data from FOC4yr####
# CSV inputs from the "Twin Accounts" folder.
ta.clients <- read.csv(pathFOCData("Twin Accounts", "Twin Accts client list_no names.csv"))
ta.tracking <- read.csv(pathFOCData("Twin Accounts", "TrackingSheet40.csv"))
# Saved R objects from the "FOC4yr" folder. The code below uses objects named
# ScoresSortedFICO, IndividualOutcomes and AllVars - presumably these files
# restore them under the same names (confirm).
# NOTE(review): Demo and Baseline are used further down but are not loaded
# here - confirm they are already present in the workspace.
# (Each load() call was missing its closing parenthesis, which stopped the
# script from parsing.)
load(pathFOCData("FOC4yr", "ScoresSortedFICO"))
load(pathFOCData("FOC4yr", "IndividualOutcomes"))
load(pathFOCData("FOC4yr", "AllVars"))
#clean up formats####
# Collapse every ".." in the imported column names to a single ".".
names(ta.clients) <- str_replace_all(names(ta.clients), "\\.\\.", ".")
names(ta.tracking) <- str_replace_all(names(ta.tracking), "\\.\\.", ".")
#fix scores coming in as factors - make new, set 0s to NA
# All three score columns go through as.character() first so that a factor or
# character column is converted by its printed value, not its internal codes.
# The entry score previously skipped this conversion, unlike the 6- and
# 12-month scores; for a column that is already numeric the values are unchanged.
ta.tracking$ScoreEntry <- as.numeric(as.character(ta.tracking$Credit.Score.At.Entry))
ta.tracking$ScoreEntry[ta.tracking$ScoreEntry == 0] <- NA
ta.tracking$Score6mo <- as.numeric(as.character(ta.tracking$Credit.Score.6.months))
ta.tracking$Score6mo[ta.tracking$Score6mo == 0] <- NA
ta.tracking$Score12mo <- as.numeric(as.character(ta.tracking$Credit.Score.12.months))
ta.tracking$Score12mo[ta.tracking$Score12mo == 0] <- NA
#fix dates
# Loan dates are parsed as month/day/four-digit-year.
ta.tracking$Loan.Closing.Date <- as.Date(ta.tracking$Loan.Closing.Date, "%m/%d/%Y")
ta.tracking$Loan.Ending.Date <- as.Date(ta.tracking$Loan.Ending.Date, "%m/%d/%Y")
#additional fields####
# 6- and 12-month follow-up dates counted from the loan closing date
# (365 / 2 is 182.5, so Date6mo carries a half-day fraction).
ta.tracking$Date6mo <- ta.tracking$Loan.Closing.Date + 365 / 2
ta.tracking$Date12mo <- ta.tracking$Loan.Closing.Date + 365

# Attach the standardized SITE name for each agency from the matching table.
ta.sitematch <- read.csv(pathFOCData("Twin Accounts", "sitematchingTA.csv"))
names(ta.sitematch) <- c("agency", "city", "SITE", "Notes")
ta.clients <- merge(
  ta.clients,
  ta.sitematch[, c("agency", "SITE")],
  by = "agency",
  all.x = TRUE
)
# SitePartic is the site name immediately followed by the client's ID number.
ta.clients$SitePartic <- paste0(ta.clients$SITE, ta.clients$ID.number)
# Bring SITE and SitePartic onto the tracking rows via the account number.
ta.tracking.id <- merge(
  ta.tracking,
  ta.clients[, c("SITE", "SitePartic", "acct.")],
  by.x = "Acct.",
  by.y = "acct.",
  all.x = TRUE
)

# Account status fields derived from Loan.Status.
ta.tracking.id$had.ta <- !is.na(ta.tracking.id$Loan.Status)
ta.tracking.id$completed.ta <- ta.tracking.id$Loan.Status == "Completed"
# ta.group starts as "No Account" and is overwritten where the status matches;
# any status containing "Closed" is grouped as "Closed Early".
ta.tracking.id$ta.group <- "No Account"
ta.tracking.id$ta.group[ta.tracking.id$Loan.Status == "Completed"] <- "Completed"
ta.tracking.id$ta.group[ta.tracking.id$Loan.Status == "Open"] <- "Open"
ta.tracking.id$ta.group[grepl("Closed", ta.tracking.id$Loan.Status)] <- "Closed Early"
ta.tracking.id$ta.group <- factor(
  ta.tracking.id$ta.group,
  levels = c("No Account", "Open", "Completed", "Closed Early")
)
#Scores########
# Goal: pick up more recent scores for participants whose credit status/score
# is missing (score status blank, or a score was pulled but FICO is missing).
# Reshape ta.tracking.id from one row per account into one row per score
# observation, using the same column names for entry, 6-month and 12-month.
ta.entry.scores <- setNames(
  ta.tracking.id[, c("SITE", "SitePartic", "Loan.Closing.Date", "ScoreEntry")],
  c("SITE", "SitePartic", "DateCreditReport", "FICOScore")
)
ta.6mo.scores <- setNames(
  ta.tracking.id[, c("SITE", "SitePartic", "Date6mo", "Score6mo")],
  c("SITE", "SitePartic", "DateCreditReport", "FICOScore")
)
ta.12mo.scores <- setNames(
  ta.tracking.id[, c("SITE", "SitePartic", "Date12mo", "Score12mo")],
  c("SITE", "SitePartic", "DateCreditReport", "FICOScore")
)
tascoresstacked <- rbind(ta.entry.scores, ta.6mo.scores, ta.12mo.scores)
# Drop rows whose ID could not be found and sites marked "Not included".
tascoresstacked <- tascoresstacked[
  !is.na(tascoresstacked$SitePartic) & !grepl("Not included", tascoresstacked$SITE),
]
# Add the report date as an integer and a Scored/Unscored status based on
# whether a FICO score is present.
tascoresstacked <- data.frame(
  tascoresstacked,
  DateCreditReportNums = as.integer(tascoresstacked$DateCreditReport),
  ScoreStatusFICO = ifelse(is.na(tascoresstacked$FICOScore), "Unscored", "Scored")
)
# Give SITE the same factor levels as the ETO score data so the two can be stacked.
tascoresstacked$SITE <- factor(tascoresstacked$SITE, levels = levels(ScoresSortedFICO$SITE))
# Stack the ETO scores (only the columns shared with the TA tracking scores)
# on top of the TA tracking scores, tagging each row with its source.
ScoresSortedFICO.ta <- rbind(
  data.frame(
    ScoresSortedFICO[, names(ScoresSortedFICO) %in% names(tascoresstacked)],
    DataSource = "ETO"
  ),
  data.frame(tascoresstacked, DataSource = "TATracking")
)
# Re-rank the reports by date within each participant (ties keep row order)
# and record the highest rank per participant.
ScoresSortedFICO.ta$DateCreditReportRank <- ave(
  ScoresSortedFICO.ta$DateCreditReportNums,
  ScoresSortedFICO.ta$SitePartic,
  FUN = function(x) rank(x, ties.method = "first")
)
ScoresSortedFICO.ta$MaxCreditReportCount <- ave(
  ScoresSortedFICO.ta$DateCreditReportRank,
  ScoresSortedFICO.ta$SitePartic,
  FUN = max
)
ScoresSortedFICO.ta$DateCreditReportRank <- as.factor(ScoresSortedFICO.ta$DateCreditReportRank)
# Sort by participant, then by report rank.
ScoresSortedFICO.ta <- ScoresSortedFICO.ta[
  order(ScoresSortedFICO.ta$SitePartic, ScoresSortedFICO.ta$DateCreditReportRank),
]
# Earliest report date per participant.
ScoresSortedFICO.ta$FirstCr <- as.Date(
  ave(ScoresSortedFICO.ta$DateCreditReportNums, ScoresSortedFICO.ta$SitePartic, FUN = min),
  origin = "1970-01-01"
)
# Days between each report and the participant's first report.
ScoresSortedFICO.ta$DaysFromFirst <- as.numeric(
  ScoresSortedFICO.ta$DateCreditReport - ScoresSortedFICO.ta$FirstCr
)
# Credit category: "Hi" for a score of 620 or above, "Lo" below, "Unscored" otherwise.
ScoresSortedFICO.ta$cr.cat <- as.factor(with(
  ScoresSortedFICO.ta,
  ifelse(ScoreStatusFICO == "Scored", ifelse(FICOScore >= 620, "Hi", "Lo"), "Unscored")
))
# Add days from enrollment. The merge drops anyone not in Demo, which is
# acceptable because that happens later anyway.
ScoresSortedFICO.ta <- merge(
  ScoresSortedFICO.ta,
  Demo[, c("SitePartic", "EarliestEnrolled")],
  by = "SitePartic"
)
ScoresSortedFICO.ta$DaysFromEnrolled <- as.numeric(
  ScoresSortedFICO.ta$DateCreditReport - ScoresSortedFICO.ta$EarliestEnrolled
)
# (Some of these are earlier than the first enrollment date - presumably old
# credit reports were pulled, or these people may have been enrolled before.)
#first calc distance from 6monthly markers
# diffto<N>mo = days from enrollment minus the N-month marker, where the
# markers are multiples of 182.5 days (6, 12, ..., 48 months).
for (.win_mo in seq(6, 48, by = 6)) {
  ScoresSortedFICO.ta[[paste0("diffto", .win_mo, "mo")]] <-
    ScoresSortedFICO.ta$DaysFromEnrolled - (.win_mo / 6) * 182.5
}
#6month scores - take the min of abs val diff to monthly, keep if it's in the 30-day range
#assign the flags
# appr<N>moFICO is TRUE for the report(s) closest to the N-month marker for
# each participant, provided that closest report is within 30 days of it.
# Rows with a missing distance are ignored when finding the minimum and are
# flagged FALSE.
# The match key joins participant ID and distance with "|". The earlier key
# had no separator, so an ID ending in digits could collide with a different
# ID plus a different distance (e.g. "X1" + "12.5" vs "X11" + "2.5").
for (.win_mo in seq(6, 48, by = 6)) {
  .win_abs <- abs(ScoresSortedFICO.ta[[paste0("diffto", .win_mo, "mo")]])
  .win_best <- aggregate(
    absdiff ~ SitePartic,
    data.frame(SitePartic = ScoresSortedFICO.ta$SitePartic, absdiff = .win_abs),
    min
  )
  .win_best <- .win_best[which(.win_best$absdiff <= 30), ]
  ScoresSortedFICO.ta[[paste0("appr", .win_mo, "moFICO")]] <-
    paste(ScoresSortedFICO.ta$SitePartic, .win_abs, sep = "|") %in%
    paste(.win_best$SitePartic, .win_best$absdiff, sep = "|")
}
#remove temp objs
rm(.win_mo, .win_abs, .win_best)
#could also add thin or thick here but would have to go back to Credit to grab it
#redo first/last. or figure out when the TA usually falls after prog enrollment and get that one?? or??
#calc first and last
# NOTE(review): columns are picked by position throughout this block; the new
# names assigned below suggest 1 = SitePartic, 3 = report date, 4 = FICO score,
# 6 = score status, 7 = data source, 12 = cr.cat - confirm against
# names(ScoresSortedFICO.ta) before changing any upstream column order.
# First report per participant (rank 1).
taincl.ind <- ScoresSortedFICO.ta[
  ScoresSortedFICO.ta$DateCreditReportRank == 1,
  c(1:4, 6:7, 12)
]
names(taincl.ind)[3:7] <- c(
  "FirstCr", "FirstFICOScore", "FirstScoreStatusFICO", "FirstDataSource", "First.cr.cat"
)
# Last report per participant; only participants with more than one report get one.
taincl.ind <- merge(
  taincl.ind,
  ScoresSortedFICO.ta[
    ScoresSortedFICO.ta$DateCreditReportRank == ScoresSortedFICO.ta$MaxCreditReportCount &
      ScoresSortedFICO.ta$MaxCreditReportCount != 1,
    c(2:4, 6:7, 12, 9, 11)
  ],
  by = "SitePartic",
  all.x = TRUE
)
names(taincl.ind)[8:ncol(taincl.ind)] <- c(
  "LastCr", "LastFICOScore", "LastScoreStatusFICO", "LastDataSource",
  "Last.cr.cat", "LastCreditReportCount", "LastDaysFromFirst"
)
#rename these so they can be distinguished from the IndividualOutcomes vars
names(taincl.ind)[3:ncol(taincl.ind)] <- paste(
  "taincl", names(taincl.ind)[3:ncol(taincl.ind)],
  sep = "."
)
#add sixmonthly fields: date, Score status, score
# For each 6-month marker, merge in the flagged report and rename the three
# columns the merge appended at the end.
for (.win_mo in seq(6, 48, by = 6)) {
  .win_hit <- ScoresSortedFICO.ta[[paste0("appr", .win_mo, "moFICO")]] == TRUE
  taincl.ind <- merge(
    taincl.ind,
    unique(ScoresSortedFICO.ta[.win_hit, c(1, 3, 6, 4)]),
    by = "SitePartic",
    all.x = TRUE
  )
  names(taincl.ind)[(ncol(taincl.ind) - 2):ncol(taincl.ind)] <- paste0(
    c("taincl.Date", "taincl.ScoreStatus", "taincl.FICOScore"), .win_mo, "mo"
  )
}
rm(.win_mo, .win_hit)
#manually kill two duped records for now (taking the later in cases where there are symmetrically timed reports)
# NOTE(review): these are hard-coded row positions and only remain correct
# while the input data and merge order are unchanged - verify before rerunning
# on refreshed data.
taincl.ind <- taincl.ind[-c(30190, 13250), ]
#Now do the same for the TA group based on days from TA opening
# Merge the score rows with the TA tracking fields to get account opening dates.
# Three participants have two accounts each, which would duplicate their rows:
#   Emerge124516 - one closed early, one completed
#   Metro107555  - both closed early
#   NLEN145111   - one closed early, one open
# The tracking row whose closing date matches DateEarlierTA is excluded for each.
doubleTA <- data.frame(
  SitePartic = c("Emerge124516", "Metro107555", "NLEN145111"),
  DateEarlierTA = as.Date(c("2014-02-05", "2013-03-25", "2014-09-16"))
)
ScoresSortedFICO.ta.holders <- merge(
  ta.tracking.id[
    !(paste0(ta.tracking.id$SitePartic, ta.tracking.id$Loan.Closing.Date) %in%
        paste0(doubleTA$SitePartic, doubleTA$DateEarlierTA)),
    c("SitePartic", "Loan.Closing.Date", "Loan.Ending.Date", "had.ta", "completed.ta", "ta.group")
  ],
  ScoresSortedFICO.ta
)
# Days between each credit report and the loan closing date.
ScoresSortedFICO.ta.holders$DaysFromTA <- as.numeric(
  ScoresSortedFICO.ta.holders$DateCreditReport - ScoresSortedFICO.ta.holders$Loan.Closing.Date
)
#first calc distance from 6monthly markers
# TAdiffto<N>mo = days from the loan closing date minus the N-month marker,
# where the markers are multiples of 182.5 days (6, 12, ..., 48 months).
for (.win_mo in seq(6, 48, by = 6)) {
  ScoresSortedFICO.ta.holders[[paste0("TAdiffto", .win_mo, "mo")]] <-
    ScoresSortedFICO.ta.holders$DaysFromTA - (.win_mo / 6) * 182.5
}
#6month scores - take the min of abs val TAdiff to monthly, keep if it's in the 30-day range
#assign the flags
# TAappr<N>moFICO is TRUE for the report(s) closest to the N-month marker for
# each participant, provided that closest report is within 30 days of it.
# Rows with a missing distance are ignored when finding the minimum and are
# flagged FALSE.
# The match key joins participant ID and distance with "|". The earlier key
# had no separator, so an ID ending in digits could collide with a different
# ID plus a different distance (e.g. "X1" + "12.5" vs "X11" + "2.5").
for (.win_mo in seq(6, 48, by = 6)) {
  .win_abs <- abs(ScoresSortedFICO.ta.holders[[paste0("TAdiffto", .win_mo, "mo")]])
  .win_best <- aggregate(
    absdiff ~ SitePartic,
    data.frame(SitePartic = ScoresSortedFICO.ta.holders$SitePartic, absdiff = .win_abs),
    min
  )
  .win_best <- .win_best[which(.win_best$absdiff <= 30), ]
  ScoresSortedFICO.ta.holders[[paste0("TAappr", .win_mo, "moFICO")]] <-
    paste(ScoresSortedFICO.ta.holders$SitePartic, .win_abs, sep = "|") %in%
    paste(.win_best$SitePartic, .win_best$absdiff, sep = "|")
}
#remove temp objs
rm(.win_mo, .win_abs, .win_best)
#wrap up to ind level: date, Score status, score for each 6mo
# Start from one tracking row per TA holder: skip the earlier account of the
# double-account participants and keep only participants that have score rows.
scores.ta.holders.ind <- ta.tracking.id[
  !(paste0(ta.tracking.id$SitePartic, ta.tracking.id$Loan.Closing.Date) %in%
      paste0(doubleTA$SitePartic, doubleTA$DateEarlierTA)) &
    (ta.tracking.id$SitePartic %in% ScoresSortedFICO.ta.holders$SitePartic),
  c("SitePartic", "Loan.Closing.Date", "Loan.Ending.Date", "had.ta", "completed.ta", "ta.group")
]
# For each 6-month marker, merge in the flagged report and rename the three
# columns the merge appended at the end.
# NOTE(review): columns 1, 8, 11, 9 are picked by position; the new names
# suggest SitePartic, report date, score status and FICO score - confirm
# against names(ScoresSortedFICO.ta.holders).
for (.win_mo in seq(6, 48, by = 6)) {
  .win_hit <- ScoresSortedFICO.ta.holders[[paste0("TAappr", .win_mo, "moFICO")]] == TRUE
  scores.ta.holders.ind <- merge(
    scores.ta.holders.ind,
    unique(ScoresSortedFICO.ta.holders[.win_hit, c(1, 8, 11, 9)]),
    by = "SitePartic",
    all.x = TRUE
  )
  names(scores.ta.holders.ind)[(ncol(scores.ta.holders.ind) - 2):ncol(scores.ta.holders.ind)] <- paste0(
    c("taincl.Date", "taincl.ScoreStatus", "taincl.FICOScore"), .win_mo, "mo.TA"
  )
}
rm(.win_mo, .win_hit)
# ta.ch combines the IndividualOutcomes columns listed in AllVars (those with
# in.IndOut set) with the TA tracking data, keeping every IndividualOutcomes row.
ta.ch <- merge(
  IndividualOutcomes[, as.character(AllVars$Name[AllVars$in.IndOut == TRUE])],
  ta.tracking.id,
  by = c("SITE", "SitePartic"),
  all.x = TRUE
)
# Add the taincl versions of the credit variables.
ta.ch <- merge(ta.ch, taincl.ind, by = c("SitePartic", "SITE"), all.x = TRUE)
#temp line to add the new sixmonthly vars - starting over this would be included in above
#ta.ch<-merge(ta.ch,taincl.ind[,c("SitePartic",names(taincl.ind)[!names(taincl.ind) %in% names(ta.ch)])],by="SitePartic",all.x=T)
# Add the six-monthly TA variables, bringing over only columns ta.ch lacks.
ta.ch <- merge(
  ta.ch,
  scores.ta.holders.ind[
    ,
    c("SitePartic", names(scores.ta.holders.ind)[!names(scores.ta.holders.ind) %in% names(ta.ch)])
  ],
  by = "SitePartic",
  all.x = TRUE
)
# Anyone without a tracking row has no account.
ta.ch$ta.group[is.na(ta.ch$ta.group)] <- "No Account"
# Logical flags: completed the account / has any account at all.
ta.ch$is.completed <- ta.ch$ta.group == "Completed"
ta.ch$is.ta <- ta.ch$ta.group != "No Account"
# Closed-early flag; NAs are set to FALSE because it is used for matching.
ta.ch$is.closed.early.ind <- ta.ch$ta.group == "Closed Early"
ta.ch$is.closed.early.ind[is.na(ta.ch$is.closed.early.ind)] <- FALSE
# Final six-monthly score variables: TA holders take the .TA version (timed
# from account opening), everyone else takes the version timed from enrollment.
for (.win_mo in seq(6, 48, by = 6)) {
  ta.ch[[paste0("taincl.FICOScore", .win_mo, "mo.byista")]] <- ifelse(
    ta.ch[, "is.ta"],
    ta.ch[[paste0("taincl.FICOScore", .win_mo, "mo.TA")]],
    ta.ch[[paste0("taincl.FICOScore", .win_mo, "mo")]]
  )
}
rm(.win_mo)
# Credit category at first report: "Unscored" when the first score status says
# no score is available, otherwise the existing FirstFICOCat value.
ta.ch$First.cr.cat <- as.factor(with(
  ta.ch,
  ifelse(
    FirstScoreStatus == "No score available (insufficient credit history)",
    "Unscored",
    as.character(FirstFICOCat)
  )
))
# Credit category at TA start, filled only for TA holders from the entry score
# (missing = "Unscored", 620 or above = "Hi", below = "Lo").
ta.ch$ta.start.cr.cat <- factor(NA, levels = levels(ta.ch$First.cr.cat))
ta.ch$ta.start.cr.cat[ta.ch$is.ta == TRUE] <- as.factor(with(
  ta.ch[ta.ch$is.ta == TRUE, ],
  ifelse(is.na(ScoreEntry), "Unscored", ifelse(ScoreEntry >= 620, "Hi", "Lo"))
))
# Lags in days between the credit report dates and the loan dates.
ta.ch$FirstCrCloseLag <- as.numeric(ta.ch$Loan.Closing.Date - ta.ch$FirstCr)
ta.ch$LoanEndLastCrLag <- as.numeric(ta.ch$LastCr - ta.ch$Loan.Ending.Date)
ta.ch$CloseLastCrLag <- as.numeric(ta.ch$LastCr - ta.ch$Loan.Closing.Date)
# TRUE when the TA is completed, the last report is on or after the loan
# ending date, and the first report is on or before the loan closing date.
# Missing results count as FALSE.
ta.ch$is.compl.timingok <- ta.ch$is.completed &
  ta.ch$LoanEndLastCrLag >= 0 &
  ta.ch$FirstCrCloseLag >= 0
ta.ch$is.compl.timingok[is.na(ta.ch$is.compl.timingok)] <- FALSE
#redo this for taincl versions of vars
#taincl.First.cr.cat already exists
# Lags in days between the taincl first/last report dates and the loan dates.
ta.ch$taincl.FirstCrCloseLag <- as.numeric(ta.ch$Loan.Closing.Date - ta.ch$taincl.FirstCr)
ta.ch$taincl.LoanEndLastCrLag <- as.numeric(ta.ch$taincl.LastCr - ta.ch$Loan.Ending.Date)
ta.ch$taincl.CloseLastCrLag <- as.numeric(ta.ch$taincl.LastCr - ta.ch$Loan.Closing.Date)
#add indicator to ta.ch for cases where the TA is completed AND the last Credit date is after the ending date. But maybe we don't care about this now - the only cases where this test doesn't come out positive are the ones where the loan ending date is later than a year after the opening.
# The comparison on taincl.LoanEndLastCrLag was written ">=&" with no
# right-hand side, which does not parse; it now compares against 0, matching
# the non-taincl is.compl.timingok indicator. Missing results count as FALSE.
ta.ch$taincl.is.compl.timingok <- ta.ch$is.completed &
  ta.ch$taincl.LoanEndLastCrLag >= 0 &
  ta.ch$taincl.FirstCrCloseLag >= 0
ta.ch$taincl.is.compl.timingok[is.na(ta.ch$taincl.is.compl.timingok)] <- FALSE
# Change measures for the taincl versions.
# Score change from first to last report, and whether it went up.
ta.ch$taincl.FICOCh <- ta.ch$taincl.LastFICOScore - ta.ch$taincl.FirstFICOScore
ta.ch$taincl.FICOInc <- ta.ch$taincl.FICOCh > 0
# Became scored: unscored at first report and not unscored at last report;
# set to NA for anyone who was already scored at first report.
ta.ch$taincl.BecameScored <- ta.ch$taincl.FirstScoreStatusFICO == "Unscored" &
  ta.ch$taincl.LastScoreStatusFICO != "Unscored"
ta.ch$taincl.BecameScored[ta.ch$taincl.FirstScoreStatusFICO == "Scored"] <- NA
# 1/0 indicators for having a score at first and at last report.
ta.ch$taincl.First.has.score <- ifelse(ta.ch$taincl.FirstScoreStatusFICO == "Scored", 1, 0)
ta.ch$taincl.Last.has.score <- ifelse(ta.ch$taincl.LastScoreStatusFICO == "Scored", 1, 0)
# Date check table for completed accounts.
ta.datef <- ta.ch[
  ta.ch$is.completed,
  c(
    "SitePartic", "FirstCr", "LastCr", "First.cr.cat", "FirstFICOClean",
    "LastFICOClean", "FICOCh", "Loan.Closing.Date", "Date12mo",
    "Loan.Ending.Date", "ScoreEntry", "Score12mo", "LastDateCreditReportRank"
  )
]
ta.datef$FirstCrCloseLag <- as.numeric(ta.datef$Loan.Closing.Date - ta.datef$FirstCr)
ta.datef$LoanEndLastCrLag <- as.numeric(ta.datef$LastCr - ta.datef$Loan.Ending.Date)
# Days between the first and last taincl credit reports.
ta.ch$taincl.FirstLastFICOLag <- as.numeric(ta.ch$taincl.LastCr - ta.ch$taincl.FirstCr)
# Extra variable needed for the match: TRUE when the participant appears in
# Baseline with InBankruptcy equal to "Yes".
ta.ch$BankrBase <- ta.ch$SitePartic %in%
  Baseline$SitePartic[Baseline$InBankruptcy == "Yes"]
# Note: FICOCh shows zero where there was only ever one FICO pull, when it
# should be null. The taincl version handles this correctly, so do not use
# FICOCh itself.
#Analysis ####
#Analyze pre and post outcomes for TA account holders & compare to matched comparison group.
library("MatchIt", lib.loc="C:/Program Files/R/R-3.1.1/library")
# Compare outcome variables between the two groups of an independent variable.
# Intended for use after matching is performed.
#   df:       data frame to analyze
#   depvars:  character vector of dependent variable names
#   deptypes: vector of the same length giving each dependent variable's type;
#             "boolean" gets a chi-square test, anything else an ANOVA
#   indvar:   name of the independent (grouping) variable
# Returns a data frame with the per-group summaries from rMnMedNr() (defined
# elsewhere; per the original note these are means, medians and Ns), suffixed
# ".Comp" for the first group and ".Tr" for the second, plus a "Sig" column
# with each test's p-value rounded to 4 decimals.
# Only the first two groups of indvar are reported.
OutcomeDF <- function(df, depvars, deptypes, indvar) {
  tempby <- by(data = df, INDICES = df[, indvar], FUN = function(x) {
    rMnMedNr(x, depvars)
  })
  # One p-value per dependent variable. seq_along() avoids the 1:0 trap of
  # 1:length() and vapply() avoids growing a vector inside a loop.
  tempsigs <- vapply(seq_along(depvars), function(i) {
    if (deptypes[i] == "boolean") {
      chisq.test(table(df[, indvar], df[, depvars[i]]))$p.value
    } else {
      fit <- aov(as.formula(paste0(depvars[i], "~", indvar)), df)
      # Fifth column of the ANOVA table is Pr(>F); its first entry belongs to indvar.
      summary(fit)[[1]][[5]][[1]][[1]]
    }
  }, numeric(1))
  comp <- as.data.frame(tempby[[1]])
  treat <- as.data.frame(tempby[[2]])
  tempdf <- cbind(comp, treat, Sigs = round(tempsigs, 4))
  names(tempdf) <- c(paste0(names(comp), ".Comp"), paste0(names(treat), ".Tr"), "Sig")
  tempdf
}
#Analysis prior to matching####
#Outcome vars for the TA analysis, each list paired with a parallel vector of types ("numeric" or "boolean") as expected by OutcomeDF. Can always change/add to these.
first.last.ta.outcome.vars <- c(
  "taincl.FirstFICOScore",
  "taincl.LastFICOScore",
  "taincl.First.has.score",
  "taincl.Last.has.score",
  "taincl.FICOCh",
  "taincl.FICOInc",
  "taincl.BecameScored"
)
first.last.ta.outcome.vartypes <- c(
  "numeric",
  "numeric",
  "boolean",
  "boolean",
  "numeric",
  "boolean",
  "boolean"
)
#FICO score columns at six-month steps from 6 to 48 months
sixmonthly.ta.outcomevars <- paste0(
  "taincl.FICOScore", seq(6, 48, by = 6), "mo.byista"
)
sixmonthly.ta.outcomevartypes <- rep(
  "numeric", length(sixmonthly.ta.outcomevars)
)
#Outcome vars on all data, grouped first by is.ta and then by is.compl.timingok
firstlast.ta.ch.byallTA <- OutcomeDF(
  df = ta.ch,
  depvars = first.last.ta.outcome.vars,
  deptypes = first.last.ta.outcome.vartypes,
  indvar = "is.ta"
)
firstlast.ta.ch.bycomplTA <- OutcomeDF(
  df = ta.ch,
  depvars = first.last.ta.outcome.vars,
  deptypes = first.last.ta.outcome.vartypes,
  indvar = "is.compl.timingok"
)
sixmonthly.ta.ch.byallTA <- OutcomeDF(
  df = ta.ch,
  depvars = sixmonthly.ta.outcomevars,
  deptypes = sixmonthly.ta.outcomevartypes,
  indvar = "is.ta"
)
sixmonthly.ta.ch.bycomplTA <- OutcomeDF(
  df = ta.ch,
  depvars = sixmonthly.ta.outcomevars,
  deptypes = sixmonthly.ta.outcomevartypes,
  indvar = "is.compl.timingok"
)
##Create matched comparison groups####
#start counter - this is so we can store results & their iteration number - these are kept in an object called matchiter - we can always check back to see what filter, ind var, dep vars each iteration used. (All of this would probably be easier with broom!)
#matchiternum<-1
#start data frame
#matchiter<-data.frame(Iter=matchiternum,Vars=paste("'",ta.matchvars,collapse="','","'"),Treatment="is.completed",Notes="",Filter="")
#write function to revert to previous iter and rerun code
##this is all goofy because of string issues
# ta.setmatch<-function(x){
# ta.matchvars<-as.character(matchiter$Vars[matchiter$Iter==x])
# ta.treatment<-matchiter$Treatment[matchiter$Iter==x]
# ta.filter<-matchiter$Filter[matchiter$Iter==x]
# }
#increment counter up
#matchiternum and matchiter are not initialised by any active code here (the start-up lines are commented out), so begin a fresh history if they are missing rather than failing with "object not found"
if (!exists("matchiternum", mode = "numeric")) {
  matchiternum <- 0
}
if (!exists("matchiter", mode = "list")) {
  matchiter <- NULL
}
matchiternum <- matchiternum + 1
#set match vars and treatment group (edit this by hand for each new model)
ta.matchvars <- c(
  "Age", "RaceEthnicity", "CriminalConvictions", "Education1",
  "Gender", "taincl.First.cr.cat", "AnnualHHIncome"
)
ta.treatment <- "taincl.is.compl.timingok"
#row filter kept as a string so that it can be recorded in matchiter; it is parsed and evaluated against ta.ch when the match dataset is built
ta.filter<-'ta.ch$SITE!="FocusHope"&!grepl("Indy",ta.ch$SITE)&(ta.ch$is.ta==T|ta.ch$BankrBase==F)&(ta.ch$is.ta==F|ta.ch[,ta.treatment]==T)&(ta.ch$SitePartic %in% IndividualOutcomes$SitePartic)&ta.ch$taincl.FirstLastFICOLag>365'
#record matchvars
#Vars is stored as a quoted, comma-separated list: 'Age','RaceEthnicity',...
#(quote each end of the collapsed string; putting the quotes inside paste() with its default sep = " " left stray spaces and doubled quotes in the record)
matchiter <- rbind(
  matchiter,
  data.frame(
    Iter = matchiternum,
    Vars = paste0("'", paste(ta.matchvars, collapse = "','"), "'"),
    Treatment = ta.treatment,
    Filter = ta.filter,
    Notes = "same as 27 but AnnualHHIncome instead of First Net Income"
  )
)
#create dataset which doesn't have any NAs for any match vars for matchit to work
#Step 1: apply the recorded filter once, keeping only the match vars, the treatment flag and the SitePartic key
ta.match <- ta.ch[
  eval(parse(text = ta.filter)),
  c(ta.matchvars, ta.treatment, "SitePartic")
]
#Step 2: keep rows with no NA in any match var. Rows where the filter itself evaluated to NA arrive here as all-NA rows and are dropped too. drop = FALSE keeps this working with a single match var.
ta.match <- ta.match[
  complete.cases(ta.match[, ta.matchvars, drop = FALSE]),
]
#nearest-neighbour matching of treatment on the match vars, requesting ratio = 10
matchdata <- match.data(
  matchit(
    as.formula(paste0(ta.treatment, "~", paste(ta.matchvars, collapse = "+"))),
    data = ta.match,
    method = "nearest",
    ratio = 10
  )
)
#merge outcome data in: every ta.ch column not already present in matchdata, joined on SitePartic
ta.postmatch <- merge(
  matchdata,
  ta.ch[, c("SitePartic", names(ta.ch)[!names(ta.ch) %in% names(matchdata)])],
  by = "SitePartic"
)
#archive postmatch under a name carrying the iteration number
assign(paste0("ta.postmatch", matchiternum), value = ta.postmatch)
#repeat analysis with match data
#outcome vars on the matched data, grouped by the treatment variable used for this iteration
firstlast.ta.postmatch.byTr <- OutcomeDF(
  df = ta.postmatch,
  depvars = first.last.ta.outcome.vars,
  deptypes = first.last.ta.outcome.vartypes,
  indvar = ta.treatment
)
sixmonthly.ta.postmatch.byTr <- OutcomeDF(
  df = ta.postmatch,
  depvars = sixmonthly.ta.outcomevars,
  deptypes = sixmonthly.ta.outcomevartypes,
  indvar = ta.treatment
)
#archive this comparison: keep a copy of each result under a name ending in the iteration number
assign(
  x = paste0("firstlast.ta.postmatch.byTr", matchiternum),
  value = firstlast.ta.postmatch.byTr
)
assign(
  x = paste0("sixmonthly.ta.postmatch.byTr", matchiternum),
  value = sixmonthly.ta.postmatch.byTr
)
#figure out some way to document the model here and incorporate it in the file name?
#write.csv(ta.postmatch.scores.6mo.byTr,file=pathFOCData("FOC4yr",paste("TAbyTrPostmatch",matchiternum,".csv",sep="")))
#write(paste("Treatment def:",ta.treatment,"\n","Matching vars:",paste(ta.matchvars,collapse=",")),file=pathFOCData("FOC4yr",paste("TAbyTrPostmatchDesc",matchiternum,".txt",sep="")))
#try controlling for stuff rather than just doing comparison of means - doesn't change anything
#summary(glm(Placement~VocOrBridge+Age+RaceEthnicity+Education1,Bridgepostmatch,family="binomial"))
#summary(glm(Ret180DayUnSub~VocOrBridge+Age+RaceEthnicity+Education1,Bridgepostmatch,family="binomial"))
#summary(lm(lastwagemultjobs~VocOrBridge+Age+RaceEthnicity+Education1,Bridgepostmatch))
#summary(lm(wagediffmultjobs~VocOrBridge+Age+RaceEthnicity+Education1,Bridgepostmatch))