-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdSRM_growth.R
463 lines (360 loc) · 15.5 KB
/
pdSRM_growth.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
require(nlme)
pdSRM <- function (value = numeric(0), form = NULL, nam = NULL, data = sys.frame(sys.parent()))
{
object <- numeric(0)
class(object) <- c("pdSRM", "pdMat")
pdConstruct(object, value, form, nam, data)
}
environment(pdSRM) <- asNamespace('nlme')
pdConstruct.pdSRM <- function (object, value = numeric(0), form = formula(object),
nam = Names(object), data = sys.frame(sys.parent()), ...)
{
val <- NextMethod()
if (length(val) == 0) {
if ((nc <- length(Names(val))) > 0) {
attr(val, "ncol") <- nc
}
return(val)
}
if (is.matrix(val)) {
# Read in a Cholesky factorization of the variance-covariance matrix
# Then, transform it to the original variance-covariance matrix
mat.cov <- crossprod(val)
# Check to see if this is positive-definite
# Build the original correlation matrix based on the variance-covariance matrix
aux <- 1/sqrt(diag(mat.cov))
mat.cor <- aux * t(mat.cov * aux)
nc <- dim(mat.cov)[2]
# Constrain the intercept variance
i.var <- mean(c(mat.cov[1,1], mat.cov[2,2]))
i.sd <- sqrt(i.var)
# Constrain the slope variance
s.var <- mean(c(mat.cov[3,3], mat.cov[4,4]))
s.sd <- sqrt(s.var)
# Constrain the within-individual i-s covariance
is.wi.cor <- mean(c(mat.cor[1,3], mat.cor[2,4]))
is.wi.cov <- mean(c(mat.cov[1,3], mat.cov[2,4]))
# Constrain the between-individual i-s covariance
is.bw.cor <- mean(c(mat.cor[1,4],mat.cor[2,3]))
is.bw.cov <- mean(c(mat.cov[1,4],mat.cov[2,3]))
# Get the intercept covariance
ii.bw.cor <- mat.cor[1,2]
ii.bw.cov <- mat.cov[1,2]
# Get the slope covariance
ss.bw.cor <- mat.cor[3,4]
ss.bw.cov <- mat.cov[3,4]
# Create a new correlation matrix by replacing the 2 constrained covariances
new.mat.cor <- mat.cor
new.mat.cor[1,3] <- is.wi.cor
new.mat.cor[3,1] <- is.wi.cor
new.mat.cor[2,4] <- is.wi.cor
new.mat.cor[4,2] <- is.wi.cor
new.mat.cor[1,4] <- is.bw.cor
new.mat.cor[4,1] <- is.bw.cor
new.mat.cor[2,3] <- is.bw.cor
new.mat.cor[3,2] <- is.bw.cor
# Create the vector of parameters to send to other functions
parms <- c(i.sd, s.sd, is.wi.cor, is.bw.cor, ii.bw.cor, ss.bw.cor)
attributes(parms) <- attributes(val)[names(attributes(val)) != "dim"]
attr(parms, "ncol") <- nc
class(parms) <- c("pdSRM", "pdMat")
return(parms)
}
}
environment(pdConstruct.pdSRM) <- asNamespace('nlme')
pdMatrix.pdSRM <- function (object, factor = FALSE)
{
if (!isInitialized(object)) {
stop("cannot extract the matrix from an uninitialized \"pdSRM\" object")
}
if (is.null(Ncol <- attr(object, "ncol"))) {
stop("cannot extract the matrix with uninitialized dimensions")
}
parms <- as.vector(object)
print(parms)
# Run through the correlations and constrain to be within -1 and 1
for(i in 3:length(parms)) {
if(parms[i] >= 1) {
parms[i] <- .99
}
if(parms[i] <= -1) {
parms[i] <- -.99
}
}
# Recreate all the components
i.sd <- parms[1]
i.var <- i.sd^2
s.sd <- parms[2]
s.var <- s.sd^2
is.wi.cor <- parms[3]
is.wi.cov <- is.wi.cor*i.sd*s.sd
is.bw.cor <- parms[4]
is.bw.cov <- is.bw.cor*i.sd*s.sd
ii.bw.cor <- parms[5]
ii.bw.cov <- ii.bw.cor*i.sd*i.sd
ss.bw.cor <- parms[6]
ss.bw.cov <- ss.bw.cor*s.sd*s.sd
# Create a new covariance matrix using these parameters
mat.cov <- diag(4)
mat.cov[1,1] <- i.var
mat.cov[2,2] <- i.var
mat.cov[3,3] <- s.var
mat.cov[4,4] <- s.var
# this is the constrained within individual i-s covariance
mat.cov[cbind(1:2, 3:4)] <- is.wi.cov
mat.cov[cbind(3:4, 1:2)] <- is.wi.cov
# This is the constrained between individual i-s covariance
mat.cov[4,1] <- is.bw.cov
mat.cov[1,4] <- is.bw.cov
mat.cov[3,2] <- is.bw.cov
mat.cov[2,3] <- is.bw.cov
# This is the estimated bw individual intercept covariance
mat.cov[2,1] <- ii.bw.cov
mat.cov[1,2] <- ii.bw.cov
# This is the estimated bw individual slope covariance
mat.cov[4,3] <- ss.bw.cov
mat.cov[3,4] <- ss.bw.cov
# Create a correlation matrix
aux <- 1/sqrt(diag(mat.cov))
mat.cor <- aux * t(mat.cov * aux)
print(mat.cor)
if(factor) {
# Test for positive definite here
# cholStatus <- try(u <- chol(mat.cov), silent = TRUE)
# cholError <- ifelse(class(cholStatus) == "try-error", TRUE, FALSE)
# if(cholError) {
# cat("matrix is not positive definite: executing work around...you should really check your results my friend!\n")
# value <- upper.tri(mat.cov, diag=TRUE)
# } else {
# value <- chol(mat.cov)
# }
value <- upper.tri(mat.cov, diag=TRUE)
ld <- determinant(mat.cov, logarithm=TRUE)[1]
attr(value, "logDet") <- ld$modulus
} else {
print("this will print value")
value <- mat.cov
print(value)
}
dimnames(value) <- attr(object, "Dimnames")
value
}
environment(pdMatrix.pdSRM) <- asNamespace('nlme')
coef.pdSRM <- function (object, unconstrained = TRUE, ...)
{
if (unconstrained || !isInitialized(object)) NextMethod()
else {
if (is.null(Ncol <- attr(object, "ncol"))) {
stop("cannot obtain constrained coefficients with uninitialized dimensions")
}
val <- as.vector(object)
val <- c(val[1], val[2], val[3], val[4], val[5], val[6])
names(val) <- c("sd i", "sd s", "i-s wi", "i-s bw", "i-i bw", "s-s bw")
val
}
}
environment(coef.pdSRM) <- asNamespace('nlme')
corMatrix.pdSRM <- function (object, ...)
{
if (!isInitialized(object)) {
stop("cannot extract the matrix from an uninitialized \"pdSRM\" object")
}
if (is.null(Ncol <- attr(object, "ncol"))) {
stop("cannot extract the matrix with uninitialized dimensions")
}
obj <- as.vector(object)
print(obj)
aux <- c(obj[1], obj[2], obj[3], obj[4], obj[5], obj[6])
is.wi.cor <- aux[3]
is.bw.cor <- aux[4]
ii.bw.cor <- aux[5]
ss.bw.cor <- aux[6]
# This builds the correlation matrix
# Create a new correlation matrix using these parameters
value <- diag(4)
# this is the constrained within individual i-s covariance
value[cbind(1:2, 3:4)] <- is.wi.cor
value[cbind(3:4, 1:2)] <- is.wi.cor
# This is the constrained between individual i-s covariance
value[4,1] <- is.bw.cor
value[1,4] <- is.bw.cor
value[3,2] <- is.bw.cor
value[2,3] <- is.bw.cor
# This is the estimated bw individual intercept covariance
value[2,1] <- ii.bw.cor
value[1,2] <- ii.bw.cor
# This is the estimated bw individual slope covariance
value[4,3] <- ss.bw.cor
value[3,4] <- ss.bw.cor
# This builds the vector of SD values
attr(value, "stdDev") <- c(rep(aux[1], 2), rep(aux[2], 2))
attr(value, "corr") <- c(aux[3], aux[4], aux[5], aux[6])
if (length(nm <- Names(object)) == 0) {
nm <- paste("V", 1:Ncol, sep = "")
dimnames(value) <- list(nm, nm)
}
names(attr(value, "stdDev")) <- nm
value
}
environment(corMatrix.pdSRM) <- asNamespace('nlme')
summary.pdSRM <- function (object, structName = "Growth model for indistinguishable dyads", ...)
{
if (isInitialized(object)) {
# Build the correlation matrix
value <- corMatrix(object)
attr(value, "structName") <- structName
attr(value, "noCorrelation") <- FALSE
attr(value, "formula") <- formula(object)
class(value) <- "summary.pdMat"
value
}
else {
object
}
}
environment(summary.pdSRM) <- asNamespace('nlme')
################################################
# Here is a function for processing the output
# of nlme run using the above method.
################################################
srm.pct <- function(object) {
# Get the variances using VarCorr
variances <- as.numeric(VarCorr(object)[,1])
num.mem <- (length(variances)-2)/2
grp.var <- variances[1]
act.var <- variances[2]
part.var <- variances[num.mem+2]
dyd.var <- variances[length(variances)]
# Get the correlations directly from the summary object
# Haven't written the method for SRM to pull these directly using VarCorr
# Hack!!
o.sum <- summary(object)
# ap.cor <- attr(o.sum$apVar, "Pars")[4]
o <- as.matrix(o.sum$modelStruct$reStruct[[1]])
ap.cor <- o[(num.mem+2), 2]/sqrt(o[2,2]*o[(num.mem+2),(num.mem+2)])
ap.cov <- ap.cor*sqrt(act.var*part.var)
dyd.cor <- coef(object$modelStruct$corStruct,unconstrained=FALSE)
# dyd.cor <- as.vector(o.sum$modelStruct$corStruct[[1]])/2
dyd.cov <- dyd.cor*dyd.var
variance.parms <- as.numeric(c(grp.var, act.var, part.var, dyd.var, ap.cov, dyd.cov))
names(variance.parms) <- c("Group", "Actor", "Partner", "Dyad", "Generalized Reciprocity", "Dyadic Reciprocity")
# Compute the percentages and return the correlations
total.var <- grp.var + act.var + part.var + dyd.var
act.pct <- 100*act.var/total.var
part.pct <- 100*part.var/total.var
grp.pct <- 100*grp.var/total.var
dyd.pct <- 100*dyd.var/total.var
variance.pcts <- c(grp.pct, act.pct, part.pct, dyd.pct, ap.cor, dyd.cor)
names(variance.pcts) <- c("Group", "Actor", "Partner", "Dyad", "Generalized Reciprocity", "Dyadic Reciprocity")
output <- round(as.data.frame(list(variances.and.covariances=variance.parms, percents.and.correlations=variance.pcts)), 3)
return(output)
}
################################
# This function creates dummy variables and a unique dyad identifier. The user inputs a
# dyadic dataset containing at least three id variables: one for group, one for actor, and one for partner. The function will return a dataset containing these original identifiers, plus new ones (using the prefix "pdSRM" to ensure no conflicts with original variable names). The function will also return a set of a... and p... dummies ranging from 1 to n where n is the size of the largest group.
# Note that this function will return a full dyadic dataset. That is, the function will take note of all individuals who show up either on the actor or the partner side and build a dyadic dataset with a "full" set of dyadic observations for each group. That is, it creates the full round robin, irrespective of whether the original dataset contains these.
# The arguments for the function are as follows:
# group.id = supply a string with the name of your group identifier variable
# act.id = supply a string with the name of your actor identifier variable
# part.id = supply a string with the name of your partner identifier variable
# include.self = logical for whether you want the dataset to contain self ratings. By default the function excludes these.
# merge.original = logical for whether you want to return the full original dataset, with the new identifiers and dummies added to the dataset or whether you just want a data.frame containing the old and new identifier variables. Default is to just return the identifiers.
# Sample call is:
# new.d <- srm.create.dummies(group.id = "team_id", act.id = "act_id", part.id = "part_id", d = old.d)
################################
srm.create.dummies <- function(group.id, act.id, part.id, d, include.self=FALSE, merge.original=FALSE) {
require(data.table)
# sort the dataset by group.id, act.id, part.id
d <- d[with(d,order(d[,group.id], d[,act.id], d[, part.id])), ]
# get just the identifiers
d.sub <- d[,c(group.id, act.id, part.id)]
# get the unique groups
grps <- unique(d.sub[,c(group.id)])
# create a unique actor id and a unique partner id (in case the input data set has these as nested values (rather than unique)
d.sub$act_indiv_id <- paste(d.sub[,c(group.id)], d.sub[,c(act.id)], sep="_-")
d.sub$part_indiv_id <- paste(d.sub[,c(group.id)], d.sub[,c(part.id)], sep="_-")
# get the unique individuals as anyone who shows up on the actor or partner side
acts <- unique(d.sub$act_indiv_id)
parts <- unique(d.sub$part_indiv_id)
all.indivs <- data.frame(unique(c(acts,parts)), stringsAsFactors=F)
colnames(all.indivs) <- "string_indiv_id"
# create a unique indiv_id number for everyone
all.indivs$unique_indiv_id <- 1:length(all.indivs$string_indiv_id)
# Split back apart into the group and indiv identifiers
all.indivs$orig_group_id <- as.numeric(t(matrix(unlist(strsplit(all.indivs$string_indiv_id, split="_-")), nrow=2))[,1])
all.indivs$orig_indiv_id <- as.numeric(t(matrix(unlist(strsplit(all.indivs$string_indiv_id, split="_-")), nrow=2))[,2])
# Using all.indivs and the orig_group_id and the unique_indiv_id create the dyad dataset, with dummies
# get the maximum group size
d.dt <- data.table(all.indivs)
agg <- data.frame(d.dt[,list(group_size = length(unique_indiv_id)), by=list(orig_group_id)])
max_group_size <- max(agg[,2])
# Create the dyad dataset with an act_number and part_number variable, build it from the ground up
grps <- unique(all.indivs$orig_group_id)
grp <- grps[1]
count <- 1
for(grp in grps) {
# Get the people in this group
members <- unique(all.indivs[all.indivs$orig_group_id == grp, c("unique_indiv_id")])
act_num <- 1
for(act in members) {
part_num <- 1
for(part in members) {
res.line <- c(grp, act, act_num, part, part_num)
if(count == 1) {
res <- res.line
} else {
res <- rbind(res, res.line)
}
part_num <- part_num + 1
count <- count + 1
}
act_num <- act_num + 1
}
}
res <- data.frame(res)
colnames(res) <- c("orig_group_id", "unique_act_id", "act_num", "unique_part_id", "part_num")
# create the dummies for each group
res[,c(paste("a",1:max_group_size, sep=""), paste("p", 1:max_group_size, sep=""))] <- NA
for(i in 1:max_group_size) {
v <- paste("a", i, sep="")
res[,v] <- ifelse(res$act_num == i, 1, 0)
v <- paste("p", i, sep="")
res[,v] <- ifelse(res$part_num == i, 1, 0)
}
# If self ratings should be excluded...
if(!include.self) {
res <- res[res$unique_act_id != res$unique_part_id, ]
}
# Add a unique dyad_id
res$dyad_id <- NA
count <- 1
for(grp in grps) {
# Get the actors and partners in this team
actors <- unique(res[res$orig_group_id == grp, c("unique_act_id")])
partners <- unique(res[res$orig_group_id == grp, c("unique_part_id")])
indivs <- sort(unique(c(actors, partners)))
for(a in 1:length(indivs)) {
for(p in 1:length(indivs)) {
if(a > p) {
res$dyad_id <- ifelse((res$unique_act_id == indivs[a] & res$unique_part_id == indivs[p]) | (res$unique_act_id == indivs[p] & res$unique_part_id == indivs[a]), count, res$dyad_id)
count <- count + 1
}
}
}
}
# Add the original identifiers to this data.frame
res1 <- merge(res, all.indivs[,c("orig_group_id", "unique_indiv_id", "orig_indiv_id")], by.x=c("orig_group_id", "unique_act_id"), by.y=c("orig_group_id", "unique_indiv_id"), all.x=T)
colnames(res1)[length(res1)] <- c("orig_act_id")
res2 <- merge(res1, all.indivs[,c("orig_group_id", "unique_indiv_id", "orig_indiv_id")], by.x=c("orig_group_id", "unique_part_id"), by.y=c("orig_group_id", "unique_indiv_id"), all.x=T)
colnames(res2)[length(res2)] <- c("orig_part_id")
colnames(res2) <- c(group.id, "pdSRM_part_id", "pdSRM_act_id", "pdSRM_act_num", "pdSRM_part_num",paste("a",1:max_group_size, sep=""), paste("p", 1:max_group_size, sep=""), "pdSRM_dyad_id", act.id, part.id)
# Order the rows columns in a nicer way
res3 <- res2[with(res2,order(res2[,group.id], res2[,"pdSRM_act_id"], res2[, "pdSRM_part_id"])),c(group.id, act.id, part.id, "pdSRM_act_id", "pdSRM_part_id", "pdSRM_dyad_id", "pdSRM_act_num", "pdSRM_part_num", paste("a",1:max_group_size, sep=""), paste("p", 1:max_group_size, sep=""))]
# Merge with the original dataset
if(merge.original) {
res4 <- merge(res3, d, by=c(group.id, act.id, part.id), all.x=T)
return(res4)
} else {
return(res3)
}
}