-
Notifications
You must be signed in to change notification settings - Fork 3
/
distance_functions.R
323 lines (288 loc) · 13.3 KB
/
distance_functions.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
# CBJ November 2017 (Coryn Bailer-Jones, [email protected])
# Functions for distance and inference
source("metropolis.R") # for quantiles.distpost3
library(PolynomF) # for mode.distpost3
library(mvtnorm) # for d.likemulti
##### Naming conventions
# w - measured parallax, as
# wsd - uncertainty in w
# r - inferred distance, pc
# rTrue - true distance, pc (if doing simulations)
# fTrue - wsd/w (fractional parallax uncertainty)
# rMean/rMedian/rMode - mean/median/mode of posterior
# d.xxx - density of PDF xxx (normalized)
# ud.xxx - density of PDF xxx (unnormalized)
# r.xxx - random sample from PDF xxx
# like - likelihood PDF
# prior - prior PDF
# post - posterior PDF
# {} - in comments indicates a vector
##### General functions
# Return 1 if rlo <= r <= rhi, otherwise zero. Vectorized in any one parameter.
lim <- function(r, rlo, rhi) ifelse(r<rlo | r>rhi, 0, 1)
# Normalized Gaussian likelihood in w. Vectorized in any one parameter.
d.like <- function(w, r, wsd) dnorm(x=w, mean=1/r, sd=wsd)
##### (1) uniform distance prior (between rlo and rhi)
# Vectorized in any one parameter
d.distprior1 <- function(r, rmax) lim(r, 0, rmax)*1/rmax
r.distprior1 <- function(Nsamp, rmax) runif(Nsamp, min=0, max=rmax)
# Vectorized in any one parameter
ud.distpost1 <- function(r, w, wsd, rlo, rhi) {
d.like(w, r, wsd)*lim(r, rlo, rhi)
}
##### (2) uniform space density prior (between rlo and rhi)
d.distprior2 <- function(r, rmax) lim(r, 0, rmax)*(3/rmax^3)*r^2 # Vectorized in r or rmax
r.distprior2 <- function(Nsamp, rmax) { # Vectorized in Nsamp
# Use rejection sampling. Expect 1/3 * Nsamp samples to be returned
r1 <- runif(Nsamp, min=0, max=1)
r2 <- runif(Nsamp, min=0, max=1)
r <- rmax*r1[which(r2<=r1^2)]
return(r)
}
# Vectorized in any one parameter
ud.distpost2 <- function(r, w, wsd, rlo, rhi) {
d.like(w, r, wsd)*lim(r, rlo, rhi)*r^2
}
# Define posterior function func() as required by metrop()
func.distpost2 <- function(r, w, wsd, rmax) {
return( c(log10(d.distprior2(r, rmax)), log10(d.like(w, r, wsd))) )
}
# The mode of the posterior PDF is its smaller root; undefined when f>1/sqrt(8) (gives NaN)
mode.distpost2 <- function(w, wsd) {(w/(4*wsd^2))*(1 - sqrt(1 - 8*(wsd/w)^2))}
##### (3) exponentially decreasing space density prior (with length scale rlen)
# Vectorized in r or rlen
d.distprior3 <- function(r, rlen) lim(r,0,Inf)*(1/(2*rlen^3))*r^2*exp(-r/rlen)
# Cannot use rejection sampling as support of r is semi-infinite. So use MCMC.
# Acceptance rate is around 0.7. I don't deal with correlations, so small Nsamp can and does
# return some identical values.
r.distprior3 <- function(Nsamp, rlen) {
return(metrop(func=func.distprior3, thetaInit=rlen, Nburnin=50,
Nsamp=Nsamp, sampleCov=2*rlen^2, verbose=Inf, rlen=rlen)[,3])
}
# Test prior sampling by comparing the following
# rlen <- 1e3
# x=seq(from=1,to=(10*rlen),length.out=1e3); plot(x, x^2*exp(-x/rlen), type="l")
# truehist(r.distprior3(1e5, rlen))
# Vectorized in any one parameter
ud.distpost3 <- function(r, w, wsd, rlen) {
d.like(w, r, wsd)*exp(-r/rlen)*r^2
}
# Define M=ud.distpost3(r=rMode) and F(r)=log(ud.distpost3(r) - log(M/2)
# Function returns F(r) which we can use in a root finding algorithm to find r,
# which will gives bound of FWHM. (If wanted x*Max instead of half*Max,
# use x*M instead of M/2 in expression.)
rootfunc.ud.distpost3 <- function(r, w, wsd, rlen, M) {
#return(2*log(r) - r/rlen - (1/(2*wsd^2))*(w-1/r)^2 - log(wsd*M/2) - 0.5*log(2*pi))
return(log(ud.distpost3(r, w, wsd, rlen))-log(M/2))
}
# Define func() as required by metrop() for prior and posterior sampling
func.distprior3 <- function(r, rlen) {
return( c(log10(d.distprior3(r, rlen)), 0) )
}
func.distpost3 <- function(r, w, wsd, rlen) {
return( c(log10(d.distprior3(r, rlen)), log10(d.like(w, r, wsd))) )
}
# Return posterior mode - a single real number (or NA if something is wrong).
# If retall=TRUE, return all three roots of derivative of PDF (default is FALSE).
# Inputs:
# w - parallax, unrestricted
# wsd - parallax uncertainty, must be >0
# rlen - prior length scale, must be >0 and <Inf
# There are then only two types of solutions of the cubic equation:
# 1. only one root is real => one maxima. Take this as the mode.
# 2. all three roots are real => two maxima.
# if w>=0, take the smallest.
# if w<0 take the positive one (there should be only one).
mode.distpost3 <- function(w, wsd, rlen, retall=FALSE) {
# special cases:
# w<=0 works normally, except for w=-Inf
if(w==-Inf) return(Inf)
if(w==Inf) return(0)
if(wsd==0) return(1/w)
if(wsd==Inf) return(2*rlen)
if(wsd<0) return(NA) # makes no sense
if(rlen<=0) return(NA) # makes no sense
r <- polynom()
p <- r^3/rlen - 2*r^2 + (w/wsd^2)*r - 1/wsd^2
roots <- solve(p) # complex vector of length 3, sorted by increasing size of real part
rMode <- switch(EXPR = toString(length(which(Im(roots)==0))),
"0" = NA,
"1" = Re(roots[which(Im(roots)==0)]),
"2" = NA,
"3" = ifelse(w>0, min(roots), roots[roots>0]) # should be real and unique
)
if(retall) {
return(roots)
} else {
return(rMode)
}
}
# Compute bounds of FWHM by root finding. Providing rMode avoids recalculating this.
# rmax is maximum value of upper bound (needed for root finding)
fwhm.distpost3 <- function(w, wsd, rMode=NA, rlen, rmax=1e6) {
if(is.na(rMode)) {
rMode <- mode.distpost3(w, wsd, rlen)
}
M <- ud.distpost3(r=rMode, w, wsd, rlen)
lo <- uniroot(rootfunc.ud.distpost3, interval=c(0, rMode),
w=w, wsd=wsd, rlen=rlen, M=M)$root
hi <- uniroot(rootfunc.ud.distpost3, interval=c(rMode, rmax),
w=w, wsd=wsd, rlen=rlen, M=M)$root
return(c(lo=lo,hi=hi))
}
# Return value(s) at quantile(s) specified by probs (default is just median)
# Computed via MCMC, as integral appears to be non-analytic.
# Both Metropolis and HybridMC are slow and sometimes inaccurate.
# If diagnose=TRUE and MCMC is performed, plot histogram of samples (as well as
# posterior PDF Z!=NA), report mid and final acceptance rates and step size.
quantiles.distpost3 <- function(w, wsd, rlen, probs=0.5, Nsamp=1e5, diagnose=FALSE) {
if(diagnose) {cat("Inputs:", w, wsd, rlen, probs, Nsamp, "\n")}
if(wsd<=0) return(NA) # makes no sense
if(rlen<=0) return(NA) # makes no sense
rMode <- mode.distpost3(w, wsd, rlen)
stepSize <- rMode # originally used rSD if it existed, otherwise rMode
verbose <- ifelse(diagnose, Nsamp/2, Inf)
samp <- metrop(func=func.distpost3, thetaInit=rMode, Nburnin=1e-3*Nsamp, Nsamp=Nsamp,
sampleCov=stepSize^2, verbose=verbose, w=w, wsd=wsd, rlen=rlen)[,3]
#samp <- as.vector(hybridMC(y.start=rMode, n.samp=Nsamp, logDens=logDens.post3,
# dLogDens=dLogDens.post3, epsilon=0.1, LFsteps=1,
# w=w, wsd=wsd, rlen=rlen))
if(diagnose) {
cat("stepSize =", stepSize, "\n")
truehist(samp)
# need to compute normalization constant Z for the following
# if may not exist (see original code)
#Z <- Z.post3(w, wsd, rlen)
#r <- seq(from=min(samp), to=max(samp), length.out=1e4)
#lines(fac*r, d.post3(r, w, wsd, rlen, Z))
}
return(quantile(samp, probs=probs))
}
##### (3b) - as (3), but with multiple stars and covariance
# Return correlation coefficient between parallaxes for two
# sources with specified sky positions
# and amplitude amp (0-1) and length scale len (degrees)
# using an exponential correlation model.
# Note that this uses a simplified angular separation computation
# which does not work for large separations or near the poles.
parcor <- function(ra1, dec1, ra2, dec2, amp=0.5, len=0.25) {
sep <- sqrt(((ra1-ra2)*cos(conv*0.5*(dec1+dec2)))^2 + (dec1-dec2)^2)
return( amp*exp(-sep/len))
}
# Return covariance matrix between parallaxes for all sources in
# data frame dat, using correlation function parcor.
parcovmat <- function(dat, amp, len) {
V <- matrix(data=0, nrow=nrow(dat), ncol=nrow(dat))
for(i in 2:nrow(dat)) {
for(j in 1:(i-1)) {
V[i,j] <- dat$parallax_error[i]*dat$parallax_error[j] *
parcor(ra1=dat$ra[i], dec1=dat$dec[i],
ra2=dat$ra[j], dec2=dat$dec[j], amp=amp, len=len)
V[j,i] <- V[i,j]
}
}
return(V + diag(dat$parallax_error^2))
}
# Return density of a normalized dim(w) dimensional Gaussian likelihood
# in {w} with mean (1/r)*{1} and covariance matrix parcovmat.
# w and wsd must be vectors of same size, and r a scalar.
# If parcovmat=NULL, then assume product of dim(w) 1D Gaussians.
d.likemulti <- function(w, r, wsd, parcovmat=NULL) {
if(length(w)!=length(wsd)) stop("w and wsd must be same length")
if(length(r)==1) {
if(is.null(parcovmat)) {
return(prod(dnorm(x=w, mean=1/r, sd=wsd)))
} else {
return(dmvnorm(x=w, mean=rep.int(1/r, length(w)), sigma=parcovmat))
}
} else {
like <- double(length(r))
for(i in 1:length(r)) {
if(is.null(parcovmat)) {
like[i] <- prod(dnorm(x=w, mean=1/r[i], sd=wsd))
} else {
like[i] <- dmvnorm(x=w, mean=rep.int(1/r[i], length(w)), sigma=parcovmat)
}
}
return(like)
}
}
# Requires vector w and wsd, scalar rlen, and scalar or vector r.
# Returns nonormalized posterior values for each r.
ud.distpost3multi <- function(r, w, wsd, parcovmat=NULL, rlen) {
d.likemulti(w, r, wsd, parcovmat)*exp(-r/rlen)*r^2
}
##### Likelihood for multiple stars and finite cluster size (no correlations)
# The functions below compute the product of N 1-dimensional integrals,
# each integral being over the true, unknown distance for each star (likelihood for that star).
# The functions differ in cluster geometry assumptions and how the integral is done, but they
# all compute P({w} | rc, sc, ...)
# 1. Assume true distances from cluster center are described by a 1D Gaussian
# with mean 0 and stdev sc, along l.o.s: cluster has negligible extent
# transverse to l.o.s. Uses binomial approximation in quadratic expansion
# to make integral a standard one (with error function).
# (It turns out this is quite a bad approximation.)
# Given vectors or scalars {w}, {wsd}, and scalar rc, sc,
# compute (normalized) likelihood P({w} | {wsd}, rc, sc),
# where rc and sc are cluster distance.
d.likecluster1 <- function(w, wsd, rc, sc) {
if(any(wsd<=0) || rc<=0 || sc<=0) stop("wsd, rc, sc must all be positive")
like <- double(length(w))
for(i in 1:length(w)) {
gg <- (w[i]*rc-1)/(wsd[i]^2*rc^3)
bb <- wsd[i]^2*rc^4/(2*(1+wsd[i]^2*rc^4/sc^2))
integ <- sqrt(pi*bb)*exp(bb*gg^2)*(1+sqrt(pi)/2-sqrt(pi)*pnorm(gg*sqrt(2*bb)))
like[i] <- integ*exp(-(w[i]*rc-1)^2/(2*wsd[i]^2*rc^2))/(2*pi*wsd[i]*sc)
}
return(prod(like))
}
# 2. Assume cluster is spherical, i.e. true distances from cluster centre
# are described by an isotropic 3D Gaussian with mean 0 and stdev sc.
# No other geometric approximation, so each integral is done numerically.
# costheta = cos(theta) where theta is angular separation between assumed
# cluster centre and star (as seen by observer).
# However, if theta<10 deg or so, cos(theta)~1 and can reduce to 1D case
# again, but this time with numerical integral, not binomial approximation.
# In that case no need to pass costheta (as NULL will trigger the 1D case).
# Given vectors or scalars {w}, {wsd}, {costheta}, and scalar rc, sc,
# compute (normalized) likelihood P({w} | {wsd}, {costheta}, rc, sc),
# where rc and sc are cluster distance.
d.likecluster2 <- function(w, wsd, costheta=NULL, rc, sc) {
if(any(wsd<=0) || rc<=0 || sc<=0) stop("wsd, rc, sc must all be positive")
# integrand takes a vector r (others all scalar) and returns vector of same length
intFail <- 0
integrand <- function(r, w, wsd, costheta, rc, sc) {
if(is.null(costheta)) {
return( d.like(w=w, r=r, wsd=wsd)*dnorm(x=r, mean=rc, sd=sc) )
} else {
#stop("unsolved problem for 3D case (costheta=/=NULL): numerical integration often doesn't converge")
x <- sqrt(r^2 + rc^2 - 2*r*rc*costheta) # term in sqrt always non-negative
tl <- double(length(x))
sel <- which(x/rc<1e-6) # special case to avoid divide by small x (i.e. neglect costheta).
# keep limit very small, or else it introduces artefacts.
tl[sel] <- d.like(w=w, r=r[sel], wsd=wsd)*dnorm(x=r[sel], mean=rc, sd=sc)
sel <- which(x/rc>=1e-6)
tl[sel] <- d.like(w=w, r=r[sel], wsd=wsd)*dnorm(x=x[sel], mean=0, sd=sc)*(r[sel]-rc*costheta)/x[sel]
return(tl)
# The following is the principle, but has no trap for small x.
#return( d.like(w=w, r=r, wsd=wsd)*dnorm(x=x, mean=0, sd=sc)*(r-rc*costheta)/x )
}
}
like <- double(length(w))
for(i in 1:length(w)) {
# In principle limits are (0,Inf), but finite ones are faster.
# Also works okay if costheta is NULL.
like[i] <- integrate.func(integrand, lower=max(0,rc-5*sc), upper=rc+5*sc,
w=w[i], wsd=wsd[i], costheta=costheta[i], rc=rc, sc=sc)
if(is.na(like[i])) { # if numerical integral fails, revert to 1D case.
# this may not be valid, but what else can we do?
intFail <- intFail+1
like[i] <- integrate.func(integrand, lower=max(0,rc-5*sc), upper=rc+5*sc,
w=w[i], wsd=wsd[i], costheta=NULL, rc=rc, sc=sc)
}
#cat(like[i], "\n")
}
#if(intFail>0) cat("Numerical integration failures:", intFail, "\n")
#cat(intFail, prod(like), "\n")
return(prod(like))
}