abcrubin <- function(x,a=1,b=1,iters=1000){ # # Illustrates the abc method for discrete Bernoulli data. # # x is a vector of zeros and ones (Bernoulli trial results) # Assume that theta = P(1) has a beta(a,b) prior. # Then we generate a sample of size iters from the posterior distribution of theta via abc. # n <- length(x) theta <- c() i <- 0 while(i < iters){ thetacand <- rbeta(1,a,b) y <- rbinom(n,1,thetacand) if (isTRUE(all.equal(y,x))){ theta <- c(theta,thetacand) i <- i+1 } } return(theta) } abcrubinsuff <- function(x,a=1,b=1,iters=1000){ # # Illustrates the abc method for discrete Bernoulli data based on the use of a sufficient statistic. # # x is a vector of zeros and ones (Bernoulli trial results) # Assume that theta = P(1) has a beta(a,b) prior. # Then we generate a sample of size iters from the posterior distribution of theta via abc. # n <- length(x) sumx <- sum(x) theta <- c() i <- 0 while(i < iters){ thetacand <- rbeta(1,a,b) y <- rbinom(1,n,thetacand) if (y==sumx){ theta <- c(theta,thetacand) i <- i+1 } } return(theta) } # # Sample code for the original method ... # a <- 1 b <- 1 thetatrue <- 0.4 n <- 5 x <- rbinom(n,1,thetatrue) thetagrid <- c(0:1000)/1000 ftrue <- dbeta(thetagrid,a+sum(x),b+n-sum(x)) iters <- 2000 ptm <- proc.time() theta <- abcrubin(x,a,b,iters) proc.time() - ptm d <- density(theta) hist(theta,probability=TRUE,nclass=20,xlim=c(0,1),ylim=c(0,max(d$y,ftrue)),xlab='theta',ylab='f(theta|x)',main='') lines(d,type='l',lwd=2,col='green') lines(thetagrid,ftrue,type='l',lwd=2,col='red') # # ... and for the method with the sufficient statistic. # ptm <- proc.time() theta <- abcrubinsuff(x,a,b,iters) proc.time() - ptm # # Note that the computational time is much quicker ... # d <- density(theta) hist(theta,probability=TRUE,nclass=20,xlim=c(0,1),ylim=c(0,max(d$y,ftrue)),xlab='theta',ylab='f(theta|x)',main='') lines(d,type='l',lwd=2,col='green') lines(thetagrid,ftrue,type='l',lwd=2,col='red') # # but that the fit is the same. # # Try this with a bigger value of n (e.g. n=10) The first approach will become unfeasible very quick although the # use of the sufficient statistic means that the second approach is still very fast. #