- Toy collector solution
- Plug-In and the Bootstrap
- Nonparametric and Parametric Bootstraps
- Examples
Children (and some adults) are frequently enticed to buy breakfast cereal in an effort to collect all the action figures. Assume there are 15 action figures and each cereal box contains exactly one with each figure being equally likely.
| Figure | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Probability | .2 | .1 | .1 | .1 | .1 | .1 | .05 | .05 | .05 | .05 | .02 | .02 | .02 | .02 | .02 |
prob.table <- c(.2, .1, .1, .1, .1, .1, .05, .05, .05, .05, .02, .02, .02, .02, .02)
boxes <- seq(1,15)
box.count <- function(prob=prob.table){
check <- double(length(prob))
i <- 0
while(sum(check)<length(prob)){
x <- sample(boxes, 1, prob=prob)
check[x] <- 1
i <- i+1
}
return(i)
}
trials <- 1000
sim.boxes <- double(trials)
for(i in 1:trials){
sim.boxes[i] <- box.count()
}
est <- mean(sim.boxes)
mcse <- sd(sim.boxes) / sqrt(trials)
interval <- est + c(-1,1)*1.96*mcse
est
## [1] 115.468
interval
## [1] 112.0715 118.8645
hist(sim.boxes, main="Histogram of Total Boxes", xlab="Boxes") abline(v=300, col="red", lwd=2)
| World | Real | Bootstrap |
|---|---|---|
| true distribution | \(F\) | \(\hat{F}_n\) |
| data | \(X_1, \dots, X_n\) IID \(F\) | \(X_1^*, \dots, X_n^*\) IID \(\hat{F}_n\) |
| empirical distribution | \(\hat{F}_n\) | \(F_n^*\) |
| parameter | \(\theta = t(F)\) | \(\hat{\theta}_n = t(\hat{F}_n)\) |
| estimator | \(\hat{\theta}_n = t(\hat{F}_n)\) | \(\theta_n^* = t(F_n^*)\) |
| error | \(\hat{\theta}_n - \theta\) | \(\theta_n^* - \hat{\theta}_n\) |
| standardized error | \(\frac{\hat{\theta}_n - \theta}{s(\hat{F}_n)}\) | \(\frac{\theta_n^* - \hat{\theta}_n}{s(F_n^*)}\) |
Notation \(\theta = t(F)\) means \(\theta\) is some function of the true unknown distribution
| World | Real | Bootstrap |
|---|---|---|
| parameter | \(\theta\) | \(\hat{\theta}_n\) |
| true distribution | \(F_{\theta}\) | \(F_{\hat{\theta}_n}\) |
| data | \(X_1, \dots, X_n\) IID \(F_{\theta}\) | \(X_1^*, \dots, X_n^*\) IID \(F_{\hat{\theta}_n}\) |
| estimator | \(\hat{\theta}_n = t(X_1, \dots, X_n)\) | \(\theta_n^* = t(X_1^*, \dots, X_n^*)\) |
| error | \(\hat{\theta}_n - \theta\) | \(\theta_n^* - \hat{\theta}_n\) |
| standardized error | \(\frac{\hat{\theta}_n - \theta}{s(X_1, \dots, X_n)}\) | \(\frac{\theta_n^* - \hat{\theta}_n}{s(X_1^*, \dots, X_n^*)}\) |
speed <- c(28, -44, 29, 30, 26, 27, 22, 23, 33, 16, 24, 29, 24, 40 , 21, 31, 34, -2, 25, 19)
mean(speed)
## [1] 21.75
newspeed <- speed - mean(speed) + 33.02
newspeed will have exactly the same shape as speed, but will be shiftedn <- 1000
bstrap <- double(n)
for (i in 1:n){
newsample <- sample(newspeed, 20, replace=T)
bstrap[i] <- mean(newsample)
}
(sum(bstrap < 21.75) + sum(bstrap > 44.29))/1000
## [1] 0.011
bootstrap.resample <- function (object) sample (object, length(object), replace=TRUE)
diff.in.means <- function(df) {
mean(df[df$group==1,"extra"]) - mean(df[df$group==2,"extra"])
}
resample.diffs <- replicate(2000, diff.in.means(sleep[bootstrap.resample(1:nrow(sleep)),]))
hist(resample.diffs, main="Bootstrap Sampling Distribution") abline(v=diff.in.means(sleep), col=2, lwd=3)
boot librarybootcity data included in the boot packagelibrary(boot) data(city) ratio <- function(d, w) sum(d$x * w)/sum(d$u * w) results <- boot(city, ratio, R=1000, stype="w")
results
## ## ORDINARY NONPARAMETRIC BOOTSTRAP ## ## ## Call: ## boot(data = city, statistic = ratio, R = 1000, stype = "w") ## ## ## Bootstrap Statistics : ## original bias std. error ## t1* 1.520313 0.03619229 0.2300674
boot.ci(results, type="bca")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS ## Based on 1000 bootstrap replicates ## ## CALL : ## boot.ci(boot.out = results, type = "bca") ## ## Intervals : ## Level BCa ## 95% ( 1.275, 2.183 ) ## Calculations and Intervals on Original Scale
mpg) on car weight (wt) and displacement (disp)mtcarsrsq <- function(formula, data, indices) {
d <- data[indices,]
fit <- lm(formula, data=d)
return(summary(fit)$r.square)
}
results <- boot(data=mtcars, statistic=rsq,
R=1000, formula=mpg~wt+disp)
results
## ## ORDINARY NONPARAMETRIC BOOTSTRAP ## ## ## Call: ## boot(data = mtcars, statistic = rsq, R = 1000, formula = mpg ~ ## wt + disp) ## ## ## Bootstrap Statistics : ## original bias std. error ## t1* 0.7809306 0.00958034 0.05058801
boot.ci(results, type="bca")
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS ## Based on 1000 bootstrap replicates ## ## CALL : ## boot.ci(boot.out = results, type = "bca") ## ## Intervals : ## Level BCa ## 95% ( 0.6424, 0.8557 ) ## Calculations and Intervals on Original Scale ## Some BCa intervals may be unstable
plot(results)