Approximating \(\pi\) (slide
6)
set.seed(100)
nrep <- 10000
x <- runif(nrep, min = -1, max = 1)
y <- runif(nrep, min = -1, max = 1)
mean(x^2 + y^2 < 1) * 4
## [1] 3.1388
plot(1:nrep, 4 * cumsum(x^2 + y^2 < 1) / 1:nrep, type = "l")
abline(h = pi, col = "red")
Bootstrap Confidence Interval (slide 30)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(nycflights13)
set.seed(100)
# Sample of size 50
flights_smpl <- flights |>
select(dep_time, arr_time, air_time, distance) |>
slice_sample(n = 50)
xbar <- mean(flights_smpl$air_time, na.rm = T)
true_mean <- mean(flights$air_time, na.rm = T)
n <- length(flights_smpl$air_time)
n_boot <- 1000
boot_smpl <- matrix(nrow = n_boot, ncol = n)
boot_mean <- vector(length = n_boot)
# Fill in 1000 bootstrap samples
for (i in seq_len(n_boot)) {
boot_smpl[i, ] <- sample(flights_smpl$air_time, n, replace = T)
boot_mean[i] <- mean(boot_smpl[i, ], na.rm = T)
}
# Create the histogram
hist(boot_mean, main = "Histogram of Bootstrap Sample Means",
xlab = "Means")
abline(v = xbar, col = "blue", lty = 1, lwd = 2)
se <- sd(boot_mean, na.rm = T) # Estimate standard error of bootstrap means
abline(v = xbar + c(- 2 * se, 2 * se), col = "blue", lwd = 1, lty = 2)
abline(v = true_mean, col = "red", lwd = 2)
legend("topright", legend = c("True mean", "xbar", "bounds"),
lty = c(1, 1, 2), col = c("red", "blue", "blue"),
lwd = c(2, 2, 1))
Bootstrap Regression (slide 33)
hibbs <- as_tibble(read.csv("hibbs.dat", sep = ""))
par(mar=c(3,3,2,.1), mgp=c(1.7,.5,0), tck=-.01)
plot(c(-.7, 4.5), c(43,63), type="n",
xlab="Avg recent growth in personal income",
ylab="Incumbent party's vote share",
xaxt="n", yaxt="n", mgp=c(2,.5,0),
main="Forecasting the election from the economy ", bty="l")
axis(1, 0:4, paste(0:4,"%",sep=""), mgp=c(2,.5,0))
axis(2, seq(45,60,5), paste(seq(45,60,5),"%",sep=""), mgp=c(2,.5,0))
with(hibbs, text(growth, vote, year, cex=.8))
abline(50, 0, lwd=.5, col="gray")
lm_hibbs <- lm(vote ~ growth, data = hibbs)
set.seed(100)
# Draw trendline for each bootstrap sample.
for (i in seq_len(10)) {
resample <- slice_sample(hibbs, n = nrow(hibbs), replace = T)
abline(lm(vote ~ growth, data = resample), col = "azure3")
}
abline(lm_hibbs, col = "red")
legend("topleft",
legend = c("orig. sample", "boot sample"),
col = c("red", "azure3"),
lty = 1, bty = "n")