## ----include=FALSE------------------------------------------------- library(knitr) opts_chunk$set( tidy=FALSE,fig.width=6,fig.height=5,fig.path='figs/simulation' ) ## ----echo=FALSE, results='asis'------------------------------------ options(width=70) patchDVI::useknitr() ## ------------------------------------------------------------------ random.number <- numeric(50) # this will store the # pseudorandom output random.seed <- 27218 for (j in 1:50) { random.seed <- (171 * random.seed) %% 30269 random.number[j] <- random.seed / 30269 } ## ------------------------------------------------------------------ random.number ## ----echo=FALSE---------------------------------------------------- set.seed(31415) ## ------------------------------------------------------------------ runif(5) runif(10, min = -3, max = -1) ## ------------------------------------------------------------------ set.seed(32789) # this ensures that your output will match ours runif(5) ## ----eval=FALSE---------------------------------------------------- ## U2 <- runif(1000) ## U1 <- runif(1000) ## X <- U1 + U2 ## Y <- U1 - U2 ## plot(Y ~ X) ## cor(X,Y) # this calculates the sample correlation ## ------------------------------------------------------------------ set.seed(23207) # use this to obtain our output guesses <- runif(20) correct.answers <- (guesses < 0.2) correct.answers ## ------------------------------------------------------------------ table(correct.answers) ## ------------------------------------------------------------------ dbinom(x = 4, size = 6, prob = 0.5) ## ------------------------------------------------------------------ pbinom(4, 6, 0.5) ## ------------------------------------------------------------------ qbinom(0.89, 6, 0.5) ## ----echo=FALSE---------------------------------------------------- set.seed(203) ## ------------------------------------------------------------------ defectives <- rbinom(24, 15, 0.1) defectives any(defectives > 5) ## ----eval=FALSE---------------------------------------------------- ## ranbin <- function(n, size, prob) { ## cumpois <- pbinom(0:(size - 1), size, prob) ## singlenumber <- function() { ## x <- runif(1) ## N <- sum(x > cumpois) ## N ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE---------------------------------------------------- ## ranbin2 <- function(n, size, prob) { ## singlenumber <- function(size, prob) { ## x <- runif(size) ## N <- sum(x < prob) ## N ## } ## replicate(n, singlenumber(size, prob)) ## } ## ----eval=FALSE---------------------------------------------------- ## ranbin3 <- function(n, size, prob) { ## singlenumber <- function(size, prob) { ## k <- 0 ## U <- runif(1) ## X <- numeric(size) ## while (k < size) { ## k <- k + 1 ## if (U <= prob) { ## X[k] <- 1 ## U <- U / prob ## } else { ## X[k] <- 0 ## U <- (U - prob)/(1 - prob) ## } ## } ## return(sum(X)) ## } ## replicate(n, singlenumber(size, prob)) ## } ## ----eval=FALSE---------------------------------------------------- ## for (m in 1:100) { ## z <- (rbinom(20000, size = m, prob = 0.4) - m * 0.4) / ## sqrt(m * 0.4 * 0.6) ## qqnorm(z, ylim = c(-4, 4), main = paste("QQ-plot, m = ", m)) ## qqline(z) ## } ## ------------------------------------------------------------------ dpois(x = 3, lambda = 0.5) ## ----echo=FALSE---------------------------------------------------- set.seed(722) ## ------------------------------------------------------------------ rpois(10, 3.7) ## ----echo=FALSE---------------------------------------------------- set.seed(2307) # ensures that your output matches ours<<>>= ## ------------------------------------------------------------------ N <- rpois(1, 1.5 * 10) P <- runif(N, max = 10) sort(P) ## ----eval=FALSE---------------------------------------------------- ## poissonproc <- function() { ## N <- rpois(1, 1.5 * 10) ## P <- runif(N, max = 10) ## return(sum( 4 <= P & P <= 5 )) ## } ## counts <- replicate(10000, poissonproc()) ## qqplot(counts, rpois(10000, 1.5)) ## abline(0, 1) # the points lie reasonably close to this line ## ----eval=FALSE---------------------------------------------------- ## for (m in seq(1, 100, 2)) { ## z <- (rpois(20000, lambda = m) - m) / sqrt(m) ## qqnorm(z, ylim = c(-4, 4), main = "QQ-plot") ## qqline(z) ## mtext(bquote(lambda == .(m)), 3) # this creates a subtitle which ## # mixes mathematical and numerical notation ## } ## ------------------------------------------------------------------ lambda <- 2 regionlength <- 10 regionwidth <- 10 N <- rpois(1, lambda*regionlength*regionwidth) U1 <- runif(N, min=0, max=regionwidth) U2 <- runif(N, min=0, max=regionlength) ## ----eval=FALSE---------------------------------------------------- ## plot(U1, U2) ## ------------------------------------------------------------------ pexp(1, rate = 3) ## ----echo=FALSE---------------------------------------------------- set.seed(270987) ## ----echo = -3----------------------------------------------------- servicetimes <- rexp(10, rate = 3) servicetimes totalservice <- sum(servicetimes) ## ------------------------------------------------------------------ sum(servicetimes) ## ----echo=FALSE---------------------------------------------------- set.seed(8770) # ensures that your output matches ours ## ------------------------------------------------------------------ X <- rexp(25, rate = 1.5) cumsum(X) ## ------------------------------------------------------------------ qnorm(0.95, mean = 2.7, sd = 3.3) ## ----echo=FALSE---------------------------------------------------- set.seed(2730987) ## ------------------------------------------------------------------ rnorm(10, -3, 0.5) ## ----simhist,echo=2:4,fig.cap="Histogram of simulated values with the standard normal density (solid curve) and the rescaled normal density (dashed curve) overlaid."---- set.seed(8320) x <- rnorm(100000) # simulate from the standard normal x <- x[(0 < x) & (x < 3)] # reject all x's outside (0,3) hist(x, probability=TRUE) # show the simulated values curve(dnorm, 0, 3, add=TRUE) rescalednorm <- function(x) dnorm(x)/(pnorm(3)-pnorm(0)) curve(rescalednorm, 0, 3, add=TRUE, lty=2) ## ----boxmuller, fig.height=2, echo=-1, fig.cap="Histogram of a 10000 values of $\\mbox{e}^{-(Z_1^2+Z_2)^2/2}$ where $Z_1$ and $Z_2$ are simulated independent standard normal random variates.", cache=TRUE---- par(mar=c(3, 3, .75, .5)) Z1 <- rnorm(10000) Z2 <- rnorm(10000) hist(exp(-(Z1^2 + Z2^2)/2), main="", xlim=c(-1, 2)) ## ----Z1Z2, fig.align="center", fig.height=1.75, fig.width=2.5, echo=FALSE, fig.cap="Location of a pair of independent standard normal variates on a circle of given radius (i.e. the length of the segment A)."---- library(grid) grid.circle(r=.4) pushViewport(viewport(x=.5, y=.7, height=.4)) grid.xaxis(label=FALSE) upViewport() pushViewport(viewport(y=.5, x=.7, width=.4)) grid.yaxis(label=FALSE) upViewport() grid.lines(c(.5, .5 + .4*sin(.35)), c(.5, .5 + .4*cos(.35))) grid.text("A", x = c(.5 + .24*sin(.35)), y = c(.5 + .18*cos(.35))) pushViewport(viewport(x = .5 + .4*sin(.35), y = .5 + .4*cos(.35), width=.02, height=.02)) grid.circle(gp=gpar(fill="grey")) grid.text(expression(paste("(",Z[1], ",",Z[2],")")), x=2.5, y=2.5) ## ----eval=FALSE---------------------------------------------------- ## U <- runif(n) # generate n uniform[0,1] values ## D <- sqrt(-2*log(U)) # calculate the distance ## V <- runif(n, 0, 2*pi) # generate uniform[0,2pi] values ## Z1 <- D*cos(V) ## Z2 <- D*sin(V) ## ----boxmullereg, fig.height=1.75, echo=FALSE, fig.cap='Histograms of $Z_1$ and $Z_2$ variates obtained by the Box-M\\"{u}ller transformation method.'---- par(mfrow=c(1, 2), mar=c(3, 3, .75, .5)) n <- 100000; U <- runif(n) # generate n uniform[0,1] values D <- sqrt(-2*log(U)) # calculate the distance V <- runif(n)*2*pi # generate uniform[0,2pi] values Z1 <- D*sin(V); Z2 <- D*cos(V) hist(Z1, freq=FALSE, main=""); hist(Z2,freq=FALSE, main="") ## ------------------------------------------------------------------ RNGkind(normal.kind = "Box-Muller") ## ------------------------------------------------------------------ RNGkind(normal.kind = "default") ## ------------------------------------------------------------------ test <- function() for (i in 1:10) x <- rnorm(100000000) ## ------------------------------------------------------------------ X <- numeric(6) X[1] <- 100 results <- replicate(1000, { for (t in 1:5) X[t + 1] <- X[t] * exp(rnorm(1, mean = 0, sd = log(1.01))) X }) str(results) ## ------------------------------------------------------------------ table(apply(results, 2, function(x) all(x[2:6] > 100))) ## ----eval = FALSE-------------------------------------------------- ## sample(1:n, size = 1, prob = P[X[t], ]) ## ------------------------------------------------------------------ P <- matrix(c(0.99, 0.01, 0, 0.5, 0.4, 0.1, 0, 0.25, 0.75), 3, 3, byrow = TRUE) healthy <- numeric(10000) healthy[1] <- 1 for (t in 1:9999) healthy[t + 1] <- sample(1:3, size = 1, prob = P[healthy[t], ]) table(healthy) sick <- numeric(10000) sick[1] <- 3 for (t in 1:9999) sick[t + 1] <- sample(1:3, size = 1, prob = P[sick[t], ]) table(sick) ## ----echo=FALSE---------------------------------------------------- set.seed(1122) ## ------------------------------------------------------------------ u <- runif(100000) mean(u^4) ## ----echo=FALSE---------------------------------------------------- set.seed(1135) ## ------------------------------------------------------------------ u <- runif(100000, min = 2, max = 5) mean(sin(u))*(5-2) ## ----echo=FALSE---------------------------------------------------- set.seed(1172) ## ------------------------------------------------------------------ U <- runif(100000, min = 1, max = 7) V <- runif(100000, min = 3, max = 10) mean(sin(U - V))*42 ## ----echo=FALSE---------------------------------------------------- set.seed(1179) ## ------------------------------------------------------------------ X <- rexp(100000) mean( exp( -(X + 1)^2 ) / dexp(X) ) ## ----triangulardf,echo=FALSE, fig.width=5, fig.height=3, fig.cap="The graph of the triangular density function on $(0,2)$, together with a dashed rectangle in the right hand panel."---- par(mfrow=c(1,2)) curve((1-abs(1-x))*(x<2)*(x>0), from = -1, to = 3, ylab="1-|1-x|") curve((1-abs(1-x))*(x<2)*(x>0), from = -1, to = 3, ylab="1-|1-x|") lines(c(0,2,2,0,0),c(0,0,1,1,0), lty=2) ## ----eval=FALSE---------------------------------------------------- ## U1 <- runif(100000, max=2) ## U2 <- runif(100000) ## X <- U1[U2 < (1 - abs(1 - U1))] ## ----kdensity,echo=FALSE,fig.height=4,fig.cap="Density $f(x)$ (solid line) and $kg(x)$ (dashed line). The points are uniformly distributed below $f(x)$; those above $kg(x)$ (open dots) are rejected, while those below (solid dots) are accepted. The tick marks show the output values of $X$."---- set.seed(1458) f <- function(x) dnorm(x, sd=0.4) g <- function(x) 0.3*(0.3*dnorm(x, -0.4, 0.2) + 0.7*dnorm(x, 0.4, 0.2)) curve(f, from=-1, to=1, ylim=c(0, 1)) curve(g, from=-1, to=1, add=TRUE, lty=2) u <- runif(50) x <- rnorm(50, sd=0.4) points(x, u*f(x), pch=ifelse(u*f(x) < g(x), 16, 1), cex=0.6) rug(x[u*f(x) < g(x)]) ## ----echo=FALSE---------------------------------------------------- set.seed(1) ## ------------------------------------------------------------------ kg <- function(x) 0.5*exp(-(x^1.5)) X <- rexp(100000) U <- runif(100000) # accept only those X X <- X[ U*dexp(X) < kg(X) ] # for which Uf(X) < kg(X) ## ----expexample,echo=1,fig.cap="Histogram of a sample from $g(x) = C e^{-x^{1.5}}$. The approximate density $g(x)$ is shown with the solid line, and $f(x)/k$ is shown dashed."---- hist(X, freq = FALSE, breaks="Scott") k <- length(X) / 100000 g <- function(x) kg(x) / k curve(g, from=0, to=max(X), add=TRUE) fbyk <- function(x) dexp(x) / k curve(fbyk, from=0, to=max(X), add=TRUE, lty=2) ## ----eval=FALSE---------------------------------------------------- ## rannorm <- function(n, mean = 0, sd = 1){ ## singlenumber <- function() { ## repeat{ ## U <- runif(1) ## U2 <- sign(runif(1, min = -1)) # value is +1 or -1 ## Y <- rexp(1) * U2 # Y is a double exponential r.v. ## if (U < dnorm(Y) / exp(-abs(Y))) break ## } ## return(Y) ## } ## replicate(n, singlenumber()) * sd + mean ## } ## ----eval=FALSE---------------------------------------------------- ## probs <- c(0.2, 0.3, 0.1, 0.15, 0.05, 0.2) ## randiscrete1 <- function(n, probs) { ## cumprobs <- cumsum(probs) ## singlenumber <- function() { ## x <- runif(1) ## N <- sum(x > cumprobs) ## N ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE---------------------------------------------------- ## randiscrete2 <- function(n, probs) { ## singlenumber <- function() { ## repeat{ ## U <- runif(2, min=c(-0.5, 0), max=c(length(probs) - 0.5, ## max(probs))) ## if (U[2] < probs[round(U[1]) + 1]) break ## } ## return(round(U[1])) ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE---------------------------------------------------- ## set.seed(91626) ## probs <- runif(100) ## probs <- probs / sum(probs) ## ------------------------------------------------------------------ rbexp <- function(n, theta = 2) { nremaining <- n rejectStep <- function(n, theta) { X <- rexp(n, rate = theta - 1) Y <- rexp(n, rate = theta - 1) V <- runif(n)*dexp(X, rate = theta - 1)* dexp(Y, rate = theta - 1)/(theta-1)^2 ACCEPT <- V <= exp(-(X*Y + theta*(X+Y))) X <- X[ACCEPT]; Y <- Y[ACCEPT] cbind(X, Y) } XY <- cbind(c(), c()) while (nremaining > 0) { XY <- rbind(XY, rejectStep(nremaining, theta = theta)) nObs <- nrow(XY) nremaining <- n - nObs } XY[1:n, ] } ## ------------------------------------------------------------------ rbexp(4, 2) ## ------------------------------------------------------------------ X <- rexp(100000) W <- g(X)/dexp(X) mean <- weighted.mean(X, W) mean weighted.mean( (X - mean)^2, W ) # The variance as E[ (X - Xbar)^2 ] ## ----bayesian, fig.cap="Scaled prior and posterior densities from coin tipping experiment."---- theta <- seq(0, 1, length.out = 200) prior <- dnorm(theta, mean = 0.5, sd = 0.1) prior <- prior/max(prior) n <- 200 x <- 54 posterior <- prior * theta^x * (1 - theta)^(n - x) posterior <- posterior/max(posterior) plot(theta, prior, type = "l", col = "blue", xlab = expression(theta), ylab = "Scaled Density") lines(theta, posterior, col = "green") legend("topright", legend = c("Prior", "Posterior"), lty = 1, col = c("blue", "green")) ## ----metropolis, fig.cap = "One hundred steps of the Metropolis algorithm targeting the posterior density.", echo = -1---- set.seed(155) logg <- function(theta) { dnorm(theta, mean = 0.5, sd = 0.1, log = TRUE) + x*log(theta) + (n - x)*log(1-theta) } N <- 100 xt <- numeric(N + 1) xt[1] <- 0.5 sd <- 0.1 steps <- rnorm(N, 0, sd) u <- runif(N) for (i in 1:N) { y <- xt[i] + steps[i] if (y <= 0 || y >= 1 || log(u[i]) > logg(y) - logg(xt[i])) xt[i + 1] <- xt[i] else xt[i + 1] <- y } plot(0:N, xt, type = "l", ylab = expression(x[t]), xlab = "t") ## ----echo = FALSE-------------------------------------------------- N <- 10000 xt <- numeric(N + 1) xt[1] <- 0.5 sd <- 0.1 steps <- rnorm(N, 0, sd) u <- runif(N) for (i in 1:N) { y <- xt[i] + steps[i] if (y <= 0 || y >= 1 || log(u[i]) > logg(y) - logg(xt[i])) xt[i + 1] <- xt[i] else xt[i + 1] <- y } ## ------------------------------------------------------------------ library(NLP) library(janeaustenr) chap1 <- as.String(prideprejudice[10:120]) ## ------------------------------------------------------------------ tokens <- chap1[whitespace_tokenizer(chap1)] tokens[1:12] ## ----eval = FALSE-------------------------------------------------- ## tab <- table(tokens) ## paste(sample(names(tab), 20, replace = TRUE, prob = tab), ## collapse = " ") ## ----eval=FALSE---------------------------------------------------- ## N <- rpois(1, 15) # N cluster centers ## U1 <- runif(N, min = 0, max = 1000) ## U2 <- runif(N, min = 0, max = 1000) ## # (U1, U2) are the coordinates of the cluster centers ## ## M <- rpois(N, 40) # Numbers of points in each cluster ## x <- vector("list", N) # create a list with N empty components ## y <- vector("list", N) ## for (i in 1:N) { ## x[[i]] <- rnorm(M[i], mean = U1[i], sd = 20) ## y[[i]] <- rnorm(M[i], mean = U2[i], sd = 20) ## } ## clusters <- data.frame(x = unlist(x), y = unlist(y)) ## plot(clusters) # visualize the result