## ----include=FALSE------------------------------------------------------ library(knitr) opts_chunk$set( tidy=FALSE,fig.width=6,fig.height=5,fig.path='figs/simulation' ) ## ----echo=FALSE, results='asis'----------------------------------------- options(width=70) patchDVI::useknitr() ## ----------------------------------------------------------------------- random.number <- numeric(50) # this will store the # pseudorandom output random.seed <- 27218 for (j in 1:50) { random.seed <- (171 * random.seed) %% 30269 random.number[j] <- random.seed / 30269 } ## ----------------------------------------------------------------------- random.number ## ----echo=FALSE--------------------------------------------------------- set.seed(31415) ## ----------------------------------------------------------------------- runif(5) runif(10, min = -3, max = -1) ## ----------------------------------------------------------------------- set.seed(32789) # this ensures that your output will match ours runif(5) ## ----eval=FALSE--------------------------------------------------------- ## U2 <- runif(1000) ## U1 <- runif(1000) ## X <- U1 + U2 ## Y <- U1 - U2 ## plot(Y ~ X) ## cor(X,Y) # this calculates the sample correlation ## ----------------------------------------------------------------------- set.seed(23207) # use this to obtain our output guesses <- runif(20) correct.answers <- (guesses < 0.2) correct.answers ## ----------------------------------------------------------------------- table(correct.answers) ## ----------------------------------------------------------------------- dbinom(x = 4, size = 6, prob = 0.5) ## ----------------------------------------------------------------------- pbinom(4, 6, 0.5) ## ----------------------------------------------------------------------- qbinom(0.89, 6, 0.5) ## ----echo=FALSE--------------------------------------------------------- set.seed(203) ## ----------------------------------------------------------------------- defectives <- rbinom(24, 15, 0.1) defectives any(defectives > 5) ## ----eval=FALSE--------------------------------------------------------- ## ranbin <- function(n, size, prob) { ## cumpois <- pbinom(0:(size - 1), size, prob) ## singlenumber <- function() { ## x <- runif(1) ## N <- sum(x > cumpois) ## N ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE--------------------------------------------------------- ## ranbin2 <- function(n, size, prob) { ## singlenumber <- function(size, prob) { ## x <- runif(size) ## N <- sum(x < prob) ## N ## } ## replicate(n, singlenumber(size, prob)) ## } ## ----eval=FALSE--------------------------------------------------------- ## ranbin3 <- function(n, size, prob) { ## singlenumber <- function(size, prob) { ## k <- 0 ## U <- runif(1) ## X <- numeric(size) ## while (k < size) { ## k <- k + 1 ## if (U <= prob) { ## X[k] <- 1 ## U <- U / prob ## } else { ## X[k] <- 0 ## U <- (U - prob)/(1 - prob) ## } ## } ## return(sum(X)) ## } ## replicate(n, singlenumber(size, prob)) ## } ## ----eval=FALSE--------------------------------------------------------- ## for (m in 1:100) { ## z <- (rbinom(20000, size = m, prob = 0.4) - m * 0.4) / ## sqrt(m * 0.4 * 0.6) ## qqnorm(z, ylim = c(-4, 4), main = paste("QQ-plot, m = ", m)) ## qqline(z) ## } ## ----------------------------------------------------------------------- dpois(x = 3, lambda = 0.5) ## ----echo=FALSE--------------------------------------------------------- set.seed(722) ## ----------------------------------------------------------------------- rpois(10, 3.7) ## ----echo=FALSE--------------------------------------------------------- set.seed(2307) # ensures that your output matches ours<<>>= ## ----------------------------------------------------------------------- N <- rpois(1, 1.5 * 10) P <- runif(N, max = 10) sort(P) ## ----eval=FALSE--------------------------------------------------------- ## poissonproc <- function() { ## N <- rpois(1, 1.5 * 10) ## P <- runif(N, max = 10) ## return(sum( 4 <= P & P <= 5 )) ## } ## counts <- replicate(10000, poissonproc()) ## qqplot(counts, rpois(10000, 1.5)) ## abline(0, 1) # the points lie reasonably close to this line ## ----eval=FALSE--------------------------------------------------------- ## for (m in seq(1, 120, 2)) { ## z <- (rpois(20000, lambda = m) - m) / sqrt(m) ## qqnorm(z, ylim = c(-4, 4), main = "QQ-plot") ## qqline(z) ## mtext(bquote(lambda == .(m)), 3) # this creates a subtitle which ## # mixes mathematical and numerical notation ## } ## ----------------------------------------------------------------------- lambda <- 2 regionlength <- 10 regionwidth <- 10 N <- rpois(1, lambda*regionlength*regionwidth) U1 <- runif(N, min=0, max=regionwidth) U2 <- runif(N, min=0, max=regionlength) ## ----eval=FALSE--------------------------------------------------------- ## plot(U1, U2) ## ----------------------------------------------------------------------- pexp(1, rate = 3) ## ----echo=FALSE--------------------------------------------------------- set.seed(270987) ## ----------------------------------------------------------------------- servicetimes <- rexp(10, rate = 3) servicetimes ## ----------------------------------------------------------------------- sum(servicetimes) ## ----echo=FALSE--------------------------------------------------------- set.seed(8770) # ensures that your output matches ours ## ----------------------------------------------------------------------- X <- rexp(25, rate = 1.5) cumsum(X) ## ----------------------------------------------------------------------- qnorm(0.95, mean = 2.7, sd = 3.3) ## ----echo=FALSE--------------------------------------------------------- set.seed(2730987) ## ----------------------------------------------------------------------- rnorm(10, -3, 0.5) ## ----simhist,echo=2:4,fig.cap="Histogram of simulated values with the standard normal density (solid curve) and the rescaled normal density (dashed curve) overlaid."---- set.seed(8320) x <- rnorm(100000) # simulate from the standard normal x <- x[(0 < x) & (x < 3)] # reject all x's outside (0,3) hist(x, probability=TRUE) # show the simulated values curve(dnorm, 0, 3, add=TRUE) rescalednorm <- function(x) dnorm(x)/(pnorm(3)-pnorm(0)) curve(rescalednorm, 0, 3, add=TRUE, lty=2) ## ----------------------------------------------------------------------- X <- numeric(6) X[1] <- 100 results <- replicate(1000, { for (t in 1:5) X[t + 1] <- X[t] * exp(rnorm(1, mean = 0, sd = log(1.01))) X }) str(results) ## ----------------------------------------------------------------------- table(apply(results, 2, function(x) all(x[2:6] > 100))) ## ----eval = FALSE------------------------------------------------------- ## sample(1:n, size = 1, prob = P[X[t], ]) ## ----------------------------------------------------------------------- P <- matrix(c(0.99, 0.01, 0, 0.5, 0.4, 0.1, 0, 0.25, 0.75), 3, 3, byrow = TRUE) healthy <- numeric(10000) healthy[1] <- 1 for (t in 1:9999) healthy[t + 1] <- sample(1:3, size = 1, prob = P[healthy[t], ]) table(healthy) sick <- numeric(10000) sick[1] <- 3 for (t in 1:9999) sick[t + 1] <- sample(1:3, size = 1, prob = P[sick[t], ]) table(sick) ## ----echo=FALSE, comment=""--------------------------------------------- cat(readLines("ode.ly"), sep="\n") ## ----------------------------------------------------------------------- notes <- scan("ode.ly", skip = 3, nlines = 4, what = character()) ## ----------------------------------------------------------------------- random <- sample(notes, 63, replace = TRUE) writeLilypond <- function(notes, filename = "") { cat(c("\\score{\n {\n \\tempo 4 = 120", paste(" ", strwrap(paste(notes, collapse = " "))), " }\n \\layout{}\n \\midi{}\n}"), sep = "\n", file = filename) } writeLilypond(random, filename = "indep.txt") ## ----------------------------------------------------------------------- countTransitions <- function(notes, n) { len <- length(notes) # Put special markers at the start and end notes <- c(rep("START", n), notes, "STOP") result <- list(n = n) # Loop over the data + STOP, counting symbols for (i in 1:(len+1)) { index <- notes[ seq_len(n+1) + i - 1 ] # We only store positive results, so if we've never seen this # transition before, it will be NULL; insert an empty list() for (j in seq_len(n)) { if (is.null(result[[ index[1:j] ]])) result[[ index[1:j] ]] <- list() } prevcount <- result[[ index ]] if (is.null(prevcount)) result[[ index ]] <- 1 else result[[ index ]] <- result[[ index ]] + 1 } result } ## ----------------------------------------------------------------------- generateTransitions <- function(counts, len, filename = "") { # Initialize with START n <- counts$n len <- len + n notes <- rep("START", len) i <- n while (i < len && notes[i] != "STOP" ) { i <- i + 1 index <- notes[i - n:1] distn <- counts[[ index ]] notes[i] <- sample(names(distn), 1, prob = distn) } # Now leave off START and STOP, and return the result notes[ !(notes %in% c("START", "STOP")) ] } ## ----echo=-1, comment = ""---------------------------------------------- set.seed(123) counts <- countTransitions(notes, 2) writeLilypond(generateTransitions(counts, 100)) ## ----echo=FALSE--------------------------------------------------------- set.seed(1122) ## ----------------------------------------------------------------------- u <- runif(100000) mean(u^4) ## ----echo=FALSE--------------------------------------------------------- set.seed(1135) ## ----------------------------------------------------------------------- u <- runif(100000, min = 2, max = 5) mean(sin(u))*(5-2) ## ----echo=FALSE--------------------------------------------------------- set.seed(1172) ## ----------------------------------------------------------------------- U <- runif(100000, min = 1, max = 7) V <- runif(100000, min = 3, max = 10) mean(sin(U - V))*42 ## ----echo=FALSE--------------------------------------------------------- set.seed(1179) ## ----------------------------------------------------------------------- X <- rexp(100000) mean( exp( -(X + 1)^2 ) / dexp(X) ) ## ----triangulardf,echo=FALSE, fig.width=5, fig.height=3, fig.cap="The graph of the triangular density function on $(0,2)$, together with a dashed rectangle in the right hand panel."---- par(mfrow=c(1,2)) curve((1-abs(1-x))*(x<2)*(x>0), from = -1, to = 3, ylab="1-|1-x|") curve((1-abs(1-x))*(x<2)*(x>0), from = -1, to = 3, ylab="1-|1-x|") lines(c(0,2,2,0,0),c(0,0,1,1,0), lty=2) ## ----eval=FALSE--------------------------------------------------------- ## U1 <- runif(100000, max=2) ## U2 <- runif(100000) ## X <- U1[U2 < (1 - abs(1 - U1))] ## ----kdensity,echo=FALSE,fig.height=4,fig.cap="Density $f(x)$ (solid line) and $kg(x)$ (dashed line). The points are uniformly distributed below $f(x)$; those above $kg(x)$ (open dots) are rejected, while those below (solid dots) are accepted. The tick marks show the output values of $X$."---- set.seed(1458) f <- function(x) dnorm(x, sd=0.4) g <- function(x) 0.3*(0.3*dnorm(x, -0.4, 0.2) + 0.7*dnorm(x, 0.4, 0.2)) curve(f, from=-1, to=1, ylim=c(0, 1)) curve(g, from=-1, to=1, add=TRUE, lty=2) u <- runif(50) x <- rnorm(50, sd=0.4) points(x, u*f(x), pch=ifelse(u*f(x) < g(x), 16, 1), cex=0.6) rug(x[u*f(x) < g(x)]) ## ----echo=FALSE--------------------------------------------------------- set.seed(1) ## ----------------------------------------------------------------------- kg <- function(x) 0.5*exp(-(x^1.5)) X <- rexp(100000) U <- runif(100000) # accept only those X X <- X[ U*dexp(X) < kg(X) ] # for which Uf(X) < kg(X) ## ----expexample,echo=1,fig.cap="Histogram of a sample from $g(x) = C e^{-x^{1.5}}$. The approximate density $g(x)$ is shown with the solid line, and $f(x)/k$ is shown dashed."---- hist(X, freq = FALSE, breaks="Scott") k <- length(X) / 100000 g <- function(x) kg(x) / k curve(g, from=0, to=max(X), add=TRUE) fbyk <- function(x) dexp(x) / k curve(fbyk, from=0, to=max(X), add=TRUE, lty=2) ## ----------------------------------------------------------------------- X <- rexp(100000) W <- g(X)/dexp(X) mean <- weighted.mean(X, W) mean weighted.mean( (X - mean)^2, W ) # The variance as E[ (X - Xbar)^2 ] ## ----eval=FALSE--------------------------------------------------------- ## rannorm <- function(n, mean = 0, sd = 1){ ## singlenumber <- function() { ## repeat{ ## U <- runif(1) ## U2 <- sign(runif(1, min = -1)) # value is +1 or -1 ## Y <- rexp(1) * U2 # Y is a double exponential r.v. ## if (U < dnorm(Y) / exp(-abs(Y))) break ## } ## return(Y) ## } ## replicate(n, singlenumber()) * sd + mean ## } ## ----eval=FALSE--------------------------------------------------------- ## probs <- c(0.2, 0.3, 0.1, 0.15, 0.05, 0.2) ## randiscrete1 <- function(n, probs) { ## cumprobs <- cumsum(probs) ## singlenumber <- function() { ## x <- runif(1) ## N <- sum(x > cumprobs) ## N ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE--------------------------------------------------------- ## randiscrete2 <- function(n, probs) { ## singlenumber <- function() { ## repeat{ ## U <- runif(2, min=c(-0.5, 0), max=c(length(probs) - 0.5, ## max(probs))) ## if (U[2] < probs[round(U[1]) + 1]) break ## } ## return(round(U[1])) ## } ## replicate(n, singlenumber()) ## } ## ----eval=FALSE--------------------------------------------------------- ## set.seed(91626) ## probs <- runif(100) ## probs <- probs / sum(probs) ## ----eval=FALSE--------------------------------------------------------- ## N <- rpois(1, 15) # N cluster centers ## U1 <- runif(N, min = 0, max = 1000) ## U2 <- runif(N, min = 0, max = 1000) ## # (U1, U2) are the coordinates of the cluster centers ## ## M <- rpois(N, 40) # Numbers of points in each cluster ## x <- vector("list", N) # create a list with N empty components ## y <- vector("list", N) ## for (i in 1:N) { ## x[[i]] <- rnorm(M[i], mean = U1[i], sd = 20) ## y[[i]] <- rnorm(M[i], mean = U2[i], sd = 20) ## } ## clusters <- data.frame(x = unlist(x), y = unlist(y)) ## plot(clusters) # visualize the result