# R macros for Math 324, originally written spring 2005. rdist <- function(num.stat, stat, dist, ...) { # Random generation for a statistic of a distribution. # num.stat is the number of statistics `stat' to be generated. # Some choices for `stat': mean, median, sum, sd, var, min, max. # Some choices for `dist': # rnorm, rexp, rcauchy, rlaplace, rt, rchisq, rf, runif, rbinom, rgeo, rpois. # ...: optional arguments to `dist' # The first argument (`n') of `dist' is the number of observations # used to compute each stat, and is necessary. x <- NA for (i in 1:num.stat) { x <- c(x, match.fun(stat)(match.fun(dist)(...))) } x[2:length(x)] } # Garren, 2005 plot.dist.old <- function(xmin, xmax, dist, ...) { # Plots a probability density function (pdf) or # a cumulative distribution function (cdf). # `xmin' and `xmax' represent the limiting values on the x-axis. # When plotting the pdf, some choices for `dist' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, dgeo, dpois. # When plotting the cdf, some choices for `dist' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeo, ppois. # ...: optional arguments to `dist', EXCLUDING the first argument. # May use function `plot.dist' instead of `plot.dist.old'. # The only advantage of `plot.dist.old' over `plot.dist' is that # `plot.dist.old' is not limited to five parameters. num.grid.points <- 10001; options(warn= -1) x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points discrete.pdf <- sum( match.fun(dist)(x[floor(x)!=ceiling(x)], ...) )==0 if (discrete.pdf) { x <- ceiling(xmin):floor(xmax) plot(x, (match.fun(dist)(x, ...)), type="h", ylab="") } else { plot(x, (match.fun(dist)(x, ...)), pch=20, cex=0.05, ylab="") } } # Garren, 2005 plot.dist <- function(xmin, xmax, distA, parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL, col=c("black","red","darkgreen")) { # Plots one, two, or three functions. # `xmin' and `xmax' represent the limiting values on the x-axis. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, dgeo, dpois. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeo, ppois. # `parmA1' and `parmA2' are the parameters of `distA' (excluding the first argument), # where NULL is the default value; likewise for the other two functions. # If `parmA2', `parmB2' or `parmC2' does not exist, keep the value equal to NULL. # If `parmA1' is NULL, then `parmA2' is automatically set equal to NULL; # likewise for the other two functions. # `col' may be a scalar or vector, and specifies the colors of the plotted functions. # Type `colors()' for selections. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. 
# For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot.dist( 0, 8, df.nc, c(7,9,2.5) ) num.grid.points <- 10001; options(warn= -1) if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points x0 <- ceiling(xmin):floor(xmax) if (length(col)==1) {col <- rep(col, 3)}; if (length(col)==2) {col <- c(col, col[1])} fA <- function(z) { if (length(parmA1)>1) { if (length(parmA1)==2) {return(match.fun(distA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } else { if (is.null(parmA1)) { return(match.fun(distA)(z)) } if (!is.null(parmA1) & is.null(parmA2)) { return(match.fun(distA)(z, parmA1)) } if (!is.null(parmA1) & !is.null(parmA2)) {return(match.fun(distA)(z, parmA1, parmA2))} } } discrete.pdfA <- sum( fA(x[floor(x)!=ceiling(x)]) )==0 ; ylim1 <- NULL discrete.pdfB <- F; discrete.pdfC <- F if (!is.null(distB)) { fB <- function(z) { if (length(parmB1)>1) { if (length(parmB1)==2) {return(match.fun(distB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } else { if (is.null(parmB1)) { return(match.fun(distB)(z)) } if (!is.null(parmB1) & is.null(parmB2)) { return(match.fun(distB)(z, parmB1)) } if (!is.null(parmB1) & !is.null(parmB2)) {return(match.fun(distB)(z, parmB1, parmB2))} } } discrete.pdfB <- sum( fB(x[floor(x)!=ceiling(x)]) )==0 } if (!is.null(distC)) { fC <- function(z) { if (length(parmC1)>1) { if (length(parmC1)==2) {return(match.fun(distC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } else { if (is.null(parmC1)) { return(match.fun(distC)(z)) } if (!is.null(parmC1) & is.null(parmC2)) { return(match.fun(distC)(z, parmC1)) } if (!is.null(parmC1) & !is.null(parmC2)) {return(match.fun(distC)(z, parmC1, parmC2))} } } discrete.pdfC <- sum( fC(x[floor(x)!=ceiling(x)]) )==0 } if (!is.null(distB) & is.null(distC)) { ylim1 <- c( min(c(fA(x),fB(x),fA(x0),fB(x0))), max(c(fA(x),fB(x),fA(x0),fB(x0))) ) } if (is.null(distB) & !is.null(distC)) { ylim1 <- c( min(c(fA(x),fC(x),fA(x0),fC(x0))), max(c(fA(x),fC(x),fA(x0),fC(x0))) ) } if (!is.null(distB) & !is.null(distC)) { ylim1 <- c( min(c(fA(x),fB(x),fC(x),fA(x0),fB(x0),fC(x0))), max(c(fA(x),fB(x),fC(x),fA(x0),fB(x0),fC(x0))) ) } if (discrete.pdfA) { plot(x0, fA(x0), ylim=ylim1, type="h", xlab="X", ylab="", col=col[1]) } else { plot(x, fA(x), ylim=ylim1, type="p", pch=20, cex=0.05, xlab="X", ylab="", col=col[1]) } if (discrete.pdfB) { curve(fB, x0[1], x0[length(x0)], length(x0), add=T, type="h", col=col[2]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[2]) } if (discrete.pdfC) { curve(fC, x0[1], x0[length(x0)], length(x0), add=T, type="h", col=col[3]) } if (!is.null(distC) & !discrete.pdfC) { 
curve(fC, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[3]) } if (discrete.pdfA & discrete.pdfB & !discrete.pdfC) { x1 <- x0[ fA(x0) > fB(x0) ]; x2 <- x0[ fA(x0) <= fB(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2]) curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2]) curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) } if (discrete.pdfA & !discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) > fC(x0) ]; x2 <- x0[ fA(x0) <= fC(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[1]) curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) } if (!discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fB(x0) > fC(x0) ]; x2 <- x0[ fB(x0) <= fC(x0) ] curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2]) } if (discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) >= fB(x0) & fB(x0) >= fC(x0) ] x2 <- x0[ fA(x0) >= fC(x0) & fC(x0) >= fB(x0) ] x3 <- x0[ fB(x0) >= fA(x0) & fA(x0) >= fC(x0) ] x4 <- x0[ fB(x0) >= fC(x0) & fC(x0) >= fA(x0) ] x5 <- x0[ fC(x0) >= fA(x0) & fA(x0) >= fB(x0) ] x6 <- x0[ fC(x0) >= fB(x0) & fB(x0) >= fA(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2]) curve(fB, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[2]) curve(fA, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[1]) curve(fC, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[3]) curve(fB, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[2]) curve(fC, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[3]) curve(fA, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[1]) curve(fC, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[3]) curve(fA, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[1]) curve(fB, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[2]) curve(fC, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[3]) curve(fB, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[2]) curve(fA, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[1]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[3]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[2]) } if (!is.null(distA) & !discrete.pdfA) { curve(fA, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[1]) } } # Garren, 2005 trunc.hist <- function(xmin, xmax, x, ...) { # Produces a truncated histogram. 
# When constructing the histogram, exclude values
# outside the domain from `xmin' to `xmax'.
# `x' is the original data set.
# ...: optional arguments to `hist'.
y <- x[ x>xmin & x<xmax ]
hist(y, ...) } # Garren, 2005

shade2.dist <- function(xmin=NULL, xmax=NULL, xshade=NULL, complement=FALSE, ddist, parm1=NULL, parm2=NULL) {
# Plots a probability density function (pdf) and shades in a region.
# `xmin' and `xmax' represent the limiting values on the x-axis,
#    and reasonable values are attempted when they are set to NULL.
# `xshade' can be a scalar or a vector of size 2, and defaults to the median.
# When `xshade' is a scalar and complement is FALSE, the area from -Inf to xshade is shaded.
# When `xshade' is a scalar and complement is TRUE, the area from xshade to +Inf is shaded.
# When `xshade' is a vector and complement is FALSE, the area from xshade[1] to xshade[2] is shaded.
# When `xshade' is a vector and complement is TRUE, the tail probabilities are shaded.
# `ddist' is the pdf, must be in quotes, and some choices are
#    dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, dpois.
# The corresponding cdf is obtained automatically from `ddist'.
# `parm1' and `parm2' are the parameters of `ddist' (excluding the first argument),
#    where NULL is the default value.  If `ddist' needs more than 2 parameters, then assign
#    `parm1' to be a vector of all the parameters, and keep `parm2' equal to NULL.
# For example, to graph P(X>6) when X has a noncentral F distribution
#    with 7 and 9 df and ncp=2.5, type:
# > shade2.dist( 0, 8, 6, T, 'df.nc', c(7,9,2.5) )
# To avoid ambiguity with discrete distributions, avoid border values with `xshade'.
# For example, if X ~ Binomial(n=10, p=0.4), then to graph P(X>2) type:
# > shade2.dist( 0, 10, 2.5, T, 'dbinom', 10, 0.4 )
num.grid.points <- 10001; col <- c("black", "red"); ylimchisq1 <- 1.2
options(warn = -1); if (substring(ddist,1,1)!="d") stop("`ddist' must be a pdf.")
temp.vec <- get.min.max(xmin, xmax, ddist, parm1, parm2)
if (is.null(xshade)) {xshade <- temp.vec$medianA}
if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.")
if (length(xshade)==2) {if (xshade[1]>xshade[2]) {temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp}}
if (is.null(xmin) || is.null(xmax)) { xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax
   xmin.temp <- min( xmin, xshade[1]-(xmax-xmin)*0.1 )
   xmax.temp <- max( xmax, xshade[length(xshade)]+(xmax-xmin)*0.1 )
   xmin <- xmin.temp ; xmax <- xmax.temp
} else { if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp } }
pdist <- paste("p",substring(ddist,2),sep="")
if (pdist=="pf.nc") pdist <- "pf" ; if (pdist=="pt.nc") pdist <- "pt"
f <- function(z) {
   if (length(parm1)>1) {
      if (length(parm1)==2) {return(match.fun(ddist)(z, parm1[1], parm1[2]))}
      if (length(parm1)==3) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3]))}
      if (length(parm1)==4) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))}
      if (length(parm1)==5) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))}
   } else {
      if (is.null(parm1)) { return(match.fun(ddist)(z)) }
      if (!is.null(parm1) & is.null(parm2)) { return(match.fun(ddist)(z, parm1)) }
      if (!is.null(parm1) & !is.null(parm2)) {return(match.fun(ddist)(z, parm1, parm2))} } }
g <- function(z) {
   if (length(parm1)>1) {
      if (length(parm1)==2) {return(match.fun(pdist)(z, parm1[1], parm1[2]))}
      if (length(parm1)==3) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3]))}
      if (length(parm1)==4) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))}
      if (length(parm1)==5) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))}
   } else {
      if (is.null(parm1)) { return(match.fun(pdist)(z)) }
      if (!is.null(parm1) & is.null(parm2)) { return(match.fun(pdist)(z, parm1)) }
      if (!is.null(parm1) & !is.null(parm2)) {return(match.fun(pdist)(z, parm1, parm2))} } }
x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1)
medianA <- temp.vec$medianA
discrete.pdf <- ( f(floor(medianA)-0.5) + f(floor(medianA)+0.5) + f(floor(medianA)+1.5) == 0 )
options(warn = 0)
if (discrete.pdf) {xmin <- ceiling(xmin); xmax<-floor(xmax); x <- xmin:xmax }
if (length(xshade)==2 & xshade[1]>xshade[2]) { temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp }
if (xshade[1]<xmin || xshade[length(xshade)]>xmax) stop("xshade must be between xmin and xmax.")
if (complement==F & length(xshade)==1) {the.prob <- g(xshade); x1 <- x[ x<=xshade ] }
if (complement==T & length(xshade)==1) {the.prob <- 1-g(xshade); x1 <- x[ x>xshade ] }
if (complement==F & length(xshade)==2) {the.prob <- g(xshade[2])-g(xshade[1])
   x1 <- x[ xshade[1]<=x & x<=xshade[2] ] }
if (complement==T & length(xshade)==2) {the.prob <- 1-g(xshade[2])+g(xshade[1])
   x1 <- x[ x<=xshade[1] ] ; x2 <- x[ x>xshade[2] ] }
maxA <- ifelse( ((ddist %in% c("dchisq", "df")) && parm1[1]==1),
   min(ylimchisq1, max(f(x),na.rm=T)), max(f(x)) )
ylim1 <- c(min(f(x),na.rm=T), maxA)
if (discrete.pdf) { plot(x, f(x), xlim=c(xmin, xmax), ylim=ylim1, type="h", xlab="X",
ylab="probability density function", main=paste(c("Probability is", prettyNum(the.prob)), collapse=" "), col=col[1]) curve(f, min(x1), max(x1), (max(x1)-min(x1)+1), add=T, type="h", col=col[2]) if (complement==T & length(xshade)==2) { curve(f, min(x2), max(x2), (max(x2)-min(x2)+1), add=T, type="h", col=col[2]) } } else { plot(x1, f(x1), xlim=c(xmin,xmax), ylim=ylim1, type="h", xlab="X", ylab="probability density function", main=paste(c("Probability is", prettyNum(the.prob)), collapse=" "), col=col[2]) if (complement==T & length(xshade)==2) { curve(f, min(x2), max(x2), num.grid.points, add=T, type="h", col=col[2]) } curve(f, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[1]) } } # Garren, 2005 plot2.dist <- function(xmin=NULL, xmax=NULL, distA="dnorm", parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL, col=c("black","red","darkgreen")) { # Can replace `plot.dist'. # Plots one, two, or three functions. # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, dgeo, dpois. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeo, ppois. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are the parameters of `distA' (excluding the first argument), # where NULL is the default value; likewise for the other two functions. # If `parmA2', `parmB2' or `parmC2' does not exist, keep the value equal to NULL. # If `parmA1' is NULL, then `parmA2' is automatically set equal to NULL; # likewise for the other two functions. # `col' may be a scalar or vector, and specifies the colors of the plotted functions. # Type `colors()' for selections. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. 
# For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( 0, 8, df.nc, c(7,9,2.5) ) num.grid.points <- 10001; options(warn= -1); ylimchisq1 <- 1.2 temp.vec <- get.min.max(xmin, xmax, distA, parmA1, parmA2, distB, parmB1, parmB2, distC, parmC1, parmC2 ) xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points x0 <- ceiling(xmin):floor(xmax) if (length(col)==1) {col <- rep(col, 3)}; if (length(col)==2) {col <- c(col, col[1])} fA <- function(z) { if (length(parmA1)>1) { if (length(parmA1)==2) {return(match.fun(distA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } else { if (is.null(parmA1)) { return(match.fun(distA)(z)) } if (!is.null(parmA1) & is.null(parmA2)) { return(match.fun(distA)(z, parmA1)) } if (!is.null(parmA1) & !is.null(parmA2)) {return(match.fun(distA)(z, parmA1, parmA2))} } } medianA <- temp.vec$medianA discrete.pdfA <- (substring(distA,1,1)=="d") & ( fA(floor(medianA)-0.5) + fA(floor(medianA)+0.5) + fA(floor(medianA)+1.5) == 0 ) discrete.pdfB <- F ; discrete.pdfC <- F ; ylim1 <- NULL if (!is.null(distB)) { fB <- function(z) { if (length(parmB1)>1) { if (length(parmB1)==2) {return(match.fun(distB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } else { if (is.null(parmB1)) { return(match.fun(distB)(z)) } if (!is.null(parmB1) & is.null(parmB2)) { return(match.fun(distB)(z, parmB1)) } if (!is.null(parmB1) & !is.null(parmB2)) {return(match.fun(distB)(z, parmB1, parmB2))} } } medianB <- temp.vec$medianB discrete.pdfB <- (substring(distB,1,1)=="d") & ( fB(floor(medianB)-0.5) + fB(floor(medianB)+0.5) + fB(floor(medianB)+1.5) == 0 ) } if (!is.null(distC)) { fC <- function(z) { if (length(parmC1)>1) { if (length(parmC1)==2) {return(match.fun(distC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } else { if (is.null(parmC1)) { return(match.fun(distC)(z)) } if (!is.null(parmC1) & is.null(parmC2)) { return(match.fun(distC)(z, parmC1)) } if (!is.null(parmC1) & !is.null(parmC2)) {return(match.fun(distC)(z, parmC1, parmC2))} } } medianC <- temp.vec$medianC discrete.pdfC <- (substring(distC,1,1)=="d") & ( fC(floor(medianC)-0.5) + fC(floor(medianC)+0.5) + fC(floor(medianC)+1.5) == 0 ) } maxA <- ifelse( ((distA %in% c("dchisq", "df")) && parmA1[1]==1), min(ylimchisq1, max(c(fA(x),fA(x0)),na.rm=T)), max(c(fA(x),fA(x0))) ) ylim1 <- c(min(c(fA(x),fA(x0)),na.rm=T), maxA) if (!is.null(distB) & is.null(distC)) { maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1), min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=T)), max(c(fB(x),fB(x0))) ) ylim1 <- c( min(c(fA(x),fB(x),fA(x0),fB(x0)),na.rm=T), max(maxA, maxB) ) } if (is.null(distB) & !is.null(distC)) { maxC 
<- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1),
   min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=T)), max(c(fC(x),fC(x0))) )
ylim1 <- c( min(c(fA(x),fC(x),fA(x0),fC(x0)),na.rm=T), max(maxA, maxC) ) }
if (!is.null(distB) & !is.null(distC)) {
maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1),
   min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=T)), max(c(fB(x),fB(x0))) )
maxC <- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1),
   min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=T)), max(c(fC(x),fC(x0))) )
ylim1 <- c( min(c(fA(x),fB(x),fC(x),fA(x0),fB(x0),fC(x0)),na.rm=T), max(maxA, maxB, maxC) ) }
ylab=""; pdfA <- substring(distA, 1, 1)=="d"
pdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="d"))
pdfC <- ifelse(is.null(distC), T, (substring(distC, 1, 1)=="d"))
if (pdfA & is.null(distB) & is.null(distC)) {ylab <- "probability density function"} else {
   if (pdfA & pdfB & pdfC) ylab <- "probability density functions" }
cdfA <- substring(distA, 1, 1)=="p"
cdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="p"))
cdfC <- ifelse(is.null(distC), T, (substring(distC, 1, 1)=="p"))
if (cdfA & is.null(distB) & is.null(distC)) {ylab <- "cumulative distribution function"} else {
   if (cdfA & cdfB & cdfC) ylab <- "cumulative distribution functions" }
if (discrete.pdfA) { plot(x0, fA(x0), ylim=ylim1, type="h", xlab="X", ylab=ylab, col=col[1]) } else {
   plot(x, fA(x), ylim=ylim1, type="p", pch=20, cex=0.05, xlab="X", ylab=ylab, col=col[1]) }
if (discrete.pdfB) { curve(fB, x0[1], x0[length(x0)], length(x0), add=T, type="h", col=col[2]) }
if (!is.null(distB) & !discrete.pdfB) {
   curve(fB, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[2]) }
if (discrete.pdfC) { curve(fC, x0[1], x0[length(x0)], length(x0), add=T, type="h", col=col[3]) }
if (!is.null(distC) & !discrete.pdfC) {
   curve(fC, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[3]) }
if (discrete.pdfA & discrete.pdfB & !discrete.pdfC) {
   x1 <- x0[ fA(x0) > fB(x0) ]; x2 <- x0[ fA(x0) <= fB(x0) ]
   curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[1])
   curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2])
   curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2])
   curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) }
if (discrete.pdfA & !discrete.pdfB & discrete.pdfC) {
   x1 <- x0[ fA(x0) > fC(x0) ]; x2 <- x0[ fA(x0) <= fC(x0) ]
   curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[1])
   curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3])
   curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3])
   curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) }
if (!discrete.pdfA & discrete.pdfB & discrete.pdfC) {
   x1 <- x0[ fB(x0) > fC(x0) ]; x2 <- x0[ fB(x0) <= fC(x0) ]
   curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2])
   curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3])
   curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3])
   curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2]) }
if (discrete.pdfA & discrete.pdfB & discrete.pdfC) {
   x1 <- x0[ fA(x0) >= fB(x0) & fB(x0) >= fC(x0) ]
   x2 <- x0[ fA(x0) >= fC(x0) & fC(x0) >= fB(x0) ]
   x3 <- x0[ fB(x0) >= fA(x0) & fA(x0) >= fC(x0) ]
   x4 <- x0[ fB(x0) >= fC(x0) & fC(x0) >= fA(x0) ]
   x5 <- x0[ fC(x0) >= fA(x0) & fA(x0) >= fB(x0) ]
   x6 <- x0[ fC(x0) >= fB(x0) & fB(x0) >= fA(x0) ]
   curve(fA, x1[1], x1[length(x1)], length(x1), add=T, type="h",
col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=T, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[1]) curve(fC, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=T, type="h", col=col[2]) curve(fB, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[2]) curve(fA, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[1]) curve(fC, x3[1], x3[length(x3)], length(x3), add=T, type="h", col=col[3]) curve(fB, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[2]) curve(fC, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[3]) curve(fA, x4[1], x4[length(x4)], length(x4), add=T, type="h", col=col[1]) curve(fC, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[3]) curve(fA, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[1]) curve(fB, x5[1], x5[length(x5)], length(x5), add=T, type="h", col=col[2]) curve(fC, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[3]) curve(fB, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[2]) curve(fA, x6[1], x6[length(x6)], length(x6), add=T, type="h", col=col[1]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[3]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[2]) } if (!is.null(distA) & !discrete.pdfA) { curve(fA, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[1]) } } # Garren, 2005 get.min.max <- function(xmin=NULL, xmax=NULL, distA, parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL) { # Outputs reasonable minimum and maximum boundaries for plotting functions. # Also, outputs the medians. # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, dgeo, dpois. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeo, ppois. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are the parameters of `distA' (excluding the first argument), # where NULL is the default value; likewise for the other two functions. # If `parmA2', `parmB2' or `parmC2' does not exist, keep the value equal to NULL. # If `parmA1' is NULL, then `parmA2' is automatically set equal to NULL; # likewise for the other two functions. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. 
# For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( 0, 8, "df.nc", c(7,9,2.5) ) pmin <- 0.125 ; amountover <- 1 ; buffer <- 0.15 return.now <- F ; medianA <- medianB <- medianC <- NULL if (!is.null(xmin) & !is.null(xmax)) medianA <- medianB <- medianC <- mean(xmin,xmax) if ("dt.nc" %in% c(distA, distB, distC)) return.now <- T if ("df.nc" %in% c(distA, distB, distC)) return.now <- T if ("pf" %in% c(distA, distB, distC)) return.now <- T if ("pt" %in% c(distA, distB, distC)) return.now <- T if (return.now & (is.null(xmin) || is.null(xmax))) stop("Neither `xmin' nor `xmax' can be NULL for this distribution.") if (!return.now) { if (!is.null(distA)) qdistA <- paste("q",substring(distA,2),sep="") if (!is.null(distB)) qdistB <- paste("q",substring(distB,2),sep="") if (!is.null(distC)) qdistC <- paste("q",substring(distC,2),sep="") if (!is.null(distA)) { qfA <- function(z) { if (length(parmA1)>1) { if (length(parmA1)==2) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } else { if (is.null(parmA1)) { return(match.fun(qdistA)(z)) } if (!is.null(parmA1) & is.null(parmA2)) { return(match.fun(qdistA)(z, parmA1)) } if (!is.null(parmA1) & !is.null(parmA2)) {return(match.fun(qdistA)(z, parmA1, parmA2))} } } } if (!is.null(distB)) { qfB <- function(z) { if (length(parmB1)>1) { if (length(parmB1)==2) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } else { if (is.null(parmB1)) { return(match.fun(qdistB)(z)) } if (!is.null(parmB1) & is.null(parmB2)) { return(match.fun(qdistB)(z, parmB1)) } if (!is.null(parmB1) & !is.null(parmB2)) {return(match.fun(qdistB)(z, parmB1, parmB2))} } } } if (!is.null(distC)) { qfC <- function(z) { if (length(parmC1)>1) { if (length(parmC1)==2) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } else { if (is.null(parmC1)) { return(match.fun(qdistC)(z)) } if (!is.null(parmC1) & is.null(parmC2)) { return(match.fun(qdistC)(z, parmC1)) } if (!is.null(parmC1) & !is.null(parmC2)) {return(match.fun(qdistC)(z, parmC1, parmC2))} } } } xminA.temp <- NULL; xmaxA.temp <- NULL; xminB.temp <- NULL; xmaxB.temp <- NULL xminC.temp <- NULL; xmaxC.temp <- NULL xmin2A.temp <- NULL; xmax2A.temp <- NULL; xmin2B.temp <- NULL; xmax2B.temp <- NULL xmin2C.temp <- NULL; xmax2C.temp <- NULL if (is.null(xmin)) { if (!is.null(distA)) xminA.temp <- qfA(pmin) if (!is.null(distB)) xminB.temp <- qfB(pmin) if (!is.null(distC)) xminC.temp <- qfC(pmin) } else { xminA.temp <- xmin; xminB.temp <- xmin; xminC.temp <- xmin } if (is.null(xmax)) { if (!is.null(distA)) xmaxA.temp <- qfA(1-pmin) if (!is.null(distB)) xmaxB.temp <- qfB(1-pmin) if (!is.null(distC)) xmaxC.temp <- qfC(1-pmin) } else { xmaxA.temp 
<- xmax; xmaxB.temp <- xmax; xmaxC.temp <- xmax } if (!is.null(distA)) { xmin2A.temp <- xminA.temp - (xmaxA.temp-xminA.temp)*amountover xmax2A.temp <- xmaxA.temp + (xmaxA.temp-xminA.temp)*amountover qfA0 <- ifelse( is.na(qfA(0)), -Inf, qfA(0) ) qfA1 <- ifelse( is.na(qfA(1)), Inf, qfA(1) ) left.bounded <- ( xmin2A.temp < qfA0 ) right.bounded <- ( xmax2A.temp > qfA1 ) xmin2A.temp <- max( xmin2A.temp, qfA0 ) xmax2A.temp <- min( xmax2A.temp, qfA1 ) if (left.bounded) xmin3A.temp <- xmin2A.temp - (xmax2A.temp - xmin2A.temp) * buffer if (right.bounded) xmax3A.temp <- xmax2A.temp + (xmax2A.temp - xmin2A.temp) * buffer if (left.bounded) xmin2A.temp <- xmin3A.temp if (right.bounded) xmax2A.temp <- xmax3A.temp } if (!is.null(distB)) { xmin2B.temp <- xminB.temp - (xmaxB.temp-xminB.temp)*amountover xmax2B.temp <- xmaxB.temp + (xmaxB.temp-xminB.temp)*amountover qfB0 <- ifelse( is.na(qfB(0)), -Inf, qfB(0) ) qfB1 <- ifelse( is.na(qfB(1)), Inf, qfB(1) ) left.bounded <- ( xmin2B.temp < qfB0 ) right.bounded <- ( xmax2B.temp > qfB1 ) xmin2B.temp <- max( xmin2B.temp, qfB0 ) xmax2B.temp <- min( xmax2B.temp, qfB1 ) if (left.bounded) xmin3B.temp <- xmin2B.temp - (xmax2B.temp - xmin2B.temp) * buffer if (right.bounded) xmax3B.temp <- xmax2B.temp + (xmax2B.temp - xmin2B.temp) * buffer if (left.bounded) xmin2B.temp <- xmin3B.temp if (right.bounded) xmax2B.temp <- xmax3B.temp } if (!is.null(distC)) { xmin2C.temp <- xminC.temp - (xmaxC.temp-xminC.temp)*amountover xmax2C.temp <- xmaxC.temp + (xmaxC.temp-xminC.temp)*amountover qfC0 <- ifelse( is.na(qfC(0)), -Inf, qfC(0) ) qfC1 <- ifelse( is.na(qfC(1)), Inf, qfC(1) ) left.bounded <- ( xmin2C.temp < qfC0 ) right.bounded <- ( xmax2C.temp > qfC1 ) xmin2C.temp <- max( xmin2C.temp, qfC0 ) xmax2C.temp <- min( xmax2C.temp, qfC1 ) if (left.bounded) xmin3C.temp <- xmin2C.temp - (xmax2C.temp - xmin2C.temp) * buffer if (right.bounded) xmax3C.temp <- xmax2C.temp + (xmax2C.temp - xmin2C.temp) * buffer if (left.bounded) xmin2C.temp <- xmin3C.temp if (right.bounded) xmax2C.temp <- xmax3C.temp } if (is.null(xmin)) xmin <- min(xmin2A.temp, xmin2B.temp, xmin2C.temp) if (is.null(xmax)) xmax <- max(xmax2A.temp, xmax2B.temp, xmax2C.temp) if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} medianA <- ifelse( is.null(distA), NA, qfA(0.5) ) medianB <- ifelse( is.null(distB), NA, qfB(0.5) ) medianC <- ifelse( is.null(distC), NA, qfC(0.5) ) } # End of `if (!return.now)' structure(list(xmin=xmin, xmax=xmax, medianA=medianA, medianB=medianB, medianC=medianC)) } # Garren, 2005 shade.dist <- function(xmin, xmax, xshade, complement=FALSE, ddist, pdist, ...) { # Plots a continuous probability density function (pdf) and shades in a region. # `xmin' and `xmax' represent the limiting values on the x-axis. # `xshade' can be a scalar or a vector of size 2. # When `xshade' is a scalar and complement is FALSE, # the area from -Inf to xshade is shaded. # When `xshade' is a scalar and complement is TRUE, # the area from xshade to +Inf is shaded. # When `xshade' is a vector and complement is FALSE, # the area from xshade[1] to xshade[2] is shaded. # When `xshade' is a vector and complement is TRUE, # the tail probabilities are shaded. # `ddist' is the pdf, and some choices are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif. # `pdist' is the cdf, and some choices are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif. # ...: optional arguments to `ddist' and `pdist', EXCLUDING the first argument. # `ddist' and `pdist' should represent the same distribution. 
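# For instance (an illustrative call; the cutoff 1.96 is arbitrary), the upper-tail
#    probability P(Z>1.96) for a standard normal pdf could be shaded by typing:
# > shade.dist( -3.5, 3.5, 1.96, T, dnorm, pnorm )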
# To avoid ambiguity with discrete distributions, avoid border values with `xshade'.
# For example, if X ~ Binomial(n=10, p=0.4), then to graph P(X>2) type:
# > shade.dist( 0, 10, 2.5, T, dbinom, pbinom, 10, 0.4 )
num.grid.points <- 10001; col <- c("black", "red")
f <- function(z) { match.fun(ddist)(z, ...) }
if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp }
x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1)
discrete.pdf <- sum( f(x[floor(x)!=ceiling(x)]) )==0
options(warn = 0)
if (discrete.pdf) {xmin <- ceiling(xmin); xmax<-floor(xmax); x <- xmin:xmax }
if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.")
if (length(xshade)==2 & xshade[1]>xshade[2]) { temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp }
if (xshade[1]<xmin || xshade[length(xshade)]>xmax) stop("xshade must be between xmin and xmax.")
if (complement==F & length(xshade)==1) {the.prob <- match.fun(pdist)(xshade, ...); x1 <- x[ x<=xshade ] }
if (complement==T & length(xshade)==1) {the.prob <- 1-match.fun(pdist)(xshade, ...); x1 <- x[ x>xshade ] }
if (complement==F & length(xshade)==2) {the.prob <- match.fun(pdist)(xshade[2], ...)-match.fun(pdist)(xshade[1], ...)
   x1 <- x[ xshade[1]<=x & x<=xshade[2] ] }
if (complement==T & length(xshade)==2) {the.prob <- 1-match.fun(pdist)(xshade[2], ...)+match.fun(pdist)(xshade[1], ...)
   x1 <- x[ x<=xshade[1] ] ; x2 <- x[ x>xshade[2] ] }
if (discrete.pdf) { plot(x, f(x), xlim=c(xmin, xmax), type="h", xlab="X",
   ylab="probability density function",
   main=paste(c("Probability is", prettyNum(the.prob)), collapse=" "), col=col[1])
   curve(f, min(x1), max(x1), (max(x1)-min(x1)+1), add=T, type="h", col=col[2])
   if (complement==T & length(xshade)==2) {
      curve(f, min(x2), max(x2), (max(x2)-min(x2)+1), add=T, type="h", col=col[2]) }
} else { plot(x1, f(x1), xlim=c(xmin,xmax), ylim=c(0, max(f(x))), type="h", xlab="X",
   ylab="probability density function",
   main=paste(c("Probability is", prettyNum(the.prob)), collapse=" "), col=col[2])
   if (complement==T & length(xshade)==2) {
      curve(f, min(x2), max(x2), num.grid.points, add=T, type="h", col=col[2]) }
   curve(f, xmin, xmax, num.grid.points, add=T, type="S", col=col[1]) } } # Garren, 2005

c.i.binom.test <- function( x, p = 0.5, conf.level = 0.95 ) {
# Produces an exact confidence interval on the median based
# on the binomial test, where ties are excluded.
# `x' is the vector of observations.
# `p' is the hypothesized probability of success.
# `conf.level' is the confidence level for the returned confidence interval.
delta <- (max(x)-min(x))/1e10
xgrid <- c(x, x+delta, x-delta)
value.in.c.i. <- rep(NA, length(xgrid))
for (iii in 1:length(xgrid)) {
   x1 <- c( sum( x<xgrid[iii] ), sum( x>xgrid[iii] ) ); n <- sum(x1)
   value.in.c.i.[iii] <- binom.test(x1, n, p, alternative = "two.sided", conf.level)$p.value>=1-conf.level }
c.i. <- c( min( xgrid[value.in.c.i.] ), max( xgrid[value.in.c.i.] ) )
c.i. } # Garren, 2005

power.binom.test <- function(n, alpha=0.05, alternative="two.sided", null.median, alt.pdist, ...) {
# Computes the power of the binomial test (i.e., sign test).
# This power function is based on binomial probabilities,
# not the normal approximation to the binomial distribution,
# and produces exact solutions.
# n is the sample size.
# alpha is the size of the test; i.e., P(type I error).
# Choices for `alternative' are "two.sided", "less" and "greater".
# null.median is the population median under the null hypothesis.
# `alt.pdist' is the cdf under the alternative distribution, and some choices are
# pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeom, ppois.
# ...: optional arguments to `alt.pdist', EXCLUDING the first argument.
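# For example (an illustrative call; the N(0.5,1) alternative is arbitrary), the power of a
#    sign test of null median 0 against the "greater" alternative, with n=20 and alpha=0.05,
#    when the observations are actually N(0.5,1), might be computed by typing:
# > power.binom.test( 20, 0.05, "greater", 0, pnorm, 0.5 )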
if (alternative=="two.sided") { q <- qbinom(alpha/2, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha/2) prob <- pbinom( q, n, 1-match.fun(alt.pdist)(null.median, ...) ) + pbinom( q, n, match.fun(alt.pdist)(null.median, ...) ) } if (alternative=="less") { q <- qbinom(alpha, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha) prob <- pbinom( q, n, 1-match.fun(alt.pdist)(null.median, ...) ) } if (alternative=="greater") { q <- qbinom(alpha, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha) prob <- pbinom( q, n, match.fun(alt.pdist)(null.median, ...) ) } prob } # Garren, 2005 empirical.cdf <- function(x, y=NULL, col=c("black","red")) { # Graphs one or two empirical cumulative distribution functions. # `x' is the vector of observations whose empirical cdf is to be graphed. # `y' is an optional vector of observations whose empirical cdf is to be graphed. # `col' may be a scalar or vector of length 2, and specifies the colors of the plotted functions. # Preferably, col[1] should differ from col[2]. Type `colors()' for selections. num.grid.points <- 10001; delta <- 0.1 xmin <- min(x,y)-(max(x,y)-min(x,y))*delta; xmax <- max(x,y)+(max(x,y)-min(x,y))*delta x0 <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points y0 <- rep(NA, num.grid.points+1) for (i in 0:num.grid.points) { y0[i] <- mean(x<=x0[i]) } plot(x0, y0, pch=20, cex=0.05, xlab=ifelse(is.null(y),"X","X, Y"), ylab=ifelse(is.null(y),"Cumulative Proportion","Cumulative Proportions"), main=ifelse(is.null(y),"Empirical Cumulative Distribution Function", "Empirical Cumulative Distribution Functions"), col=col[1]) if (!is.null(y)) { if (length(col)==1) { col <- rep(col, 2) } f2 <- function(u) { f <- rep(NA, length(u)) for (i in 1:length(u)) { f[i] <- mean(y<=u[i]) } ; return(f) } curve(f2, xmin, xmax, num.grid.points, add=T, type="p", pch=20, cex=0.05, col=col[2]) } } # Garren, 2005 perm.test <- function(x, y=NULL, alternative="two.sided", mu=0, paired=FALSE, exact.p.value=TRUE, num.sim=1e4, plot=FALSE, stat=mean, ...) { # Performs one-sample and two-sample permutation tests on vectors of data. # The one-sample permutation test is based on stat(x-mu); # the two-sample paired permutation test is based on stat(x-y-mu); # and the two-sample unpaired permutation test is based on stat(x-mu)-stat(y). # `x' is a numeric vector of data values. # `y' is an optional numeric vector data values, and is used for two-sample tests only. # Choices for `alternative' are "two.sided", "less" and "greater". # `mu' is a number indicating the null value of the location parameter # (or the difference in location parameters if performing a two-sample test). # `paired' indicates whether or not a two-sample test should be paired, # and is ignored for a one-sample test. # The exact p-value is attempted when `exact.p.value' is TRUE, # and is simulated when `exact.p.value' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. # If `plot' is TRUE, then plot the histogram of the permutation distribution; # otherwise, list the p-value. # Setting `plot' to TRUE works best when the permutation distribution # is close to continuous. # Some reasonable choices for `stat' are mean and median. # ... is the fraction (0 to 0.5) of observations to be trimmed from # each end of `x' and `y' before the mean is computed, and is # used only when stat = mean. if (!is.null(y) & !paired) { # Perform unpaired two-sample permutation test. 
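# The observed statistic is stat(x-mu)-stat(y), and its permutation distribution is obtained
#    by repeatedly reallocating the pooled observations between the two samples.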
max.loop <- 10; l1 <- min(length(x), length(y)); x <- x-mu
l2 <- max(length(x), length(y)); l3 <- l1+l2
test.stat0 <- match.fun(stat)(x, ...)-match.fun(stat)(y, ...); test.stat <- NULL
if (choose(l3,l1)>num.sim) exact.p.value <- FALSE
output1 <- "Unpaired two-sample permutation test was performed."
if (exact.p.value & l1<=max.loop) {
   output2 <- "Exact p-value was calculated."
   index <- rep(NA, max(l3, max.loop))
   for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) {
   for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) {
   for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) {
   for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) {
   for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) {
      index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10)
      index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) )
      test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -
         match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) )
   }}}}}}}}}} # Number of parentheses is max.loop
} # End of if
else { # Use simulations.
   output2 <- paste("p-value was estimated based on", num.sim, "simulations")
   for (i in 1:num.sim) { index <- sample(l3)
      test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -
         match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) } }
if (length(x)>length(y)) test.stat <- -test.stat
if (!plot) {
   if (alternative=="less") p.value <- mean( test.stat <= test.stat0 )
   if (alternative=="greater") p.value <- mean( test.stat >= test.stat0 )
   if (alternative=="two.sided") p.value <- mean( abs(test.stat) >= abs(test.stat0) )
   structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value)) }
else {hist(test.stat, main="Histogram of Permutation Distribution",
   xlab="Value of test statistic", ylab="Frequency")}
} # END of `if (!is.null(y) & !paired)'
else { # Perform a one-sample or a paired two-sample permutation test on mu.
max.loop <- 20
if (is.null(y)) { output1 <- "One-sample permutation test was performed." }
else { x <- x-y; output1 <- "Paired permutation test was performed." }
x <- x-mu; lx <- length(x); test.stat0 <- match.fun(stat)(x,...); test.stat <- NULL
if (lx>max.loop || 2^lx>num.sim) exact.p.value <- FALSE
if (exact.p.value) {
   output2 <- "Exact p-value was calculated."
   for (i1 in 0:ifelse(lx<1,0,1)) { for (i2 in 0:ifelse(lx<2,0,1)) { for (i3 in 0:ifelse(lx<3,0,1)) {
   for (i4 in 0:ifelse(lx<4,0,1)) { for (i5 in 0:ifelse(lx<5,0,1)) { for (i6 in 0:ifelse(lx<6,0,1)) {
   for (i7 in 0:ifelse(lx<7,0,1)) { for (i8 in 0:ifelse(lx<8,0,1)) { for (i9 in 0:ifelse(lx<9,0,1)) {
   for (i10 in 0:ifelse(lx<10,0,1)) { for (i11 in 0:ifelse(lx<11,0,1)) { for (i12 in 0:ifelse(lx<12,0,1)) {
   for (i13 in 0:ifelse(lx<13,0,1)) { for (i14 in 0:ifelse(lx<14,0,1)) { for (i15 in 0:ifelse(lx<15,0,1)) {
   for (i16 in 0:ifelse(lx<16,0,1)) { for (i17 in 0:ifelse(lx<17,0,1)) { for (i18 in 0:ifelse(lx<18,0,1)) {
   for (i19 in 0:ifelse(lx<19,0,1)) { for (i20 in 0:ifelse(lx<20,0,1)) {
      test.stat <- c(test.stat, match.fun(stat)(x*(2*(c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,
         i11,i12,i13,i14,i15,i16,i17,i18,i19,i20)[1:lx])-1),...))
   }}}}}}}}}}}}}}}}}}}} # Number of parentheses is max.loop
} # End of if
else { # Use simulations.
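   # Each simulated permutation randomly flips the signs of the centered observations.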
output2 <- paste("p-value was estimated based on", num.sim, "simulations") for (i in 1:num.sim) test.stat <- c(test.stat, match.fun(stat)((x*sample(c(-1,1),lx,T)),...)) } if (!plot) { if (alternative=="less") { p.value <- mean( test.stat <= test.stat0 ) } if (alternative=="greater") { p.value <- mean( test.stat >= test.stat0 ) } if (alternative=="two.sided") { p.value <- mean( abs(test.stat) >= abs(test.stat0) ) } structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value))} else {hist(test.stat, main="Histogram of Permutation Distribution", xlab="Value of test statistic", ylab="Frequency")} } # End of `else' } # End of perm.test Garren, 2005 perm.test.old <- function(x, y, exact.p.value=TRUE, alternative="two.sided", num.sim=1e4, stat, ...) { # A permutation test is performed based on stat(x)-stat(y). # `x' and `y' are numeric vectors of data values. # The exact p-value is attempted when `exact.p.value' is TRUE, # and is simulated when `exact.p.value' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # Choices for `alternative' are "two.sided", "less" and "greater". # `num.sim' is the upper limit on the number of permutations generated. # Some reasonable choices for `stat' are mean and median. # The test statistic is stat(x)-stat(y). # ... is the fraction (0 to 0.5) of observations to be trimmed from # each end of `x' and `y' before the mean is computed, and is # used only when stat = mean. # May use `perm.test' instead of `perm.test.old'. max.loop <- 10; l1 <- min(length(x), length(y)) l2 <- max(length(x), length(y)); l3 <- l1+l2 test.stat <- match.fun(stat)(x, ...)-match.fun(stat)(y, ...) if (choose(l3,l1)>num.sim) exact.p.value <- FALSE if (exact.p.value & l1<=max.loop) { output <- "Exact p-value was calculated." index <- rep(NA, max(l3, max.loop)) for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) { for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) { for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) { for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) { for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) { index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10) index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) ) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) }}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output <- paste("p-value was estimated based on", num.sim, "simulations") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) } } if (length(x)>length(y)) { test.stat[2:length(test.stat)] <- -test.stat[2:length(test.stat)] } if (alternative=="less") { p.value <- mean( test.stat[2:length(test.stat)] <= test.stat[1] ) } if (alternative=="greater") { p.value <- mean( test.stat[2:length(test.stat)] >= test.stat[1] ) } if (alternative=="two.sided") { p.value <- mean( abs(test.stat[2:length(test.stat)]) >= abs(test.stat[1]) ) } structure(list(output, alternative=alternative, p.value=p.value)) } # End of perm.test.old Garren, 2005 score <- function(x, y=NULL, expon=FALSE) { # Generates van der Waerden scores (i.e., normal quantiles) or exponential # (similar to Savage) scores, for combined data `x' and `y'. 
# `x' is a positive integer equal to the number of desired scores when y is NULL, # or `x' is a vector of observations. # `y' is an optional vector of observations, typically used with two-sample tests. # When `expon' is FALSE, van der Waerden scores are computed, even for ties. # When `expon' is TRUE, Exponential scores are computed, # and interpolation is used for ties. # Output: scored values for `x', when y is NULL. # Output: `$x' are the scored values for `x', when y is not NULL. # Output: `$y' are the scored values for `y', when y is not NULL. if (is.null(y) & length(x)==1) { if (floor(x)==ceiling(x) & x>=1) {x <- 1:x} } n <- length(c(x,y)) ; if (!expon) {z <- qnorm(rank(c(x,y))/(n+1))} if (expon) { u <- cumsum( 1/(n+1-1:n) ) ; z <- u[ rank(c(x,y)) ] for (i in 1:n) { rank0 <- rank(c(x,y))[i] ; remainder <- rank0 - floor(rank0) if (remainder>0) {z[i] <- u[floor(rank0)]*(1-remainder)+u[ceiling(rank0)]*remainder}}} if (is.null(y)) { return(z) } if (!is.null(y)) { return(structure(list(x=z[1:length(x)], y=z[(length(x)+1):n]))) } } # Garren, 2005 siegel.test <- function(x, y, alternative="two.sided", reverse=FALSE, exact.p.value=TRUE, num.sim=1e4) { # Performs the Siegel-Tukey test, where ties are handled by averaging ranks, # not by asymptotic approximations. # `x' and `y' are numeric vectors of data values. # Choices for `alternative' are "two.sided", "less" and "greater". # If `reverse' is FALSE, then assign rank 1 to the smallest observation. # If `reverse' is TRUE, then assign rank 1 to the largest observation. # The exact p-value is attempted when `exact.p.value' is TRUE, # and is simulated when `exact.p.value' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. z <- sort(c(x,y)) ; lz <- length(z) lower <- 1; upper <- lz; j <- rep(NA, lz) for (i in 1:lz) { if (i-4*as.integer(i/4)<=1) { j[lower] <- i ; lower <- lower+1 } else {j[upper] <- i; upper <- upper-1} } if (reverse) { j <- rev(j) } z0 <- z[order(j)] rank.x.mat <- matrix(NA, length(x), lz) rank.y.mat <- matrix(NA, length(y), lz) for (k in 1:lz) { for (i in 1:length(x)) { if (x[i]==z0[k]) {rank.x.mat[i,k] <- k} } for (i in 1:length(y)) { if (y[i]==z0[k]) {rank.y.mat[i,k] <- k} } } rank.x <- apply(rank.x.mat,1,mean,na.rm=T) rank.y <- apply(rank.y.mat,1,mean,na.rm=T) if (alternative=="two.sided") {p.value <- perm.test(rank.x,rank.y,alternative="two.sided",exact.p.value=exact.p.value, num.sim=num.sim,stat=mean)$p.value} if (alternative=="less") {p.value <- perm.test(rank.x,rank.y,alternative="greater",exact.p.value=exact.p.value, num.sim=num.sim,stat=mean)$p.value} if (alternative=="greater") {p.value <- perm.test(rank.x,rank.y,alternative="less",exact.p.value=exact.p.value, num.sim=num.sim,stat=mean)$p.value} structure(list(" Siegel-Tukey test", alternative=alternative, rank.x=rank.x, rank.y=rank.y, p.value=p.value)) } # Garren, 2005 rmd2.test <- function(x, y, alternative="two.sided", exact.p.value=TRUE, num.sim=1e4) { # OBSOLETE - Has been replaced by rmd.test # A permutation test is performed based on the estimated rmd, # the ratio of the absolute mean deviances. # `x' and `y' are numeric vectors of data values. # Choices for `alternative' are "two.sided", "less" and "greater". # The exact p-value is attempted when `exact.p.value' is TRUE, # and is simulated when `exact.p.value' is FALSE or when # computing an exact p-value requires more than num.sim calculations. 
# `num.sim' is the upper limit on the number of permutations generated. max.loop <- 10; l1 <- min(length(x), length(y)) l2 <- max(length(x), length(y)); l3 <- l1+l2 rmd <- function(x, y, alternative) { mean.dev.x <- mean(abs(x-median(x))); mean.dev.y <- mean(abs(y-median(y))) test <- mean.dev.x / mean.dev.y if (alternative=="two.sided") {test <- max(test, 1/test)} return( test ) } test.stat <- rmd(x, y, alternative) if (choose(l3,l1)>num.sim) exact.p.value <- FALSE if (exact.p.value & l1<=max.loop) { output <- "Exact p-value was calculated." index <- rep(NA, max(l3, max.loop)) for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) { for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) { for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) { for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) { for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) { index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10) index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) ) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) }}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output <- paste("p-value was estimated based on", num.sim, "simulations") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) } } if (length(x)>length(y) & alternative!="two.sided") { test.stat[2:length(test.stat)] <- 1 / test.stat[2:length(test.stat)] } if (alternative=="less") { p.value <- mean( test.stat[2:length(test.stat)] <= test.stat[1] ) } else { p.value <- mean( test.stat[2:length(test.stat)] >= test.stat[1] ) } structure(list(output, alternative=alternative, rmd.hat=test.stat[1], p.value=p.value)) } # End of rmd.test Garren, 2005 rmd.test <- function(x, y, alternative="two.sided", exact.p.value=TRUE, num.sim=1e4) { # A permutation test is performed based on the estimated rmd, # the ratio of the absolute mean deviances. # `x' and `y' are numeric vectors of data values. # Choices for `alternative' are "two.sided", "less" and "greater". # The exact p-value is attempted when `exact.p.value' is TRUE, # and is simulated when `exact.p.value' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. max.loop <- 10; l1 <- min(length(x), length(y)) ; tol <- 1e-10 l2 <- max(length(x), length(y)); l3 <- l1+l2 x <- x-median(x) ; y <- y-median(y) rmd <- function(x, y, alternative) { test <- mean(abs(x)) / mean(abs(y)) if (alternative=="two.sided") {test <- max(test, 1/test)} return( test ) } test.stat0 <- rmd(x, y, alternative) ; test.stat <- NULL if (choose(l3,l1)>num.sim) exact.p.value <- FALSE if (exact.p.value & l1<=max.loop) { output <- "Exact p-value was calculated." 
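# The nested loops below enumerate all choose(l1+l2, l1) allocations of the pooled
#    observations to the two samples, computing rmd for each allocation.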
index <- rep(NA, max(l3, max.loop)) for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) { for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) { for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) { for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) { for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) { index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10) index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) ) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) }}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output <- paste("p-value was estimated based on", num.sim, "simulations") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) } } if (length(x)>length(y) & alternative!="two.sided") { test.stat <- 1 / test.stat } if (alternative=="less") { p.value <- mean( test.stat - test.stat0 <= tol ) } else { p.value <- mean( test.stat - test.stat0 >= -tol) } structure(list(output, alternative=alternative, rmd.hat=test.stat0, p.value=p.value)) } # End of rmd.test Garren, 2005 dt.nc <- function(x, df, ncp=0) { # Approximate probability density function (pdf) for the noncentral t-distribution. # `x' is the vector of quantiles. # `df' is the degrees of freedom. # `ncp' is the noncentrality parameter. delta <- 1e-2 # `delta' should not be too small or too large; 0.01 works well. # When delta is 0.01, the maximum absolute error is 1.7e-6, # which may be compared to dnorm(0) whose value is 0.3989. if (ncp==0) {return(dt(x,df))} else {return( ( pt( (x+delta/2), df, ncp ) - pt( (x-delta/2), df, ncp ) ) / delta )} } # Garren, 2005 perm.f.test <- function(response, treatment=NULL, num.sim=1e4) { # A permutation F-test is performed, and a one-way analysis of variance F-test is performed. # Only approximated p-values are calculated for the permutation F-test. # `treatment' is the vector of treatments, and `response' is the vector of responses. # Alternatively, if `response' is NULL, then `treatment' must be an N by 2 matrix, # such that the first column represents treatment and the second column represents response. # `num.sim' is the number of permutations generated. # If `num.sim' is smaller than one, then the permutation test is not performed. # otherwise, list the p-value. if (is.null(treatment)) {treatment <- response[,2] ; response <- as.numeric(response[,1])} if (length(response)!=length(treatment)) stop("`response' and `treatment' must have the same length.") treatment <- factor(treatment) ; list1 <- c("One-way ANOVA", summary(aov(response~treatment))) if (num.sim>=1) { all.treatments <- union(treatment, NULL); k <- length(all.treatments); ni <- NULL for (i in 1:k) { ni <- c(ni, sum(treatment==all.treatments[i])) } one.way.anova <- function(treatment, response, all.treatments, ni, k) { # Returns $\sum_{i=1}^k n_i{\bar Y}_i^2$, which is monotone increasing with # the F-test statistic associated with one-way ANOVA; see Higgins, 2004, p. 84. 
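# (The total sum of squares is fixed under permutation of the responses, so larger values of this
#    statistic correspond to larger between-group sums of squares, and hence to larger F statistics.)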
yi.mean <- NULL
for (i in 1:k) {yi.mean <- c(yi.mean, mean(response[treatment==all.treatments[i]])) }
sum(ni*yi.mean^2) }
test.stat0 <- one.way.anova(treatment, response, all.treatments, ni, k) ; test.stat <- NULL
for (i in 1:num.sim) {
   test.stat <- c(test.stat, one.way.anova(treatment, sample(response), all.treatments, ni, k)) }
list2 <- c( "The p-value from the permutation F-test based on",
   paste( num.sim, "simulations is", mean(test.stat>=test.stat0) ) )
list1 <- c(list1, list2) }
return(list1) } # Garren, 2005

plot.line <- function(x, y, col=c("black", "red")) {
# Constructs a scatterplot with the fitted regression line.
# `x' is the vector of independent observations.
# `y' is the vector of dependent observations.
# `col' may be a scalar or vector of length 2, and specifies the color.
# col[1] is the color of the points, and col[2] is the color of the plotted line.
if (length(col)==1) col <- rep(col,2)
plot(x, y, type="p", main="Fitted Regression Line", col=col[1])
intercept0 <- as.numeric(lm(y~x)$coefficients[1])
slope0 <- as.numeric(lm(y~x)$coefficients[2])
f <- function(q) {intercept0+q*slope0}
# curve(f, min(x), max(y), add=T, type="s")
lines( c(min(x),max(x)), c(f(min(x)), f(max(x))), col=col[2] ) } # Garren, 2005

df.nc <- function(x, df1, df2, ncp=0) {
# Approximate probability density function (pdf) for the noncentral F-distribution.
# `x' is the vector of quantiles.
# `df1' and `df2' are the degrees of freedom.
# `ncp' is the noncentrality parameter.
delta <- 1e-2  # `delta' should not be too small or too large; 0.01 works well.
# When delta is 0.01, the maximum absolute error is 1.7e-6,
# which may be compared to dnorm(0) whose value is 0.3989.
if (ncp==0) {return(df(x,df1, df2))}
else {return( ( pf( (x+delta/2), df1, df2, ncp ) - pf( (x-delta/2), df1, df2, ncp ) ) / delta )} } # Garren, 2005

dlaplace <- function(x, mean=0, sd=1) {
# Probability density function (pdf) for the Laplace (double exponential) distribution.
# `x' is the vector of quantiles.
# `mean' is the population mean.
# `sd' is the population standard deviation.
exp(-abs(x-mean)*sqrt(2)/sd)/(sd*sqrt(2)) } # Garren, 2005

plaplace <- function(q, mean=0, sd=1) {
# Cumulative distribution function (cdf) for the Laplace
# (double exponential) distribution.
# `q' is the vector of quantiles.
# `mean' is the population mean.
# `sd' is the population standard deviation.
(q>mean) - ((q>mean)*2-1) * exp(-abs(q-mean)*sqrt(2)/sd)/2 } # Garren, 2005

qlaplace <- function(p, mean=0, sd=1) {
# Quantile function for the Laplace (double exponential) distribution.
# `p' is the vector of probabilities.
# `mean' is the population mean.
# `sd' is the population standard deviation.
mean + ( 2*(p<=0.5)-1 ) * sd / sqrt(2) * log( 2 * (p*(p<=0.5)+(1-p)*(p>0.5)) ) } # Garren, 2005

rlaplace <- function(n, mean=0, sd=1) {
# Random generation for the Laplace (double exponential) distribution.
# `n' is the number of observations.
# `mean' is the population mean.
# `sd' is the population standard deviation.
rexp(n, (1/sd)) * sample(c(-1, 1), n, replace=T) / sqrt(2) + mean } # Garren, 2005

scan2 <- function(file.name, matrix.format=FALSE, character.rm=FALSE, disk.drive=NULL, course.num=324) {
# Scans in a matrix or vector.
# `file.name' is the name of the data set to be scanned.
# If `matrix.format' is TRUE, then data are attempted to be saved as a rectangular matrix.
# If `matrix.format' is FALSE, then data are saved as a vector.
# If `character.rm' is TRUE, then remove all character (i.e., non-numeric) data.
# `disk.drive' is the disk drive where the data are located, is NULL for web-based data, # and is "linux" for a disk drive on a linux-based computer. # Some choices for `disk.drive' are NULL, "linux", "a", "b", "c", "d", "e", .... # `course.num' is the course number and may be numeric or character. course.num <- as.numeric(course.num) if (is.null(disk.drive)) { full.name <- paste("http://www.math.jmu.edu/~garrenst/math", course.num, ".dir/datasets/", file.name, sep="") } else { if (disk.drive=="linux" & course.num==325) { full.name <- paste("/mnt/cdrom/SAS/DATASETS/", file.name, sep="") } if (disk.drive!="linux" & course.num==325) { full.name <- paste(disk.drive, ":/sas/datasets/", file.name, sep="") } } data <- scan(full.name, what=character(), quiet=T, comment.char="#") a.character <- F ; options(warn = -1) if (course.num==325) { for (i in 1:length(data)) { if ( is.na(as.numeric(data[i])) & data[i]!="." ) {a.character <- T} if ( data[i]=="." ) data[i] <- NA } } # Missing data. else { for (i in 1:length(data)) { if ( is.na(as.numeric(data[i])) ) {a.character <- T} } } options(warn = 0) if (a.character & character.rm) { options(warn = -1) ; data1 <- NULL for (i in 1:length(data)) { if ( !is.na(as.numeric(data[i])) || is.na(data[i]) ) data1 <- c(data1, data[i]) } options(warn = 0) ; data <- data1 } if (!a.character || character.rm) data <- as.numeric(data) if (matrix.format) { ncol <- 0 ; line.num <- 0 while (ncol==0 & line.num<=1000) { line.num <- line.num+1 first.line <- scan(full.name, what=character(), nlines=line.num, quiet=T, comment.char="#") if (!a.character || !character.rm) {ncol <- length(first.line)} else { options(warn = -1) for (i in 1:length(first.line)) { if ( !is.na(as.numeric(first.line[i])) || is.na(first.line[i]) ) {ncol <- ncol+1} } options(warn = 0) } } if (ncol!=0) data <- matrix( data, ncol=ncol, byrow=T ) } return(data) } # Garren, 2005 web.source <- function(course.num=324) { # Reads from the web the Rmacros for the MATH course. # `course.num' is the course number and may be numeric or character. source(paste("http://www.math.jmu.edu/~garrenst/math", course.num, ".dir/Rmacros", sep="")) } # Garren, 2005 G <- function(graph.name="Rplots.ps") { # Plot the current graph, but only when using Linux. dev.off(); system(paste("gs", graph.name)) } # Garren, 2005
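
# Example usage (illustrative only; the distribution and parameter values are arbitrary):
# > x <- rdist( 1000, mean, rexp, 30 )   # 1000 sample means, each based on 30 Exponential(1) observations
# > trunc.hist( 0.5, 1.5, x )            # truncated histogram of those means
# > plot.dist( -4, 4, dnorm, NULL, NULL, dt, 5 )   # overlay a standard normal pdf and a t pdf with 5 df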