# R macros for Math 324, updated Spring 2013. source("http://educ.jmu.edu/~garrenst/math300.dir/Rmacros") # Math 324 scan2 = function(file.name, course.num=324, na.strings=".", ...) { # Reads in a table. # `file.name' is the name of the data set to be scanned. # `course.num' is the course number and may be numeric or character. # `na.strings' is a vector of strings which are to be interpreted as `NA' values. # Blank fields are also considered to be missing values. # `...' are the optional arguments used in `read.table'. full.name <- paste("http://educ.jmu.edu/~garrenst/math", as.numeric(course.num), ".dir/datasets/", file.name, sep="") return(scan(full.name, na.strings=na.strings, comment.char="#", ...)) } # Garren, 2011 # Math 324 read.table2 = function(file.name, course.num=324, na.strings=".", ...) { # Reads in a table. # `file.name' is the name of the data set to be scanned. # `course.num' is the course number and may be numeric or character. # `na.strings' is a vector of strings which are to be interpreted as `NA' values. # Blank fields are also considered to be missing values. # `...' are the optional arguments used in `read.table'. full.name <- paste("http://educ.jmu.edu/~garrenst/math", as.numeric(course.num), ".dir/datasets/", file.name, sep="") return(read.table(full.name, na.strings=na.strings, ...)) } # Garren, 2011 # Math 324 quantile.ci <- function( x, p = 0.5, conf.level = 0.95 ) { # Produces an exact confidence interval on the 100*p_th percentile, # based on the binomial test, where ties are excluded. # `x' is the vector of observations. # `p' is the vector of the hypothesized probability of success. # `conf.level' is the confidence level for the returned confidence interval. c.i. = NULL for ( k in 1:length(p) ) { delta <- (max(x)-min(x))/1e10 xgrid <- c(x, x+delta, x-delta) value.in.c.i. <- rep(NA, length(xgrid)) for (iii in 1:length(xgrid)) { x1 <- c( sum( xxgrid[iii] ) ); n <- sum(x1) value.in.c.i.[iii] <- binom.test(x1, n, p[k], alternative = "two.sided", conf.level)$p.value>=1-conf.level } c.i. <- rbind( c.i., c( min( xgrid[value.in.c.i.] ), max( xgrid[value.in.c.i.] ) ) ) } colnames( c.i. ) = c("lower","upper") ; rownames( c.i. ) = p return( c.i. ) } # Garren, 2013 # Math 324 power.binom.test <- function(n, alpha=0.05, alternative=c("two.sided", "less", "greater"), null.median, alt.pdist, ...) { # Computes the power of the binomial test (i.e., sign test). # This power function is based on binomial probabilities, # not the normal approximation to the binomial distribution, # and produces exact solutions. # n is the sample size. # alpha is the size of the test; i.e., P(type I error). # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # null.median is the population median under the null hypothesis. # `alt.pdist' is the cdf under the alternative distribution, and some choices are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, pgeo, ppois. # ...: optional arguments to `alt.pdist', EXCLUDING the first argument. alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) if (alternative=="two.sided") { q <- qbinom(alpha/2, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha/2) prob <- pbinom( q, n, 1-match.fun(alt.pdist)(null.median, ...) ) + pbinom( q, n, match.fun(alt.pdist)(null.median, ...) ) } if (alternative=="less") { q <- qbinom(alpha, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha) prob <- pbinom( q, n, 1-match.fun(alt.pdist)(null.median, ...) ) } if (alternative=="greater") { q <- qbinom(alpha, n, 0.5) ; q <- q - (pbinom(q,n,0.5)>alpha) prob <- pbinom( q, n, match.fun(alt.pdist)(null.median, ...) ) } prob } # Garren, 2005 # Math 324 perm.test <- function(x, y=NULL, alternative=c("two.sided", "less", "greater"), mu=0, paired=FALSE, all.perms=TRUE, num.sim=1e4, plot=FALSE, stat=mean, ...) { # If this version does not work on Rweb, then use "perm.test.old" instead. # Performs one-sample and two-sample permutation tests on vectors of data. # The one-sample permutation test is based on stat(x-mu); # the two-sample paired permutation test is based on stat(x-y-mu); # and the two-sample unpaired permutation test is based on stat(x-mu)-stat(y). # `x' is a numeric vector of data values. # `y' is an optional numeric vector data values, and is used for two-sample tests only. # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # `mu' is a number indicating the null value of the location parameter # (or the difference in location parameters if performing a two-sample test). # `paired' indicates whether or not a two-sample test should be paired, # and is ignored for a one-sample test. # The exact p-value is attempted when `all.perms' (i.e., all permutations) is TRUE, # and is simulated when `all.perms' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. # If `plot' is TRUE, then plot the histogram of the permutation distribution; # otherwise, list the p-value. # Setting `plot' to TRUE works best when the permutation distribution # is close to continuous. # Some reasonable choices for `stat' are mean and median. # ... consists of optional arguments to `stat'; # and is the second argument to `stat' when unspecified. # For example, if `stat' equals `mean', then the second argument # `trim' denotes the fraction (0 to 0.5) of observations to be trimmed # from each end of `x' and `y' before the mean is computed. alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) if (!is.null(y) & !paired) { # Perform unpaired two-sample permutation test. l1 <- min(length(x), length(y)); x <- x-mu l2 <- max(length(x), length(y)); l3 <- l1+l2 test.stat0 <- match.fun(stat)(x, ...)-match.fun(stat)(y, ...); test.stat <- NULL if (choose(l3,l1)>num.sim) all.perms <- FALSE output1 <- "Unpaired two-sample permutation test was performed." if (all.perms) { output2 <- "Exact p-value was calculated." combos <- combn( 1:l3, l1 ) for (i in 1:dim(combos)[2]) { index <- c( combos[,i], setdiff( c(1:l3), combos[,i] ) ) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) } } # End of if else { # Use simulations. output2 <- paste("p-value was estimated based on", num.sim, "simulations.") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) } } if (length(x)>length(y)) test.stat <- -test.stat if (!plot) { if (alternative=="less") p.value <- mean( test.stat <= test.stat0 ) if (alternative=="greater") p.value <- mean( test.stat >= test.stat0 ) if (alternative=="two.sided") p.value <- mean( abs(test.stat) >= abs(test.stat0) ) structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value)) } else {hist(test.stat, main="Histogram of Permutation Distribution", xlab="Value of test statistic", ylab="Frequency")} } # END of `if (!is.null(y) & !paired)' else { # Perform a one-sample or a paired two-sample permutation test on mu. if (is.null(y)) { output1 <- "One-sample permutation test was performed." } else { x <- x-y; output1 <- "Paired permutation test was performed." } x <- x-mu; lx <- length(x); test.stat0 <- match.fun(stat)(x,...); test.stat <- NULL if (2^lx>num.sim) all.perms <- FALSE if (all.perms) { output2 <- "p-value was calculated based on all permutations." plus.minus.matrix <- matrix(1, 1, lx) for (i in lx:1) { temp <- plus.minus.matrix ; temp[,i] <- -1 plus.minus.matrix <- rbind( plus.minus.matrix, temp ) } for (ii in 1:2^lx) { test.stat <- c(test.stat, match.fun(mean)(x*plus.minus.matrix[ii,],...)) } } # End of if else { # Use simulations. output2 <- paste("p-value was estimated based on", num.sim, "simulations.") for (i in 1:num.sim) test.stat <- c(test.stat, match.fun(stat)((x*sample(c(-1,1),lx,T)),...)) } if (!plot) { if (alternative=="less") { p.value <- mean( test.stat <= test.stat0 ) } if (alternative=="greater") { p.value <- mean( test.stat >= test.stat0 ) } if (alternative=="two.sided") { p.value <- mean( abs(test.stat) >= abs(test.stat0) ) } structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value))} else {hist(test.stat, main="Histogram of Permutation Distribution", xlab="Value of test statistic", ylab="Frequency")} } # End of `else' } # End of perm.test Garren, 2009 # Math 324 perm.test.old <- function(x, y=NULL, alternative=c("two.sided", "less", "greater"), mu=0, paired=FALSE, all.perms=TRUE, num.sim=1e4, plot=FALSE, stat=mean, ...) { # This version works, even on Rweb, but for exact p-values with LARGE data sets, use "perm.test". # Performs one-sample and two-sample permutation tests on vectors of data. # The one-sample permutation test is based on stat(x-mu); # the two-sample paired permutation test is based on stat(x-y-mu); # and the two-sample unpaired permutation test is based on stat(x-mu)-stat(y). # `x' is a numeric vector of data values. # `y' is an optional numeric vector data values, and is used for two-sample tests only. # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # `mu' is a number indicating the null value of the location parameter # (or the difference in location parameters if performing a two-sample test). # `paired' indicates whether or not a two-sample test should be paired, # and is ignored for a one-sample test. # The exact p-value is attempted when `all.perms' (i.e., all permutations) is TRUE, # and is simulated when `all.perms' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. # If `plot' is TRUE, then plot the histogram of the permutation distribution; # otherwise, list the p-value. # Setting `plot' to TRUE works best when the permutation distribution # is close to continuous. # Some reasonable choices for `stat' are mean and median. # ... consists of optional arguments to `stat'; # and is the second argument to `stat' when unspecified. # For example, if `stat' equals `mean', then the second argument # `trim' denotes the fraction (0 to 0.5) of observations to be trimmed # from each end of `x' and `y' before the mean is computed. alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) if (!is.null(y) & !paired) { # Perform unpaired two-sample permutation test. max.loop <- 10; l1 <- min(length(x), length(y)); x <- x-mu l2 <- max(length(x), length(y)); l3 <- l1+l2 test.stat0 <- match.fun(stat)(x, ...)-match.fun(stat)(y, ...); test.stat <- NULL if (choose(l3,l1)>num.sim) all.perms <- FALSE output1 <- "Unpaired two-sample permutation test was performed." if (all.perms & l1<=max.loop) { output2 <- "Exact p-value was calculated." index <- rep(NA, max(l3, max.loop)) for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) { for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) { for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) { for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) { for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) { index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10) index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) ) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) }}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output2 <- paste("p-value was estimated based on", num.sim, "simulations.") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, match.fun(stat)(c(x,y)[index[1:l1]], ...) -match.fun(stat)(c(x,y)[index[(l1+1):l3]], ...) ) } } if (length(x)>length(y)) test.stat <- -test.stat if (!plot) { if (alternative=="less") p.value <- mean( test.stat <= test.stat0 ) if (alternative=="greater") p.value <- mean( test.stat >= test.stat0 ) if (alternative=="two.sided") p.value <- mean( abs(test.stat) >= abs(test.stat0) ) structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value)) } else {hist(test.stat, main="Histogram of Permutation Distribution", xlab="Value of test statistic", ylab="Frequency")} } # END of `if (!is.null(y) & !paired)' else { # Perform a one-sample or a paired two-sample permutation test on mu. max.loop <- 20 if (is.null(y)) { output1 <- "One-sample permutation test was performed." } else { x <- x-y; output1 <- "Paired permutation test was performed." } x <- x-mu; lx <- length(x); test.stat0 <- match.fun(stat)(x,...); test.stat <- NULL if (lx>max.loop || 2^lx>num.sim) all.perms <- FALSE if (all.perms) { output2 <- "p-value was calculated based on all permutations." for (i1 in 0:ifelse(lx<1,0,1)) { for (i2 in 0:ifelse(lx<2,0,1)) { for (i3 in 0:ifelse(lx<3,0,1)) { for (i4 in 0:ifelse(lx<4,0,1)) { for (i5 in 0:ifelse(lx<5,0,1)) { for (i6 in 0:ifelse(lx<6,0,1)) { for (i7 in 0:ifelse(lx<7,0,1)) { for (i8 in 0:ifelse(lx<8,0,1)) { for (i9 in 0:ifelse(lx<9,0,1)) { for (i10 in 0:ifelse(lx<10,0,1)) { for (i11 in 0:ifelse(lx<11,0,1)) { for (i12 in 0:ifelse(lx<12,0,1)) { for (i13 in 0:ifelse(lx<13,0,1)) { for (i14 in 0:ifelse(lx<14,0,1)) { for (i15 in 0:ifelse(lx<15,0,1)) { for (i16 in 0:ifelse(lx<16,0,1)) { for (i17 in 0:ifelse(lx<17,0,1)) { for (i18 in 0:ifelse(lx<18,0,1)) { for (i19 in 0:ifelse(lx<19,0,1)) { for (i20 in 0:ifelse(lx<20,0,1)) { test.stat <- c(test.stat, match.fun(mean)(x*(2*(c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10, i11,i12,i13,i14,i15,i16,i17,i18,19,i20)[1:lx])-1),...)) }}}}}}}}}}}}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output2 <- paste("p-value was estimated based on", num.sim, "simulations.") for (i in 1:num.sim) test.stat <- c(test.stat, match.fun(stat)((x*sample(c(-1,1),lx,T)),...)) } if (!plot) { if (alternative=="less") { p.value <- mean( test.stat <= test.stat0 ) } if (alternative=="greater") { p.value <- mean( test.stat >= test.stat0 ) } if (alternative=="two.sided") { p.value <- mean( abs(test.stat) >= abs(test.stat0) ) } structure(list(output1, output2, alternative=alternative, mu=mu, p.value=p.value))} else {hist(test.stat, main="Histogram of Permutation Distribution", xlab="Value of test statistic", ylab="Frequency")} } # End of `else' } # End of perm.test.old Garren, 2005 # Math 324 score <- function(x, y=NULL, expon=FALSE) { # Generates van der Waerden scores (i.e., normal quantiles) or exponential # (similar to Savage) scores, for combined data `x' and `y'. # `x' is a positive integer equal to the number of desired scores when y is NULL, # or `x' is a vector of observations. # `y' is an optional vector of observations, typically used with two-sample tests. # When `expon' is FALSE, van der Waerden scores are computed, even for ties. # When `expon' is TRUE, Exponential scores are computed, # and interpolation is used for ties. # Output: scored values for `x', when y is NULL. # Output: `$x' are the scored values for `x', when y is not NULL. # Output: `$y' are the scored values for `y', when y is not NULL. if (is.null(y) & length(x)==1) { if (floor(x)==ceiling(x) & x>=1) {x <- 1:x} } n <- length(c(x,y)) ; if (!expon) {z <- qnorm(rank(c(x,y))/(n+1))} if (expon) { u <- cumsum( 1/(n+1-1:n) ) ; z <- u[ rank(c(x,y)) ] for (i in 1:n) { rank0 <- rank(c(x,y))[i] ; remainder <- rank0 - floor(rank0) if (remainder>0) {z[i] <- u[floor(rank0)]*(1-remainder)+u[ceiling(rank0)]*remainder}}} if (is.null(y)) { return(z) } if (!is.null(y)) { return(structure(list(x=z[1:length(x)], y=z[(length(x)+1):n]))) } } # Garren, 2005 # Math 324 siegel.test <- function(x, y, alternative=c("two.sided", "less", "greater"), reverse=FALSE, all.perms=TRUE, num.sim=1e4) { # Performs the Siegel-Tukey test, where ties are handled by averaging ranks, # not by asymptotic approximations. # `x' and `y' are numeric vectors of data values. # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # If `reverse' is FALSE, then assign rank 1 to the smallest observation. # If `reverse' is TRUE, then assign rank 1 to the largest observation. # The exact p-value is attempted when `all.perms' (i.e., all permutations) is TRUE, # and is simulated when `all.perms' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) z <- sort(c(x,y)) ; lz <- length(z) lower <- 1; upper <- lz; j <- rep(NA, lz) for (i in 1:lz) { if (i-4*as.integer(i/4)<=1) { j[lower] <- i ; lower <- lower+1 } else {j[upper] <- i; upper <- upper-1} } if (reverse) { j <- rev(j) } z0 <- z[order(j)] rank.x.mat <- matrix(NA, length(x), lz) rank.y.mat <- matrix(NA, length(y), lz) for (k in 1:lz) { for (i in 1:length(x)) { if (x[i]==z0[k]) {rank.x.mat[i,k] <- k} } for (i in 1:length(y)) { if (y[i]==z0[k]) {rank.y.mat[i,k] <- k} } } rank.x <- apply(rank.x.mat,1,mean,na.rm=TRUE) rank.y <- apply(rank.y.mat,1,mean,na.rm=TRUE) if (alternative=="two.sided") {p.value <- perm.test(rank.x,rank.y,alternative="two.sided",all.perms=all.perms, num.sim=num.sim,stat=mean)$p.value} if (alternative=="less") {p.value <- perm.test(rank.x,rank.y,alternative="greater",all.perms=all.perms, num.sim=num.sim,stat=mean)$p.value} if (alternative=="greater") {p.value <- perm.test(rank.x,rank.y,alternative="less",all.perms=all.perms, num.sim=num.sim,stat=mean)$p.value} structure(list(" Siegel-Tukey test", alternative=alternative, rank.x=rank.x, rank.y=rank.y, p.value=p.value)) } # Garren, 2005 # Math 324 rmd.test <- function(x, y, alternative=c("two.sided", "less", "greater"), all.perms=TRUE, num.sim=1e4) { # A permutation test is performed based on the estimated rmd, # the ratio of the absolute mean deviances. # `x' and `y' are numeric vectors of data values. # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # The exact p-value is attempted when `all.perms' (i.e., all permutations) is TRUE, # and is simulated when `all.perms' is FALSE or when # computing an exact p-value requires more than num.sim calculations. # `num.sim' is the upper limit on the number of permutations generated. max.loop <- 10; l1 <- min(length(x), length(y)) ; tol <- 1e-10 alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) l2 <- max(length(x), length(y)); l3 <- l1+l2 x <- x-median(x) ; y <- y-median(y) rmd <- function(x, y, alternative) { test <- mean(abs(x)) / mean(abs(y)) if (alternative=="two.sided") {test <- max(test, 1/test)} return( test ) } test.stat0 <- rmd(x, y, alternative) ; test.stat <- NULL if (choose(l3,l1)>num.sim) all.perms <- FALSE if (all.perms & l1<=max.loop) { output <- "Exact p-value was calculated." index <- rep(NA, max(l3, max.loop)) for (i1 in 1:(l2+1)) { for (i2 in (i1+1):max( i1+1, (l2+2)*(l1>=2) )) { for (i3 in (i2+1):max( i2+1, (l2+3)*(l1>=3) )) { for (i4 in (i3+1):max( i3+1, (l2+4)*(l1>=4) )) { for (i5 in (i4+1):max( i4+1, (l2+5)*(l1>=5) )) { for (i6 in (i5+1):max( i5+1, (l2+6)*(l1>=6) )) { for (i7 in (i6+1):max( i6+1, (l2+7)*(l1>=7) )) { for (i8 in (i7+1):max( i7+1, (l2+8)*(l1>=8) )) { for (i9 in (i8+1):max( i8+1, (l2+9)*(l1>=9) )) { for (i10 in (i9+1):max( i9+1, (l2+10)*(l1>=10) )) { index <- c(i1,i2,i3,i4,i5,i6,i7,i8,i9,i10) index <- c( index[1:l1], setdiff(c(1:l3), index[1:l1]) ) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) }}}}}}}}}} # Number of parentheses is max.loop } # End of if else { # Use simulations. output <- paste("p-value was estimated based on", num.sim, "simulations.") for (i in 1:num.sim) { index <- sample(l3) test.stat <- c(test.stat, rmd( c(x,y)[index[1:l1]], c(x,y)[index[(l1+1):l3]], alternative ) ) } } if (length(x)>length(y) & alternative!="two.sided") { test.stat <- 1 / test.stat } if (alternative=="less") { p.value <- mean( test.stat - test.stat0 <= tol ) } else { p.value <- mean( test.stat - test.stat0 >= -tol) } structure(list(output, alternative=alternative, rmd.hat=test.stat0, p.value=p.value)) } # End of rmd.test Garren, 2005 # Math 324 perm.f.test <- function(response, treatment=NULL, num.sim=1e4) { # A permutation F-test is performed, and a one-way analysis of variance F-test is performed. # Only approximated p-values are calculated for the permutation F-test. # `treatment' is the vector of treatments, and `response' is the vector of responses. # Alternatively, if `response' is NULL, then `treatment' must be an N by 2 matrix, # such that the first column represents treatment and the second column represents response. # `num.sim' is the number of permutations generated. # If `num.sim' is smaller than one, then the permutation test is not performed. # otherwise, list the p-value. if (is.null(treatment)) {treatment <- response[,2] ; response <- as.numeric(response[,1])} if (length(response)!=length(treatment)) stop("`response' and `treatment' must have the same length.") treatment <- factor(treatment) ; list1 <- c("One-way ANOVA", summary(aov(response~treatment))) if (num.sim>=1) { all.treatments <- union(treatment, NULL); k <- length(all.treatments); ni <- NULL for (i in 1:k) { ni <- c(ni, sum(treatment==all.treatments[i])) } one.way.anova <- function(treatment, response, all.treatments, ni, k) { # Returns $\sum_{i=1}^k n_i{\bar Y}_i^2$, which is monotone increasing with # the F-test statistic associated with one-way ANOVA; see Higgins, 2004, p. 84. yi.mean <- NULL for (i in 1:k) {yi.mean <- c(yi.mean, mean(response[treatment==all.treatments[i]])) } sum(ni*yi.mean^2) } test.stat0 <- one.way.anova(treatment, response, all.treatments, ni, k) ; test.stat <- NULL for (i in 1:num.sim) { test.stat <- c(test.stat, one.way.anova(treatment, sample(response), all.treatments, ni, k)) } list2 <- c( "The p-value from the permutation F-test based on", paste( num.sim, "simulations is", mean(test.stat>=test.stat0) ) ) list1 <- c(list1, list2) } return(list1) } # Garren, 2005 # Math 324 perm.cor.test <- function(x, y=NULL, alternative=c("two.sided", "less", "greater"), method=c("pearson", "spearman"), num.sim=1e4 ) { # A permutation correlation test is performed. # P-values are approximated based on randomly sampling the permutations. # `x' and `y' are the paired vectors of observations. # Alternatively, if `y' is NULL, then `x' must be an N by 2 matrix, # `alternative' is a character string specifying the alternative hypothesis, and # must be one of `"two.sided"' (default), `"greater"' or `"less"'. # Only the initial letter needs to be specified. # `method' is a character string specifying the type of correlation, and # must be one of `"pearson"' (default) or `"spearman"'. # Only the initial letter needs to be specified. # Choices for `method' are "pearson", "spearman", "kendall". # `num.sim' is the number of permutations generated. alternative <- abbreviation(alternative, c("two.sided", "less", "greater")) method <- abbreviation(method, c("pearson", "spearman")) if (is.null(y)) {y <- x[,2] ; x <- as.numeric(x[,1])} if (length(x)!=length(y)) stop("`x' and `y' must have the same length.") if (method=="spearman") {x <- rank(x); y <- rank(y)} if (method %in% c("pearson", "spearman")) { test.stat0 <- cor(x,y); test.stat <- NULL for (i in 1:num.sim) {test.stat <- c(test.stat, cor(x, sample(y)))} if (alternative=="two.sided") p.value <- mean(abs(test.stat)>=abs(test.stat0)) if (alternative=="less") p.value <- mean(test.stat<=test.stat0) if (alternative=="greater") p.value <- mean(test.stat>=test.stat0) } output1 <- paste("Permutation correlation test. Method is", method) output2 <- paste("p-value was estimated based on", num.sim, "simulations.") structure(list(output1, output2, alternative=alternative, p.value=p.value)) } # Garren, 2005 # Common R macros for Math 300 and 400 level courses, originally written spring 2005. # Math 300 trunc.hist <- function(x, xmin=NULL, xmax=NULL, trim=0.025, ...) { # Produces a truncated histogram. # `x' is the vector of observations. # `trim' is the fraction (0 to 0.5) of observations to be trimmed from # each end of `x' before the histogram is constructed; # `trim' is used only when `xmin' and `xmax' are NULL. # When `xmin' and `xmax' are not NULL, then `trim' is ignored, # and the histogram excludes values outside the domain from # `xmin' to `xmax'. # ...: optional arguments to `hist'. if ( (is.null(xmin)& !is.null(xmax)) || (!is.null(xmin)&is.null(xmax))) stop("`xmin' and `xmax' must both be NULL or must both be nonNULL.") if (is.null(xmin)&is.null(xmax)) y <- sort(x)[(length(x)*trim): (length(x)*(1-trim))] if (!is.null(xmin)&!is.null(xmax)) { if (xmin >= xmax) stop("`xmin' cannot be as large as or larger than `xmax'.") y <- x[ x>xmin & x shade.dist( 6, 'df.nc', c(7,9,2.5), NULL, F, 0, 8 ) # To avoid ambiguity with discrete distributions, avoid boarder values with `xshade'. # For example, if X ~ Binomial(n=10, p=0.4), then to graph P(X>2) type: # > shade.dist( 2.5, 'dbinom', 10, 0.4, F ) # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are generated when originally set to NULL. # `xlab' is the label on the x-axis. If NULL, `xlab' may be automatically set to `X', # `Z', `T', `F' or `X^2', according to `ddist'. # `digits.prob' is the number of significant digits listed in the probability. # `digits.xtic' is the number of significant digits listed on the x-axis. # If `xtic' is TRUE, then the numbers listed on the x-axis include the median and `xshade'. # If `xtic' is FALSE, then the default numbers are listed on the x-axis. # If `xtic' is a vector, then the numbers listed on the x-axis consist of `xtic'. # `is.discrete' is logical, indicating whether or not the distribution is discrete. # If NULL, then the macro attempts to assign the correct logical value. # `additional.x.range' is a vector of TWO additional `x' values for evaluating the function. # This is useful when the plot is too sparse. # `main' is the main title. If NULL, the probability is displayed. # If no title is desired, set `main' to an empty string ''. num.grid.points <- 10001; col <- c("black", "red"); ylimchisq1 <- 1.2 options(warn = -1); if (substring(ddist,1,1)!="d") stop("`ddist' must be a pdf.") all.discrete = c("dbinom", "dgeom", "dhyper", "dnbinom", "dpois") if (!is.null(is.discrete)) discrete.pdf=is.discrete if (is.null(is.discrete)) discrete.pdf=(ddist %in% all.discrete) if (is.null(xlab)) { xlab="X" if (ddist=="dnorm") { if (is.null(c(parm1,parm2))) xlab="Z" if (!is.null(parm1) & is.null(parm2)) { if (parm1==0) xlab="Z" } if (!is.null(parm1) & !is.null(parm2)) { if (parm1==0 & parm2==1) xlab="Z" } } if (xlab=="Z") {parm1=0; parm2=1} if (ddist=="dnorm" & is.null(parm2)) parm2==1 temp <- c(parm1, parm2) if (ddist=="dt" & length(temp)==1) xlab=paste(c("T_",temp[1]),sep="",collapse="") if (ddist=="dt.nc" & length(temp)==2) xlab=paste(c("T_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="dchisq" & length(temp)==1) xlab=paste(c("CHI-SQUARE_",temp[1]),sep="",collapse="") if (ddist=="dchisq" & length(temp)==2) xlab=paste(c("CHI-SQUARE_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="df" & length(temp)==2) xlab=paste(c("F_",temp[1],",",temp[2]),sep="",collapse="") if (ddist=="df.nc" & length(temp)==3) xlab=paste(c("F_",temp[1],",",temp[2],",",temp[3]),sep="",collapse="") } parm1 <- c(parm1, parm2); temp.vec <- get.min.max(xmin, xmax, ddist, parm1) if (is.null(xshade)) {xshade <- temp.vec$medianA} if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.") if (length(xshade)==2) {if (xshade[1]>xshade[2]) {temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp}} if (is.null(xmin) || is.null(xmax)) { xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax xmin.temp <- min( xmin, xshade[1]-(xmax-xmin)*0.1 ) xmax.temp <- max( xmax, xshade[length(xshade)]+(xmax-xmin)*0.1 ) xmin <- xmin.temp ; xmax <- xmax.temp } else { if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp } } pdist <- paste("p",substring(ddist,2),sep="") qdist <- paste("q",substring(ddist,2),sep="") if (pdist=="pf.nc") pdist <- "pf" ; if (pdist=="pt.nc") pdist <- "pt" f <- function(z) { if (length(parm1)==0) {return(match.fun(ddist)(z))} if (length(parm1)==1) {return(match.fun(ddist)(z, parm1[1]))} if (length(parm1)==2) {return(match.fun(ddist)(z, parm1[1], parm1[2]))} if (length(parm1)==3) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3]))} if (length(parm1)==4) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))} if (length(parm1)==5) {return(match.fun(ddist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))} } g <- function(z, lower.tail=TRUE) { if (length(parm1)==0) {return(match.fun(pdist)(z, lower.tail=lower.tail))} if (length(parm1)==1) {return(match.fun(pdist)(z, parm1[1], lower.tail=lower.tail))} if (length(parm1)==2) {return(match.fun(pdist)(z, parm1[1], parm1[2], lower.tail=lower.tail))} if (length(parm1)==3) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], lower.tail=lower.tail))} if (length(parm1)==4) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], lower.tail=lower.tail))} if (length(parm1)==5) {return(match.fun(pdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5], lower.tail=lower.tail))} } h <- function(z) { if (length(parm1)==0) {return(match.fun(qdist)(z))} if (length(parm1)==1) {return(match.fun(qdist)(z, parm1[1]))} if (length(parm1)==2) {return(match.fun(qdist)(z, parm1[1], parm1[2]))} if (length(parm1)==3) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3]))} if (length(parm1)==4) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3], parm1[4]))} if (length(parm1)==5) {return(match.fun(qdist)(z, parm1[1], parm1[2], parm1[3], parm1[4], parm1[5]))} } x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1) options(warn = 0) if (discrete.pdf) {xmin <- ceiling(xmin); xmax<-floor(xmax); x <- xmin:xmax } if (length(xshade)==2 & xshade[1]>xshade[2]) { temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp } if (xshade[1]xmax) stop("xshade must be between xmin and xmax.") if (lower.tail & length(xshade)==1) {the.prob <- g(xshade); x1 <- x[ x<=xshade ] } if (!lower.tail & length(xshade)==1) {the.prob <- g(xshade, lower.tail=FALSE); x1 <- x[ x>xshade ] } if (!lower.tail & length(xshade)==2) {the.prob <- g(xshade[2])-g(xshade[1], lower.tail=TRUE) x1 <- x[ xshade[1]xshade[2] ] } maxA <- ifelse( ((ddist %in% c("dchisq", "df")) && parm1[1]==1), min(ylimchisq1, max(f(x),na.rm=TRUE)), max(f(x)) ) ylim1 <- c(min(f(x),na.rm=TRUE), maxA) if (is.null(main)) main= paste(c("Probability is", prettyNum(the.prob,digits=digits.prob)), collapse=" ") if (discrete.pdf) { plot(x, f(x), xlim=c(xmin, xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[1]) curve(f, min(x1), max(x1), (max(x1)-min(x1)+1), add=TRUE, type="h", xaxt="n", col=col[2]) if (lower.tail==TRUE & length(xshade)==2) { curve(f, min(x2), max(x2), (max(x2)-min(x2)+1), add=TRUE, type="h", xaxt="n", col=col[2]) } } else { plot(x1, f(x1), xlim=c(xmin,xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[2]) if (lower.tail==TRUE & length(xshade)==2) { curve(f, min(x2), max(x2), num.grid.points, add=TRUE, type="h", xaxt="n", col=col[2]) } curve(f, xmin, xmax, num.grid.points, add=TRUE, type="p", xaxt="n", pch=20, cex=0.05, col=col[1]) if (length(additional.x.range)>=2) curve(f, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", xaxt="n", pch=20, cex=0.05, col=col[1]) } if (!is.logical(xtic)) axis(1, at=xtic, labels=xtic) if (is.logical(xtic)) { if (!xtic) axis(1); if (xtic) { if (length(xshade)==1) xtic0=prettyNum(c(h(0.5), 2*h(0.5)-xshade, xshade),digits=digits.xtic) if (length(xshade)==2) xtic0=prettyNum(c(h(0.5), xshade),digits=digits.xtic) if (ddist %in% c("dchisq","df")) xtic0=prettyNum(c(h(0.5), xshade, 0),digits=digits.xtic) axis(1, at=xtic0, labels=xtic0) } } } # Garren, 2006 # Math 300 shade.phat <- function(xshade=NULL, size=1, prob=0.5, lower.tail=TRUE, xmin=0, xmax=1, xlab=expression(hat(p)), xtic=TRUE, digits.prob=4, digits.xtic=3, main=NULL) { # Plots a continuous probability density function (pdf) and shades in a region. # `xshade' can be a scalar or a vector of size 2. # When `xshade' is a scalar and lower.tail is TRUE, # the area from 0 to xshade is shaded. # When `xshade' is a scalar and lower.tail is FALSE, # the area from xshade to 1 is shaded. # When `xshade' is a vector and lower.tail is TRUE, # the tail probabilities are shaded. # When `xshade' is a vector and lower.tail is FALSE, # the area from xshade[1] to xshade[2] is shaded. # `size' and `prob' are scalars, corresponding to the parameters in `dbinom'. # To avoid ambiguity with discrete distributions, avoid boarder values with `xshade'. # `xmin' and `xmax' represent the limiting values on the x-axis. # `xlab' is the label on the x-axis. # `digits.prob' is the number of significant digits listed in the probability. # `digits.xtic' is the number of significant digits listed on the x-axis. # If `xtic' is TRUE, then the numbers listed on the x-axis consist of `p' and `xshade'. # If `xtic' is FALSE, then the default numbers are listed on the x-axis. # If `xtic' is a vector, then the numbers listed on the x-axis consist of `xtic'. # `main' is the main title. If NULL, the probability is displayed. # If no title is desired, set `main' to an empty string ''. num.grid.points <- 10001; col <- c("black", "red") options(warn = -1); n=size; p=prob if (is.null(xshade)) xshade <- p if (length(xshade)!=1 & length(xshade)!=2) stop("xshade must have length 1 or 2.") if (length(xshade)==2) {if (xshade[1]>xshade[2]) {temp <- xshade[1]; xshade[1] <- xshade[2]; xshade[2] <- temp}} if (xmin>xmax) { temp <- xmin; xmin <- xmax; xmax <- temp } x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points; options(warn = -1) options(warn = 0) f=function(x){dbinom(x,n,p)} ; g=function(prop){dbinom(round(prop*n),n,p)} xmin1 <- ceiling(xmin*n); xmax1<-floor(xmax*n); x <- xmin1:xmax1 if (xshade[1]xmax) stop("xshade must be between xmin and xmax.") if (lower.tail & length(xshade)==1) {the.prob <- pbinom(xshade*n,n,p); x1 <- x[ x<=xshade*n ] } if (!lower.tail & length(xshade)==1) {the.prob <- 1-pbinom(xshade*n,n,p); x1 <- x[ x>xshade*n ] } if (!lower.tail & length(xshade)==2) {the.prob <- pbinom(xshade[2]*n,n,p)-pbinom(xshade[1]*n,n,p) x1 <- x[ xshade[1]*nxshade[2]*n ] } ylim1 <- c(0, max(f(x),na.rm=TRUE)) if (is.null(main)) main= paste(c("Probability is", prettyNum(the.prob,digits=digits.prob)), collapse=" ") plot(x/n, f(x), xlim=c(xmin, xmax), ylim=ylim1, type="h", xlab=xlab, ylab="probability density function", xaxt="n", main=main, col=col[1]) curve(g, min(x1/n), max(x1/n), max(x1)-min(x1)+1, add=TRUE, type="h", xaxt="n", col=col[2]) if (lower.tail & length(xshade)==2) { curve(g, min(x2/n), max(x2/n), max(x2)-min(x2)+1, add=TRUE, type="h", xaxt="n", col=col[2]) } if (is.logical(xtic)) { if (!xtic) axis(1); if (xtic) { if (length(xshade)==1) xtic=prettyNum(c(xshade, 2*p-xshade, p, xmin, xmax),digits=digits.xtic) if (length(xshade)==2) xtic=prettyNum(c(xshade, p, xmin, xmax),digits=digits.xtic) axis(1, at=xtic, labels=xtic) } } if (!is.logical(xtic)) axis(1, at=xtic, labels=xtic) } # Garren, 2006 # Math 300 plot.dist <- function(distA="dnorm", parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL, xmin=NULL, xmax=NULL, col=c("black","red","darkgreen"), xlab=NULL, is.discrete=NULL, additional.x.range=NULL, main=NULL) { # Plots one, two, or three functions. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, # dgeom, dpois, dhyper, dnbinom. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, # pgeo, ppois, phyper, pnbinom. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are numeric scalars or vectors of the parameters of `distA' # (excluding the first argument), where NULL is the default value; # likewise for the other two functions. # `col' may be a scalar or vector, and specifies the colors of the plotted functions. # Type `colors()' for selections. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. # For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( df.nc, c(7,9,2.5) ) # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # `xlab' is the label on the x-axis. If NULL, `xlab' may be automatically set to `X', # `Z', `T', `F' or `X^2', according to `distA', `distB' and `distC'. # `is.discrete' is a vector with 1, 2, or 3 logical values, indicating whether or not # `distA', `distB', and `distC' are discrete. # If NULL, then the macro attempts to assign the correct logical value(s). # `additional.x.range' is a vector of TWO additional `x' values for evaluating the function. # This is useful when the plot is too sparse. # `main' is the main title. If NULL, no main title is displayed. num.grid.points <- 10001; options(warn= -1); ylimchisq1 <- 1.2 all.discrete = c("dbinom", "dgeom", "dhyper", "dpois", "dnbinom", "pbinom", "pgeom", "phyper", "ppois", "pnbinom") if (!is.null(is.discrete)) discrete.pdfA=is.discrete[1] discrete.pdfB=FALSE; discrete.pdfC=FALSE if (is.null(is.discrete)) discrete.pdfA=(distA %in% all.discrete) if (!is.null(distB)) { if (!is.null(is.discrete)) discrete.pdfB=is.discrete[2] if (is.null(is.discrete)) discrete.pdfB=(distB %in% all.discrete) } if (!is.null(distC)) { if (!is.null(is.discrete)) discrete.pdfC=is.discrete[3] if (is.null(is.discrete)) discrete.pdfC=(distC %in% all.discrete) } if (is.null(xlab)) { xlab="X" if ((distA=="dnorm" || distA=="pnorm") & is.null(distB) & is.null(distC)) { if (is.null(c(parmA1,parmA2))) xlab="Z" if (!is.null(parmA1) & is.null(parmA2)) { if (parmA1==0) xlab="Z" } if (!is.null(parmA1) & !is.null(parmA2)) { if (parmA1==0 & parmA2==1) xlab="Z" } } temp = c(parmA1, parmA2) if (distA=="dt" || (distA=="pt" & length(temp)==1)) { xlab=paste(c("T_",temp[1]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="T" } } if (distA=="dt.nc" || (distA=="pt" & length(temp)==2)) { xlab=paste(c("T_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="T" } } if (distA=="dchisq" || distA=="pchisq") { xlab=paste(c("CHI-SQUARE_",parmA1[1]),sep="",collapse="") if (length(temp)==2) xlab=paste(c("CHI-SQUARE_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="CHI-SQUARE" } } if (distA=="df" || (distA=="pf" & length(temp)==2)) { xlab=paste(c("F_",temp[1],",",temp[2]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="F" } } if (distA=="df.nc" || (distA=="pf" & length(temp)==3)) { xlab=paste(c("F_",temp[1],",",temp[2],",",temp[3]),sep="",collapse="") if ( !is.null(distB) ) { xlab="X" if ( length( union( union(distA,distB), union(distA,distC) ) )==1 ) xlab="F" } } } parmA1 <- c(parmA1, parmA2); parmB1 <- c(parmB1, parmB2); parmC1 <- c(parmC1, parmC2) temp.vec <- get.min.max(xmin, xmax, distA, parmA1, NULL, distB, parmB1, NULL, distC, parmC1, NULL ) xmin <- temp.vec$xmin ; xmax <- temp.vec$xmax if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} x <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points x0 <- ceiling(xmin):floor(xmax) if (length(col)==1) {col <- rep(col, 3)}; if (length(col)==2) {col <- c(col, col[1])} fA <- function(z) { if (length(parmA1)==0) {return(match.fun(distA)(z))} if (length(parmA1)==1) {return(match.fun(distA)(z, parmA1[1]))} if (length(parmA1)==2) {return(match.fun(distA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(distA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } ylim1 <- NULL if (!is.null(distB)) { fB <- function(z) { if (length(parmB1)==0) {return(match.fun(distB)(z))} if (length(parmB1)==1) {return(match.fun(distB)(z, parmB1[1]))} if (length(parmB1)==2) {return(match.fun(distB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(distB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } } if (!is.null(distC)) { fC <- function(z) { if (length(parmC1)==0) {return(match.fun(distC)(z))} if (length(parmC1)==1) {return(match.fun(distC)(z, parmC1[1]))} if (length(parmC1)==2) {return(match.fun(distC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(distC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } } maxA <- ifelse( ((distA %in% c("dchisq", "df")) && parmA1[1]==1), min(ylimchisq1, max(c(fA(x),fA(x0)),na.rm=TRUE)), max(c(fA(x),fA(x0))) ) ylim1 <- c(min(c(fA(x),fA(x0)),na.rm=TRUE), maxA) if (!is.null(distB) & is.null(distC)) { maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1), min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=TRUE)), max(c(fB(x),fB(x0))) ) ylim1 <- c( min(c(fA(x),fB(x),fA(x0),fB(x0)),na.rm=TRUE), max(maxA, maxB) ) } if (is.null(distB) & !is.null(distC)) { maxC <- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1), min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=TRUE)), max(c(fC(x),fC(x0))) ) ylim1 <- c( min(c(fA(x),fC(x),fA(x0),fC(x0)),na.rm=TRUE), max(maxA, maxC) ) } if (!is.null(distB) & !is.null(distC)) { maxB <- ifelse( ((distB %in% c("dchisq", "df")) && parmB1[1]==1), min(ylimchisq1, max(c(fB(x),fB(x0)),na.rm=TRUE)), max(c(fB(x),fB(x0))) ) maxC <- ifelse( ((distC %in% c("dchisq", "df")) && parmC1[1]==1), min(ylimchisq1, max(c(fC(x),fC(x0)),na.rm=TRUE)), max(c(fC(x),fC(x0))) ) ylim1 <- c( min(c(fA(x),fB(x),fC(x),fA(x0),fB(x0),fC(x0)),na.rm=TRUE), max(maxA, maxB, maxC) ) } ylab=""; pdfA <- substring(distA, 1, 1)=="d" pdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="d")) pdfC <- ifelse(is.null(distC), T, (substring(distC, 1, 1)=="d")) if (pdfA & is.null(distB) & is.null(distC)) {ylab <- "probability density function"} else { if (pdfA & pdfB & pdfB) ylab <- "probability density functions" } cdfA <- substring(distA, 1, 1)=="p" cdfB <- ifelse(is.null(distB), T, (substring(distB, 1, 1)=="p")) cdfC <- ifelse(is.null(distC), T, (substring(distB, 1, 1)=="p")) if (cdfA & is.null(distB) & is.null(distC)) {ylab <- "cumulative distribution function"} else { if (cdfA & cdfB & cdfB) ylab <- "cumulative distribution functions" } if (discrete.pdfA) { plot(x0, fA(x0), ylim=ylim1, type="h", xlab=xlab, ylab=ylab, col=col[1], main=main) } else { plot(x, fA(x), ylim=ylim1, type="p", pch=20, cex=0.05, xlab=xlab, ylab=ylab, col=col[1], main=main) } if (discrete.pdfB) { curve(fB, x0[1], x0[length(x0)], length(x0), add=TRUE, type="h", col=col[2]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) } if (discrete.pdfC) { curve(fC, x0[1], x0[length(x0)], length(x0), add=TRUE, type="h", col=col[3]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) } if (discrete.pdfA & discrete.pdfB & !discrete.pdfC) { x1 <- x0[ fA(x0) > fB(x0) ]; x2 <- x0[ fA(x0) <= fB(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) } if (discrete.pdfA & !discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) > fC(x0) ]; x2 <- x0[ fA(x0) <= fC(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) } if (!discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fB(x0) > fC(x0) ]; x2 <- x0[ fB(x0) <= fC(x0) ] curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) } if (discrete.pdfA & discrete.pdfB & discrete.pdfC) { x1 <- x0[ fA(x0) >= fB(x0) & fB(x0) >= fC(x0) ] x2 <- x0[ fA(x0) >= fC(x0) & fC(x0) >= fB(x0) ] x3 <- x0[ fB(x0) >= fA(x0) & fA(x0) >= fC(x0) ] x4 <- x0[ fB(x0) >= fC(x0) & fC(x0) >= fA(x0) ] x5 <- x0[ fC(x0) >= fA(x0) & fA(x0) >= fB(x0) ] x6 <- x0[ fC(x0) >= fB(x0) & fB(x0) >= fA(x0) ] curve(fA, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[1]) curve(fB, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[2]) curve(fC, x1[1], x1[length(x1)], length(x1), add=TRUE, type="h", col=col[3]) curve(fA, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[1]) curve(fC, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[3]) curve(fB, x2[1], x2[length(x2)], length(x2), add=TRUE, type="h", col=col[2]) curve(fB, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[2]) curve(fA, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[1]) curve(fC, x3[1], x3[length(x3)], length(x3), add=TRUE, type="h", col=col[3]) curve(fB, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[2]) curve(fC, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[3]) curve(fA, x4[1], x4[length(x4)], length(x4), add=TRUE, type="h", col=col[1]) curve(fC, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[3]) curve(fA, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[1]) curve(fB, x5[1], x5[length(x5)], length(x5), add=TRUE, type="h", col=col[2]) curve(fC, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[3]) curve(fB, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[2]) curve(fA, x6[1], x6[length(x6)], length(x6), add=TRUE, type="h", col=col[1]) } if (!is.null(distC) & !discrete.pdfC) { curve(fC, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) if (length(additional.x.range)>=2) curve(fC, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[3]) } if (!is.null(distB) & !discrete.pdfB) { curve(fB, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) if (length(additional.x.range)>=2) curve(fB, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[2]) } if (!is.null(distA) & !discrete.pdfA) { curve(fA, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[1]) if (length(additional.x.range)>=2) curve(fA, min(additional.x.range), max(additional.x.range), num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[1]) } } # Garren, 2006 # Math 300 get.min.max <- function(xmin=NULL, xmax=NULL, distA, parmA1=NULL, parmA2=NULL, distB=NULL, parmB1=NULL, parmB2=NULL, distC=NULL, parmC1=NULL, parmC2=NULL) { # Outputs reasonable minimum and maximum boundaries for plotting functions. # Also, outputs the medians. # `xmin' and `xmax' represent the limiting values on the x-axis. # Reasonable values of `xmin' and `xmax' are attempted when originally set to NULL. # When plotting the pdf, some choices for `distA', `distB' and `distC' are # dnorm, dexp, dcauchy, dlaplace, dt, dt.nc, dchisq, df, df.nc, dunif, dbinom, # dgeom, dhyper, dpois, dnbinom. # When plotting the cdf, some choices for `distA', `distB' and `distC' are # pnorm, pexp, pcauchy, plaplace, pt, pchisq, pf, punif, pbinom, # pgeom, phyper, ppois, pnbinom. # The values of `distA', `distB' and `distC' must be in quotes if `xmin' or `xmax' is NULL. # `parmA1' and `parmA2' are numeric scalars or vectors of the parameters of `distA' # (excluding the first argument), where NULL is the default value; # likewise for the other two functions. # If only one function is to be plotted, then use `distA'. # If a distribution, say `distA', needs more than 2 parameters, then assign `parmA1' # to be a vector of all (as many as 5) parameters, and keep `parmA2' equal to NULL. # For example, to plot a noncentral F pdf with 7 and 9 df and ncp=2.5, type: # > plot2.dist( 0, 8, "df.nc", c(7,9,2.5) ) pmin <- 0.125 ; amountover <- 1 ; buffer <- 0.15 return.now <- FALSE ; medianA <- medianB <- medianC <- NULL parmA1 <- c(parmA1, parmA2); parmB1 <- c(parmB1, parmB2); parmC1 <- c(parmC1, parmC2) if (!is.null(xmin) & !is.null(xmax)) medianA <- medianB <- medianC <- mean(xmin,xmax) if ("dt.nc" %in% c(distA, distB, distC)) return.now <- TRUE if ("df.nc" %in% c(distA, distB, distC)) return.now <- TRUE if (distA=="pt" & length(parmA1)>1) return.now <- TRUE if (!is.null(distB)) {if (distB=="pt" & length(parmB1)>1) return.now <- TRUE} if (!is.null(distC)) {if (distC=="pt" & length(parmC1)>1) return.now <- TRUE} if (distA=="pf" & length(parmA1)>2) return.now <- TRUE if (!is.null(distB)) {if (distB=="pf" & length(parmB1)>2) return.now <- TRUE} if (!is.null(distC)) {if (distC=="pf" & length(parmC1)>2) return.now <- TRUE} if (return.now & (is.null(xmin) || is.null(xmax))) stop("Neither `xmin' nor `xmax' can be NULL for this distribution.") if (!return.now) { if (!is.null(distA)) qdistA <- paste("q",substring(distA,2),sep="") if (!is.null(distB)) qdistB <- paste("q",substring(distB,2),sep="") if (!is.null(distC)) qdistC <- paste("q",substring(distC,2),sep="") if (!is.null(distA)) { qfA <- function(z) { if (length(parmA1)==0) {return(match.fun(qdistA)(z))} if (length(parmA1)==1) {return(match.fun(qdistA)(z, parmA1[1]))} if (length(parmA1)==2) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2]))} if (length(parmA1)==3) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3]))} if (length(parmA1)==4) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4]))} if (length(parmA1)==5) {return(match.fun(qdistA)(z, parmA1[1], parmA1[2], parmA1[3], parmA1[4], parmA1[5]))} } } if (!is.null(distB)) { qfB <- function(z) { if (length(parmB1)==0) {return(match.fun(qdistB)(z))} if (length(parmB1)==1) {return(match.fun(qdistB)(z, parmB1[1]))} if (length(parmB1)==2) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2]))} if (length(parmB1)==3) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3]))} if (length(parmB1)==4) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4]))} if (length(parmB1)==5) {return(match.fun(qdistB)(z, parmB1[1], parmB1[2], parmB1[3], parmB1[4], parmB1[5]))} } } if (!is.null(distC)) { qfC <- function(z) { if (length(parmC1)==0) {return(match.fun(qdistC)(z))} if (length(parmC1)==1) {return(match.fun(qdistC)(z, parmC1[1]))} if (length(parmC1)==2) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2]))} if (length(parmC1)==3) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3]))} if (length(parmC1)==4) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4]))} if (length(parmC1)==5) {return(match.fun(qdistC)(z, parmC1[1], parmC1[2], parmC1[3], parmC1[4], parmC1[5]))} } } xminA.temp <- NULL; xmaxA.temp <- NULL; xminB.temp <- NULL; xmaxB.temp <- NULL xminC.temp <- NULL; xmaxC.temp <- NULL xmin2A.temp <- NULL; xmax2A.temp <- NULL; xmin2B.temp <- NULL; xmax2B.temp <- NULL xmin2C.temp <- NULL; xmax2C.temp <- NULL if (is.null(xmin)) { if (!is.null(distA)) xminA.temp <- qfA(pmin) if (!is.null(distB)) xminB.temp <- qfB(pmin) if (!is.null(distC)) xminC.temp <- qfC(pmin) } else { xminA.temp <- xmin; xminB.temp <- xmin; xminC.temp <- xmin } if (is.null(xmax)) { if (!is.null(distA)) xmaxA.temp <- qfA(1-pmin) if (!is.null(distB)) xmaxB.temp <- qfB(1-pmin) if (!is.null(distC)) xmaxC.temp <- qfC(1-pmin) } else { xmaxA.temp <- xmax; xmaxB.temp <- xmax; xmaxC.temp <- xmax } if (!is.null(distA)) { xmin2A.temp <- xminA.temp - (xmaxA.temp-xminA.temp)*amountover xmax2A.temp <- xmaxA.temp + (xmaxA.temp-xminA.temp)*amountover qfA0 <- ifelse( is.na(qfA(0)), -Inf, qfA(0) ) qfA1 <- ifelse( is.na(qfA(1)), Inf, qfA(1) ) left.bounded <- ( xmin2A.temp < qfA0 ) right.bounded <- ( xmax2A.temp > qfA1 ) xmin2A.temp <- max( xmin2A.temp, qfA0 ) xmax2A.temp <- min( xmax2A.temp, qfA1 ) if (left.bounded) xmin3A.temp <- xmin2A.temp - (xmax2A.temp - xmin2A.temp) * buffer if (right.bounded) xmax3A.temp <- xmax2A.temp + (xmax2A.temp - xmin2A.temp) * buffer if (left.bounded) xmin2A.temp <- xmin3A.temp if (right.bounded) xmax2A.temp <- xmax3A.temp } if (!is.null(distB)) { xmin2B.temp <- xminB.temp - (xmaxB.temp-xminB.temp)*amountover xmax2B.temp <- xmaxB.temp + (xmaxB.temp-xminB.temp)*amountover qfB0 <- ifelse( is.na(qfB(0)), -Inf, qfB(0) ) qfB1 <- ifelse( is.na(qfB(1)), Inf, qfB(1) ) left.bounded <- ( xmin2B.temp < qfB0 ) right.bounded <- ( xmax2B.temp > qfB1 ) xmin2B.temp <- max( xmin2B.temp, qfB0 ) xmax2B.temp <- min( xmax2B.temp, qfB1 ) if (left.bounded) xmin3B.temp <- xmin2B.temp - (xmax2B.temp - xmin2B.temp) * buffer if (right.bounded) xmax3B.temp <- xmax2B.temp + (xmax2B.temp - xmin2B.temp) * buffer if (left.bounded) xmin2B.temp <- xmin3B.temp if (right.bounded) xmax2B.temp <- xmax3B.temp } if (!is.null(distC)) { xmin2C.temp <- xminC.temp - (xmaxC.temp-xminC.temp)*amountover xmax2C.temp <- xmaxC.temp + (xmaxC.temp-xminC.temp)*amountover qfC0 <- ifelse( is.na(qfC(0)), -Inf, qfC(0) ) qfC1 <- ifelse( is.na(qfC(1)), Inf, qfC(1) ) left.bounded <- ( xmin2C.temp < qfC0 ) right.bounded <- ( xmax2C.temp > qfC1 ) xmin2C.temp <- max( xmin2C.temp, qfC0 ) xmax2C.temp <- min( xmax2C.temp, qfC1 ) if (left.bounded) xmin3C.temp <- xmin2C.temp - (xmax2C.temp - xmin2C.temp) * buffer if (right.bounded) xmax3C.temp <- xmax2C.temp + (xmax2C.temp - xmin2C.temp) * buffer if (left.bounded) xmin2C.temp <- xmin3C.temp if (right.bounded) xmax2C.temp <- xmax3C.temp } if (is.null(xmin)) xmin <- min(xmin2A.temp, xmin2B.temp, xmin2C.temp) if (is.null(xmax)) xmax <- max(xmax2A.temp, xmax2B.temp, xmax2C.temp) if (xmin>xmax) {temp <- xmin; xmin <- xmax; xmax <- temp} medianA <- ifelse( is.null(distA), NA, qfA(0.5) ) medianB <- ifelse( is.null(distB), NA, qfB(0.5) ) medianC <- ifelse( is.null(distC), NA, qfC(0.5) ) } # End of `if (!return.now)' structure(list(xmin=xmin, xmax=xmax, medianA=medianA, medianB=medianB, medianC=medianC)) } # Garren, 2006 # Math 300 empirical.cdf <- function(x, y=NULL, col=c("black","red","darkgreen")) { # Graphs one or two empirical cumulative distribution functions. # `x' is the vector of observations whose empirical cdf is to be graphed. # `y' is an optional vector of observations whose empirical cdf is to be graphed. # `col' may be a scalar or vector of length 3, and specifies the colors of the plotted functions. # Preferably, all three colors should be different. col[1] and col[2] # correspond to `x' and `y', respectively. col[3] is used where # the two empirical cdfs overlap. Type `colors()' for selections. num.grid.points <- 10001; delta <- 0.1 xmin <- min(x,y)-(max(x,y)-min(x,y))*delta; xmax <- max(x,y)+(max(x,y)-min(x,y))*delta x0 <- xmin + (xmax-xmin)*(0:num.grid.points)/num.grid.points y0 <- rep(NA, num.grid.points+1) for (i in 0:num.grid.points) { y0[i] <- mean(x<=x0[i]) } plot(x0, y0, pch=20, cex=0.05, xlab=ifelse(is.null(y),"X","X, Y"), ylab=ifelse(is.null(y),"Cumulative Proportion","Cumulative Proportions"), main=ifelse(is.null(y),"Empirical Cumulative Distribution Function", "Empirical Cumulative Distribution Functions"), col=col[1]) if (!is.null(y)) { if (length(col)==1) { col <- rep(col, 3) } if (length(col)==2) { col <- c(col, col[2]) } f2 <- function(u) { f <- rep(NA, length(u)) for (i in 1:length(u)) { f[i] <- mean(y<=u[i]) } ; return(f) } color.index <- ( y0==f2(x0) ) + 2 curve(f2, xmin, xmax, num.grid.points, add=TRUE, type="p", pch=20, cex=0.05, col=col[color.index]) } } # Garren, 2006 # Math 300 plot.line <- function(x, y, xlab="X", ylab="Y", pch=19, col=c("black", "red"), ...) { # Constructs a scatterplot with the fitted regression line. # `x' is the vector of independent observations. # `y' is the vector of dependent observations. # `xlab' is the label for the x-axis. # `ylab' is the label for the y-axis. # `pch' is the plotting character, usually 19 to 25; 21 is a hollow circle. # Type `?points' for more details. # `col' may be a scalar or vector of length 2, and specifies the color. # col[1] is the color of the points, and col[2] is the color of the plotted line. # ...: optional arguments to `plot'. if (length(col)==1) col <- rep(col,2) z <- lm(y~x)$coefficients; intercept <- as.numeric(z[1]); slope <- as.numeric(z[2]) f <- function(q) {intercept+q*slope} main1 <- c("Simple Linear Regression Line", paste( ylab, "=", prettyNum(intercept), ifelse(slope>=0,"+","-"), prettyNum(abs(slope)), xlab ) ) ylim <- c( min(c(y,f(min(x)),f(max(x)))), max(c(y,f(min(x)),f(max(x)))) ) plot(x, y, ylim=ylim, type="p", xlab=xlab, ylab=ylab, main=main1, col=col[1], pch=pch, ...) lines( c(min(x),max(x)), c(f(min(x)), f(max(x))), col=col[2] ) } # Garren, 2005 # Math 300 plot.line2 <- function(x, y, xlab="X", ylab="Y", header=TRUE, col=c("black", "blue"), ...) { # Constructs a scatterplot with the fitted regression line. # `x' is the vector of independent observations. # `y' is the vector of dependent observations. # `xlab' is the label for the x-axis. # `ylab' is the label for the y-axis. # `header' indicates whether or not the least squares equation should be printed. # `col' may be a scalar or vector of length 2, and specifies the color. # col[1] is the color of the points, and col[2] is the color of the plotted line. # ...: optional arguments to `plot'. if (length(col)==1) col <- rep(col,2) z <- lm(y~x)$coefficients; intercept <- as.numeric(z[1]); slope <- as.numeric(z[2]) f <- function(q) {intercept+q*slope} main1 <- c("Simple Linear Regression Line", paste( ylab, "=", prettyNum(intercept), ifelse(slope>=0,"+","-"), prettyNum(abs(slope)), xlab ) ) if (!header) main1 <- NULL ylim <- c( min(c(y,f(min(x)),f(max(x)))), max(c(y,f(min(x)),f(max(x)))) ) plot(x, y, ylim=ylim, type="p", xlab=xlab, ylab=ylab, main=main1, col=col[1], pch=20, cex=2, ...) lines( c(min(x),max(x)), c(f(min(x)), f(max(x))), col=col[2], lwd=3 ) } # Garren, 2005 # Math 300 linegraph <- function(x, freq=TRUE, prob=NULL, col="red", ...) { # Constructs a line graph. # `x' is the vector of data. # If `freq' is TRUE and `prob' does not sum to 1, then frequencies (counts) are plotted. # If `freq' is FALSE or `prob' sums to 1, then relative frequencies are plotted. # `prob' is a vector of the probabilities or weights on `x', # and does not need to sum to one. If `prob' is NULL, # then all `x' values are equally weighted. # `col' is the color of the plotted lines. # ...: optional arguments to `plot'. x.old <- x; x <- union(x.old,x.old); if (is.null(prob)) { for (i in 1:length(x)) prob <- c( prob, sum(x.old==x[i]) ) } if (!freq) prob <- prob/sum(prob) if (sum(prob)==1 || !freq) {plot( x, prob/sum(prob), ylim=c(0,max(prob)/sum(prob)), type="h", ylab="RELATIVE FREQUENCY", col=col, ... )} else {plot( x, prob, ylim=c(0,max(prob)), type="h", ylab="FREQUENCY", col=col, ... )} } # Garren, 2005 # Math 300 df.nc <- function(x, df1, df2, ncp=0) { # Approximate probability density function (pdf) for the noncentral F-distribution. # `x' is the vector of quantiles. # `df1' and `df2' are the degrees of freedom. # `ncp' is the noncentrality parameter. delta <- 1e-2 # `delta' should not be too small or too large; 0.01 works well. if (ncp==0) {return(df(x,df1, df2))} else {return( ( pf( (x+delta/2), df1, df2, ncp ) - pf( (x-delta/2), df1, df2, ncp ) ) / delta )} } # Garren, 2006 # Math 300 dt.nc <- function(x, df, ncp=0) { # Approximate probability density function (pdf) for the noncentral t-distribution. # `x' is the vector of quantiles. # `df' is the degrees of freedom. # `ncp' is the noncentrality parameter. delta <- 1e-2 # `delta' should not be too small or too large; 0.01 works well. # When delta is 0.01, the maximum absolute error is 1.7e-6, # which may be compared to dnorm(0) whose value is 0.3989. if (ncp==0) {return(dt(x,df))} else {return( ( pt( (x+delta/2), df, ncp ) - pt( (x-delta/2), df, ncp ) ) / delta )} } # Garren, 2006 # Math 300 dlaplace <- function(x, mean=0, sd=1) { # Probability density function (pdf) for the Laplace (double exponential) distribution. # `x' is the vector of quantiles. # `mean' is the population mean. # `sd' is the population standard deviation. exp(-abs(x-mean)*sqrt(2)/sd)/(sd*sqrt(2)) } # Garren, 2005 # Math 300 plaplace <- function(q, mean=0, sd=1, lower.tail=TRUE) { # Cumulative distribution function (cdf) for the Laplace # (double exponential) distribution. # `q' is the vector of quantiles. # `mean' is the population mean. # `sd' is the population standard deviation. # `lower.tail' is logical; if TRUE, then probabilities are P[X <= x]; # otherwise, P[X > x]. p=(q>mean) - ((q>mean)*2-1) * exp(-abs(q-mean)*sqrt(2)/sd)/2 if (!lower.tail) p=1-p; return(p) } # Garren, 2005 # Math 300 qlaplace <- function(p, mean=0, sd=1) { # Quantile function for the Laplace (double exponential) distribution. # `p' is the vector of probabilities. # `mean' is the population mean. # `sd' is the population standard deviation. mean + ( 2*(p<=0.5)-1 ) * sd / sqrt(2) * log( 2 * (p*(p<=0.5)+(1-p)*(p>0.5)) ) } # Garren, 2005 # Math 300 rlaplace <- function(n, mean=0, sd=1) { # Random generation for the Laplace (double exponential) distribution. # `n' is the number of observations. # `mean' is the population mean. # `sd' is the population standard deviation. rexp(n, (1/sd)) * sample(c(-1, 1), n, replace=TRUE) / sqrt(2) + mean } # Garren, 2005 dtriang <- function(x, min=0, max=1) { # Probability density function (pdf) for the symmetric triangular distribution # with endpoints `min' and `max'. # `x' is the vector of quantiles. if (min==max) stop("Endpoints cannot be equal.") if (min>max) {temp=min; min=max; max=temp} ; c=(min+max)/2 2*(x-min)/(max-min)/(c-min)*(min<=x & x<=c) + 2*(max-x)/(max-min)/(max-c)*(c x]. if (min==max) stop("Endpoints cannot be equal.") if (min>max) {temp=min; min=max; max=temp} ; c=(min+max)/2 p=(q-min)^2/(max-min)/(c-min)*(min<=q&q<=c)+(1-(max-q)^2/(max-min)/(max-c))*(cmax) if (!lower.tail) p=1-p; return(p) } # Garren, 2006 qtriang <- function(p, min=0, max=1) { # Quantile function for the symmetric triangular distribution with endpoints `min' and `max'. # `p' is the vector of probabilities. if (min==max) stop("Endpoints cannot be equal.") if (min(p)<0 || max(p)>1) stop("Values of `p' must be between zero and one.") if (min>max) {temp=min; min=max; max=temp} min + (max-min)*( (0<=p&p<=0.5)*(0.5*sqrt(2*p))+(0.5max) {temp=min; min=max; max=temp}; ( runif(n,min,max) + runif(n,min,max) ) /2 } # Garren, 2006 # Math 300 abbreviation <- function(x, choices) { # Returns a value in `choices' specified by `x', which may be an abbreviation. # `x' is a character string, and consists of some or all letters in a # value in `choices' or may equal `choices'. # `choices' is a vector of character strings. answer <- x[1] if (length(x)==1) { for (i in 1:length(choices)) { if (!is.na(charmatch(x, choices[i]))) answer <- choices[i] } } return(answer) } # Garren, 2005 # Math 300 # G <- function(graph.name="Rplots.ps") { # # Plot the current graph, but only when using Linux. # dev.off(); system(paste("gs", graph.name)) # } # Garren, 2005 # Math 300 G <- function() { # Plot the current graph, but only when using Linux. dev.off(); system("cp Rplots.ps /home2/junk9.dir/junk9471.ps") } # Garren, 2007 # Math 300 ci.t.test <- function(x, conf.level=0.95, fpc=1) { # Generates a one-sample two-sided confidence interval on the population mean # based on the t-test. # `x' is a numeric vector of data values. # `conf.level' is the confidence level of the interval. # `fpc' is the finite population correction, and is used when sampling without replacement. if (fpc==1) { y <- as.vector(t.test(x, conf.level=conf.level)$conf.int) } else { y <- rep(NA, 2) y[1] <- mean(x) + qt( ((1-conf.level)/2), (length(x)-1) ) * sqrt( var(x)*fpc/length(x) ) y[2] <- mean(x) - qt( ((1-conf.level)/2), (length(x)-1) ) * sqrt( var(x)*fpc/length(x) ) } return(y) } # Garren, 2005 # Math 300 rstat <- function(num.stat=1, stat, rdist, n=1, ..., replace=FALSE, prob=NULL, conf.level=0.95) { # Random generation for a statistic of a distribution. # num.stat is the number of statistics `stat' to be generated. # Some choices for `stat': mean, median, sum, sd, var, min, max, ci.t.test, # where `ci.t.test' is a t-confidence interval. # `rdist' may be a vector of observations to be (sub)sampled, or may be a function. # Some choices for `rdist' when `rdist' is a function: # rnorm, rexp, rcauchy, rlaplace, rt, rchisq, rf, runif, rbinom, rgeo, rpois. # `n' is the number of observations used to compute each `stat'. # ...: optional arguments to `rdist', EXCLUDING the first argument, when `rdist' is # a function not a vector. # `replace': Should sampling be with replacement; # used only when `rdist' is a vector of observations. # `prob' is a vector of probability weights for obtaining the elements of the vector # being sampled, and is used only when `rdist' is a vector of observations. # For equal weights, keep `prob' equal to NULL. # `conf.level' is the confidence level of the interval, and is used only when # `stat' represents a confidence interval such as `ci.t.test'. y <- NULL is.ci <- length(match.fun(stat)(1:10))==2 if (is.ci) { if (is.numeric(rdist)) { fpc <- ifelse( replace, 1, 1-n/length(rdist) ) for (i in 1:num.stat) { y <- rbind(y, match.fun(stat)( sample(rdist,n,replace,prob), conf.level=conf.level, fpc=fpc)) } } else { for (i in 1:num.stat) { y <- rbind(y, match.fun(stat)(match.fun(rdist)(n, ...), conf.level=conf.level)) } } } else { if (is.numeric(rdist)) { for (i in 1:num.stat) { y <- c(y, match.fun(stat)( sample(rdist,n,replace,prob)) ) } } else { for (i in 1:num.stat) { y <- c(y, match.fun(stat)(match.fun(rdist)(n, ...))) } } } return(y) } # Garren, 2005 # Math 300 plot.ci <- function(ci, mu=NULL, plot.midpoints=TRUE, col=c("black", "red", "darkgreen", "purple")) { # Plots confidence intervals. # `ci' is an N by 2 matrix consisting of N two-sided confidence intervals. # `mu' is the value of the estimated parameter, # and is the population mean for t-confidence intervals. # `plot.midtpoints' is logical, to state whether or not the midpoints # of the intervals should be plotted in col[4]. # `col' may be a scalar or vector, and specifies the colors of the # plotted confidence intervals, the vertical line going through `mu', and the midpoints. if (length(col)==1) {col <- rep(col, 4)}; if (length(col)==2) {col <- c(col, col)} if (length(col)==3) {col <- c(col, col[1])} if (is.vector(ci)) { ci <- t(matrix(ci)) } if (dim(ci)[1]==2 & dim(ci)[2]!=2) ci <- t(ci) if (dim(ci)[2]!=2) stop("`ci' is not in the correct format.") main <- ifelse( is.null(mu), "", paste(c( prettyNum(100*mean( (ci[,1]<=mu)*(mu <=ci[,2]) )), "% of these confidence intervals contain ",prettyNum(mu),"." ), collapse =" ") ) plot(NA, NA, xlim=c(min(ci[,1],mu),max(ci[,2],mu)), ylim=c(0.5,dim(ci)[1]+0.5) , type="s", main=main, xlab=paste(c("CONFIDENCE INTERVAL",ifelse(dim(ci)[1]==1,"","S")),collapse =""), ylab="label") if (is.null(mu)) {col1 <- rep(col[3], dim(ci)[1])} else {col1 <- ifelse(ci[,1]<=mu & mu<=ci[,2], col[3], col[2])} for (i in 1:dim(ci)[1]) {lines( c(ci[i,1], ci[i,2]), c(i,i), col=col1[i])} midpoint <- (ci[,1]+ci[,2])/2 ; delta <- (max(ci[,2],mu)-min(ci[,1],mu))*0.002 if (!is.null(mu)) { lines( c(mu,mu), c(0.5,dim(ci)[1]+0.5), col=col[1]) } if (plot.midpoints) { for (i in 1:dim(ci)[1]) {lines( c(midpoint[i]-delta,midpoint[i]+delta), c(i,i), lwd=4, col=col[4])} } } # Garren, 2005 source2 <- function(course.num=300) { # Reads from the web the Rmacros for the MATH course. # `course.num' is the course number and may be numeric or character. source(paste("http://educ.jmu.edu/~garrenst/math", course.num, ".dir/Rmacros", sep="")) } # Garren, 2005