\name{golub}
\alias{golub}
\alias{golub.cl}
\alias{golub.gnames}

\title{Gene expression dataset from Golub et al. (1999)}

\usage{
data(golub)
}

\description{
  Gene expression data (3051 genes and 38 tumor mRNA samples) from the
  leukemia microarray study of Golub et al. (1999). Pre-processing
  was done as described in Dudoit et al. (2002). The R code for pre-processing is available
in the file \url{../doc/golub.R}.}

\value{
  \item{golub}{matrix of gene expression levels for the 38 tumor mRNA samples, rows correspond to genes (3051 genes) and columns to mRNA samples.}
  \item{golub.cl}{numeric vector indicating the tumor class, 27 acute lymphoblastic leukemia (ALL) cases (code 0) and 11 acute myeloid leukemia (AML) cases (code 1). }
  \item{golub.gnames}{a matrix containing the names of the 3051 genes for the expression matrix \code{golub}. The three columns correspond to the gene \code{index}, \code{ID}, and \code{Name}, respectively.
  }
}

\source{Golub et al. (1999). Molecular classification of cancer: class
discovery and class prediction by gene expression
monitoring, \emph{Science}, Vol. 286:531--537.\cr
\url{http://www-genome.wi.mit.edu/MPR/}.}

\references{S. Dudoit, J. Fridlyand, and T. P. Speed (2002). Comparison of discrimination methods for the classification of tumors using gene expression data. \emph{Journal of the American Statistical Association}, Vol. 97, No. 457, pp. 77--87. }
\keyword{datasets}  

\eof
\name{multtest-internal}
\alias{.mt.BLIM}
\alias{.mt.RandSeed}
\alias{.mt.naNUM}
\alias{mt.number2na}
\alias{mt.na2number}
\alias{mt.getmaxB}
\alias{mt.transformL}
\alias{mt.transformX}
\alias{mt.transformV}
\alias{mt.checkothers}
\alias{mt.checkX}
\alias{mt.checkV}
\alias{mt.checkclasslabel}
\alias{mt.niceres}
\title{Internal multtest functions and variables}
\description{
  Internal multtest functions and variables
}
\usage{
.mt.BLIM
.mt.RandSeed
.mt.naNUM
mt.number2na(x,na)
mt.na2number(x,na)
mt.getmaxB(classlabel,test,B, verbose)
mt.transformL(classlabel,test)
mt.transformV(V,classlabel,test,na,nonpara)
mt.transformX(X,classlabel,test,na,nonpara)
mt.checkothers(side="abs",fixed.seed.sampling="y", B=10000,
na=.mt.naNUM, nonpara="n")
mt.checkX(X,classlabel,test)
mt.checkV(V,classlabel,test)
mt.checkclasslabel(classlabel,test)
mt.niceres(res,X,index)
}
\details{
  These are not to be called directly by the user.
}
\keyword{internal}

\eof
\name{mt.maxT}
\alias{mt.maxT}
\alias{mt.minP}
\title{
  Step-down maxT and minP multiple testing procedures
}
\description{These functions compute permutation adjusted \eqn{p}-values for step-down multiple testing procedures described in Westfall & Young (1993).
}
\usage{
mt.maxT(X,classlabel,test="t",side="abs",fixed.seed.sampling="y",B=10000,na=.mt.naNUM,nonpara="n")
mt.minP(X,classlabel,test="t",side="abs",fixed.seed.sampling="y",B=10000,na=.mt.naNUM,nonpara="n")
}

\arguments{
  \item{X}{A data frame or matrix, with \eqn{m} rows corresponding to variables
    (hypotheses) and
    \eqn{n} columns to observations. In the case of gene expression data, rows
    correspond to genes and columns to mRNA samples. The data can
    be read using \code{\link{read.table}}.
  }
  \item{classlabel}{
    A vector of integers corresponding to observation (column)
    class labels. For \eqn{k} classes, the labels must be integers
    between 0 and \eqn{k-1}. For the \code{blockf} test option,
    observations may be divided into
    \eqn{n/k} blocks of \eqn{k} observations each. The observations are
    ordered by block, and within each block, they are labeled using the
    integers 0 to \eqn{k-1}.
  }	
  \item{test}{A character string specifying the statistic to be
    used to test the null hypothesis of no association between the
    variables and the class labels.\cr
    If \code{test="t"}, the tests are based on two-sample Welch t-statistics
    (unequal variances).  \cr
    If \code{test="t.equalvar"}, the tests are based on two-sample
    t-statistics with equal variance for the two samples. The
    square of the t-statistic is equal to an F-statistic for \eqn{k=2}. \cr
    If \code{test="wilcoxon"}, the tests are based on standardized rank sum Wilcoxon statistics.\cr
    If \code{test="f"}, the tests are based on F-statistics.\cr
    If \code{test="pairt"}, the tests are based on paired t-statistics. The
    square of the paired t-statistic is equal to a block F-statistic for \eqn{k=2}. \cr
    If \code{test="blockf"}, the tests are based on F-statistics which
    adjust for block differences
    (cf. two-way analysis of variance).
  }
  \item{side}{A character string specifying the type of rejection region.\cr
    If \code{side="abs"}, two-tailed tests, the null hypothesis is rejected for large absolute values of the test statistic.\cr
    If \code{side="upper"}, one-tailed tests, the null hypothesis is rejected for large values of the test statistic.\cr
    If \code{side="lower"}, one-tailed tests,  the null hypothesis is rejected for small values of the test statistic.
  }
  \item{fixed.seed.sampling}{If \code{fixed.seed.sampling="y"}, a
    fixed seed sampling procedure is used, which may double the
    computing time, but will not use extra memory to store the
    permutations. If \code{fixed.seed.sampling="n"}, permutations will
    be stored in memory.  For the \code{blockf} test, the option \code{n} was not implemented as it requires too much memory.
  }
  \item{B}{The number of permutations. For a complete
    enumeration, \code{B} should be 0 (zero) or any number not less than
    the total number of permutations.
  }
  \item{na}{Code for missing values (the default is \code{.mt.naNUM=-93074815.62}).
    Entries with missing values will be ignored in the computation,
    i.e., test statistics will be based on a smaller sample size. This
    feature has not yet been fully implemented.
  }
  \item{nonpara}{If \code{nonpara}="y", nonparametric test statistics are computed based on ranked data. \cr
    If  \code{nonpara}="n", the original data are used.
  }
}

\details{These functions compute permutation adjusted \eqn{p}-values for the step-down maxT and minP multiple testing procedures, which provide strong control of the family-wise Type I error rate (FWER). The adjusted \eqn{p}-values for the minP procedure are defined in equation (2.10) p. 66 of Westfall & Young (1993), and the maxT procedure is discussed p. 50 and 114. The permutation algorithms for estimating the adjusted \eqn{p}-values are given in Ge et al. (In preparation). The procedures are for the simultaneous test of \eqn{m} null hypotheses, namely, the null hypotheses of no association between the \eqn{m} variables corresponding to the rows of the data frame \code{X} and the class labels \code{classlabel}. For gene expression data, the null hypotheses correspond to no differential gene expression across mRNA samples.
}


\value{
  A data frame with components
  \item{index}{Vector of row indices, between 1 and \code{nrow(X)}, where rows are sorted first according to
    their adjusted \eqn{p}-values, next their unadjusted \eqn{p}-values, and finally their test statistics. }
  \item{teststat}{Vector of test statistics, ordered according to \code{index}. To get the test statistics in the original data order, use \code{teststat[order(index)]}.}
  \item{rawp}{Vector of raw (unadjusted) \eqn{p}-values, ordered according to \code{index}.}
  \item{adjp}{Vector of adjusted \eqn{p}-values, ordered according to \code{index}.}
  \item{plower}{For \code{\link{mt.minP}} function only, vector of "adjusted \eqn{p}-values", where ties in the permutation distribution of the successive minima of raw \eqn{p}-values with the observed \eqn{p}-values are counted only once. Note that procedures based on \code{plower} do not control the FWER. Comparison of \code{plower} and \code{adjp} gives an idea of the discreteness of the permutation distribution. Values in \code{plower} are ordered according to \code{index}.}
}

\references{
S. Dudoit, J. P. Shaffer, and J. C. Boldrick (Submitted). Multiple hypothesis testing in microarray experiments.\cr

Y. Ge, S. Dudoit, and T. P. Speed. Resampling-based multiple testing for microarray data analysis, Technical Report \#633 of UCB Stat. \url{http://www.stat.berkeley.edu/~gyc} \cr

P. H. Westfall and S. S. Young (1993). \emph{Resampling-based
multiple testing: Examples and methods for \eqn{p}-value adjustment}. John Wiley \& Sons.
}	
  
\author{Yongchao Ge, \email{yongchao.ge@mssm.edu}, \cr
Sandrine Dudoit, \url{http://www.stat.berkeley.edu/~sandrine}.}

\seealso{\code{\link{mt.plot}}, \code{\link{mt.rawp2adjp}}, \code{\link{mt.reject}}, \code{\link{mt.sample.teststat}}, \code{\link{mt.teststat}}, \code{\link{golub}}.}


\examples{
# Gene expression data from Golub et al. (1999)
# To reduce computation time and for illustrative purposes, we consider only
# the first 100 genes and use the default of B=10,000 permutations.
# In general, one would need a much larger number of permutations
# for microarray data.

data(golub)
smallgd<-golub[1:100,] 
classlabel<-golub.cl

# Permutation unadjusted p-values and adjusted p-values 
# for maxT and minP procedures with Welch t-statistics
resT<-mt.maxT(smallgd,classlabel)
resP<-mt.minP(smallgd,classlabel)
rawp<-resT$rawp[order(resT$index)]
teststat<-resT$teststat[order(resT$index)]

# Plot results and compare to Bonferroni procedure
bonf<-mt.rawp2adjp(rawp, proc=c("Bonferroni"))
allp<-cbind(rawp, bonf$adjp[order(bonf$index),2], resT$adjp[order(resT$index)],resP$adjp[order(resP$index)])

mt.plot(allp, teststat, plottype="rvsa", proc=c("rawp","Bonferroni","maxT","minP"),leg=c(0.7,50),lty=1,col=1:4,lwd=2)
mt.plot(allp, teststat, plottype="pvsr", proc=c("rawp","Bonferroni","maxT","minP"),leg=c(60,0.2),lty=1,col=1:4,lwd=2)
mt.plot(allp, teststat, plottype="pvst", proc=c("rawp","Bonferroni","maxT","minP"),leg=c(-6,0.6),pch=16,col=1:4)

# Permutation adjusted p-values for minP procedure with F-statistics (like equal variance t-statistics)
mt.minP(smallgd,classlabel,test="f",fixed.seed.sampling="n")

# Note that the test statistics used in the examples below are not appropriate 
# for the Golub et al. data. The sole purpose of these examples is to 
# demonstrate the use of the mt.maxT and mt.minP functions.

# Permutation adjusted p-values for maxT procedure with paired t-statistics
classlabel<-rep(c(0,1),19)
mt.maxT(smallgd,classlabel,test="pairt")

# Permutation adjusted p-values for maxT procedure with block F-statistics
classlabel<-rep(0:18,2)
mt.maxT(smallgd,classlabel,test="blockf",side="upper")

}
\keyword{htest}

\eof
\name{mt.plot}
\alias{mt.plot}
\title{Plotting results from multiple testing procedures}
\description{This function produces a number of graphical summaries
  for the results of multiple testing procedures and their corresponding
  adjusted \eqn{p}-values.}
\usage{
mt.plot(adjp, teststat, plottype="rvsa", logscale=FALSE, alpha=seq(0, 1, length = 100), proc, leg=c(0, 0), \dots)
}
\arguments{
  \item{adjp}{A matrix of adjusted \emph{p}-values, with rows
    corresponding to hypotheses (genes) and columns to multiple testing
    procedures. This matrix could be obtained from the functions
    \code{\link{mt.maxT}}, \code{\link{mt.minP}}, or \code{\link{mt.rawp2adjp}}.}
  \item{teststat}{A vector of test statistics for each of the hypotheses. This vector could be obtained from the functions \code{\link{mt.teststat}}, \code{\link{mt.maxT}}, or \code{\link{mt.minP}}.}
  \item{plottype}{A character string specifying the type of graphical
    summary for the results of the multiple testing procedures. \cr
    If \code{plottype="rvsa"}, the number of rejected hypotheses is plotted against the nominal Type I error rate for each of the procedures given in \code{proc}.\cr
    If \code{plottype="pvsr"}, the ordered adjusted \emph{p}-values are plotted for each of the procedures given in \code{proc}. This can be viewed as a plot of the Type I error rate against the number of rejected hypotheses. \cr
    If \code{plottype="pvst"}, the adjusted \emph{p}-values are plotted against the test statistics for each of the procedures given in \code{proc}.
    \cr
    If \code{plottype="pvsi"}, the adjusted \emph{p}-values are plotted for each of the procedures given in \code{proc} using the original data order.  }
  \item{logscale}{A logical variable for the \code{pvst} and \code{pvsi} plots. If \code{logscale} is \code{TRUE}, the negative decimal logarithms of the adjusted \emph{p}-values are plotted against the test statistics or gene indices. If \code{logscale} is \code{FALSE}, the adjusted \emph{p}-values are plotted against the test statistics or gene indices.}
  \item{alpha}{A vector of nominal Type I error rates for the \code{rvsa} plot.}
  \item{proc}{A vector of character strings containing the names of the
    multiple testing procedures, to be used in the legend.}
  \item{\dots}{Graphical parameters such as \code{col}, \code{lty},
    \code{pch}, and \code{lwd}
    may also be supplied as arguments to the function (see \code{\link{par}}).}
  \item{leg}{A vector of coordinates for the legend.}
}

\references{
  
  S. Dudoit, J. P. Shaffer, and J. C. Boldrick (Submitted). Multiple hypothesis testing in microarray experiments.\cr
  
  Y. Ge, S. Dudoit, and T. P. Speed. Resampling-based multiple testing for microarray data analysis, Technical Report \#633 of UCB Stat. \url{http://www.stat.berkeley.edu/~gyc}  \cr

}
\author{
  Sandrine Dudoit,  \url{http://www.stat.berkeley.edu/~sandrine}, \cr
  Yongchao Ge, \email{yongchao.ge@mssm.edu}.
}
\seealso{\code{\link{mt.maxT}}, \code{\link{mt.minP}}, \code{\link{mt.rawp2adjp}},  \code{\link{mt.reject}}, \code{\link{mt.teststat}}, \code{\link{golub}}.}

\examples{
# Gene expression data from Golub et al. (1999)
# To reduce computation time and for illustrative purposes, we consider only
# the first 100 genes and use the default of B=10,000 permutations.
# In general, one would need a much larger number of permutations
# for microarray data.

data(golub)
smallgd<-golub[1:100,] 
classlabel<-golub.cl

# Permutation unadjusted p-values and adjusted p-values for maxT procedure
res1<-mt.maxT(smallgd,classlabel)
rawp<-res1$rawp[order(res1$index)]
teststat<-res1$teststat[order(res1$index)]

# Permutation adjusted p-values for simple multiple testing procedures
procs<-c("Bonferroni","Holm","Hochberg","SidakSS","SidakSD","BH","BY")
res2<-mt.rawp2adjp(rawp,procs)

# Plot results from all multiple testing procedures
allp<-cbind(res2$adjp[order(res2$index),],res1$adjp[order(res1$index)])
dimnames(allp)[[2]][9]<-"maxT"
procs<-dimnames(allp)[[2]]
procs[7:9]<-c("maxT","BH","BY")
allp<-allp[,procs]

cols<-c(1:4,"orange","brown","purple",5:6)
ltypes<-c(3,rep(1,6),rep(2,2))

# Ordered adjusted p-values
mt.plot(allp,teststat,plottype="pvsr",proc=procs,leg=c(80,0.4),lty=ltypes,col=cols,lwd=2)

# Adjusted p-values in original data order
mt.plot(allp,teststat,plottype="pvsi",proc=procs,leg=c(80,0.4),lty=ltypes,col=cols,lwd=2)

# Number of rejected hypotheses vs. level of the test
mt.plot(allp,teststat,plottype="rvsa",proc=procs,leg=c(0.05,100),lty=ltypes,col=cols,lwd=2)

# Adjusted p-values vs. test statistics
mt.plot(allp,teststat,plottype="pvst",logscale=TRUE,proc=procs,leg=c(0,4),pch=ltypes,col=cols)

}
\keyword{hplot}

\eof
\name{mt.rawp2adjp}
\alias{mt.rawp2adjp}
\title{Adjusted p-values for simple multiple testing procedures}
\description{
  This function computes adjusted \eqn{p}-values for simple
  multiple testing procedures from a vector of raw (unadjusted)
  \eqn{p}-values. The procedures include the Bonferroni, Holm (1979),
  Hochberg (1988), and Sidak procedures for strong control of the
  family-wise Type I error rate (FWER), and the Benjamini & Hochberg
  (1995) and Benjamini & Yekutieli (2001) procedures for (strong)
  control of the false discovery rate (FDR). 
}
\usage{
mt.rawp2adjp(rawp, proc=c("Bonferroni", "Holm", "Hochberg", "SidakSS", "SidakSD", "BH", "BY"))
}
\arguments{
  \item{rawp}{A vector of raw (unadjusted) \eqn{p}-values for each
    hypothesis under consideration. These could be nominal
    \eqn{p}-values, for example, from t-tables, or permutation
    \eqn{p}-values as given in \code{mt.maxT} and \code{mt.minP}. If the
    \code{mt.maxT} or \code{mt.minP} functions are used, raw
    \eqn{p}-values should be given in the original data order,
    \code{rawp[order(index)]}.}
  \item{proc}{A vector of character strings containing the names of the
    multiple testing procedures for which adjusted \eqn{p}-values are to
    be computed. This vector should include any of the following:
    \code{"Bonferroni"}, \code{"Holm"}, \code{"Hochberg"},
    \code{"SidakSS"}, \code{"SidakSD"}, \code{"BH"}, \code{"BY"}. 
}
}
\details{
  Adjusted \eqn{p}-values are computed for simple FWER and FDR
  controlling procedures based on a vector of raw (unadjusted)
  \eqn{p}-values.
  \describe{
    \item{Bonferroni}{Bonferroni single-step adjusted \eqn{p}-values
      for strong control of the FWER.}
    \item{Holm}{Holm (1979) step-down adjusted \eqn{p}-values for
      strong control of the FWER.}
    \item{Hochberg}{Hochberg (1988) step-up adjusted \eqn{p}-values
      for strong control of the FWER (for raw (unadjusted)
      \eqn{p}-values satisfying the Simes inequality).}
    \item{SidakSS}{Sidak single-step adjusted \eqn{p}-values for
      strong control of the FWER (for positive orthant dependent test
      statistics).}
    \item{SidakSD}{Sidak step-down adjusted \eqn{p}-values for
      strong control of the FWER (for positive orthant dependent test
      statistics).}
    \item{BH}{adjusted \eqn{p}-values for the Benjamini & Hochberg
      (1995) step-up FDR controlling procedure (independent and positive
      regression dependent test statistics).}
    \item{BY}{adjusted \eqn{p}-values for the Benjamini & Yekutieli
      (2001) step-up FDR controlling procedure (general dependency
      structures).}
  }
}

\value{
A list with components
  \item{adjp}{A matrix of adjusted \eqn{p}-values, with rows
    corresponding to hypotheses and columns to multiple testing
    procedures. Hypotheses are sorted in increasing order of their raw
    (unadjusted) \eqn{p}-values.} 
  \item{index}{A vector of row indices, between 1 and
    \code{length(rawp)}, where rows are sorted according to 
    their raw (unadjusted) \eqn{p}-values. To obtain the adjusted
    \eqn{p}-values in the original data order, use
    \code{adjp[order(index),]}. 
}
}
\references{
Y. Benjamini and Y. Hochberg (1995). Controlling the false discovery
rate: a practical and powerful approach to multiple
testing. \emph{J. R. Statist. Soc. B}. Vol. 57: 289-300.\cr 

Y. Benjamini and D. Yekutieli (2001). The control of the false discovery
rate in multiple hypothesis testing under dependency. \emph{Annals of
  Statistics}. Accepted.\cr 

S. Dudoit, J. P. Shaffer, and J. C. Boldrick (Submitted). Multiple
hypothesis testing in microarray experiments.\cr 

Y. Ge, S. Dudoit, and T. P. Speed. Resampling-based multiple testing for microarray data analysis, Technical Report \#633 of UCB Stat. \url{http://www.stat.berkeley.edu/~gyc}\cr

Y. Hochberg (1988). A sharper Bonferroni procedure for multiple tests of
significance, \emph{Biometrika}. Vol. 75: 800-802.\cr 

S. Holm (1979). A simple sequentially rejective multiple test
procedure. \emph{Scand. J. Statist.}. Vol. 6: 65-70. 
}
 
\author{
  Sandrine Dudoit, \url{http://www.stat.berkeley.edu/~sandrine},\cr
  Yongchao Ge, \email{yongchao.ge@mssm.edu}. 
}
\seealso{\code{\link{mt.maxT}}, \code{\link{mt.minP}},
  \code{\link{mt.plot}}, \code{\link{mt.reject}}, \code{\link{golub}}.}

\examples{
# Gene expression data from Golub et al. (1999)
# To reduce computation time and for illustrative purposes, we consider only
# the first 100 genes and use the default of B=10,000 permutations.
# In general, one would need a much larger number of permutations
# for microarray data.

data(golub)
smallgd<-golub[1:100,] 
classlabel<-golub.cl

# Permutation unadjusted p-values and adjusted p-values for maxT procedure
res1<-mt.maxT(smallgd,classlabel)
rawp<-res1$rawp[order(res1$index)]

# Permutation adjusted p-values for simple multiple testing procedures
procs<-c("Bonferroni","Holm","Hochberg","SidakSS","SidakSD","BH","BY")
res2<-mt.rawp2adjp(rawp,procs)


}

\keyword{htest}






\eof
\name{mt.reject}
\alias{mt.reject}
\title{Identity and number of rejected hypotheses }
\description{This function returns the identity and number of rejected hypotheses for several multiple testing procedures and different nominal Type I error rates.
}
\usage{
mt.reject(adjp, alpha)
}
\arguments{
  \item{adjp}{A matrix of adjusted \emph{p}-values, with rows
    corresponding to hypotheses and columns to multiple testing
    procedures. This matrix could be obtained from the function
    \code{\link{mt.rawp2adjp}}.}
  \item{alpha}{A vector of nominal Type I error rates.}
}
\value{
  A list with components
  \item{r}{A matrix containing the number of rejected hypotheses for several multiple testing procedures and different nominal Type I error rates. Rows correspond to Type I error rates and columns to multiple testing procedures.}
  \item{which}{A matrix of indicators for the rejection of individual hypotheses by different multiple testing procedures for a nominal Type I error rate \code{alpha[1]}. Rows correspond to hypotheses and columns to multiple testing procedures.}
}

\author{
  Sandrine Dudoit,  \url{http://www.stat.berkeley.edu/~sandrine}, \cr
  Yongchao Ge, \email{yongchao.ge@mssm.edu}.
}


\seealso{\code{\link{mt.maxT}}, \code{\link{mt.minP}}, \code{\link{mt.rawp2adjp}}, \code{\link{golub}}.}

\examples{
# Gene expression data from Golub et al. (1999)
# To reduce computation time and for illustrative purposes, we consider only
# the first 100 genes and use the default of B=10,000 permutations.
# In general, one would need a much larger number of permutations
# for microarray data.

data(golub)
smallgd<-golub[1:100,] 
classlabel<-golub.cl

# Permutation unadjusted p-values and adjusted p-values for maxT procedure
res<-mt.maxT(smallgd,classlabel)
mt.reject(cbind(res$rawp,res$adjp),seq(0,1,0.1))$r

}

\keyword{htest}

\eof
\name{mt.sample.teststat}
\title{Permutation distribution of test statistics and raw (unadjusted) p-values}
\alias{mt.sample.teststat}
\alias{mt.sample.rawp}
\alias{mt.sample.label}
\usage{
mt.sample.teststat(V,classlabel,test="t",fixed.seed.sampling="y",B=10000,na=.mt.naNUM,nonpara="n")
mt.sample.rawp(V,classlabel,test="t",side="abs",fixed.seed.sampling="y",B=10000,na=.mt.naNUM,nonpara="n")
mt.sample.label(classlabel,test="t",fixed.seed.sampling="y",B=10000)
}
\description{
  These functions provide tools to investigate the permutation distribution
  of test statistics, raw (unadjusted) \eqn{p}-values, and class labels.
}
\arguments{
  \item{V}{A numeric vector containing the data for one of the variables (genes).}

 \item{classlabel}{
A vector of integers corresponding to observation (column)
    class labels. For \eqn{k} classes, the labels must be integers
    between 0 and \eqn{k-1}. For the \code{blockf} test option,
    observations may be divided into
    \eqn{n/k} blocks of \eqn{k} observations each. The observations are
    ordered by block, and within each block, they are labeled using the
    integers 0 to \eqn{k-1}.
  }	
  \item{test}{A character string specifying the statistic to be
    used to test the null hypothesis of no association between the
    variables and the class labels.\cr
    If \code{test="t"}, the tests are based on two-sample Welch t-statistics
    (unequal variances).  \cr
    If \code{test="t.equalvar"}, the tests are based on two-sample
    t-statistics with equal variance for the two samples. The
    square of the t-statistic is equal to an F-statistic for \eqn{k=2}. \cr
    If \code{test="wilcoxon"}, the tests are based on standardized rank sum Wilcoxon statistics.\cr
    If \code{test="f"}, the tests are based on F-statistics.\cr
    If \code{test="pairt"}, the tests are based on paired t-statistics. The
    square of the paired t-statistic is equal to a block F-statistic for \eqn{k=2}. \cr
    If \code{test="blockf"}, the tests are based on F-statistics which
    adjust for block differences
    (cf. two-way analysis of variance).
  }
  \item{side}{A character string specifying the type of rejection region.\cr
    If \code{side="abs"}, two-tailed tests, the null hypothesis is rejected for large absolute values of the test statistic.\cr
    If \code{side="upper"}, one-tailed tests, the null hypothesis is rejected for large values of the test statistic.\cr
    If \code{side="lower"}, one-tailed tests,  the null hypothesis is rejected for small values of the test statistic.
  }
  \item{fixed.seed.sampling}{If \code{fixed.seed.sampling="y"}, a
    fixed seed sampling procedure is used, which may double the
    computing time, but will not use extra memory to store the
    permutations. If \code{fixed.seed.sampling="n"}, permutations will
    be stored in memory.  For the \code{blockf} test, the option \code{n} was not implemented as it requires too much memory.
  }
  \item{B}{The number of permutations. For a complete
    enumeration, \code{B} should be 0 (zero) or any number not less than
    the total number of permutations.
  }
  \item{na}{Code for missing values (the default is \code{.mt.naNUM=-93074815.62}).
    Entries with missing values will be ignored in the computation,
    i.e., test statistics will be based on a smaller sample size. This
    feature has not yet been fully implemented.
  }
  \item{nonpara}{If \code{nonpara}="y", nonparametric test statistics are computed based on ranked data. \cr
    If  \code{nonpara}="n", the original data are used.
  }

}
\value{
  For \code{\link{mt.sample.teststat}},  a vector containing \code{B} permutation test statistics. \cr \cr
  For \code{\link{mt.sample.rawp}},  a vector containing \code{B} permutation unadjusted \eqn{p}-values. \cr\cr 
  For \code{\link{mt.sample.label}}, a matrix containing \code{B}
  sets of permuted class labels. Each row corresponds to one permutation.
}

\examples{

# Gene expression data from Golub et al. (1999)
data(golub)

mt.sample.label(golub.cl,B=10)

permt<-mt.sample.teststat(golub[1,],golub.cl,B=1000)
qqnorm(permt)
qqline(permt)

permt<-mt.sample.teststat(golub[50,],golub.cl,B=1000)
qqnorm(permt)
qqline(permt)

permp<-mt.sample.rawp(golub[1,],golub.cl,B=1000)
hist(permp)
}

\author{Yongchao Ge, \email{yongchao.ge@mssm.edu}, \cr
Sandrine Dudoit, \url{http://www.stat.berkeley.edu/~sandrine}.}

\seealso{\code{\link{mt.maxT}}, \code{\link{mt.minP}}, \code{\link{golub}}.}

\keyword{manip}


\eof
\name{mt.teststat}
\alias{mt.teststat}
\alias{mt.teststat.num.denum}

\title{Computing test statistics for each row of a data frame}
\usage{
mt.teststat(X,classlabel,test="t",na=.mt.naNUM,nonpara="n")
mt.teststat.num.denum(X,classlabel,test="t",na=.mt.naNUM,nonpara="n")
}
\description{
  These functions provide a convenient way to compute test statistics,
  e.g., two-sample Welch t-statistics, Wilcoxon statistics,
  F-statistics, paired t-statistics,
  block F-statistics, for each row of a data frame. 
}
\arguments{
 \item{X}{A data frame or matrix, with \eqn{m} rows corresponding to variables
    (hypotheses) and \eqn{n} columns to observations. In the case of gene
    expression data, rows
    correspond to genes and columns to mRNA samples. The data can
    be read using \code{\link{read.table}}.
  }
  \item{classlabel}{
     A vector of integers corresponding to observation (column)
    class labels. For \eqn{k} classes, the labels must be integers
    between 0 and \eqn{k-1}. For the \code{blockf} test option,
    observations may be divided into
    \eqn{n/k} blocks of \eqn{k} observations each. The observations are
    ordered by block, and within each block, they are labeled using the
    integers 0 to \eqn{k-1}.
  }	
  \item{test}{A character string specifying the statistic to be
    used to test the null hypothesis of no association between the
    variables and the class labels.\cr
    If \code{test="t"}, the tests are based on two-sample Welch t-statistics
    (unequal variances).  \cr
    If \code{test="t.equalvar"}, the tests are based on two-sample
    t-statistics with equal variance for the two samples. The
    square of the t-statistic is equal to an F-statistic for \eqn{k=2}. \cr
    If \code{test="wilcoxon"}, the tests are based on standardized rank sum Wilcoxon statistics.\cr
    If \code{test="f"}, the tests are based on F-statistics.\cr
    If \code{test="pairt"}, the tests are based on paired t-statistics. The
    square of the paired t-statistic is equal to a block F-statistic for \eqn{k=2}. \cr
    If \code{test="blockf"}, the tests are based on F-statistics which
    adjust for block differences
    (cf. two-way analysis of variance).
  }
  \item{na}{Code for missing values (the default is \code{.mt.naNUM=-93074815.62}).
    Entries with missing values will be ignored in the computation,
    i.e., test statistics will be based on a smaller sample size. This
    feature has not yet been fully implemented.
  }
  \item{nonpara}{If \code{nonpara}="y", nonparametric test statistics are computed based on ranked data. \cr
    If  \code{nonpara}="n", the original data are used.}
}

\value{
  For \code{\link{mt.teststat}}, a vector of test statistics for each row (gene). \cr \cr
  For \code{\link{mt.teststat.num.denum}}, a data frame with \cr
  \item{teststat.num}{the numerator of the test statistics for each row, depending on the
    specific \code{test} option.}
  \item{teststat.denum}{the denominator of the test statistics for each row, depending on the
    specific \code{test} option.}
  }


\author{Yongchao Ge, \email{yongchao.ge@mssm.edu}, \cr
Sandrine Dudoit, \url{http://www.stat.berkeley.edu/~sandrine}.}

\seealso{\code{\link{mt.maxT}}, \code{\link{mt.minP}}, \code{\link{golub}}.}

\examples{
# Gene expression data from Golub et al. (1999)
data(golub)

teststat<-mt.teststat(golub,golub.cl)
qqnorm(teststat)
qqline(teststat)

tmp<-mt.teststat.num.denum(golub,golub.cl,test="t")
num<-tmp$teststat.num
denum<-tmp$teststat.denum
plot(sqrt(denum),num)

tmp<-mt.teststat.num.denum(golub,golub.cl,test="f")

}
		
\keyword{univar}

	

	

\eof
