\name{checkerboard}

\alias{checkerboard}

\title{Checkerboard plot for Global Test}

\description{Produces a plot to visualize the test result produced
by \code{\link{globaltest}}, by showing the association between
pairs of samples.}

\usage{checkerboard(gt, geneset = NULL, sort = TRUE, drawlabels = TRUE, labelsize = 0.6, ...)}

\arguments{
    \item{gt}{The output of a call to \code{\link{globaltest}}.}
    \item{geneset}{The name or number of the geneset to be plotted 
(only necessary if multiple genesets were tested)}
    \item{sort}{A logical flag to indicate whether the samples should be sorted
by the clinical outcome to give a clearer picture.}
        \item{drawlabels}{Logical value to control drawing of the samplenames on the x- and y-axis of the plot.}
        \item{labelsize}{Relative size of the labels on the x- and y-axis. If it is \code{NULL},
        the current value for \code{\link[graphics:par]{par("cex.axis")}} is used.}
    \item{...}{Any extra arguments will be forwarded to the plotting function.}}

\details{The checkerboard shows the pairs of samples which have
high covariance in white and the pairs with low covariance in
black. This can be used to visualize the data and to search for
outlying arrays.

The left and bottom margins are adjusted to allow enough space 
for the longest samplename.}

\value{A matrix giving the old and the new sample numbers.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C. 
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the How To 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{sampleplot}},
\code{\link{geneplot}}.}

\examples{
if (interactive()) {
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway <- 1:25     # A pathway contains genes 1 to 25
    gt <- globaltest(exampleX, exampleY, test.genes = pathway)
    gt
    checkerboard(gt)
}
}

\keyword{hplot}

\eof
\name{exampleX}

\docType{data}

\alias{exampleX}

\title{Example expression data for package "globaltest".}

\description{A simulated gene expression dataset used in the 
examples of the package "globaltest".}

\usage{data(exampleX)}

\format{A matrix containing expressions for 1000 genes and 40
samples.}

\seealso{\code{\link{exampleY}}.}

\keyword{datasets}

\eof
\name{exampleY}

\docType{data}

\alias{exampleY}

\title{Example clinical variable for package "globaltest".}

\description{A simulated clinical variable used in the
examples of the package "globaltest".}

\usage{data(exampleY)}

\format{A vector of length 40 containing values 0 and 1.}

\seealso{\code{\link{exampleX}}.}

\keyword{datasets}

\eof
\name{geneplot}

\alias{geneplot}

\title{Gene Plot for Global Test}

\description{Produces a plot to show the influence of individual
genes on the test result produced by \code{\link{globaltest}}.}

\usage{geneplot(gt, geneset = NULL, genesubset = NULL, drawlabels = TRUE, labelsize = 0.6, ...)}

\arguments{
    \item{gt}{The output of a call to \code{\link{globaltest}}.}
    \item{geneset}{The name or number of the geneset to be plotted 
(only necessary if multiple genesets were tested).}
    \item{genesubset}{A vector of names or numbers of genes to be plotted 
(default: plot all genes)}
		\item{drawlabels}{Logical value to control drawing of gene ids (rownames) on the x-axis of the plot.}
		\item{labelsize}{Relative size of the labels on the x-axis. If it is \code{NULL},
		the current value for \code{\link[graphics:par]{par("cex.axis")}} is used.}
    \item{...}{Any extra arguments will be forwarded to the plotting 
function.}}

\details{The geneplot shows a bar and a reference line for each
gene. The bar shows the influence of each gene on the test 
result: the test statistic Q is the average of the bars of the 
genes. The reference line shows the expected influence if the 
gene was not associated with the outcome. The marks on the bars 
show the standard deviation of the bar under the null hypothesis. 
The color of the bar indicates positive or negative correlation 
of the gene with the clinical outcome, to distinguish between up-
and downregulation.

The bottom margin is adjusted to allow enough space for the longest 
gene id to draw under the axis.}

\value{If gene names were supplied in the call to 
\code{\link{globaltest}}, geneplot returns the vector of gene 
names corresponding to the gene numbers appearing in the plot. 
Otherwise \code{NULL}.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C. 
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the How To 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{sampleplot}}.}

\examples{
if (interactive()){ 
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway <- 1:25     # A pathway contains genes 1 to 25
    gt <- globaltest(exampleX, exampleY, test.genes = pathway)
    gt
    geneplot(gt)
}
}
\keyword{hplot}

\eof
\name{globaltest}

\alias{globaltest}

\title{Global Test}

\description{In microarray data, tests a (list of) group(s) of 
genes for significant association with a given clinical variable.}

\usage{globaltest(X, Y, test.genes = NULL, model = NULL, 
    levels = NULL, adjust = NULL, permutation = FALSE, nperm = NULL, 
    sampling = FALSE, ndraws = NULL, verbose = TRUE) }

\arguments{
    \item{X}{Either a matrix of gene expression data, where columns correspond to
samples and rows to genes or a Bioconductor \code{\link[Biobase:exprSet-class]{exprSet}}. The data 
should be properly normalized beforehand (and log- or otherwise 
transformed), but missing values are allowed (coded as 
\code{NA}). Gene and sample names can be included as the row and 
column names of \code{X}.}
    \item{Y}{A vector with the clinical outcome of interest, having one value
for each sample. If X is an \code{\link[Biobase:exprSet-class]{exprSet}} it can 
also be the name of a covariate in the \code{\link[Biobase:phenoData-class]{phenoData}} slot of the exprSet}
    \item{test.genes}{Either a vector or a list of vectors. Indicates 
the group(s) of genes to be tested. Each vector in 
\code{test.genes} can be given in three formats. Either it can be 
a vector with 1 (\code{TRUE}) or 0 (\code{FALSE}) for each gene 
in \code{X}, with 1 indicating that the gene belongs to the 
group. Or it can be a vector containing the row numbers (in
\code{X}) of the genes belonging to the group. Or it can be a 
subset of the rownames or \code{\link[Biobase:exprSet-class]{geneNames}} for \code{X}.}
    \item{model}{Indicates the model the test uses: 
Use \code{model = "logistic"} for a two-valued outcome \code{Y} 
(the default) or \code{model = "linear"} for a continuous 
outcome. If \code{model} is not supplied, globaltest will try to
determine the model from \code{Y}.}
    \item{levels}{If \code{Y} is a factor (or a category in the phenoData slot of \code{X})
and contains more than 2 levels: \code{levels} is a vector of levels of \code{Y} to test. If
\code{levels} is length 2: test these 2 groups against each other.
If \code{levels} is length 1: test that level against the others.}
    \item{adjust}{Confounders or risk factors for which the test must 
be adjusted. Must be either a data frame or the names of 
covariates in the phenoData slot of the exprSet \code{X}} 
    \item{permutation}{A logical flag. If \code{TRUE} \code{nperm} 
permutations are used to calculate the p-value instead of the 
asymptotic formulas. Recommended for small sample size. Not 
possible if an adjusted globaltest is used.}
    \item{nperm}{The number of permutations used. The default is 10,000.
If a number is specified for \code{nperm}, \code{permutation} is 
automatically set to \code{TRUE}.}
    \item{sampling}{A logical flag. If \code{TRUE} \code{ndraws} 
random sets of genes are drawn with the same number of genes as 
the tested group. Using these draws, an extra column of output
\code{comparative.p} is generated, reporting how many of these 
random sets have a lower p-value than the tested group.}
    \item{ndraws}{The number of random groups of genes to be drawn.
The default is 1,000. If a number is specified for \code{ndraws},
\code{sampling} is automatically set to \code{TRUE}.}
    \item{verbose}{Prints some progress information if set to \code{TRUE}.}}

\details{The Global Test tests whether a group of genes (of any 
size from one single gene to all genes on the array) is 
significantly associated with a clinical outcome. The group could 
be for example a known pathway, an area on the genome or the set 
of all genes. The test investigates whether samples with similar 
clinical outcomes tend to have similar gene expression patterns. 
For a significant result it is not necessary that the genes in 
the group have similar expression patterns, only that many of 
them are correlated with the outcome.}

\note{If the number of rows of a matrix \code{X} does not match 
the length of the vector \code{Y}, but the number of columns 
does, the matrix \code{X} given is tacitly replaced by 
\code{t(X)} to make \code{X} and \code{Y} match. A warning is 
printed if \code{X} is square.}

\value{The function returns an object of class 
\code{\link{gt.result-class}}.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C. 
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the vignette 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{geneplot}}, \code{\link{sampleplot}}, 
\code{\link{permutations}}, \code{\link{checkerboard}}, 
\code{\link{regressionplot}}.}

\examples{
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway1 <- 1:25    # A pathway contains genes 1 to 25
    pathway2 <- 26:50   # another pathway
    gt <- globaltest(exampleX, exampleY, list(pathway1,pathway2))
    gt
}
\keyword{htest}

\eof
\name{gt.result-class}
\docType{class}
\alias{result}
\alias{p.value}
\alias{gt.result-class}
\alias{show,gt.result-method}
\alias{result,gt.result-method}
\alias{p.value,gt.result-method}

\title{Class "gt.result" for results of the function globaltest}

\description{The class gt.result is the output of a call to
\code{\link{globaltest}} and the input of various plotting 
functions to visualize the test result. }

\section{Slots}{
  \describe{
    \item{\code{res}:}{Object of class "matrix". Test results, with a row for each element of 
    \code{test.genes}, and the following columns: \code{path.n}: length of input vector, 
    \code{test.n}: number of rows in \code{X} that matched the input vector, \code{Q}: test statistic, 
    \code{EQ}: expectation for the test statistic, \code{seQ}: standard deviation of the test 
    statistic, \code{p.val}: p value, \code{comp.p}: comparative p if \code{sampling = TRUE} was used in \code{\link{globaltest}}.
    }
    \item{\code{X}:}{Object of class "matrix". The transformed data matrix.}
    \item{\code{Y}:}{Object of class "numeric". The transformed clinical outcome vector.}
    \item{\code{test.genes}:}{A list of vectors indicating the tested genes.}
    \item{\code{adjustmatrix}:}{Object of class "matrix" needed to calculate the expectation 
    of the test statistic.}
    \item{\code{Rsquare}:}{Object of class "numeric". Percentage of the variance of Y left after adjustment 
    for confounders or known risk factors.}
    \item{\code{model}:}{Object of class "character". The model used by the test ("linear" or "logistic").}
    \item{\code{df.adjust}:}{Object of class "numeric". The number of degrees of freedom used in the 
    adjustment of the test.} 
    }
}

\section{Methods}{ 
  \describe{
    \item{show}{(gt.result): Summarizes the test result}
    \item{p.value}{(gt.result): Extracts the p-values.}
    \item{result}{(gt.result): Extracts the results matrix.} 
    \item{geneplot}{(gt.result): Produces a plot to show the influence of individual
    genes on the test result produced by \code{\link{globaltest}}.}
    \item{sampleplot}{(gt.result): Produces a plot to show the influence of individual
    samples on the test result produced by \code{\link{globaltest}}.}
    \item{permutations}{(gt.result): Produces a histogram for visualization of the
    permutations used in \code{\link{globaltest}}.}
    \item{checkerboard}{(gt.result): Produces a plot to visualize the test result
    produced by \code{\link{globaltest}} by showing the association
    between pairs of samples.}
    \item{regressionplot}{(gt.result): Produces a plot which can be used to visualize
    the effect of specific samples on the test result produced by
    \code{\link{globaltest}}.}
    }
}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{sampleplot}}, 
\code{\link{geneplot}}, \code{\link{permutations}}.}

\keyword{methods}

\eof
\name{permutations}

\alias{permutations}

\title{Permutations Plot for Global Test}

\description{Produces a histogram for visualization of the permutations
used in \code{\link{globaltest}}.}

\usage{permutations(gt, geneset = NULL, nperm = 10^4)}

\arguments{
    \item{gt}{The output of a call to \code{\link{globaltest}}.}
    \item{geneset}{The name or number of the geneset to be plotted 
(only necessary if multiple genesets were tested).}
    \item{nperm}{The number of permutations to be used.} }

\details{Produces a histogram of the value of the test statistic
calculated for permutations of the clinical outcome, compared with
the test statistic for the true clinical outcome.}

\value{\code{NULL} (no output except the plot).}

\note{Permutations does not work if the adjusted version of 
globaltest was used.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C.
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the How To 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{sampleplot}},
\code{\link{geneplot}}.}

\examples{
if (interactive()) {
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway <- 1:25     # A pathway contains genes 1 to 25
    gt <- globaltest(exampleX, exampleY, test.genes = pathway, 
                                                permutation = TRUE)
    gt
    permutations(gt)
}
}
\keyword{hplot}

\eof
\name{regressionplot}

\alias{regressionplot}

\title{Regression Plot for Global Test}

\description{Produces a plot which can be used to visualize the
effect of specific samples on the test result produced by
\code{\link{globaltest}}.}

\usage{regressionplot(gt, geneset = NULL, sampleid = NULL, ...)}

\arguments{
    \item{gt}{The output of a call to \code{\link{globaltest}}.}
    \item{geneset}{The name or number of the geneset to be plotted 
(only necessary if multiple genesets were tested).}
    \item{sampleid}{A vector of names or numbers of the samples of interest.}
    \item{...}{Any extra arguments will be forwarded to the plotting function.}}

\details{The regressionplot plots, for all pairs of samples, the
covariance between the expression patterns against the covariance
between their clinical outcomes. Each point in the plot therefore
represents a pair of samples. A regression line is fitted through
the samples, which visualizes the test result of the function
\code{\link{globaltest}}. A steeply increasing slope indicates a
high (possibly significant) value of the test statistic.

An optional argument \code{sampleid} can be supplied, giving
sample numbers of possibly outlying arrays. In this case, all
pairs of arrays involving one of the arrays in \code{sampleid} are
marked as red crosses, while the other pairs are marked as blue
dots. The blue line which is fitted through all points can now be
compared to a red dotted line which is fitted through only the red
crosses.}

\value{\code{NULL} (no output).}

\note{Regressionplot does not work if the adjusted version of 
globaltest was used.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C.
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the How To 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{sampleplot}},
\code{\link{geneplot}}.}

\examples{
if (interactive()) {
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway <- 1:25     # A pathway contains genes 1 to 25
    gt <- globaltest(exampleX, exampleY, test.genes = pathway)
    gt
    regressionplot(gt)
    regressionplot(gt, sampleid = 40)
}
}
\keyword{hplot}

\eof
\name{sampleplot}

\alias{sampleplot}

\title{Sample Plot for Global Test}

\description{Produces a plot to show the influence of individual
samples on the test result produced by \code{\link{globaltest}}.}

\usage{sampleplot(gt, geneset = NULL, samplesubset = NULL, drawlabels = TRUE, labelsize = 0.6, ...)}

\arguments{
    \item{gt}{The output of a call to \code{\link{globaltest}}.}
    \item{geneset}{The name or number of the geneset to be plotted 
(only necessary if multiple genesets were tested).}
    \item{samplesubset}{A vector of names or numbers of samples to be plotted 
(default: all samples)}
		\item{drawlabels}{Logical value to control drawing of the samplenames on the x-axis of the plot.}
		\item{labelsize}{Relative size of the labels on the x-axis. If it is \code{NULL},
		the current value for \code{\link[graphics:par]{par("cex.axis")}} is used.}
    \item{...}{Any extra arguments will be forwarded to the plotting 
function.}}

\details{The sampleplot shows a bar and a reference line for each 
sample. The bar shows the influence of each sample on the test
statistic. Samples with a positive influence carry evidence 
against the null hypothesis (in favour of a significant p-value), 
because they are similar in expression profile to samples
with a similar clinical outcome. Samples with a negative 
influence bar supply evidence in favour of the null hypothesis 
and of a non-significant p-value: they are relatively similar in 
expression profile to samples with a different clinical outcome.

The influence varies around zero if the tested geneset is not 
associated with the outcome. Marks on the bars show the 
standard deviation of the influence under the null hypothesis for
those samples which are more than one standard deviation away 
from zero.

The color of the bar indicates the sign of the residual of Y. In 
a logistic model this coloring distinguishes the original
groups.

The bottom margin is adjusted to allow enough space for the longest 
samplename to draw under the axis.}

\value{If samplenames were supplied in the call to 
\code{\link{globaltest}}, sampleplot returns the vector of 
samplenames corresponding to the samplenumbers appearing in the 
plot. Otherwise \code{NULL}.}

\references{J. J. Goeman, S. A. van de Geer, F. de Kort and J. C. 
van Houwelingen, 2004, \emph{A global test for groups of genes: 
testing association with a clinical outcome}, 
\emph{Bioinformatics} 20 (1) 93--99. See also the How To 
Globaltest.pdf included with this package.}

\author{Jelle Goeman: \email{j.j.goeman@lumc.nl}; Jan Oosting}

\seealso{\code{\link{globaltest}}, \code{\link{geneplot}}, 
\code{\link{regressionplot}}, \code{\link{checkerboard}}.}

\examples{
if (interactive()){ 
    data(exampleX)      # Expression data (40 samples; 1000 genes)
    data(exampleY)      # Clinical outcome for the 40 samples
    pathway <- 1:25     # A pathway contains genes 1 to 25
    gt <- globaltest(exampleX, exampleY, test.genes = pathway)
    gt
    sampleplot(gt)
}
}
\keyword{hplot}

\eof
