## Legacy package-name marker (modern packages rely on DESCRIPTION/NAMESPACE).
.packageName <- "pickgene"
#####################################################################
##
## $Id: chip.R,v 1.1 2003/11/25 14:50:35 jgentry Exp $
##
##     Copyright (C) 2000 Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
makecont <- function( x, y, size = 41, cex = .1,
                     levels = c(1,5,10,50))
{
  ## Two-dimensional histogram contour of (x, y).
  ## Points are binned on a size-by-size grid, bin counts are converted to
  ## cumulative percentages (smallest bins first), contours are drawn at
  ## `levels`, and points falling in sparse bins (< 1 cumulative percent)
  ## are drawn individually. Returns the cumulative-percent matrix invisibly.
  keep <- !( is.na( x ) | is.na( y ))
  if( !any( keep ))
    stop("all data missing")
  x <- x[keep]
  y <- y[keep]
  rx <- range( x )
  ry <- range( y )
  ## map each point to a grid cell; the top edge folds into the last cell
  xcell <- pmin( size, 1 + floor(( size + 1 ) * ( x - rx[1] ) / diff( rx )))
  ycell <- pmin( size, 1 + floor(( size + 1 ) * ( y - ry[1] ) / diff( ry )))
  ## linear (column-major) cell index: row from x, column from y
  cell <- xcell + size * ( ycell - 1 )
  ## cell counts as percent of all points
  z <- matrix( 100 * tabulate( cell, nbins = size * size ) / length( x ),
              size, size )
  ## convert to cumulative percentages, smallest cells first
  ord <- order( z )
  z[ord] <- cumsum( z[ord] )
  ## points whose cell holds < 1 cumulative percent get plotted individually
  sparse <- z[cell] < 1
  contour( seq( rx[1], rx[2], length = size ),
          seq( ry[1], ry[2], length = size ),
          z, levels = levels )
  points( x[sparse], y[sparse], cex = cex )
  invisible( z )
}
## Print a LOD table and draw an odds plot for two columns of `data`.
## data:     data frame with log-scale intensity columns `x` and `y`
##           (exp() is applied before use) and a probe column `probe`.
## theta:    GGB model parameters handed to oddsplot()/lodprobes().
## filename: basename for "<filename>.ps" (plot) and "<filename>.lod"
##           (text table); defaults to the name of the theta argument.
## ps:       open a postscript device, closed again via graphics.off().
## Side effects: writes two files; returns NULL invisibly.
## NOTE(review): `...` is accepted but never forwarded -- confirm intent.
## NOTE(review): oddsplot() as defined in this file takes no `chip`
## argument; the chip="" below may be rejected -- confirm.
lod.plot <- function( data, x, y, theta,
                     filename = deparse( substitute( theta )),
                     probe = "Probe.Set",
                     xlab = x, ylab = y, ps = TRUE,
                     col = rep( "black", length( x )),
                     lowlod = 0,
                     ... )
{
  col <- as.character( col )
  if( ps )
    postscript( paste( filename, "ps", sep = "." ), horizontal = FALSE )
  par( omi = rep(.5,4))
  ## divert the printed LOD table to "<filename>.lod"
  sink( paste( filename, "lod", sep = "." ))
  print( lodprobes( exp( data[[x]] ), exp( data[[y]] ), theta,
                   oddsplot( exp( data[[x]] ), exp( data[[y]] ),
                            theta, col = col, xlab=xlab, ylab=ylab,
                            chip=""),
                   data[[probe]], col, lowlod = lowlod ))
  sink()
  if( ps )
    graphics.off()
  invisible()
}
chipnorm <- function( xx, chip = rep( 1, length( xx )))
{
  ## Center intensities within chips: subtract each chip's mean from its
  ## own observations. Prints the per-chip means as a side effect and
  ## returns list(xx = centered intensities, mean = per-chip means).
  chip <- as.factor( chip )
  chipmean <- tapply( xx, chip, mean, na.rm = TRUE )
  print( chipmean )
  for( lev in levels( chip )) {
    onchip <- chip == lev
    xx[onchip] <- xx[onchip] - chipmean[lev]
  }
  list( xx = xx, mean = chipmean )
}
#####################################################################
##
## $Id: density.R,v 1.1 2003/11/25 14:50:35 jgentry Exp $
##
##     Copyright (C) 2000 Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
## Six-panel (3 x 2) density display of dif = x - y, with genes split into
## six groups by rank of ave = (x + y) / 2. Each panel shows the kernel
## density of the group's dif with a fitted normal (blue) and mean/+-SD
## markers (red); the group mean and SD are also printed.
## dolog: log-transform x and y first (NAs reset to 0). The dif, ave and
##   xlim defaults are lazily evaluated, so they then refer to log scale.
## align: if TRUE, all panels share the same x-axis limits.
## NOTE(review): `crit` is accepted but never used -- confirm.
## NOTE(review): when length(dif) is an exact multiple of the group size,
##   the final split is empty and density() will error -- confirm inputs.
sixden <- function( x, y, align=FALSE, crit = 5, xlim=range(x-y, na.rm =TRUE),
                   dolog = TRUE, dif = x - y, ave = (x + y) / 2 )
{
  if( dolog ) {
    x <- log( x )
    y <- log( y )
    x[is.na(x)] <- 0
    y[is.na(y)] <- 0
  }
  par(mfrow=c(3,2))
  ## group size: one sixth of the data, rounded up
  lx6 <- ceiling( length( dif ) / 6 )
  ranks <- order( order( ave ))  # rank of each gene by average intensity
  for(i in seq( 0, length(dif), by = lx6)) {
    tmp <- ranks > i & ranks < i + lx6 + 1  # genes in this rank window
    tmpd <- dif[tmp]
    tmpd <- tmpd[!is.na(tmpd)]
    ##		hist(tmpd, nc=200,xlab="log diff",freq=FALSE,xlim=xlim,
    tmpdd <- density( tmpd )
		
    if( align )
      plot( tmpdd$x, tmpdd$y, type="l",xlab="log diff", xlim=xlim,
           ylab="relative frequency")
    else
      plot( tmpdd$x, tmpdd$y, type="l",xlab="log diff", #xlim=xlim,
           ylab="relative frequency")
    ## title shows group size and the ave of its lowest/highest ranked gene
    title( main=paste( sum(tmp), "genes from",
             round(ave[ranks==i+1],2),
             "to", round(ave[ranks==i+lx6],2) ))
    ## drop values more than 5 SD from the group mean before the normal fit
    tmpd <- tmpd[abs(tmpd-mean(tmpd,na.rm=TRUE))<5*sqrt(var(tmpd,na.rm=TRUE))]
    tmps <- c(mean(tmpd,na.rm=TRUE), sqrt(var(tmpd,na.rm=TRUE)))
    print(c(i,tmps))
    lines( sort(tmpd), dnorm(sort(tmpd),tmps[1],tmps[2] ),
          col="blue", lty = 2 )
    lines( rep( tmps[1],2),
          c(0,dnorm(tmps[1],tmps[1], tmps[2])),
          col = "red", lty = 2  )
    lines( rep( tmps[1]+tmps[2],2),
          c(0,dnorm(tmps[1]+tmps[2],tmps[1], tmps[2])),
          col = "red", lty = 2  )
    lines( rep( tmps[1]-tmps[2],2),
          c(0,dnorm(tmps[1]-tmps[2],tmps[1], tmps[2])),
          col = "red", lty = 2  )
  }
  par( mfrow = c(1,1))
}
denlines <- function( x, y, align=FALSE, crit = 5, xlim=range(x-y, na.rm =TRUE),
                     ylim=c(0,2.5), dolog = TRUE, dif = x - y, ave = (x + y) / 2,
                     numlines = 6, offset = 0 )
{
  ## Overlay kernel density estimates of dif = x - y for `numlines` groups
  ## of genes ranked by ave = (x + y) / 2; each successive curve is raised
  ## by `offset`. Note dif, ave and xlim defaults are lazily evaluated
  ## *after* the optional log transform, so they refer to the log scale
  ## when dolog = TRUE.
  ## NOTE(review): `align` and `crit` are accepted but unused -- confirm.
  if( dolog ) {
    x <- log( x )
    y <- log( y )
    x[is.na(x)] <- 0
    y[is.na(y)] <- 0
  }
  ## group size (last group may be smaller, or empty when sizes divide evenly)
  lx6 <- ceiling( length( dif ) / numlines )
  ranks <- order( order( ave ))  # rank of each gene by average intensity
  plot( xlim, ylim, type="n",xlab="log diff",
       ylab="relative frequency")
  bump <- 0
  for(i in seq( 0, length(dif), by = lx6))
    {
      tmp <- ranks > i & ranks < i + lx6 + 1
      tmpd <- dif[tmp]
      tmpd <- tmpd[!is.na(tmpd)]
      ## BUG FIX: when length(dif) is an exact multiple of lx6 the final
      ## split is empty and density() would error; also density() needs
      ## at least two points. Skip such groups.
      if( length( tmpd ) < 2 )
        next
      tmpdd <- density( tmpd )
      bump <- bump + offset
      lines( tmpdd$x, tmpdd$y + bump)
    }
  invisible()
}
## Contour plot of kernel densities of dif = x - y across `numlines`
## groups of genes ordered by ave = (x + y) / 2. Each group's density is
## evaluated on a common 512-point grid spanning the range of dif; the
## blue dashed line tracks each group's modal dif.
## levels.z default is lazily evaluated only after z has been filled.
## byranks: y axis positions are group ranks, labeled with ave values.
## Returns the contour levels invisibly.
## NOTE(review): when length(dif) is an exact multiple of the group size,
## seq(0, length(dif), by = lx6) yields one extra (empty) split, so
## density() would error and z[,j] would overflow -- confirm input sizes.
dencont <- function( x, y, align=FALSE, crit = 5, xlim=range(x-y, na.rm =TRUE),
                    ylim=c(0,2.5), dolog = TRUE, byranks = TRUE,
                    dif = x - y, ave = (x + y) / 2,
                    numlines = round( length( ave ) / 200 ),
                    levels.z = pretty( range( z ), 10 ))
{
  if( dolog ) {
    x <- log10( x )
    y <- log10( y )
    x[is.na(x)] <- 0
    y[is.na(y)] <- 0
  }
  ## group size, then sort dif along increasing ave
  lx6 <- ceiling( length( dif ) / numlines )
  dif <- dif[ order( ave )]
  ave <- sort( ave )
  rdif <- range( dif, na.rm = TRUE )
  ranks <- seq( length( ave ))

  ## z: one density column (on a common grid) per group
  z <- matrix( 0, 512, numlines )
  splits <- seq( 0, length(dif), by = lx6)
  j <- 0
  for(i in splits ) {
    tmp <- ranks > i & ranks < i + lx6 + 1
    tmpd <- dif[tmp]
    tmpd <- tmpd[!is.na(tmpd)]
    tmpdd <- density( tmpd, from = rdif[1], to = rdif[2] )
    j <- j + 1
    z[,j] <- 100 * tmpdd$y
  }
  if( byranks ) {
    ## suppress default y axis; ticks drawn by hand below
    tmpar <- par( yaxt = "n" )
    ytmp <- seq( 1, length( ave ), len = numlines )
  }
  else
    ytmp <- seq( min( ave, na.rm = TRUE ), max( ave, na.rm = TRUE ),
                len = numlines )
  xtmp <- seq( rdif[1], rdif[2], len = 512 )
  contour( xtmp, ytmp, z, levels = levels.z )
  if( byranks ) {
    par( yaxt = "s" )
    ## unlabeled ticks at every group boundary, ave labels at ~5 positions
    axis( 2, at = 1 + splits, labels = FALSE )
    tmp <- c( 1 + splits[ seq( 1, length( splits ),
                           by = floor( length( splits ) / 5 )) ],
             length( ave ))
    axis( 2, at = tmp, labels = round( ave[tmp], 2 ))
  }
  ## trace each group's density mode
  lines( apply( z, 2, function(z,x)x[z==max(z)][1], xtmp ),
        ytmp, lty = 2, col = "blue" ) 
  invisible( levels.z )
}
## Cumulative-quantile display: genes are split into `numlines` groups
## ordered by ave = (x + y) / 2; for each group the levels.z percent
## quantiles of dif = x - y are computed (optionally standardized within
## the group), outliers beyond the outer quantiles are plotted, and the
## quantile curves are connected across groups.
## Note: the xlim/ylim defaults reference xlims/ylims assigned inside the
## body; lazy evaluation makes this work because the defaults are only
## forced at the plot() call, after those locals exist.
## show:  (standardized scale) range beyond which extremes are reported
##        and clamped into the plot.
## Returns invisibly a matrix of extremes (group, probe, ave, diff).
dencum <- function( x, y, align=FALSE, crit = 5, xlim=xlims,
                   ylim = ylims, dolog = TRUE, byranks = TRUE, standardize = FALSE,
                   dif = x - y, ave = (x + y) / 2, splineit = FALSE,
                   numlines = round( length( ave ) / 200 ), show = xlim,
                   levels.z = c(1,5,10,25,50,75,90,95,99))
{
  if( dolog ) {
    x <- log10( x )
    y <- log10( y )
    x[is.na(x)] <- 0
    y[is.na(y)] <- 0
  }
  ## probe index tracks original positions through the NA filter and sort
  probe <- seq( length( dif ))
  isna <- is.na( dif ) | is.na( ave )
  dif <- dif[!isna]
  ave <- ave[!isna]
  probe <- probe[!isna]

  lx6 <- ceiling( length( dif ) / numlines )
  dif <- dif[ order( ave )]
  probe <- probe[ order( ave )]
  ave <- sort( ave )
  ## range over finite differences only
  rdif <- range( dif[ dif < Inf & dif > -Inf ], na.rm = TRUE )
  if( standardize )
    xlims <- c(-5,5)
  else
    xlims <- rdif
  ranks <- seq( length( ave ))

  cat( "Data divided into", numlines, "groups to determine quantiles\n" )

  ## z: one column of levels.z quantiles per group
  z <- matrix( 0, length(levels.z), numlines )
  dimnames( z ) <- list( as.character( levels.z ), NULL )
  if( byranks ) {
    tmpar <- par( yaxt = "n" )
    ylims <- range( ranks )
  }
  else
    ylims <- range(ave, na.rm = TRUE)
  plot( xlim, ylim, type="n",xlab="log diff",
       ylab="log ave")

  splits <- seq( 0, length(dif), by = lx6)
  if( byranks ) {
    par( yaxt = "s" )
    ## unlabeled ticks at group boundaries, ave labels at ~5 positions
    axis( 2, at = 1 + splits, labels = FALSE )
    tmp <- c( 1 + splits[ seq( 1, length( splits ),
                           by = floor( length( splits ) / 5 )) ],
             length( ave ))
    axis( 2, at = tmp, labels = round( ave[tmp], 2 ))
  }
  nz <- nrow( z )
  ## y positions: 2 per group plus 1, refined per group when !byranks
  if( byranks )
    ytmp <- seq( 1, length( ave ), len = 2 * numlines + 1 )
  else
    ytmp <- seq( min( ave, na.rm = TRUE ), max( ave, na.rm = TRUE ),
                len = 2 * numlines + 1 )

  showvals <- NULL
  j <- 0
  for( i in splits ) {
    tmp <- ranks > i & ranks < i + lx6 + 1
    tmp[tmp] <- !is.na( dif[tmp] )
    tmpd <- sort( dif[tmp] )
    j <- j + 1
    if( !byranks ) {
      ytmp[2*j] <- mean( ave[tmp] )
      ytmp[2*j+1] <- max( ave[tmp] )
    }
    ## empirical quantiles of this group's dif at the levels.z percents
    z[,j] <- tmpd[ round( 0.01 * levels.z * length( tmpd )) ]
    if( standardize ) {
      ## mean/SD of the values inside the outer quantile band
      mtmp <- 0; vtmp <- 1
      tmp2 <- tmp
      tmp2[tmp] <- dif[tmp] >= z[1,j] & dif[tmp] <= z[nz,j]
      if( any( tmp2 )) {
        mtmp <- mean( dif[tmp2] )
        vtmp <- var( dif[tmp2] )
        if( vtmp == 0 | is.na( vtmp ))
          vtmp <- 1
        else
          vtmp <- sqrt( vtmp )
      }
    }
    ## tmp now flags outliers beyond the outer quantiles
    tmp[tmp] <- dif[tmp] < z[1,j] | dif[tmp] > z[nz,j]
    diftmp <- dif[tmp]
    if( standardize ) {
      z[,j] <- ( z[,j] - mtmp ) / vtmp
      if( length( diftmp )) {
        diftmp <- ( diftmp - mtmp ) / vtmp
        ## mtmp reused here as a logical mask of reportable extremes
        mtmp <-  diftmp > show[2] | diftmp < show[1]
        if( any( mtmp )) {
          showvals <- rbind( showvals, cbind( group = rep( j, sum( mtmp )),
            probe = ( probe[tmp] )[mtmp],
            ave = ( ave[tmp] )[mtmp],
            diff = diftmp[mtmp] ))
          ## clamp extremes into the plotting window
          diftmp[mtmp] <- pmin( xlim[2], pmax( xlim[1], diftmp[mtmp] ))
        }
      }
    }
    if( byranks )
      points( diftmp, ranks[tmp], cex = 0.5, col = "blue" )
    else
      points( diftmp, ave[tmp], cex = 0.5, col = "blue" )
  }
  if( length( showvals )) {
    cat( "Extreme values by group:\n")
    row.names( showvals ) <- seq( nrow( showvals ))
    print( showvals )
  }
  ## connect each quantile level across groups (step-averaged), label ends
  for( i in as.character( levels.z )) {
    tmpd <- c(rbind(z[i,],z[i,]))
    tmpd <- c(tmpd[1],(tmpd[-1]+tmpd[-2*numlines])/2,
              tmpd[2*numlines])
    lines( tmpd, ytmp )
    if( splineit ) {
      tmp <- spline( ytmp, tmpd )
      lines( tmp$y, tmp$x )
    }
    text( z[i,c(1,numlines)], ytmp[c(1,1+2*numlines)], i, cex = 0.5 )
  }
  invisible( showvals )
}
#####################################################################
##
## $Id: newton.R,v 1.1 2003/11/25 14:50:35 jgentry Exp $
##
##     Copyright (C) 1999, 2000 Michael A. Newton.
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
##
##Code in ftp.biostat.wisc.edu/pub/newton/Arrays/code/
## was used to implement the calculations reported
##in Newton et al. 1999. On differential variability of expression ratios:
##Improving statistical inference about gene expression changes from 
##microarray data.  Submitted to J. Comp. Biol., 11/1999.
##
##See www.stat.wisc.edu/~newton/ for further information.
##
##The files all use data read in by code in the `read' directory.
##At the moment, the raw data files are unavailable. To implement
##calculations, simply make sure that intensity measurements get read
##into two vectors, ``xx'' and ``yy'' of length equal to the number of
##spots on the microarray.
##
##Briefly, the files in this directory do the following:
##
##oddsplot        plots the odds of change, Fig. 4
##                  (uses fits from em.ggb)
##
##em.ggb              fits the Gamma-Gamma=Bernoulli model via EM
##
##pmarg           computes the profile loglikelihood for p
##
##fitgg           fits the Gamma-Gamma model to one array
##
##rankgene            compares the ranking of genes by the naive procedure
##                  and the empirical bayes procedure; uses fits stored
##                  in ../results/fits.gg (and makes Fig. 2)
##
##datplot         creates scatterplots of intensity measurements
##
##shrinkplot      plots Fig. 1, showing shrinkage
##
##s.check0          compares marginal histograms to fitted margins (Fig. 5)
##
##s.check1          diagnostic check (Fig. 6)
##
##s.check2          looks at some of the changed genes
##
#######################################################################
## Driver: optionally fit and then plot GGB odds for a two-condition
## data frame.
## data:       data frame with intensity columns named by `conditions`
##             and (when extra columns exist) a probe-name column named
##             by `identifier`.
## theta:      GGB parameters (aa, a0, nu, p); refit via em.ggb when
##             `redo` (default: theta was not supplied by the caller).
## Returns invisibly list(theta, lod, probes).
do.oddsplot <- function(data,
                        main = substitute( data ),
                        theta = c(2,2,2,.4),
                        col = NULL,
                        xlab = conditions[1], ylab = conditions[2],
                        redo = missing( theta ),
                        conditions = c("Cy3","Cy5"),
                        identifier = "identifier", ... ) {
  if( redo )
    ## `print` partially matches em.ggb's `printit` argument
    theta <- em.ggb(data[[conditions[1]]], data[[conditions[2]]],
                    theta, theta[1:3], print = TRUE )
  lod <- oddsplot( data[[conditions[1]]], data[[conditions[2]]], theta,
                  xlab = xlab, ylab = ylab,
                  main = main, col = col, ... )
  ## use probe names when extra columns exist, else row numbers
  if( ncol( data ) > 2 )
    probes <- data[[identifier]]
  else
    probes <- seq( nrow( data ))
  probes <- lodprobes( data[[conditions[1]]], data[[conditions[2]]], theta,
                      lod, probes, col = col )
  print( probes )
  invisible( list( theta = theta, lod = lod, probes = probes ))
}
#######################################################################
normal.richmond <- function( foo = read.table( "../data/mn2.csv", header = TRUE,
                               sep = "," ),
                            channel = "BP109CH" )
{
  ## Richmond et al. style normalization for one channel of a two-slide
  ## comparison: background-subtract, rescale each slide to total 1e5
  ## over its positive values, and keep only spots positive on both.
  ## Returns list(xx, yy) of normalized intensities.

  ## Background adjustment: intensity minus background for one slide
  adjust <- function( slide )
    foo[[ paste0( channel, slide, "I" ) ]] -
      foo[[ paste0( channel, slide, "B" ) ]]
  x <- adjust( "1" )
  y <- adjust( "2" )

  ## Rescale to help with underflow (10^5 does not affect shape params)
  x <- 100000 * x / sum( x[x>0] )
  y <- 100000 * y / sum( y[y>0] )

  keep <- x>0 & y>0
  list(xx = x[keep], yy = y[keep] )
}
#######################################################################
chen.poly <- function(cv,err=.01)
{
  ## Evaluate the Chen et al. cubic calibration polynomials at the
  ## coefficient of variation `cv` for nominal error rate `err`
  ## (0.05 or 0.01). Returns c(lower, upper) threshold values;
  ## vectorized in cv (the two results are concatenated).
  ## part of table 2 from Chen et al
  bar <- rbind( c(.979, -2.706, 2.911, -2.805 ),
               c(.989, 3.082, -2.83, 28.64),
               c(.9968, -3.496,4.462, -5.002),
               c( .9648,4.810,-15.161,78.349) )
  ## cubic in cv: coefficients are (intercept, linear, quadratic, cubic)
  cubic <- function( coef )
    coef[1] + cv*coef[2] + cv^2*coef[3] + cv^3*coef[4]
  if( err==.05 )
    rows <- 1:2
  else if( err==.01 )
    rows <- 3:4
  else
    ## BUG FIX: previously an unsupported err fell through to an
    ## "object 'tmp1' not found" error; fail with a clear message.
    stop( "err must be 0.01 or 0.05" )
  c( cubic( bar[rows[1],] ), cubic( bar[rows[2],] ))
}
#######################################################################
## Fit the Gamma-Gamma model to one array by minimizing nloglik().
## xx, yy: the two channel intensities (green, red).
## start:  starting values for (aa, a0, nu).
## Returns named fits c(aa, a0, nu, n) with n = number of spots.
## NOTE(review): `bar$par` expects nlminb() to return a list with a
## `par` component (as S-PLUS/stats nlminb do) -- confirm against the
## nlminb defined in this file.
fitgg <- function( xx, yy, start = c(10,1,1) )
## green in xx, red in yy
{
  ## Fits the Gamma-Gamma model 

  bar <- nlminb( start=start, objective=nloglik, lower=c(1,0,0),
		xx = xx, yy = yy )
  fits <- c( bar$par, length(xx) )
  names( fits ) <- c("aa","a0","nu","n")
  fits
}
#######################################################################
# following uses R's nlm() as surrogate for Splus's nlminb()
# comment out or delete when used in Splus
# See notes on nloglik and nploglik below when used in R
#######################################################################
nlminb <- function( start=c(10,1,1), objective, lower=c(1,0,0), xx, yy, zz,
                   use.optim = FALSE )
{
  ## Surrogate for the S-PLUS nlminb() optimizer, built on R's nlm()/optim().
  ## start:     initial parameter values on the original scale.
  ## objective: function to minimize. When use.optim = FALSE it receives
  ##            parameters on the transformed scale defined below (log for
  ##            lower bound 0, log(log) for lower bound 1), matching
  ##            nloglik()/nploglik() in this file.
  ## lower:     per-parameter lower bounds; only values 0 and 1 are handled.
  ##            NOTE: the transform is applied only when `lower` is
  ##            explicitly supplied by the caller (missing() test below).
  ## xx, yy, zz: optional data, stashed in globals for the objectives.
  ## Returns a list with components `par` and `parameter`, both the
  ## fitted parameter vector back on the original scale.

  ## kludge to make xx, yy and zz global to nloglik or nploglik
  if( !missing( xx ))
    assign( ".fit.xx", xx, pos = 1 )
  if( !missing( yy ))
    assign( ".fit.yy", yy, pos = 1 )
  if( !missing( zz ))
    assign( ".fit.zz", zz, pos = 1 )

  if( use.optim )
    ## optim's L-BFGS-B honors box constraints directly
    theta <- optim( start, objective, lower = lower,
                   method = "L-BFGS-B" )$par
  else {    
    ## R's nlm() is unconstrained, so reparametrize to respect the bounds
    if( !missing( lower )) {
      for( i in seq( length( start ))) {
        if( lower[i] == 0 )
          start[i] <- log( start[i] )
        if( lower[i] == 1 )
          start[i] <- log( log( start[i] ))
      }
    }
    theta <- nlm( objective, start )$estimate

    ## back-transform the parameters to the original scale
    if( !missing( lower )) {
      for( i in seq( length( start ))) {
        if( lower[i] == 0 )
          theta[i] <- exp( theta[i] )
        if( lower[i] == 1 )
          theta[i] <- exp( exp( theta[i] ))
      }
    }
  }
  ## FIX: callers in this file extract $par (fitgg) and $parameter
  ## (em.ggb), which fail on the bare vector previously returned; return
  ## a list carrying both names, matching the S-PLUS nlminb contract.
  list( par = theta, parameter = theta )
}
#######################################################################
lod.ggb <- function(x,y,theta)
{
  ## Posterior log-odds of differential expression (LOD) under the
  ## Gamma-Gamma-Bernoulli model, per spot.
  ## x, y:  channel 1 and channel 2 intensities (vectorized).
  ## theta: (aa, a0, nu, pp); the common scale nu plays x0 = y0 = z0.
  aa <- theta[1]
  a0 <- theta[2]
  x0 <- theta[3]
  y0 <- theta[3]
  z0 <- theta[3]
  pp <- theta[4]
  prior <- log( pp ) - log( 1 - pp )
  bayes <- a0 * ( log(x0) + log(y0) - log(z0) ) +
    ( 2 * aa + a0 ) * log( x + y + z0 ) -
    ( aa + a0 ) * ( log( x + x0 ) + log( y + y0 ) ) +
    2 * lgamma( aa + a0 ) - lgamma( a0 ) - lgamma( 2 * aa + a0 )
  ## divide by 2.3 (~ log 10) to convert natural-log odds to base 10
  ( prior + bayes ) / 2.3
}
#######################################################################
loglik <- function(theta,xx,yy)
{
  ## Observed-data loglikelihood of the Gamma-Gamma-Bernoulli mixture.
  ## xx, yy: intensities in the two channels (vectorized).
  ## theta:  (aa, a0, nu, p) with p the mixing probability of change.
  aa <- theta[1]
  a0 <- theta[2]
  nu <- theta[3]
  pp <- theta[4]

  ## log p_0(r,g): unchanged component (common factor (rg)^(a-1) removed)
  null.part <- lgamma(2*aa+a0) + a0*log(nu) - 2*lgamma(aa) - lgamma(a0) -
    (2*aa+a0)*log( xx+yy+nu )

  ## log p_a(r,g): changed component
  alt.part <- 2*(lgamma(aa+a0)-lgamma(aa)-lgamma(a0)) +
    2*a0*log(nu) - (aa+a0)*log( (xx+nu)*(yy+nu) )

  ## mixture loglikelihood per spot, then summed
  mix <- pp * exp(alt.part) + (1 - pp) * exp(null.part)
  sum( (aa-1)*log(xx*yy) + log( mix ) )
}
#######################################################################
nloglik <- function( theta, xx = .fit.xx, yy = .fit.yy )
{
  ## Negative Gamma-Gamma loglikelihood for one array.
  ## theta is on the transformed scale (log(log(aa)), log(a0), log(nu))
  ## used by the nlminb() surrogate in this file; undo it first.
  theta <- exp( theta )
  theta[1] <- exp( theta[1] )
  aa <- theta[1]
  a0 <- theta[2]
  x0 <- theta[3]
  y0 <- theta[3]
  n <- length( xx )

  ## positive part of the loglikelihood
  const.part <- 2*n * ( lgamma(aa+a0) - lgamma(aa) - lgamma(a0) )
  data.part <- n*a0 * ( log(x0)+log(y0) ) + (aa-1) * sum( log(xx)+log(yy) )
  ## return the negated loglikelihood for minimization
  (aa+a0) * sum( log(x0+xx) + log(y0+yy) ) - ( const.part + data.part )
}
#######################################################################
nploglik <- function( theta, xx= .fit.xx, yy = .fit.yy, zz = .fit.zz )
{
  ## Negative complete-data (partial) loglikelihood for the EM M-step.
  ## xx, yy: channel intensities; zz = P(b != c | xx, yy) from the E-step.
  ## pp = P(zz = 1) is optimized separately, hence "partial" loglik.
  ## theta is on the transformed scale (log(log(aa)), log(a0), log(nu));
  ## undo it first.
  theta <- exp( theta )
  theta[1] <- exp( theta[1] )

  aa <- theta[1]
  a0 <- theta[2]
  x0 <- theta[3]
  y0 <- theta[3]
  z0 <- theta[3]
  n <- length( xx )

  ## Complete-data loglikelihood, weighting spots by zz (changed)
  ## versus 1 - zz (unchanged)
  sumzz <- sum( zz )
  lgaa <- lgamma( aa )
  lga0 <- lgamma( a0 )
  cll <- (aa-1) * sum( log(xx) + log(yy) ) +
    sumzz * 2 * ( lgamma(aa+a0) - lgaa - lga0 ) +
    sumzz * a0 * ( log(x0) + log(y0) ) +
    (n-sumzz) * ( lgamma(2*aa+a0) - 2 * lgaa - lga0 ) +
    (n-sumzz) * a0 * log(z0) -
    (aa+a0) * sum( zz * ( log(x0+xx) + log(y0+yy) ) ) -
    (2*aa+a0) * sum( (1-zz) * ( log(z0+xx+yy) ) )
  ## negate for minimization
  -cll
}
#######################################################################
rankgene <- function( xx, yy, fits = fitgg( xx, yy ))
{
  ## Compare gene rankings by raw fold change against the empirical
  ## Bayes (shrunken) fold change; shrinkage constant is fits[3] (nu).
  ## Returns list(count, r1, r2): count[i] = how many of the top-i raw
  ## genes are also top-i after shrinkage; r1/r2 are the two rankings.
  shrink <- fits[3]
  xhat <- xx + shrink
  yhat <- yy + shrink
  ## tiny random jitter breaks ties in |log ratio|
  eps <- runif( length(xx) ) * .00001

  r1a <- rank( abs( log(xx/yy) + eps ) )     # raw ranking
  r2a <- rank( abs( log(xhat/yhat) + eps ) ) # Bayes ranking

  ## agreement of the two top-i lists for i = 1..100
  counta <- vapply( 1:100,
                   function( k ) sum( r2a[ r1a <= k ] <= k ),
                   numeric( 1 ))
  list( count = counta, r1 = r1a, r2 = r2a )
}
#######################################################################
## EM fit of the Gamma-Gamma-Bernoulli model to paired intensities.
## x, y:    spot intensities; values at or below -offset are dropped
##          (with a warning) and `offset` is added to the rest.
## theta:   initial (aa, a0, nu, pp).
## start:   starting values handed to nlminb() at each M-step.
## pprior:  Beta hyperparameter for p; when 0, p is held fixed (as used
##          by pmarg) and the global-data kludge is skipped.
## Returns the fitted theta = (aa, a0, nu, pp).
## NOTE(review): the M-step extracts fit$parameter from nlminb();
## confirm against the return value of the nlminb defined in this file.
em.ggb <- function( x, y, theta = c(2,2,2,.4), start = c(2,1.2,2.7),
                 pprior = 2, printit = FALSE, tol = 1e-9, offset = 0 )
{
### Fit Gamma/Gamma/Bernoulli model (equal marginal distributions)
### 
### Model:
### spot intensities x ~ Gamma(a,b); y ~ Gamma(a,c)
### w.p. p,	b=c,		common value ~ Gamma(a0,nu)
###     w.p. 1-p,	b != c, 	values ~ Gamma(a0,nu)
###     all independent

  tmp <- x > -offset & y > -offset
  x <- x[tmp] + offset
  y <- y[tmp] + offset
  if( any( !tmp ))
    warning( paste( sum( !tmp ), "probes dropped with values below", offset ))
  rm( tmp )
  n <- length(x)
  if( pprior ) {
    ## kludge to make x and y global to nploglik
    assign( ".fit.xx", x, pos = 1 )
    assign( ".fit.yy", y, pos = 1 )
  }

  ## EM algorithm

  ## starting value
  notdone <- TRUE
  iter <- 1
  while( notdone ) {
    aa <- theta[1]; a0<-theta[2]; x0<-theta[3]; y0<- theta[3];
    z0 <- theta[3]; pp <- theta[4]

    ## E-step: posterior probability each spot changed (cf. lod.ggb)
    tmp <- log( pp ) - log(1-pp) +
      a0*( log(x0) + log(y0) - log(z0) ) +
        (2*aa+a0)*log(x+y+z0) -
          (aa+a0)*( log(x+x0) + log(y+y0) ) +
            2*lgamma(aa+a0) - lgamma(a0) - lgamma(2*aa+a0)
    zz <- 1/( 1 + exp(-tmp) )

    ## M-step: refit (aa, a0, nu) given the E-step weights
    fit <- nlminb( start=start, objective=nploglik,
                  lower=c(1,0,0), zz=zz )

    ## check tolerance
    chk <- sum(( theta[1:3] - fit$parameter )^2 )
    
    ## Add a prior on pp
    theta[1:3] <- fit$parameter

    ## Beta hyperparameter for p
    if( pprior )
      theta[4] <- ( pprior + sum( zz ) ) / ( 2 * pprior + n )
    if( printit )
      print(round(theta,4) )
    iter <- iter + 1
    ## stop when parameters settle or after 100 iterations
    notdone <- (chk > tol) & (iter<100)
  } 
  theta
}
#######################################################################
pmarg <- function( xx, yy, theta = c(2.75,1.37,4.12), nsupp = 20 )
{
  ## Profile loglikelihood for the GGB mixing rate p: for each candidate
  ## p on a support grid, re-fit the remaining parameters via em.ggb
  ## (pprior = 0 holds p fixed) and evaluate the loglikelihood there.
  ## Returns a 5 x nsupp matrix with rows (aa, a0, nu, pp, lprof).

  ## kludge to make xx and yy global to nploglik
  assign( ".fit.xx", xx, pos = 1 )
  assign( ".fit.yy", yy, pos = 1 )

  ## support for heat-shock example
  psupp <- seq( .0001, .2, length = nsupp )
  ## BUG FIX: array(NA, 5, nsupp) treated nsupp as dimnames and failed;
  ## the loop below fills a 5 x nsupp matrix.
  lprof <- matrix( NA, 5, nsupp )
  ## BUG FIX: fall back to standard parameter names when theta is unnamed
  ## (the default is), so the dimnames assignment cannot fail.
  par.names <- names( theta )[1:3]
  if( is.null( par.names ))
    par.names <- c( "aa", "a0", "nu" )
  dimnames( lprof ) <- list( c( par.names, "pp", "lprof" ), 1:nsupp )
  for( ii in 1:nsupp ) {
    theta[4] <- psupp[ii]

    ## evaluate profile loglikelihood
    ## BUG FIX: results were assigned to the undefined `thetas`;
    ## store the fitted parameters in lprof instead.
    lprof[1:4,ii] <- theta <- em.ggb( xx, yy, theta, theta[1:3], 0,
                                      printit = TRUE )
    lprof[5,ii] <- loglik( theta, xx, yy )
  }
  lprof
}
#######################################################################
s.marg <- function( xx, yy,
                   aa = 22.8, a0 = 1.08, nuA = .01, nu0 = .159, p = .064 )
{
  ## Fitted marginal density of one channel's intensities under the
  ## Gamma-Gamma-Bernoulli model: a p / (1-p) mixture of the "changed"
  ## (scale nuA) and "unchanged" (scale nu0) Gamma-Gamma marginals,
  ## evaluated on a 500-point grid spanning the range of xx.
  ## NOTE(review): yy is currently unused -- presumably kept so both
  ## channels share a call signature; confirm before removing.

  ##     cy3/cy5     a    a0      nu.g   nu.r   nu     p
  ##MN1   1.27     32.9  1.33    0.011  0.016  0.233  0.033
  ##MN2a  1.27     22.8  1.08    0.010  0.014  0.159  0.064
  ##MN2b  1.30     15.1  0.84    0.009  0.008  0.174  0.050
  ##MN3a  1.64      3.9  1.90    9.15   4.12   1.29   0.212
  ##MN3b  1.60      2.5  1.93   18.2    6.38   2.36   0.343

  ## BUG FIX: the grid previously referenced the undefined `x`; use xx.
  supp <- seq( min(xx), max(xx), length=500 )
  logmargA <- lgamma(aa+a0) - lgamma(aa) - lgamma(a0) +
    a0*log(nuA) + (aa-1)*log(supp) - (aa+a0)*log(supp+nuA)
  logmarg0 <- lgamma(aa+a0) - lgamma(aa) - lgamma(a0) +
    a0*log(nu0) + (aa-1)*log(supp) - (aa+a0)*log(supp+nu0)
  
  p * exp(logmargA) + (1-p) * exp(logmarg0)
}
#######################################################################
shrinkplot <- function( xx, yy, fits = s.fits( xx, yy ), chip="Control",
                       lims = range( c( xx, yy )))
{
  ## Shrinkage plot (Fig. 1 style): raw intensities on log-log axes with
  ## a segment from each raw point to its shrunken value xx + fits[1].
  ## BUG FIX: `lims` was referenced but never defined; it is now a
  ## parameter defaulting to the common data range (backward compatible).
  ## NOTE(review): the default fits = s.fits(xx, yy) refers to a function
  ## not defined in this file; callers should supply `fits`.

  xhat <- xx + fits[1]
  yhat <- yy + fits[1]
  plot( xx, yy, log="xy", pch=".", xlab="Cy3", ylab="Cy5",
       xlim=lims, ylim=lims )
  text( .01, 100, chip, cex=.8, adj=0 )
  
  ## one segment per spot from raw to shrunken position
  mm <- length(xx)
  for( i in seq_len( mm ))
    lines( c( xx[i], xhat[i] ), c(yy[i],yhat[i]), lwd=.2)
  invisible()
}
#######################################################################
## Scatterplot of two-channel intensities with LOD contour lines from
## the Gamma-Gamma-Bernoulli model (Newton et al., Fig. 4 style).
## x, y:     channel intensities; `offset` is added, and spots at or
##           below -offset are truncated (with a warning).
## theta:    GGB parameters (aa, a0, nu, pp) for lod.ggb().
## by.level: spacing factor for extra LOD contours (0 suppresses them).
## rotate:   plot ratio y/x against geometric mean sqrt(x*y) instead.
## col:      point colors; LOD >= 0 points default to blue, larger cex.
## shrink:   add nu = theta[3] to both channels before plotting.
## Returns the per-spot LOD vector invisibly.
oddsplot <- function( x, y, theta, by.level = 10,
                     rotate = FALSE, offset = 0,
                     main = "", xlab = xlabs, ylab = ylabs,
                     col = NULL, cex = c(.25,.75),
                     shrink = FALSE,
                     lims = range( c( x, y )))
{
  ## Plot odds curve for Gamma Gamma Bernoulli model

  ## truncate negative values for evaluation
  tmp <- x > -offset
  x <- x + offset
  x[!tmp] <- min( x[tmp] ) / 2
  tmpy <- y > -offset
  y <- y + offset
  y[!tmpy] <- min( y[tmpy] ) / 2
  tmp <- !( tmp & tmpy )
  if( any( tmp ))
    warning( paste( sum( tmp ), "probes truncated to", offset ))
  rm( tmp )

  logbf <- lod.ggb(x,y,theta=theta)

  if( shrink ) {
    x <- x + theta[3]
    y <- y + theta[3]
  }
  ## axis labels (xlab/ylab defaults are lazily evaluated from these)
  ylabs <- "Cy5"
  xlabs <- "Cy3"  
  if( rotate ) {
    ## reparametrize to (geometric mean, ratio)
    tmp <- sqrt( x * y )
    y <- y / x
    x <- tmp
    rm( tmp )
    ylabs <- paste( ylabs, xlabs, sep = " / " )
    xlabs <- "Average Intensity"
    if( missing( lims )) {
      xlim <- range( x )
      ylim <- range( y )
    }
  }
  else {
    xlim <- ylim <- lims
  }
  par( pty = "s" )

  ## empty log-log frame; points added below in color groups
  plot( x[1], y[1], log="xy", xlab=xlab, ylab=ylab, xlim=xlim,
       ylim=ylim, type="n" )
  title( main )
  ##  usr <- par( "usr" )
  ##  text( 10^( usr[1]+ strwidth("abc") ), 10^((usr[3]+3*usr[4])/4), main,
  ##     cex=.8, adj=0 )
	
  ## report points with LOD > 0
  tmp <- logbf >= 0
    if( missing( col ) | is.null( col )) {
    col <- rep( "black", length( x ))
    col[tmp] <- "blue"
  }
  if( length( col ) != length( x ) & length( col ) != 1 )
    col <- col[1]
  ## draw each color group twice: larger cex for LOD >= 0, smaller otherwise
  for( i in unique( col )) {
    coli <- ( i == col ) & tmp
    if( any( coli ))
      points( x[coli], y[coli], cex=cex[2], col=i )
    coli <- ( i == col ) & !tmp
    if( any( coli ))
      points( x[coli], y[coli], cex=cex[1], col=i )
  }

  ## contour lines
  if( rotate ) {
    abline( h = 1, lty = 2, col = "red" )
    ## LOD expressed in the rotated (mean, ratio) coordinates
    assign( "rlod.ggb", function( z, w, theta ) {
      w <- sqrt( w )
      lod.ggb( z / w, z * w, theta ) } )
    fun <- "rlod.ggb"
  }
  else {
    abline( 0, 1, lty = 2, col = "red" )
    fun <- "lod.ggb"
  }
  ## LOD surface evaluated on a 100 x 100 log-spaced grid
  vec <- seq( log10( lims[1] ), log10( lims[2] ), length = 100 )

  bf <- if( shrink )
    outer( 10^vec - theta[3], 10^vec - theta[3], fun, theta = theta )
  else
    outer( 10^vec, 10^vec, fun, theta = theta)
  
  ## filled.contour(10^vec,10^vec,bf,levels=c(0,5),col=c("lightgray","white"),
  ##   save=TRUE, plotit=TRUE, add=TRUE, labex=0, lwd=2 )
  
  ## contours at 0,1,2 LOD
  contour(10^vec,10^vec,bf,levels=0,
          save=TRUE, plotit=TRUE, add=TRUE, labex=0, lwd=1, col = "red", lty = 3 )
  if( max( logbf ) >= 1 & by.level > 0 )
    contour(10^vec,10^vec,bf,
            levels=seq(0,floor(max(logbf)),by=log10(by.level))[-1],
            save=TRUE, plotit=TRUE, add=TRUE, labex=0, lwd=1, lty = 3 )
  
  ## box()
  ## tt <- x/y
  ## chat <- sqrt( mean( (tt-1)^2/(1+tt^2) ) )
  ## tmp01 <- chen.poly(chat,err=.01)
  ## abline( -log(tmp01[1]), 1, lty=2, lwd=1.5, err=(-1) )
  ## abline( -log(tmp01[2]), 1, lty=2, lwd=1.5, err=(-1) )
  
  invisible( logbf )
}
#######################################################################
## Build a table of probes with lod >= lowlod, ordered by decreasing LOD.
## Columns: probe name, signed LOD (negative when ratio < 1), ratio of
## shrunken intensities (xx+nu)/(yy+nu), inverse ratio, and (when `col`
## matches xx in length) the plotting color.
## xx, yy are shifted by `offset` and values at or below -offset dropped
## with a warning, mirroring em.ggb().
lodprobes <- function( xx, yy, theta, lod, probes, col = 1, lowlod = 0,
                      offset = 0 )
{
  tmp <- xx > -offset & yy > -offset
  xx <- xx[tmp] + offset
  yy <- yy[tmp] + offset
  if( any( !tmp ))
    warning( paste( sum( !tmp ), "probes dropped with values below", offset ))
  rm( tmp )

  tmpc <- lod >= lowlod
  lod.order <- order( -lod[tmpc] )
  
  ## everything is ordered by LOD score

  ## probe names
  add.probes <- as.character( probes[tmpc] )
  add.probes <- add.probes[lod.order]
  
  ## LOD score: the top length(add.probes) values of the full lod vector
  ## are exactly the sorted values passing lowlod
  lod.probes <- data.frame( probe = add.probes,
                           LOD = -sort(-lod)[seq(length(add.probes))] )

  ## ratio of xx to yy (shrunken by nu = theta[3])
  lod.probes$ratio <- c(( xx[tmpc] + theta[3] ) /
                        ( yy[tmpc] + theta[3] ))[lod.order]

  ## signed LOD score
  lod.probes$LOD[lod.probes$ratio<1] <- -lod.probes$LOD[lod.probes$ratio<1]

  ## inverse ratio
  lod.probes$inverse <- 1 / lod.probes$ratio

  ## round off numbers to 3 decimal places
  lod.probes[,-1] <- round(lod.probes[,-1],3)

  ## colors from plot
  if( length( col ) == length( xx ))
    lod.probes$col <- c(col[tmpc])[lod.order]

  lod.probes
}
#######################################################################
s.check0 <- function( xx, yy, theta1, theta2, chip = "Control" )
{
  ## Diagnostic (Fig. 5 style): histogram of log intensities from both
  ## channels overlaid with two fitted Gamma-Gamma marginal densities,
  ## theta1 (solid) and theta2 (dashed); each theta is (aa, a0, nu).
  lims <- c(.0065,1208)

  ## support grid on the natural log scale
  supp <- seq( log(lims[1]), log(lims[2]), length=100 )

  ## log density of the natural log of intensity under the GG marginal
  lden <- function(u,aa,a0,nu) {
    lgamma(aa+a0) - lgamma(aa) - lgamma(a0) +
      a0*log(nu) + (aa-1)*log( exp(u) ) -
        (aa+a0)*log( exp(u) +nu) + u
  }

  hist( log( c(xx,yy) ), 50, prob=TRUE, ylim=c(0,.46), xlab="",ylab="",
       xlim=c(-5,8), cex=.9 )

  ## overlay each fitted marginal: lty 1 for theta1, lty 2 for theta2
  fits <- list( theta1, theta2 )
  for( k in 1:2 ) {
    th <- fits[[k]]
    lines( supp, exp( lden( supp, th[1], th[2], th[3] ) ), lty=k, lwd=2 )
  }

  text( -5, .35, adj=0, chip, cex=.8 )
  invisible()
}
#######################################################################
s.check1 <- function( xx, yy, theta, chip = "Control" )
{
  ## Check the fit of the Gamma-Gamma-Bernoulli model by
  ## looking at (R-G)/(R+G) for spots deemed to not change;
  ## this statistic should follow Beta(aa, aa) under the model.

  supp <- seq(.001,.999,length=150)

  ## BUG FIX: `lod` is not defined in this file; the LOD scores come
  ## from lod.ggb().
  logbf <- lod.ggb(xx,yy,theta=theta)
  ind <- (logbf < 0 )
  xx <- xx[ind]
  yy <- yy[ind]
  ## map (x - y)/(x + y) from [-1, 1] onto [0, 1]
  stat <- .5*( (xx-yy)/(xx+yy) + 1 )
  hist(stat,50,prob=TRUE,ylim=c(0,6.7), cex=.9 )

  ## overlay the Beta(aa, aa) density implied by the fit
  den <- dbeta(supp,theta[1],theta[1])
  lines(supp,den,lwd=2)

  text(.1,5,chip,adj=0,cex=.8)
  invisible()
}
#######################################################################
s.check2 <- function( foo, xa, ya, thetaa, xb, yb, thetab,
                     spots = dimnames( foo)[[1]] )
{
  ## Look at the genes with high LOD compared to predicted changes
  ## from Craig's paper: returns the spots appearing in both arrays'
  ## positive-LOD lists, in array a's order.

  fix.spots <- function( x, y, theta, spots ) {
    ## clamp negative intensities at zero ("fix the edges")
    x[x<0] <- 0
    y[y<0] <- 0
    bfa <- lod.ggb(x,y,theta=theta)

    ## skim the top: spots with positive LOD, most changed first
    ## FIX: index by the actual data length instead of the hard-coded
    ## 1:4290 (which only worked for one particular chip size).
    inda <- which( bfa > 0 )
    tmpa <- bfa[inda]
    orda <- order( -tmpa )
    ( spots[inda] )[orda]
  }
  spa <- fix.spots( xa, ya, thetaa, spots )
  spb <- fix.spots( xb, yb, thetab, spots )
  
  ## spots present in both top lists
  blah <- outer( spa, spb, "==" )
  bar <- apply(blah,1,any)  # the longer dimension
  spa[bar]
}

#######################################################################
## ftp://ftp.biostat.wisc.edu/pub/newton/npvolume/s.tack
## Beckett Diaconis Tack Data
## nsuccess <- c( rep(1,3), rep(2,13), rep(3,18), rep(4,48), 
##    rep(5,47), rep(6,67), rep(7,54), rep(8,51), rep(9,19) )
## ntrials <- 9; N <- length( nsuccess )
##   ## Binomial likelihood
## 	db2 <- function(y,prob,n){return(dbinom(y,n,prob))}
## 	lik <-  outer(nsuccess,grid,FUN="db2",n=ntrials) 
## gg = dbeta(grid,shape1=.5,shape2=.5),
gammaden <- function( x, a, b )
{
  ## Gamma density with shape `a` and rate `b`, evaluated at `x`.
  ## Equivalent to dgamma(x, shape = a, rate = b); kept explicit here.
  kernel <- b^a * x^(a - 1) * exp( -x * b )
  kernel / gamma( a )
}
## recurbayes <- function( x, theta, domain = c(.01,.99), ngrid = 100
##   grid = seq(domain[1],domain[2],length=ngrid),
##   lik, gg = gammaden( exp(grid), theta[1], theta[3] ),
##   xlab="tack success probability", 
##   ylab="posterior predictive density" )
## {
## ## grid = support of mixing distribution 
## ## gg = prior guess
## ## lik = likelihood for data
##
##   N <- length( x )
##   alpha <- 1/3 
##
## ## A weight sequence
##   weight <- 1/sqrt((alpha+1)*(alpha+1:N))
##
## ## Process tacks in random order
##   ord <- sample( 1:N )
##   delta <- grid[2]-grid[1]
##
## ## Recursion yields approximate Bayes estimate gg
##   for( i in 1:N ) {
##     post <- lik[ord[i],]*gg
##     post <- ( post/sum(post) )/delta
##     gg <- gg*( 1-weight[i] ) + weight[i]*post 
##   }
## ## Good idea to repeat loop to see variation over orderings.
##
## ## Estimated predictive density
##   plot( grid, gg, type="l", xlab=xlab, ylab=ylab )
##
##   invisible( gg )
## }
#######################################################################
## This code uses a nonparametric Bayesian predictive recursion to
## estimate the mixing distribution of scale parameters for Gamma
## distributed array data.  See Newton and Zhang (1999) Biometrika, 86,
## 15-26 for more about this recursive algorithm, or go to
## www.stat.wisc.edu/~newton/
##
## The purpose of this calculation is to diagnose inadequacies of
## the Gamma mixing assumption in the Gamma-Gamma gene expression model.
########################################################################
predrecur <- function( xx, theta = c(32.9,1.33,0.01), gridlim = c(.0001,1) )
{
  ## Nonparametric Bayesian predictive recursion (Newton & Zhang 1999)
  ## for the mixing distribution of Gamma scale parameters.
  ## xx    : one set of array measurements
  ## theta : c(aa, a0, nu) from the fitted Gamma-Gamma model; the
  ##         observation component Gamma(aa, .) is treated as known
  ## gridlim: support limits for the mixing distribution
  ## Returns (invisibly) the estimated mixing density on the grid.
  N <- length( xx )

  ## support grid for the random-effect scale
  grid <- seq( gridlim[1], gridlim[2], length = 150 )
  delta <- grid[2] - grid[1]

  ## prior guess: Gamma(a0, nu), as estimated from the G-G model
  gg <- dgamma( grid, shape = theta[2], scale = ( 1 / theta[3] ))
  alpha <- 1
  g0 <- gg

  ## Gamma likelihood at each (observation, grid point) pair
  dg2 <- function( y, theta, shape )
    dgamma( y, shape = shape, scale = ( 1 / theta ))
  lik <- outer( xx, grid, FUN = "dg2", shape = theta[1] )

  ## recursion yields an approximate Bayes estimate gg
  weight <- 1 / sqrt( ( alpha + 1 ) * ( alpha + 1:N ))  # weight sequence
  ord <- sample( 1:N )                                  # random gene order
  for ( i in seq_len( N )) {
    raw <- lik[ord[i], ] * gg
    post <- ( raw / sum(raw) ) / delta                  # normalized posterior
    gg <- ( 1 - weight[i] ) * gg + weight[i] * post
  }

  plot( grid, gg, type = "l", xlab = "scale",
       ylab = "posterior predictive density" )
  lines( grid, g0, lty = 2 )   # prior guess for comparison
  invisible( gg )
}
## (Repeat loop to see variation over orderings.)
## (We recommend averaging over a half dozen or so orderings---)

## Plot the estimated predictive density for the scale parameter theta
## of a future spot.


##########################################################################

###########################################################################
##
## s.npdiag: S code to do a nonparametric mixing diagnostic for microarrays
##
##     Copyright (C) 2000 Michael A. Newton.
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
#
# This Splus/R code uses a nonparametric Bayesian predictive recursion to
# estimate the mixing distribution of scale parameters for Gamma
# distributed array data.  
#
# The purpose of this calculation is to diagnose inadequacies of
# the Gamma mixing assumption in the Gamma-Gamma-Bernoulli
# gene expression model.  
#
# See Newton, Kendziorski, Richmond, Blattner, and Tsui 1999,
# http://www.stat.wisc.edu/~newton/papers/abstracts/btr139a.html
# where the hierarchical parametric models for gene expression data
# are presented.
#
# See Newton and Zhang (1999) Biometrika, 86, 15-26 for more about 
# the recursive algorithm used in the code below, or go to
# http://www.stat.wisc.edu/~newton/research/npb.html
#
#
########################################################################

npdiag <- function( xx, yy, aa, a0, nu, pp, verbose = FALSE ) {
  ## Nonparametric mixing diagnostic for the Gamma-Gamma-Bernoulli model
  ## via Newton-Zhang predictive recursion on the LOG scale of the
  ## inner scale parameter.
  ## xx, yy : raw expression measurement vectors of equal length
  ## Parameters of the fitted GGB model:
  ##   aa : shape, observation component
  ##   a0 : shape, inner component
  ##   nu : scale, inner component
  ##   pp : proportion of changed genes (kept for interface
  ##        compatibility; not used in this diagnostic)
  ## verbose : print the iteration counter (was unconditional debug output)
  ## Returns (invisibly) the estimated mixing density on the grid,
  ## and plots it against the parametric prior guess.

  N <- length(xx)

  ## grid range covering the bulk (0.1%-99.9%) of the prior guess
  ends <- log( (1/nu)*qgamma( c(.001,.999), shape=a0 ) )

  ## support of the mixing distribution (log scale)
  grid <- seq(ends[1], ends[2], length = 350)
  delta <- grid[2] - grid[1]

  ## Gamma density parameterized by a multiplicative scale factor
  dgam <- function(y, shape, scale) scale * dgamma(y * scale, shape)

  ## log-Gamma prior guess (Jacobian exp(grid) for the log transform)
  g0 <- exp(grid) * dgam(exp(grid), shape = a0, scale = nu)
  gg <- g0

  ## Gamma likelihood with logged scale
  dg2 <- function( y, theta, shape ) {
    dgam( y, shape = shape, scale = exp( theta ))
  }

  ## recursion yields approximate Bayes estimate gg
  alpha <- 1
  weight <- 1/sqrt((alpha+1)*(alpha+seq_len(N)))  # weight sequence

  ord <- sample( N )      # process genes in random order
  for( i in seq_len( N )) {
    j <- ord[i]
    z <- c( xx[j], yy[j] )
    ## joint prob( data_j | mixing parameter ), channels independent
    lik <- z * outer(z, grid, FUN = "dg2", shape = aa)
    tmp <- c( lik[1,] * lik[2,] )
    post <- tmp * gg
    post <- ( post / sum(post) ) / delta
    gg <- gg * ( 1 - weight[i] ) + weight[i] * post
    if (verbose)
      print(i)    # progress trace (was always on)
  }
  ## NOTE: repeat with other random orderings to see variation.

  ## nonparametric estimate vs parametric prior guess
  plot( grid, gg, type="l" )
  lines( grid, g0, lty=2 )
  invisible( gg )
}

#####################################################################
##
## $Id: twoarray.R,v 1.1 2003/11/25 14:50:35 jgentry Exp $
##
##     Copyright (C) 2001 Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
##
# Using the normalization from Richmond et al. (1999)
# File to read data for subsequent analysis
# This one normalizes to average intensity first

twoarray.norm <- function( foo, ..., conditions = c("Cy3","Cy5"),
                       reduce = FALSE, identifier = "identifier" ) {
  ## Background-adjust and normalize two channels of a two-color array.
  ## foo        : data frame; column 1 is the spot identifier
  ## ...        : two character vectors c(signal, background) naming the
  ##              columns for each channel; if named, the names override
  ##              `conditions`
  ## reduce     : if TRUE, drop spots that are not positive on both channels
  ## identifier : name for the identifier column of the result
  ## Returns a data frame with columns (identifier, condition1, condition2).

  foonames <- list( ... )
  if( length( foonames ) < 2 )
    stop( "supply two channel descriptors via ... (signal, background each)" )
  ## scalar condition: use && (was vectorized &)
  if( !is.null( names( foonames )) && missing( conditions ))
    conditions <- names( foonames )
  xnorm <- foonames[[1]]
  ynorm <- foonames[[2]]

  ## background adjustment (very simple: signal minus background)
  x <- foo[[xnorm[1]]] - foo[[xnorm[2]]]
  y <- foo[[ynorm[1]]] - foo[[ynorm[2]]]

  ## normalization: rescale by total positive intensity; the 10^5 factor
  ## guards against underflow and does not affect shape parameters
  x <- 100000 * x / sum( x[x>0] )
  y <- 100000 * y / sum( y[y>0] )

  if( reduce ) {
    ok <- x > 0 & y > 0
    l <- data.frame( foo[[1]][ok], x[ok], y[ok] )
  }
  else {
    l <- data.frame( foo[[1]], x, y )
  }
  names( l ) <- c( identifier, conditions )
  l
}

## compare oddsplot and pickgene
twoarray.plot <- function( mydata,
                          main = deparse( substitute( mydata )),
                          theta,
                          conditions = c("Cy3","Cy5"),
                          identifier = "identifier" ) {
  ## Side-by-side diagnostic plots comparing the oddsplot approach
  ## (Newton et al. 2001) with the pickgene approach (Lin et al. 2001)
  ## on normalized two-channel data (as from twoarray.norm).
  ## NOTE: modifies par(mfrow/mfcol) and does not restore it.

  par(mfrow=c(2,2))

  ## oddsplot: Newton et al. (2001)
  hist( log( mydata[[conditions[1]]] )[mydata[[conditions[1]]]>0],
       breaks = 100, xlab = "Log Adjusted Condition 1")
  hist( log( mydata[[conditions[2]]] )[mydata[[conditions[2]]]>0],
       breaks = 100, xlab = "Log Adjusted Condition 2")
  plot( log( mydata[[conditions[1]]] ), log( mydata[[conditions[2]]] ),
       xlab = paste( "Log Adjusted", conditions[1] ),
       ## fixed: ylab previously (incorrectly) used conditions[1]
       ylab = paste( "Log Adjusted", conditions[2] ), cex = 0.4 )
  title( main )
  ## normal scores (rank-based) for each condition, used further below
  xxx <- qnorm( rank( mydata[[conditions[1]]] ) /
               ( 1 + length( mydata[[conditions[1]]] )))
  yyy <- qnorm( rank( mydata[[conditions[2]]] ) /
               ( 1 + length( mydata[[conditions[2]]] )))

  ## only pass theta through when supplied so do.oddsplot's
  ## missing(theta)-based default for `redo` still works
  if( !missing( theta ))
    do.oddsplot( mydata, main, rotate = TRUE, theta = theta )
  else
    do.oddsplot( mydata, main, rotate = TRUE )

  par( mfcol = c(3,2))

  ## pickgene: Lin et al. (2001)
  plot( log( mydata[[conditions[1]]] ), log( mydata[[conditions[2]]] ),
       xlab = paste( "Log Adjusted", conditions[1] ),
       ylab = paste( "Log Adjusted", conditions[2] ), cex = 0.4 )
  title( paste( "Log of", main ))
  abline( 0, 1, col = "red", lty = 2 )
  tmp <- mydata[[conditions[1]]] > 0 & mydata[[conditions[2]]] > 0
  a <- robustscale(( log( mydata[[conditions[1]]] )
                    - log( mydata[[conditions[2]]] ))[tmp],
                   ( log( mydata[[conditions[1]]] )
                    + log( mydata[[conditions[2]]] ))[tmp] )
  qqnorm(( a$y - a$center ) / a$scale, cex = 0.4 )
  abline( 0, 1, col = "red", lty = 2 )

  pickgene( mydata[,conditions], mydata[[identifier]], mfrow = NULL )
  plot(xxx, yyy,
       xlab = paste( "Rank Adjusted", conditions[1] ),
       ylab = paste( "Rank Adjusted", conditions[2] ) ,cex=0.4)
  title( paste( "Rank of", main ))
  abline( 0, 1, col = "red", lty = 2 )
  a <- robustscale( xxx-yyy, xxx+yyy )
  ## hist(( a$y - a$center ) / a$scale, breaks = 100 )
  qqnorm(( a$y - a$center ) / a$scale, cex = 0.4 )
  abline( 0, 1, col = "red", lty = 2 )
  pickgene( mydata[,conditions], mydata[[identifier]],
    rankbased = TRUE, mfrow = NULL )

}
do.oddsplot <- function(data,
                        main = substitute( data ),
                        theta = c(2,2,2,.4),
                        col = NULL,
                        redo = missing( theta ),
                        conditions = c("Cy3","Cy5"),
                        identifier = "identifier", ... ) {
  ## Run the oddsplot pipeline on two conditions of `data`:
  ## optionally re-fit theta by EM (when theta was not supplied),
  ## draw the odds plot, then print and return the high-LOD probes.
  ch1 <- data[[conditions[1]]]
  ch2 <- data[[conditions[2]]]

  ## re-estimate model parameters unless caller supplied theta
  if( redo )
    theta <- em.ggb(ch1, ch2, theta, theta[1:3], print = TRUE )

  lod <- oddsplot(ch1, ch2, theta,
                  xlab = conditions[1], ylab = conditions[2],
                  main = main, col = col, ... )

  ## use the identifier column when present, else fall back to row index
  probes <- if( ncol( data ) > 2 )
    data[[identifier]]
  else
    seq( nrow( data ))
  probes <- lodprobes(ch1, ch2, theta, lod, probes, col = col )
  print( probes )
  invisible( list( theta = theta, lod = lod, probes = probes ))
}
#####################################################################
##
## $Id: yandell.R,v 1.2 2003/12/30 19:17:27 jgentry Exp $
##
##     Copyright (C) 2001 Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
###############################################################################
rangene <- function (n = 10000, center = 4, spread = 2, contamination = 0.05,
    alpha = c(1,1), noise = 0.5, omega = c(20,20))
{
    ## Simulate two-channel gene expression data with differential
    ## expression ("contamination") biased toward low-intensity genes.
    ## Returns list(observed, true, contam): raw-scale data, log-scale
    ## truth, and the indices of contaminated genes.
    n.contam <- round(n * contamination)
    sim <- list()
    base.expr <- rnorm(n, center, spread)
    pct <- rank(base.expr) / ( 1 + n )
    ## contaminate preferentially genes with smaller intensities
    hits <- seq( n )[ n.contam >= rank( runif( n )^2 * (2 - pct )) ]
    ## half of contaminated genes upregulate (toggled between channels)
    up <- .5 < runif( n.contam )
    for (chan in 1:2) {
        sim[[chan]] <- base.expr
        s <- hits[up]
        if( length( s ))
          sim[[chan]][s] <- sim[[chan]][s] + rnorm(length(s), 3 - pct[s], 0.25)
        up <- !up
    }
    ## intrinsic biological noise, both channels
    for (chan in 1:2)
        sim[[chan]] <- sim[[chan]] + rnorm(n, 0, noise)

    known <- sim <- as.data.frame(sim)
    for (chan in 1:2) {
        ## back-transform to raw scale with channel gain, then add
        ## additive measurement error
        sim[[chan]] <- alpha[chan] * exp(sim[[chan]])
        sim[[chan]] <- sim[[chan]] + rnorm(n, 0, omega[chan])
    }
    list(observed = sim, true = known, contam = hits)
}
### Find top ranked genes
toprankgene <- function( yy, n = 500 )
{
  ## Count how many of the top-i truly-changed genes appear among the
  ## first i picks of pickgene, for i = 1..n.
  ## yy : list as returned by rangene/orangene, with components
  ##      $true (log-scale truth) and $observed (raw-scale data)
  ## Returns an integer-valued vector of length n.

  ## rank genes by true effect size (largest |difference| first)
  true.rank <- rank( apply( yy$true, 1, function(x) -abs( diff( x )) ))
  ## was yy$obs — relied on `$` partial matching; spelled out now
  ## (assumes pickgene probe ids index into true.rank — TODO confirm)
  picked <- pickgene( yy$observed, npickgene = n )$pick[[1]]$probe

  count <- rep(NA, n)
  for( i in seq_len( n )) {
    count[i] <- sum( true.rank[ picked[1:i] ] <= i )
  }
  count
}
### Yi Lin's original random gene generator
### Classical contamination model for robust statistics
orangene <- function( n = 10000, center = 4, spread = 2, contamination = .05,
         alpha = c(3,2), noise = 0.1, omega = c(20,30) )
{
  ## Yi Lin's original random gene generator: classical contamination
  ## model from robust statistics.  The first round(n*contamination)
  ## genes are differentially expressed in each channel.
  ## Returns list(observed, true): raw-scale data and log-scale truth.
  n.diff <- round( n * contamination )

  sim <- list()
  base.expr <- rnorm(n, center, spread)
  for( chan in 1:2 ) {
    ## gene expression on the log scale
    sim[[chan]] <- base.expr

    ## contamination = differential expression on the leading genes
    idx <- seq(n.diff)
    sim[[chan]][idx] <- sim[[chan]][idx] + rnorm(n.diff)

    ## intrinsic noise
    sim[[chan]] <- sim[[chan]] + rnorm(n, 0, noise)
  }
  known <- sim <- as.data.frame( sim )
  for( chan in 1:2 ) {
    ## attenuation: channel gain on the raw scale
    sim[[chan]] <- alpha[chan] * exp( sim[[chan]] )

    ## additive measurement error
    sim[[chan]] <- sim[[chan]] + rnorm(n, 0, omega[chan])
  }
  list( observed = sim, true = known )
}
### Organize scores from pickgene structure
pickedhist <- function( pick, show = names( pick ), title = show, p1 = .05,
                       plotit = TRUE, rotate = FALSE, mfrow = c(nr,nc), bw = NULL )
{
  ## Density plots of pickgene scores against the standard normal
  ## reference, for each requested contrast.
  ## pick   : pickgene result (uses its $score component)
  ## show   : contrast names or indices to plot
  ## p1     : assumed proportion of changed genes for the deconvolved curve
  ## rotate : swap x/y axes
  ## Returns (invisibly) the last density object plotted, or NULL if
  ## plotit = FALSE.

  ## fixed: `tmp` was undefined when plotit = FALSE, making
  ## invisible(tmp) error; initialize it so that path returns NULL
  tmp <- NULL
  pick <- pick$score
  if( is.numeric( show ))
    show <- names( pick )[show]
  else {
    if( any( is.na( match( show, names( pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick ), collapse = ", " )))
  }
  n <- nrow( pick[[1]] )
  if( plotit ) {
    ## panel layout: up to 2 rows x 3 columns
    nr <- min( 2, ceiling( length( show ) / 3 ))
    nc <- min( 3, ceiling( length( show ) / nr ))
    if( !is.null( mfrow ))
      par( mfrow = mfrow, pty = "s" )
    ## multiple-comparison-adjusted cutoff on the normal score scale
    tmplvl <- qnorm( adjustlevel( n * 2, .05 ) / 2, lower.tail
                    = FALSE)
    names( title ) <- show
    if( !is.null( bw )) {
      bw <- array( bw, length( show ))
      names( bw ) <- show
    }
    for( i in show ) {
      if( is.null( bw ))
        tmp <- density( pick[[i]]$score )
      else
        tmp <- density( pick[[i]]$score, bw = bw[i] )
      cat( "density bandwidth:", tmp$bw, "\n" )
      if( rotate )
        plot( tmp$y, tmp$x, type = "l", ylab = title[i],
             xlab = "Relative Frequency", main = "" )
      else
        plot( tmp$x, tmp$y, type = "l", xlab = title[i],
             ylab = "Relative Frequency", main = "" )
      ## standard normal reference density
      tmpd <- dnorm( tmp$x )
      if( rotate )
        lines( tmpd, tmp$x, lty = 2, col = "blue" )
      else
        lines( tmp$x, tmpd, lty = 2, col = "blue" )
      ## region where empirical density exceeds twice the reference
      tmphi <- 2 * tmpd < tmp$y
      xx <-  range( tmp$x[ abs( tmp$x ) < tmplvl & !tmphi] )
      if( rotate ) {
        lines( tmp$y * tmphi, tmp$x, lty = 4, col = "red" )
        abline( h = c(-1,1) * tmplvl, lty = 3, col = "red" )
        axis( 2, xx, labels = round( xx, 1 ))
        lines(( tmp$y - (1 - p1) * tmpd ) / p1, tmp$x, lty = 5, col = "purple" )
      }
      else {
        lines( tmp$x, tmp$y * tmphi, lty = 4, col = "red" )
        abline( v = c(-1,1) * tmplvl, lty = 3, col = "red" )
        axis( 1, xx, labels = round( xx, 1 ))
        lines( tmp$x, ( tmp$y - (1 - p1) * tmpd ) / p1, lty = 5, col = "purple" )
      }
    }
  }
  invisible( tmp )
}
### Organize scores from pickgene structure
pickedchisq <- function( pick, show = names( pick ),
                        title = "Squared Distance",
                        plotit = TRUE, alpha = .05 )
{
  ## Density plot of the summed squared pickgene scores against the
  ## chi-square reference with length(show) degrees of freedom.
  ## Returns (invisibly) the density object, or NULL if plotit = FALSE.

  ## fixed: `tmp` was undefined when plotit = FALSE, making
  ## invisible(tmp) error; initialize it so that path returns NULL
  tmp <- NULL
  pick <- pick$score
  if( is.numeric( show ))
    show <- names( pick )[show]
  else {
    if( any( is.na( match( show, names( pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick ), collapse = ", " )))
  }
  n <- nrow( pick[[1]] )
  if( plotit ) {
    ## multiple-comparison-adjusted chi cutoff
    tmplvl <- sqrt( qchisq(( 1 - alpha ) ^ ( 1 /n ), length( show )))
    sumsq <- 0
    for( i in show )
      sumsq <- sumsq + pick[[i]]$score^2
    tmp <- density( sumsq )
    plot( tmp$x, tmp$y, type = "l", log = "x", xlab = title,
             ylab = "Relative Frequency", main = "" )
    ## chi-square reference density
    tmpd <- dchisq( tmp$x, length( show ))
    lines( tmp$x, tmpd, col = "blue" )
    ## region where empirical density exceeds twice the reference
    tmphi <- 2 * tmpd < tmp$y
    xx <-  max( tmp$x[ tmp$x < tmplvl & !tmphi ] )
    lines( tmp$x, tmp$y * tmphi, col = "red" )
    abline( v = tmplvl, col = "red" )
    axis( 1, xx, labels = round( xx, 1 ))
    lines( tmp$x, tmp$y - tmpd, col = "purple" )
  }
  invisible( tmp )
}
holms <- function( x, alpha = .05, cut = TRUE ) {
  ## Plot sorted two-sided normal p-values for the scores in `x`
  ## against Bonferroni, Sidak, and Holm cutoff lines.
  ## Zero p-values are nudged to half the smallest positive one so
  ## they survive the log-scale plot.  Returns the (possibly cut)
  ## sorted p-values invisibly.
  n <- length( x )
  pvals <- sort( 2 * ( 1 - pnorm( abs( x ))))
  pvals[pvals == 0] <- min( pvals[pvals > 0] ) / 2
  if( cut )
    pvals <- pvals[ pvals <= alpha ]
  np <- length( pvals )

  plot( pvals, log = "y", xlab = "rank order", ylab = "raw p-value" )
  axis( 2, alpha )

  ## Bonferroni: alpha / n
  abline( h = c(alpha, alpha/n), col = "red" )
  text( np, 0.5 * ( alpha / n ), "Bonferroni", adj = 1, col = "red" )

  ## Sidak: 1 - (1-alpha)^(1/n)
  sidak <- 1-(1-alpha)^(1/n)
  abline( h = sidak, col = "blue" )
  text( np, 2 * sidak, "Sidak", adj = 1, col = "blue" )

  ## Holm: step-down alpha / (n - rank + 1)
  lines( alpha / ( 1 + n - seq( pvals )), col = "purple" )
  text( np, alpha / sqrt(n), "Holms", adj = 1, col = "purple" )

  ## vertical markers at the last rejection for each procedure
  abline( v = max( seq( pvals )[ pvals < alpha / n ] ), col = "blue", lty = 2  )
  abline( v = max( seq( pvals )[ pvals < alpha / ( 1 + n - seq( pvals )) ] ),
    col = "purple", lty = 2 )

  ## uniform reference points
  points( seq( pvals ) / ( 1 + n ), col = "grey" )
  invisible( pvals )
}
### Organize scores from pickgene structure
### Organize scores from pickgene structure
## Scatter-plot two contrasts' scores with normal and chi-square
## rejection boundaries, then assemble per-probe score/fold/pvalue
## summaries for every probe picked in any contrast.
## pick        : pickgene result with $score and $pick components
## description : optional vector of probe descriptions, indexed by
##               probe name (assumed named by probe — TODO confirm)
## show        : two contrast names or indices to plot
## Returns list(score, fold, pvalue[, description]) restricted to the
## picked probes.
pickedscore <- function( pick, description, show = 1:2, alpha = .05,
                        xlab = show[1], ylab = show[2], main = "",
                        mfrow = c(1,1))
{
  ## collect one score column per contrast into a data frame
  score <- list()
  for( i in names( pick$score ))
    score[[i]] <- pick$score[[i]]$score
  score <- data.frame( score )

  ## resolve `show` (recycled to length 2) to contrast names
  show <- array( show, 2 )
  if( is.numeric( show ))
    show <- names( pick$pick )[show]
  else {
    if( any( is.na( match( show, names( pick$pick )))))
      stop( paste( "show choices do not match contrasts:",
                  paste( names( pick$pick ), collapse = ", " )))
  }
  if( !is.null( mfrow ))
    par( mfrow = mfrow, pty = "s" )

  ## equal-scale scatter of the two chosen contrasts
  library(MASS)
  eqscplot( score[[show[1]]], score[[show[2]]], type = "n",
           xlab = xlab, ylab = ylab )
  title( main )
  points( score[[show[1]]], score[[show[2]]], cex = .25 )
  n <- length( score[[1]] )
  ## per-axis rejection cutoff (multiple-comparison adjusted)
  tmplvl <- qnorm( adjustlevel( n * 2, alpha ) / 2, lower.tail
                  = FALSE)
  abline( h = c(-1,1)*tmplvl, v = c(-1,1) * tmplvl, col = "red" )
  ## joint chi-square rejection circle
  tmp <- sqrt( qchisq(( 1 - alpha ) ^ ( 1 /n ), 2 ))
  x <- seq( -tmp, tmp, length = 200)
  y <- sqrt( tmp^2 - x^2 )
  lines(x,y,col="blue")
  lines(x,-y,col="blue")
  ## highlight points beyond the per-axis cutoff on either contrast
  tmp <- abs( score[[show[1]]] ) > tmplvl | abs( score[[show[2]]] ) >
    tmplvl
  points( score[[show[1]]][tmp], score[[show[2]]][tmp], cex = .75, col =
         "blue" )

  ## union of probes picked in any contrast, sorted by name
  probes <- character()
  for( i in names( pick$pick ))
    probes <- c( probes, as.character( pick$pick[[i]]$probe ))
  probes <- sort( unique( probes ))
  ## restrict score rows to picked probes and label rows by probe
  score <- score[ match( probes, pick$score[[1]]$probe, nomatch = 0 ), ]
  dimnames( score ) <- list( probes, names( score ))

  ## fill probe-by-contrast fold and p-value matrices (NA = not picked)
  pick <- pick$pick
  fold <- pvalue <- matrix( NA, length( probes ), length( pick ),
                           dimnames = list( probes, names( pick )))
  for( i in names( pick )) {
    fold[ as.character( pick[[i]]$probe ), i ] <- pick[[i]]$fold
    pvalue[ as.character( pick[[i]]$probe ), i ] <- pick[[i]]$pvalue
  }
  ll <- list( score = score, fold = fold, pvalue = pvalue )
  ## add descriptions if provided
  if( !missing( description ))
    ll$description <- description[probes]
  ### want to add probe description to list
  ll
}
pickedpair <- function( x, columns, description,
                       probe = "Probe.Set",
                       renorm = c(sqrt(2),sqrt(6)),
                       pick = pickgene( x[,columns],
                         x[,probe], ...,
                         renorm = renorm, plotit = FALSE ),
                       main = "", ... )
{
  ## Four-panel summary of a pickgene run: additive-vs-dominance
  ## score scatter plus one score histogram per effect.
  ## Returns the pickedscore summary invisibly.
  par(mfrow = c(2, 2), pty = "s")

  out <- pickedscore( pick, description, mfrow = NULL,
                      xlab = "Additive Effect",
                      ylab = "Dominance Effect",
                      main = main )

  ## dominance effect rotated, additive effect upright
  pickedhist( pick, 2, title = "Dominance Effect", mfrow = NULL,
             rotate = TRUE )
  pickedhist( pick, 1, title = "Additive Effect", mfrow = NULL )

  invisible( out )
}
robustbox <- function(y,x,nslice=400,
  xlab = "Log Average Intensity", ylab = "Standardized Difference",
  shrink = FALSE,
  crit = qnorm( adjustlevel( n , overalllevel ) / 2, lower.tail = FALSE),
  overalllevel = .05, cex = 0.1, lwd = 2, plotit = TRUE )
{
  ## Sliced boxplot-statistic summary of y against x: cut x into
  ## nslice rank-based slices, smooth each of the five boxplot
  ## statistics across slice medians, and draw the smoothed curves.
  ## shrink = TRUE clips the y-axis at +/- crit.
  n <- length(x)
  ylim <- if( shrink )
    c(-1, 1) * crit
  else
    range( y )

  ## sort both vectors by x
  ord <- order(x)
  x <- x[ord]
  y <- y[ord]

  ## rank-based slicing into (roughly) equal-count groups
  k <- floor(n / nslice)
  slicef <- factor(cut(rank(x), breaks = c(k * (0:(nslice - 1)), n)))
  slicex <- unlist(tapply(x, slicef, median), recursive = TRUE, use.names = FALSE)

  ## 5 boxplot statistics per slice, one column per statistic
  slicebox <- t( matrix( unlist( tapply( y, slicef, function(z)
    boxplot.stats(z)$stats ), recursive = TRUE, use.names=FALSE), 5 ))
  robmed <- apply( slicebox, 2, median )

  plot(x, y, type = "n", xlab = xlab, ylab = ylab, ylim = ylim )
  if( plotit )
    points(x, y, cex = cex)

  for( j in seq( ncol( slicebox ))) {
    if( diff( range( slicebox[,j] )) > 0 ) {
      ## smooth this statistic across slices and draw the fitted curve
      fit <- smooth.spline( slicex, slicebox[,j] )
      lines( x, predict( fit, x )$y, col = "blue", lwd = lwd )
    }
    else
      ## constant statistic: draw a flat reference line instead
      lines( range( x ), rep( robmed[j], 2 ), col = "red", lwd = lwd )
  }
  invisible()
}
###########################################################################
##
## $Id: yilin.R,v 1.2 2003/12/30 19:17:27 jgentry Exp $
##
##     Copyright (C) 2000 Yi Lin and Brian S. Yandell
##
## This program is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by the
## Free Software Foundation; either version 2, or (at your option) any
## later version.
##
## These functions are distributed in the hope that they will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.
##
## The text of the GNU General Public License, version 2, is available
## as http://www.gnu.org/copyleft or by writing to the Free Software
## Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
############################################################################
## Back-compatible alias: two-condition gene picking (see pickgene.two).
pickgene2 <- function( ... )
  pickgene.two( ... )
## Back-compatible alias: polynomial-contrast picking (see pickgene.poly).
multipickgene<-function( ... )
  pickgene.poly( ... )
## Convenience wrapper: run pickgene with a two-way ANOVA factor layout
## by combining the two factor level counts into faclevel.
twowayanovapickgene <- function( x, fac1level, fac2level, ... )
  pickgene( x, faclevel = c(fac1level,fac2level), ... )
###########################################################################
## the following function robustly estimate the center and scale.
robustscale<-function(y,x,nslice=400,corcenter=TRUE,decrease=TRUE){
  ## Robustly estimate a running center and scale of y as a function
  ## of x by slicing on the ranks of x, taking slice medians/MADs, and
  ## smoothing across slices.
  ## corcenter : also estimate a running center (else center is 0)
  ## decrease  : force the scale to be unimodal (peak near the middle)
  ## Returns list(center, scale, x, y) with x, y sorted by x.
  n <- length(x)
  ord <- order(x)
  x <- x[ord]
  y <- y[ord]

  ## rank-based slices of (roughly) equal counts
  k <- floor(n / nslice)
  slicef <- factor(cut(rank(x), breaks = c(k * (0:(nslice - 1)), n)))
  slicex <- unlist(tapply(x, slicef, median), recursive = TRUE, use.names = FALSE)

  if (corcenter) {
    slicemedian <- unlist(tapply(y, slicef, median), recursive = TRUE, use.names = FALSE)
    slicemad <- unlist(tapply(y, slicef, mad), recursive = TRUE, use.names = FALSE)
  }
  else {
    ## center fixed at zero: MAD about 0
    slicemad <- unlist(tapply(y, slicef, mad, center = 0), recursive = TRUE,
                       use.names = FALSE)
  }

  ## smooth log(MAD) across slices when enough positive slices exist,
  ## else fall back to a constant scale
  pos <- slicemad > 0
  if (sum(pos) > 3) {
    logscale.fit <- smooth.spline(slicex[pos], log(slicemad[pos]))
    robscale <- exp(predict(logscale.fit, x)$y)
  }
  else {
    logscale.fit <- list(y = rep(log(median(slicemad[pos])),
                           length(slicemad)))
    robscale <- rep(median(slicemad), length(x))
  }

  if (decrease) {
    ## enforce unimodal scale: nondecreasing up to the middle,
    ## nonincreasing after
    half <- ceiling(n / 2)
    robscale[half:1] <- cummax(robscale[half:1])
    robscale[half:n] <- cummin(robscale[half:n])
  }

  if (corcenter) {
    if (length(slicex) > 3) {
      ## smooth the scale-standardized slice medians, then rescale
      center.fit <- smooth.spline(slicex, slicemedian / (exp(logscale.fit$y)))
      robcenter <- predict(center.fit, x)$y * robscale
    }
    else
      robcenter <- rep(median(slicemedian), length(x))
    ## recalibrate the scale using residual MAD
    adjustedrobscale <- mad((y - robcenter) / (robscale)) * robscale
  }
  else {
    robcenter <- rep(0, n)
    adjustedrobscale <- mad((y / robscale), center = 0) * robscale
  }
  return(list(center = robcenter, scale = adjustedrobscale, x = x, y = y))
}

###########################################################################
## significance level adjustment for multiple tests. (similar to Bonferroni)
adjustlevel<-function(ntest,alpha){
  ## Per-test significance level giving overall level `alpha` across
  ## `ntest` independent tests (Sidak-style; similar to Bonferroni):
  ## 1 - (1 - alpha)^(1/ntest), computed via exp/log.
  1 - exp(log(1 - alpha) / ntest)
}

###########################################################################
## the following function picks genes in 2 condition case with logratio (y)
## and logofproduct (intensity). It also serves as the building block for
## the later functions for multi-condition and ANOVA.
## Pick differentially expressed genes in the two-condition case from
## the log-ratio `y` and log-product `intensity`, using robustscale to
## standardize y against intensity.  Building block for the
## multi-condition and ANOVA pickers.
## y           : log ratio per gene
## intensity   : log product (average log intensity) per gene
## geneid      : gene identifiers, reordered internally by intensity
## singlelevel : per-test level(s); the first is the pick cutoff,
##               additional levels only add dashed reference curves
## npickgene   : < 0 picks automatically by cutoff, else picks exactly
##               npickgene genes by |score|
## meanrank    : TRUE if intensity is rank-based (linear x axis)
## negative    : indices of genes excluded from scaling (plotted red)
## Returns list(pick = data.frame(probe, average, fold, pvalue),
##              score = data.frame(intensity, score, lower, upper, probe)).
pickgene.two <- function ( y, intensity,
                          geneid = 1:length(y),
                          ## set elsewhere
                          singlelevel=0.0001,
                          ## automatic pick genes (or pick npickgene if >0)
                          npickgene = -1, meanrank = FALSE,
                          xlab="Average Intensity",
                          ylab="Trend", main = "",
                          plotit = TRUE, col = "blue",
                          negative = numeric( 0 ),
                          ... ) {
  n <- length( y ) - length( negative )
  ## robust running center/scale of y vs intensity, excluding negatives;
  ## note robustscale returns x,y sorted by intensity, so geneid is
  ## reordered to match
  if( length( negative )) {
    sc <- robustscale( y[-negative], intensity[-negative], ... )
    geneid <- geneid[ order( intensity[-negative] ) ]
  }
  else {
    sc <- robustscale( y, intensity, ... )
    geneid <- geneid[ order( intensity ) ]
  }
  ## standardized score per gene
  z <- ( sc$y - sc$center ) / ( sc$scale )
  if( meanrank )
    logxy <- "y"
  else {
    logxy <- "xy"
    sc$x <- exp( sc$x )
  }
  ## back to raw (ratio) scale for plotting and fold change
  sc$y <- exp( sc$y )
  ## two-sided normal cutoff at the primary per-test level
  zcut <- qnorm( singlelevel[1] / 2, lower.tail = FALSE)

  sc$lower <- exp( sc$center - zcut * sc$scale )
  sc$upper <- exp( sc$center + zcut * sc$scale )
  if ( npickgene < 0 ) {
    ## automatic picking by cutoff
    pickedzscore <- abs(z) > zcut
    if( plotit ) {
      rx <- range( sc$x )
      ry <- range( sc$y )
      if( length( negative )) {
        ry <- range( c( ry, exp( y[negative] )))
        if( !meanrank )
          rx <- range( c( rx, exp( intensity[negative] )))
      }
      plot( rx, ry, type = "n", log = logxy, xlab=xlab, ylab=ylab )
      if( main != "" )
        title( main )

      ## critical lines
      lines( sc$x, sc$lower, col = "red" )
      lines( sc$x, sc$upper, col = "red" )
      ## dashed curves for any additional significance levels
      if( length( singlelevel ) > 1 )
        for( i in seq( 2, length( singlelevel ))) {
          zcut <- qnorm( singlelevel[i] / 2, lower.tail = FALSE)
          lines( sc$x, exp( sc$center - zcut * sc$scale ), lty = 2 )
          lines( sc$x, exp( sc$center + zcut * sc$scale ), lty = 2 )
        }
      ## negligible contrasts
      points( sc$x[!pickedzscore], sc$y[!pickedzscore], cex = 0.25 )
      ## significant contrasts
      points( sc$x[pickedzscore], sc$y[pickedzscore], cex = 0.75, col = col )
      ## negative values
      if( length( negative ))
        points( exp( intensity[negative] ), exp( y[negative] ),
               cex = 0.25, col = "red" )

      # center line
      lines( sc$x, exp( sc$center ), col = "red", lty = 2 )
    }
    ## get the order and zscore for the top picks
    pickedorder <- seq( n )[pickedzscore]
    z <- z[pickedorder]
    tmp <- order( (-1) * abs( z ))
    pickedorder <- pickedorder[tmp]
    pickedzscore <- z[tmp]
  }
  else {
    ## fixed-count picking: top npickgene genes by |score|
    pickedorder <- order( (-1) * abs( z ))[1:npickgene]
    pickedzscore <- z[pickedorder]
  }
  ## data frame of picked genes
  ## fold change reported symmetrically: ratios < 1 become -1/ratio
  fold <- sc$y[pickedorder]
  fold[ fold < 1 ] <- - 1 / fold[ fold < 1 ]
  pick <- data.frame( probe = geneid[pickedorder],
                     average = round( sc$x[pickedorder], 3 ),
                     fold = round( fold, 2 ),
                     pvalue = round( 1 - ( 1 - 2 * pnorm( abs( pickedzscore ),
                       lower.tail = FALSE )) ^ n, 4 ),
                     row.names = NULL )
  ## score structure with centered values
  sc <- as.data.frame( sc )
  sc$y <- ( log( sc$y ) - sc$center ) / sc$scale
  sc$center <- sc$scale <- NULL
  names( sc ) <- c("intensity","score","lower","upper")
  sc$probe <- geneid
  list( pick = pick, score = sc )
}

###########################################################################
## the following function picks genes in the situation of a linear sequence
## of conditions. The argument condi can be numerical (quantitative description
## of the conditions) or ordinal (1 : numberofconditions), depending on the
## situation. The function picks genes according to linear trend (d=1), or
## linear trend and quadratic trend (d=2). Cubic trend can be included if d is
## set to be 3. The argument x is a (number of genes) by (number of conditions)
## matrix consisting of log measurements.
pickgene.poly <- function( x, # data matrix (genes x conditions)
                          condi = 1:ncol(x), # condition levels
                          geneID = NULL,
                          overalllevel = 0.05,
                          npickgene = -1,
                          d = 2, # polynomial order (1-3)
                          ylabs = paste( contrastnames, "Trend" ),
                          contrastnames = c("Linear","Quadratic","Cubic"),
                          ... )
{
  ## Pick genes showing polynomial trends across an ordered sequence of
  ## conditions.  Computes orthogonal polynomial contrasts of orders 1:d
  ## and calls pickgene.two() once per contrast.
  ##
  ## x            matrix of log measurements, genes in rows, conditions in cols
  ## condi        numeric or ordinal condition levels; must have one value per
  ##              column of x, since poly(condi,d) is length(condi) x d and is
  ##              right-multiplied into x.  (The old default 1:min(ncol(x),2)
  ##              therefore failed whenever ncol(x) > 2; 1:ncol(x) is identical
  ##              when ncol(x) == 2 and correct otherwise.)
  ## geneID       gene identifiers; row numbers used when NULL or wrong length
  ## overalllevel experiment-wise significance level, adjusted over all tests
  ## npickgene    total number of genes to pick (< 1 means use overalllevel)
  ## d            highest polynomial order to test (1, 2, or 3)
  ## ylabs        y-axis labels, one per contrast
  ## ...          passed through to pickgene.two()
  ##
  ## Returns a list with elements pick and score, each a list with one
  ## entry per polynomial contrast, named by contrastnames.
  if( d > 3 || d < 1 )
    return( cat( "d should be 1, 2, or 3.\n" ))
  x <- as.matrix( x )
  if( ncol( x ) < 2 )
    return( cat( "x must have at least 2 columns.\n" ))
  ## cannot fit more polynomial terms than (number of conditions - 1)
  d <- min( d, ncol( x ) - 1 )
  if( is.null( geneID ) || length( geneID ) != nrow( x ))
    geneID <- seq_len( nrow( x ))
  ## orthogonal polynomial contrasts and per-gene mean intensity
  y <- x %*% poly( condi, d )
  intensity <- x %*% rep( 1, length( condi )) / length( condi )

  ## adjust the per-test level for nrow(x) * d comparisons and split the
  ## requested number of picks evenly across the d contrasts
  singlelevel <- adjustlevel( nrow( x ) * d, overalllevel )
  npickgene <- floor( npickgene / d )
  pickedgene <- list()
  pickedgene$pick <- pickedgene$score <- list()

  ## one plot panel per polynomial contrast
  par( mfrow = c( d, 1 ))
  for( i in seq_len( d )) {
    tmp <- pickgene.two( y[,i], intensity,
                        geneid = geneID,
                        singlelevel = singlelevel,
                        npick = npickgene,
                        ylab = ylabs[i],
                        ... )
    pickedgene$pick[[i]] <- tmp$pick
    pickedgene$score[[i]] <- tmp$score
  }
  names( pickedgene$pick ) <- contrastnames[seq_len( d )]
  names( pickedgene$score ) <- contrastnames[seq_len( d )]
  pickedgene
}

###########################################################################
## the following function picks genes in the two-factor (ANOVA) situation.
## It ignores interaction, and picks genes according to trends in each factor.
## Similar functions can easily be written in the same vein for the
## multi-factor situation, taking (some) interactions into consideration.
model.pickgene <- function( faclevel,
                           facnames = letters[ seq( length( faclevel )) ],
                           contrasts.fac = "contr.poly",
                           collapse = "+",
                           show = NULL,
                           renorm = 1,
                           modelexpr = formula( paste( "~",
                             paste( facnames, collapse = collapse ))),
                           contrasts.list = contr.list ) {
  ## Construct the model matrix of factor contrasts used by pickgene().
  ##
  ## faclevel       vector giving the number of levels of each factor
  ## facnames       factor names (default a, b, ...)
  ## contrasts.fac  contrast function name(s), recycled across factors
  ## collapse       "+" for an additive model, "*" to include interactions
  ## show           indices of non-intercept columns to rescale by renorm
  ## renorm         multipliers for the non-intercept columns (1 = no change)
  ## modelexpr      model formula built from facnames and collapse
  ## contrasts.list unused in the body; retained for back-compatibility (its
  ##                default lazily refers to the contr.list built below)
  ##
  ## Returns the model matrix with one row per factor-level combination.

  ## grid of all level combinations, last factor varying slowest
  dd <- expand.grid( apply( as.matrix( rev( faclevel )), 1, seq ))
  names( dd ) <- rev( facnames )
  for( i in facnames )
    dd[[i]] <- factor( dd[[i]] )

  ## one contrast specification per factor
  contr.list <- as.list( array( contrasts.fac, length( faclevel )))
  names( contr.list ) <- facnames
  ## spell out contrasts.arg; "contrasts =" relied on partial matching
  mat <- model.matrix( modelexpr, dd, contrasts.arg = contr.list )
  if( any( renorm != 1 )) {
    if( is.null( show ))
      show <- seq_len( ncol( mat ) - 1 ) # seq_len: safe when ncol(mat) == 1
    for( i in seq_along( show ))
      mat[,1+i] <- mat[,1+i] * renorm[show[i]]
  }
  mat
}

###########################################################################
pickgene <- function( data,
                     geneID = 1:nrow(data),
                     overalllevel=0.05,
                     npickgene= -1,
                     marginal = FALSE,
                     rankbased = TRUE, allrank = FALSE,
                     meanrank = FALSE,
                     offset = 0,
                     modelmatrix = model.pickgene( faclevel, facnames,
                       contrasts.fac, collapse, show, renorm ),
                     faclevel = ncol( data ),
                     facnames = letters[ seq( length( faclevel )) ],
                     contrasts.fac = "contr.poly",
                     show = NULL,
                     main = "", renorm = 1, drop.negative = FALSE,
                     plotit = npickgene < 1,
                     mfrow = c(nr,nc), mfcol = NULL,
                     ylab = paste( shownames, "Trend" ),
                     ... ){
  data <- as.matrix( data )
  if( rankbased | allrank ) {
    tmpfn <- function( data ) qnorm( rank( data ) / ( 1 + length( data )))
    data <- if( allrank )
      matrix( tmpfn( data ), nrow( data ), ncol( data ),
             dimnames = dimnames( data ))
    else
      apply( data, 2, tmpfn)
    negative <- numeric(0)
  }
  else { # log transformation, and drop zeroes
    xmin <- apply( data, 1, min, na.rm = TRUE )
    negative <- seq( length( xmin ))[ xmin <= -offset ]
    data <- offset + data
    if( drop.negative ) {
      data <- data[-negative,]
      warn <-  "probes dropped with values below"
    }
    else {
      tmp <- c( data ) > 0
      data[!tmp] <- min( data[tmp] ) / 2
      warn <-  "probes truncated to"
    }
    if( length( negative ))
      warning( paste( length( negative ), warn, offset, "\n" ))
    if( drop.negative )
      negative <- numeric( 0 )
    data <- log( data )
  }

  collapse <- if( marginal ) "+" else "*"
  numcontr <- ncol( modelmatrix ) - 1
  y <- data %*% modelmatrix
  if( is.null( show ))
    show <- seq( numcontr )

  if( meanrank )
    intensity <- rank( y[,1] )
  else
    intensity <- y[,1] / prod( faclevel )
  singlelevel <- adjustlevel( nrow( data ) * numcontr, overalllevel )
  npickgene <- floor ( npickgene / numcontr )

  if( plotit ) {
    nc <- min( 2, ceiling( length( show ) / 3 ))
    nr <- min( 3, ceiling( length( show ) / nc ))
    if( missing( mfcol )) {
      if( !is.null( mfrow ))
        par( mfrow = mfrow )
    }
    else {
      if( !is.null( mfcol ))
        par( mfcol = mfcol )
    }
  }
  pickedgene <- list()
  pickedgene$pick <- pickedgene$score <- list()
  shownames <- dimnames( y )[[2]][1+show]
  main <- array( main, length( shownames ), dimnames = list( shownames ))
  names( ylab ) <- shownames
  for( i in shownames ) {
    tmp <- pickgene.two( y[,i], intensity, geneID, singlelevel,
                        npickgene, main = main[i], plotit = plotit,
                        meanrank = meanrank,
                        negative = negative, ylab = ylab[i], ... )
    pickedgene$pick[[i]] <- tmp$pick
    pickedgene$score[[i]] <- tmp$score
  }
  invisible(pickedgene)
}
