.packageName <- "vsn"
## Extract the intensity matrix from the argument "intensities"
## Probably this should, and may in a future version be done via S3- or S4
## method dispatching.
getIntensityMatrix = function(intensities, verbose) {
  y = switch(class(intensities),
    matrix     = { if (!is.numeric(intensities))
                     stop("'intensities' was found to be a matrix, but is not numeric.")
                   intensities
                 },
    data.frame = {  if (!all(sapply(intensities, is.numeric)))
                      stop("'intensities' was found to be a data.frame, but contains non-numeric columns.")
                    as.matrix(intensities)
                  },
    exprSet    = { exprs(intensities)
                 },
    marrayRaw  = { nrslides = as.integer(ncol(intensities@maRf))
                   nrspots  = as.integer(nrow(intensities@maRf))
                   if (verbose)
                     cat(sprintf("Converting marrayRaw (%d spots, %d slides) to %dx%d matrix.\n",
                                 nrspots, nrslides, nrspots, as.integer(2*nrslides)),
                         "Gf-Gb in odd columns, Rf-Rb in even columns.\n")
                   tmp = matrix(NA, nrow=nrspots, ncol=2*nrslides)
                   tmp[, (1:nrslides)*2-1 ] = intensities@maGf - intensities@maGb
                   tmp[, (1:nrslides)*2   ] = intensities@maRf - intensities@maRb
                   tmp
                 },
    stop(paste("'intensities' has class ", class(intensities),
         ". Permitted are: matrix, data.frame, exprSet, marrayRaw", sep=""))
  )  ## end of switch statement

  if (any(is.na(y)))
    stop(paste("'intensities' must not contain NA values.\n",
             "This could indicate that the input data has already undergone some\n",
             "thresholding or transformation (log?), and may not satisfy the\n",
             "requirements of the multiplicative-additive noise model.\n",
             "If you are sure that it is meaningful to proceed, please\n",
             "consider calling vsn on a subset of data where all values\n",
             "are defined, and then use vsnh on the full set of data.\n"))

  if (ncol(y)<=1) 
    stop(paste("'intensities' must be a matrix with at least two columns.\n",
               "Please read the documentation and the paper\n",
               "(Huber et al., Bioinformatics 18 (2002) S96-S104).\n"))
  return(y)
}
     
log.na = function(x, ...) log(ifelse(x>0, x, NA), ...)
meanSdPlot = function(x,
                      ranks=TRUE,
                      xlab = ifelse(ranks, "rank(mean)", "mean"),
                      ylab = "sd",
                      pch  = ".",
                      col, ...) {
  stopifnot(is.logical(ranks), length(ranks)==1, !is.na(ranks))

  ## the coloring
  pcol <- "black"
  if(missing(col)) {
    if(inherits(x, "exprSet")) {
      sel <- preproc(description(x))$vsnTrimSelection
      if(!is.null(sel)){
        if(!is.logical(sel) || length(sel)!=nrow(exprs(x)) || any(is.na(sel))) 
          stop(paste("The element \"vsnTrimSelection\" of the preprocessing",
                     "slot of the description slot of \"x\" is not valid.",
                     "You may remove it and try again.\n"))
        pcol <- ifelse(sel, "blue", "black")
      }
    }
  } else {
    pcol <- col
  }
  
  if(inherits(x, "exprSet"))
    x <- exprs(x)
  
  if(!inherits(x, "matrix"))
    stop("'x' must be a matrix or an exprSet (or it may inherit from these).")
  
  n    <- nrow(x)
  px   <- rowMeans(x, na.rm=TRUE)
  py   <- rowSds(  x, na.rm=TRUE)
  rpx  <- rank(px, na.last=FALSE)
  
  ## running median with centers at dm, 2*dm, 3*dm, ... and width 2*dm
  dm        <- 0.05
  midpoints <- seq(dm, 1-dm, by=dm)
  within    <- function(x, x1, x2) { x>=x1 & x<=x2 }
  mediwind  <- function(mp) median(py[within(rpx/n, mp-dm, mp+dm)], na.rm=TRUE)
  rq.sds    <- sapply(midpoints, mediwind)

  if(ranks) {
    px  <- rpx
    pxl <- midpoints*n
  } else {
    pxl <- quantile(px, probs=midpoints, na.rm=TRUE)
  }
  plot(px, py, pch=pch, xlab=xlab, ylab=ylab, col=pcol, ...)
  lines(pxl, rq.sds, col="red", type="b", pch=19)
}

  
##---------------------------------------------------------
## Enumerate all the subsets of size k of the integers 1:n.
## The result is returned in a matrix with k rows and
## (n choose k) columns
## This function is not needed for VSN, but it is nice to
## have it for the examples / the vignette and I haven't
## yet found it anywhere else.
##---------------------------------------------------------
nchoosek = function(n, k) {
  if (!is.numeric(n)||!is.numeric(k)||is.na(n)||is.na(k)||length(n)!=1||length(k)!=1)
    stop("arguments must be non-NA numeric scalars.")
  if (k>n||k<0)
    stop("Arguments must satisfy 0 <= k <= n.")

  nck = choose(n, k)
  res = matrix(NA, nrow=k, ncol = nck)
  res[, 1] = 1:k
  j = 2
  repeat {
    res[, j] = res[, j-1]
    i = k
    repeat {
      res[i,j] = res[i,j]+1
      if(res[i,j] <= n-(k-i))
        break
      i = i-1
      stopifnot(i>=1)
    }
    if (i<k)
       res[(i+1):k,j] = res[i,j] + 1:(k-i)
    j = j+1
    if (j>nck) break
  }
  ## plausibility tests
  stopifnot(all(res[, nck]==(n-k+1):n))
  stopifnot(all(res<=n) && all(res>=1))
  return(res)
}

##------------------------------------------------------------
## a wrapper for vsn to be used as a normalization method in
## the package affy
##------------------------------------------------------------
normalize.AffyBatch.vsn = function (abatch, subsample = 20000, niter = 4, ...) 
{
  if (!("package:affy" %in% search())) 
    stop("Please load the package affy before calling normalize.AffyBatch.vsn")

  ## ind = the perfect match probes. If length(ind) is larger than the value in
  ## subsample, then only use a random sample of size subsample from these
  ind = unlist(indexProbes(abatch, "pm"))
  if (!is.na(subsample)) {
    if (!is.numeric(subsample)) 
      stop("Argument \"subsample\" must be numeric.")
    if (length(ind) > subsample) 
      ind = sample(ind, subsample)
  }
  
  ## call parameter estimation (on subset of data)
  vsnres = vsn(intensity(abatch)[ind, ], niter=niter, ...)

  ## add parameters to preprocessing slot
  pars = preproc(description(vsnres))$vsnParams
  description(abatch)@preprocessing = c(description(abatch)@preprocessing, list(vsnParams=pars))

  ## apply the transformation (to all data)
  intensity(abatch) = exp(vsnh(intensity(abatch), pars))
  return(abatch)
}

rowSds = function(x, ...) {
  sqr     = function(a) a*a  ## faster than a^2
  n       = rowSums(!is.na(x))
  n[n<=1] = NA
  return(sqrt(rowSums(sqr(x-rowMeans(x, ...)), ...)/(n-1)))
}
##-----------------------------------------------------------
## mu as in the statistical model - expectation of transformed intensities 
## generate sinh(mu) according to model of Newton et al.
## shape parameter a=1, scale theta=1
## see also view.gammadistr.R
##------------------------------------------------------------
sagmbSimulateData <- function(n=8064, d=2, de=0, up=0.5) {
  stopifnot(is.numeric(n),  length(n)==1,
            is.numeric(d),  length(d)==1,
            is.numeric(de), length(de)==1,
            is.numeric(up), length(up)==1)
  sigmaeps <- 0.2
  mu <- asinh(1/rgamma(n, shape=1, scale=1))
  ##------------------------------------------------------------
  ## the calibration parameters: 
  ## additive acl[2:d], multiplicative bcl[2:d]
  ##   (w.l.o.g. acl[1]=0, bcl[1]=1)
  ## acl is drawn from a uniform distribution over the interval
  ##     [-2, +2] * 90%-quantile of mu
  ## log(bcl) is drawn from normal(0,1)
  ##------------------------------------------------------------
  Delta.a <- 0.1*quantile(sinh(mu), 0.9)
  acl <- matrix(c(0, runif(d-1, min=-Delta.a, max=Delta.a)), nrow=n, ncol=d, byrow=TRUE)
  bcl <- matrix(c(1, exp(rnorm(d-1))),                       nrow=n, ncol=d, byrow=TRUE)
  
  ##------------------------------------------------------------
  ## generate simulated data
  ##------------------------------------------------------------
  is.de   <- (runif(n)<de)
  hy     <- matrix(as.numeric(NA), nrow=n, ncol=d)
  hy[,1] <- mu + rnorm(n, sd=sigmaeps)    ## array 1 is a reference
  for (j in 2:d) {
    s      <- 2 * as.numeric(runif(n)<up) - 1
    hy[,j] <- mu + as.numeric(is.de)*s*runif(n, min=0, max=2) + rnorm(n, sd=sigmaeps)
  }
  y <- acl + bcl * sinh(hy)
  return(list(y=y, hy=hy, is.de=is.de))
}

sagmbAssess <- function(h1, sim) {
  stopifnot(all(c("y", "hy", "is.de") %in% names(sim)))
  h2    <- sim$hy
  is.de <- sim$is.de
  
  stopifnot(is.matrix(h1), is.matrix(h2), is.logical(is.de))
  n = nrow(h1)
  d = ncol(h1)
  if(nrow(h2)!=n || ncol(h2)!=d) 
    stop(paste("'h1' and 'h2' should be matrices of the same size, but have ",
         n, "x", d, " and ", nrow(h2), "x", ncol(h2), sep=""))
  stopifnot(length(is.de)==n)

  dh1 <- dh2 <- matrix(nrow=n, ncol=d-1)
  for (j in 2:d) {
    dh1[, j-1] <- h1[, j] - h1[, 1]
    dh2[, j-1] <- h2[, j] - h2[, 1]
  }

  nsum <- (d-1) * sum(!is.de)
  res  <- sqrt(sum((dh1[!is.de,]-dh2[!is.de,])^2) / nsum)
  return(res)
}

##-----------------------------------------------------------------
## Robust calibration and variance stabilization
## (C) Wolfgang Huber 2002-2003
## w.huber@dkfz.de
##-----------------------------------------------------------------
##----------------------------------------------------------------------
## vsn: the main function of this library
## This is one big chunk of a function - but lots of screen space are
## comments, and if you have suggestions on how to improve this function
## by splitting it up into smaller pieces, please come forward!
##----------------------------------------------------------------------
vsn = function(intensities,
               lts.quantile = 0.5,
               verbose      = TRUE,
               niter        = 10,
               cvg.check    = NULL,
               pstart       = NULL,
               describe.preprocessing = TRUE)
{
  ## Bureaucracy: make sure are the arguments are valid and plausible
  mess =  badarg = NULL
  if (!is.numeric(lts.quantile) || (length(lts.quantile)!=1) ||
     (lts.quantile<0.5) || (lts.quantile>1)) {
    badarg = "lts.quantile"
    mess   = "Please specify a scalar between 0.5 and 1."
  }
  if (!is.numeric(niter) || (length(niter)!=1) || (niter<1)) {
    badarg = "niter"
    mess   = "Please specify a number >=1."
  }
  if (!is.null(pstart)) {
    if (!is.numeric(pstart) || length(pstart) != 2*d || any(is.na(pstart)) ||
        any(pstart[(ncol(intensities)+1):(2*ncol(intensities))]<=0)) {
      badarg = "pstart"
      mess   = paste("Please specify a numeric vector of length", 2*ncol(intensities))
    }
  }
  if (!is.logical(verbose)) {
    badarg = "verbose"
    mess   = "Please specify a logical value."
  }
  ## Error handling
  if(!is.null(mess)) {
    mess = paste("The argument ", badarg, " has an invalid value:", get(badarg), "\n", mess, "\n", sep="")
    stop(mess)
  }

  y = getIntensityMatrix(intensities, verbose)

  ## Print welcome message
  if (verbose)
    cat("vsn: ", nrow(y), " x ", ncol(y), " matrix (lts.quantile=", 
        signif(lts.quantile, 2), "). Please wait for ", niter+1, " dots:\n.", sep="")

  ##----------------------------------------------------------------------
  ## guess a parameter scale, set boundaries for optimization,
  ## and, if they are not user-supplied, set the start parameters
  ##----------------------------------------------------------------------
  pscale  = plower = numeric(2*ncol(y))
  pscale[1:ncol(y)] = 1
  for (j in 1:ncol(y)) {
    pscale[ncol(y)+j] = 1/diff(quantile(y[,j], probs=c(0.25, 0.75)))
  }
  ## lower boundary for 'factors': a small positive value
  ## no boundary for 'offsets'
  plower = c(rep(-Inf,ncol(y)), pscale[(ncol(y)+1):(2*ncol(y))]/1e8)
  if (is.null(pstart))
    pstart = c(rep(0,ncol(y)), pscale[(ncol(y)+1):(2*ncol(y))])

  ## factr controls the convergence of the "L-BFGS-B" method. Convergence
  ## occurs when the reduction in the objective is within this factor of
  ## the machine tolerance. Default is 1e7, that is a tolerance of about
  ## 1e-8. Here we use 5e8 to save a little time.
  control     = list(trace=0, maxit=4000, parscale=pscale, factr=5e8)
  optim.niter = 10

  ## a place to save the trajectory of estimated parameters along the iterations:
  params  = matrix(NA, nrow=length(pscale), ncol=niter)
  rownames(params) = c(paste("offs", 1:ncol(y), sep=""), paste("fac", 1:ncol(y), sep=""))
    
  ## Workspace. This has two purposes: 1. we do not need to pass y around,
  ## 2. the intermediate results of "ll" can be used by "grll" (see functions
  ## vsnll, vsngrll)
  ws = new.env(hash=TRUE)

  ##----------------------------------------------------------------------------
  ## In the following, we define two functions: ll, grll. Doing this inside the 
  ## function vsn is one simply way to hide them from outside
  ##----------------------------------------------------------------------------
  ## Profile log likelihood of the model
  ##
  ##    asinh(a_i + b_i * y_ki) = m_k + epsilon_ki
  ##
  ## where k=1..n, i=1..d, y is a n-by-d matrix,
  ## (a_i, b_i) are real parameters to be estimated
  ## epsilon_ki are i.i.d N(0, sigma^2)
  ## and sigma, and all m_k are estimated from the data
  ## ("profiling")
  ##
  ## Argments:
  ## p numeric vector of length 2*d, with elements a_1,...,a_d, b_1,...,b_d
  ##
  ## Return value:
  ## Profile Log-Likelihood
  ##
  ## Side effects:
  ## 1. The function expects the matrix of input data y_ki in the environment ws.
  ## Since the matrix can be large (O(10^4) x O(10^2)), we want to avoid passing
  ## around this matrix by value.
  ## 2. The function stores intermediate results in that environment, too.
  ## They can then be re-used by grll.
  ##
  ## The variables stored in environment ws are
  ## y      the matrix of the y_ki
  ## nry    number of rows of y
  ## ncy    number of columns of y
  ## ly     the matrix of the a_i + b_i * y_ki
  ## asly   the matrix of the asinh(a_i + b_i * y_ki)
  ## res    the matrix of the residuals epsilon_ki
  ## ssq    sum of squared residuals sum(res^2)
  ## p      the call arguments of ll() . With this, when grll()
  ##        is called it can double-check whether it is indeed called
  ##        with the same arguments (see below for details)
  ##----------------------------------------------------------
  ll = function(p) {
    assign("p", p, envir=ws)
    with(ws, {
      offs = matrix(p[     1:ncy ], ncol=ncy, nrow=nry, byrow=TRUE)
      facs = matrix(p[ncy+(1:ncy)], ncol=ncy, nrow=nry, byrow=TRUE)
      ly   = offs + facs * y
      asly = asinh(ly)
      res  = asly - rowMeans(asly) ## residuals
      ssq  = sum(res*res)
      rv   = nry*ncy/2*log(ssq) - sum(log(facs/sqrt(1+ly*ly)))
    } )
    return(get("rv", ws))
  }
  ##--------------------------------------------------------------
  ## Gradient of the profile log likelihood
  ##--------------------------------------------------------------
  grll = function(p) {
    ## Generally, optim() will call the gradient of the objective function (gr)
    ## immediately after a call to the objective (fn) at the same parameter values.
    ## Anyway, we like to doublecheck
    if(any(p!=get("p", ws))) {
      mess = paste(
        "\n\n\The function grll (likelihood-gradient) was called with different\n",
        "parameters than the previous call of ll (likelihood-value).\n",
        "This should never happen. Please contact the package maintainer:\n",
        "w.huber@dkfz.de\n\n")
      error(mess)
    }
    with(ws, {
      dhda      = 1/sqrt(1+ly*ly)
      dlndhdyda = -ly/(1+ly*ly)
      gra       = nry*ncy/ssq*res*dhda - dlndhdyda
      rv        = c(colSums(gra), colSums(gra*y) - nry/p[(ncy+1):(ncy*2)])
    } )
    return(get("rv", ws))
  }
  
  sel     = rep(TRUE, nrow(y))   ## trimmed data points (LTS regression)
  oldh    = Inf  ## for calculating a convergence criterion: earlier result
  cvgcCnt = 0    ## counts the number of iterations that have already met the convergence criterion
  
  ##--------------------------------------------------
  ## begin of the outer LL iteration loop
  ##--------------------------------------------------
  for(lts.iter in 1:niter) {
    assign("y",   y[sel,],            envir=ws)
    assign("nry", length(which(sel)), envir=ws)
    assign("ncy", ncol(y),            envir=ws)

    p0 = pstart
    for (optim.iter in 1:optim.niter) {
      o  = optim(par=p0, fn=ll, gr=grll, method="L-BFGS-B",
                  control=control, lower=plower)
      if (o$convergence==0) next

      if(o$convergence==52) {
        ## ABNORMAL_TERMINATION_IN_LNSRCH
        ## This seems to indicate that a stepwidth to go along the gradient could not be found,
        ## probably because the start point p0 was already right at the optimum. Hence, try
        ## again from a slightly different start point
        ## cat("lts.iter=", lts.iter, "optim.iter=", optim.iter, "pstart was", p0, "now trying ")
        p0 = p0 + runif(length(pstart), min=0, max=0.01) * pscale
        ## cat(p0, "\n")
      } else if(o$convergence==1) {
        ## This seems to indicate that the max. number of iterations has been exceeded. Try again
        ## with more
        ## cat("lts.iter=", lts.iter, "optim.iter=", optim.iter, "maxit was", control$maxit, "now trying ")
        control$maxit = control$maxit*2
        ## cat(control$maxit, "\n")
      } else {
        stop(paste("Likelihood optimization: the function optim() returned the value convergence=",
                   o$convergence, "\nPlease make sure your data is good.",
                   "If so, contact the package maintainer.\n", sep=""))
      }
    }

    if (o$convergence!=0)
      stop(paste("Likelihood optimization did not converge even after", optim.niter, "calls to optim().",
                   "\nPlease make sure your data is good. If the problem persists,",
                   "\nplease contact the package maintainer.\n"))
    if (any(o$par[(ncol(y)+1):(2*ncol(y))]<0))
      stop(paste("Likelihood optimization produced negative parameter estimates in spite of constraints.",
                 "\nPlease contact the package maintainer.\n"))
    if(verbose)
      cat(".")

    # ----------------------------------------
    # selection of points in a LTS fashion
    # 1. calculate residuals; cf. ll()
    # ----------------------------------------
    offs   = matrix(o$par[    1      :   ncol(y) ], nrow=nrow(y), ncol=ncol(y), byrow=TRUE)
    facs   = matrix(o$par[(ncol(y)+1):(2*ncol(y))], nrow=nrow(y), ncol=ncol(y), byrow=TRUE)
    asly   = asinh(offs + facs * y)
    res    = asly - rowMeans(asly)
    rsqres = rowSums(res*res)
    hmean  = rowSums(asly)

    # 2. select those data points within lts.quantile; do this separately for
    # each of nrslice slices along hmean
    nrslice = 5
    group   = ceiling(rank(hmean)/length(hmean)*nrslice)
    grmed   = tapply(rsqres, group, quantile, probs=lts.quantile)
    sel     = (rsqres <= grmed[group])

    params[, lts.iter] = pstart = o$par
    h = vsnh(y, o$par)

    ## Convergence check
    ## after a suggestion from D Kreil 2003, UCgenetics@Kreil.Org
    if(!is.null(cvg.check)) {
      cvgc    = max(abs((h - oldh)/range(h)))
      cvgcCnt = ifelse( cvgc < cvg.check$eps, cvgcCnt + 1, 0 )
      if (verbose)
        cat(sprintf("iter %2d: cvgc=%.5f%%, par=", as.integer(lts.iter), cvgc),
            sapply(o$par, function(x) sprintf("%9.3g",x)),"\n")
      if (cvgcCnt >= cvg.check$n)
        break
      oldh = h
    }
    
  } ## end of for-loop (iter)
  if(verbose)
    cat("\n")

  ## Prepare the return result: an exprSet
  ## The transformed data goes into slot exprs.
  ## If input was allready an exprSet, pass on the values all the other slots.
  ## To the slot description@preprocessing, append the parameters and the
  ##    trimming selection.  
  res = descr = NULL
  if (class(intensities)=="exprSet") {
    res = intensities
    if (class(description(intensities))=="MIAME") {
      descr = description(intensities)
    }
  }
  if(is.null(descr))   descr = new("MIAME")
  if(is.null(res))     res   = new("exprSet", description=descr)

  exprs(res) = vsnh(y, o$par)
  if (describe.preprocessing)
    res@description@preprocessing = append(res@description@preprocessing,
                      list(vsnParams        = params[,ncol(params)],
                           vsnParamsIter    = params,
                           vsnTrimSelection = sel))
  return(res)
}

vsnPlotPar = function(x, what, xlab="iter", ylab=what, ...) {
  d = ncol(exprs(x))
  switch(what,
         factors = { j = d+(1:d) },
         offsets = { j = (1:d)   },
         stop(paste("Second argument \"what\" has undefined value \", what, \"\n", sep="")))
  
  pars = preproc(description(x))$vsnParamsIter
  if(is.null(pars) || !is.matrix(pars) || nrow(pars)!=2*d)
    stop("First argument \"ny\" does not seem to be resulting from vsn.")
  
  matplot(1:ncol(pars), t(pars[j,]), type="b", pch=16, xlab=xlab, ylab=ylab, ...)
}
##---------------------------------------------------------------------
## The "arsinh" transformation
## note: the constant -log(2*facs[1]) is added to the transformed data
## in order to achieve h_1(y) \approx log(y) for y\to\infty, that is,
## better comparability to the log transformation.
## It has no effect on the generalized log-ratios.
##---------------------------------------------------------------------
vsnh = function(y, p) {
  if (!is.matrix(y) || !is.numeric(y))
    stop("vsnh: argument y must be a numeric matrix.\n")
  if (!is.vector(p) || !is.numeric(p) || any(is.na(p)))
    stop("vsnh: argument p must be a numeric vector with no NAs.\n")
  if (2*ncol(y) != length(p))
    stop("vsnh: argument p must be a vector of length 2*ncol(y).\n")

  offs = matrix(p[         1  :  ncol(y) ], nrow=nrow(y), ncol=ncol(y), byrow=TRUE)
  facs = matrix(p[(ncol(y)+1):(2*ncol(y))], nrow=nrow(y), ncol=ncol(y), byrow=TRUE)
  hy   = asinh(offs + facs * y) - log(2*facs[1])
  dimnames(hy) = dimnames(y)
  return(hy)
}
