.packageName <- "gaga"
checkfit.gagafit <- function(gg.fit,x,groups,type='data',...) {
# Plots to check fit of TripleG model. Compares posterior of parameters with method of moments estimates or observed data with posterior predictive. To compare with prior use function sim.gg.
# - x: data used to fit the model
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - gg.fit: model fit with parameter estimates as returned by parest.gagafit
# - type: 'data' for marginal density of the data; 'shape' for shape parameter; 'mean' for mean parameter; 'shapemean' for joint of shape and mean parameters
# - ...: other arguments to be passed to the plot function

if (ncol(x) != length(groups)) stop('The length of argument groups does not match with the number of columns in x')
if ((type!='data') && (type!='shape') && (type!='mean') && (type!='shapemean')) stop('The argument type is not valid')

xpred <- simnewsamples(gg.fit=gg.fit,groupsnew=groups,x=x,groups=groups)
if (type=='data') {
  xnewpdf <- density(xpred$xnew); xpdf <- density(x)
  plot(xpdf,type='l',...); lines(xnewpdf,lty=2,lwd=2); legend(max(xpdf$x),max(xpdf$y),c('Observed data','Posterior predictive'),lty=1:2,lwd=1:2,xjust=1,yjust=1) 
} else if (type=='shape') {
  aest <- rowMeans(x)^2/apply(x,1,'var'); apdf <- density(xpred$anew)
  plot(apdf,lty=1,...); lines(density(aest),lty=2)
  legend(quantile(aest,probs=.95),max(apdf$y),c('Model-based','Moments estimate'),lty=1:2,xjust=0,yjust=1)
} else if (type=='mean') {
  lest <- rowMeans(x); lpdf <- density(xpred$lnew)
  plot(lpdf,lty=1,...); lines(density(lest),lty=2)
  legend(max(lest),max(lpdf$y),c('Model-based','Moments estimate'),lty=1:2,xjust=1,yjust=1)
} else if (type=='shapemean') {
  aest <- rowMeans(x)^2/apply(x,1,'var'); lest <- rowMeans(x)
  plot(xpred$lnew,xpred$anew,...); points(lest,aest,col=2)
}

}
checkfit <- function(gg.fit,x,groups,type='data',...) { UseMethod("checkfit") }
classpred.gagafit <- function(gg.fit,xnew,x,groups,prgroups,ngene=100) {
# Computes posterior prob that a new sample belongs to each group and classifies it to the group with highest prob
# Input
# - gg.fit: fitted GaGa or MiGaGa model, as returned by parest.gagafit
# - xnew: expression levels of the sample to be classified. Only the subset of the genes indicated by 'sel' is used.
# - x: vector with observations used to fit the model (genes in rows, samples in cols).
# - groups: vector indicating what group each column in x corresponds to
# - prgroups: prior probabilities for each group. Defaults to equally probable groups.
# - ngene: number of genes to use for the classifier.
# Output
# - d: numeric value indicating group that the new sample is classified into (according to factor coding in as.integer(as.factor(groups)))
# - posgroups: vector of length *K with the posterior probabilities of each group

gapprox <- TRUE
sel <- (1:nrow(x))[order(gg.fit$pp[,1])][1:ngene]
if (!is.vector(xnew)) stop('xnew must be a vector')
if (!is.numeric(sel)) stop('sel must contain numerical indexes')
if (is(x,"ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(data)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }
if (ncol(x)!=length(groups)) stop('Argument groups must have length equal to number of columns in argument x')
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha)
probclus <- as.double(par$probclus); probpat <- as.double(par$probpat)
nclust <- as.integer(length(probclus))
patterns <- gg.fit$patterns
if (nrow(patterns)!=length(probpat)) stop('Argument patterns must be equal to the length of gg.fit@probEst')

groups <- as.integer(as.integer(as.factor(groups))-1); K <- as.integer(max(groups)+1)
if (missing(prgroups)) prgroups <- rep(1/K,K)
prgroups <- as.double(prgroups)
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
npat <- as.integer(nrow(patterns))
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
sumx <- prodx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
usesumx <- as.integer(0); gapprox <- as.integer(gapprox)

xnew <- as.double(xnew[sel])
sel <- sel-1  #in C vectors start at 0
d <- integer(1); posgroups <- double(K)

z <- .C("sampleclas_ggC",d=d,posgroups=posgroups,xnew,as.integer(length(sel)),as.integer(sel),as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),groups,K,prgroups,probclus,probpat,a0,nu,balpha,nualpha,nclust,npat,as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,usesumx,gapprox)

return(list(d=z$d+1,posgroups=z$posgroups))

}
classpred <- function(gg.fit,xnew,x,groups,prgroups,ngene=100) { UseMethod("classpred") }
dcgamma <- function(x,a,b,c,d,r,s,newton=TRUE) {
# Returns density of a conjugate gamma shape distribution evaluated at x
# Input:
# - x: vector with points to evaluate the density at
# - a,b,c,d,r,s,p: parameters
#  - newton: newton==TRUE tries a Newton step to improve Gamma approximation
# Output: 
# - y: density of a conjugate gamma shape distribution with parameters a,b,c evaluated at x

if ((a<0) | (b<0) | (d<0) | (r<0) | (s<0)) stop('Parameters a,b,d,r,s must be >=0')
if (a==0) { 
  if (b-d<=0) stop('Non-valid parameters. b must be > than (p+1)*d')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+.5*a-.5<=0) stop('Non-valid parameters. b must be > .5-.5*a')
  if (c+a*log(s/a)<=0) stop('Non-valid parameters. c must be > a*log(a/s)')
}

x <- as.double(x); n <- as.integer(length(x)); a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d); r <- as.double(r); s <- as.double(s); y <- double(n); newton <- as.integer(newton)
normk <- as.double(-1)            #indicates the C routine to calculate the normalization constant
z <- .C("dcgammaC",y=y,normk,x,n,a,b,c,d,r,s,newton)
return(z$y)
}
findgenes.gagafit <- function(gg.fit,x,groups,fdrmax=.05,parametric=TRUE,B=500) {
# Computes optimal terminal decision rules and expected terminal utility for several utility functions
# Input:
# - x: matrix or ExpressionSet gene expression data
# - groups: vector indicating to which group does each column of x belong to
# - gg.fit: GaGa or MiGaGa fit, as returned by parest.gagafit
# - fdrmax: restriction on E(FDR). Ignored if util!='fnrstfdr'.
# - parametric: should the FDR be controlled parametrically or non-parametrically
# - B: number of permutations to estimate FDR non-parametrically (ignored if parametric==TRUE)
# - centers: ignored if parametric==TRUE. Indicates the number of clusters of z-scores (passed to kmeans). Note: centers==0 indicates to use no clusters (i.e. gene-specific bootstrap), while centers==1 indicates pooling residuals from all genes as in Storey's procedure.

# Output: list containing
# - efp: expected number of true positives
# - d: pattern to which each gene is assigned to
# - fdr: frequentist estimated FDR that is closest to fdrmax.
# - fdrpar: This is the target FDR used as input for Mueller's optimal Bayesian rule. If parametric==TRUE, this is equal to fdrmax. If parametric==FALSE, it's the Bayesian FDR needed to achieve frequentist estimated FDR=fdrmax.
# - fdrest: list with estimated frequentist FDR for each target Bayesian FDR
# - fnr: Bayesian FNR
# - power: Bayesian power as estimated by E(TP)/E(positives)
# - threshold: If util=='fnrstfdr', optimal threshold for posterior prob of pattern 0 (genes with prob < threshold are declared DE). Ignored otherwise.
# Note: Bayesian estimates are computed with model-based posterior probabilities. Frequentist estimates are obtained under repeated bootstrap sampling


centers <- 1; v <- gg.fit$pp
if (!is.matrix(v)) stop('gg.fit$pp must be a matrix containing posterior probabilities of each expression pattern')
cf <- as.double(2)

if (is(x,"ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(data)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }

groups <- as.integer(as.integer(as.factor(groups))-1); K <- as.integer(max(groups)+1)
if (length(groups) != ncol(x)) stop('groups must have length equal to the number of columns in x')
if (K==1) stop('At least two different groups must be specified')
patterns <- gg.fit$patterns
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha); nualpha <- as.double(par$nualpha); probclus <- as.double(par$probclus); probpat <- as.double(par$probpat)
cluslist <- as.integer(c((0:(length(probclus)-1)),-1))
if (B<10) { warning('B was set to less than 10, too small a number of permutations. Increased to B=10'); B <- 10 }

sumx <- double(nrow(x)*sum(ngrouppat)); prodx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
sumxpred <- double(nrow(x)*sum(ngrouppat)); prodxpred <- double(nrow(x)*sum(ngrouppat)); nobsxpred <- double(sum(ngrouppat))
gapprox <- 1

nsel <- nrow(v); sel <- as.integer((1:nsel)-1)
util <- as.integer(1)
u <- double(1); d <- integer(nrow(v)); fdr <- fnr <- power <- double(1); threshold <- double(1)
z <- .C("utgene_parC",u=u,d=d,fdr=fdr,fnr=fnr,power=power,threshold=threshold,util=util,as.double(cf),nsel,sel,as.double(t(v)),as.integer(ncol(v)),as.double(fdrmax))
fdr <- fdrest <- fdrpar <- z$fdr

if (parametric==FALSE) {
  fdrseq <- as.double(seq(fdrmax/1000,min(fdrmax*2,1),length=1000))
  fdrest <- double(length(fdrseq))
  cat("Finding clusters of z-scores for bootstrap... ")
  m <- rowMeans(x); s <- sqrt((rowMeans(x^2)-rowMeans(x)^2)*ncol(x)/(ncol(x)-1))
  zscore <- (x-m)/s
  if (centers>1) {
    nquant <- min(10,ncol(x))
    qx <- t(matrix(unlist(apply(zscore,1,'quantile',probs=seq(0,1,length=nquant))),nrow=nquant))
    zcluster <- kmeans(qx,centers=centers,iter.max=50)
    zclustsize <- as.integer(table(zcluster$cluster))
    index <- as.integer(order(zcluster$cluster)-1)
  } else if ((centers==1) | (centers==0)) {
    zclustsize <- nrow(x)
    index <- as.integer(0:(nrow(x)-1))
  } else { stop('centers must be an integer >=0') }
  znclust <- as.integer(centers); niter <- 10

  cat("Done\nStarting",B,"bootstrap iterations...\n")
  znp <- .C("expected_fp",efp=fdrest,fdrseq,as.integer(length(fdrseq)),as.integer(B),as.integer(niter),as.double(t(zscore)),as.double(m),as.double(s),index,znclust,zclustsize,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groups),as.integer(ncol(patterns)),as.double(a0),as.double(nu),as.double(balpha),as.double(nualpha),as.integer(length(probclus)),cluslist,as.double(t(probclus)),as.double(t(probpat)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,sumxpred,prodxpred,nobsxpred,as.integer(gapprox))
  fdrest <- znp$efp
  fdrpar <- fdrseq[abs(fdrest-fdrmax)==min(abs(fdrest-fdrmax))][1]
  fdr <- fdrest[abs(fdrest-fdrmax)==min(abs(fdrest-fdrmax))][1]
  if (fdr>1.1*fdrmax) {
    warning('estimated FDR too large. Try decreasing fdrmax')
    #fdrpar <- 0
  }
  z <- .C("utgene_parC",u=u,d=d,fdr=fdr,fnr=fnr,power=power,threshold=threshold,util=util,as.double(cf),nsel,sel,as.double(t(v)),as.integer(ncol(v)),as.double(fdrpar))
  fdrest <- data.frame(fdrseq=fdrseq,fdrest=fdrest)
}

return(list(efp=z$u,d=z$d,fdr=fdr,fdrpar=fdrpar,fdrest=fdrest,fnr=z$fnr,power=z$power,threshold=z$threshold))

}
findgenes <- function(gg.fit,x,groups,fdrmax=.05,parametric=TRUE,B=500) { UseMethod("findgenes") }
fit.gg <- function(x,groups,patterns,nclust=1,method='Bayes',B=1000,priorpar,parini,trace=TRUE) {
# Fits GaGa or MiGaGa model via MCMC or empirical Bayes
# Input:
# - x: matrix with gene expression measurements for all groups
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - patterns: matrix indicating which groups are put together under each pattern, i.e. the hypotheses to consider for each gene. Defaults to two hypotheses: null hypothesis of all groups being equal and full alternative of all groups being different
# - nclust: number of clusters in hierarchical prior
# - method: 'Bayes' fits model via Gibbs' MCMC posterior simulation, 'EBayes' fits model via empirical Bayes
# - B: number of MCMC samples to obtain when method=='Bayes'. Ignored for method=='EBayes'.
# - priorpar: list with prior parameter values (a.alpha0,b.alpha0,a.nu,b.nu,a.balpha,b.balpha,a.nualpha,b.nualpha,p.probclus,p.probpat).
# - parini: list with initial values for hyper-parameters a0, nu, balpha, nualpha, probclus, probpat
# Output: an object of class gagafit, with components
# - parest: parameter estimates. Only returned if method=='EBayes', for method=='Bayes' one must call the function parest after fit.gg
# - mcmc: posterior draws for hyper-parameters. Only returned if method=='Bayes'.
# - lhood: for method=='Bayes' it is the log-likelihood evaluated at each MCMC iteration. For method=='EBayes' it is the log-likelihood evaluated at the maximum
# - nclust: number of clusters
# - patterns: same as input argument

gapprox <- TRUE
if (is(x, "ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(x)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }
if (min(x)<0) stop("x can only have positive values")
if (sum(is.na(x))>0) stop("x cannot have any NA values")
groups <- as.numeric(groups); groups <- as.integer(as.integer(factor(groups,levels=names(table(groups))))-1)
K <- as.integer(max(groups)+1)
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (missing(patterns)) patterns <- rbind(rep(0,K),0:(K-1))
if (length(table(groups))!=ncol(patterns)) stop('patterns must have the same number of columns as the number of distinct groups')
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
if (sum(is.na(patterns))>0) stop('patterns cannot have any NA values')
if (sum(is.nan(patterns))>0) stop('patterns cannot have any NaN values')
if (sum(is.infinite(patterns))>0) stop('patterns cannot have any Inf values')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
class(patterns) <- 'gagahyp'
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
nclust <- as.integer(nclust)
npat <- as.integer(nrow(patterns))

# Initialize parameters by method of moments
if (nclust==1) probclusini <- 1
if (missing(parini)) {
  if (trace) cat('Initializing parameters...')
  aest <- rowMeans(x)^2/((rowMeans(x^2)-rowMeans(x)^2)*ncol(x)/(ncol(x)-1)); lest <- 1/rowMeans(x)
  sel <- (aest<quantile(aest,probs=.99)) & (lest<quantile(lest,probs=.99))
  aest <- aest[sel]; lest <- lest[sel]
  balphaini <- as.double(mean(aest)^2/var(aest)); nualphaini <- as.double(mean(aest))
  if (nclust==1) {
    a0ini <- as.double(mean(lest)^2/var(lest)); nuini <- as.double(mean(lest))
    probclusini <- as.double(1)
  } else {
    clusini <- kmeans(x=lest,centers=nclust)$cluster
    a0ini <- as.double(tapply(lest,clusini,'mean')^2/tapply(lest,clusini,'var'))
    nuini <- as.double(tapply(lest,clusini,'mean'))
    probclusini <- as.double(table(clusini)/length(clusini))
  }
  if (trace) cat(' Done.\n')
  if (trace) cat('Refining initial estimates...')
  probpatini <- rep(1/nrow(patterns),nrow(patterns))
  for (i in 1:5) { probpatini <- colMeans(pp.gg(x,groups,a0ini,nuini,balphaini,nualphaini,probclusini,probpatini,patterns)$pp) }
  if (trace) cat(' Done.\n')
} else {
  if (is.null(parini$a0) | is.null(parini$nu) | is.null(parini$balpha) | is.null(parini$nualpha) | is.null(parini$probpat)) stop('some components of parini are empty')
  a0ini <- parini$a0; nuini <- parini$nu; balphaini <- parini$balpha; nualphaini <- parini$nualpha
  if (nclust==1) { probclusini <- 1 } else { if (is.null(parini$probclus)) { stop('component probclus of parini is empty') } else { probclusini <- parini$probclus/sum(parini$probclus) } }
  probpatini <- parini$probpat/sum(parini$probpat)
}


if (method=='EBayes') {

  if (nclust>1) stop('nclust>1 is not currently implemented for empirical Bayes method')
  if (trace) cat('Starting EM algorithm...\n')
  z <- fitfreq.gg(x,groups,patterns,nclust=1,a0ini,nuini,balphaini,nualphaini,probpatini,iter.max=100,trace=trace)
  parest <- c(alpha0=z$alpha0,nu=z$nu,balpha=z$balpha,nualpha=z$nualpha,probclus=1,probpat=z$probpat)
  gg.fit <- list(parest=parest,mcmc=as.mcmc(NA),lhood=z$lhood,nclust=nclust,patterns=patterns,method=method)
  class(gg.fit) <- 'gagafit'
  return(gg.fit)

} else if (method=='Bayes') {

  if (missing(priorpar)) {
    a.alpha0 <-  .0016; b.alpha0 <-  .0001; a.nu <-  .016; b.nu <-  .0016
    a.balpha <-  .004; b.balpha <- .001; a.nualpha <- .004; b.nualpha <- 20
    p.probclus <- as.double(rep(.1,nclust))
    p.probpat <- as.double(rep(.1,npat))
  } else {
    if (is.null(priorpar$a.alpha0) | (is.null(priorpar$b.alpha0)) | (is.null(priorpar$a.nu)) | (is.null(priorpar$b.nu)) | (is.null(priorpar$a.balpha)) | (is.null(priorpar$b.balpha)) | (is.null(priorpar$a.nualpha)) | (is.null(priorpar$b.nualpha))) stop('Some components of priorpar have not been specified')
    a.alpha0 <- priorpar$a.alpha0; b.alpha0 <- priorpar$b.alpha0
    a.nu <- priorpar$a.nu; b.nu <- priorpar$b.nu
    a.balpha <- priorpar$a.balpha; b.balpha <- priorpar$b.balpha
    a.nualpha <- priorpar$a.nualpha; b.nualpha <- priorpar$b.nualpha
    if (nclust>1) { p.probclus <- 1 } else { if (is.null(priorpar$p.probclus)) stop('component p.probclus of priorpar has not been specified') else p.probclus <- priorpar$p.probclus }
    p.probpat <- priorpar$p.probpat
  }

# Call MCMC sampling routine
  if (trace) cat('MCMC sampling...\n')
  balpha <- nualpha <- double(B); alpha0 <- nu <- probclus <- double(B*nclust); prob <- double(B*npat)
  lhood <- double(B); trace <- as.integer(trace)

  z <- .C("fit_ggC",alpha0=alpha0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,prob=prob,lhood=lhood,as.integer(B),as.double(a.alpha0),as.double(b.alpha0),as.double(a.nu),as.double(b.nu),as.double(a.balpha),as.double(b.balpha),as.double(a.nualpha),as.double(b.nualpha),as.double(p.probclus),as.double(p.probpat),a0ini,nuini,balphaini,nualphaini,probclusini,probpatini,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groups),K,nclust,npat,as.integer(t(patterns)),ngrouppat,as.integer(gapprox),trace)
  if (trace) cat('Done.\n')

  gg.fit <- list(parest=NA,mcmc=as.mcmc(data.frame(alpha0=matrix(z$alpha0,nrow=B,ncol=nclust,byrow=TRUE),nu=matrix(z$nu,nrow=B,ncol=nclust,byrow=TRUE),balpha=z$balpha,nualpha=z$nualpha,probclus=matrix(z$probclus,nrow=B,ncol=nclust,byrow=TRUE),probpat=matrix(z$prob,nrow=B,byrow=TRUE))),lhood=z$lhood,nclust=nclust,patterns=patterns,method=method)
  class(gg.fit) <- 'gagafit'
  return(gg.fit)

}

}
fitfreq.gg <- function(x,groups,patterns,nclust=1,a0ini,nuini,balphaini,nualphaini,probpatini,iter.max=100,trace=FALSE) {
# Fits Generalized Gamma/Gamma model by maximum likelihood (uses nlminb)
# Input:
# - x: matrix with gene expression measurements for all groups
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - patterns: matrix indicating which groups are put together under each pattern, i.e. the hypotheses to consider for each gene. Defaults to two hypotheses: null hypothesis of all groups being equal and full alternative of all groups being different
# - a0ini,nuini,balphaini,nualphaini,probpatini: initial values for hyper-parameters in optimization
# - iter.max: maximum number of iterations in the optimization process
# - trace: if trace==TRUE the optimization progress is displayed every iteration
# Output: a list with the following components
# - a0, nu, balpha, nualpha, prob: hyper-parameter estimates
# - pp: posterior probabilities of each expression pattern

gapprox <- TRUE
cluslist <- c(0,-1); probclus <- 1 #EM only implemented for 1 cluster

ilogit <- function(x) { return(1/(1+exp(-x))) }
logl <- function(th) {
a0 <- as.double(exp(th[1:nclust])); nu <- as.double(exp(th[(nclust+1):(2*nclust)]))
balpha <- as.double(exp(th[2*nclust+1])); nualpha <- as.double(exp(th[2*nclust+2]))
z <- pp.gg(x,groups,a0,nu,balpha,nualpha,probclus,prob,patterns)
return(-z$lhood)
}


#EM algorithm
a0 <- a0ini; nu <- nuini; balpha <- balphaini; nualpha <- nualphaini; prob <- probpatini
th <- double(2*nclust+2); th[1:nclust] <- log(a0); th[(nclust+1):(2*nclust)] <- log(nu)
th[2*nclust+1] <- log(balpha); th[2*nclust+2] <- log(nualpha)
i <- 1; thnorm <- lnorm <- lcur <- ldif <- 1
if (trace) cat("Iter",format('a0',width=8,justify='right'),format('nu',width=8,justify='right'),format('balpha',width=8,justify='right'),format('nualpha',width=8,justify='right'),format('prob',width=length(prob)*8+1,justify='right'),'log-likelihood',"\n")
while ((i<=iter.max) & ((thnorm>1e-3) | (lnorm>1e-5) | (ldif>1e-5))) {
z <- pp.gg(x,groups,a0,nu,balpha,nualpha,probclus,prob,patterns)
probold <- prob; prob <- colMeans(z$pp)
mstep <- nlminb(start=th,objective=logl,control=list(iter.max=50,trace=0,abs.tol=1e-5,rel.tol=1e-5,x.tol=1e-5))
thnorm <- sqrt( sum((mstep$par-th)^2) + sum((ilogit(prob[-1])-ilogit(probold[-1]))^2) )
lnorm <- abs(mstep$objective-lcur)/abs(lcur); ldif <- abs(mstep$objective-lcur)
th <- mstep$par; lcur <- mstep$objective
a0 <- exp(th[1:nclust]); nu <- exp(th[(nclust+1):(2*nclust)])
balpha <- exp(th[2*nclust+1]); nualpha <- exp(th[2*nclust+2])
if (trace>0) { cat(format(i,width=4,justify='right'),round(a0,6),round(nu,6),round(balpha,6),round(nualpha,6),format(round(prob,6),width=8),format(-lcur,nsmall=3),"\n") }
i <- i+1
}

z <- pp.gg(x,groups,a0,nu,balpha,nualpha,probclus,prob,patterns)
return(list(alpha0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probpat=prob,lhood=z$lhood))
}
getpar.gagafit <- function(gg.fit) {
#returns parameter estimates from a 'gagafit' object in a named list
if (sum(is.na(gg.fit$parest)>0)) stop('Parameter estimates not available. Use function parest first.')
nclust <- gg.fit$nclust
return(list(a0=gg.fit$parest[1:nclust],nu=gg.fit$parest[(nclust+1):(2*nclust)],balpha=gg.fit$parest[2*nclust+1],nualpha=gg.fit$parest[2*nclust+2],probclus=gg.fit$parest[(2*nclust+3):(3*nclust+2)],probpat=gg.fit$parest[-1:-(3*nclust+2)]))
}
getpar <- function(gg.fit) { UseMethod("getpar") }
mcgamma <- function(a,b,c,d,r,s,newton=TRUE) {
# Computes moments for a conjugate gamma shape distribution */
# Input:
#  - a,b,c,d,r,s: parameters
#  - newton: newton==TRUE tries a Newton step to improve Gamma approximation
# Output: list with the following elements
#  - m: mean 
#  - v: variance 
#  - normk: normalization constant

if ((a<0) | (b<0) | (d<0) | (r<0) | (s<0)) stop('Parameters a,b,d,r,s must be >=0')
if (a==0) { 
  if (b-d<=0) stop('Non-valid parameters. b must be > than d')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+.5*a-.5<=0) stop('Non-valid parameters. b must be > .5-.5*a')
  if (c+a*log(s/a)<=0) stop('Non-valid parameters. c must be > a*log(a/s)')
}

a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d); r <- as.double(r); s <- as.double(s); newton <- as.integer(newton)
normk <- m <- as.double(-1); v <- double(1)
z <- .C("mcgammaC",normk=normk,m=m,v=v,a,b,c,d,r,s,newton)
return(list(m=z$m,v=z$v,normk=z$normk))

}
parest.gagafit <- function(gg.fit,x,groups,burnin,alpha=.05) {
# Parameter estimates and posterior probabilities of differential expression for GaGa and MiGaGa model
# Input:
# - gg.fit: GaGa model fit as returned by fit.gg
# - x:  matrix with gene expression measurements for all groups
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - burnin: if gg.fit was fit via MCMC, burnin indicates number of MCMC iterations to be discarded
# - alpha: credibility interval with 1-alpha posterior probability is returned
# Output:
# - a0, nu, balpha, nualpha, probclus, probpat: hyper-parameter estimates (posterior mean for Bayes, maximum likelihood estimate for EBayes)
# - ci: posterior credibility intervals for the hyper-parameter estimates (only available for Bayes fit)
# - nclust: number of clusters
# - pp: posterior probabilities of each expression pattern
# - patterns: matrix indicating which groups are put together under each pattern, as passed to fit.gg
# - dic: DIC (only returned for Bayes fit)

if (missing(x)) stop('argument x must be specified')
if (missing(groups)) stop('argument groups must be specified')
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')

nclust <- gg.fit$nclust
if (gg.fit$method=='EBayes') {
  a0 <- gg.fit$parest[1]; nu <- gg.fit$parest[2]
  balpha <- gg.fit$parest[3]; nualpha <- gg.fit$parest[4]
  probclus <- 1; probpat <- gg.fit$parest[-1:-5]
  ci<-list(a0=NA,nu=NA,balpha=NA,nualpha=NA,probclus=NA,probpat=NA)
  pp <- pp.gg(x,groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat=probpat,patterns=gg.fit$patterns)
  dic <- NA
} else {
  if (missing(burnin)) {
    warning('burnin not specified, discarding 25% of the MCMC samples')
    gg.fit$mcmc <- gg.fit$mcmc[-1:-(.25*nrow(gg.fit$mcmc)),]
    gg.fit$lhood <- gg.fit$lhood[-1:-(.25*nrow(gg.fit$mcmc))]
    lhood <- mean(gg.fit$lhood)
  } else {
    if (burnin>=nrow(gg.fit$mcmc)) stop('burnin must be smaller than the number of MCMC samples')
    gg.fit$mcmc <- gg.fit$mcmc[-1:-burnin,]
    gg.fit$lhood <- gg.fit$lhood[-1:-burnin]
    lhood <- mean(gg.fit$lhood)
  }

  if (nclust>1) {
    a0 <- colMeans(gg.fit$mcmc[,1:nclust])
    nu <- colMeans(gg.fit$mcmc[,(nclust+1):(2*nclust)])
    balpha <- mean(gg.fit$mcmc[,2*nclust+1])
    nualpha <- mean(gg.fit$mcmc[,2*nclust+2])
    probclus <- colMeans(gg.fit$mcmc[,(2*nclust+3):(3*nclust+2)])
    probpat <- colMeans(gg.fit$mcmc[,-1:-(3*nclust+2)])
    a0.ci <- apply(gg.fit$mcmc[,1:nclust],2,quantile,probs=c(alpha/2,1-alpha/2))
    nu.ci <- apply(gg.fit$mcmc[,(nclust+1):(2*nclust)],2,quantile,probs=c(alpha/2,1-alpha/2))
    balpha.ci <- quantile(gg.fit$mcmc[,2*nclust+1],probs=c(alpha/2,1-alpha/2))
    nualpha.ci <- quantile(gg.fit$mcmc[,2*nclust+2],probs=c(alpha/2,1-alpha/2))
    probclus.ci <- apply(gg.fit$mcmc[,(2*nclust+3):(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
    probpat.ci <- apply(gg.fit$mcmc[,-1:-(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
  } else {
    a0 <- mean(gg.fit$mcmc[,1:nclust])
    nu <- mean(gg.fit$mcmc[,(nclust+1):(2*nclust)])
    balpha <- mean(gg.fit$mcmc[,(2*nclust+1):(3*nclust)])
    nualpha <- mean(gg.fit$mcmc[,(3*nclust+1):(4*nclust)])
    probclus <- mean(gg.fit$mcmc[,(4*nclust+1):(5*nclust)])
    probpat <- colMeans(gg.fit$mcmc[,-1:-(5*nclust)])
    a0.ci <- quantile(gg.fit$mcmc[,1:nclust],probs=c(alpha/2,1-alpha/2))
    nu.ci <- quantile(gg.fit$mcmc[,(nclust+1):(2*nclust)],probs=c(alpha/2,1-alpha/2))
    balpha.ci <- quantile(gg.fit$mcmc[,2*nclust+1],probs=c(alpha/2,1-alpha/2))
    nualpha.ci <- quantile(gg.fit$mcmc[,2*nclust+2],probs=c(alpha/2,1-alpha/2))
    probclus.ci <- c(1,1)
    probpat.ci <- apply(gg.fit$mcmc[,-1:-(3*nclust+2)],2,quantile,probs=c(alpha/2,1-alpha/2))
  }

  gg.fit$parest <- c(a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat)
  ci<-list(a0=a0.ci,nu=nu.ci,balpha=balpha.ci,nualpha=nualpha.ci,probclus=probclus.ci,probpat=probpat.ci)
  pp <- pp.gg(x,groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat=probpat,patterns=gg.fit$patterns)
  dic <- -2*(2*lhood-pp$lhood)
}

gg.fit$ci <- ci; gg.fit$pp <- pp$pp; gg.fit$dic <- dic
return(gg.fit)
}
parest <- function(gg.fit,x,groups,burnin,alpha=.05) { UseMethod("parest") }
powclasspred.gagafit <- function(gg.fit,x,groups,prgroups,v0thre=1,ngene=100,B=100) {
# Estimates expected probability that a future sample is correctly classified.
# Input
# - gg.fit: fitted Gamma/Gamma model
# - x: vector with observations used to fit the model. It's really a matrix with genes in rows and samples in cols, entered in row order.
# - groups: vector indicating what group each column in x corresponds to
# - prgroups: prior probabilities for each group. Defaults to equally probable groups.
# - v0thre: genes with v0>v0thre (prob of being equally expressed across all groups) are not used to classify samples. If no genes make the threshold, the classification is based on the gene with the smallest v0.
# - ngene: number of genes to be used to classify sample, starting with those having lowest prob of being equally expressed across all groups
# - B: maximum number of MC iterations to be used
# Output
# - ccall: estimated overall probability of correctly classifying a sample
# - seccall: standard error of the ccall estimate
# - ccgroup: estimated probability of correctly classifying a sample from group k
# - segroup: standard error of the ccgroup estimate

gapprox <- TRUE
patterns <- gg.fit$patterns
if (is(x,"ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(data)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }
if (ncol(x)!=length(groups)) stop('Argument groups must have length equal to number of columns in argument x')
par <- getpar(gg.fit)
a0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha); probclus <- as.double(par$probclus); probpat <- as.double(par$probpat); nclust <- as.integer(length(probclus))
if (nrow(patterns)!=length(probpat)) stop('Argument patterns must be equal to the length of gg.fit$prob')
if ((missing(genelimit)) & (v0thre==1)) warning("You selected to use all genes. It's recommended to narrow the selection with the arguments v0thre and genelimit")
if (missing(genelimit)) { genelimit <- nrow(x); }

genelimit <- as.integer(genelimit); v0thre <- as.double(v0thre)
usesel <- as.integer(0); nsel <- as.integer(0); sel <- integer(nrow(x))

npat <- as.integer(nrow(patterns))
groups <- as.integer(as.integer(as.factor(groups))-1); K <- as.integer(max(groups)+1)
if (missing(prgroups)) prgroups <- rep(1/K,K)
if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
ncolsumx <- as.integer(sum(ngrouppat))
sumx <- prodx <- double(nrow(x)*ncolsumx); nobsx <- double(ncolsumx)
usesumx <- as.integer(0); gapprox <- as.integer(gapprox)

ccall <- seccall <- double(1); ccgroup <- double(K); ngroup <- integer(K); preceps <- as.double(0)

v <- pp.gg(x=x,groups=groups,a0=a0,nu=nu,balpha=balpha,nualpha=nualpha,probclus=probclus,probpat=probpat,patterns=patterns)$pp

z <- .C("utsample_ggC",ccall=ccall,seccall=seccall,ccgroup=ccgroup,ngroup=ngroup,as.integer(B),preceps,genelimit,v0thre,nsel,sel,usesel,as.integer(nrow(x)),as.integer(ncol(x)),as.double(t(x)),as.integer(groups),as.double(t(v)),K,as.double(prgroups),a0,nu,balpha,nualpha,nclust,probclus,probpat,npat,as.integer(t(patterns)),ngrouppat,ncolsumx,sumx,prodx,nobsx,usesumx,gapprox)

ccgroup <- z$ccgroup/z$ngroup
return(list(ccall=z$ccall,seccall=z$seccall,ccgroup=ccgroup,segroup=ccgroup*(1-ccgroup)/sqrt(z$ngroup)))

}
powclasspred <- function(gg.fit,x,groups,prgroups,v0thre=1,ngene=100,B=100) { UseMethod("powclasspred") }
pp.gg <- function(x,groups,a0,nu,balpha,nualpha,probclus,probpat,patterns) {
# Computes posterior probabilities of DE from Gamma/Gamma model given data x and hyper-param estimate
# Input:
# - x:  matrix or ExpressionSet with gene expression measurements for all groups
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - a0: alpha0. Shape parameter for the prior distribution (must be >0)
# - nu: nu. Scale parameter for the prior distribution (must be >0)
# - balpha: estimate for b parameter in hyper-prior for alpha
# - nualpha:
# - probclus: mixing probabilities for hierarchical prior on (alpha,lambda)
# - probpat: prior probability of each expression pattern
# - patterns: matrix indicating which groups are put together under each pattern (K cols).
# Output:
# - pp: posterior probability of each expression pattern for each gene (genes in rows, patterns in columns)
# - lhood: log-likelihood of the observed data given the hyper-parameter values
# Note:
# - This function performs the analogous calculations as 'postprob' from library 'EBarrays', but uses the generalized Gamma/Gamma model
#   and allows for the hyper-parameters to have been estimated with data other than x

gapprox <- TRUE
if (is(x,"ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(data)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }
groups <- as.integer(as.integer(as.factor(groups))-1); K <- as.integer(max(groups)+1)
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (missing(a0)) stop('a0 must be specified')
if (missing(nu)) stop('nu must be specified')
if (missing(balpha)) stop('balpha must be specified')
if (missing(nualpha)) stop('nualpha must be specified')
if (!is.vector(probclus)) stop('probclus must be a vector')
if (!is.vector(probpat)) stop('probpat must be a vector')
if ((length(a0)!=length(nu)) || (length(a0)!=length(probclus))) stop('a0,nu and probclus must have the same length')
if (length(balpha)>1 || length(nualpha)>1) stop('balpha and nualpha must be vectors of length 1')

if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
v <- double(nrow(x)*nrow(patterns)); lhood <- double(1)
usesumx <- as.integer(0)
sumx <- double(nrow(x)*sum(ngrouppat)); prodx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
sumxpred <- double(nrow(x)*sum(ngrouppat)); prodxpred <- double(nrow(x)*sum(ngrouppat)); nobsxpred <- double(sum(ngrouppat));
nsel <- nrow(x); sel <- as.integer(0:(nsel-1))
cluslist <- as.integer(c((0:(length(probclus)-1)),-1))
z <- .C("pp_ggC",v=v,lhood=lhood,nsel,sel,as.integer(ncol(x)),as.double(t(x)),groups,as.integer(ncol(patterns)),as.double(a0),as.double(nu),as.double(balpha),as.double(nualpha),as.integer(length(probclus)),cluslist,as.double(t(probclus)),as.double(t(probpat)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,sumx,prodx,nobsx,sumxpred,prodxpred,nobsxpred,usesumx,as.integer(gapprox))
v <- matrix(z$v,nrow=nrow(x),byrow=TRUE)

return(list(pp=v,lhood=z$lhood))

}
print.gagafit <- function(x,...) {

if (x$nclust==1) cat("GaGa hierarchical model.") else cat("MiGaGa hierarchical model (",round(x$nclust)," clusters.",sep="")
if (x$method=='EBayes') cat(" Fit via empirical Bayes\n") else cat(" Fit via MCMC (",nrow(x$mcmc)," iterations kept)\n",sep="")

if (is.null(x$pp)) {
  cat("  ",ncol(x$patterns)," groups, ",nrow(x$patterns)," hypotheses (expression patterns)\n\n",sep="") } else {
  cat("  ",nrow(x$pp)," genes, ",ncol(x$patterns)," groups, ",nrow(x$patterns)," hypotheses (expression patterns)\n\n",sep="")
}
cat("The expression patterns are\n")
print(x$patterns)
cat("\n")
if (is.null(x$parest)==FALSE) {
  cat("Hyper-parameter estimates\n\n")
  par <- getpar(x)
  cat("  ",names(x$parest)[1:(2+2*x$nclust)],"\n")
  cat("  ",round(par$a0,3),round(par$nu,3),round(par$balpha,3),round(par$nualpha,3),"\n\n")
  cat("  ",names(x$parest)[(2+2*x$nclust+1):(2+3*x$nclust)],"\n")
  cat("  ",round(par$probclus,3),"\n\n")
  cat("  ",names(x$parest)[-1:-(2+3*x$nclust)],"\n")
  cat("  ",round(par$probpat,3),"\n")
}
if (is.null(x$pp)) { cat("Posterior probabilities not available. Run function parest.\n") }
if (is.null(x$parest)) { cat("Hyper-parameter estimates not available. Run function parest.\n") }
}
print.gagahyp <- function(patterns) {
  groups <- paste("Group",1:ncol(patterns))
  for (i in 1:nrow(patterns)) {
    cnt <- length(table(patterns[i,]))
    cat("  Pattern ",i-1,": ",sep="")
    for (j in 0:(cnt-1)) {
      if (j<(cnt-1)) { eq <- c(rep(" =",sum(patterns[i,]==j)-1),"!=") } else { eq <- c(rep("=",sum(patterns[i,]==j)-1),"\n") }
      cat(paste(groups[patterns[i,]==j],eq))
    }
  }
}
rcgamma <- function(n,a,b,c,d,r,s,newton=TRUE) {
# Generates random draws from a conjugate gamma shape distribution by approximating it with a Gamma */
# Input:
# - n: number of random draws to generate
# - a,b,c,e,r,s: parameters
#  - newton: newton==TRUE tries a Newton step to improve Gamma approximation
# Output:
# - x: vector of length n with the random draws

if ((a<0) | (b<0) | (d<0) | (r<0) | (s<0)) stop('Parameters a,b,d,r,s must be >=0')
if (a==0) { 
  if (b-d<=0) stop('Non-valid parameters. b must be > than d')
  if (c<=0) stop('Non-valid parameters. c must be > 0')
} else {
  if (b+.5*a-.5<=0) stop('Non-valid parameters. b must be > .5-.5*a')
  if (c+a*log(s/a)<=0) stop('Non-valid parameters. c must be > a*log(a/s)')
}

n <- as.integer(n); a <- as.double(a); b <- as.double(b); c <- as.double(c); d <- as.double(d); r <- as.double(r); s <- as.double(s); newton <- as.integer(newton)

x <- double(n)
z <- .C("rcgammaC",x=x,n,a,b,c,d,r,s,newton)
return(z$x)

}
sim.gg <- function(n,m,p.de=.1,a0,nu,balpha,nualpha,probclus=1,a=NA,l=NA,useal=FALSE) {
  # Simulates data from the GaGa model with 2 groups
  # Input:
  # - n: number of genes
  # - m: number of observations per group
  # - p.de: proportion of genes differentially expressed
  # - a0, nu: prior for mean is IG(a0,a0/nu).
  # - balpha, nualpha: prior for alpha is G(balpha,balpha/nualpha)
  # Output:
  # - x: matrix with simulated observations for group 1 in first m columns and group 2 in last m columns
  # - l: matrix with lambda parameters used to generate x and y in column 1 and 2, respectively

  if (n<=0) stop("Number of genes must be positive")
  if (sum(m<0)>0) stop("Number of observations per group must be positive")

  if (useal==FALSE) {
    if (p.de<0 & p.de>1) stop("proportion of differentially expressed genes must be between 0 and 1")
    if (balpha<=0 | nualpha<=0) stop("balpha and nualpha must be >0")
    if (min(a0)<=0 | min(nu)<=0) stop(cat("a0 and nu must be >0 but a0=",a0,", nu=",nu,"was specified \n"))

    a0 <- rep(a0,round(probclus*n)); nu <- rep(nu,round(probclus*n))
    balpha <- rep(balpha,length(a0)); nualpha <- rep(nualpha,length(nu))
 
    a <- l <- matrix(NA,nrow=n,ncol=length(m))
    a[,1] <- rgamma(n,balpha,balpha/nualpha); l[,1] <- 1/rgamma(n,a0,a0/nu)
    if (ncol(a)>1) {
      l[,2] <- l[,1]; a[,2] <- a[,1]
      if ((round((1-p.de)*n)+1)<=n) {   #generate parameter values for DE genes
        sel <- sample(1:n,round(p.de*n),replace=FALSE)
        a[sel,2] <- rgamma(round(p.de*n),balpha[sel],balpha[sel]/nualpha[sel])
        l[sel,2] <- 1/rgamma(round(p.de*n),a0[sel],a0[sel]/nu[sel])
      }
    }
  }
  x <- matrix(rgamma(m[1]*n,a[,1],a[,1]/l[,1]),nrow=n,ncol=m[1])
  i <- 2
  while (i<=length(m)) {
    x <- cbind(x,matrix(rgamma(m[i]*n,a[,i],a[,i]/l[,i]),nrow=n,ncol=m[i]))
    i <- i+1
  }

  return(list(x=x,a=a,l=l))
}
simnewsamples.gagafit <- function(gg.fit,groupsnew,sel,x,groups) {
# Simulates parameter values and new observations for a select subset of genes and the group indicated by groups from a Gamma/Gamma model
# - groupsnew: vector indicating the groups that each new observation will belong to
# - sel: numeric vector indicating the indexes of the genes we want to draw new samples for (defaults to all genes). If a logical vector is indicated, it is converted to (1:nrow(x))[sel]
# - x: matrix with observations used to fit the model
# - groups: vector of length ncol(x) indicating what group does each column in x correspond to
# - gg.fit: GaGa fit, as returned by parest.gagafit
# Output
# - xnew: matrix of nsel rows and nsamples cols with obs drawn from the predictive for groups given by groupsnew.
# - dnew: matrix of nsel rows and nsamples cols with expression pattern indicators drawn from the posterior.
# - anew: matrix of nsel rows and nsamples cols with alpha parameters from the posterior for groups given by groupsnew.
# - lnew: matrix of nsel rows and nsamples cols with lambda parameters from the posterior for groups given by groupsnew.

gapprox <- TRUE
if (is(x,"ExpressionSet")) {
  if (is.character(groups)) { groups <- as.factor(pData(data)[, groups]) }
  x <- exprs(x)
} else if (!is(x,"data.frame") & !is(x,"matrix")) { stop("x must be an ExpressionSet, data.frame or matrix") }

patterns <- gg.fit$patterns
v <- gg.fit$pp
if ((max(groupsnew)>max(groups)) | (min(groupsnew)<min(groups))) stop('Groups indicated in groupsnew do not match with those indicated in groups')
groupstemp <- factor(groups)
groupsnew <- factor(groupsnew,levels=levels(groupstemp))
groups <- as.integer(as.integer(groupstemp)-1); K <- as.integer(max(groups)+1)
groupsnew <- as.integer(as.integer(groupsnew)-1)
if (ncol(x)!=length(groups)) stop('length(groups) must be equal to the number of columns in x')
if (!is.matrix(v)) stop('Argument v must be a matrix')
if (ncol(v)!=nrow(patterns)) stop('Argument v must have as many columns as rows has patterns')
if (nrow(x)!=nrow(v)) stop('Arguments x and v must have the same number of rows')

if (ncol(patterns)!=K) stop('patterns must have number of columns equal to the number of distinct elements in groups')
for (i in 1:nrow(patterns)) { patterns[i,] <- as.integer(as.integer(as.factor(patterns[i,]))-1) }
ngrouppat <- as.integer(apply(patterns,1,'max')+1)
par <- getpar(gg.fit)
alpha0 <- as.double(par$a0); nu <- as.double(par$nu); balpha <- as.double(par$balpha)
nualpha <- as.double(par$nualpha)
nclust <- as.integer(length(gg.fit$a0)); rho <- as.double(par$probclus)
sumx <- prodx <- double(nrow(x)*sum(ngrouppat)); nobsx <- double(sum(ngrouppat))
gapprox <- as.integer(gapprox)

if (missing(sel)) sel <- 1:nrow(x)
if (is.logical(sel)) sel <- (1:nrow(x))[sel]
sel <- as.integer(sel-1) #in C indices start at 0
nsel <- length(sel); nsamples <- length(groupsnew)
xnew <- anew <- lnew <- double(nsel*nsamples); dnew <- integer(nsel*nsamples)

z <- .C("compute_sumxC",sumx=t(sumx),prodx=t(prodx),nobsx=nobsx,nsel,sel,as.integer(sum(ngrouppat)),ncol(x),as.double(t(x)),groups,K,as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,as.integer(1))
sumx <- matrix(z$sumx,nrow=nrow(x),byrow=TRUE); prodx <- matrix(z$prodx,nrow=nrow(x),byrow=TRUE)
nobsx <- z$nobsx

z <- .C("simnewsamples_ggC",xnew=xnew,dnew=dnew,anew=anew,lnew=lnew,nsamples,groupsnew,K,nsel,sel,alpha0,nu,balpha,nualpha,nclust,rho,as.double(t(v)),as.integer(nrow(patterns)),as.integer(t(patterns)),ngrouppat,t(sumx),t(prodx),nobsx,gapprox)

return(list(xnew=matrix(z$xnew,nrow=nsel,byrow=TRUE),dnew=matrix(z$dnew,nrow=nsel,byrow=TRUE),anew=matrix(z$anew,nrow=nsel,byrow=TRUE),lnew=matrix(z$lnew,nrow=nsel,byrow=TRUE)))

}
simnewsamples <- function(gg.fit,groupsnew,sel,x,groups) { UseMethod("simnewsamples") }
