# Legacy package-name marker (written by older R install tooling).
.packageName <- "apComplex"

#A function to decide whether or not to combine complexes based on changes
# in the product of L and C

#Currently this function includes the Fisher's exact portion only for complexes with fewer than 20 proteins; for larger complexes, the binomial criterion is adequate.

#LCdelta: score (on the log scale) for merging complexes comp1 and comp2.
#Returns (likelihood contribution if merged) - (contribution if separate)
#+ an adjustment for edges that leave the "no complex" bin, and, when the
#merged complex has fewer than 20 proteins, a Fisher's exact component.
#A positive return value favors merging.
#
#Arguments:
#  comp1, comp2 - column indices/names of the two complexes in cMat
#  cMat     - protein x complex 0/1 affiliation matrix
#  dataMat  - N x (N+M) bait-to-protein observation matrix (rows = baits)
#  baitList - names of the bait proteins
#  simMat   - similarity matrix aligned with dataMat (logistic covariate)
#  mu, alpha, beta - logistic-regression parameters
#  wsVal    - workspace size passed to fisher.test
LCdelta <- function(comp1,comp2,cMat,dataMat,baitList,simMat,mu,alpha,beta,wsVal=20000000){

#N baits (rows of dataMat); M additional hit-only proteins
N <- dim(dataMat)[1]
M <- dim(dataMat)[2]-N


#compute likelihood contribution for separate complexes

#first for comp1
#member proteins of comp1, and the subset of members that are baits
tP1 <- names(which(cMat[,comp1]==1))
tB1 <- tP1[which(tP1 %in% baitList)]

nB1 <- length(tB1)
nH1 <- length(tP1) - nB1

#bait x member submatrix of observed edges for comp1
temp1 <- matrix(dataMat[tB1,tP1],ncol=(nB1+nH1))

sim1 <- matrix(simMat[tB1,tP1],ncol=(nB1+nH1))
#logistic log-likelihood of the observed edges, minus the contribution of
#the nB1 bait self-edges (which are always present)
adjBin1 <- sum(temp1*(mu+alpha+beta*sim1)-log(1+exp(mu+alpha+beta*sim1)))-
		nB1*(mu+alpha+beta-log(1+exp(mu+alpha+beta)))

#number of observed edges excluding the nB1 self-edges
X1 <- sum(temp1)-nB1

#per-column present/absent edge counts for the Fisher's exact table;
#bait columns subtract 1 to exclude the self-edge
b11 <- colSums(matrix(temp1[,1:nB1],ncol=nB1))
b11[1:nB1] <- b11-1
b01 <- (nB1-1)-b11
h11 <- NULL
h01 <- NULL

#hit columns: counts out of the nB1 baits that could have observed them
if(nH1>0){
	h11 <- colSums(matrix(temp1[,(nB1+1):(nB1+nH1)],ncol=nH1))
	h01 <- nB1 - h11
}

#row of absent (bh01) and present (bh11) counts for comp1's 2 x k table
bh01 <- c(b01,h01)
bh11 <- c(b11,h11)



#repeat for comp2 (same computation as for comp1 above)
tP2 <- names(which(cMat[,comp2]==1))
tB2 <- tP2[which(tP2 %in% baitList)]

nB2 <- length(tB2)
nH2 <- length(tP2) - nB2

temp2 <- matrix(dataMat[tB2,tP2],ncol=(nB2+nH2))

sim2 <- matrix(simMat[tB2,tP2],ncol=(nB2+nH2))
adjBin2 <- sum(temp2*(mu+alpha+beta*sim2)-log(1+exp(mu+alpha+beta*sim2)))-
		nB2*(mu+alpha+beta-log(1+exp(mu+alpha+beta)))



X2 <- sum(temp2)-nB2

b12 <- colSums(matrix(temp2[,1:nB2],ncol=nB2))
b12[1:nB2] <- b12-1
b02 <- (nB2-1)-b12
h12 <- NULL
h02 <- NULL

if(nH2>0){
	h12 <- colSums(matrix(temp2[,(nB2+1):(nB2+nH2)],ncol=nH2))
	h02 <- nB2 - h12
}

bh02 <- c(b02,h02)
bh12 <- c(b12,h12)



#log binomial coefficients: ways to place the X observed edges among the
#nB*(nB+nH-1) possible non-self bait edges of each complex
cX1 <- lgamma(nB1*(nB1+nH1-1)+1) - lgamma(X1+1) - lgamma(nB1*(nB1+nH1-1)-X1+1)
cX2 <- lgamma(nB2*(nB2+nH2-1)+1) - lgamma(X2+1) - lgamma(nB2*(nB2+nH2-1)-X2+1)


#compute likelihood contribution if left as 2 complexes

lK <- cX1+cX2+adjBin1+adjBin2


#combine complexes and compute likelihood contribution

#union of the two complexes' members, and its bait subset
tP <- names(which(rowSums(cMat[,c(comp1,comp2)])>0))
tB <- tP[which(tP %in% baitList)]

nB <- length(tB)
nH <- length(tP) - nB

temp <- matrix(dataMat[tB,tP],ncol=(nB+nH))

sim <- matrix(simMat[tB,tP],ncol=(nB+nH))
adjBin <- sum(temp*(mu+alpha+beta*sim)-log(1+exp(mu+alpha+beta*sim)))-
		nB*(mu+alpha+beta-log(1+exp(mu+alpha+beta)))



X <- sum(temp)-nB

b1 <- colSums(matrix(temp[,1:nB],ncol=nB))
b1[1:nB] <- b1-1
b0 <- (nB-1)-b1
h1 <- NULL
h0 <- NULL

if(nH>0){
	h1 <- colSums(matrix(temp[,(nB+1):(nB+nH)],ncol=nH))
	h0 <- nB - h1
}


bh0 <- c(b0,h0)
bh1 <- c(b1,h1)


cX <- lgamma(nB*(nB+nH-1)+1) - lgamma(X+1) - lgamma(nB*(nB+nH-1)-X+1)

#compute likelihood contribution if combined into 1 complex
lKm1 <- cX+adjBin

#find adjustments for edges that used to be in "no complex" bin
#make sure dataMat and cMat have their rows and columns in the same order

#all complexes containing any member of the merged complex
theseComps <- which(colSums(cMat[tP,])>0)

#indicator: bait-protein pairs that already share at least one complex
exCadjMat <- (1*(cMat[tP,theseComps] %*% t(cMat[tP,theseComps])) >0)[tB,]
diag(exCadjMat) <- 1


#likelihood adjustment for pairs that currently share no complex: move
#their edges from the "no complex" (alpha-free) model to the within-complex
#model
part4 <-  sum((matrix(1,nB,(nB+nH))-exCadjMat)*(dataMat[tB,tP]*alpha-
	log(1+exp(mu+alpha+beta*simMat[tB,tP])) + 
	log(1+exp(mu+beta*simMat[tB,tP]))))


#net change in log-likelihood from merging
ans <- lKm1-lK+part4

#if total size of complex is less than 20 proteins, 
#include Fisher's exact component
if((nB+nH)<20){

	#log p-value of the merged table minus those of the separate tables
	ans <- ans + log(fisher.test(rbind(bh0,bh1),workspace=wsVal)$p.value) - 
		log(fisher.test(rbind(bh01,bh11),workspace=wsVal)$p.value) -
		log(fisher.test(rbind(bh02,bh12),workspace=wsVal)$p.value)

}
return(ans)
}


 

#function to decompose a bait-hit adjacency matrix (made symmetric for baits)
#into complex memberships

#adjMat has dimensions N by (N+M) corresponding to N baits and M hits
#adjMat is named with row and column names corresponding to proteins

#this function uses another function called reduceMat 
#any columns that are subsets of other columns are eliminated


#bhmaxSubgraph: decompose a bait-hit adjacency matrix (bait block made
#symmetric by the caller) into maximal BH-complete subgraphs.
#
#Arguments:
#  adjMat - named N x (N+M) 0/1 matrix (N baits, M hit-only proteins)
#
#Returns a protein x complex 0/1 affiliation matrix with columns named
#"bhmax1", "bhmax2", ... and rows in the original column order of adjMat.
bhmaxSubgraph <- function(adjMat){

	#record the order of the columns of adjMat so the order of the
	#rows of the affiliation matrix will match the original column order
	rowOrder <- colnames(adjMat)

	#N baits (rows); M hit-only proteins (extra columns)
	N <- dim(adjMat)[1]
	M <- dim(adjMat)[2] - N

	#make diagonal entries equal to 1
	diag(adjMat) <- 1
	
	#first find baits that only have hit pairs  
	#will put these in at the end

	#baits whose only bait-column entry is their own diagonal
	hitComps <- which(rowSums(adjMat[,1:N])==1)
	baitComps <- c(1:N)
	if(length(hitComps)>0) 	baitComps <- c(1:N)[-hitComps]
	

	Nbait <- length(baitComps)

	#reorder so that hit only complexes are last
	adjMat <- adjMat[c(baitComps,hitComps),
				c(baitComps,hitComps,(N+1):(N+M))]

	#columns of compMat are candidate complexes, grown one bait at a time
	compMat <- NULL

	for (n in 1:Nbait){

	#candidate complex seeded by bait n: everything bait n touches from
	#position n onward (earlier positions padded with zeros)
	newComp <- as.matrix(c(rep(0,n-1),adjMat[n,n:(N+M)]))

	#existing candidate complexes that already contain bait n
	changeThese <- which(compMat[n,]==1)
	n2c <- length(changeThese)

	if(n2c>0){

	newVecs <- NULL
	lose <- rep(FALSE,n2c)

	for (i in 1:n2c){
		
		maybeChange <- compMat[,changeThese[i]]

		#if the existing complex contains members bait n is not linked
		#to, it is not BH-complete with bait n: split it into a version
		#without bait n and a version intersected with bait n's edges
		if(sum(maybeChange[(n+1):(N+M)]>newComp[(n+1):(N+M)])>0){

		lose[i] <- TRUE		

		newComp1 <- maybeChange
		newComp1[n] <- 0

		newComp2 <- c(maybeChange[1:n],
		pmin(maybeChange[(n+1):(N+M)],newComp[(n+1):(N+M)]))

		newVecs <- cbind(newVecs,cbind(newComp1,newComp2))
		
		
		}

	}
	newComp <- as.matrix(cbind(newVecs,newComp))
	if(sum(lose)>0) compMat <- as.matrix(compMat[,-changeThese[lose]])

	}

	compMat <- cbind(compMat,newComp)
	
	}

	#NOTE(review): progress goes to stdout via print(); message() would be
	#the conventional choice in package code
	print("reducing matrix")
	#drop candidate complexes that are subsets of other candidates
	compMat <- reduceMat(compMat,compare="less")

	#append one column per hit-only bait (their rows were moved last above)
	if (Nbait<N) compMat <- cbind(compMat,t(adjMat[(Nbait+1):N,]))

	newcolnames <- paste("bhmax",1:dim(compMat)[2],sep="")
	colnames(compMat) <- newcolnames
	#restore the original protein (row) order
	compMat <- compMat[rowOrder,]

	return(compMat)
}


# a function to run the entire algorithm at once


#findComplexes: run the complete estimation pipeline on a bait-hit
#adjacency matrix.
#
#Arguments:
#  adjMat - named N x (N+M) adjacency matrix (N baits, M hit-only proteins)
#  simMat - optional similarity matrix aligned with adjMat; defaults to all
#           zeroes with a unit diagonal
#  sensitivity, specificity - assumed operating characteristics of the
#           technology, converted to logistic parameters mu and alpha
#  beta   - coefficient on the similarity covariate
#
#Returns the estimated protein complex membership graph (protein x complex
#0/1 matrix) after merging initial BH-complete subgraphs via LCdelta.
findComplexes <- function(adjMat,simMat=NULL,sensitivity=.75,specificity=.995, beta=0){

	##find number of baits and number of hits

	N <- dim(adjMat)[1]
	M <- dim(adjMat)[2]-N

	##set parameters for logistic regression model

	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	##create simMat of zeroes with diagonal of ones if one is not specified

	if(is.null(simMat)) {
		simMat <- matrix(0,N,N+M)
		diag(simMat) <- 1
		colnames(simMat) <- colnames(adjMat)
		rownames(simMat) <- colnames(adjMat)
	}

	##make first N columns of adjMat symmetric to use with bhmaxSubgraph

	adjMatSym <- adjMat
	adjMatSym[,1:N] <- pmax(adjMatSym[,1:N],t(adjMatSym[,1:N]))
	diag(adjMatSym) <- 1

	##find maximal BH-complete subgraphs for initial 
	##protein complex membership graph estimate

	PCMG <- bhmaxSubgraph(adjMatSym)

	##combine complexes using LC measure

	#put PCMG in order by number of baits in complex (largest first);
	#use TRUE rather than the reassignable alias T
	baitOrder <- order(colSums(PCMG[1:N,]),decreasing=TRUE)
	PCMGo <- PCMG[,baitOrder]
	
	#merge complex estimates using LCdelta criteria
	PCMG2 <- mergeComplexes(PCMGo,adjMat=adjMat,simMat=simMat,beta=beta)

	return(PCMG2)

}



 
##propose complex combinations and compare to LxC measure

##PCMG is an initial estimate for the PCMG, presumably obtained 
##by finding bhMaximal subgraphs



#mergeComplexes: greedily merge columns of an initial PCMG estimate.
#For each complex i, candidates are complexes overlapping i by more than
#25% (relative to the larger of the two); the candidate with the largest
#positive LCdelta score is absorbed into i, repeating until no candidate
#improves the score.
#
#Arguments:
#  PCMG   - protein x complex 0/1 affiliation matrix (initial estimate)
#  adjMat - named N x (N+M) bait-to-protein adjacency matrix
#  simMat - optional similarity matrix (defaults to 0s with unit diagonal)
#  sensitivity, specificity - converted to logistic parameters mu, alpha
#  beta   - coefficient on the similarity covariate
#
#Returns the merged affiliation matrix.
mergeComplexes <- function(PCMG,adjMat,simMat=NULL,sensitivity=.75,specificity=.995,beta=0){

	bNames <- rownames(adjMat)
	diag(adjMat) <- 1

	#logistic parameters implied by sensitivity/specificity
	mu <- log((1-specificity)/specificity)
	alpha <- log(sensitivity/(1-sensitivity))-mu

	N <- dim(adjMat)[1]
	M <- dim(adjMat)[2]-N
	if(is.null(simMat)) {simMat <- matrix(0,N,N+M);diag(simMat) <- 1}

	i <- 1 
	K <- dim(PCMG)[2]

	keepgoing <- i < K
   
 
	while(keepgoing){

	keepgoing2 <- TRUE

	while(keepgoing2){

	#complexes overlapping complex i by more than 25% (overlap measured
	#against the larger of the two); i always matches itself and is
	#removed on the next line
	testset <- which(colSums(PCMG[,i]*PCMG)/
					pmax(colSums(PCMG),sum(PCMG[,i]))>.25)
	testset <- testset[-which(testset==i)]
	Ktemp <- length(testset)

	
	#NOTE(review): the second clause is redundant (Ktemp!=0 follows
	#from Ktemp>0)
	if(Ktemp>0 & Ktemp!=0){
	LCIncs <- rep(0,Ktemp)

	#score each candidate merge with LCdelta
	for (m in 1:Ktemp){
	
	LCIncs[m] <- LCdelta(i,testset[m],PCMG,dataMat=adjMat,
				baitList=bNames,simMat=simMat,
				mu=mu,alpha=alpha,beta=beta)
	
	
	
	}
	
	#merge only if at least one candidate improves the score
	same <- sum(LCIncs>0)>0
	
	if(same){
		#absorb the best-scoring candidate into complex i
		thisone <- testset[which.max(LCIncs)]
		PCMG[,i] <- pmax(PCMG[,i],PCMG[,thisone])
		#NOTE(review): this drops dimensions if only two columns
		#remain; PCMG[,-thisone,drop=FALSE] would be safer -- confirm
		#K never reaches 2 here
		PCMG <- PCMG[,-thisone]
		K <- dim(PCMG)[2]
		#keep i pointing at the same complex after the removal
		if(thisone<i) i <- i-1
		

	}
	}
	else same <- FALSE
	keepgoing2 <- same

	}

	
	i <- i+1
	keepgoing <- i < K
}
return(PCMG)
}
#function to eliminate columns that are subsets or identically equal 
#to other columns

#if working in identical mode, will keep the column that appears first 

#reduceMat: eliminate columns of a 0/1 matrix that are identical to
#(compare="equal") or subsets of (compare="less") other columns.
#
#Arguments:
#  mat     - 0/1 affiliation matrix (proteins x complexes)
#  compare - "equal" keeps the first of any group of identical columns;
#            "less" drops columns elementwise <= a later (larger) column
#
#Returns the reduced matrix.
#
#Fixes over the previous revision:
#  * the header comment claimed a largest-to-smallest ordering while the
#    code sorts smallest-to-largest (which the subset scan requires); the
#    comment now matches the code
#  * the final column removal ran unconditionally, so in "equal" mode it
#    applied a stale `lose` value from the last loop iteration and removed
#    valid columns; it is now confined to the "less" branch
#  * `mat[,-lose]` with a NULL `lose` produced a zero-column matrix; the
#    removals are now guarded by length checks
#  * `mat[-temp,]` could collapse to a vector when one row remained;
#    drop=FALSE keeps it a matrix
reduceMat <- function(mat,compare="equal"){

	#order columns from smallest to largest column sum so that a subset
	#column always precedes any column that contains it
	mat <- mat[,order(colSums(mat),decreasing=FALSE)]

	#drop colnames so whichVecInMat returns indices rather than names
	colnames(mat) <- NULL

	if(compare=="equal"){

		i <- 1
		nCols <- dim(mat)[2]

		continue1 <- i<nCols

		while(continue1){

		Vec <- mat[,i]
		#columns identical to column i (always includes i itself);
		#keep the first occurrence, drop the rest
		lose <- whichVecInMat(Vec,mat)
		if(length(lose)>1){
			lose <- lose[-1]
			mat <- mat[,-lose]
			nCols <- dim(mat)[2]
		}
		i <- i+1
		continue1 <- i <= nCols
		}
	matfull <- mat
	}


	if(compare=="less"){

		i <- 1
		nCols <- dim(mat)[2]

		#columns containing singly affiliated proteins can never be
		#dropped; remember them and remove those rows from the scan
		temp <- which(rowSums(mat)==1)

		matfull <- mat
		if(length(temp)>0){

			if(length(temp)==1){
			notestset <- which(mat[temp,]==1)
			}
			if(length(temp)>1){
			notestset <- which(colSums(mat[temp,])>0)
			}

			mat <- mat[-temp,,drop=FALSE]
		}


		continue1 <- i<nCols
		lose <- NULL

		while(continue1){

		#mark column i if it is elementwise <= some later column
		Vec <- mat[,i]
		test <- vecInMat(Vec,as.matrix(mat[,(i+1):nCols]),
							compare="less")
		if(test) lose <- c(lose,i)
		i <- i+1
		continue1 <- i<nCols
		}

		#protect columns that contain singly affiliated members
		if(length(temp)>0) lose <- lose[!(lose %in% notestset)]
		if(length(lose)>0) matfull <- matfull[,-lose]

	}

	return(matfull)
}
#function to eliminate columns that are subsets of other columns
#this is a modified version of reduceMat
#it requires a vector that says which columns have not been compared 
#to the previously reduced matrix

#newvec contains a 1 for a new column and a 0 for a previously reduced one
#the order of the entries in newvec should correspond to the columns of mat

#reduceMatNew: eliminate columns of mat that are equal to (compare="equal")
#or subsets of (compare="less") other columns, comparing only the columns
#flagged as new against the previously reduced columns.
#
#Arguments:
#  mat     - 0/1 affiliation matrix
#  newvec  - one entry per column of mat: 1 marks a new column, 0 a column
#            of the previously reduced matrix
#  compare - "equal" or "less" (see reduceMat)
#
#Returns the reduced matrix.
#
#Fix over the previous revision: the loops used 1:length(newCols) and
#1:length(oldCols), which iterate over c(1, 0) for empty sets and trigger
#an NA subscript error; seq_along() handles the empty case correctly.
reduceMatNew <- function(mat,newvec,compare="equal"){

	oldCols <- which(newvec==0)
	newCols <- which(newvec==1)

	matKeep <- as.matrix(mat[,oldCols])

	if(compare=="equal"){

		lose <- NULL
		#drop any new column identical to a previously kept column
		for (i in seq_along(newCols)){

		Vec <- mat[,newCols[i]]
		test <- vecInMat(Vec,matKeep,compare="equal")

		if(test) lose <- c(lose,newCols[i])
		}
		if(length(lose)>0) mat <- mat[,-lose]

	}

	if(compare=="less"){

		lose <- NULL

		#drop any new column that is a subset of a previously kept column
		for (i in seq_along(newCols)){

		Vec <- mat[,newCols[i]]
		test <- vecInMat(Vec,matKeep,compare="less")

		if(test){
			lose <- c(lose,newCols[i])
		}

		}

		if(!is.null(lose)) {mat <- mat[,-lose]; newvec <- newvec[-lose]}

		#check to see if columns in the previously reduced matrix
		#are strictly less than a surviving new column

		if(sum(newvec)>0){

			lose <- NULL
			oldCols <- which(newvec==0)
			newCols <- which(newvec==1)
			matNew <- as.matrix(mat[,newCols])

			for (j in seq_along(oldCols)){
			temp <- mat[,oldCols[j]]
			test <- vecInMat(temp,matNew,compare="less")
			if(test) lose <- c(lose,oldCols[j])
			}

			if(!is.null(lose)) mat <- mat[,-lose]
		}
	}
	return(mat)
}
#function to see if a vector x is identical to (or strictly less than) at least one of the columns in a matrix mat


#vecInMat: test whether vector x matches at least one column of mat.
#
#Arguments:
#  x       - vector whose length must equal nrow(mat)
#  mat     - matrix whose columns are compared against x
#  compare - "equal":   x identical to some column
#            "less":    x elementwise <= some column
#            "greater": x elementwise >= some column
#
#Returns TRUE if some column satisfies the comparison, FALSE otherwise.
#
#Fixes over the previous revision: an unknown `compare` used to surface
#later as a confusing "object 'condFunc' not found" error (now validated
#with match.arg), and the while condition used the vectorized `&` where
#the short-circuiting scalar `&&` is appropriate.
vecInMat <- function(x,mat,compare="equal"){

	compare <- match.arg(compare,c("equal","less","greater"))
	if(length(x) != dim(mat)[1])
		stop("vector length must equal row dimension of matrix")

	condFunc <- switch(compare,
		equal   = function(temp) sum(x != temp)==0,
		less    = function(temp) sum(x > temp)==0,
		greater = function(temp) sum(x < temp)==0)

	n <- dim(mat)[2]

	#scan columns, stopping at the first match
	test <- FALSE
	i <- 1
	while (!test && i<=n){
		test <- condFunc(mat[,i])
		i <- i+1
	}

	return(test)
}

#function to report which columns of a matrix mat the vector x is equal to (or strictly less than)


#will return the column name, or the index if the matrix is unnamed

#whichVecInMat: report which columns of mat the vector x is equal to (or
#elementwise less/greater than).
#
#Arguments:
#  x       - vector whose length must equal nrow(mat)
#  mat     - matrix whose columns are compared against x
#  compare - "equal", "less" (x <= column), or "greater" (x >= column)
#
#Returns the matching column names when mat has colnames, otherwise the
#matching column indices.
#
#Fixes over the previous revision: a length mismatch used to recycle
#silently and return wrong matches (now an explicit stop, matching the
#contract of vecInMat), and an unknown `compare` surfaced as a confusing
#"object 'compFun' not found" error (now validated with match.arg).
whichVecInMat <- function(x,mat,compare="equal"){

	compare <- match.arg(compare,c("equal","less","greater"))
	if(length(x) != dim(mat)[1])
		stop("vector length must equal row dimension of matrix")

	A <- colnames(mat)

	compFun <- switch(compare,
		equal   = function(y) sum(x != y)==0,
		less    = function(y) sum(x > y)==0,
		greater = function(y) sum(x < y)==0)

	#one logical per column; which() on the logical vector directly
	v <- apply(mat,2,FUN = compFun)

	w <- which(v)
	if(!is.null(A)) w <- A[w]

	return(w)
}

#Legacy load hook (pre-namespace era): ensure the 'graph' package is
#available when this package is attached, stopping with an error otherwise.
#NOTE(review): modern packages would declare this dependency in DESCRIPTION
#and use .onLoad/.onAttach instead of .First.lib.
.First.lib <- function(libname,pkgname,where){
	require("graph") || stop("Need package graph")
}

