\name{ecoli.m52.genome}
\alias{ecoli.m52.genome}
\alias{ecoligenomeCHRLOC}
\alias{ecoligenomeSYMBOL2AFFY}
\alias{ecoligenomeSYMBOL}
\alias{ecoligenomeSTRAND}
\alias{ecoli.operon}
\alias{ecoli.len}
%\docType{data}
\title{ Escherichia coli data}
\description{
  Meta-data related to Escherichia coli
}
\usage{
data(ecoli.m52.genome)
data(ecoligenomeCHRLOC)
data(ecoligenomeSYMBOL2AFFY)
data(ecoligenomeSYMBOL)
data(ecoligenomeSTRAND)
data(ecoli.operon)
ecoli.len
}
\format{
  The format for \code{ecoli.m52.genome} is \code{character}
  with genome sequence.
  The format for \code{ecoligenomeCHRLOC} is
  an environment (as a hash table). Each key
  is an Affymetrix probe set ID, and each value is a vector of
  two integers (beginning and end; see the details
  below).
  The format for \code{ecoligenomeSYMBOL2AFFY}
  is an environment (as a hash table). Each key is
  a gene symbol name.
  The format for \code{ecoligenomeSYMBOL}
  is an environment (as a hash table). Each key
  is an Affymetrix probe set id
  \code{ecoli.len} is a variable containing the size of the genome in
  \code{ecoli.m52.genome}.
}
\details{
  The environments \code{ecoligenomeSYMBOL2AFFY} and
  \code{ecoligenomeSYMBOL} are like the ones in the
  data packages built by \code{annBuilder}.
  
  The environment
  \code{ecoligenomeCHRLOC} differs: two integers are associated
  with each key, one corresponds to the beginning of the segment,
  the other to the end.

  The environment \code{ecoligenomeSTRAND} returns a \code{logical}.
  \code{TRUE} means that the orientation is `+', \code{FALSE} means
  that the orientation is `-' (and \code{NA} is used when irrelevant
  for the key).
  
}
\source{
  \url{http://www.genome.wisc.edu/sequencing/k12.htm} and
  \url{http://www.biostat.harvard.edu/complab/dchip/info_file.htm}
}

\examples{
data(ecoli.m52.genome)


}
\keyword{datasets}

\eof
\name{ecoligenome.operon}
\alias{ecoligenome.operon}
\docType{data}
\title{ Known operons in E. coli - data.frame}
\description{
  The known operons in the Escherichia coli genome.
}
\usage{data(ecoligenome.operon)}
\format{
  A data frame with 932 observations (genes) on the following 4 variables.
  \describe{
    \item{gene.name}{a character vector}
    \item{gene.annotation}{a character vector}
    \item{operon.name}{a factor with levels the names of the operons}
    \item{operon.comments}{a factor with levels the comments for the operons}
  }
}
\details{
  For some operons, the source of information specifies the existence of
  regulating elements such as promoter, terminator, box, etc\ldots
  In those cases, the \code{gene.name} is set to \code{"Regulation"},
  and the \code{gene.annotation} gives what kind of regulating element
  it is. If there are volunteers, it would be neat to map those onto the genome\ldots
  Besides that, not much to add. The data structure is fairly straightforward.
}
\source{
  Built from the webpage:
  \url{http://www.cib.nig.ac.jp/dda/backup/taitoh/ecoli.operon.html}
}
% \references{
%   ~~ possibly secondary sources and usages ~~
% }
\examples{
library(Biobase)
data(ecoligenome.operon)
data(ecoligenomeSYMBOL2AFFY)

## something that might be useful when working with Affymetrix data:
## get the Affymetrix identifiers for the probe sets bundled in operons
## (see the vignette for more details)
ecoligenome.operon$affyid <-
unname(unlist(multiget(ecoligenome.operon$gene.name,
                       ecoligenomeSYMBOL2AFFY)))

}
\keyword{datasets}

\eof
\name{ecoligenomeBNUM}
\alias{ecoligenomeBNUM}
\alias{ecoligenomeBNUM2SYMBOL}
\alias{ecoligenomeBNUM2ENZYME}
\alias{ecoligenomeBNUM2GENETYPE}
\alias{ecoligenomeBNUM2GENBANK}
\alias{ecoligenomeBNUM2GENEPRODUCT}
\alias{ecoligenomeSYMBOL2BNUM}
\docType{data}
\title{ Environment for 'bnum' identifiers}
\description{
  Environments to associate Affymetrix probe set IDs with 'bnum' IDs
}
\usage{
data(ecoligenomeBNUM)
data(ecoligenomeBNUM2SYMBOL)
data(ecoligenomeBNUM2ENZYME)
data(ecoligenomeBNUM2GENETYPE)
data(ecoligenomeBNUM2GENBANK)
data(ecoligenomeBNUM2GENEPRODUCT)
data(ecoligenomeSYMBOL2BNUM)
}
\format{
  These are \code{environment} objects.
}
\details{
  Escherichia coli genes are sometimes identified by 'bnum's. This
  identifier is typically a 'b' followed by digits.  
}
\source{
  BNUM numbers were parsed out of the Affymetrix identifiers.
  BNUM2* were obtained from the GenProtEC website.
}
\keyword{datasets}

\eof
\name{ecoligenomeBNUM2MULTIFUN}
\alias{ecoligenomeBNUM2MULTIFUN}
\alias{ecoligenomeBNUM2STRAND}
\docType{data}
\title{ Environment }
\description{
  An environment to store associations between 'bnum' identifiers (key)
  and 'MultiFun' identifiers (or strand information).
}
\usage{data(ecoligenomeBNUM2MULTIFUN)}
\format{
  The format is:
length 0 <environment>
 - attr(*, "comments")= chr "GenProtEC: MultiFun assignments for E. coli modules
September 17th, 2003"
}
\details{
  'MultiFun' is a classification scheme. The structure is
  'approximately tree-like'.
  Several 'MultiFun' numbers can be assigned to one 'bnum'.
}
\source{
  \url{http://genprotec.mbl.edu/files/MultiFun.txt}
}
\keyword{datasets}

\eof
\name{gccontent}
\alias{gccontent}
\title{ function to compute gccontent }
\description{
  A simple R function to compute the GC content of a sequence
}
\usage{
gccontent(x)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{x}{ a vector of mode \code{character} }
}
\details{
  This is a simple (and not particularly fast) function to compute the GC
  content of a sequence. When speed is an issue, one should use the
  function in the package \code{matchprobes}. This function only exists
  to avoid dependency on this package.
}
\value{
  The GC content (\code{numeric})
}
%\seealso{ ~~objects to See Also as \code{\link{~~fun~~}}, ~~~ }
\keyword{ manip }% at least one, from doc/KEYWORDS

\eof
\name{linkedmultiget}
\alias{linkedmultiget}
\title{ A function to look for values across linked environments}
\description{
  A function to look for values across linked environments
}
\usage{
linkedmultiget(x, envir.list = list(), unique = TRUE)
}

\arguments{
  \item{x}{The keys in the first environment in the list.}
  \item{envir.list}{A list of environments.}
  \item{unique}{Simplify the list returned by ensuring that the values
    for each key are unique.}
}
\details{
  Environments can be considered as hashtables. The keys are obviously
  strings, but in some cases the associated values are also
  strings. This is the case for annotation environments (as built with
  the package \code{AnnBuilder}). This function helps to look for values
  across several environments: the keys have associated values in a
  first environment, these values are used as keys in the second
  environment, etc\ldots
}
\value{
  A list of length the length of \code{x}.
}
\author{ Laurent Gautier }
\seealso{ \code{\link[Biobase]{multiget}} }
\examples{
data(ecoligenomeBNUM)
data(ecoligenomeBNUM2MULTIFUN)
data(multiFun)

## get 5 Affymetrix IDs
set.seed(456)
my.affyids <- sample(ls(ecoligenomeBNUM), 5)

## get the MULTIFUN annotations for them
r <- linkedmultiget(my.affyids, list(ecoligenomeBNUM,
                    ecoligenomeBNUM2MULTIFUN, multiFun))

print(r)
}
\keyword{ manip }

\eof
\name{multiFun}
\alias{multiFun}
\alias{ecoligenomeMULTIFUN2GO}
\docType{data}
\title{multiFun classification}
\description{
  The MultiFun classification scheme
}
\usage{
data(multiFun)
data(ecoligenomeMULTIFUN2GO)
}
\format{
These are environments.
}
\details{
  
}
\source{
\url{http://genprotec.mbl.edu/files/MultiFun.txt}
}
\examples{
## To be done...
}
\keyword{datasets}

\eof
\name{pointsCircle}
\alias{linesCircle}
\alias{polygonDisk}
\alias{arrowsArc}
\alias{pointsArc}
\alias{linesArc}
\alias{polygonArc}
\title{ Functions to plot circular related figures }
\description{
  Functions to plot circular related figures
}
\usage{
linesCircle(radius, center.x = 0, center.y = 0, edges = 300, ...)
polygonDisk(radius, center.x = 0, center.y = 0, edges=300,
...)
arrowsArc(theta0, theta1, radius, center.x = 0, center.y = 0, edges = 10,
          length = 0.25, angle = 30, code = 2, ...)
pointsArc(theta0, theta1, radius, center.x = 0, center.y = 0, ...)
linesArc(theta0, theta1, radius, center.x = 0, center.y = 0, ...)
polygonArc(theta0, theta1, radius.in, radius.out,
           center.x = 0, center.y = 0,
           edges = 10,
           col = "black",
           border = NA,
           ...)
}
\arguments{
  \item{theta0, theta1}{ start and end angles for the arc}
  \item{radius}{ radius of the circle }
  \item{radius.in}{ inner radius }
  \item{radius.out}{ outer radius }
  \item{center.x, center.y}{Coordinates for the center of the circle
    (default to (0, 0))}
  \item{edges}{ number of edges the shape is made of }
  \item{col}{color}
  \item{border}{border (see \code{\link[base]{polygon}})}
  \item{length, angle, code}{see the corresponding parameters for the
    function \code{\link{arrows}}}
  \item{\dots}{ optional graphical parameters}
}
\details{
  Details to come\ldots for now the best is to run the examples and experiment by
  yourself\ldots
%%  The functions \code{linesArc}, \code{pointsArc}, \code{arrowsArc} 
}
\value{
  Functions only used for their side effects.
}
\author{ laurent }
\examples{
par(mfrow=c(2,2))
n <- 10
thetas <- rev(seq(0, 2 * pi, length=n))

rhos <- rev(seq(1, n) / n)

xy <- polar2xy(rhos, thetas)
colo <- heat.colors(n)

plot(0, 0, xlim=c(-2, 2), ylim=c(-2, 2), type="n")
for (i in 1:n)
  linesCircle(rhos[i]/2, xy$x[i], xy$y[i])

plot(0, 0, xlim=c(-2, 2), ylim=c(-2, 2), type="n")
for (i in 1:n)
  polygonDisk(rhos[i]/2, xy$x[i], xy$y[i], col=colo[i])

plot(0, 0, xlim=c(-2, 2), ylim=c(-2, 2), type="n", xlab="", ylab="")
for (i in 1:n)
  polygonArc(0, thetas[i],
             rhos[i]/2, rhos[i],
             center.x = xy$x[i], center.y = xy$y[i], col=colo[i])

plot(0, 0, xlim=c(-2, 2), ylim=c(-2, 2), type="n", xlab="", ylab="")
for (i in (1:n)[-1]) {
  linesCircle(rhos[i-1], col="gray", lty=2)
  polygonArc(thetas[i-1], thetas[i],
             rhos[i-1], rhos[i], col=colo[i],
             edges=20)
  arrowsArc(thetas[i-1], thetas[i],
             rhos[i] + 1, col=colo[i],
             edges=20)
}
  
}
\keyword{ aplot }

\eof
\name{polar2xy}
\alias{polar2xy}
\alias{xy2polar}
\alias{rotate}
%- Also NEED an '\alias' for EACH other topic documented here.
\title{ Functions to perform polar coordinate related functions}
\description{
  Functions to perform polar coordinate related functions
}
\usage{
polar2xy(rho, theta)
xy2polar(x, y)
rotate(x, y, alpha)
}
%- maybe also 'usage' for other objects documented here.
\arguments{
  \item{x}{ cartesian coordinate }
  \item{y}{ cartesian coordinate  }
  \item{rho}{ polar radius \code{rho} }
  \item{theta}{ polar angle \code{theta}}
  \item{alpha}{ angle to perform rotation }
}
\details{
  \code{y} and \code{theta} can be respectively missing. In this case,
  \code{x} and \code{rho} are expected to be lists with entries
  \code{x, y}, \code{rho, theta} respectively.
}
\value{
  %Lists of
  %\item x,y
  %or
  %\item rho, theta.
}
\examples{


n <- 40
nn <- 2
thetas <- seq(0, nn * 2 * pi, length=n)

rhos <- seq(1, n) / n

plot(c(-1, 1), c(-1, 1), type="n")
abline(h=0, col="grey")
abline(v=0, col="grey")

xy <- polar2xy(rhos, thetas)

points(xy$x, xy$y, col=rainbow(n))

}
\keyword{ manip}

\eof
\name{polygonChrom}
\alias{polygonChrom}
\alias{linesChrom}
\alias{cPlotCircle}
\alias{chromPos2angle}
\title{ Functions to plot circular chromosome information }
\description{
  Functions to plot circular chromosome information
}
\usage{
cPlotCircle(radius=1, xlim=c(-2, 2), ylim=xlim, edges=300, main=NULL,
            main.inside, ...)

chromPos2angle(pos, len.chrom, rot=pi/2, clockwise=TRUE)

polygonChrom(begin, end, len.chrom, radius.in, radius.out,
             total.edges = 300,
             edges = max(round(abs(end - begin)/len.chrom *
                     total.edges), 2, na.rm = TRUE),
             rot = pi/2, clockwise = TRUE, ...)

linesChrom(begin, end, len.chrom, radius,
             total.edges = 300,
             edges = max(round(abs(end - begin)/len.chrom *
                     total.edges), 2, na.rm = TRUE),
             rot = pi/2, clockwise = TRUE, ...)

ecoli.len
}

\arguments{
  \item{radius}{radius}
  \item{xlim, ylim}{ range for the plot. Can be used to zoom-in a
    particular region.}
  \item{pos}{position (nucleic base coordinate)}
  \item{begin}{ beginning of the segment (nucleic base number). }
  \item{end}{ end of the segment (nucleic base number). }
  \item{len.chrom}{ length of the chromosome in base pairs }
  \item{radius.in}{ inner radius }
  \item{radius.out}{ outer radius }
  \item{total.edges}{ total number of edges for the chromosome}
  \item{edges}{ number of edges for the specific segment(s) }
  \item{rot}{ rotation (default is \code{pi / 2}, bringing the angle
    zero at 12 o'clock)}
  \item{clockwise}{ rotate clockwise. Default to \code{TRUE}.  }
  \item{main, main.inside}{main titles for the plot}
  \item{\dots}{ optional graphical parameters }
}
\details{
  The function \code{chromPos2angle} is a convenience function.
  The variable \code{ecoli.len} contains the size of the Escherichia coli genome
  considered (K12).
}
\value{
  Except \code{chromPos2angle}, the functions are solely used for their
  side effects.
}
\author{ laurent <laurent@cbs.dtu.dk>}
\seealso{ \code{\link{polygonArc}}, \code{\link{polar2xy}} }
\examples{

data(ecoligenomeSYMBOL2AFFY)
data(ecoligenomeCHRLOC)

## find the operon lactose ("lac*" genes)
lac.i <- grep("^lac", ls(ecoligenomeSYMBOL2AFFY))
lac.symbol <- ls(ecoligenomeSYMBOL2AFFY)[lac.i]
lac.affy <- unlist(lapply(lac.symbol, get, envir=ecoligenomeSYMBOL2AFFY))

beg.end <- lapply(lac.affy, get, envir=ecoligenomeCHRLOC)
beg.end <- matrix(unlist(beg.end), nc=2, byrow=TRUE)

lac.o <- order(beg.end[, 1])

lac.i <- lac.i[lac.o]
lac.symbol <- lac.symbol[lac.o]
lac.affy <- lac.affy[lac.o]
beg.end <- beg.end[lac.o, ]

lac.col <- rainbow(length(lac.affy))

par(mfrow=c(2,2))

## plot

cPlotCircle(main="lac genes")
polygonChrom(beg.end[, 1], beg.end[, 2], ecoli.len, 1, 1.2, col=lac.col)
rect(0, 0, 1.1, 1.1, border="red")

cPlotCircle(xlim=c(0, 1.2), ylim=c(0, 1.1))
polygonChrom(beg.end[, 1], beg.end[, 2], ecoli.len, 1, 1.1, col=lac.col)
rect(0.4, 0.8, 0.7, 1.1, border="red")

cPlotCircle(xlim=c(.45, .5), ylim=c(.85, 1.0))
polygonChrom(beg.end[, 1], beg.end[, 2], ecoli.len, 1, 1.03, col=lac.col)

mid.genes <- apply(beg.end, 1, mean)
mid.angles <- chromPos2angle(mid.genes, ecoli.len)
xy <- polar2xy(1.03, mid.angles)
xy.labels <- data.frame(x = seq(0.45, 0.5, length=4), y = seq(0.95, 1.0, length=4))
segments(xy$x, xy$y, xy.labels$x, xy.labels$y, col=lac.col)
text(xy.labels$x, xy.labels$y, lac.symbol, col=lac.col)

}
\keyword{ hplot}
\keyword{ dplot}

\eof
\name{wstringapply}
\alias{wstringapply}
\title{ Apply a function on a window sliding on a string }
\description{
  Apply a function on a window sliding on a string
}
\usage{
wstringapply(x, SIZE, SLIDE, FUN, ...)
}
\arguments{
  \item{x}{ The string }
  \item{SIZE}{ The size of the window (number of characters). }
  \item{SLIDE}{ offset to move at each slide}
  \item{FUN}{ The function to be applied }
  \item{\dots}{ optional parameter for the function \code{FUN} }
}
\details{
  Apply the function \code{FUN} to substrings of \code{x} of length \code{SIZE}.
}
\value{
  A list of size \code{nchar(x) - SIZE}.
}
\author{ ~~who you are~~ }
\examples{
}
\keyword{ manip }


\eof
