\name{AminoAcidAlphabet-class}
\docType{class}
\alias{AminoAcidAlphabet-class}
\alias{BioStringNewValues,AminoAcidAlphabet,numeric-method}

\title{Class "AminoAcidAlphabet" represents alphabets used in amino acid sequences}
\description{ Each object of class "AminoAcidAlphabet" represents an
  amino acid alphabet. This usually also contains the gap character
  represented by \sQuote{-}.}
\section{Slots}{
  \describe{
    \item{\code{letters}:}{Object of class \code{"character"}
      representing the letters of the alphabet. Usually roman
      upper case letters are used. The one exception is the gap
      character which is always \sQuote{-}}
    \item{\code{mapping}:}{Object of class \code{"integer"} representing
      the encoding used to represent these objects internally. }
  }
}
\section{Extends}{
Class \code{"BioAlphabet"}, directly.
}
\section{Methods}{
  \describe{
    \item{BioStringNewValues(alphabet, length.string)}{Given \code{alphabet}
      of class "AminoAcidAlphabet" return a new uninitialized
      BioString object for that alphabet with length
      \code{length.string}. See \code{\link{BioString-class}} for more
      details.}
  }
}
\author{ Saikat DebRoy }
%\examples{ FIXME: add example
%}
\keyword{classes}

\eof
\name{BioAlphabet-class}
\docType{class}
\alias{BioAlphabet-class}
\alias{initialize,BioAlphabet-method}
\alias{gapletter<-,BioAlphabet,character-method}

\title{Class "BioAlphabet" represents alphabets used in biology }
\description{ Each object of class "BioAlphabet" represents a particular
  alphabet used to represent some biological sequence.}
\section{Objects from the Class}{A virtual Class: No objects may be
  created from it.}
\section{Slots}{
  \describe{
    \item{\code{letters}:}{Object of class \code{"character"}
      representing the letters of the alphabet. Usually roman
      upper case letters are used. The one exception is the gap
      character which is always \sQuote{-}}
    \item{\code{mapping}:}{Object of class \code{"integer"} representing
      the encoding used to represent these objects internally. }
    \item{\code{gap}:}{Object of class \code{"character"}, the gap
      character in the alphabet. }
  }
}
\section{Methods}{
  \describe{
    \item{initialize(.Object, letters)}{Initialize an alphabet
      object for the given letters. If none of the letters is the
      gap character \sQuote{-}, then it is added as the first letter.
      An encoding is also created which is stored in the \code{mapping}
      slot. }
    \item{gapletter(x) <- value}{Change the letter representing gaps in
      \code{x} of class "BioAlphabet" to \code{value} which must
      be a single letter not in the base alphabet of \code{x}.}
  }
}
\author{ Saikat DebRoy }
\seealso{
  \code{\link{NucleotideAlphabet-class}} and
  \code{\link{AminoAcidAlphabet-class}}, for two subclasses of the
  \code{"BioAlphabet"} class.
}
\examples{
new("NucleotideAlphabet", letters=c('A', 'G', 'C', 'T'))
new("NucleotideAlphabet", letters=c('-', 'A', 'G', 'C', 'T'))
}
\keyword{classes}

\eof
\name{BioPatternAlphabet-class}
\docType{class}
\alias{BioPatternAlphabet-class}
\alias{BioStringNewValues,BioPatternAlphabet,numeric-method}
\alias{gapletter<-,BioPatternAlphabet,character-method}
\alias{initialize,BioPatternAlphabet-method}

\title{Class representing alphabets used in patterns for matching
  biological sequences }
\description{ Each object of class "BioPatternAlphabet" represents an
  alphabet for patterns that is used to match a specific type of
  biological sequence.}
\section{Objects from the Class}{
  Objects can be created by calls of the form
  \code{new("BioPatternAlphabet", baseAlphabet, letters)} where
  \code{letters} is a named character vector with the names being single
  letters not in the base alphabet and the value corresponding to
  each name being a string made of letters from the base alphabet. Each
  such name-value pair defines a new letter in the pattern alphabet
  which matches all the letters in its value string.
}
\section{Slots}{
  \describe{
    \item{\code{baseAlphabet}:}{Object of class \code{"BioAlphabet"},
      the base alphabet for the letters matched by members of this
      pattern alphabet. }
    \item{\code{letters}:}{Object of class \code{"character"}
      representing the letters of the alphabet. Usually roman
      upper case letters are used. The one exception is the gap
      character which is always \sQuote{-}.}
    \item{\code{mapping}:}{Object of class \code{"integer"} representing
      the encoding used to represent these objects internally. }
    \item{\code{gap}:}{Object of class \code{"character"}, the gap
      character in the alphabet. }
  }
}
\section{Extends}{
Class \code{"BioAlphabet"}, directly.
}
\section{Methods}{
  \describe{
    \item{BioStringNewValues(alphabet, length.string)}{Given \code{alphabet}
      of class "BioPatternAlphabet" return a new uninitialized
      "BioString" object for that alphabet with length
      \code{length.string}. See \code{\link{BioString-class}} for more
      details.}
    \item{gapletter(x) <- value}{Change the letter representing gaps in
      \code{x} of class "BioPatternAlphabet" to \code{value} which must
      be a single letter not in the base alphabet of \code{x}.}
    \item{initialize(.Object, ...)}{Method to initialize an object of
      class "BioPatternAlphabet". Usually used indirectly via a call to
      \code{\link[methods]{new}}.}
  }
}
\author{ Saikat DebRoy }
\seealso{
  \code{\link{BioAlphabet-class}}
}
\examples{
dnaAlph <- new("BioPatternAlphabet",
               DNAAlphabet(), c(N="AGCT",
                                B="CGT",
                                D="AGT",
                                H="ACT",
                                K="GT",
                                M="AC",
                                R="AG",
                                S="CG",
                                V="ACG",
                                W="AT",
                                Y="CT"))
dnaAlph
}
\keyword{classes}

\eof
\name{BioString-class}
\docType{class}
\alias{BioString-class}
\alias{[,BioString-method}
\alias{[,BioString,ANY,ANY,ANY-method}
\alias{[[,BioString-method}
\alias{allSameLetter,BioString,character-method}
\alias{allSameLetter,BioString,BioString-method}
\alias{anySameLetter,BioString,character-method}
\alias{anySameLetter,BioString,BioString-method}
\alias{as.character,BioString-method}
\alias{as.matrix,BioString-method}
\alias{initialize,BioString-method}
\alias{length,BioString-method}
\alias{matchDNAPattern,BioString,BioString-method}
\alias{nchar,BioString-method}
\alias{show,BioString-method}
\alias{substr,BioString-method}
\alias{substring,BioString-method}

\title{Class "BioString", represents a biological sequence}
\description{Class "BioString", contains an encoded string representing a
  biological sequence for a particular alphabet (RNA, DNA or amino
  acid). It represents zero or more substrings of the full string.}
\section{Objects from the Class}{
  Objects can be created by calls of the form
  \code{new("BioString", alphabet, end, start, values, initialized, ...)}.
  However, it is recommended that users should not call this directly.
  For now, use the function \code{\link{NucleotideString}} to create
  objects of class "BioString" that use a nucleotide alphabet (RNA or
  DNA) and the function \code{\link{DNAString}} for objects using the DNA
  alphabet.
}
\section{Slots}{
  \describe{
    \item{\code{alphabet}:}{Object of class \code{"BioAlphabet"},
      the alphabet used in the sequence. }
    \item{\code{initialized}:}{Object of class \code{"logical"},
      \code{TRUE} if the sequence is initialized with values. Users should
      not modify this slot directly. }
    \item{\code{offsets}:}{Object of class \code{"matrix"} and storage
      mode "integer", this stores (in two columns) the start and end
      points of the substrings represented in \code{x}. Rows with the
      first value \code{1} and the second value \code{0} represent empty
      substrings.}
    \item{\code{values}:}{Object of class \code{"externalptr"}, this
      internally stores the actual encoded sequence as a vector. As
      objects of class "externalptr" are passed by reference in R, this
      saves copying of long sequences. }
  }
}
\section{Methods}{
  \describe{
    \item{initialize(.Object, alphabet,
      offsets=cbind(1, 0),
      values=BioStringNewValues(alphabet, end),
      initialized=!missing(values))}{Construct an object of class
      "BioString". Usually not called directly by users. }
    \item{length(x)}{Return the number of substrings represented by
      \code{x}.}
    \item{x[i]}{Return the substrings in \code{x} corresponding to index
      \code{i}.}
    \item{x[[i]]}{Return the substring in \code{x} corresponding to the
      index \code{i}. The index \code{i} must be of length \code{1}.}
    \item{nchar(x)}{Return the number of characters in each substring
      represented in \code{x}.}
    \item{show(object)}{Display \code{object} of class "BioString".}
    \item{as.character(x)}{Convert a "BioString" object to a character
      vector using its native alphabet.}
    \item{as.matrix(x)}{Return a two-column matrix of integers, the
      first column representing the start index and the second column
      representing the end index of the substrings in the full
      string.}
    \item{substr(x, start, stop)}{Return another BioString object with
      value equivalent to \code{substr(as.character(x), start, stop)}.}
    \item{substring(text, first, last)}{Return another BioString object with
      value equivalent to
      \code{substring(as.character(text), first, last)}.}
    \item{matchDNAPattern(pattern, x, algorithm, mismatch)}{Match the DNA string
      \code{x} against \code{pattern} using \code{algorithm}. The pattern
      can use the letters A,C,G,T,- (the last being the gap character)
      and also the wildcards N (matching A,C,G,T), V (matching A,G,C),
      R (matching A,G) and Y (matching C,T).}
    \item{allSameLetter(x, letter)}{Return a logical vector indicating
      which of the elements of \code{x} are entirely made up of the
      letter \code{letter}.}
  }
}
\author{Saikat DebRoy}
\section{The structure of the values slot}{
  The \code{values} slot of the "BioString" class is of class
  "externalptr". It always contains an R vector object in its tag
  field. The other fields are not used at present. The vector in the tag
  field is either a \code{CHARSXP} or an \code{INTSXP}. The exact type
  depends on the length of the alphabet. \code{INTSXP} is used if it is
  more than the number of bits in a C \code{char} type and
  \code{CHARSXP} is used otherwise.

  We use the \code{i}-th bit in the \code{char} or \code{int} (depending
  on whether the vector is of type CHARSXP or INTSXP) to represent the
  \code{i}-th letter in the alphabet where \code{i=0} represents the
  first bit. This effectively means that we can have at most \code{32}
  letters (including gap) in our alphabets for all standard computer
  architectures.
}
\seealso{
  \code{\link{BioAlphabet-class}} and its subclasses for valid alphabet
  objects.
  \code{\link{DNAString}} for creating objects of class "BioString"
  representing DNA sequences.
  \code{\link{NucleotideString}} for creating objects of class "BioString"
  representing DNA or RNA sequences.
}
\examples{
new("BioString", DNAAlphabet()) # creates an empty DNA string
x <- DNAString("AAGCTANA", gap="N")
x
as.character(x)
substr(x, 2, 4)
substring(x, 1, seq(length=nchar(x))) # all prefixes of x
substring(x, seq(length=nchar(x)), nchar(x)) # all suffixes of x
matchDNAPattern("GC", x)
x <- substring(x, 1:3, 3:5)
x[1:2]
x[-3] # same as x[1:2]
x[[3]]
}
\keyword{classes}

\eof
\name{BioStringNewValues}
\alias{BioStringNewValues}
\title{generic to create an object suitable for the values slot of
  the "BioString" class}
\description{
  This generic creates an external pointer object that contains (in its
  tag field) an R vector (of type either \code{INTSXP} or
  \code{CHARSXP}) of the given length. The actual type is determined
  according to the size of the alphabet.
}
\usage{
BioStringNewValues(alphabet, length.string)
}
\arguments{
  \item{alphabet}{ An object of class "BioAlphabet" }
  \item{length.string}{ An integer, the length of the storage in the result }
}
\value{
  An external pointer.
}
\author{Saikat DebRoy}
\seealso{\code{\link{BioString-class}} for the class which uses this
  generic during its initialization and \code{\link{BioAlphabet-class}}
  for the class of the \code{alphabet} parameter.}
\examples{
BioStringNewValues(DNAAlphabet(), 0)
}
\keyword{classes}
\keyword{internal}

\eof
\name{DNAAlphabet}
\alias{DNAAlphabet}
\alias{RNAAlphabet}
\title{ functions to create DNA and RNA alphabets }
\description{
  The function \code{DNAAlphabet} returns an alphabet consisting of the
  letters \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{T} and \sQuote{-} (the last representing a gap).

  The function \code{RNAAlphabet} returns an alphabet consisting of the
  letters \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{U} and \sQuote{-} (the last representing a gap).
}
\usage{
DNAAlphabet()
RNAAlphabet()
}
\value{
  An object of class "NucleotideAlphabet". See
  \code{\link{NucleotideAlphabet-class}} for details.
}
\author{ Saikat DebRoy }
\seealso{ \code{\link{DNAPatternAlphabet}} and
  \code{\link{RNAPatternAlphabet}} return alphabets for standard
  patterns used in DNA and RNA sequences. }
\examples{
DNAAlphabet()
RNAAlphabet()
}
\keyword{classes}

\eof
\name{DNAPatternAlphabet}
\alias{DNAPatternAlphabet}
\alias{RNAPatternAlphabet}
\title{ functions to create DNA and RNA pattern alphabets }
\description{
  The function \code{DNAPatternAlphabet} returns an alphabet consisting of
  standard patterns used in DNA sequences.

  The function \code{RNAPatternAlphabet} returns an alphabet consisting of
  standard patterns used in RNA sequences.
}
\usage{
DNAPatternAlphabet()
RNAPatternAlphabet()
}
\section{Members of the pattern alphabets}{
  In the table below, we provide the letters from the base DNA
  alphabet that are matched by the added letters in the DNA pattern
  alphabet. The corresponding table for RNA is identical except for the
  substitution of \sQuote{U} for \sQuote{T}.

  \tabular{ll}{
    \sQuote{N}\tab matches \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{T}\cr
    \sQuote{B}\tab matches \sQuote{C}, \sQuote{G}, \sQuote{T}\cr
    \sQuote{D}\tab matches \sQuote{A}, \sQuote{G}, \sQuote{T}\cr
    \sQuote{H}\tab matches \sQuote{A}, \sQuote{C}, \sQuote{T}\cr
    \sQuote{K}\tab matches \sQuote{G}, \sQuote{T}\cr
    \sQuote{M}\tab matches \sQuote{A}, \sQuote{C}\cr
    \sQuote{R}\tab matches \sQuote{A}, \sQuote{G}\cr
    \sQuote{S}\tab matches \sQuote{C}, \sQuote{G}\cr
    \sQuote{V}\tab matches \sQuote{A}, \sQuote{C}, \sQuote{G}\cr
    \sQuote{W}\tab matches \sQuote{A}, \sQuote{T}\cr
    \sQuote{Y}\tab matches \sQuote{C}, \sQuote{T}\cr
    \tab \cr
  }
}
\value{
  An object of class "BioPatternAlphabet". See
  \code{\link{BioPatternAlphabet-class}} for details.
}
\author{ Saikat DebRoy }
\seealso{ \code{\link{DNAAlphabet}}, \code{\link{RNAAlphabet}}}
\examples{
DNAPatternAlphabet()
RNAPatternAlphabet()
}
\keyword{classes}

\eof
\name{DNASuffixArray}
\alias{DNASuffixArray}
\title{ function to create a suffix array for a DNA string }
\description{
  This function creates a suffix array for the given DNA strings. The
  sorting of the suffix array can be done based on a given number of
  letters at the beginning of each suffix.
}
\usage{
DNASuffixArray(x, prefixLength = max(nchar(x)))
}
\arguments{
  \item{x}{ an object of class "BioString" representing one or more DNA
    strings. If its length is more than one, suffix arrays for each of
    the strings are created and merged. }
  \item{prefixLength}{ the number of letters at the beginning of each
    suffix that are used in sorting the suffix array. }
}
\value{
  An object of class "BioString", with the suffixes from the argument
  sorted in increasing order. For the purpose of sorting, each string is
  assumed to be padded at the end with a letter lower in order than
  any letter in the alphabet of the string.
}
%\references{ FIXME: add references }
\author{ Saikat DebRoy }
\seealso{ \code{\link{sortDNAString}} for sorting the strings in an
  object of class "BioString". }
\examples{
data('yeastSEQCHR1')
yeast1 <- DNAString(yeastSEQCHR1)
DNASuffixArray(substr(yeast1, 1, 30))
x <- substring(yeast1, c(1, 16), c(15, 30))
x
DNASuffixArray(x)
}
\keyword{ utilities }

\eof
\name{LongestCommonPrefix-class}
\docType{class}
\alias{LongestCommonPrefix-class}
\alias{[,LongestCommonPrefix,matrix,missing,missing-method}
\title{Class of longest common prefix lengths for suffix array elements}
\description{This class represents a matrix of the length of the longest
common prefixes between elements of a suffix array. The diagonal
elements are all zero.}
\section{Objects from the Class}{
  Objects can be created by calling the function
  \code{\link{LongestCommonPrefix}} with the (only) argument a suffix
  array.
}
\section{Slots}{
  \describe{
    \item{\code{abovediag}:}{Object of class \code{"integer"},
      represents the elements just above the diagonal (i.e., the elements of
      the matrix whose column numbers are one more than their row numbers). }
  }
}
\section{Methods}{
  \describe{
    \item{lcp[i]}{The only method for extracting elements from this
      class of objects. Here \code{lcp} is an object of class
      "LongestCommonPrefix" and \code{i} is a matrix of two columns, the
      columns representing the rows and columns of the elements to be
      extracted. The result is an integer vector of the corresponding
      elements.}
  }
}
\references{Dan Gusfield (1997) \emph{Algorithms on strings, trees, and
    sequences}, Cambridge University Press, pp. 152}
\author{Saikat DebRoy}
\seealso{
  \code{\link{LongestCommonPrefix}}
}
\examples{
data('yeastSEQCHR1')
yeast1 <- DNAString(yeastSEQCHR1)
x <- substring(yeast1, c(1, 30))
x
suf <- DNASuffixArray(x)
lcp <- LongestCommonPrefix(suf)
lcp[cbind(1, 1:30)]
}
\keyword{classes}

\eof
\name{LongestCommonPrefix}
\alias{LongestCommonPrefix}
\title{Find the longest common prefixes for a suffix array}
\description{
  Given an object of class "BioString" representing a suffix array, this
  function returns an object (of class "LongestCommonPrefix")
  representing a matrix of the longest common prefixes between any two
  elements in the suffix array.
}
\usage{
LongestCommonPrefix(x)
}
\arguments{
  \item{x}{ An object of class "BioString" representing a suffix array. }
}
\details{
  The suffix array argument to the \code{LongestCommonPrefix} function
  is assumed to be created by the \code{DNASuffixArray} function. In
  other words, the suffixes are assumed to be sorted in increasing
  order.
}
\value{
  An object of class "LongestCommonPrefix".
}
\author{Saikat DebRoy}
\seealso{\code{\link{DNASuffixArray}}, \code{\link{LongestCommonPrefix-class}}}
\examples{
data('yeastSEQCHR1')
yeast1 <- DNAString(yeastSEQCHR1)
x <- substring(yeast1, c(1, 30))
x
suf <- DNASuffixArray(x)
lcp <- LongestCommonPrefix(suf)
}
\keyword{internal}

\eof
\name{NucleotideAlphabet-class}
\docType{class}
\alias{NucleotideAlphabet-class}
\alias{BioStringNewValues,NucleotideAlphabet,numeric-method}
\alias{initialize,NucleotideAlphabet-method}

\title{Class "NucleotideAlphabet" represents alphabets used in DNA or RNA}
\description{ Each object of class "NucleotideAlphabet" represents a
  nucleotide alphabet. Usually this is the RNA or DNA alphabet and
  consists of \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{T} for DNA and \sQuote{A}, \sQuote{C}, \sQuote{G}, \sQuote{U} for RNA
  along with \sQuote{-} which represents a gap in the sequence.}
\section{Objects from the Class}{
  Objects can be created by calls of the form
  \code{new("NucleotideAlphabet", letters = ...)}.
}
\section{Slots}{
  \describe{
    \item{\code{letters}:}{Object of class \code{"character"}
      representing the letters of the alphabet. Usually roman
      upper case letters are used. The one exception is the gap
      character which is always \sQuote{-}.}
    \item{\code{mapping}:}{Object of class \code{"integer"} representing
      the encoding used to represent these objects internally. }
  }
}
\section{Extends}{
Class \code{"BioAlphabet"}, directly.
}
\section{Methods}{
  \describe{
    \item{BioStringNewValues(alphabet, length.string)}{Given \code{alphabet}
      of class "NucleotideAlphabet" return a new uninitialized
      "BioString" object for that alphabet with length
      \code{length.string}. See \code{\link{BioString-class}} for more
      details.}
    \item{initialize(.Object, letters)}{Initialize a nucleotide alphabet
      object for the given letters. If none of the letters is the
      gap character \sQuote{-}, then it is added as the first letter.
      With the gap character, the length of the alphabet should be five.
      An encoding is also created which is stored in the \code{mapping}
      slot. }
  }
}
\author{ Saikat DebRoy }
\examples{
myDNAAlph <- new("NucleotideAlphabet", letters=c('A', 'G', 'C', 'T'))
myDNAAlph
BioStringNewValues(myDNAAlph, 20)
}
\keyword{classes}

\eof
\name{NucleotideString}
\alias{NucleotideString}
\alias{DNAString}
\title{Functions to convert a character string to a BioString object }
\description{
  The function \code{NucleotideString} expects a character string
  representing either an RNA or DNA sequence, and converts it to a
  BioString object with the appropriate alphabet.

  The function \code{DNAString} expects a character string
  representing a DNA sequence, and converts it to a
  BioString object with the standard DNA alphabet.
}
\usage{
NucleotideString(src, type = c("DNA", "RNA"),
                 srctype = c("character", "connection"),
                 alphabet = if (type == "DNA")
                            DNAPatternAlphabet() else RNAPatternAlphabet(),
                 gap = alphabet@gap)
DNAString(src, gap="-")
}
\arguments{
  \item{src}{ A character string }
  \item{type}{ Either "DNA" (the default) or "RNA". }
  \item{srctype}{ Currently the only valid value (and the default) is
    "character". In future, we may allow "connection" as another valid
    value denoting that the \code{src} argument is a file name.}
  \item{alphabet}{Alphabet to be used for the string - usually one of
    DNAAlphabet() or RNAAlphabet(). }
  \item{gap}{The character in the input that represents a gap. All
    occurrences of this character in the input are
    converted to the gap character of the alphabet (which is usually
    \sQuote{-}).}
}
\value{
  An object of class "BioString".
}
\author{Saikat DebRoy}
\seealso{\code{\link{BioString-class}} for the class of the return
  value.

  \code{\link{DNAAlphabet}} and \code{\link{RNAAlphabet}} for
  creating the standard DNA and RNA alphabets.
}
\examples{
NucleotideString("ACTGAACT")
DNAString("ACTGAACT-GC")
DNAString("ACTGAACTNGC", gap="N")
}
\keyword{classes}

\eof
\name{allSameLetter}
\alias{allSameLetter}
\alias{allSameLetter,character,ANY-method}
\title{ Generic to find the strings which are repeats of a single letter}
\description{
  This method finds all strings in a "BioString" object which consist of
  a particular letter repeating again and again. Note that this does not
  do a pattern matching. So, if the letter is "N" for a BioString object
  with DNA patterns, then it would only match strings which are repeats
  of "N" and not others. See the examples for an illustration.
}
\usage{
allSameLetter(x, letter)
}
\arguments{
  \item{x}{ An object of class "BioString" or a character vector. In the
  latter case it is converted to a BioString object.}
  \item{letter}{ A single letter. It should be part of the alphabet of
    \code{x}. }
}
\value{
  A logical vector of same length as \code{x} with elements \code{TRUE}
  or \code{FALSE} accordingly as the corresponding element of \code{x}
  is a repeat of \code{letter} or not.
}
\author{ Saikat DebRoy }
\seealso{\code{\link{BioString-class}}}
\examples{
PpiI <- "GAACNNNNNCTC"
D1 <-
  DNAString("tgctgatgcatagctagctgGAACtagctCTCtcgtagctggatgctgatNNNNNNNNNNNN")
matches <- matchDNAPattern(PpiI, D1)
matches
allN <- allSameLetter(matches, 'N')
allN
matches[!allN]
}
\keyword{classes}
\keyword{methods}

\eof
\name{alphabetFrequency}
\alias{alphabetFrequency}
\title{ function to calculate the frequency of letters in a biological sequence }
\description{
  Given an object of class "BioString" representing a biological
  sequence, this function calculates the frequency of each letter in the
  (base) alphabet for the "BioString" object.
}
\usage{
alphabetFrequency(x, baseOnly = TRUE)
}
\arguments{
  \item{x}{ An object of class "BioString". }
  \item{baseOnly}{ A logical value. If \code{TRUE}, the frequency table
    only contains the letters in the base alphabet for \code{x}. }
}
\value{
  An integer vector with names same as the letters in the alphabet. If
  the base alphabet is used for the frequency counts, there is an extra
  element with name 'Others' and it is the number of letters in the
  string which are not in the base alphabet. Usually, a non-zero entry
  here means that the string contains some letters from a pattern
  alphabet.
}
\author{ Saikat DebRoy }
\seealso{ \code{\link{BioString-class}},
  \code{\link{BioAlphabet-class}}, \code{\link{BioPatternAlphabet-class}} }
\examples{
data(yeastSEQCHR1)
yeast1 <- DNAString(yeastSEQCHR1)
alphabetFrequency(yeast1)
alphabetFrequency(yeast1, baseOnly=FALSE)
}
\keyword{category}

\eof
\name{anySameLetter}
\alias{anySameLetter}
\alias{anySameLetter,character,ANY-method}
\title{ Generic to find the strings which contain a particular letter}
\description{
  This method finds all strings in a "BioString" object which contain
  at least one occurrence of a particular letter. Note that this does not
  do a pattern matching. So, if the letter is "N" for a BioString object
  with DNA patterns, then it would only match strings which contain at
  least one instance of "N" and not others. See the examples for an
  illustration.
}
\usage{
anySameLetter(x, letter)
}
\arguments{
  \item{x}{ An object of class "BioString" or a character vector. In the
  latter case it is converted to a BioString object.}
  \item{letter}{ A single letter. It should be part of the alphabet of
    \code{x}. }
}
\value{
  A logical vector of same length as \code{x} with elements \code{TRUE}
  or \code{FALSE} accordingly as the corresponding element of \code{x}
  contains \code{letter} or not.
}
\author{ Saikat DebRoy }
\seealso{\code{\link{BioString-class}}}
\examples{
PpiI <- "GAACNNNNNCTC"
D1 <-
  DNAString("tgctgatgcatagctagctgGAACtagctCTCtcgtagctggatgctgatNNNNNNNNNNNN")
matches <- matchDNAPattern(PpiI, D1)
matches
anyN <- anySameLetter(matches, 'N')
anyN
matches[!anyN]
}
\keyword{classes}
\keyword{methods}

\eof
\name{gapletter<-}
\alias{gapletter<-}
\title{ function to modify the gap letter in an alphabet }
\description{
  This function modifies the gap letter in a "BioAlphabet" object.
}
\usage{
gapletter(x) <- value
}
\arguments{
  \item{x}{ An object of class "BioAlphabet" }
  \item{value}{ A single letter. }
}
\value{
  The modified object of class "BioAlphabet"
}
\author{ Saikat DebRoy }
\seealso{\code{\link{BioAlphabet-class}} }
\examples{
d <- DNAAlphabet()
gapletter(d) <- 'N'
d
r <- RNAPatternAlphabet()
gapletter(r) <- '*'
r
}
\keyword{classes}

\eof
\name{matchDNAPattern}
\alias{matchDNAPattern}
\alias{matchDNAPattern,character,ANY-method}
\alias{matchDNAPattern,ANY,character-method}
\title{ Generic to find all matches of a pattern in a DNA string}
\description{
  Generic that finds all matches of a pattern in a DNA string. Currently
  two algorithms are implemented. The default algorithm is an extension
  of the Boyer-Moore algorithm. The extended algorithm allows
  some wildcards in addition to the symbols for the bases and gap. The
  other algorithm is a simple forward search that examines all
  substrings of the full string of the same length as the pattern from
  the beginning to end.
}
\usage{
matchDNAPattern(pattern, x, algorithm, mismatch)
}
\arguments{
  \item{pattern}{ An object representing the pattern string. The string in
    \code{pattern} can use any of the standard DNA pattern letters. See
    \code{\link{DNAPatternAlphabet}} for all valid letters.}
  \item{x}{ An object representing a DNA string. }
  \item{algorithm}{ Currently the only valid values are
    \code{"boyer-moore"}, \code{"forward-search"}
    and \code{"shift-or"}. The forward search algorithm is often as
    fast as the more sophisticated Boyer-Moore algorithm when the
    patterns being matched are very simple. The shift-or algorithm is
    even faster. However, it can only be used for patterns of length at
    most 32 or 64 depending on the number of bits in a machine word. The
    shift-or algorithm can also do inexact matches for a given number of
    mismatches. The default is "shift-or" where valid and "boyer-moore"
    otherwise.}
  \item{mismatch}{ An integer, the number of mismatches allowed. The
    default is 0. If the value is non-zero an inexact match algorithm
    is used for matching. }
}
\value{
  An object of class "BioString" with the same length as the number of
  matches. Each element in the "BioString" object is a match. To obtain
  the start and end points of the matches, use \code{as.matrix} on the
  return value. See documentation for the "BioString" class for more
  details.
}
\author{ Saikat DebRoy }
\references{Dan Gusfield - Algorithms on strings, trees, and sequences}
\seealso{ \code{\link{BioString-class}} for the type of the return value. }
\examples{
x <- DNAString("AAGCGCGATATG")
m1 <- matchDNAPattern("GCNNNAT", x)
m1
as.matrix(m1)
m2 <- matchDNAPattern("GCNNNAT", x, algorithm="forward-search")
m2
as.matrix(m2)
data('yeastSEQCHR1')
yeast1 <- DNAString(yeastSEQCHR1)
PpiI <- "GAACNNNNNCTC" # a restriction enzyme pattern
match1.PpiI <- matchDNAPattern(PpiI, yeast1)
match2.PpiI <- matchDNAPattern(PpiI, yeast1, algorithm="forward-search")
match1.PpiI
match2.PpiI
match3.PpiI <- matchDNAPattern(PpiI, yeast1, mismatch=1)
match3.PpiI
}
\keyword{classes}

\eof
\name{reverseComplement}
\alias{reverseComplement}
\title{ Function to reverse a DNA or RNA sequence and complement each base}
\description{
  Given an object of class "BioString", this function complements each
  base in the underlying string (replacing A with T or U, T or U with A,
  C with G and G with C) and reverses it. It also changes the substring
  indices so that they reflect the reversing.
}
\usage{
reverseComplement(x)
}
\arguments{
  \item{x}{ an object of class "BioString" }
}
\value{
  An object of class "BioString" containing the reversed and complemented
  underlying string and substring indices.
}
\author{ Saikat DebRoy }
\examples{
reverseComplement(DNAString("ATCG-AA"))
}
\keyword{manip}

\eof
\name{sortDNAString}
\alias{sortDNAString}
\title{ function to sort DNA strings }
\description{
  This function sorts a given object of class "BioString" representing
  a vector of DNA strings in increasing order. The sorting can be done
  based only on a given number of letters at the beginning of each string.
}
\usage{
sortDNAString(x, prefixLength = max(nchar(x)))
}
\arguments{
  \item{x}{ an object of class "BioString" representing one or more DNA
    strings. }
  \item{prefixLength}{ the number of letters at the beginning of each
    string that are used in the sorting. }
}
\value{
  An object of class "BioString", with the strings from the argument
  sorted in increasing order. For the purpose of sorting, each string is
  assumed to be padded at the end with a letter lower in order than
  any letter in the alphabet of the string.
}
\author{ Saikat DebRoy }
\seealso{ \code{\link{DNASuffixArray}} }
\examples{
data('yeastSEQCHR1')
yeast1 <- DNAString(yeastSEQCHR1)
x <- substring(yeast1, seq(1, by=10, length=30),
                    seq(10, by=10, length=30))
x
sortDNAString(x)
x <- substr(yeast1, 1, 30)
x
sortDNAString(substring(x, 1:30, 30)) # suffix array for x
}
\keyword{ utilities }

\eof
\name{yeastSEQCHR1}
\alias{yeastSEQCHR1}
\title{An annotation data file for CHR1 in the yeastSEQ package}
\description{
This is a single character string containing the DNA sequence of yeast chromosome number 1.  The data were obtained from the Saccharomyces Genome Database (\url{ftp://genome-ftp.stanford.edu/pub/yeast/data_download/sequence/genomic_sequence/chromosomes/fasta/}).
}
\details{
Annotation based on data provided by Yeast Genome project.

Source data built:Yeast Genome data are built at various time intervals. Sources used were downloaded Fri Nov 21 14:00:47 2003
Package built: Fri Nov 21 14:00:47 2003
}
\references{
\url{http://www.yeastgenome.org/DownloadContents.shtml}
}
\examples{
data(yeastSEQCHR1)
nchar(yeastSEQCHR1)
}
\keyword{datasets}


\eof
