### Name: alphabetFrequency
### Title: Functions to calculate the frequency of letters in a biological
###   sequence
### Aliases: alphabetFrequency alphabetFrequency,XString-method
###   alphabetFrequency,DNAString-method alphabetFrequency,RNAString-method
###   alphabetFrequency,XStringSet-method
###   alphabetFrequency,DNAStringSet-method
###   alphabetFrequency,RNAStringSet-method
###   alphabetFrequency,XStringViews-method
###   alphabetFrequency,MaskedXString-method oligonucleotideFrequency
###   oligonucleotideFrequency,DNAString-method
###   oligonucleotideFrequency,RNAString-method
###   oligonucleotideFrequency,XStringSet-method
###   oligonucleotideFrequency,XStringViews-method
###   oligonucleotideFrequency,MaskedXString-method dinucleotideFrequency
###   trinucleotideFrequency strrev mkAllStrings
### Keywords: category

### ** Examples

  ## Load the 'yeastSEQCHR1' dataset and wrap it in a DNAString object
  data("yeastSEQCHR1")
  yeast1 <- DNAString(yeastSEQCHR1)

  ## Letter frequencies, first with the defaults, then with 'baseOnly=TRUE'
  alphabetFrequency(yeast1)
  alphabetFrequency(yeast1, baseOnly=TRUE)

  ## Di-, tri- and oligonucleotide (here 4-mer) frequencies
  dinucleotideFrequency(yeast1)
  trinucleotideFrequency(yeast1)
  oligonucleotideFrequency(yeast1, width=4)

  ## The input can also be a set of sequences
  library(drosophila2probe)
  x <- DNAStringSet(drosophila2probe$sequence)
  alphabetFrequency(x[seq_len(50)], baseOnly=TRUE)  # first 50 sequences only
  alphabetFrequency(x, collapse=TRUE, baseOnly=TRUE)  # whole set, collapsed

  ## Get the least and most represented 6-mers (ties are all reported)
  f6 <- oligonucleotideFrequency(yeast1, width=6)
  f6[which(f6 == min(f6))]
  f6[which(f6 == max(f6))]

  ## Get the result as an array: with 'as.array=TRUE' the frequencies are
  ## indexed with one letter per dimension instead of one 3-letter name
  tri <- trinucleotideFrequency(yeast1, as.array=TRUE)
  tri["A", "A", "C"] # == trinucleotideFrequency(yeast1)["AAC"]
  tri["T", , ] # frequencies of trinucleotides starting with a "T"

  ## Note that when dropping the dimensions of the 'tri' array, elements
  ## in the resulting vector are ordered as if they were obtained with
  ## 'fast.moving.side="left"':
  triL <- trinucleotideFrequency(yeast1, fast.moving.side="left")
  ## as.vector() flattens 'tri'; the element-wise comparison with 'triL'
  ## checks that both orderings agree
  all(as.vector(tri) == triL) # TRUE

  ## Convert the trinucleotide frequency into the amino acid frequency based on
  ## translation: each trinucleotide is relabelled through GENETIC_CODE, then
  ## the counts sharing the same label are added up
  tri1 <- trinucleotideFrequency(yeast1)
  names(tri1) <- GENETIC_CODE[names(tri1)]
  ## vapply() is used instead of sapply() so that the result is always a
  ## named numeric vector (one total per label), whatever the input
  ## 12512 occurrences of the stop codon
  vapply(split(tri1, names(tri1)), sum, numeric(1))

  ## For a very long result vector (e.g. width >= 10), skipping the labels
  ## with 'with.labels=FALSE' will improve the performance considerably
  ## (100x, 1000x or more):
  f12 <- oligonucleotideFrequency(yeast1, width=12, with.labels=FALSE) # very fast!

  ## Some related utility functions: 'dict1' and 'dict2' are built from the
  ## same 3-letter alphabet and differ only in 'fast.moving.side'; applying
  ## strrev() to 'dict1' yields 'dict2'
  dict1 <- mkAllStrings(c("A", "B", "C"), 4)
  dict2 <- mkAllStrings(c("A", "B", "C"), 4, fast.moving.side="left")
  identical(strrev(dict1), dict2) # TRUE



