Skip to content


Subversion checkout URL

You can clone with
Download ZIP
tree: 872a54c996
Fetching contributors…

Cannot retrieve contributors at this time

922 lines (892 sloc) 48.89 kb
% This file was created with JabRef 2.3b2.
% Encoding: Cp1252
author = {Markus Bauer and Gunnar W Klau and Knut Reinert},
title = {{Accurate multiple sequence-structure alignment of RNA sequences
using combinatorial optimization.}},
journal = {BMC Bioinformatics},
year = {2007},
volume = {8},
pages = {271},
abstract = {BACKGROUND: The discovery of functional non-coding RNA sequences has
led to an increasing interest in algorithms related to RNA analysis.
Traditional sequence alignment algorithms, however, fail at computing
reliable alignments of low-homology RNA sequences. The spatial conformation
of RNA sequences largely determines their function, and therefore
RNA alignment algorithms have to take structural information into
account. RESULTS: We present a graph-based representation for sequence-structure
alignments, which we model as an integer linear program (ILP). We
sketch how we compute an optimal or near-optimal solution to the
ILP using methods from combinatorial optimization, and present results
on a recently published benchmark set for RNA alignments. CONCLUSION:
The implementation of our algorithm yields better alignments in terms
of two published scores than the other programs that we tested: This
is especially the case with an increasing number of input sequences.
Our program LARA is freely available for academic purposes from},
doi = {10.1186/1471-2105-8-271},
institution = {Department of Mathematics and Computer Science, Free University Berlin,
Berlin, Germany.},
keywords = {Algorithms; Computer Simulation; Linear Models; Models, Chemical;
Models, Genetic; RNA; Sensitivity and Specificity; Sequence Alignment;
Sequence Analysis, RNA; Sequence Homology, Nucleic Acid},
owner = {binf007},
pii = {1471-2105-8-271},
pmid = {17662141},
timestamp = {2008.01.21},
url = {}
author = {Joachim Friedrich and Thomas Dandekar and Matthias Wolf and Tobias
Müller},
title = {{ProfDist: a tool for the construction of large phylogenetic trees
based on profile distances.}},
journal = {Bioinformatics},
year = {2005},
volume = {21},
pages = {2108--2109},
number = {9},
month = {May},
abstract = {SUMMARY: ProfDist is a user-friendly software package using the profile-neighbor-joining
method (PNJ) in inferring phylogenies based on profile distances
on DNA or RNA sequences. It is a tool for reconstructing and visualizing
large phylogenetic trees providing new and standard features with
a special focus on time efficiency, robustness and accuracy. AVAILABILITY:
A Windows version of ProfDist comes with a graphical user interface
and is freely available at},
doi = {10.1093/bioinformatics/bti289},
institution = {Department of Bioinformatics, Biocenter, University of Würzburg,
Am Hubland, D-97074 Würzburg, Germany.},
keywords = {Algorithms; Chromosome Mapping; DNA Mutational Analysis; Gene Expression
Profiling; Linkage Disequilibrium; Phylogeny; Sequence Alignment;
Sequence Analysis, DNA; Software; User-Computer Interface},
owner = {binf007},
pii = {bti289},
pmid = {15677706},
timestamp = {2008.01.21},
url = {}
author = {O. Gascuel},
title = {{BIONJ: an improved version of the NJ algorithm based on a simple
model of sequence data.}},
journal = {Mol Biol Evol},
year = {1997},
volume = {14},
pages = {685--695},
number = {7},
month = {Jul},
abstract = {We propose an improved version of the neighbor-joining (NJ) algorithm
of Saitou and Nei. This new algorithm, BIONJ, follows the same agglomerative
scheme as NJ, which consists of iteratively picking a pair of taxa,
creating a new node which represents the cluster of these taxa, and
reducing the distance matrix by replacing both taxa by this node.
Moreover, BIONJ uses a simple first-order model of the variances
and covariances of evolutionary distance estimates. This model is
well adapted when these estimates are obtained from aligned sequences.
At each step it permits the selection, from the class of admissible
reductions, of the reduction which minimizes the variance of the
new distance matrix. In this way, we obtain better estimates to choose
the pair of taxa to be agglomerated during the next steps. Moreover,
in comparison with NJ's estimates, these estimates become better
and better as the algorithm proceeds. BIONJ retains the good properties
of NJ--especially its low run time. Computer simulations have been
performed with 12-taxon model trees to determine BIONJ's efficiency.
When the substitution rates are low (maximum pairwise divergence
approximately 0.1 substitutions per site) or when they are constant
among lineages, BIONJ is only slightly better than NJ. When the substitution
rates are higher and vary among lineages, BIONJ clearly has better
topological accuracy. In the latter case, for the model trees and
the conditions of evolution tested, the topological error reduction
is on the average around 20\%. With highly-varying-rate trees and
with high substitution rates (maximum pairwise divergence approximately
1.0 substitutions per site), the error reduction may even rise above
50\%, while the probability of finding the correct tree may be augmented
by as much as 15\%.},
institution = {GERAD, Ecole des HEC, Montreal, Quebec, Canada.},
keywords = {Algorithms; Evolution; Models, Biological; Phylogeny; Sequence Analysis;
owner = {binf007},
pmid = {9254330},
timestamp = {2008.01.21}
author = {Daniel Gerlach and Matthias Wolf and Thomas Dandekar and Tobias Müller
and Andreas Pokorny and Sven Rahmann},
title = {{Deep metazoan phylogeny.}},
journal = {In Silico Biol},
year = {2007},
volume = {7},
pages = {151--154},
number = {2},
abstract = {We reconstructed a robust phylogenetic tree of the Metazoa, consisting
of almost 1,500 taxa, by profile neighbor joining (PNJ), an automated
computational method that inherits the efficiency of the neighbor
joining algorithm. This tree supports the one proposed in the latest
review on metazoan phylogeny. Our main goal is not to discuss aspects
of the phylogeny itself, but rather to point out that PNJ can be
a valuable tool when the basal branching pattern of a large phylogenetic
tree must be estimated, whereas traditional methods would be computationally
impractical.},
institution = {Department of Bioinformatics, Biocenter, University of Würzburg,
Am Hubland, D-97074 Würzburg, Germany.},
keywords = {Algorithms; Animals; Computational Biology; Models, Genetic; Phylogeny;
RNA, Ribosomal, 18S},
owner = {binf007},
pii = {2007070015},
pmid = {17688440},
timestamp = {2008.01.21}
author = {Alejandro Grajales and Catalina Aguilar and Juan A Sánchez},
title = {Phylogenetic reconstruction using secondary structures of Internal
Transcribed Spacer 2 (ITS2, rDNA): finding the molecular and morphological
gap in Caribbean gorgonian corals.},
journal = {BMC Evol Biol},
year = {2007},
volume = {7},
pages = {90},
abstract = {BACKGROUND: Most phylogenetic studies using current methods have focused
on primary DNA sequence information. However, RNA secondary structures
are particularly useful in systematics because they include characteristics,
not found in the primary sequence, that give "morphological" information.
Despite the number of recent molecular studies on octocorals, there
is no consensus opinion about a region that carries enough phylogenetic
resolution to solve intrageneric or close species relationships.
Moreover, intrageneric morphological information by itself does not
always produce accurate phylogenies; intra-species comparisons can
reveal greater differences than intra-generic ones. The search for
new phylogenetic approaches, such as by RNA secondary structure analysis,
is therefore a priority in octocoral research. RESULTS: Initially,
twelve predicted RNA secondary structures were reconstructed to provide
the basic information for phylogenetic analyses; they accorded with
the 6 helicoidal ring model, also present in other groups of corals
and eukaryotes. We obtained three similar topologies for nine species
of the Caribbean gorgonian genus Eunicea (candelabrum corals) with
two sister taxa as outgroups (genera Plexaura and Pseudoplexaura)
on the basis of molecular morphometrics of ITS2 RNA secondary structures
only, traditional primary sequence analyses and maximum likelihood,
and a Bayesian analysis of the combined data. The latter approach
allowed us to include both primary sequence and RNA molecular morphometrics;
each data partition was allowed to have a different evolution rate.
In addition, each helix was partitioned as if it had evolved at a
distinct rate. Plexaura flexuosa was found to group within Eunicea;
this was best supported by both the molecular morphometrics and combined
analyses. We suggest Eunicea flexuosa (Lamouroux, 1821) comb. nov.,
and we present a new species description including Scanning Electron
Microscopy (SEM) images of morphological characteristics (sclerites).
Eunicea flexuosa, E. pallida, E. laxispica and E. mammosa formed
a separate clade in the molecular phylogenies, and were reciprocally
monophyletic with respect to other Eunicea (Euniceopsis subgenus,
e.g. E. tourneforti and E. laciniata) in the molecular morphometrics
tree, with the exception of E. fusca. Moreover, we suggest a new
diagnostic character for Eunicea, also present in E. flexuosa: middle
layer sclerites > 1 mm in length. CONCLUSION: ITS2 was a reliable
sequence for intrageneric studies in gorgonian octocorals because
of the amount of phylogenetic signal, and was corroborated against
morphological characters separating Eunicea from Plexaura. The ITS2
RNA secondary structure approach to phylogeny presented here did
not rely on alignment methods such as INDELS, but provided clearly
homologous characters for partition analysis and RNA molecular morphometrics.
These approaches support the divergence of Eunicea flexuosa comb.
nov. from the outgroup Plexaura, although it has been considered
part of this outgroup for nearly two centuries because of morphological
resemblance.},
doi = {10.1186/1471-2148-7-90},
institution = {},
keywords = {Animals; Anthozoa; DNA, Ribosomal Spacer; Evolution, Molecular; Genetic
Speciation; Nucleic Acid Conformation; Phylogeny; RNA, Ribosomal;
Variation (Genetics)},
owner = {binf007},
pii = {1471-2148-7-90},
pmid = {17562014},
timestamp = {2008.07.30},
url = {}
author = {Hochsmann, M. and Voss, B. and Giegerich, R.},
title = {{Pure multiple RNA secondary structure alignments: a progressive
profile approach}},
journal = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
year = {2004},
volume = {1},
pages = {53--62},
number = {1},
abstract = {In functional, noncoding RNA, structure is often essential to function.
While the full 3D structure is very difficult to determine, the 2D
structure of an RNA molecule gives good clues to its 3D structure,
and for molecules of moderate length, it can be predicted with good
reliability. Structure comparison is, in analogy to sequence comparison,
the essential technique to infer related function. We provide a method
for computing multiple alignments of RNA secondary structures under
the tree alignment model, which is suitable to cluster RNA molecules
purely on the structural level, i.e., sequence similarity is not
required. We give a systematic generalization of the profile alignment
method from strings to trees and forests. We introduce a tree profile
representation of RNA secondary structure alignments which allows
reasonable scoring in structure comparison. Besides the technical
aspects, an RNA profile is a useful data structure to represent multiple
structures of RNA sequences. Moreover, we propose a visualization
of RNA consensus structures that is enriched by the full sequence
information.},
booktitle = {IEEE/ACM Transactions on Computational Biology and Bioinformatics},
doi = {10.1109/TCBB.2004.11},
editor = {Voss, B.},
issn = {1545-5963},
keywords = {biology computing, macromolecules, molecular biophysics, trees (mathematics),
forest alignment, functional noncoding RNA structure, progressive
profile approach, pure multiple RNA secondary structure alignments,
strings, structure comparison, tree alignment model, tree profile
representation, 65, Index Terms- Alignment of trees, RNA secondary
structures, noncoding RNAs.},
owner = {binf007},
timestamp = {2008.01.23}
author = {Cendrine Hudelot and Vivek Gowri-Shankar and Howsun Jow and Magnus
Rattray and Paul G Higgs},
title = {{RNA-based phylogenetic methods: application to mammalian mitochondrial
RNA sequences.}},
journal = {Mol Phylogenet Evol},
year = {2003},
volume = {28},
pages = {241--252},
number = {2},
month = {Aug},
abstract = {The PHASE software package allows phylogenetic tree construction with
a number of evolutionary models designed specifically for use with
RNA sequences that have conserved secondary structure. Evolution
in the paired regions of RNAs occurs via compensatory substitutions,
hence changes on either side of a pair are correlated. Accounting
for this correlation is important for phylogenetic inference because
it affects the likelihood calculation. In the present study we use
the complete set of tRNA and rRNA sequences from 69 complete mammalian
mitochondrial genomes. The likelihood calculation uses two evolutionary
models simultaneously for different parts of the sequence: a paired-site
model for the paired sites and a single-site model for the unpaired
sites. We use Bayesian phylogenetic methods and a Markov chain Monte
Carlo algorithm is used to obtain the most probable trees and posterior
probabilities of clades. The results are well resolved for almost
all the important branches on the mammalian tree. They support the
arrangement of mammalian orders within the four supra-ordinal clades
that have been identified by studies of much larger data sets mainly
comprising nuclear genes. Groups such as the hedgehogs and the murid
rodents, which have been problematic in previous studies with mitochondrial
proteins, appear in their expected position with the other members
of their order. Our choice of genes and evolutionary model appears
to be more reliable and less subject to biases caused by variation
in base composition than previous studies with mitochondrial genomes.},
institution = {School of Biological Sciences, University of Manchester, UK.},
keywords = {Animals; Bayes Theorem; Mammals; Models, Genetic; Phylogeny; RNA},
owner = {binf007},
pii = {S1055790303000617},
pmid = {12878461},
timestamp = {2008.01.21}
author = {H. Jow and C. Hudelot and M. Rattray and P. G. Higgs},
title = {Bayesian phylogenetics using an RNA substitution model applied to
early mammalian evolution.},
journal = {Mol Biol Evol},
year = {2002},
volume = {19},
pages = {1591--1601},
number = {9},
month = {Sep},
abstract = {We study the phylogeny of the placental mammals using molecular data
from all mitochondrial tRNAs and rRNAs of 54 species. We use probabilistic
substitution models specific to evolution in base paired regions
of RNA. A number of these models have been implemented in a new phylogenetic
inference software package for carrying out maximum likelihood and
Bayesian phylogenetic inferences. We describe our Bayesian phylogenetic
method which uses a Markov chain Monte Carlo algorithm to provide
samples from the posterior distribution of tree topologies. Our results
show support for four primary mammalian clades, in agreement with
recent studies of much larger data sets mainly comprising nuclear
DNA. We discuss some issues arising when using Bayesian techniques
on RNA sequence data.},
institution = {Department of Computer Science, University of Manchester, UK.},
keywords = {Animals; Base Sequence; Bayes Theorem; Databases, Genetic; Evolution,
Molecular; Humans; Mammals; Models, Genetic; Phylogeny; RNA; Species
Specificity},
owner = {binf007},
pmid = {12200486},
timestamp = {2008.06.06}
author = {Jukes, T. and Cantor, C.R.},
title = {Evolution of protein molecules},
booktitle = {Mammalian Protein Metabolism},
publisher = {Academic Press},
year = {1969},
editor = {Munro, H},
pages = {21--132},
address = {New York, USA}
author = {Tobias Müller and Sven Rahmann and Thomas Dandekar and Matthias Wolf},
title = {{Accurate and robust phylogeny estimation based on profile distances:
a study of the Chlorophyceae (Chlorophyta).}},
journal = {BMC Evol Biol},
year = {2004},
volume = {4},
pages = {20},
month = {Jun},
abstract = {BACKGROUND: In phylogenetic analysis we face the problem that several
subclade topologies are known or easily inferred and well supported
by bootstrap analysis, but basal branching patterns cannot be unambiguously
estimated by the usual methods (maximum parsimony (MP), neighbor-joining
(NJ), or maximum likelihood (ML)), nor are they well supported. We
represent each subclade by a sequence profile and estimate evolutionary
distances between profiles to obtain a matrix of distances between
subclades. RESULTS: Our estimator of profile distances generalizes
the maximum likelihood estimator of sequence distances. The basal
branching pattern can be estimated by any distance-based method,
such as neighbor-joining. Our method (profile neighbor-joining, PNJ)
then inherits the accuracy and robustness of profiles and the time
efficiency of neighbor-joining. CONCLUSIONS: Phylogenetic analysis
of Chlorophyceae with traditional methods (MP, NJ, ML and MrBayes)
reveals seven well supported subclades, but the methods disagree
on the basal branching pattern. The tree reconstructed by our method
is better supported and can be confirmed by known morphological characters.
Moreover the accuracy is significantly improved as shown by parametric
bootstrap.},
doi = {10.1186/1471-2148-4-20},
institution = {Department of Bioinformatics, Biocenter, University of Würzburg,
Am Hubland, D-97074 Würzburg, Germany.},
keywords = {Algae, Green; Animals; Computational Biology; Evolution, Molecular;
Models, Genetic; Phylogeny; RNA, Plant; RNA, Ribosomal; RNA, Ribosomal,
18S},
owner = {binf007},
pii = {1471-2148-4-20},
pmid = {15222898},
timestamp = {2008.01.21},
url = {}
author = {Tobias Müller and Rainer Spang and Martin Vingron},
title = {{Estimating amino acid substitution models: a comparison of Dayhoff's
estimator, the resolvent approach and a maximum likelihood method.}},
journal = {Mol Biol Evol},
year = {2002},
volume = {19},
pages = {8--13},
number = {1},
month = {Jan},
abstract = {Evolution of proteins is generally modeled as a Markov process acting
on each site of the sequence. Replacement frequencies need to be
estimated based on sequence alignments. Here we compare three approaches:
First, the original method by Dayhoff, Schwartz, and Orcutt (1978)
Atlas Protein Seq. Struc. 5:345-352, secondly, the resolvent method
(RV) by Müller and Vingron (2000) J. Comput. Biol. 7(6):761-776,
and finally a maximum likelihood approach (ML) developed in this
paper. We evaluate the methods using a highly divergent and inhomogeneous
set of sequence alignments as an input to the estimation procedure.
ML is the method of choice for small sets of input data. Although
the RV method is computationally much less demanding it performs
only slightly worse than ML. Therefore, it is perfectly appropriate
for large-scale applications.},
institution = {Deutsches Krebsforschungszentrum, Theoretische Bioinformatik, Im
Neuenheimer Feld 280, 69120 Heidelberg, Germany.},
keywords = {Algorithms; Amino Acid Sequence; Amino Acid Substitution; Computer
Simulation; Evolution, Molecular; Likelihood Functions; Markov Chains;
Models, Genetic; Proteins; Sequence Alignment},
owner = {binf007},
pmid = {11752185},
timestamp = {2008.01.25}
author = {T. Müller and M. Vingron},
title = {{Modeling amino acid replacement.}},
journal = {J Comput Biol},
year = {2000},
volume = {7},
pages = {761--776},
number = {6},
abstract = {The estimation of amino acid replacement frequencies during molecular
evolution is crucial for many applications in sequence analysis.
Score matrices for database search programs or phylogenetic analysis
rely on such models of protein evolution. Pioneering work was done
by Dayhoff et al. (1978) who formulated a Markov model of evolution
and derived the famous PAM score matrices. Her estimation procedure
for amino acid exchange frequencies is restricted to pairs of proteins
that have a constant and small degree of divergence. Here we present
an improved estimator, called the resolvent method, that is not subject
to these limitations. This extension of Dayhoff's approach enables
us to estimate an amino acid substitution model from alignments of
varying degree of divergence. Extensive simulations show the capability
of the new estimator to recover accurately the exchange frequencies
among amino acids. Based on the SYSTERS database of aligned protein
families (Krause and Vingron, 1998) we recompute a series of score
matrices.},
doi = {10.1089/10665270050514918},
institution = {Deutsches Krebsforschungszentrum, Theoretische Bioinformatik, 69120
Heidelberg, Germany. t.mueller/m.vingron},
keywords = {Amino Acid Substitution; Computer Simulation; Evolution, Molecular;
Likelihood Functions; Markov Chains; Models, Biological; Proteins;
Stochastic Processes; Time Factors},
owner = {binf007},
pmid = {11382360},
timestamp = {2008.01.25},
url = {}
author = {Sven Rahmann and Tobias Müller and Thomas Dandekar and Matthias Wolf},
title = {{Efficient and robust analysis of large phylogenetic datasets}},
booktitle = {Advanced Data Mining Technologies in Bioinformatics},
publisher = {Idea Group, Inc.},
year = {2006},
editor = {Hui-Huang Hsu},
pages = {104--117},
address = {Hershey, PA, USA},
abstract = {The goal of phylogenetics is to reconstruct ancestral relationships
between different taxa, e.g., different species in the tree of life,
by means of certain characters, such as genomic sequences. We consider
the prominent problem of reconstructing the basal phylogenetic tree
topology when several subclades have already been identified or are
well known by other means, such as morphological characteristics.
Whereas most available tools attempt to estimate a fully resolved
tree from scratch, the profile neighbor-joining (PNJ) method focuses
directly on the mentioned problem and has proven a robust and efficient
method for large-scale data sets, especially when used in an iterative
way. We describe an implementation of this idea, the ProfDist software
package, which is freely available, and apply the method to estimate
the phylogeny of the eukaryotes. Overall, the PNJ approach provides
a novel effective way to mine large sequence datasets for relevant
phylogenetic information.}
author = {N. Saitou and M. Nei},
title = {{The neighbor-joining method: a new method for reconstructing phylogenetic
trees.}},
journal = {Mol Biol Evol},
year = {1987},
volume = {4},
pages = {406--425},
number = {4},
month = {Jul},
abstract = {A new method called the neighbor-joining method is proposed for reconstructing
phylogenetic trees from evolutionary distance data. The principle
of this method is to find pairs of operational taxonomic units (OTUs
[= neighbors]) that minimize the total branch length at each stage
of clustering of OTUs starting with a starlike tree. The branch lengths
as well as the topology of a parsimonious tree can quickly be obtained
by using this method. Using computer simulation, we studied the efficiency
of this method in obtaining the correct unrooted tree in comparison
with that of five other tree-making methods: the unweighted pair
group method of analysis, Farris's method, Sattath and Tversky's
method, Li's method, and Tateno et al.'s modified Farris method.
The new, neighbor-joining method and Sattath and Tversky's method
are shown to be generally better than the other methods.},
institution = {Center for Demographic and Population Genetics, University of Texas
Health Science Center, Houston 77225.},
keywords = {Animals; Biometry; Evolution; Models, Genetic; Phylogeny; Ranidae},
owner = {binf007},
pmid = {3447015},
timestamp = {2008.01.21}
author = {Jörg Schultz and Stefanie Maisel and Daniel Gerlach and Tobias Müller
and Matthias Wolf},
title = {{A common core of secondary structure of the internal transcribed
spacer 2 (ITS2) throughout the Eukaryota.}},
journal = {RNA},
year = {2005},
volume = {11},
pages = {361--364},
number = {4},
month = {Apr},
abstract = {The ongoing characterization of novel species creates the need for
a molecular marker which can be used for species- and, simultaneously,
for mega-systematics. Recently, the use of the internal transcribed
spacer 2 (ITS2) sequence was suggested, as it shows a high divergence
in sequence with an assumed conservation in structure. This hypothesis
was mainly based on small-scale analyses, comparing a limited number
of sequences. Here, we report a large-scale analysis of more than
54,000 currently known ITS2 sequences with the goal to evaluate the
hypothesis of a conserved structural core and to assess its use for
automated large-scale phylogenetics. Structure prediction revealed
that the previously described core structure can be found for more
than 5000 sequences in a wide variety of taxa within the eukaryotes,
indicating that the core secondary structure is indeed conserved.
This conserved structure allowed an automated alignment of extremely
divergent sequences as exemplified for the ITS2 sequences of a ctenophorean
eumetazoon and a volvocalean green alga. All classified sequences,
together with their structures can be accessed at
Furthermore, we found that, although sample sequences are known for
most major taxa, there exists a profound divergence in coverage,
which might become a hindrance for general usage. In summary, our
analysis strengthens the potential of ITS2 as a general phylogenetic
marker and provides a data source for further ITS2-based analyses.},
doi = {10.1261/rna.7204505},
institution = {},
keywords = {Algae; Animals; Base Sequence; Conserved Sequence; DNA, Ribosomal
Spacer; Fungi; Genetic Markers; Invertebrates; Molecular Sequence
Data; Nucleic Acid Conformation; Phylogeny; Plants; Sequence Alignment;
Transcription, Genetic},
owner = {binf007},
pii = {11/4/361},
pmid = {15769870},
timestamp = {2008.01.21},
url = {}
author = {Jörg Schultz and Tobias Müller and Marco Achtziger and Philipp N
Seibel and Thomas Dandekar and Matthias Wolf},
title = {{The internal transcribed spacer 2 database--a web server for (not
only) low level phylogenetic analyses.}},
journal = {Nucleic Acids Res},
year = {2006},
volume = {34},
pages = {W704--W707},
number = {Web Server issue},
month = {Jul},
abstract = {The internal transcribed spacer 2 (ITS2) is a phylogenetic marker
which has been of broad use in generic and infrageneric level classifications,
as its sequence evolves comparably fast. Only recently, it became
clear, that the ITS2 might be useful even for higher level systematic
analyses. As the secondary structure is highly conserved within all
eukaryotes it serves as a valuable template for the construction
of highly reliable sequence-structure alignments, which build a fundament
for subsequent analyses. Thus, any phylogenetic study using ITS2
has to consider both sequence and structure. We have integrated a
homology based RNA structure prediction algorithm into a web server,
which allows the detection and secondary structure prediction for
ITS2 in any given sequence. Furthermore, the resource contains more
than 25,000 pre-calculated secondary structures for the currently
known ITS2 sequences. These can be taxonomically searched and browsed.
Thus, our resource could become a starting point for ITS2-based phylogenetic
analyses and is therefore complementary to databases of other phylogenetic
markers, which focus on higher level analyses. The current version
of the ITS2 database can be accessed via},
doi = {10.1093/nar/gkl129},
institution = {Department of Bioinformatics, Biocenter, University of Würzburg Am
Hubland, 97074 Würzburg, Germany.},
keywords = {DNA, Ribosomal Spacer; Databases, Nucleic Acid; Internet; Nucleic
Acid Conformation; Phylogeny; Sequence Analysis, RNA; Software; User-Computer
Interface},
owner = {binf007},
pii = {34/suppl_2/W704},
pmid = {16845103},
timestamp = {2008.01.21},
url = {}
author = {M. Schöniger and A. von Haeseler},
title = {{A stochastic model for the evolution of autocorrelated DNA sequences.}},
journal = {Mol Phylogenet Evol},
year = {1994},
volume = {3},
pages = {240--247},
number = {3},
month = {Sep},
abstract = {Currently used stochastic models of DNA sequence evolution assume
independent and identically distributed nucleotide sites. They are
too simple to account for dependence structures obviously present
in molecular data. Up to now more realistic stochastic models for
nucleotide substitutions have been considered intractable. In this
paper a procedure that accounts for non-overlapping correlations
among pairs of sites of a DNA sequence is developed. We show that
currently used models that ignore correlated sites underestimate
distances inferred from observed sequence dissimilarities. For the
analyzed mitochondrial sequence data this underestimation is not
drastic in contrast to paired regions (stems) of bacterial 23S rRNA
doi = {10.1006/mpev.1994.1026},
institution = {Technical University Munich, Germany.},
keywords = {Animals; DNA; DNA, Mitochondrial; Evolution; Mammals; Markov Chains;
Mutation; RNA, Bacterial; RNA, Ribosomal, 23S; Species Specificity;
Stochastic Processes},
owner = {binf007},
pii = {S1055-7903(84)71026-8},
pmid = {7529616},
timestamp = {2008.01.25},
url = {}
author = {Philipp N Seibel and Tobias Müller and Thomas Dandekar and Jörg Schultz
and Matthias Wolf},
title = {{4SALE--a tool for synchronous RNA sequence and secondary structure
alignment and editing.}},
journal = {BMC Bioinformatics},
year = {2006},
volume = {7},
pages = {498},
abstract = {BACKGROUND: In sequence analysis the multiple alignment builds the
fundament of all proceeding analyses. Errors in an alignment could
strongly influence all succeeding analyses and therefore could lead
to wrong predictions. Hand-crafted and hand-improved alignments are
necessary and meanwhile good common practice. For RNA sequences often
the primary sequence as well as a secondary structure consensus is
well known, e.g., the cloverleaf structure of the t-RNA. Recently,
some alignment editors are proposed that are able to include and
model both kinds of information. However, with the advent of a large
amount of reliable RNA sequences together with their solved secondary
structures (available from e.g. the ITS2 Database), we are faced
with the problem to handle sequences and their associated secondary
structures synchronously. RESULTS: 4SALE fills this gap. The application
allows a fast sequence and synchronous secondary structure alignment
for large data sets and for the first time synchronous manual editing
of aligned sequences and their secondary structures. This study describes
an algorithm for the synchronous alignment of sequences and their
associated secondary structures as well as the main features of 4SALE
used for further analyses and editing. 4SALE builds an optimal and
unique starting point for every RNA sequence and structure analysis.
CONCLUSION: 4SALE, which provides an user-friendly and intuitive
interface, is a comprehensive toolbox for RNA analysis based on sequence
and secondary structure information. The program connects sequence
and structure databases like the ITS2 Database to phylogeny programs
as for example the CBCAnalyzer. 4SALE is written in JAVA and therefore
platform independent. The software is freely available and distributed
from the website at},
doi = {10.1186/1471-2105-7-498},
institution = {Department of Bioinformatics, University of Würzburg, Biocenter,
Am Hubland, Germany.},
keywords = {Algorithms; Base Pairing; Computational Biology; Databases, Genetic;
Nucleic Acid Conformation; RNA; Sequence Alignment; Sequence Analysis,
RNA; Software Validation; Time Factors; User-Computer Interface},
owner = {binf007},
pii = {1471-2105-7-498},
pmid = {17101042},
timestamp = {2008.01.21},
url = {}
author = {Christian Selig and Matthias Wolf and Tobias Müller and Thomas Dandekar
and Jörg Schultz},
title = {{The ITS2 Database II: homology modelling RNA structure for molecular
systematics.}},
journal = {Nucleic Acids Res},
year = {2008},
volume = {36},
pages = {D377--D380},
number = {Database issue},
month = {Jan},
abstract = {An increasing number of phylogenetic analyses are based on the internal
transcribed spacer 2 (ITS2). They mainly use the fast evolving sequence
for low-level analyses. When considering the highly conserved structure,
the same marker could also be used for higher level phylogenies.
Furthermore, structural features of the ITS2 allow distinguishing
different species from each other. Despite its importance, the correct
structure is only rarely found by standard RNA folding algorithms.
To overcome this hindrance for a wider application of the ITS2, we
have developed a homology modelling approach to predict the structure
of RNA and present the results of modelling the ITS2 in the ITS2
Database. Here, we describe the database and the underlying algorithms
which allowed us to predict the structure for 86 784 sequences, which
is more than 55\% of all GenBank entries concerning the ITS2. These
are not equally distributed over all genera. There is a substantial
amount of genera where the structure of nearly all sequences is predicted
whereas for others no structure at all was found despite high sequence
coverage. These genera might have evolved an ITS2 structure diverging
from the standard one. The current version of the ITS2 Database can
be accessed via},
doi = {10.1093/nar/gkm827},
institution = {Department of Bioinformatics, Biocenter, University of Würzburg,
Am Hubland 97074 Würzburg, Germany.},
owner = {binf007},
pii = {gkm827},
pmid = {17933769},
timestamp = {2008.01.21},
url = {}
author = {Sven Siebert and Rolf Backofen},
title = {{MARNA: multiple alignment and consensus structure prediction of
RNAs based on sequence structure comparisons.}},
journal = {Bioinformatics},
year = {2005},
volume = {21},
pages = {3352--3359},
number = {16},
month = {Aug},
abstract = {MOTIVATION: Due to the importance of considering secondary structures
in aligning functional RNAs, several pairwise sequence-structure
alignment methods have been developed. They use extended alignment
scores that evaluate secondary structure information in addition
to sequence information. However, two problems for the multiple alignment
step remain. First, how to combine pairwise sequence-structure alignments
into a multiple alignment and second, how to generate secondary structure
information for sequences whose explicit structural information is
missing. RESULTS: We describe a novel approach for multiple alignment
of RNAs (MARNA) taking into consideration both the primary and the
secondary structures. It is based on pairwise sequence-structure
comparisons of RNAs. From these sequence-structure alignments, libraries
of weighted alignment edges are generated. The weights reflect the
sequential and structural conservation. For sequences whose secondary
structures are missing, the libraries are generated by sampling low
energy conformations. The libraries are then processed by the T-Coffee
system, which is a consistency based multiple alignment method. Furthermore,
we are able to extract a consensus-sequence and -structure from a
multiple alignment. We have successfully tested MARNA on several
datasets taken from the Rfam database.},
doi = {10.1093/bioinformatics/bti550},
institution = {Department of Bioinformatics, Institute of Computer Science, Friedrich-Schiller-University
Jena, Ernst-Abbe Platz 2, 07743 Jena, Germany.},
keywords = {Algorithms; Computer Simulation; Consensus Sequence; RNA; Sensitivity
and Specificity; Sequence Alignment; Sequence Analysis, RNA; Sequence
Homology, Nucleic Acid; Software; Structure-Activity Relationship},
owner = {binf007},
pii = {bti550},
pmid = {15972285},
timestamp = {2008.01.21},
url = {}
author = {Andrew D Smith and Thomas W H Lui and Elisabeth R M Tillier},
title = {{Empirical models for substitution in ribosomal RNA.}},
journal = {Mol Biol Evol},
year = {2004},
volume = {21},
pages = {419--427},
number = {3},
month = {Mar},
abstract = {Empirical models of substitution are often used in protein sequence
analysis because the large alphabet of amino acids requires that
many parameters be estimated in all but the simplest parametric models.
When information about structure is used in the analysis of substitutions
in structured RNA, a similar situation occurs. The number of parameters
necessary to adequately describe the substitution process increases
in order to model the substitution of paired bases. We have developed
a method to obtain substitution rate matrices empirically from RNA
alignments that include structural information in the form of base
pairs. Our data consisted of alignments from the European Ribosomal
RNA Database of Bacterial and Eukaryotic Small Subunit and Large
Subunit Ribosomal RNA ( Wuyts et al. 2001. Nucleic Acids Res. 29:175-177;
Wuyts et al. 2002. Nucleic Acids Res. 30:183-185). Using secondary
structural information, we converted each sequence in the alignments
into a sequence over a 20-symbol code: one symbol for each of the
four individual bases, and one symbol for each of the 16 ordered
pairs. Substitutions in the coded sequences are defined in the natural
way, as observed changes between two sequences at any particular
site. For given ranges (windows) of sequence divergence, we obtained
substitution frequency matrices for the coded sequences. Using a
technique originally developed for modeling amino acid substitutions
( Veerassamy, Smith, and Tillier. 2003. J. Comput. Biol. 10:997-1010),
we were able to estimate the actual evolutionary distance for each
window. The actual evolutionary distances were used to derive instantaneous
rate matrices, and from these we selected a universal rate matrix.
The universal rate matrices were incorporated into the Phylip Software
package ( Felsenstein 2002.,
and we analyzed the ribosomal RNA alignments using both distance
and maximum likelihood methods. The empirical substitution models
performed well on simulated data, and produced reasonable evolutionary
trees for 16S ribosomal RNA sequences from sequenced Bacterial genomes.
Empirical models have the advantage of being easily implemented,
and the fact that the code consists of 20 symbols makes the models
easily incorporated into existing programs for protein sequence analysis.
In addition, the models are useful for simulating the evolution of
RNA sequence and structure simultaneously.},
doi = {10.1093/molbev/msh029},
institution = {Department of Medical Biophysics, University of Toronto, and Ontario
Cancer Institute, University Health Network, Toronto, Ontario, Canada.},
keywords = {Amino Acid Substitution; Animals; Computer Simulation; Databases,
Nucleic Acid; Evolution, Molecular; Likelihood Functions; Models,
Genetic; Phylogeny; RNA, Ribosomal; Sequence Alignment},
owner = {binf007},
pii = {msh029},
pmid = {14660689},
timestamp = {2008.01.21},
url = {}
author = {Trolltech},
title = {},
year = {2008},
owner = {binf007},
timestamp = {2008.07.19},
url = {}
author = {P Vanormelingen and E Hegewald and A Braband and M Kitschke and T
Friedl and K Sabbe and W Vyverman},
title = {{The systematics of a small spineless Desmodesmus species, D-costato-granulatus
(Sphaeropleales, Chlorophyceae), based on ITS2 rDNA sequence analyses
and cell wall morphology}},
journal = {Journal of Phycology},
year = {2007},
volume = {43},
pages = {378--396},
number = {2},
month = {APR}
author = {D. L. Wheeler and C. Chappey and A. E. Lash and D. D. Leipe and T.
L. Madden and G. D. Schuler and T. A. Tatusova and B. A. Rapp},
title = {{Database resources of the National Center for Biotechnology Information.}},
journal = {Nucleic Acids Res},
year = {2000},
volume = {28},
pages = {10--14},
number = {1},
month = {Jan},
abstract = {In addition to maintaining the GenBank(R) nucleic acid sequence database,
the National Center for Biotechnology Information (NCBI) provides
data analysis and retrieval and resources that operate on the data
in GenBank and a variety of other biological data made available
through NCBI's Web site. NCBI data retrieval resources include Entrez,
PubMed, LocusLink and the Taxonomy Browser. Data analysis resources
include BLAST, Electronic PCR, OrfFinder, RefSeq, UniGene, Database
of Single Nucleotide Polymorphisms (dbSNP), Human Genome Sequencing
pages, GeneMap'99, Davis Human-Mouse Homology Map, Cancer Chromosome
Aberration Project (CCAP) pages, Entrez Genomes, Clusters of Orthologous
Groups (COGs) database, Retroviral Genotyping Tools, Cancer Genome
Anatomy Project (CGAP) pages, SAGEmap, Online Mendelian Inheritance
in Man (OMIM) and the Molecular Modeling Database (MMDB). Augmenting
many of the Web applications are custom implementations of the BLAST
program optimized to search specialized data sets. All of the resources
can be accessed through the NCBI home page at: http://www.ncbi.nlm.nih.gov},
institution = {National Center for Biotechnology Information, National Library of
Medicine, National Institutes of Health, Building 38A, 8600 Rockville
Pike, Bethesda, MD 20894, USA.},
keywords = {Animals; Biology; Databases, Factual; Gene Expression; Genome, Human;
Humans; Information Storage and Retrieval; Mice; Models, Molecular;
National Library of Medicine (U.S.); Neoplasms; Phenotype; United
States},
owner = {binf007},
pii = {gkd055},
pmid = {10592169},
timestamp = {2008.01.21}
author = {Matthias Wolf and Marco Achtziger and Jörg Schultz and Thomas Dandekar
and Tobias Müller},
title = {{Homology modeling revealed more than 20,000 rRNA internal transcribed
spacer 2 (ITS2) secondary structures.}},
journal = {RNA},
year = {2005},
volume = {11},
pages = {1616--1623},
number = {11},
month = {Nov},
abstract = {Structural genomics meets phylogenetics and vice versa: Knowing rRNA
secondary structures is a prerequisite for constructing rRNA alignments
for inferring phylogenies, and inferring phylogenies is a precondition
to understand the evolution of such rRNA secondary structures. Here,
both scientific worlds go together. The rRNA internal transcribed
spacer 2 (ITS2) region is a widely used phylogenetic marker. Because
of its high variability at the sequence level, correct alignments
have to take into account structural information. In this study,
we examine the extent of the conservation in structure. We present
(1) the homology modeled secondary structure of more than 20,000
ITS2 covering about 14,000 species; (2) a computational approach
for homology modeling of rRNA structures, which additionally can
be applied to other RNA families; and (3) a database providing about
25,000 ITS2 sequences with their associated secondary structures,
a refined ITS2 specific general time reversible (GTR) substitution
model, and a scoring matrix, available at},
doi = {10.1261/rna.2144205},
institution = {Department of Bioinformatics, University of Würzburg, Biocenter,
Am Hubland, Germany.},
keywords = {Algae; Animals; Base Sequence; DNA, Ribosomal Spacer; Evolution, Molecular;
Fungi; Models, Molecular; Molecular Sequence Data; Nucleic Acid Conformation;
RNA, Ribosomal; Sequence Alignment},
owner = {binf007},
pii = {11/11/1616},
pmid = {16244129},
timestamp = {2008.01.21},
url = {}
@comment{jabref-meta: selector_publisher:}
@comment{jabref-meta: selector_author:}
@comment{jabref-meta: selector_journal:}
@comment{jabref-meta: selector_keywords:}
Jump to Line
Something went wrong with that request. Please try again.