MDA
|
The general namespace of this project. More...
Classes | |
struct | Opt_DA |
Structure to contain the MDA options. More... | |
struct | hmm_match |
class | HMM |
struct | Match_point |
Simple struct to save a match and its score. More... | |
class | Library |
A Library object stores pairwise matches and is able to do the consistency extension. More... | |
struct | SequenceGapFunc |
struct | SequenceSetGapFunc |
class | Domain |
Class to represent a domain. More... | |
class | DomainArchitecture |
Represents a set of domains. More... | |
class | DomainArchitectureSet |
Class to store several Architectures. More... | |
struct | SingleSequenceFeature |
class | SequenceFeatures |
struct | Tag |
class | StrTok |
A string tokenizer class which, in contrast to standard C, is thread safe. More... | |
struct | TreeNode |
class | Tree |
This class provides several methods to read, compute and manipulate trees. More... | |
class | Vector |
class | DNASequence |
A special class for DNA sequences. More... | |
class | ProteinSequence |
A class to represent a protein sequence. More... | |
class | ProteinSequenceSet |
A SequenceSet object for sequences of type ProteinSequence. More... | |
class | Sequence |
A generic class to represent a plain sequence. More... | |
class | Sequence_Interface |
class | SequenceSetBase |
The base class for a SequenceSet. More... | |
struct | Input_Sort |
struct | Seq_Sort |
struct | Name_Sort |
class | SequenceSetBase< SequenceType, MemSafe > |
This SequenceSet class uses less memory than the normal one. More... | |
class | SplitSet |
class | CRS_Mat |
A class to store the Pfam domain matches in a compact way. More... | |
class | Matrix |
A simple class to produce 2 dimensional matrices. More... | |
class | Matrix1Line |
A simple class to produce 2 dimensional matrices. More... | |
class | MatrixStack |
A simple class to contain a set of 2 dimensional matrices. More... | |
Typedefs | |
typedef std::pair< unsigned int, unsigned int > | Match |
A match. | |
typedef Tag< List_ > | List |
typedef std::vector < SingleSequenceFeature > ::iterator | SequenceFeatures_iter |
typedef Tag< Default_ > | Default |
Use the default implementation of the class. | |
typedef Tag< MemSafe_ > const | MemSafe |
Used to use a memory saving version of a class. | |
template<typename SequenceType , typename MemoryType > | |
using | SequenceSet = SequenceSetBase< SequenceType, MemoryType > |
Enumerations | |
enum | CleanupOptions { CleanNested = 0x01, CleanOverlap = 0x02, CleanMerged = 0x04 } |
Domain cleanup options. More... | |
Functions | |
void | nw_dyn_consistency (size_t dim1, size_t dim2, Matrix< std::pair< int, char > > &matrix) |
This is a special version of the Needleman-Wunsch algorithm for consistency chaining. More... | |
void | nw_dyn_consistency_traceback (size_t i, size_t j, const Matrix< std::pair< int, char > > &matrix, std::string &edit_string1, std::string &edit_string2) |
Traceback for the nw_dyn_consistency algorithm. More... | |
void | gotoh_dyn_consistency (int dim1, int dim2, MatrixStack< 3, std::pair< int, char > > &matrices, int gop, int gep) |
Calculation of pairwise alignment. More... | |
void | gotoh_dyn_consistency_traceback (size_t i, size_t j, MatrixStack< 3, std::pair< int, char > > &matrices, std::string &edit_string1, std::string &edit_string2) |
Traceback for the gotoh_dyn_consistency algorithm. More... | |
template<typename DataType > | |
void | enterDataIntoMatrix (Matrix< std::pair< DataType, char > > &matrix, size_t dim1, size_t dim2, const std::map< Match, DataType > &match_points) |
template<typename DataType , typename GapFunction > | |
void | progressive_consistency_align (const Library< DataType > &lib, Tree &guide_tree, DataType &set, GapFunction gap_func) |
Produces a progressive alignment of a dataset. More... | |
int | get_score (const Opt_DA &opts, const Domain &dm1, const Domain &dm2) |
Calculates the score of a match of two Domains. More... | |
void | rads_align_archi (const Opt_DA &opts, const DomainArchitecture &doms1, const DomainArchitecture &doms2, MatrixStack< 3, std::pair< int, char > > &matrices) |
Aligning two Architectures. More... | |
void | rads_traceback_archi (Library< DomainArchitectureSet > &lib, const DomainArchitecture &arch1, const DomainArchitecture &arch2, const MatrixStack< 3, std::pair< int, char > > &matrices, int aln_score) |
Matrix< float > * | all_rads_pair_align_archi (Library< DomainArchitectureSet > &lib, const ProteinSequenceSet< Default > &seqSet, const Opt_DA &opts) |
Produces all pairwise alignments. More... | |
void | run_single_rads (size_t i, Library< DomainArchitectureSet > &lib, const DomainArchitectureSet &archs, Matrix< float > *sim_mat, const Opt_DA &opts, MatrixStack< 3, std::pair< int, char > > &matrices) |
Matrix< float > * | all_rads_pair_align_archi (Library< DomainArchitectureSet > &lib, const ProteinSequenceSet< Default > &seqSet, const Opt_DA &opts, unsigned int n_threads) |
void | refine (DomainArchitectureSet &set) |
Refinement of domain alignment by shifting domains to increase number of identical domains in a column. More... | |
void | rads_traceback_archi (Library< DomainArchitectureSet > &lib, const DomainArchitecture &arch1, const DomainArchitecture &arch2, const MatrixStack< 3, std::pair< int, char > > &matrices) |
Traceback for the RADS dynamic programming. More... | |
template<typename DataType > | |
float | hmm_forward (const DataType &seq1, const DataType &seq2, const HMM &hmm, Matrix< float > &dp_mat, float **insert_matrices) |
float | hmm_forward (const HMM &hmm, std::vector< float > &ins_probs1, std::vector< float > &ins_probs2, Matrix< float > &match_probs, Matrix< float > &dp_mat, float **insert_matrices) |
template<typename DataType > | |
float | hmm_backward (const DataType &seq1, const DataType &seq2, const HMM &hmm, Matrix< float > &dp_mat, float **insert_matrices) |
float | hmm_backward (const HMM &hmm, std::vector< float > &ins_probs1, std::vector< float > &ins_probs2, Matrix< float > &match_probs, Matrix< float > &dp_mat, float **insert_matrices) |
template<typename DataType , typename LibraryDataType > | |
void | hmm2lib (const DataType &seq1, const DataType &seq2, const Matrix< float > &forward_mat, const Matrix< float > &backward_mat, Library< LibraryDataType > &lib, float total_probability) |
template<typename LibraryType > | |
void | hmm2lib (const Sequence &seq1, int id1, const Sequence &seq2, int id2, const Matrix< float > &forward_mat, const Matrix< float > &backward_mat, Library< LibraryType > &lib, float total_probability) |
template<typename DataType > | |
void | all_hmm_pairs (const std::vector< DataType > &set, Library< std::vector< DataType > > &lib, Matrix< float > &dist_mat, size_t start, size_t end) |
Calculates all hmm pairs inside the given limit. More... | |
template<typename DataType > | |
void | all_hmm_pairs (const std::vector< DataType > &set, Library< std::vector< DataType > > &lib, Matrix< float > &dist_mat) |
template<typename DataType > | |
void | all_hmm_pairs (const DataType &set, Library< DataType > &lib, Matrix< float > &dist_mat) |
template<typename DataType , typename LibraryType > | |
void | all_hmm_pairs (const DataType &set, Library< LibraryType > &lib, Matrix< float > &dist_mat, size_t start, size_t end) |
void | gotoh_align_banded (int dim1, int dim2, MatrixStack< 3, std::pair< float, char > > &matrices, int gop, int gep, size_t band_width) |
Computes the dynamic programming algorithm as described by Gotoh, but limited to a certain width around the diagonal. More... | |
void | gotoh_align (int dim1, int dim2, MatrixStack< 3, std::pair< float, char > > &matrices, int gop, int gep) |
Computes the dynamic programming algorithm as described by Gotoh. More... | |
template<typename MatrixStackType > | |
void | gotoh_traceback (int dim1, int dim2, const MatrixStackType &matrices, std::string &edit_string1, std::string &edit_string2) |
Performs the gotoh traceback. More... | |
template<typename DataType > | |
void | fillGotohMatrix (const DataType &set, std::vector< size_t > ids1, std::vector< size_t > ids2, MatrixStack< 3, std::pair< float, char > > &matrixStack, const Matrix< int > &sim_mat) |
Fills the Gotoh match matrix with average scores for the alignment of two columns. More... | |
template<typename DataType > | |
void | fillGotohMatrix_banded (const DataType &set, std::vector< size_t > ids1, std::vector< size_t > ids2, MatrixStack< 3, std::pair< float, char > > &matrixStack, const Matrix< int > &sim_mat, int band_width) |
Fills the Gotoh match matrix with average scores for the alignment of two columns. More... | |
template<typename DataType > | |
void | seq_progressive_align (DataType &set, std::shared_ptr< Tree > guide_tree, const Matrix< int > &sim_mat, int gop, int gep, bool banded, MatrixStack< 3, std::pair< float, char > > &dyn_matrix) |
template<typename DataType > | |
void | seq_progressive_align (DataType &set, const Tree &guide_tree, const Matrix< int > &sim_mat, int gop, int gep, bool banded) |
template<typename DataType > | |
void | seq_prof_prof_align (DataType &set1, DataType &set2, const Matrix< int > &sim_mat, int gop, int gep, MatrixStack< 3, std::pair< float, char > > &matrix) |
Performs a profile profile alignment. More... | |
template<typename DataType > | |
void | seq_prof_prof_align_banded (DataType &set1, DataType &set2, const Matrix< int > &sim_mat, int gop, int gep, MatrixStack< 3, std::pair< float, char > > &matrix) |
Performs a profile profile alignment within a banded region. More... | |
template<typename DataType > | |
void | seq_progressive_align (DataType &set, MatrixStack< 3, std::pair< float, char > > &matrices, const Matrix< int > &sim_mat, int gop, int gep, bool banded) |
Calculates a progressive consistency alignment given a set of Sequences. More... | |
template<typename MemoryType > | |
void | same_architecture_aln (ProteinSequenceSet< MemoryType > &set, const Matrix< int > &sim_mat, int gop, int gep, SplitSet< ProteinSequenceSet< Default > > &splitSet, MatrixStack< 3, std::pair< float, char > > &matrix) |
template<typename MemoryType > | |
void | same_architecture_aln (ProteinSequenceSet< MemoryType > &set, const Matrix< int > &sim_mat, int gop, int gep, SplitSet< ProteinSequenceSet< Default > > &splitSet, ThreadPool< MatrixStack< 3, std::pair< float, char > > > &pool) |
template<typename MemoryType > | |
void | merge_sequences (SplitSet< ProteinSequenceSet< MemoryType > > &set, size_t start, size_t end) |
template<typename MemoryType > | |
void | aln_different_architectures (SplitSet< ProteinSequenceSet< MemoryType > > &set1, SplitSet< ProteinSequenceSet< MemoryType > > &set2, const Matrix< int > &sim_mat, int gop, int gep, ThreadPool< MatrixStack< 3, std::pair< float, char > > > &pool) |
template<typename MemoryType > | |
void | msa (ProteinSequenceSet< MemoryType > &set, const Tree &arch_guide_tree, const Matrix< int > &sim_mat, int gop, int gep, size_t n_threads) |
bool | domain_start_sort (const Domain &dom1, const Domain &dom2) |
std::ostream & | operator<< (std::ostream &out, const DomainArchitecture &archi) |
std::ostream & | operator<< (std::ostream &out, const DomainArchitectureSet &archi) |
bool | gap_pattern_sort (const DomainArchitecture &arch1, const DomainArchitecture &arch2) |
bool | member_sort (const DomainArchitecture &arch1, const DomainArchitecture &arch2) |
bool | id_sort (const DomainArchitecture &arch1, const DomainArchitecture &arch2) |
void | str_upper (string &str) |
void | str_lower (string &str) |
string | get_alphabet (std::string alphabet) |
Returns a reduced alphabet. More... | |
short * | encode (std::string alphabet) |
Encodes an alphabet into numbers. More... | |
template<typename SequenceType > | |
Vector< int > * | calc_km_vec (const SequenceType &seq, std::vector< short > &coded_alphabet, size_t start=0, size_t end=0) |
template<typename SequenceSetType > | |
Matrix< float > * | kmer_dist_mat (const SequenceSetType &set) |
void | reverse_complement (std::string &seq) |
template<typename SequenceType > | |
SequenceType | substr (const SequenceType &seq, size_t start, size_t length) |
returns a new Sequence that is a substring of an existing one. More... | |
template<typename SequenceType > | |
SequenceType | substr_no_renaming (const SequenceType &seq, size_t start, size_t length) |
returns a new Sequence that is a substring of an existing one. More... | |
template<typename SequenceType > | |
SequenceType * | substr_end (const SequenceType &seq, size_t start, size_t end) |
returns a new Sequence that is a substring of an existing one. More... | |
template<typename SequenceType > | |
SequenceType * | substr_end_no_renaming (const SequenceType &seq, size_t start, size_t end) |
template<typename SequenceType > | |
void | reverse_complement (SequenceType &seq) |
Computes the reverse complement of a DNA sequence. More... | |
char | identify_seq_type (const Sequence &seq) |
std::pair< size_t, size_t > | coverage (const Sequence &seq1, const Sequence &seq2) |
std::pair< size_t, size_t > | id (const Sequence &seq1, const Sequence &seq2) |
bool | seq_check (const Sequence &seq1, const Sequence &seq2) |
bool | bio_seq (const Sequence &seq) |
template<> | |
std::string | dna2prot< std::string, std::string > (const std::string &seq) |
std::ostream & | operator<< (std::ostream &out, const Sequence &seq) |
template<typename SeqType > | |
char | identify_seq_type (const SeqType &seq) |
template<typename SeqType > | |
std::pair< size_t, size_t > | coverage (const SeqType &seq1, const SeqType &seq2) |
template<typename SeqType > | |
std::pair< size_t, size_t > | id (const SeqType &seq1, const SeqType &seq2) |
template<typename SeqType > | |
bool | seq_check (const SeqType &seq1, const SeqType &seq2) |
template<typename SeqType > | |
bool | bio_seq (const SeqType &seq) |
template<typename SequenceType , typename MemoryType > | |
bool | check_set (const SequenceSetBase< SequenceType, MemoryType > &set) |
template<typename SeqType , typename MemType > | |
std::ostream & | operator<< (std::ostream &out, const SequenceSetBase< SeqType, MemType > &seqSet) |
template<typename SplitSetType > | |
std::ostream & | operator<< (std::ostream &out, const SplitSet< SplitSetType > &seqSet) |
FILE * | my_fopen (std::string name_f, std::string mode) |
String functions | |
void | str_upper (std::string &str) |
Converts all characters to uppercase. More... | |
void | str_lower (std::string &str) |
Converts all characters to lowercase. More... | |
Variables | |
struct MDAT::SequenceSetGapFunc | SequenceSetGap_obj |
const float | EXP_UNDERFLOW_THRESHOLD = -4.60f |
const float | LOG_UNDERFLOW_THRESHOLD = 7.50f |
const float | LOG_ZERO = -FLT_MAX |
const float | LOG_ONE = 0.0f |
The general namespace of this project.
enum MDAT::CleanupOptions |
Domain cleanup options.
Enumerator | |
---|---|
CleanNested |
CleanNested Cleans nested domains. |
CleanOverlap |
CleanOverlap Cleans overlapping domains. |
CleanMerged |
CleanMerged Cleans split domains. |
void MDAT::all_hmm_pairs | ( | const std::vector< DataType > & | set, |
Library< std::vector< DataType > > & | lib, | ||
Matrix< float > & | dist_mat, | ||
size_t | start, | ||
size_t | end | ||
) |
Calculates all hmm pairs inside the given limit.
set | The dataset. |
lib | The library to store the pairwise matches in. |
dist_mat | The distance matrix. |
start | The first element to use. |
end | The last element to use. |
Matrix< float > * MDAT::all_rads_pair_align_archi | ( | Library< DomainArchitectureSet > & | lib, |
const ProteinSequenceSet< Default > & | seqSet, | ||
const Opt_DA & | opts | ||
) |
Produces all pairwise alignments.
This function computes all alignments between every pair of sequences and saves the matches in the library.
lib | The Library object |
seqSet | The sequence set |
opts | The domain alignment options |
short * MDAT::encode | ( | std::string | alphabet | ) |
Encodes an alphabet into numbers.
alphabet | The alphabet to use. |
void MDAT::enterDataIntoMatrix | ( | Matrix< std::pair< DataType, char > > & | matrix, |
size_t | dim1, | ||
size_t | dim2, | ||
const std::map< Match, DataType > & | match_points | ||
) |
Fills the dynamic programming matrix with values from the Library.
matrix | The dynamic programming matrix. |
dim1 | The size of the first dimension. |
dim2 | The size of the second dimension. |
match_points | The scores of the matches. Fills the dynamic programming matrix with values from the Library. |
matrix | The dynamic programming matrix. |
dim1 | The size of the first dimension. |
dim2 | The size of the second dimension. |
match_points | The scores of the matches. |
void MDAT::fillGotohMatrix | ( | const DataType & | set, |
std::vector< size_t > | ids1, | ||
std::vector< size_t > | ids2, | ||
MatrixStack< 3, std::pair< float, char > > & | matrixStack, | ||
const Matrix< int > & | sim_mat | ||
) |
Fills the Gotoh match matrix with average scores for the alignment of two columns.
set | A set of sequences to be used. |
ids1 | The positions of the first set of sequences. |
ids2 | The positions of the second set of sequences. |
matrixStack | The matrix stack. |
sim_mat | The similarity matrix. |
void MDAT::fillGotohMatrix_banded | ( | const DataType & | set, |
std::vector< size_t > | ids1, | ||
std::vector< size_t > | ids2, | ||
MatrixStack< 3, std::pair< float, char > > & | matrixStack, | ||
const Matrix< int > & | sim_mat, | ||
int | band_width | ||
) |
Fills the Gotoh match matrix with average scores for the alignment of two columns.
set | A set of sequences to be used. |
ids1 | The positions of the first set of sequences. |
ids2 | The positions of the second set of sequences. |
matrixStack | The matrix stack. |
sim_mat | The similarity matrix. |
band_width | The bandwidth to be used. |
std::string MDAT::get_alphabet | ( | std::string | alphabet | ) |
Returns a reduced alphabet.
alphabet | The alphabet to use. |
int MDAT::get_score | ( | const Opt_DA & | opts, |
const Domain & | dm1, | ||
const Domain & | dm2 | ||
) |
Calculates the score of a match of two Domains.
This function has been taken from the RADS algorithm
opts | The options containing the scores. |
dm1 | The first domain |
dm2 | The second domain |
void MDAT::gotoh_align | ( | int | dim1, |
int | dim2, | ||
MatrixStack< 3, std::pair< float, char > > & | matrices, | ||
int | gop, | ||
int | gep | ||
) |
Computes the dynamic programming algorithm as described by Gotoh.
dim1 | The first dimension of the matrix. |
dim2 | The second dimension of the matrix. |
matrices | The dynamic programming matrix. |
gop | The gap opening costs. |
gep | The gap extension costs. |
void MDAT::gotoh_align_banded | ( | int | dim1, |
int | dim2, | ||
MatrixStack< 3, std::pair< float, char > > & | matrices, | ||
int | gop, | ||
int | gep, | ||
size_t | band_width | ||
) |
Computes the dynamic programming algorithm as described by Gotoh, but limited to a certain width around the diagonal.
dim1 | The first dimension of the matrix. |
dim2 | The second dimension of the matrix. |
matrices | The dynamic programming matrix. |
gop | The gap opening costs. |
gep | The gap extension costs. |
band_width | The bandwidth to be used around the diagonal. |
void MDAT::gotoh_dyn_consistency | ( | int | dim1, |
int | dim2, | ||
MatrixStack< 3, std::pair< int, char > > & | matrices, | ||
int | gop, | ||
int | gep | ||
) |
Calculation of pairwise alignment.
This function uses the gotoh algorithm for the consistency algorithm. Only matches with a score != 0 are allowed.
dim1 | First dimension. |
dim2 | Second dimension. |
matrices | The dynamic programming matrices. |
gop | Gap opening costs. |
gep | Gap extension costs. |
void MDAT::gotoh_dyn_consistency_traceback | ( | size_t | i, |
size_t | j, | ||
MatrixStack< 3, std::pair< int, char > > & | matrices, | ||
std::string & | edit_string1, | ||
std::string & | edit_string2 | ||
) |
Traceback for the gotoh_dyn_consistency algorithm.
i | The size of the first dimension. |
j | The size of the second dimension. |
matrices | The matrices filled by the gotoh_dyn_consistency function. |
edit_string1 | The first edit_string. |
edit_string2 | The second edit_string. |
void MDAT::gotoh_traceback | ( | int | dim1, |
int | dim2, | ||
const MatrixStackType & | matrices, | ||
std::string & | edit_string1, | ||
std::string & | edit_string2 | ||
) |
Performs the gotoh traceback.
The result is stored in the edit_strings in the form "mm-mm--". 'm' means a match, '-' means a gap is introduced.
dim1 | The size of the first dimension. |
dim2 | The size of the second dimension. |
matrices | The dynamic programming matrix. |
edit_string1 | The resulting edit string for dim1. |
edit_string2 | The resulting edit string for dim2. |
float MDAT::hmm_backward | ( | const HMM & | hmm, |
std::vector< float > & | ins_probs1, | ||
std::vector< float > & | ins_probs2, | ||
Matrix< float > & | match_probs, | ||
Matrix< float > & | dp_mat, | ||
float ** | insert_matrices | ||
) |
Calculates the backward algorithm.
hmm | The HMM to use. |
ins_probs1 | The insertion probabilities for the first sequence. |
ins_probs2 | The insertion probabilities for the second sequence. |
match_probs | The match probabilities. |
dp_mat | The dynamic programming matrix for matches. |
insert_matrices | The gap matrices. |
float MDAT::hmm_forward | ( | const DataType & | seq1, |
const DataType & | seq2, | ||
const HMM & | hmm, | ||
Matrix< float > & | dp_mat, | ||
float ** | insert_matrices | ||
) |
seq1 | |
seq2 | |
hmm | |
dp_mat | |
insert_matrices |
|
inline |
Open a file. Will throw an exception when an error occurs.
name_f | Name of the file. |
mode | Mode to open it with |
void MDAT::nw_dyn_consistency | ( | size_t | dim1, |
size_t | dim2, | ||
Matrix< std::pair< int, char > > & | matrix | ||
) |
This is a special version of the Needleman-Wunsch algorithm for consistency chaining.
This algorithm does not include gap costs and allows matches only when there is a positive value. The reason is that the scores come from the consistency approach, and thus only matches that have been seen in a pairwise alignment should occur.
dim1 | The first dimension. |
dim2 | The second dimension. |
matrix | The dynamic programming matrix. |
void MDAT::nw_dyn_consistency_traceback | ( | size_t | i, |
size_t | j, | ||
const Matrix< std::pair< int, char > > & | matrix, | ||
std::string & | edit_string1, | ||
std::string & | edit_string2 | ||
) |
Traceback for the nw_dyn_consistency algorithm.
i | The size of the first dimension. |
j | The size of the second dimension. |
matrix | The matrix filled by nw_dyn_consistency function. |
edit_string1 | The first edit_string. |
edit_string2 | The second edit_string. |
std::ostream& MDAT::operator<< | ( | std::ostream & | out, |
const DomainArchitectureSet & | archi | ||
) |
out | The output stream. |
archi | The architecture set. |
std::ostream& MDAT::operator<< | ( | std::ostream & | out, |
const SequenceSetBase< SeqType, MemType > & | seqSet | ||
) |
Simple print of a SequenceSet in fasta format.
out[in|out] | The output stream. |
seqSet[in] | The sequence set |
void MDAT::progressive_consistency_align | ( | const Library< DataType > & | lib, |
Tree & | guide_tree, | ||
DataType & | set, | ||
GapFunction | gap_func | ||
) |
Produces a progressive alignment of a dataset.
The | data to be used. |
lib | The library containing the pairwise matches. |
guide_tree | The guide tree to be used during the process. |
set | The data set to use. |
void MDAT::rads_align_archi | ( | const Opt_DA & | opts, |
const DomainArchitecture & | doms1, | ||
const DomainArchitecture & | doms2, | ||
MatrixStack< 3, std::pair< int, char > > & | matrices | ||
) |
Aligning two Architectures.
Function is basically taken from RADS. It is based on the Gotoh-Algorithm but uses scores specifically designed for domains.
opts | The options (scores) to be used by the algorithm. |
doms1 | The first domain architecture |
doms2 | The second domain architecture |
matrices | The matrices for the Gotoh-Algorithm. |
void MDAT::rads_traceback_archi | ( | Library< DomainArchitectureSet > & | lib, |
const DomainArchitecture & | arch1, | ||
const DomainArchitecture & | arch2, | ||
const MatrixStack< 3, std::pair< int, char > > & | matrices | ||
) |
Traceback for the RADS dynamic programming.
This function does the traceback of the Gotoh-Algorithm of the RADS alignment. The identified matches are stored in the library.
lib | The Library object. |
arch1 | The first architecture. |
arch2 | The second architecture. |
matrices | The dynamic programming matrix |
void MDAT::refine | ( | DomainArchitectureSet & | set | ) |
Refinement of domain alignment by shifting domains to increase number of identical domains in a column.
set | Domain Alignment. |
void reverse_complement | ( | SequenceType & | seq | ) |
Computes the reverse complement of a DNA sequence.
seq | The sequence to change. |
void MDAT::seq_prof_prof_align | ( | DataType & | set1, |
DataType & | set2, | ||
const Matrix< int > & | sim_mat, | ||
int | gop, | ||
int | gep, | ||
MatrixStack< 3, std::pair< float, char > > & | matrix | ||
) |
Performs a profile profile alignment.
set1 | The first alignment. |
set2 | The second alignment. |
sim_mat | The similarity matrix. |
gop | The gap opening costs to use. |
gep | The gap extension costs to use. |
matrix | The dynamic programming matrix. |
void MDAT::seq_prof_prof_align_banded | ( | DataType & | set1, |
DataType & | set2, | ||
const Matrix< int > & | sim_mat, | ||
int | gop, | ||
int | gep, | ||
MatrixStack< 3, std::pair< float, char > > & | matrix | ||
) |
Performs a profile profile alignment within a banded region.
set1 | The first alignment. |
set2 | The second alignment. |
sim_mat | The similarity matrix. |
gop | The gap opening costs to use. |
gep | The gap extension costs to use. |
matrix | The dynamic programming matrix. |
void MDAT::seq_progressive_align | ( | DataType & | set, |
std::shared_ptr< Tree > | guide_tree, | ||
const Matrix< int > & | sim_mat, | ||
int | gop, | ||
int | gep, | ||
bool | banded, | ||
MatrixStack< 3, std::pair< float, char > > & | dyn_matrix | ||
) |
Calculates a progressive alignment.
set | The sequence set. |
guide_tree | The guide tree to be used. |
sim_mat | The similarity matrix. |
gop | The gap opening penalty. |
gep | The gap extension penalty. |
Uses standard gotoh algorithm to produce alignments.
void MDAT::seq_progressive_align | ( | DataType & | set, |
MatrixStack< 3, std::pair< float, char > > & | matrices, | ||
const Matrix< int > & | sim_mat, | ||
int | gop, | ||
int | gep, | ||
bool | banded | ||
) |
Calculates a progressive consistency alignment given a set of Sequences.
set | The set of sequences |
void MDAT::str_lower | ( | std::string & | str | ) |
Converts all characters to lowercase.
[in,out] | str | The string to convert |
void MDAT::str_upper | ( | std::string & | str | ) |
Converts all characters to uppercase.
[in,out] | str | The string to convert |
SequenceType substr | ( | const SequenceType & | seq, |
size_t | start, | ||
size_t | length | ||
) |
returns a new Sequence that is a substring of an existing one.
seq | The sequence to produce the new one from. |
start | The start position of the substring. |
length | The length of the substring. |
SequenceType * substr_end | ( | const SequenceType & | seq, |
size_t | start, | ||
size_t | end | ||
) |
returns a new Sequence that is a substring of an existing one.
seq | The sequence to produce the new one from. |
start | The start position of the substring. |
end | The last position of the substring in sequence. |
SequenceType substr_no_renaming | ( | const SequenceType & | seq, |
size_t | start, | ||
size_t | length | ||
) |
returns a new Sequence that is a substring of an existing one.
seq | The sequence to produce the new one from. |
start | The start position of the substring. |
length | The length of the substring. |