9 #include "../lib/Sequence/SequenceSet.hpp"
10 #include "../lib/align/HMM.hpp"
11 #include "../lib/align/fw_bw.hpp"
12 #include "../lib/clustering/Tree.hpp"
13 #include "../lib/utils/MatrixStack.hpp"
14 #include "../lib/align/seq_align.hpp"
15 #include "../lib/align/consistency_aln.hpp"
21 #include <cxxtest/TestSuite.h>
// Test case for the profile/profile HMM path (going by its name; the call that
// performs the alignment is not among the lines visible here).
// NOTE(review): the leading numbers are residual listing line numbers and they
// skip values, so the braces, the loops over i/j and the declarations of `lib`,
// `dist_mat` and `guide_tree` live on lines that are not shown.
43 void test_profile_profile_hmm()
// Two sequence sets; each one receives two protein sequences below.
45 vector<ProteinSequenceSet<Default> > sets(2);
// Fixture for set 0: "seq1" and "seq2". The strings contain '-' characters
// (presumably alignment gaps -- confirm).
// NOTE(review): the raw `new` pointers are handed straight to add_seq;
// presumably the set takes ownership -- confirm against ProteinSequenceSet.
46 sets[0].add_seq(
new ProteinSequence(
"seq1",
"---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
47 sets[0].add_seq(
new ProteinSequence(
"seq2",
"AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
// Fixture for set 1: "seq3" and "seq4".
48 sets[1].add_seq(
new ProteinSequence(
"seq3",
"---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
49 sets[1].add_seq(
new ProteinSequence(
"seq4",
"AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
// Sanity check: both sequences were stored in the first set.
50 TS_ASSERT_EQUALS(sets[0].size(), 2);
// Relax `lib` using multiplication as the combining functor.
// NOTE(review): `lib` is declared on a line not shown; presumably a
// consistency library -- confirm.
58 lib.
relax(std::multiplies<double>());
// Running maximum over dist_mat, started at the lowest finite float.
60 float max_val=-FLT_MAX;
// Keep the largest matrix entry seen (the enclosing loops are not visible).
65 if (dist_mat[i][j]>max_val)
66 max_val=dist_mat[i][j];
// Replace every entry by (max_val - entry), so the former maximum becomes 0,
// and mirror it across the diagonal to keep the matrix symmetric.
// NOTE(review): presumably this turns similarity scores into distances for the
// tree construction below -- confirm.
73 dist_mat[i][j] = dist_mat[i][j]*(-1.0)+max_val;
74 dist_mat[j][i] = dist_mat[i][j];
// Two empty names, passed together with the matrix to guide_tree.nj().
80 std::vector<std::string> names(2,
"");
81 guide_tree.
nj(dist_mat, names);
// Expected result for seq1: its input string with a run of six '-' inserted
// directly after "DDPAA".
83 TS_ASSERT_EQUALS(sets[0][0].sequence(),
"---MERLSEDDPAA------HPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--");
// Expected result for seq3: identical to the string it was created with.
84 TS_ASSERT_EQUALS(sets[1][0].sequence(),
"---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--");
// Test case for the "splitted" profile/profile HMM path (going by its name).
// NOTE(review): only the fixture setup and one declaration are visible here;
// the calls under test and the assertions are on lines that are not shown.
87 void test_splitted_profile_profile_hmm()
// First pair of sequence sets: seq1/seq2 go into set1[0], seq3/seq4 into set1[1].
89 vector<ProteinSequenceSet<Default> > set1(2);
90 set1[0].add_seq(
new ProteinSequence(
"seq1",
"---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
91 set1[0].add_seq(
new ProteinSequence(
"seq2",
"AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
92 set1[1].add_seq(
new ProteinSequence(
"seq3",
"---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
93 set1[1].add_seq(
new ProteinSequence(
"seq4",
"AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
// Second pair of sets, filled with the same four name/sequence pairs as set1.
// NOTE(review): presumably one copy goes through the regular path and the
// other through the split path so the results can be compared -- confirm.
94 vector<ProteinSequenceSet<Default> > set2(2);
95 set2[0].add_seq(
new ProteinSequence(
"seq1",
"---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
96 set2[0].add_seq(
new ProteinSequence(
"seq2",
"AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
97 set2[1].add_seq(
new ProteinSequence(
"seq3",
"---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
98 set2[1].add_seq(
new ProteinSequence(
"seq4",
"AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
// Two initially empty float vectors; where they are filled and read is not
// visible (presumably insertion probabilities, one per run -- confirm).
101 std::vector<float> ins_probs1, ins_probs2;
// Fragment of a test body whose header is not visible in this view.
// Adds two long, '-'-free protein sequences to a single sequence set `set`
// (declared on a line not shown).
// NOTE(review): ProteinSequence is called here with four arguments; the third
// is an empty string and the fourth is 0 / 1. Their meaning is not visible
// from here (presumably a comment and an id) -- confirm.
114 set.
add_seq(
new MDAT::ProteinSequence(
"seq1",
"MIIATAGHVDHGKTTLLQAITGVNADRLPEEKKRGMTIDLGYAYWPQPDGRVPGFIDVPGHEKFLSNMLAGVGGIDHALLVVACDDGVMAQTREHLAILQLTGNPMLTVALTKADRVDEARVDEVERQVKEVLREYGFAEAKLFITAATEGRGMDALREHLLQLPEREHASQHSFRLAIDRAFTVKGAGLVVTGTALSGEVKVGDSLWLTGVNKPMRVRALHAQNQPTETANAGQRIALNIAGDAEKEQINRGDWLLADVPPEPFTRVIVELQTHTPLTQWQPLHIHHAASHVTGRVSLLEDNLAELVFDTPLWLADNDRLVLRDISARNTLAGARVVMLNPPRRGKRKPEYLQWLASLARAQSDADALSVHLERGAVNLADFAWARQLNGEGMRELLQQPGYIQAGYSLLNAPVAARWQRKILDTLATYHEQHRDEPGPGRERLRRMALPMEDEALVLLLIEKMRESGDIHSHHGWLHLPDHKAGFSEEQQAIWQKAEPLFGDEPWWVRDLAKETGTDEQAMRLTLRQAAQQGIITAIVKDRYYRNDRIVEFANMIRDLDQECGSTCAADFRDRLGVGRKLAIQILEYFDRIGFTRRRGNDHLLRDALLFPEK",
"", 0));
115 set.add_seq(
new MDAT::ProteinSequence(
"seq2",
"XKIRSPIVSVLGTTLLDHIRGSAVASQHIGATEIPXDVIEGICGDFLKKFSIRETLPGLFFIDTPGAFTTLRKRGGALADLAILIVDINEGFKPQTQEALNILRXYRTPFVVAANKIDRIHGWRVHEGRPFXETFSKQDIQVQQKLDTKVYELVGKLHEEGFESERFDRVTDFASQVSIIPISAITGEGIPELLTXLXGLAQQYLREQLKIEEDSPARGTILEVKEETGLGXTIDAVIYDGILRKDDTIAXXTSKDVISTRIRSLLKPRPLKFQKVDEVVAAAGIKIVAPGIDDVXAGSPLRVVTDPEKVREEILSEIEDIKIDTDEAGVVVKADTLGSLEAVVKILRDXYVPIKVADIGDVSRRDVVNAGIALQEDRVYGAIIAFNVKVIPSAAQELKNSDIKLFQGNVIYRLXEEYEEWVRGIEEEKKKKWXEAIIKPASIRLIPKLVFRQSKPAIGGVEVLTGVIRQGYPLXNDDGETVGTVESXQDKGENLKSASRGQKVAXAIKDAVYGKTIHEGDTLYVDIPENHYHILKEQLLTDEELDLXDKIAEIKRKKN",
"", 1));
// Two empty names, passed together with the matrix to guide_tree.nj().
// `dist_mat` and `guide_tree` are declared on lines not shown.
122 vector<string> names(2,
"");
123 guide_tree.
nj(dist_mat, names);
// Relax `lib` using multiplication as the combining functor; in this fragment
// the call comes after the tree has been built.
124 lib.
relax(std::multiplies<double>());
// Fragment of a test body whose header is not visible in this view.
// Here `set` is a vector of two sequence sets, and each set receives exactly
// one sequence: "seq1" goes into set[0] and "seq2" into set[1].
// NOTE(review): ProteinSequence is called with four arguments; the meaning of
// the empty string and of the trailing 0 / 1 is not visible from here
// (presumably a comment and an id) -- confirm.
137 vector<MDAT::ProteinSequenceSet<MDAT::Default> >
set(2);
138 set[0].add_seq(
new MDAT::ProteinSequence(
"seq1",
"MIIATAGHVDHGKTTLLQAITGVNADRLPEEKKRGMTIDLGYAYWPQPDGRVPGFIDVPGHEKFLSNMLAGVGGIDHALLVVACDDGVMAQTREHLAILQLTGNPMLTVALTKADRVDEARVDEVERQVKEVLREYGFAEAKLFITAATEGRGMDALREHLLQLPEREHASQHSFRLAIDRAFTVKGAGLVVTGTALSGEVKVGDSLWLTGVNKPMRVRALHAQNQPTETANAGQRIALNIAGDAEKEQINRGDWLLADVPPEPFTRVIVELQTHTPLTQWQPLHIHHAASHVTGRVSLLEDNLAELVFDTPLWLADNDRLVLRDISARNTLAGARVVMLNPPRRGKRKPEYLQWLASLARAQSDADALSVHLERGAVNLADFAWARQLNGEGMRELLQQPGYIQAGYSLLNAPVAARWQRKILDTLATYHEQHRDEPGPGRERLRRMALPMEDEALVLLLIEKMRESGDIHSHHGWLHLPDHKAGFSEEQQAIWQKAEPLFGDEPWWVRDLAKETGTDEQAMRLTLRQAAQQGIITAIVKDRYYRNDRIVEFANMIRDLDQECGSTCAADFRDRLGVGRKLAIQILEYFDRIGFTRRRGNDHLLRDALLFPEK",
"", 0));
139 set[1].add_seq(
new MDAT::ProteinSequence(
"seq2",
"XKIRSPIVSVLGTTLLDHIRGSAVASQHIGATEIPXDVIEGICGDFLKKFSIRETLPGLFFIDTPGAFTTLRKRGGALADLAILIVDINEGFKPQTQEALNILRXYRTPFVVAANKIDRIHGWRVHEGRPFXETFSKQDIQVQQKLDTKVYELVGKLHEEGFESERFDRVTDFASQVSIIPISAITGEGIPELLTXLXGLAQQYLREQLKIEEDSPARGTILEVKEETGLGXTIDAVIYDGILRKDDTIAXXTSKDVISTRIRSLLKPRPLKFQKVDEVVAAAGIKIVAPGIDDVXAGSPLRVVTDPEKVREEILSEIEDIKIDTDEAGVVVKADTLGSLEAVVKILRDXYVPIKVADIGDVSRRDVVNAGIALQEDRVYGAIIAFNVKVIPSAAQELKNSDIKLFQGNVIYRLXEEYEEWVRGIEEEKKKKWXEAIIKPASIRLIPKLVFRQSKPAIGGVEVLTGVIRQGYPLXNDDGETVGTVESXQDKGENLKSASRGQKVAXAIKDAVYGKTIHEGDTLYVDIPENHYHILKEQLLTDEELDLXDKIAEIKRKKN",
"", 1));
// Two empty names, passed together with the matrix to guide_tree.nj().
// `dist_mat` and `guide_tree` are declared on lines not shown.
147 vector<string> names(2,
"");
148 guide_tree.
nj(dist_mat, names);
// Relax `lib` using multiplication as the combining functor, after the tree
// has been built.
149 lib.
relax(std::multiplies<double>());