/*
 * HMM_test.hpp (MDA)
 *
 *  Created on: Sep 27, 2013
 *      Author: ckeme_01
 */
7 
8 
#include "../lib/Sequence/SequenceSet.hpp"
#include "../lib/align/HMM.hpp"
#include "../lib/align/fw_bw.hpp"
#include "../lib/clustering/Tree.hpp"
#include "../lib/utils/MatrixStack.hpp"
#include "../lib/align/seq_align.hpp"
#include "../lib/align/consistency_aln.hpp"

// C header
#include <cfloat>
#include <cstdlib>

// C++ header
#include <functional>
#include <iostream>
#include <string>
#include <vector>

// CxxTest header
#include <cxxtest/TestSuite.h>


using namespace std;
using namespace MDAT;
26 
27 
28 class HMM_Test : public CxxTest::TestSuite
29 {
30 public:
31 
32  void test_forward()
33  {
34 
35  }
36 
37  void test_backward()
38  {
39 
40  }
41 
42 
43  void test_profile_profile_hmm()
44  {
45  vector<ProteinSequenceSet<Default> > sets(2);
46  sets[0].add_seq(new ProteinSequence("seq1", "---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
47  sets[0].add_seq(new ProteinSequence("seq2", "AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
48  sets[1].add_seq(new ProteinSequence("seq3", "---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
49  sets[1].add_seq(new ProteinSequence("seq4", "AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
50  TS_ASSERT_EQUALS(sets[0].size(), 2);
51  sets[0].id(0);
52  sets[1].id(1);
53 
54  HMM hmm('P');
56  Matrix<float> dist_mat(2, 2);
57  all_hmm_pairs(sets, lib, dist_mat);
58  lib.relax(std::multiplies<double>());
59  size_t i, j;
60  float max_val=-FLT_MAX;
61  for (i=0; i<2; ++i)
62  {
63  for (j=i+1; j<2; ++j)
64  {
65  if (dist_mat[i][j]>max_val)
66  max_val=dist_mat[i][j];
67  }
68  }
69  for (i=0; i<2; ++i)
70  {
71  for (j=i; j<2; ++j)
72  {
73  dist_mat[i][j] = dist_mat[i][j]*(-1.0)+max_val;
74  dist_mat[j][i] = dist_mat[i][j];
75  }
76  }
77 
78 
79  Tree guide_tree;
80  std::vector<std::string> names(2, "");
81  guide_tree.nj(dist_mat, names);
82  progressive_consistency_align(lib, guide_tree, sets, SequenceSetGap_obj);
83  TS_ASSERT_EQUALS(sets[0][0].sequence(), "---MERLSEDDPAA------HPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--");
84  TS_ASSERT_EQUALS(sets[1][0].sequence(), "---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--");
85  }
86 
87  void test_splitted_profile_profile_hmm()
88  {
89  vector<ProteinSequenceSet<Default> > set1(2);
90  set1[0].add_seq(new ProteinSequence("seq1", "---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
91  set1[0].add_seq(new ProteinSequence("seq2", "AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
92  set1[1].add_seq(new ProteinSequence("seq3", "---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
93  set1[1].add_seq(new ProteinSequence("seq4", "AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
94  vector<ProteinSequenceSet<Default> > set2(2);
95  set2[0].add_seq(new ProteinSequence("seq1", "---MERLSEDDPAAHPPPSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVGGGK-LVSANGWCTAWVAR--"));
96  set2[0].add_seq(new ProteinSequence("seq2", "AAAMERLSEDDPAAHDASSVQH-PAYEEEEEEGQTCLNCLCYTDASA--QDWGPCS--VFPGKDLVEENGWCTAWVAREE"));
97  set2[1].add_seq(new ProteinSequence("seq3", "---MERLSEDDPAAQALEYRHDASSVQHTPAYE----EGQTCLNCLLYTDASADDQDWGPCSRRVFPGK-LVSANGWCTAWVAR--"));
98  set2[1].add_seq(new ProteinSequence("seq4", "AAAMERLSEDDPAAQA--YRHDASSVQH-PAYEEEEEEGQTCLNCLLYTDASA--QDWGPCS--VFPGPPLVSANGWCTAWVAREE"));
99 
100  HMM hmm('P');
101  std::vector<float> ins_probs1, ins_probs2;
102  Matrix<float> match_probs;
103  hmm.calculate_insertion_probs_splitted(set1, ins_probs1);
104  hmm.calculate_insertion_probs_splitted(set2, ins_probs2);
105  //hmm.calculate_match_probs_splitted(set1, set2, match_probs);
106 
107  }
108 
109 
110  void test_arg()
111  {
114  set.add_seq(new MDAT::ProteinSequence("seq1", "MIIATAGHVDHGKTTLLQAITGVNADRLPEEKKRGMTIDLGYAYWPQPDGRVPGFIDVPGHEKFLSNMLAGVGGIDHALLVVACDDGVMAQTREHLAILQLTGNPMLTVALTKADRVDEARVDEVERQVKEVLREYGFAEAKLFITAATEGRGMDALREHLLQLPEREHASQHSFRLAIDRAFTVKGAGLVVTGTALSGEVKVGDSLWLTGVNKPMRVRALHAQNQPTETANAGQRIALNIAGDAEKEQINRGDWLLADVPPEPFTRVIVELQTHTPLTQWQPLHIHHAASHVTGRVSLLEDNLAELVFDTPLWLADNDRLVLRDISARNTLAGARVVMLNPPRRGKRKPEYLQWLASLARAQSDADALSVHLERGAVNLADFAWARQLNGEGMRELLQQPGYIQAGYSLLNAPVAARWQRKILDTLATYHEQHRDEPGPGRERLRRMALPMEDEALVLLLIEKMRESGDIHSHHGWLHLPDHKAGFSEEQQAIWQKAEPLFGDEPWWVRDLAKETGTDEQAMRLTLRQAAQQGIITAIVKDRYYRNDRIVEFANMIRDLDQECGSTCAADFRDRLGVGRKLAIQILEYFDRIGFTRRRGNDHLLRDALLFPEK", "", 0));
115  set.add_seq(new MDAT::ProteinSequence("seq2", "XKIRSPIVSVLGTTLLDHIRGSAVASQHIGATEIPXDVIEGICGDFLKKFSIRETLPGLFFIDTPGAFTTLRKRGGALADLAILIVDINEGFKPQTQEALNILRXYRTPFVVAANKIDRIHGWRVHEGRPFXETFSKQDIQVQQKLDTKVYELVGKLHEEGFESERFDRVTDFASQVSIIPISAITGEGIPELLTXLXGLAQQYLREQLKIEEDSPARGTILEVKEETGLGXTIDAVIYDGILRKDDTIAXXTSKDVISTRIRSLLKPRPLKFQKVDEVVAAAGIKIVAPGIDDVXAGSPLRVVTDPEKVREEILSEIEDIKIDTDEAGVVVKADTLGSLEAVVKILRDXYVPIKVADIGDVSRRDVVNAGIALQEDRVYGAIIAFNVKVIPSAAQELKNSDIKLFQGNVIYRLXEEYEEWVRGIEEEKKKKWXEAIIKPASIRLIPKLVFRQSKPAIGGVEVLTGVIRQGYPLXNDDGETVGTVESXQDKGENLKSASRGQKVAXAIKDAVYGKTIHEGDTLYVDIPENHYHILKEQLLTDEELDLXDKIAEIKRKKN", "", 1));
116 
117  HMM hmm('P');
119  Matrix<float> dist_mat(2, 2);
120  all_hmm_pairs(set, lib, dist_mat);
121  Tree guide_tree;
122  vector<string> names(2, "");
123  guide_tree.nj(dist_mat, names);
124  lib.relax(std::multiplies<double>());
125  progressive_consistency_align(lib, guide_tree, set, seq_gap_func);
126 
127 
128 
129 
130  std::cout << set[0];
131  std::cout << set[1];
132  }
133 
134  void test_arg2()
135  {
136 
137  vector<MDAT::ProteinSequenceSet<MDAT::Default> >set(2);
138  set[0].add_seq(new MDAT::ProteinSequence("seq1", "MIIATAGHVDHGKTTLLQAITGVNADRLPEEKKRGMTIDLGYAYWPQPDGRVPGFIDVPGHEKFLSNMLAGVGGIDHALLVVACDDGVMAQTREHLAILQLTGNPMLTVALTKADRVDEARVDEVERQVKEVLREYGFAEAKLFITAATEGRGMDALREHLLQLPEREHASQHSFRLAIDRAFTVKGAGLVVTGTALSGEVKVGDSLWLTGVNKPMRVRALHAQNQPTETANAGQRIALNIAGDAEKEQINRGDWLLADVPPEPFTRVIVELQTHTPLTQWQPLHIHHAASHVTGRVSLLEDNLAELVFDTPLWLADNDRLVLRDISARNTLAGARVVMLNPPRRGKRKPEYLQWLASLARAQSDADALSVHLERGAVNLADFAWARQLNGEGMRELLQQPGYIQAGYSLLNAPVAARWQRKILDTLATYHEQHRDEPGPGRERLRRMALPMEDEALVLLLIEKMRESGDIHSHHGWLHLPDHKAGFSEEQQAIWQKAEPLFGDEPWWVRDLAKETGTDEQAMRLTLRQAAQQGIITAIVKDRYYRNDRIVEFANMIRDLDQECGSTCAADFRDRLGVGRKLAIQILEYFDRIGFTRRRGNDHLLRDALLFPEK", "", 0));
139  set[1].add_seq(new MDAT::ProteinSequence("seq2", "XKIRSPIVSVLGTTLLDHIRGSAVASQHIGATEIPXDVIEGICGDFLKKFSIRETLPGLFFIDTPGAFTTLRKRGGALADLAILIVDINEGFKPQTQEALNILRXYRTPFVVAANKIDRIHGWRVHEGRPFXETFSKQDIQVQQKLDTKVYELVGKLHEEGFESERFDRVTDFASQVSIIPISAITGEGIPELLTXLXGLAQQYLREQLKIEEDSPARGTILEVKEETGLGXTIDAVIYDGILRKDDTIAXXTSKDVISTRIRSLLKPRPLKFQKVDEVVAAAGIKIVAPGIDDVXAGSPLRVVTDPEKVREEILSEIEDIKIDTDEAGVVVKADTLGSLEAVVKILRDXYVPIKVADIGDVSRRDVVNAGIALQEDRVYGAIIAFNVKVIPSAAQELKNSDIKLFQGNVIYRLXEEYEEWVRGIEEEKKKKWXEAIIKPASIRLIPKLVFRQSKPAIGGVEVLTGVIRQGYPLXNDDGETVGTVESXQDKGENLKSASRGQKVAXAIKDAVYGKTIHEGDTLYVDIPENHYHILKEQLLTDEELDLXDKIAEIKRKKN", "", 1));
140  set[0].id(0);
141  set[1].id(1);
142  HMM hmm('P');
144  Matrix<float> dist_mat(2, 2);
145  all_hmm_pairs(set, lib, dist_mat);
146  Tree guide_tree;
147  vector<string> names(2, "");
148  guide_tree.nj(dist_mat, names);
149  lib.relax(std::multiplies<double>());
150  progressive_consistency_align(lib, guide_tree, set, SequenceSetGap_obj);
151 
152 
153 
154 
155  std::cout << set[0];
156  std::cout << set[1];
157  }
158 
159 };
160 
161