MDA
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
Align_test.hpp
1 /*
2  * Align_test.hpp
3  *
4  * Created on: Oct 10, 2013
5  * Author: ckeme_01
6  */
7 
8 #ifndef ALIGN_TEST_HPP_
9 #define ALIGN_TEST_HPP_
10 
11 
#include "../lib/Sequence/SequenceSet.hpp"
#include "../lib/align/seq_align.hpp"
#include "../lib/utils/MatrixStack.hpp"

// C header
#include <cctype>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// C++ header
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// CxxTest header
#include <cxxtest/TestSuite.h>
23 
24 
25 class Align_Test : public CxxTest::TestSuite
26 {
27 private:
29  read_similarity_matrix(const char *mat_f)
30  {
31  FILE *mat_F = fopen(mat_f, "r");
32  const int LINE_LENGTH=1001;
33  char line[LINE_LENGTH];
34  while (fgets(line, LINE_LENGTH, mat_F) != NULL)
35  {
36  if (line[0]!='#')
37  break;
38  }
39 
40 
41  MDAT::Matrix<int> *sim_mat_p =new MDAT::Matrix<int>(26,26,-999);
42  MDAT::Matrix<int> &sim_mat = *sim_mat_p;
43  int key[256];
44  char *tmp=strtok(line, " ");
45  int pos =0;
46  key[pos]=toupper(tmp[0])-65;
47  while ((tmp=strtok(NULL, " ")) != NULL)
48  {
49  key[++pos]=toupper(tmp[0])-65;
50  if ((key[pos]<0) || (key[pos]>25))
51  key[pos]=-1;
52  }
53 
54  int row_id;
55  int col_id;
56  while (fgets(line, LINE_LENGTH, mat_F) != NULL)
57  {
58  col_id=-1;
59  tmp=strtok(line, " \n");
60  if (tmp[0]=='*')
61  continue;
62  row_id=toupper(tmp[0])-65;
63  while ((tmp=strtok(NULL, " \n")) != NULL)
64  {
65  if (key[++col_id] != -1)
66  sim_mat[row_id][key[col_id]]=sim_mat[key[col_id]][row_id]=atoi(tmp);
67  }
68  }
69  fclose(mat_F);
70  return sim_mat_p;
71  }
72 
73 
74 public:
75 
76  void test_gotoh_match()
77  {
78  std::string matrix_path = getenv("HOME");
79  matrix_path.append("/.mda/BLOSUM62.txt");
80  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
82  set.add_seq(new MDAT::ProteinSequence("seq1", "PTPAPGLFPSPIPASYLFP"));
83  set.add_seq(new MDAT::ProteinSequence("seq2", "PTPAPGLFPSPIPASYLFP"));
84  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
85  std::vector<size_t> ids1(1,0);
86  std::vector<size_t> ids2(1,1);
87  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
88  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
89  std::string edit_string1, edit_string2;
90  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
91  TS_ASSERT_EQUALS(edit_string1, "mmmmmmmmmmmmmmmmmmm");
92  TS_ASSERT_EQUALS(edit_string2, "mmmmmmmmmmmmmmmmmmm");
93  }
94 
95  void test_gotoh_gap_seq1()
96  {
97  std::string matrix_path = getenv("HOME");
98  matrix_path.append("/.mda/BLOSUM62.txt");
99  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
101  set.add_seq(new MDAT::ProteinSequence("seq1", "PTPAPGLFPSPIPAS"));
102  set.add_seq(new MDAT::ProteinSequence("seq2", "PTPAPGLFPSPIPASYLFP"));
103  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
104  std::vector<size_t> ids1(1,0);
105  std::vector<size_t> ids2(1,1);
106  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
107  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
108  std::string edit_string1, edit_string2;
109  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
110  TS_ASSERT_EQUALS(edit_string1, "----mmmmmmmmmmmmmmm");
111  TS_ASSERT_EQUALS(edit_string2, "mmmmmmmmmmmmmmmmmmm");
112  }
113 
114  void test_gotoh_gap_seq2()
115  {
116  std::string matrix_path = getenv("HOME");
117  matrix_path.append("/.mda/BLOSUM62.txt");
118  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
120  set.add_seq(new MDAT::ProteinSequence("seq1", "PTPAPGLFPSPIPASYLFP"));
121  set.add_seq(new MDAT::ProteinSequence("seq2", "PTPAPGLFPSPIPAS"));
122  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
123  std::vector<size_t> ids1(1,0);
124  std::vector<size_t> ids2(1,1);
125  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
126  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
127  std::string edit_string1, edit_string2;
128  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
129  TS_ASSERT_EQUALS(edit_string1, "mmmmmmmmmmmmmmmmmmm");
130  TS_ASSERT_EQUALS(edit_string2, "----mmmmmmmmmmmmmmm");
131  }
132 
133  void test_gotoh_gap_seq2_start()
134  {
135  std::string matrix_path = getenv("HOME");
136  matrix_path.append("/.mda/BLOSUM62.txt");
137  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
139  set.add_seq(new MDAT::ProteinSequence("seq1", "PTPAPGLFPSPIPASYLFP"));
140  set.add_seq(new MDAT::ProteinSequence("seq2", "PGLFPSPIPASYLFP"));
141  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
142  std::vector<size_t> ids1(1,0);
143  std::vector<size_t> ids2(1,1);
144  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
145  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
146  std::string edit_string1, edit_string2;
147  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
148  TS_ASSERT_EQUALS(edit_string1, "mmmmmmmmmmmmmmmmmmm");
149  TS_ASSERT_EQUALS(edit_string2, "mmmmmmmmmmmmmmm----");
150  }
151  void test_gotoh_gap_seq1_start()
152  {
153  std::string matrix_path = getenv("HOME");
154  matrix_path.append("/.mda/BLOSUM62.txt");
155  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
157  set.add_seq(new MDAT::ProteinSequence("seq1", "PGLFPSPIPASYLFP"));
158  set.add_seq(new MDAT::ProteinSequence("seq2", "PTPAPGLFPSPIPASYLFP"));
159  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
160  std::vector<size_t> ids1(1,0);
161  std::vector<size_t> ids2(1,1);
162  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
163  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
164  std::string edit_string1, edit_string2;
165  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
166  TS_ASSERT_EQUALS(edit_string1, "mmmmmmmmmmmmmmm----");
167  TS_ASSERT_EQUALS(edit_string2, "mmmmmmmmmmmmmmmmmmm");
168  }
169 
170  void test_gotoh_gap_middle()
171  {
172  std::string matrix_path = getenv("HOME");
173  matrix_path.append("/.mda/BLOSUM62.txt");
174  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
176  set.add_seq(new MDAT::ProteinSequence("seq1", "PGLFPSAIPASYLYLFP"));
177  set.add_seq(new MDAT::ProteinSequence("seq2", "PGLFPIPASYLEEYLFP"));
178  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
179  std::vector<size_t> ids1(1,0);
180  std::vector<size_t> ids2(1,1);
181  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
182  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
183  std::string edit_string1, edit_string2;
184  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
185  TS_ASSERT_EQUALS(edit_string1, "mmmm--mmmmmmmmmmmmm");
186  TS_ASSERT_EQUALS(edit_string2, "mmmmmmmmmmmm--mmmmm");
187  set[0].insert_gaps(edit_string1);
188  set[1].insert_gaps(edit_string2);
189  }
190 
191 
192  void test_whatever()
193  {
194  std::string matrix_path = getenv("HOME");
195  matrix_path.append("/.mda/BLOSUM62.txt");
196  MDAT::Matrix<int> *sim_mat = read_similarity_matrix(matrix_path.c_str());
198  set.add_seq(new MDAT::ProteinSequence("seq1", "MRRNPSTFKYIPLRIDFMSEVPLPE"));
199  set.add_seq(new MDAT::ProteinSequence("seq2", "MSDLQDQEPSIIINGNLEPVGEPDIVEETEVVAQETQETQDADKPKKKVAFTGLEEDGETEEEKRKREFEEGGGLPEQPLNPDFSKLNPLSAEIINRQ"));
200  set.add_seq(new MDAT::ProteinSequence("seq3", "MNDALEIMKRQ"));
201  set.add_seq(new MDAT::ProteinSequence("seq4", "MADEHRQP"));
202  MDAT::MatrixStack<3,std::pair<float, char> > matrices(set[0].size()+1, set[1].size()+1);
203  std::vector<size_t> ids1(1,0);
204  std::vector<size_t> ids2(1,1);
205  fillGotohMatrix(set, ids1, ids2, matrices, *sim_mat);
206  gotoh_align(set[0].size(), set[1].size(), matrices, -11, -1);
207  std::string edit_string1, edit_string2;
208  gotoh_traceback(set[0].size(), set[1].size(), matrices, edit_string1, edit_string2);
209  set[0].insert_gaps(edit_string1);
210  set[1].insert_gaps(edit_string2);
211  std::cout<<set << std::endl;
212 
213  }
214 
215 
216 
217 
218 
219 
220 
221 };
222 
223 
224 #endif /* ALIGN_TEST_HPP_ */