MDA
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Groups
fw_bw.hpp
Go to the documentation of this file.
1 /*
2  * fw_bw.hpp
3  *
4  * Created on: Apr 12, 2012
5  * Author: Carsten Kemena
6  *
7  * This file is part of MDAT.
8  *
9  * MDAT is free software: you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser General Public License as published by
11  * the Free Software Foundation, either version 3 of the License, or
12  * (at your option) any later version.
13  *
14  * MDAT is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17  * GNU Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with MDAT. If not, see <http://www.gnu.org/licenses/>.
21  *
22  */
23 
28 #ifndef FW_BW_HPP_
29 #define FW_BW_HPP_
30 
31 
32 
33 // C header
34 #include <cstdlib>
35 #include <cfloat>
36 #include <cmath>
37 
38 // C++ header
39 #include <algorithm>
40 #include <utility>
41 #include <stack>
42 #include <vector>
43 
44 
45 // MDAT header
46 #include "HMM.hpp"
47 #include "Library.hpp"
48 #include "../utils/fast_math.hpp"
49 #include "../utils/Matrix.hpp"
50 
51 
52 
53 namespace MDAT {
54 
55 
65 template<typename DataType>
66 float
67 hmm_forward(const DataType &seq1, const DataType &seq2, const HMM &hmm, Matrix<float> &dp_mat, float **insert_matrices)
68 {
69  unsigned short n_states=5;
70  size_t l_seq1 = seq1.size()+1;
71  size_t l_seq2 = seq2.size()+1;
72  size_t i,j;
73  float single_c1, single_c2;
74  short k;
75  const Matrix<float> &matchProbs = hmm.match_probs();
76  const std::vector<float> &insProbs = hmm.ins_probs();
77  const Matrix<float> &transProbs = hmm.trans_probs();
78  const float *initDistr = hmm.init_distribution();
79 
80  // 0 matches, 1 insert1, 2 insert2, 3 del1 4 del2
81  dp_mat[0][0] =initDistr[0];
82  for (k=1; k<n_states; ++k)
83  insert_matrices[2*k-2][0]=initDistr[k];
84 
85  // initialize y direction
86  short c1,c2;
87  float tmp1, tmp2;
88  for (i=1; i<l_seq2; ++i)
89  {
90  c2= static_cast<short>(seq2[i-1]);
91  single_c2 = insProbs[c2];
92  tmp1 = insert_matrices[2][i] = LOG_ADD(insert_matrices[2][i-1]+transProbs[2][2], dp_mat[0][i-1]+transProbs[0][2]) + single_c2;
93  tmp2 = insert_matrices[6][i] = LOG_ADD(insert_matrices[6][i-1]+transProbs[4][4], dp_mat[0][i-1]+transProbs[0][4]) + single_c2;
94  dp_mat[0][i] = LOG_ADD(tmp1, tmp2);
95  }
96 
97  // transition matrix: 0 match 1 insert1 2 deletion1 3 insert2 4 del2
98  short l,m;
99  float tmp;
100 
101  //State ID: match, insert1, del1, insert2, del2
102  for (i=1; i<l_seq1; ++i)
103  {
104  c1=static_cast<short>(seq1[i-1]);
105  single_c1 = insProbs[c1];
106 
107  // insert matrices
108  tmp1 = insert_matrices[1][0] = LOG_ADD(insert_matrices[0][0]+transProbs[1][1], dp_mat[i-1][0]+transProbs[0][1]) + single_c1;
109  tmp2 = insert_matrices[5][0] = LOG_ADD(insert_matrices[4][0]+transProbs[3][3], dp_mat[i-1][0]+transProbs[0][3]) + single_c1;
110  dp_mat[i][0] = LOG_ADD(tmp1, tmp2);
111  for (j=1; j<l_seq2; ++j)
112  {
113  c2=static_cast<short>(seq2[j-1]);
114  single_c2 = insProbs[c2];
115 
116  // insertion / deletion values
117  for (k=1; k<n_states; ++k)
118  {
119  l = (k&1)?1:0;
120  m = (k&1)?0:1;
121  tmp = (l) ? single_c1 : single_c2;
122  if (((i==1) && (l==1)) || ((j==1) && (m==1)))
123  insert_matrices[2*k-1][j] = dp_mat[i-l][j-m] + transProbs[0][k] + tmp;
124  else
125  insert_matrices[2*k-1][j] = LOG_ADD(dp_mat[i-l][j-m] + transProbs[0][k], insert_matrices[2*k-1-l][j-m] + transProbs[k][k]) + tmp;
126  }
127 
128  // match value
129  tmp = dp_mat[i-1][j-1] + transProbs[0][0];
130  if ((i>1) && (j>1))
131  {
132  for (k=1; k<n_states; ++k)
133  LOG_PLUS_EQUALS(tmp, insert_matrices[2*k-2][j-1] + transProbs[k][0]);
134  }
135  else if (i>1)
136  {
137  LOG_PLUS_EQUALS(tmp, insert_matrices[0][j-1] + transProbs[1][0]);
138  LOG_PLUS_EQUALS(tmp, insert_matrices[4][j-1] + transProbs[3][0]);
139  }
140  else
141  {
142  LOG_PLUS_EQUALS(tmp, insert_matrices[2][j-1] + transProbs[2][0]);
143  LOG_PLUS_EQUALS(tmp, insert_matrices[6][j-1] + transProbs[4][0]);
144  }
145  dp_mat[i][j] = tmp + matchProbs[c1][c2];
146  }
147  std::swap(insert_matrices[0], insert_matrices[1]);
148  std::swap(insert_matrices[2], insert_matrices[3]);
149  std::swap(insert_matrices[4], insert_matrices[5]);
150  std::swap(insert_matrices[6], insert_matrices[7]);
151  }
152  float total=dp_mat[l_seq1-1][l_seq2-1];
153  LOG_PLUS_EQUALS(total, insert_matrices[0][l_seq2-1]);
154  LOG_PLUS_EQUALS(total, insert_matrices[2][l_seq2-1]);
155  LOG_PLUS_EQUALS(total, insert_matrices[4][l_seq2-1]);
156  LOG_PLUS_EQUALS(total, insert_matrices[6][l_seq2-1]);
157  return total;
158 }
159 
160 float
161 hmm_forward(const HMM &hmm, std::vector<float> &ins_probs1, std::vector<float> &ins_probs2, Matrix<float> &match_probs, Matrix<float> &dp_mat, float **insert_matrices)
162 {
163  unsigned short n_states=5;
164  size_t l_seq1 = ins_probs1.size()+1;
165  size_t l_seq2 = ins_probs2.size()+1;
166  size_t i,j;
167  float single_c1, single_c2;
168  short k;
169  const Matrix<float> &transProbs = hmm.trans_probs();
170  const float *initDistr = hmm.init_distribution();
171 
172  // 0 matches, 1 insert1, 2 insert2, 3 del1 4 del2
173  dp_mat[0][0] =initDistr[0];
174  for (k=1; k<n_states; ++k)
175  insert_matrices[2*k-2][0]=initDistr[k];
176 
177  // initialize y direction
178  float tmp1, tmp2;
179  for (i=1; i<l_seq2; ++i)
180  {
181  //c2= static_cast<short>(seq2[i-1]);
182  single_c2 = ins_probs2[i-1];
183  tmp1 = insert_matrices[2][i] = LOG_ADD(insert_matrices[2][i-1]+transProbs[2][2], dp_mat[0][i-1]+transProbs[0][2]) + single_c2;
184  tmp2 = insert_matrices[6][i] = LOG_ADD(insert_matrices[6][i-1]+transProbs[4][4], dp_mat[0][i-1]+transProbs[0][4]) + single_c2;
185  dp_mat[0][i] = LOG_ADD(tmp1, tmp2);
186  }
187 
188  // transition matrix: 0 match 1 insert1 2 deletion1 3 insert2 4 del2
189  short l,m;
190  float tmp;
191 
192  //State ID: match, insert1, del1, insert2, del2
193  for (i=1; i<l_seq1; ++i)
194  {
195  single_c1 = ins_probs1[i-1];
196 
197  // insert matrices
198  tmp1 = insert_matrices[1][0] = LOG_ADD(insert_matrices[0][0]+transProbs[1][1], dp_mat[i-1][0]+transProbs[0][1]) + single_c1;
199  tmp2 = insert_matrices[5][0] = LOG_ADD(insert_matrices[4][0]+transProbs[3][3], dp_mat[i-1][0]+transProbs[0][3]) + single_c1;
200  dp_mat[i][0] = LOG_ADD(tmp1, tmp2);
201  for (j=1; j<l_seq2; ++j)
202  {
203  single_c2 = ins_probs2[j-1];
204 
205  // insertion / deletion values
206  for (k=1; k<n_states; ++k)
207  {
208  l = (k&1)?1:0;
209  m = (k&1)?0:1;
210  tmp = (l) ? single_c1 : single_c2;
211  if (((i==1) && (l==1)) || ((j==1) && (m==1)))
212  insert_matrices[2*k-1][j] = dp_mat[i-l][j-m] + transProbs[0][k] + tmp;
213  else
214  insert_matrices[2*k-1][j] = LOG_ADD(dp_mat[i-l][j-m] + transProbs[0][k], insert_matrices[2*k-1-l][j-m] + transProbs[k][k]) + tmp;
215  }
216 
217  // match value
218  tmp = dp_mat[i-1][j-1] + transProbs[0][0];
219  if ((i>1) && (j>1))
220  {
221  for (k=1; k<n_states; ++k)
222  LOG_PLUS_EQUALS(tmp, insert_matrices[2*k-2][j-1] + transProbs[k][0]);
223  }
224  else if (i>1)
225  {
226  LOG_PLUS_EQUALS(tmp, insert_matrices[0][j-1] + transProbs[1][0]);
227  LOG_PLUS_EQUALS(tmp, insert_matrices[4][j-1] + transProbs[3][0]);
228  }
229  else
230  {
231  LOG_PLUS_EQUALS(tmp, insert_matrices[2][j-1] + transProbs[2][0]);
232  LOG_PLUS_EQUALS(tmp, insert_matrices[6][j-1] + transProbs[4][0]);
233  }
234  dp_mat[i][j] = tmp + match_probs[i-1][j-1];
235  }
236  std::swap(insert_matrices[0], insert_matrices[1]);
237  std::swap(insert_matrices[2], insert_matrices[3]);
238  std::swap(insert_matrices[4], insert_matrices[5]);
239  std::swap(insert_matrices[6], insert_matrices[7]);
240  }
241  float total=dp_mat[l_seq1-1][l_seq2-1];
242  LOG_PLUS_EQUALS(total, insert_matrices[0][l_seq2-1]);
243  LOG_PLUS_EQUALS(total, insert_matrices[2][l_seq2-1]);
244  LOG_PLUS_EQUALS(total, insert_matrices[4][l_seq2-1]);
245  LOG_PLUS_EQUALS(total, insert_matrices[6][l_seq2-1]);
246  return total;
247 }
248 
249 template<typename DataType>
250 float
251 hmm_backward(const DataType &seq1, const DataType &seq2, const HMM &hmm, Matrix<float> &dp_mat, float **insert_matrices)
252 {
253  unsigned short n_states=5;
254  int l_seq1 = seq1.size();
255  int l_seq2 = seq2.size();
256  int i,j;
257  float single_c1, single_c2;
258  short k;
259  const Matrix<float> &matchProbs = hmm.match_probs();
260  const std::vector<float> &insProbs = hmm.ins_probs();
261  const Matrix<float> &transProbs = hmm.trans_probs();
262  const float *initDistr = hmm.init_distribution();
263 
264  // 0 matches, 1 insert1, 2 insert2, 3 del1, 4 del2
265  dp_mat[l_seq1][l_seq2] =initDistr[0];
266  for (k=1; k<n_states; ++k)
267  insert_matrices[2*k-2][l_seq2]=initDistr[k];
268 
269  // initialize
270  short c1,c2;
271  float tmp1, tmp2;
272  for (i=l_seq2-1; i>=0; --i)
273  {
274  c2= static_cast<short>(seq2[i]);
275  single_c2 = insProbs[c2];
276  tmp1 = insert_matrices[2][i] = LOG_ADD(insert_matrices[2][i+1]+transProbs[2][2], dp_mat[l_seq1][i+1]+transProbs[0][2]) + single_c2;
277  tmp2 = insert_matrices[6][i] = LOG_ADD(insert_matrices[6][i+1]+transProbs[4][4], dp_mat[l_seq1][i+1]+transProbs[0][4]) + single_c2;
278  dp_mat[l_seq1][i] = LOG_ADD(tmp1, tmp2);
279  }
280 
281  // transition matrix: 0 match 1 insert1 2 deletion1 3 insert2 4 del2
282  short l,m;
283  float tmp;
284 
285  //State ID: match, insert1, del1, insert2, del2
286  for (i=l_seq1-1; i>=0; --i)
287  {
288  c1=static_cast<short>(seq1[i]);
289  single_c1 = insProbs[c1];
290 
291  // insert matrices
292  tmp1 = insert_matrices[1][l_seq2] = LOG_ADD(insert_matrices[0][l_seq2]+transProbs[1][1], dp_mat[i+1][l_seq2]+transProbs[0][1]) + single_c1;
293  tmp2 = insert_matrices[5][l_seq2] = LOG_ADD(insert_matrices[4][l_seq2]+transProbs[3][3], dp_mat[i+1][l_seq2]+transProbs[0][3]) + single_c1;
294  dp_mat[i][l_seq2] = LOG_ADD(tmp1, tmp2);
295  for (j=l_seq2-1; j>=0; --j)
296  {
297  c2=static_cast<short>(seq2[j]);
298  single_c2 = insProbs[c2];
299 
300  // insertion / deletion values
301  for (k=1; k<n_states; ++k)
302  {
303  l = (k&1)?1:0;
304  m = (k&1)?0:1;
305  tmp = (l) ? single_c1 : single_c2;
306  if (((i==(l_seq1-1)) && (l==1)) || ((j==(l_seq2-1)) && (m==1)))
307  insert_matrices[2*k-1][j] = dp_mat[i+l][j+m] + transProbs[0][k] + tmp;
308  else
309  insert_matrices[2*k-1][j] = LOG_ADD(dp_mat[i+l][j+m] + transProbs[0][k], insert_matrices[2*k-1-l][j+m] + transProbs[k][k]) + tmp;
310  }
311 
312  // match value
313  tmp = dp_mat[i+1][j+1] + transProbs[0][0];
314  if ((i<l_seq1-1) && (j<l_seq2-1))
315  {
316  for (k=1; k<n_states; ++k)
317  LOG_PLUS_EQUALS(tmp, insert_matrices[2*k-2][j+1] + transProbs[k][0]);
318  }
319  else if (i<l_seq1-1)
320  {
321  LOG_PLUS_EQUALS(tmp, insert_matrices[0][j+1] + transProbs[1][0]);
322  LOG_PLUS_EQUALS(tmp, insert_matrices[4][j+1] + transProbs[3][0]);
323  }
324  else
325  {
326  LOG_PLUS_EQUALS(tmp, insert_matrices[2][j+1] + transProbs[2][0]);
327  LOG_PLUS_EQUALS(tmp, insert_matrices[6][j+1] + transProbs[4][0]);
328  }
329  dp_mat[i][j] = tmp + matchProbs[c1][c2];
330  }
331  std::swap(insert_matrices[0], insert_matrices[1]);
332  std::swap(insert_matrices[2], insert_matrices[3]);
333  std::swap(insert_matrices[4], insert_matrices[5]);
334  std::swap(insert_matrices[6], insert_matrices[7]);
335  }
336 
337 
338  float total=dp_mat[0][0];
339  LOG_PLUS_EQUALS(total, insert_matrices[0][0]);
340  LOG_PLUS_EQUALS(total, insert_matrices[2][0]);
341  LOG_PLUS_EQUALS(total, insert_matrices[4][0]);
342  LOG_PLUS_EQUALS(total, insert_matrices[6][0]);
343  return total;
344 }
345 
346 
357 float
358 hmm_backward(const HMM &hmm, std::vector<float> &ins_probs1, std::vector<float> &ins_probs2, Matrix<float> &match_probs, Matrix<float> &dp_mat, float **insert_matrices)
359 {
360  unsigned short n_states=5;
361  int l_seq1 = ins_probs1.size();
362  int l_seq2 = ins_probs2.size();
363  int i,j;
364  float single_c1, single_c2;
365  short k;
366  const Matrix<float> &transProbs = hmm.trans_probs();
367  const float *initDistr = hmm.init_distribution();
368 
369  // 0 matches, 1 insert1, 2 insert2, 3 del1, 4 del2
370  dp_mat[l_seq1][l_seq2] =initDistr[0];
371  for (k=1; k<n_states; ++k)
372  insert_matrices[2*k-2][l_seq2]=initDistr[k];
373 
374  // initialize
375  float tmp1, tmp2;
376  for (i=l_seq2-1; i>=0; --i)
377  {
378  single_c2 = ins_probs2[i];
379  tmp1 = insert_matrices[2][i] = LOG_ADD(insert_matrices[2][i+1]+transProbs[2][2], dp_mat[l_seq1][i+1]+transProbs[0][2]) + single_c2;
380  tmp2 = insert_matrices[6][i] = LOG_ADD(insert_matrices[6][i+1]+transProbs[4][4], dp_mat[l_seq1][i+1]+transProbs[0][4]) + single_c2;
381  dp_mat[l_seq1][i] = LOG_ADD(tmp1, tmp2);
382  }
383 
384  // transition matrix: 0 match 1 insert1 2 deletion1 3 insert2 4 del2
385  short l,m;
386  float tmp;
387 
388  //State ID: match, insert1, del1, insert2, del2
389  for (i=l_seq1-1; i>=0; --i)
390  {
391  single_c1 = ins_probs1[i];
392 
393  // insert matrices
394  tmp1 = insert_matrices[1][l_seq2] = LOG_ADD(insert_matrices[0][l_seq2]+transProbs[1][1], dp_mat[i+1][l_seq2]+transProbs[0][1]) + single_c1;
395  tmp2 = insert_matrices[5][l_seq2] = LOG_ADD(insert_matrices[4][l_seq2]+transProbs[3][3], dp_mat[i+1][l_seq2]+transProbs[0][3]) + single_c1;
396  dp_mat[i][l_seq2] = LOG_ADD(tmp1, tmp2);
397  for (j=l_seq2-1; j>=0; --j)
398  {
399  single_c2 = ins_probs2[j];
400 
401  // insertion / deletion values
402  for (k=1; k<n_states; ++k)
403  {
404  l = (k&1)?1:0;
405  m = (k&1)?0:1;
406  tmp = (l) ? single_c1 : single_c2;
407  if (((i==(l_seq1-1)) && (l==1)) || ((j==(l_seq2-1)) && (m==1)))
408  insert_matrices[2*k-1][j] = dp_mat[i+l][j+m] + transProbs[0][k] + tmp;
409  else
410  insert_matrices[2*k-1][j] = LOG_ADD(dp_mat[i+l][j+m] + transProbs[0][k], insert_matrices[2*k-1-l][j+m] + transProbs[k][k]) + tmp;
411  }
412 
413  // match value
414  tmp = dp_mat[i+1][j+1] + transProbs[0][0];
415  if ((i<l_seq1-1) && (j<l_seq2-1))
416  {
417  for (k=1; k<n_states; ++k)
418  LOG_PLUS_EQUALS(tmp, insert_matrices[2*k-2][j+1] + transProbs[k][0]);
419  }
420  else if (i<l_seq1-1)
421  {
422  LOG_PLUS_EQUALS(tmp, insert_matrices[0][j+1] + transProbs[1][0]);
423  LOG_PLUS_EQUALS(tmp, insert_matrices[4][j+1] + transProbs[3][0]);
424  }
425  else
426  {
427  LOG_PLUS_EQUALS(tmp, insert_matrices[2][j+1] + transProbs[2][0]);
428  LOG_PLUS_EQUALS(tmp, insert_matrices[6][j+1] + transProbs[4][0]);
429  }
430  dp_mat[i][j] = tmp + match_probs[i][j];
431  }
432  std::swap(insert_matrices[0], insert_matrices[1]);
433  std::swap(insert_matrices[2], insert_matrices[3]);
434  std::swap(insert_matrices[4], insert_matrices[5]);
435  std::swap(insert_matrices[6], insert_matrices[7]);
436  }
437  float total=dp_mat[0][0];
438  LOG_PLUS_EQUALS(total, insert_matrices[0][0]);
439  LOG_PLUS_EQUALS(total, insert_matrices[2][0]);
440  LOG_PLUS_EQUALS(total, insert_matrices[4][0]);
441  LOG_PLUS_EQUALS(total, insert_matrices[6][0]);
442  return total;
443 }
444 
445 
446 
/**
 * \brief A scored pair of positions (x, y).
 *
 * The less-than operator is deliberately inverted: an entry with the larger
 * score compares as "less", so std::sort orders entries by descending score.
 */
struct hmm_match
{
	float match;	///< Score of the pair.
	size_t x;	///< First position of the pair.
	size_t y;	///< Second position of the pair.

	/// Stores the score and both positions unchanged.
	hmm_match(float score, size_t pos_x, size_t pos_y)
		: match(score),
		  x(pos_x),
		  y(pos_y)
	{
	}

	/// True when lhs has the strictly larger score (descending sort order).
	friend bool operator <(const hmm_match &lhs, const hmm_match &rhs)
	{
		return rhs.match < lhs.match;
	}
};
461 
462 
463 template<typename DataType, typename LibraryDataType>
464 void
465 hmm2lib(const DataType &seq1, const DataType &seq2, const Matrix<float> &forward_mat, const Matrix<float> &backward_mat, Library<LibraryDataType> &lib, float total_probability)
466 {
467  size_t seq_id1 = seq1.id();
468  size_t seq_id2 = seq2.id();
469  size_t l_seq1 = seq1.length();
470  size_t l_seq2 = seq2.length();
471  size_t i,j;
472  std::vector<hmm_match> matches;
473  matches.reserve(l_seq1*l_seq2);
474  float tmp;
475 
476  for (i=0; i<l_seq1; ++i)
477  {
478  for (j=0; j<l_seq2; ++j)
479  {
480  if ((tmp=EXP(std::min(LOG_ONE,(forward_mat[i][j] + backward_mat[i][j] - total_probability)))) >= 0.01)
481  matches.push_back(hmm_match(tmp, i,j));
482  }
483  }
484 
485  // sorting
486  std::sort(matches.begin(), matches.end());
487  size_t max2 = std::min(4*std::min(l_seq1,l_seq2), matches.size());
488 // size_t max2=matches.size();
489  if (max2>0)
490  {
491  float min_score=matches[max2-1].match;
492  i=0;
493  while ((matches.size()!=i)&&(matches[i].match>=min_score))
494  {
495  lib.add(seq_id1, seq_id2, matches[i].x, matches[i].y, matches[i].match);
496  ++i;
497  }
498  }
499 }
500 
501 template<typename LibraryType>
502 void
503 hmm2lib(const Sequence &seq1, int id1, const Sequence &seq2, int id2, const Matrix<float> &forward_mat, const Matrix<float> &backward_mat, Library<LibraryType> &lib, float total_probability)
504 {
505  size_t l_seq1 = seq1.size();
506  size_t l_seq2 = seq2.size();
507  size_t i,j;
508  std::vector<hmm_match> matches;
509  matches.reserve(l_seq1*l_seq2);
510  float tmp;
511 
512  for (i=0; i<l_seq1; ++i)
513  {
514  for (j=0; j<l_seq2; ++j)
515  {
516  if ((tmp=EXP(std::min(LOG_ONE,(forward_mat[i][j] + backward_mat[i][j] - total_probability)))) >= 0.01)
517  matches.push_back(hmm_match(tmp, i,j));
518  }
519  }
520 
521  // sorting
522  //size_t max2=matches.size();
523  size_t max2 = std::min(std::min(l_seq1,l_seq2), matches.size());
524  if (max2>0)
525  {
526  std::sort(matches.begin(), matches.end()); //TODO needed?
527  float min_score=matches[max2-1].match;
528  i=0;
529  while ((matches.size()!=i)&&(matches[i].match>=min_score))
530  {
531  lib.add(id1, id2, matches[i].x, matches[i].y, matches[i].match);
532  ++i;
533  }
534  }
535 }
536 
537 /*
538 template<typename DataType, typename LibraryType>
539 void
540 hmm2lib(const DataType &aln1, const DataType &aln2, const Matrix<float> &forward_mat, const Matrix<float> &backward_mat, unsigned short n_states, Library<LibraryType> &lib, float total_probability)
541 {
542  size_t seq_id1 = aln1.id();
543  size_t seq_id2 = aln2.id();
544  size_t l_seq1 = aln1.size();
545  size_t l_seq2 = aln2.size();
546  size_t i,j;
547  std::vector<hmm_match> matches;
548  matches.reserve(l_seq1*l_seq2);
549  float tmp;
550 
551  for (i=0; i<l_seq1; ++i)
552  {
553  for (j=0; j<l_seq2; ++j)
554  {
555  if ((tmp=EXP(std::min(LOG_ONE,(forward_mat[i][j] + backward_mat[i][j] - total_probability)))) >= 0.01)
556  matches.push_back(hmm_match(tmp, i,j));
557  }
558  }
559 
560  // sorting
561  std::sort(matches.begin(), matches.end());
562  size_t max2 = std::min(4*std::min(l_seq1,l_seq2), matches.size());
563  float min_score=matches[max2-1].match;
564  i=0;
565  while ((matches.size()!=i)&&(matches[i].match>=min_score))
566  {
567  lib.add(seq_id1, seq_id2, matches[i].x, matches[i].y, matches[i].match);
568  ++i;
569  }
570 }*/
571 
572 
581 template<typename DataType>
582 void
583 all_hmm_pairs(const std::vector<DataType> &set, Library<std::vector<DataType> > &lib, Matrix<float> &dist_mat, size_t start, size_t end)
584 {
585  size_t n_seqs=end-start+1;
586  size_t max_len = 0;
587  size_t i,j;
588  for (i=0; i<n_seqs; ++i)
589  {
590  if (max_len<set[i].length())
591  max_len=set[i].length();
592  }
593  ++max_len;
594  HMM hmm('P');
595 
596  for (i=0; i< n_seqs; ++i)
597  dist_mat[i][i]=0;
598 
599  std::vector<std::vector<float> > ins_probs(n_seqs);
600  for (i=start; i<=end; ++i)
601  hmm.calculate_insertion_probs(set[i], ins_probs[i-start]);
602 
603  {
604  float bw_p, fw_p=0;
605  Matrix<float> forward_mat = Matrix<float>(max_len, max_len);
606  Matrix<float> backward_mat = Matrix<float>(max_len, max_len);
607  float **insert_matrices = new float*[8];
608  for (i=0; i<8; ++i)
609  insert_matrices[i] = new float[max_len];
610 
611  Matrix<float> match_probs(1,1);
612 
613  for (i=start; i<= end; ++i)
614  {
615  const DataType &aln1 = set[i];
616  for (j=i+1; j<=end; ++j)
617  {
618  const DataType &aln2 = set[j];
619  hmm.calculate_match_probs(aln1, aln2, match_probs);
620  fw_p = hmm_forward(hmm, ins_probs[i-start], ins_probs[j-start], match_probs, forward_mat, insert_matrices);
621  bw_p = hmm_backward(hmm, ins_probs[i-start], ins_probs[j-start], match_probs, backward_mat, insert_matrices);
622  hmm2lib(aln1, aln2, forward_mat, backward_mat, lib, (bw_p+fw_p)/2);
623  dist_mat[i-start][j-start] = dist_mat[j-start][i-start] = 1-EXP(((fw_p+bw_p)/2));
624  }
625  }
626  } //omp
627 }
628 
629 template<typename DataType>
630 void
631 all_hmm_pairs(const std::vector<DataType> &set, Library<std::vector<DataType> > &lib, Matrix<float> &dist_mat)
632 {
633  all_hmm_pairs(set, lib, dist_mat, 0, set.size()-1);
634 }
635 
636 template<typename DataType>
637 void
638 all_hmm_pairs(const DataType &set, Library<DataType> &lib, Matrix<float> &dist_mat)
639 {
640  size_t max_len = 0;//set.max_size()+1;
641  size_t i,j;
642  size_t n_seqs = set.n_seqs();
643  for (i=0; i<n_seqs; ++i)
644  {
645  if (set[i].size() > max_len)
646  max_len=set[i].size();
647  }
648  max_len+=1;
649  //HMM hmm(set.seq_type());
650  HMM hmm('P');
651 
652  for (i=0; i< n_seqs; ++i)
653  dist_mat[i][i]=0;
654 
655  float bw_p, fw_p=0;
656  Matrix<float> forward_mat = Matrix<float>(max_len, max_len);
657  Matrix<float> backward_mat = Matrix<float>(max_len, max_len);
658  float **insert_matrices = new float*[8];
659  for (i=0; i<8; ++i)
660  insert_matrices[i] = new float[max_len];
661 
662  for (i=0; i< n_seqs; ++i)
663  {
664  for (j=i+1; j<n_seqs; ++j)
665  {
666  const typename DataType::value_type &seq1 = set[i];
667  const typename DataType::value_type &seq2 = set[j];
668  fw_p = hmm_forward(seq1, seq2, hmm, forward_mat, insert_matrices);
669  bw_p = hmm_backward(seq1, seq2, hmm, backward_mat, insert_matrices);
670  hmm2lib(seq1, i, seq2, j, forward_mat, backward_mat, lib, (bw_p+fw_p)/2);
671  dist_mat[i][j] = dist_mat[j][i] = 1-EXP(((fw_p+bw_p)/2));
672  }
673  }
674 }
675 
676 
677 
678 
679 template<typename DataType, typename LibraryType>
680 void
681 all_hmm_pairs(const DataType &set, Library<LibraryType> &lib, Matrix<float> &dist_mat, size_t start, size_t end)
682 {
683  size_t n_seqs=end-start+1;
684  size_t max_len = 0;
685  size_t i,j;
686  for (i=0; i<n_seqs; ++i)
687  {
688  if (max_len<set[i].size())
689  max_len=set[i].size();
690  }
691  ++max_len;
692  HMM hmm(set.seq_type());
693 
694  for (i=0; i< n_seqs; ++i)
695  dist_mat[i][i]=0;
696 
697  float bw_p, fw_p=0;
698  Matrix<float> forward_mat = Matrix<float>(max_len, max_len);
699  Matrix<float> backward_mat = Matrix<float>(max_len, max_len);
700  float **insert_matrices = new float*[8];
701  for (i=0; i<8; ++i)
702  insert_matrices[i] = new float[max_len];
703 
704  for (i=start; i< end; ++i)
705  {
706  for (j=i+1; j<end; ++j)
707  {
708  const Sequence &seq1 = set[i];
709  const Sequence &seq2 = set[j];
710  fw_p = hmm_forward(seq1, seq2, hmm, forward_mat, insert_matrices);
711  bw_p = hmm_backward(seq1, seq2, hmm, backward_mat, insert_matrices);
712  hmm2lib(seq1, seq2, forward_mat, backward_mat, 5, lib, (bw_p+fw_p)/2);
713  dist_mat[i-start][j-start] = dist_mat[j-start][i-start] = 1-EXP(((fw_p+bw_p)/2));
714  }
715  }
716 }
717 
718 
719 
720 
721 } // namespace MDAT
722 
723 
724 
725 
726 #endif /* FW_BW_HPP_ */