38 #include <unordered_map>
42 #include "../utils/Matrix.hpp"
56 std::vector<float> _insProb;
60 short _num_ins_states;
86 return _num_ins_states;
103 const std::vector<float>&
137 template<
typename DataType>
146 template<
typename DataType>
157 template<
typename DataType>
168 template<
typename DataType>
// Excerpt of a function template; its signature and several body lines are
// not part of this listing. The visible statements add _insProb[c] into
// ins_probs[j] for each alignment column j and finally divide every column
// sum by non_gap_counter[j], i.e. they produce a per-column mean.
178 template<
typename DataType>
182 size_t aln_len = aln.length();
// One output slot per alignment column.
186 ins_probs.resize(aln_len);
187 for (j=0; j<aln_len; ++j)
// Number of contributions added to each column; elements start at 0.
189 std::vector<int> non_gap_counter(aln_len);
191 size_t n_seqs = aln.n_seqs();
// Visit every (sequence, column) cell of the alignment.
193 for (i=0; i<n_seqs; ++i)
195 for (j=0; j<aln_len; ++j)
// Column j gets one more contribution: the insertion probability stored for c.
// (Presumably guarded by a gap test on a line not shown here - confirm.)
200 ++non_gap_counter[j];
201 ins_probs[j] += _insProb[c];
// Convert the column sums into means.
// NOTE(review): a column whose counter is still 0 is divided by zero here;
// confirm that such columns cannot occur or are handled by the caller.
206 for (j=0; j<aln_len; ++j)
207 ins_probs[j] /= non_gap_counter[j];
// Excerpt of a function template; its signature and several body lines are
// not part of this listing. The visible statements perform the same
// per-column averaging of _insProb[c] as the single-alignment case, but over
// a sequence of pieces (aln_vec) whose columns are laid end to end in one
// coordinate system of total_len columns.
210 template<
typename DataType>
214 size_t n_pieces = aln_vec.size();
215 size_t total_len = 0;
// total_len is the sum of the lengths of all pieces.
217 for (i=0; i<n_pieces; ++i)
218 total_len+=aln_vec[i].length();
// One output slot per column of the concatenated pieces.
221 ins_probs.resize(total_len);
222 for (j=0; j<total_len; ++j)
// Number of contributions added to each column; elements start at 0.
224 std::vector<int> non_gap_counter(total_len);
// SetType is the element type of DataType (bound to aln_vec[k] below);
// SeqType is the element type of SetType (bound to aln[i] below).
225 typedef typename DataType::value_type SetType;
226 typedef typename SetType::value_type SeqType;
// Running column offset, advanced by the length of each processed piece.
231 size_t overall_pos=0;
233 for (k=0; k<n_pieces; ++k)
235 const SetType &aln = aln_vec[k];
236 aln_length=aln.length();
238 for (i=0; i<n_seqs; ++i)
241 const SeqType &seq=aln[i];
242 for (j=0; j<aln_length; ++j)
// pos is computed on a line not shown here - presumably overall_pos + j; confirm.
247 ++non_gap_counter[pos];
248 ins_probs[pos] += _insProb[c];
// Move the offset past the piece that was just processed.
253 overall_pos+=aln_length;
// Convert the column sums into means.
// NOTE(review): a column whose counter is still 0 is divided by zero here;
// confirm that such columns cannot occur or are handled by the caller.
256 for (j=0; j<total_len; ++j)
257 ins_probs[j] /= non_gap_counter[j];
// Excerpt of a function template; its signature and several body lines are
// not part of this listing. The visible statements build one hash map per
// column for each of the two alignments (prof1, prof2), then fill
// match_probs[i][j] with the sum of _matchProb[a][b] weighted by the product
// of the two map values, divided by the sum of those weights.
261 template<
typename DataType>
265 size_t aln_len1=aln1.length();
266 size_t aln_len2=aln2.length();
// One map per column of aln1, keyed by the short value c looked up below.
269 std::vector<std::unordered_map<short, int> > prof1(aln_len1);
// One map per column of aln2.
271 std::vector<std::unordered_map<short, int> > prof2(aln_len2);
272 std::unordered_map<short, int>::iterator it;
274 size_t n_seq1=aln1.size();
275 size_t n_seq2=aln2.size();
277 std::vector<int> observed(26,0);
// Fill prof1 from every (sequence, column) cell of aln1.
278 for (i=0; i<n_seq1; ++i)
280 const typename DataType::value_type &seq = aln1[i];
281 for (j=0; j<aln_len1; ++j)
// Look c up in column j's map; the branches that act on the result are not shown.
286 if ((it =prof1[j].find(c)) != prof1[j].end())
// Fill prof2 from every (sequence, column) cell of aln2.
294 for (i=0; i<n_seq2; ++i)
296 const typename DataType::value_type &seq = aln2[i];
297 for (j=0; j<aln_len2; ++j)
301 if ((it =prof2[j].find(c)) != prof2[j].end())
// Result matrix: rows are columns of aln1, columns are columns of aln2.
308 match_probs.
resize(aln_len1, aln_len2);
309 std::unordered_map<short, int>::iterator it1,it2,it1_end,it2_end;
311 for (i=0; i<aln_len1; ++i)
313 it1_end=prof1[i].end();
314 for (j=0; j<aln_len2; ++j)
// The end iterator must belong to the map iterated below (prof2[j]).
// Taking it from prof2[i] compared iterators of different containers and
// indexed prof2 out of range whenever aln_len1 > aln_len2.
316 it2_end=prof2[j].end();
318 for (it1=prof1[i].begin(); it1!=it1_end; ++it1)
320 for (it2=prof2[j].begin(); it2!=it2_end; ++it2)
// Weighted sum of pairwise match probabilities; tmp collects the total weight.
322 match_probs[i][j] += _matchProb[it1->first][it2->first] * it1->second * it2->second;
323 tmp += it1->second * it2->second;
// Normalise by the total weight.
// NOTE(review): tmp is 0 when either column map is empty - confirm that
// cannot happen or is guarded on a line not shown here.
326 match_probs[i][j] /= tmp;
// Excerpt of a function template; its signature and several body lines are
// not part of this listing. The visible statements build one hash map per
// column for two sequences of alignment pieces (aln_vec1, aln_vec2), each
// laid end to end, then fill match_probs[i][j] with the sum of
// _matchProb[a][b] weighted by the product of the two map values, divided by
// the sum of those weights.
332 template<
typename DataType>
// SetType is the element type of DataType; SeqType is the element type of SetType.
336 typedef typename DataType::value_type SetType;
337 typedef typename SetType::value_type SeqType;
338 size_t n_pieces1=end-start+1;
339 size_t n_pieces2=aln_vec2.size();
343 size_t n_seqs,complete_len1=0, complete_len2=0;
// complete_len1 / complete_len2: summed lengths of the pieces on each side.
// NOTE(review): n_pieces1 is derived from start/end, yet aln_vec1 is indexed
// with k starting at 0 here and in the filling loop below - confirm whether
// aln_vec1[start+k] was intended.
344 for (k=0; k<n_pieces1; ++k)
345 complete_len1 += aln_vec1[k].length();
346 for (k=0; k<n_pieces2; ++k)
347 complete_len2 += aln_vec2[k].length();
// prof1 is indexed with i < complete_len1 and prof2 with j < complete_len2
// below, so each must be sized with its own length. The two sizes were
// swapped, which indexed out of range whenever the lengths differed.
349 std::vector<std::unordered_map<short, int> > prof2(complete_len2);
350 std::vector<std::unordered_map<short, int> > prof1(complete_len1);
351 std::unordered_map<short, int>::iterator it1,it2,it1_end,it2_end;
// Running column offset, advanced by the length of each processed piece.
352 size_t overall_pos=0;
355 std::vector<bool> is_domain1(complete_len1,
false);
356 std::vector<bool> is_domain2(complete_len2,
false);
357 std::vector<int> observed(26,0);
// Fill prof1 from the pieces of aln_vec1.
358 for (k=0; k<n_pieces1; ++k)
360 const SetType &aln=aln_vec1[k];
361 aln_len = aln.length();
// Walks the concatenated columns covered by this piece (loop body not shown).
365 for (l=overall_pos; l<overall_pos+aln_len; ++l)
368 for (i=0; i<n_seqs; ++i)
371 const SeqType &seq = aln[i];
372 for (j=0; j<aln_len; ++j)
// pos is computed on a line not shown here - presumably overall_pos + j; confirm.
377 if ((it1 =prof1[pos].find(c)) != prof1[pos].end())
384 overall_pos+=aln_len;
// Fill prof2 from the pieces of aln_vec2.
// NOTE(review): overall_pos has to restart at 0 for this loop; the reset is
// not visible in this excerpt - confirm it exists.
388 for (k=0; k<n_pieces2; ++k)
390 const SetType &aln=aln_vec2[k];
391 aln_len = aln.length();
395 for (l=overall_pos; l<overall_pos+aln_len; ++l)
398 for (i=0; i<n_seqs; ++i)
401 const SeqType &seq = aln[i];
402 for (j=0; j<aln_len; ++j)
407 if ((it1 =prof2[pos].find(c)) != prof2[pos].end())
414 overall_pos+=aln_len;
// Result matrix: rows are columns of side 1, columns are columns of side 2.
417 match_probs.
resize(complete_len1, complete_len2);
419 for (i=0; i<complete_len1; ++i)
421 it1_end=prof1[i].end();
422 for (j=0; j<complete_len2; ++j)
// The end iterator must belong to the map iterated below (prof2[j]).
// Taking it from prof2[i] compared iterators of different containers and
// indexed prof2 out of range whenever complete_len1 > complete_len2.
424 it2_end=prof2[j].end();
426 for (it1=prof1[i].begin(); it1!=it1_end; ++it1)
428 for (it2=prof2[j].begin(); it2!=it2_end; ++it2)
// Weighted sum of pairwise match probabilities; tmp collects the total weight.
430 match_probs[i][j] += _matchProb[it1->first][it2->first] * it1->second * it2->second;
431 tmp += it1->second * it2->second;
// Normalise by the total weight.
// NOTE(review): tmp is 0 when either column map is empty - confirm that
// cannot happen or is guarded on a line not shown here.
434 match_probs[i][j] /= tmp;