14 #include "../utils/Matrix.hpp"
/// Template header and storage member that presumably belong to the class
/// template Vector<DataType>; the class header line itself is not visible in
/// this excerpt — TODO confirm.
20 template<
typename DataType>
/// Underlying element storage. The Vector constructor creates it with the
/// requested length and resize() forwards to its resize().
25 std::vector<DataType> _vec;
/// Constructs a Vector with `length` elements and records `id_value` as its id.
///
/// @param length   Number of elements _vec is created with; std::vector's
///                 size constructor value-initializes them.
/// @param id_value Value stored in _id; defaults to 0.
///
/// The constructor body is not visible in this excerpt.
29 Vector(
size_t length,
size_t id_value=0):_vec(std::vector<DataType>(length)), _id(id_value)
/// Mutable element access: returns a reference to a DataType for `index`.
/// The body is not visible in this excerpt; presumably it forwards to
/// _vec[index] without bounds checking — TODO confirm.
///
/// @param index Position of the requested element.
35 DataType &operator[](
unsigned int index)
/// Read-only element access: const overload returning a const reference to a
/// DataType for `index`. The body is not visible in this excerpt; presumably
/// it forwards to _vec[index] without bounds checking — TODO confirm.
///
/// @param index Position of the requested element.
40 const DataType &operator[](
unsigned int index)
const
/// Changes the number of stored elements by forwarding to
/// std::vector::resize on _vec. The return type line is not visible in this
/// excerpt.
///
/// @param new_size Number of elements _vec should hold afterwards.
57 resize(
size_t new_size)
59 _vec.resize(new_size);
// Body fragment of the distance computation against `other`. The signature is
// not visible in this excerpt; kmer_dist_mat in this file calls it as dist().
// it1 walks this object's _vec, it2 starts at other._vec.begin(). The loop is
// bounded only by _vec.end(), so it presumably assumes both vectors have the
// same length — TODO confirm.
66 typename std::vector<DataType>::const_iterator it1, it2=other._vec.begin(), it1_end=_vec.end();
// NOTE(review): no increment of it2 is visible in this excerpt; presumably it
// is advanced inside the loop body on a line not shown — confirm.
69 for (it1=_vec.begin(); it1!=it1_end; ++it1)
// val accumulates the element-wise minima, sum the element-wise maxima.
71 val+=std::min(*it1,*it2);
72 sum+=std::max(*it1, *it2);
// Result is 100 minus the percentage val/sum: 0 when every pair of elements
// is equal (val == sum, sum non-zero) and 100 when val is 0.
// NOTE(review): if sum is 0 (e.g. both vectors contain only zeros) this is a
// floating-point division by zero and the result is not a finite number —
// confirm callers cannot reach that case.
75 return 100-(100.0*val/sum);
/// Fragment of calc_km_vec; the return type and several body lines are not
/// visible in this excerpt. The visible lines maintain `value`, a base-20
/// combination of the codes of consecutive sequence characters, while walking
/// `seq` from `start` to `end`. kmer_dist_mat in this file stores the result
/// in a Vector<int>* — presumably the per-sequence k-mer vector, TODO confirm.
///
/// @param seq            Sequence indexed by position; each element is cast
///                       to int and used as an index into coded_alphabet.
/// @param coded_alphabet Lookup table from character value to a short code.
///                       Code 20 is tested for specially below; kmer_dist_mat
///                       in this file uses 20 as the table's fill value, so it
///                       presumably marks characters outside the 20-letter
///                       amino-acid alphabet — TODO confirm.
/// @param start          Index of the first character to process (default 0).
/// @param end            Upper index of the scan; the loop below runs while
///                       i < end+1 (default 0).
///
/// NOTE(review): (int)seq[...] is used directly as an index. If seq yields
/// plain `char` and char is signed, characters above 127 would give a negative
/// index — confirm inputs are restricted to ASCII.
80 template<
typename SequenceType>
82 calc_km_vec(
const SequenceType &seq, std::vector<short> &coded_alphabet,
size_t start=0,
size_t end=0)
// Seed value with the first two codes: 20*code(seq[start]) + code(seq[start+1]).
88 unsigned int value = 20 * coded_alphabet[(int)seq[start]] + coded_alphabet[(
int)seq[start+1]];
// Code 20 at the first position is special-cased (handling not visible in
// this excerpt).
90 if (coded_alphabet[(
int)seq[start]]==20)
// Same check for the following position; note that ++start advances start as
// a side effect of evaluating the condition.
92 if (coded_alphabet[(
int)seq[++start]]==20)
// Main scan. The bound i<end+1 includes index `end` itself.
// NOTE(review): kmer_dist_mat in this file passes length() as `end`, which
// would make seq[end] one past the last character unless lines not shown here
// guard against it — confirm.
98 for (
size_t i = start; i<end+1; ++i)
// c is the code of the character two positions back; its use is on lines not
// shown in this excerpt.
100 c = coded_alphabet[(int)seq[i-2]];
// Fold the current character's code into value.
106 value += coded_alphabet[(int)seq[i]];
// Code 20 at the current position is special-cased (handling not shown).
107 if (coded_alphabet[(
int)seq[i]]==20)
118 template<
typename SequenceSetType>
120 kmer_dist_mat(
const SequenceSetType &
set)
122 int n_seqs=
set.n_seqs();
127 std::vector<short> coded_alphabet(256);
130 coded_alphabet[i]=20;
131 char aa_alphabet[20]={
'A',
'V',
'L',
'I',
'P',
'M',
'F',
'W',
'G',
'S',
'T',
'C',
'N',
'Q',
'Y',
'D',
'E',
'K',
'R',
'H'};
135 coded_alphabet[toupper(aa_alphabet[i])-0]=++val;
136 coded_alphabet[tolower(aa_alphabet[i])-0]=val;
140 std::vector<Vector<int>* >vec_set(
set.size());
142 for (i=0; i<n_seqs; ++i)
143 vec_set[i]=calc_km_vec(
set[i], coded_alphabet, 0,
set[i].length());
146 Matrix<float> *dist_mat_p=
new Matrix<float>(n_seqs, n_seqs, 0);
147 Matrix<float> &dist_mat=*dist_mat_p;
148 for (i=0; i<n_seqs; ++i)
151 for (j=i+1; j<n_seqs; ++j)
152 dist_mat[i][j]=dist_mat[j][i]=vec_set[i]->dist(*(vec_set[j]));
154 for (i=0; i<n_seqs; ++i)