24 #ifndef SequenceSetBase_IO_MEMSAFE_HPP_
25 #define SequenceSetBase_IO_MEMSAFE_HPP_
41 #include "../Basics/basics.hpp"
42 #include "../Basics/utils.hpp"
43 #include "../utils/filesystem.h"
47 #include <boost/algorithm/string/split.hpp>
48 #include <boost/algorithm/string.hpp>
49 #include <boost/lexical_cast.hpp>
53 using boost::bad_lexical_cast;
// Class template parameterised on the concrete sequence type it serves.
67 template<
typename SequenceType>
// Shared-ownership handle to one sequence object.
71 typedef std::shared_ptr<SequenceType> Seq_ptr;
// All state below is `mutable` because the const accessors (_access_value,
// _read_single_seq) extend the index and move the file cursor while reading.

// Stream position of each indexed record, addressed by sequence index.
76 mutable std::vector<std::streampos> _id_index;
// Sequence name -> sequence index (the value is used as a subscript into _id_index).
77 mutable std::map<std::string, size_t> _name_index;
// Initialised to false; presumably set once the whole file has been indexed — TODO confirm.
78 mutable bool _complete;
// Value reported by index_changed(); initialised and reset to false in the visible code.
79 mutable bool _index_changed;
// Index of the sequence currently loaded; initialised with -1, i.e. the largest size_t.
80 mutable size_t _current_id;
// Name recorded alongside _current_id.
81 mutable std::string _current_name;
// Input stream over the sequence file; shared cursor for all reads.
82 mutable std::ifstream _seq_F;
// Number of sequences indexed so far.
83 mutable size_t _n_seqs;
// Loads one record from _seq_F into _seq: parses the header line into name and
// comment, then consumes the lines that follow.
// @param pos  stream position of the record (the seek itself is not visible in
//             this excerpt — presumably performed before the first getline).
89 _read_single_seq(
const std::streampos &pos)
const
// First line of the record is its header.
93 getline(_seq_F, line);
// Tokenise from the second character on, skipping the header's leading marker.
// NOTE(review): &line[1] is out of range if `line` is empty — confirm a guard
// exists in the lines not shown here.
101 StrTok tokenizer(&line[1]);
// First token (delimited by space or newline) is the sequence name.
102 _seq->name(tokenizer.
next(
" \n"));
// Remainder of the header line, if any, is the comment.
103 char *comment = tokenizer.
next(
"\n");
// A NULL token (no comment present) is stored as the empty string.
104 _seq->comment((comment != NULL)?comment:
"" );
// Consume the following lines; the loop body is not visible in this excerpt.
107 while (std::getline(_seq_F, line))
// Returns the sequence at index `index_val`, loading it from the file on demand
// and extending the position/name index when the request lies beyond what has
// been indexed so far.
// @param index_val  zero-based sequence index.
121 _access_value(
size_t index_val)
const
// Only touch the file when a different sequence than the current one is requested.
124 if (index_val!=_current_id)
// Requested index is not indexed yet: scan forward from the last known record.
126 if (index_val >= _n_seqs)
// NOTE(review): _n_seqs-1 wraps when _n_seqs is 0 — confirm an earlier guard.
131 _seq_F.seekg(_id_index[_n_seqs-1]);
// Step over the header line of the last indexed record.
132 std::getline(_seq_F, line);
133 std::streampos pos=0;
134 while (std::getline(_seq_F, line))
// Header found: record its position and map its name to the new sequence index.
145 std::string seq_name = line.substr(1,i-1);
146 _id_index.push_back(pos);
147 _name_index[seq_name]=_n_seqs;
// Stop scanning once the requested index has been reached.
149 if ((_n_seqs) == index_val+1)
156 _current_id=index_val;
// Load the record first, then copy its name: _read_single_seq is what fills
// _seq, so reading _seq->name() before it would record the previous sequence's name.
157 _read_single_seq(_id_index[index_val]);
158 _current_name=_seq->name();
// Returns the sequence named `seq_id`, extending the position/name index by
// scanning the file when the name has not been indexed yet.
// @param seq_id  sequence name as it appears in the record header.
164 _access_value(
const std::string seq_id)
const
166 std::map<std::string, size_t>::iterator it_end= _name_index.end();
// Name unknown so far: continue indexing from the last known record.
167 if (_name_index.find(seq_id)==it_end)
// NOTE(review): _n_seqs-1 wraps when _n_seqs is 0 — confirm an earlier guard.
172 _seq_F.seekg(_id_index[_n_seqs-1]);
// Step over the header line of the last indexed record.
173 std::getline(_seq_F, line);
174 std::streampos pos=0;
175 while (std::getline(_seq_F, line))
186 std::string seq_name = line.substr(1,i-1);
187 _id_index.push_back(pos);
// _name_index maps a name to its sequence INDEX (it is used as a subscript into
// _id_index below and in write_index), so store the index, not the stream
// offset — same as the size_t overload does.
188 _name_index[seq_name]=_n_seqs;
// Found the requested name: load the record that was just indexed.
190 if (seq_name==seq_id)
195 _read_single_seq(_id_index[_n_seqs-1]);
// Name already indexed: look up its index and load from the stored position.
// NOTE(review): operator[] default-inserts 0 for an unknown name — confirm this
// line is only reached when the name exists.
199 _read_single_seq(_id_index[_name_index[seq_id]]);
// Default constructor: index flagged incomplete and unchanged, no sequences
// indexed, sequence type 'x'. _current_id is initialised with -1, which for
// size_t is the largest value, so _current_id+1 wraps to 0.
211 SequenceSetBase():_complete(
false),_index_changed(
false),_current_id(-1),_n_seqs(0),_seq_type(
'x')
// Sequence object built from two empty strings and 0.
// NOTE(review): raw `new`; where ownership is handed over is not visible in
// this excerpt — presumably to _seq; confirm.
213 SequenceType *
seq =
new SequenceType(
"",
"",0);
// Same placeholder construction in a second constructor (signature not visible here).
219 SequenceType *
seq =
new SequenceType(
"",
"",0);
// Same placeholder construction in a third constructor (signature not visible here).
226 SequenceType *
seq =
new SequenceType(
"",
"",0);
// This constructor then opens the sequence file together with an index file.
228 set_file(seq_f, index_f);
// Fragment of an iteration-style accessor (signature not visible in this excerpt).
// If the current sequence is the last indexed one and the index is complete,
// there is nothing further to return: yield a null Seq_ptr.
248 if (((_current_id+1)==_n_seqs) && (_complete))
249 return Seq_ptr(NULL);
// Otherwise load and return the sequence following the current one.
251 return _access_value(_current_id+1);
// Bodies of element accessors whose signatures are not visible here; each
// dereferences the Seq_ptr returned by _access_value.
262 return *_access_value(index);
266 return *_access_value(seq_id);
274 return *_access_value(index);
// Const element access by sequence name.
// @param seq_id  name of the sequence to return.
// @return reference to the sequence obtained from _access_value(seq_id).
277 const SequenceType &
operator[](
const std::string &seq_id)
const
279 return *_access_value(seq_id);
// Fragment of a routine that extends the index (enclosing signature not visible).
// Resume at the last indexed record.
// NOTE(review): _n_seqs-1 wraps when _n_seqs is 0 — confirm an earlier guard.
292 _seq_F.seekg(_id_index[_n_seqs-1]);
// Step over that record's first line.
293 std::getline(_seq_F, line);
297 while (std::getline(_seq_F, line))
// Record the position of the newly found record ...
314 _id_index.push_back(pos);
// ... and map its name (header text from character 1, length i-1) to its sequence index.
315 _name_index[line.substr(1,i-1)] = _n_seqs;
// Takes the path of a sequence file; the body is not visible in this excerpt.
// @param seq_f  path to the sequence file.
331 read(
const std::string &seq_f)
// Attaches the set to the sequence file `seq_f`, indexes the first record found
// and loads it.
// @param seq_f  path to the sequence file (the open call is not visible in this excerpt).
337 set_file(
const std::string &seq_f)
// Remember where the line about to be read starts.
346 std::streampos pos=_seq_F.tellg();
347 while (std::getline(_seq_F, line))
356 size_t length=line.size();
363 std::string seq_name = line.substr(1,i-1);
364 _id_index.push_back(pos);
// _name_index maps a name to its slot in _id_index (that is how the lookups and
// write_index use it), so store the slot just filled rather than the stream offset.
365 _name_index[seq_name]=_id_index.size()-1;
// Load the record that was just indexed.
366 _read_single_seq(pos);
// Attaches the set to a sequence file together with an index file.
// @param seq_f    path to the sequence file.
// @param index_f  path to the index file (presumably loaded via read_index in
//                 the lines not shown — TODO confirm).
371 set_file(
const std::string &seq_f,
const std::string &index_f)
// The in-memory index is marked as unchanged at this point.
378 _index_changed=
false;
// Load the first indexed sequence.
// NOTE(review): assumes _id_index is non-empty here — confirm.
379 _read_single_seq(_id_index[0]);
// Returns a raw, non-owning pointer to the sequence at `index`.
// @param index  zero-based sequence index (passed on to the size_t overload of _access_value).
// @return address of the object held by the Seq_ptr that _access_value returns.
382 const SequenceType*
seq(
unsigned int index)
const
384 return &(*(_access_value(index)));
// Loads a previously written index from `index_f`.
// Layout read here: a magic line, the sequence count, a completeness line, then
// one "name index position" line per sequence (space separated).
// @param index_f  path to the index file.
388 read_index(
const std::string &index_f)
390 std::ifstream index_F(index_f);
392 std::vector<std::string> list;
// First line must be the format signature.
394 getline(index_F, line);
395 if (line==
"#MDAT - SeqIndex v1.0")
// Second line: number of sequences; size the position table accordingly.
// lexical_cast throws bad_lexical_cast on non-numeric input.
397 getline(index_F, line);
398 _n_seqs=boost::lexical_cast<
size_t>(line);
399 _id_index.resize(_n_seqs);
// Third line: completeness marker.
400 getline(index_F, line);
401 if (line==
"complete")
// Remaining lines: one entry per sequence.
405 while(getline(index_F, line))
407 split(list, line, boost::is_any_of(
" "));
// NOTE(review): list[1] and list[2] are read without checking list.size(), and
// seq_id is not checked against _n_seqs before subscripting — a malformed line
// would index out of range; confirm or guard.
408 std::streampos pos = boost::lexical_cast<
size_t>(list[2]);
409 seq_id = boost::lexical_cast<
size_t>(list[1]);
410 _id_index[seq_id] = pos;
411 _name_index[list[0]] = seq_id;
// Diagnostic written to stderr (the condition guarding it is not visible in this excerpt).
416 std::cerr <<
"Problem reading the index file" << std::endl;
// Writes the in-memory index to `index_f` in the layout read_index parses:
// magic line, sequence count, completeness marker, then "name index position"
// per sequence.
// @param index_f  path of the index file to create.
423 write_index(
const std::string &index_f)
const
425 std::ofstream index_F(index_f);
426 std::map<std::string, size_t>::const_iterator it,it_end=_name_index.end();
// Format signature and number of sequences.
427 index_F <<
"#MDAT - SeqIndex v1.0" << std::endl;
428 index_F << _n_seqs << std::endl;
// Completeness marker; the condition choosing between the two lines is not
// visible in this excerpt (presumably _complete — TODO confirm).
430 index_F <<
"complete" << std::endl;
432 index_F <<
"incomplete" << std::endl;
// One line per name: name, sequence index, stream position of that index.
433 for (it=_name_index.begin(); it!=it_end; ++it)
435 index_F << it->first <<
" " << it->second <<
" " << _id_index[it->second] << std::endl;
// Reports the current value of the _index_changed flag.
// @return the stored _index_changed flag.
440 index_changed()
const
442 return _index_changed;