OpenMS  3.0.0
NASequence.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Samuel Wein $
32 // $Authors: Samuel Wein, Timo Sachsenberg, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 #include <OpenMS/CONCEPT/Types.h>
41 
42 #include <vector>
43 #include <iosfwd>
44 
45 namespace OpenMS
46 {
61  class OPENMS_DLLAPI NASequence
62  {
63 
68  public:
70  { //< NB: Not all fragment types are valid for all residue types; this class should probably get split
71  Full = 0,
75  AIon,
76  BIon,
77  CIon,
78  XIon,
79  YIon,
80  ZIon,
88  WIon,
90  DIon,
91  SizeOfNASFragmentType
92  };
93 
95 
96  class Iterator;
97 
103  class OPENMS_DLLAPI ConstIterator
104  {
105  public:
107  typedef const value_type& const_reference;
109  typedef const value_type* const_pointer;
110  typedef std::vector<const value_type*>::difference_type difference_type;
111  typedef const value_type* pointer;
112  typedef std::random_access_iterator_tag iterator_category;
113 
117  ConstIterator() = default;
119 
121  ConstIterator(const std::vector<const Ribonucleotide*>* vec_ptr,
122  difference_type position)
123  {
124  vector_ = vec_ptr;
125  position_ = position;
126  }
127 
130  vector_(rhs.vector_),
131  position_(rhs.position_)
132  {
133  }
134 
137  vector_(rhs.vector_),
138  position_(rhs.position_)
139  {
140  }
141 
143  virtual ~ConstIterator() {}
144 
146 
149  {
150  if (this != &rhs)
151  {
152  position_ = rhs.position_;
153  vector_ = rhs.vector_;
154  }
155  return *this;
156  }
157 
161  const_reference operator*() const
163  {
164  return *(*vector_)[position_];
165  }
166 
168  const_pointer operator->() const
169  {
170  return (*vector_)[position_];
171  }
172 
175  {
176  return ConstIterator(vector_, position_ + diff);
177  }
178 
180  {
181  return position_ - rhs.position_;
182  }
183 
186  {
187  return ConstIterator(vector_, position_ - diff);
188  }
189 
191  bool operator==(const ConstIterator& rhs) const
192  {
193  return (std::tie(vector_, position_) ==
194  std::tie(rhs.vector_, rhs.position_));
195  }
196 
198  bool operator!=(const ConstIterator& rhs) const
199  {
200  return !(operator==(rhs));
201  }
202 
205  {
206  ++position_;
207  return *this;
208  }
209 
212  {
213  --position_;
214  return *this;
215  }
216 
218 
219  protected:
220 
221  // pointer to the vector
222  const std::vector<const Ribonucleotide*>* vector_;
223 
224  // position in the vector
226  };
227 
228 
234  class OPENMS_DLLAPI Iterator
235  {
236  public:
237 
239 
241  typedef const value_type& const_reference;
243  typedef const value_type* const_pointer;
244  typedef const value_type* pointer;
245  typedef std::vector<const value_type*>::difference_type difference_type;
246 
250  Iterator() = default;
251 
253  Iterator(std::vector<const Ribonucleotide*>* vec_ptr,
254  difference_type position)
255  {
256  vector_ = vec_ptr;
257  position_ = position;
258  }
259 
261  Iterator(const Iterator& rhs) :
262  vector_(rhs.vector_),
263  position_(rhs.position_)
264  {
265  }
266 
268  virtual ~Iterator() {}
269 
271 
274  {
275  if (this != &rhs)
276  {
277  position_ = rhs.position_;
278  vector_ = rhs.vector_;
279  }
280  return *this;
281  }
282 
286  const_reference operator*() const
288  {
289  return *(*vector_)[position_];
290  }
291 
293  const_pointer operator->() const
294  {
295  return (*vector_)[position_];
296  }
297 
299  pointer operator->()
300  {
301  return (*vector_)[position_];
302  }
303 
306  {
307  return Iterator(vector_, position_ + diff);
308  }
309 
311  {
312  return position_ - rhs.position_;
313  }
314 
317  {
318  return Iterator(vector_, position_ - diff);
319  }
320 
322  bool operator==(const Iterator& rhs) const
323  {
324  return (std::tie(vector_,position_) ==
325  std::tie(rhs.vector_, rhs.position_));
326  }
327 
329  bool operator!=(const Iterator& rhs) const
330  {
331  return !this->operator==(rhs);
332  }
333 
336  {
337  ++position_;
338  return *this;
339  }
340 
343  {
344  --position_;
345  return *this;
346  }
347 
349 
350  protected:
351 
352  std::vector<const Ribonucleotide*>* vector_;
353 
354  // position in the vector
356  };
357 
358  public:
359  /*
360  * Default constructors and assignment operators.
361  */
/// Default, copy and move construction are all compiler-generated.
362  NASequence() = default;
363  NASequence(const NASequence&) = default;
364  NASequence(NASequence&&) = default;
/// Compiler-generated copy/move assignment. The trailing `&` ref-qualifier
/// restricts both operators to lvalue targets (assigning to a temporary does not compile).
365  NASequence& operator=(const NASequence&) & = default;
366  NASequence& operator=(NASequence&&) & = default;
367 
/// Constructor taking a vector of ribonucleotide pointers (by value) and one
/// chain-end pointer each for the 5' and 3' terminus. Declaration only; the
/// definition is not part of this header.
/// NOTE(review): presumably a null chain-end pointer means "no terminal modification",
/// in line with the nullptr defaults of five_prime_ / three_prime_ - confirm in the definition.
369  NASequence(std::vector<const Ribonucleotide*> s,
370  const RibonucleotideChainEnd* five_prime,
371  const RibonucleotideChainEnd* three_prime);
372 
/// Virtual, compiler-generated destructor.
373  virtual ~NASequence() = default;
374 
/// Equality, inequality and ordering against another sequence.
/// Declarations only; the comparison criteria are defined outside this header.
375  bool operator==(const NASequence& rhs) const;
376  bool operator!=(const NASequence& rhs) const;
377  bool operator<(const NASequence& rhs) const;
378 
/// Setter taking a whole vector of ribonucleotide pointers (declaration only).
/// NOTE(review): presumably replaces the stored seq_ with @p seq - confirm in the definition.
380  void setSequence(const std::vector<const Ribonucleotide*>& seq);
381 
382  const std::vector<const Ribonucleotide*>& getSequence() const
383  {
384  return seq_;
385  }
386 
387  std::vector<const Ribonucleotide*>& getSequence()
388  {
389  return seq_;
390  }
391 
/// Setter for a single position (declaration only).
/// NOTE(review): presumably stores @p r at position @p index of seq_ - confirm in the definition,
/// including how an out-of-range index is handled.
393  void set(size_t index, const Ribonucleotide* r);
394 
395  const Ribonucleotide* get(size_t index)
396  {
397  return seq_[index];
398  }
399 
401  inline const Ribonucleotide*& operator[](size_t index)
402  {
403  return seq_[index];
404  }
405 
406  inline const Ribonucleotide* const& operator[](size_t index) const
407  {
408  return seq_[index];
409  }
410 
/// Container-style helpers (declarations only).
/// NOTE(review): presumably these report on / reset seq_; whether empty() and clear()
/// also take the 5'/3' modifications into account is not visible here - confirm in the definition.
411  bool empty() const;
412  size_t size() const;
413  void clear();
414 
/// Query, set and get the 5' chain-end modification (declarations only).
/// NOTE(review): presumably backed by five_prime_, with nullptr meaning "none" - confirm.
416  bool hasFivePrimeMod() const;
417  void setFivePrimeMod(const RibonucleotideChainEnd* r);
418  const RibonucleotideChainEnd* getFivePrimeMod() const;
/// Query, set and get the 3' chain-end modification (declarations only).
/// NOTE(review): presumably backed by three_prime_, with nullptr meaning "none" - confirm.
419  bool hasThreePrimeMod() const;
420  void setThreePrimeMod(const RibonucleotideChainEnd* r);
421  const RibonucleotideChainEnd* getThreePrimeMod() const;
422 
424  inline Iterator begin()
425  {
426  return Iterator(&seq_, 0);
427  }
428 
429  inline ConstIterator begin() const
430  {
431  return ConstIterator(&seq_, 0);
432  }
433 
434  inline Iterator end()
435  {
436  return Iterator(&seq_, (Int) seq_.size());
437  }
438 
439  inline ConstIterator end() const
440  {
441  return ConstIterator(&seq_, (Int) seq_.size());
442  }
443 
444  inline ConstIterator cbegin() const
445  {
446  return ConstIterator(&seq_, 0);
447  }
448 
449  inline ConstIterator cend() const
450  {
451  return ConstIterator(&seq_, (Int) seq_.size());
452  }
453 
/// Mass and formula queries (declarations only). In all three, @p type defaults to
/// Full and @p charge defaults to 0.
/// NOTE(review): presumably monoisotopic weight, average weight and empirical formula of the
/// sequence (or of the requested fragment type) - confirm units and charge handling in the definition.
455  double getMonoWeight(NASFragmentType type = Full, Int charge = 0) const;
456  double getAverageWeight(NASFragmentType type = Full, Int charge = 0) const;
457  EmpiricalFormula getFormula(NASFragmentType type = Full, Int charge = 0) const;
458 
/// Returns a new NASequence by value (declaration only).
/// NOTE(review): presumably the first @p length residues - confirm handling of length > size().
460  NASequence getPrefix(Size length) const;
461 
/// Returns a new NASequence by value (declaration only).
/// NOTE(review): presumably the last @p length residues - confirm handling of length > size().
463  NASequence getSuffix(Size length) const;
464 
/// Returns a new NASequence by value (declaration only). @p start defaults to 0 and
/// @p length to Size(-1), i.e. the largest value of the unsigned Size type.
/// NOTE(review): presumably that default means "up to the end of the sequence" - confirm.
466  NASequence getSubsequence(Size start = 0, Size length = Size(-1)) const;
467 
/// Static factory building a sequence from an OpenMS String (declaration only).
/// NOTE(review): accepted notation and error behaviour are not visible here - see the definition.
475  static NASequence fromString(const String& s);
476 
/// Stream output of a sequence; declared as a friend so that the free function
/// can access private members.
480  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os,
481  const NASequence& seq);
482 
/// Static factory overload taking a C string (declaration only).
490  static NASequence fromString(const char* s);
491 
/// String representation of the sequence, returned by value as std::string (declaration only).
492  std::string toString() const ;
493 
494  private:
/// Private static parsing helper that fills the output parameter @p nas from @p s (declaration only).
/// NOTE(review): presumably the worker behind fromString() - confirm in the definition.
495  //TODO: query RNA / DNA depending on type
496  static void parseString_(const String& s, NASequence& nas);
497 
/// Private static helper for parsing a modification (declaration only). Takes an iterator
/// into @p str and returns an iterator; @p nas is passed by non-const reference.
/// NOTE(review): presumably the returned iterator marks where parsing should resume - confirm.
507  //TODO: query RNA / DNA depending on type
508  static String::ConstIterator parseMod_(const String::ConstIterator str_it,
509  const String& str, NASequence& nas);
510 
/// The residues of the sequence, stored as raw pointers to const Ribonucleotide objects.
/// NOTE(review): ownership/lifetime of the pointed-to objects is not visible here - presumably
/// they are owned elsewhere; confirm.
511  std::vector<const Ribonucleotide*> seq_;
512 
/// Chain-end pointers for the 5' and 3' terminus; both default to nullptr.
513  const RibonucleotideChainEnd* five_prime_ = nullptr;
514  const RibonucleotideChainEnd* three_prime_ = nullptr;
515  };
516 
517 }
OpenMS::NASequence::getSequence
const std::vector< const Ribonucleotide * > & getSequence() const
Definition: NASequence.h:382
OpenMS::NASequence::begin
ConstIterator begin() const
Definition: NASequence.h:429
OpenMS::NASequence::Precursor
MS:1001523 Precursor ion.
Definition: NASequence.h:81
OpenMS::NASequence::ConstIterator
ConstIterator of NASequence class.
Definition: NASequence.h:103
OpenMS::NASequence::ConstIterator::operator-
const ConstIterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:185
OpenMS::NASequence::YIonMinusNH3
MS:1001233 y ion without ammonia.
Definition: NASequence.h:85
OpenMS::NASequence::BIonMinusNH3
MS:1001232 b ion without ammonia.
Definition: NASequence.h:84
OpenMS::NASequence::Iterator::difference_type
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:245
OpenMS::NASequence::Iterator::Iterator
Iterator(std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:253
OpenMS::toString
const std::string & toString(const DriftTimeUnit value)
OpenMS::NASequence::getSequence
std::vector< const Ribonucleotide * > & getSequence()
Definition: NASequence.h:387
OpenMS::NASequence::ConstIterator::operator--
ConstIterator & operator--()
decrement operator
Definition: NASequence.h:211
OpenMS::NASequence::ConstIterator::pointer
typedef const value_type * pointer
Definition: NASequence.h:111
Types.h
OpenMS::NASequence::Iterator::operator-
const Iterator operator-(difference_type diff) const
backward jump operator
Definition: NASequence.h:316
OpenMS::NASequence
Representation of a nucleic acid sequence.
Definition: NASequence.h:61
OpenMS::operator*
DPosition< D, TCoordinateType > operator*(DPosition< D, TCoordinateType > position, typename DPosition< D, TCoordinateType >::CoordinateType scalar)
Scalar multiplication (a bit inefficient)
Definition: DPosition.h:426
OpenMS::NASequence::Iterator::position_
difference_type position_
Definition: NASequence.h:355
OpenMS::NASequence::DIon
D ion, added for nucleic acid support.
Definition: NASequence.h:90
OpenMS::Ribonucleotide
Representation of a ribonucleotide (modified or unmodified)
Definition: Ribonucleotide.h:51
OpenMS::NASequence::NASFragmentType
NASFragmentType
an enum of all possible fragment ion types
Definition: NASequence.h:69
OpenMS::NASequence::ConstIterator::ConstIterator
ConstIterator(const NASequence::Iterator &rhs)
copy constructor from Iterator
Definition: NASequence.h:136
OpenMS::NASequence::FivePrime
only 5' terminus
Definition: NASequence.h:73
OpenMS::NASequence::seq_
std::vector< const Ribonucleotide * > seq_
Definition: NASequence.h:511
OpenMS::NASequence::operator[]
const Ribonucleotide *const & operator[](size_t index) const
Definition: NASequence.h:406
OpenMS::NASequence::ThreePrime
only 3' terminus
Definition: NASequence.h:74
OpenMS::String
A more convenient string class.
Definition: String.h:58
KDTree::operator!=
bool operator!=(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:824
OpenMS::NASequence::Unannotated
no stored annotation
Definition: NASequence.h:87
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::NASequence::ZIon
MS:1001230 C-alpha/carbonyl carbon bond.
Definition: NASequence.h:80
OpenMS::NASequence::Iterator::vector_
std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:352
OpenMS::NASequence::ConstIterator::const_pointer
typedef const value_type * const_pointer
Definition: NASequence.h:109
OpenMS::NASequence::ConstIterator::iterator_category
std::random_access_iterator_tag iterator_category
Definition: NASequence.h:112
OpenMS::NASequence::YIonMinusH20
MS:1001223 y ion without water.
Definition: NASequence.h:83
OpenMS::operator<
bool operator<(const MultiplexDeltaMasses &dm1, const MultiplexDeltaMasses &dm2)
OpenMS::NASequence::ConstIterator::operator++
ConstIterator & operator++()
increment operator
Definition: NASequence.h:204
OpenMS::NASequence::Iterator::operator-
difference_type operator-(Iterator rhs) const
Definition: NASequence.h:310
OpenMS::NASequence::ConstIterator::position_
difference_type position_
Definition: NASequence.h:225
OpenMS::NASequence::CIon
MS:1001231 N-terminus up to the amide/C-alpha bond.
Definition: NASequence.h:77
Ribonucleotide.h
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS::NASequence::Iterator::operator->
const_pointer operator->() const
dereference operator
Definition: NASequence.h:293
OpenMS::NASequence::ConstIterator::value_type
Ribonucleotide value_type
Definition: NASequence.h:106
OpenMS::NASequence::cend
ConstIterator cend() const
Definition: NASequence.h:449
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::NASequence::ConstIterator::operator-
difference_type operator-(ConstIterator rhs) const
Definition: NASequence.h:179
OpenMS::NASequence::end
ConstIterator end() const
Definition: NASequence.h:439
OpenMS::NASequence::ConstIterator::difference_type
std::vector< const value_type * >::difference_type difference_type
Definition: NASequence.h:110
OpenMS::NASequence::ConstIterator::ConstIterator
ConstIterator(const ConstIterator &rhs)
copy constructor
Definition: NASequence.h:129
OpenMS::NASequence::Iterator::value_type
Ribonucleotide value_type
Definition: NASequence.h:240
OpenMS::NASequence::Iterator::const_reference
typedef const value_type & const_reference
Definition: NASequence.h:241
OpenMS::NASequence::ConstIterator::~ConstIterator
virtual ~ConstIterator()
destructor
Definition: NASequence.h:143
OpenMS::NASequence::Iterator::operator==
bool operator==(const Iterator &rhs) const
equality comparator
Definition: NASequence.h:322
OpenMS::NASequence::ConstIterator::operator!=
bool operator!=(const ConstIterator &rhs) const
inequality operator
Definition: NASequence.h:198
OpenMS::NASequence::get
const Ribonucleotide * get(size_t index)
Definition: NASequence.h:395
OpenMS::NASequence::BIonMinusH20
MS:1001222 b ion without water.
Definition: NASequence.h:82
OpenMS::NASequence::ConstIterator::operator+
const ConstIterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:174
OpenMS::NASequence::ConstIterator::reference
value_type & reference
Definition: NASequence.h:108
OpenMS::NASequence::Iterator::operator->
pointer operator->()
mutable dereference operator
Definition: NASequence.h:299
OpenMS::NASequence::Iterator::reference
value_type & reference
Definition: NASequence.h:242
OpenMS::NASequence::operator[]
const Ribonucleotide *& operator[](size_t index)
getter / setter for sequence elements (C++ container style)
Definition: NASequence.h:401
OpenMS::NASequence::Iterator::operator!=
bool operator!=(const Iterator &rhs) const
inequality operator
Definition: NASequence.h:329
OpenMS::NASequence::NonIdentified
MS:1001240 Non-identified ion.
Definition: NASequence.h:86
OpenMS::NASequence::WIon
W ion, added for nucleic acid support.
Definition: NASequence.h:88
OpenMS::operator<<
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
OpenMS::NASequence::Internal
internal, without any termini
Definition: NASequence.h:72
OpenMS::NASequence::Iterator
Iterator of NASequence class.
Definition: NASequence.h:234
OpenMS::NASequence::ConstIterator::operator=
ConstIterator & operator=(const ConstIterator &rhs)
assignment operator
Definition: NASequence.h:148
OpenMS::NASequence::XIon
MS:1001228 amide/C-alpha bond up to the C-terminus.
Definition: NASequence.h:78
OpenMS::NASequence::ConstIterator::operator->
const_pointer operator->() const
dereference operator
Definition: NASequence.h:168
OpenMS::NASequence::BIon
MS:1001224 N-terminus up to the peptide bond.
Definition: NASequence.h:76
OpenMS::NASequence::end
Iterator end()
Definition: NASequence.h:434
OpenMS::NASequence::ConstIterator::vector_
const std::vector< const Ribonucleotide * > * vector_
Definition: NASequence.h:222
EmpiricalFormula.h
OpenMS::NASequence::Iterator::operator++
Iterator & operator++()
increment operator
Definition: NASequence.h:335
OpenMS::NASequence::YIon
MS:1001220 peptide bond up to the C-terminus.
Definition: NASequence.h:79
OpenMS::Internal::operator==
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
OpenMS::NASequence::ConstIterator::ConstIterator
ConstIterator(const std::vector< const Ribonucleotide * > *vec_ptr, difference_type position)
detailed constructor with pointer to the vector and offset position
Definition: NASequence.h:121
OpenMS::NASequence::Iterator::Iterator
Iterator(const Iterator &rhs)
copy constructor
Definition: NASequence.h:261
OpenMS::EmpiricalFormula
Representation of an empirical formula.
Definition: EmpiricalFormula.h:84
OpenMS::NASequence::Iterator::operator--
Iterator & operator--()
decrement operator
Definition: NASequence.h:342
OpenMS::NASequence::Iterator::operator=
Iterator & operator=(const Iterator &rhs)
assignment operator
Definition: NASequence.h:273
OpenMS::NASequence::cbegin
ConstIterator cbegin() const
Definition: NASequence.h:444
OpenMS::NASequence::ConstIterator::operator==
bool operator==(const ConstIterator &rhs) const
equality comparator
Definition: NASequence.h:191
String.h
OpenMS::NASequence::AminusB
A ion with base loss, added for nucleic acid support.
Definition: NASequence.h:89
OpenMS::NASequence::Iterator::const_pointer
typedef const value_type * const_pointer
Definition: NASequence.h:243
OpenMS::NASequence::AIon
MS:1001229 N-terminus up to the C-alpha/carbonyl carbon bond.
Definition: NASequence.h:75
OpenMS::NASequence::Iterator::operator+
const Iterator operator+(difference_type diff) const
forward jump operator
Definition: NASequence.h:305
OpenMS::String::ConstIterator
const_iterator ConstIterator
Const Iterator.
Definition: String.h:72
OpenMS::NASequence::Iterator::pointer
typedef const value_type * pointer
Definition: NASequence.h:244
OpenMS::NASequence::Iterator::~Iterator
virtual ~Iterator()
destructor
Definition: NASequence.h:268
OpenMS::NASequence::ConstIterator::const_reference
typedef const value_type & const_reference
Definition: NASequence.h:107
OpenMS::NASequence::begin
Iterator begin()
iterators
Definition: NASequence.h:424