OpenMS  3.0.0
SvmTheoreticalSpectrumGenerator.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Sandro Andreotti $
33 // --------------------------------------------------------------------------
34 
35 
36 #pragma once
37 
38 #include <OpenMS/config.h>
41 
46 
47 #include <boost/random/mersenne_twister.hpp>
48 
49 
50 
51 namespace OpenMS
52 {
71  class OPENMS_DLLAPI SvmTheoreticalSpectrumGenerator :
72  public DefaultParamHandler
73  {
75 public:
76 
81  struct IonType
83  {
87 
90  //Default constructor
91  IonType() :
92  residue((Residue::ResidueType) 0),
93  loss(),
94  charge(0)
95  {
96  }
97 
98  //Custom constructor
99  IonType(Residue::ResidueType local_residue, EmpiricalFormula local_loss = EmpiricalFormula(), Int local_charge = 1) :
100  residue(local_residue),
101  loss(local_loss),
102  charge(local_charge)
103  {
104  }
105 
106  //Copy constructor
107  IonType(const IonType & rhs) :
108  residue(rhs.residue),
109  loss(rhs.loss),
110  charge(rhs.charge)
111  {
112  }
113 
114  //Assignment operator
115  IonType & operator=(const IonType & rhs)
116  {
117  if (this != &rhs)
118  {
119  residue = rhs.residue;
120  loss = rhs.loss;
121  charge = rhs.charge;
122  }
123  return *this;
124  }
125 
126  bool operator<(const IonType & rhs) const
127  {
128  if (residue != rhs.residue)
129  return residue < rhs.residue;
130  else if (loss.toString() != rhs.loss.toString())
131  return loss.toString() < rhs.loss.toString();
132  else
133  return charge < rhs.charge;
134  }
135 
136  };
138 
// Simple container storing the model parameters required for simulation.
// NOTE(review): the `struct SvmModelParameterSet` header line and four member
// declarations are absent from this listing; the affected spots are marked below.
141 {
142 // Pointers to the SVM classification models (one per ion type).
143 std::vector<boost::shared_ptr<SVMWrapper> > class_models;
144 
145 // Pointers to the SVM regression models (one per ion type).
146 std::vector<boost::shared_ptr<SVMWrapper> > reg_models;
147 
148 // The intensity for each ion type for the SVC mode, keyed by residue type.
149 std::map<Residue::ResidueType, double> static_intensities;
150 
151 // The selected primary ion types.
152 std::vector<IonType> ion_types;
153 
154 // The selected secondary ion types, keyed by an IonType (presumably the primary type they belong to - confirm).
155 std::map<IonType, std::vector<IonType> > secondary_types;
156 
157 // The number of intensity levels. NOTE(review): declaration missing here; the member index lists `Size number_intensity_levels`.
159 
160 // The number of regions for every spectrum. NOTE(review): declaration missing here; the member index lists `Size number_regions`.
162 
163 // Upper limits of the features (required for scaling).
164 std::vector<double> feature_max;
165 
166 // Lower limits of the features (required for scaling).
167 std::vector<double> feature_min;
168 
169 // Lower bound for scaling. NOTE(review): declaration missing here; the member index lists `double scaling_lower`.
171 
172 // Upper bound for scaling. NOTE(review): declaration missing here; the member index lists `double scaling_upper`.
174 
175 // Border values for binning the intensity of secondary types (the member name spells it "boarders").
176 std::vector<double> intensity_bin_boarders;
177 
178 // Intensity values for the binned intensity of secondary types.
179 std::vector<double> intensity_bin_values;
180 
181 // Conditional probabilities for secondary types: a 2-D table of doubles per (IonType, Size) key.
182 std::map<std::pair<IonType, Size>, std::vector<std::vector<double> > > conditional_prob;
183 };
184 
185 
186 
192 
195 
198 
199 
203 
204 
// Simulation entry point; the implementation is not part of this header.
// Presumably fills `spectrum` with the simulated spectrum of `peptide` for the
// given precursor charge, drawing randomness from `rng` - confirm against the
// implementation file.
206 void simulate(PeakSpectrum & spectrum, const AASequence & peptide, boost::random::mt19937_64& rng, Size precursor_charge);
207 
// Defined outside this header. Presumably loads the model parameters used by
// simulate() - TODO confirm.
209 void load();
210 
// Returns a const reference to the ion types held in the model parameter set
// (mp_.ion_types); no copy is made.
212 const std::vector<IonType> & getIonTypes() const
213 {
214 return mp_.ion_types;
215 }
216 
217 protected:
// A set of descriptors for a single training row.
// NOTE(review): the `struct DescriptorSet` header line and its
// `DescriptorSetType descriptors` member are absent from this listing.
220 {
221 typedef std::vector<svm_node> DescriptorSetType;
223 };
224 
// Maps an ion type to a double (an intensity, going by the name).
225 typedef std::map<IonType, double> IntensityMap;
226 
// NOTE(review): the member index lists `Size precursor_charge_` and
// `SvmModelParameterSet mp_` around here; their declarations are absent from this listing.
229 
232 
// Maps an amino acid (as String) to an integer index.
234 static std::map<String, Size> aa_to_index_;
235 
// Hydrophobicity value for each amino acid.
237 static std::map<String, double> hydrophobicity_;
238 
// Helicity value for each amino acid.
240 static std::map<String, double> helicity_;
241 
// Basicity value for each amino acid.
243 static std::map<String, double> basicity_;
244 
// Whether each ion type is hidden or not.
246 std::map<IonType, bool> hide_type_;
247 
// Scales `value` in place; the inline definition follows the class.
// NOTE(review): the definition names its parameters
// (value, lower, upper, feature_min, feature_max), i.e. in a different order
// than this declaration. Arguments and the defaults -1.0 / 1.0 bind by
// position, so the two sets of names cannot both describe the same call.
// Confirm which order the callers rely on and align the names.
249 inline void scaleSingleFeature_(double & value, double feature_min, double feature_max, double lower = -1.0, double upper = 1.0);
250 
// Defined outside this header. Presumably scales every feature of `desc`
// to [lower, upper] - confirm against the implementation file.
252 void scaleDescriptorSet_(DescriptorSet & desc, double lower, double upper);
253 
// Defined outside this header. Presumably builds the descriptors for `type`
// at `position` of `peptide` into `desc_set`; the meaning of the returned
// Size is not visible here - TODO confirm.
255 Size generateDescriptorSet_(AASequence peptide, Size position, IonType type, Size precursor_charge, DescriptorSet & desc_set);
256 
// Converts a residue type to a String; the exact text is defined in the implementation file.
258 String ResidueTypeToString_(Residue::ResidueType type);
259 
// Defined outside this header. Presumably fills the static per-amino-acid maps above - confirm.
261 static void initializeMaps_();
262 
// Flag indicating whether the hydrophobicity, helicity and basicity maps were already initialized.
264 static bool initializedMaps_;
265 
// Overrides a virtual member of a base class; defined outside this header.
266 void updateMembers_() override;
267  };
268 
269  void inline SvmTheoreticalSpectrumGenerator::scaleSingleFeature_(double & value, double lower, double upper, double feature_min, double feature_max)
270  {
271  double prev = value;
272  if (feature_max == feature_min)
273  {
274  return;
275  }
276 
277  if (value <= feature_min)
278  {
279  value = lower;
280  }
281  else if (value >= feature_max)
282  {
283  value = upper;
284  }
285  else
286  {
287  value = lower + (upper - lower) *
288  (value - feature_min) /
289  (feature_max - feature_min);
290  }
291 
292  if (value < 0)
293  {
294  std::cerr << "negative value!! " << value << " l: " << lower << " u: " << upper << " fm: " << feature_min << " fma: " << feature_max << " prev: " << prev << std::endl;
295  }
296  }
297 
298 } // namespace OpenMS
299 
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::intensity_bin_boarders
std::vector< double > intensity_bin_boarders
Definition: SvmTheoreticalSpectrumGenerator.h:176
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::intensity_bin_values
std::vector< double > intensity_bin_values
Definition: SvmTheoreticalSpectrumGenerator.h:179
OpenMS::SvmTheoreticalSpectrumGenerator::DescriptorSet::DescriptorSetType
std::vector< svm_node > DescriptorSetType
Definition: SvmTheoreticalSpectrumGenerator.h:221
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::IonType
IonType()
Definition: SvmTheoreticalSpectrumGenerator.h:91
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::static_intensities
std::map< Residue::ResidueType, double > static_intensities
Definition: SvmTheoreticalSpectrumGenerator.h:149
OpenMS::SvmTheoreticalSpectrumGenerator::hydrophobicity_
static std::map< String, double > hydrophobicity_
hydrophobicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:237
OpenMS::SvmTheoreticalSpectrumGeneratorTrainer
Train SVM models that are used by SvmTheoreticalSpectrumGenerator.
Definition: SvmTheoreticalSpectrumGeneratorTrainer.h:65
OpenMS::String
A more convenient string class.
Definition: String.h:58
SVMWrapper.h
OpenMS::SvmTheoreticalSpectrumGenerator::DescriptorSet::descriptors
DescriptorSetType descriptors
Definition: SvmTheoreticalSpectrumGenerator.h:222
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::feature_max
std::vector< double > feature_max
Definition: SvmTheoreticalSpectrumGenerator.h:164
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::number_intensity_levels
Size number_intensity_levels
Definition: SvmTheoreticalSpectrumGenerator.h:158
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::class_models
std::vector< boost::shared_ptr< SVMWrapper > > class_models
Definition: SvmTheoreticalSpectrumGenerator.h:143
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::residue
Residue::ResidueType residue
Definition: SvmTheoreticalSpectrumGenerator.h:84
OpenMS::SvmTheoreticalSpectrumGenerator::helicity_
static std::map< String, double > helicity_
helicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:240
OpenMS::SvmTheoreticalSpectrumGenerator::basicity_
static std::map< String, double > basicity_
basicity values for each AA
Definition: SvmTheoreticalSpectrumGenerator.h:243
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::number_regions
Size number_regions
Definition: SvmTheoreticalSpectrumGenerator.h:161
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::EmpiricalFormula::toString
String toString() const
returns the formula as a string (charges are not included)
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::loss
EmpiricalFormula loss
Definition: SvmTheoreticalSpectrumGenerator.h:85
OpenMS::SvmTheoreticalSpectrumGenerator::initializedMaps_
static bool initializedMaps_
flag to indicate if the hydrophobicity, helicity, and basicity maps were already initialized
Definition: SvmTheoreticalSpectrumGenerator.h:264
OpenMS::Residue
Representation of an amino acid residue.
Definition: Residue.h:62
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet
Simple container storing the model parameters required for simulation.
Definition: SvmTheoreticalSpectrumGenerator.h:140
OpenMS::SvmTheoreticalSpectrumGenerator::IntensityMap
std::map< IonType, double > IntensityMap
Definition: SvmTheoreticalSpectrumGenerator.h:225
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::secondary_types
std::map< IonType, std::vector< IonType > > secondary_types
Definition: SvmTheoreticalSpectrumGenerator.h:155
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::scaling_upper
double scaling_upper
Definition: SvmTheoreticalSpectrumGenerator.h:173
OpenMS::SvmTheoreticalSpectrumGenerator
Simulates MS2 spectra with support vector machines.
Definition: SvmTheoreticalSpectrumGenerator.h:71
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::IonType
IonType(const IonType &rhs)
Definition: SvmTheoreticalSpectrumGenerator.h:107
OpenMS::Residue::ResidueType
ResidueType
Definition: Residue.h:151
MSExperiment.h
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::conditional_prob
std::map< std::pair< IonType, Size >, std::vector< std::vector< double > > > conditional_prob
Definition: SvmTheoreticalSpectrumGenerator.h:182
OpenMS::SvmTheoreticalSpectrumGenerator::precursor_charge_
Size precursor_charge_
charge of the precursors used for training
Definition: SvmTheoreticalSpectrumGenerator.h:228
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::ion_types
std::vector< IonType > ion_types
Definition: SvmTheoreticalSpectrumGenerator.h:152
OpenMS::EmpiricalFormula
Representation of an empirical formula.
Definition: EmpiricalFormula.h:84
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::reg_models
std::vector< boost::shared_ptr< SVMWrapper > > reg_models
Definition: SvmTheoreticalSpectrumGenerator.h:146
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::operator<
bool operator<(const IonType &rhs) const
Definition: SvmTheoreticalSpectrumGenerator.h:126
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::IonType
IonType(Residue::ResidueType local_residue, EmpiricalFormula local_loss=EmpiricalFormula(), Int local_charge=1)
Definition: SvmTheoreticalSpectrumGenerator.h:99
StandardDeclarations.h
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:111
TheoreticalSpectrumGenerator.h
OpenMS::SvmTheoreticalSpectrumGenerator::mp_
SvmModelParameterSet mp_
set of model parameters read from model file
Definition: SvmTheoreticalSpectrumGenerator.h:231
OpenMS::SvmTheoreticalSpectrumGenerator::getIonTypes
const std::vector< IonType > & getIonTypes() const
return the set of ion types that are modeled by the loaded SVMs
Definition: SvmTheoreticalSpectrumGenerator.h:212
OpenMS::SvmTheoreticalSpectrumGenerator::scaleSingleFeature_
void scaleSingleFeature_(double &value, double feature_min, double feature_max, double lower=-1.0, double upper=1.0)
scale value to the interval [lower, upper] given the minimal and maximal entries for a feature
Definition: SvmTheoreticalSpectrumGenerator.h:269
OpenMS::SvmTheoreticalSpectrumGenerator::aa_to_index_
static std::map< String, Size > aa_to_index_
map AA to integers
Definition: SvmTheoreticalSpectrumGenerator.h:234
OpenMS::MSSpectrum
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
OpenMS::SvmTheoreticalSpectrumGenerator::IonType
nested class
Definition: SvmTheoreticalSpectrumGenerator.h:82
OpenMS::SvmTheoreticalSpectrumGenerator::hide_type_
std::map< IonType, bool > hide_type_
whether ion types are hidden or not
Definition: SvmTheoreticalSpectrumGenerator.h:246
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::charge
Int charge
Definition: SvmTheoreticalSpectrumGenerator.h:86
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::feature_min
std::vector< double > feature_min
Definition: SvmTheoreticalSpectrumGenerator.h:167
StandardTypes.h
OpenMS::SvmTheoreticalSpectrumGenerator::IonType::operator=
IonType & operator=(const IonType &rhs)
Definition: SvmTheoreticalSpectrumGenerator.h:115
MSSpectrum.h
OpenMS::SvmTheoreticalSpectrumGenerator::SvmModelParameterSet::scaling_lower
double scaling_lower
Definition: SvmTheoreticalSpectrumGenerator.h:170
OpenMS::SvmTheoreticalSpectrumGenerator::DescriptorSet
A set of descriptors for a single training row.
Definition: SvmTheoreticalSpectrumGenerator.h:219