OpenMS  3.0.0
SVMWrapper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Nico Pfeifer, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 #include <OpenMS/FORMAT/TextFile.h>
41 #include <OpenMS/SYSTEM/File.h>
43 
44 #include <string>
45 #include <vector>
46 #include <map>
47 #include <cmath>
48 
49 // forward declare svm types
50 struct svm_problem;
51 struct svm_parameter;
52 struct svm_model;
53 struct svm_node;
54 
55 namespace OpenMS
56 {
// Data structure used in SVMWrapper.
// Holds two parallel public containers: `sequences` and `labels`.
58  struct OPENMS_DLLAPI SVMData
59  {
    // One inner vector of (Int, double) pairs per entry.
    // NOTE(review): presumably each inner vector is one encoded sequence and
    // each pair an (index/position, value) feature -- confirm against the
    // implementation file.
60  std::vector<std::vector<std::pair<Int, double> > > sequences;
    // NOTE(review): presumably labels[i] belongs to sequences[i] -- confirm.
61  std::vector<double> labels;
62 
    // Default constructor.
63  SVMData();
64 
    // Constructs from sequences and labels. Both arguments are taken by
    // non-const reference; whether they are copied, swapped or modified is not
    // visible in this header -- TODO confirm.
65  SVMData(std::vector<std::vector<std::pair<Int, double> > >& seqs, std::vector<double>& lbls);
66 
    // Equality comparison with another SVMData object.
67  bool operator==(const SVMData& rhs) const;
68 
    // Writes this object to the file `filename`.
    // NOTE(review): the bool result presumably signals success -- confirm.
69  bool store(const String& filename) const;
70 
    // Reads this object from the file `filename`.
    // NOTE(review): the bool result presumably signals success -- confirm.
71  bool load(const String& filename);
72 
73  };
74 
// Serves as a wrapper for the libsvm.
// Derives from ProgressLogger (base class for all classes that want to report
// their progress).
// NOTE(review): this listing has lost lines (both enum header lines, several
// enumerators and several data members); the gaps are marked below.
83  class OPENMS_DLLAPI SVMWrapper :
84  public ProgressLogger
85  {
86 public:
87 
    // Parameters for the svm to be set from outside.
    // NOTE(review): the line naming this enum is missing from the listing; the
    // cross-reference index at the end of the file names it SVM_parameter_type.
    // That index also lists the enumerators SVM_TYPE, KERNEL_TYPE, DEGREE,
    // GAMMA, PROBABILITY and SIGMA, which do not appear here.
95  {
    // the C parameter of the svm
99  C,
    // the nu parameter for nu-SVR
100  NU,
    // the epsilon parameter for epsilon-SVR
101  P,
105  BORDER_LENGTH
106  };
107 
    // Kernel type.
    // NOTE(review): the line naming this enum is missing from the listing; the
    // cross-reference index names it SVM_kernel_type.
110  {
    // Explicitly numbered 19; OLIGO_COMBINED follows as 20.
111  OLIGO = 19,
112  OLIGO_COMBINED
113  };
114 
    // Default constructor.
116  SVMWrapper();
117 
    // Destructor; overrides the base class destructor.
119  ~SVMWrapper() override;
120 
    // Sets the parameter `type` to an integer value.
162  void setParameter(SVM_parameter_type type, Int value);
163 
    // Sets the parameter `type` to a floating point value.
170  void setParameter(SVM_parameter_type type, double value);
171 
    // Trains on a libsvm problem.
    // NOTE(review): the meaning of the Int result is not visible here -- confirm.
177  Int train(struct svm_problem* problem);
178 
    // Trains on data given as SVMData (taken by non-const reference).
184  Int train(SVMData& problem);
185 
    // Saves the model to the file `modelFilename`.
196  void saveModel(std::string modelFilename) const;
197 
    // Loads a model from the file `modelFilename`.
206  void loadModel(std::string modelFilename);
207 
    // Predicts labels for a libsvm problem; results go to `predicted_labels`.
213  void predict(struct svm_problem* problem, std::vector<double>& predicted_labels);
214 
    // Predicts for data given as SVMData; results go to `results`.
220  void predict(const SVMData& problem, std::vector<double>& results);
221 
    // Returns the value of the parameter `type` as Int.
261  Int getIntParameter(SVM_parameter_type type);
262 
    // Returns the value of the parameter `type` as double.
290  double getDoubleParameter(SVM_parameter_type type);
291 
    // Splits `problem` into `number` partitions, written to `partitions`.
    // NOTE(review): ownership of the svm_problem pointers placed in
    // `partitions` is not visible in this header -- confirm who frees them.
297  void createRandomPartitions(svm_problem* problem, Size number, std::vector<svm_problem*>& partitions);
298 
    // Same as above for SVMData input; partitions are written to `problems`.
304  void createRandomPartitions(const SVMData& problem,
305  Size number,
306  std::vector<SVMData>& problems);
    // Merges the given partitions into one problem.
    // NOTE(review): presumably the partition at index `except` is left out, and
    // the caller owns the returned pointer -- confirm both.
310  static svm_problem* mergePartitions(const std::vector<svm_problem*>& problems, Size except);
311 
    // Same as above for SVMData; the result is written to `merged_problem`.
315  static void mergePartitions(const std::vector<SVMData>& problems,
316  Size except,
317  SVMData& merged_problem);
318 
    // Predicts for a list of libsvm feature vectors; results go to
    // `predicted_rts`.
325  void predict(const std::vector<svm_node*>& vectors, std::vector<double>& predicted_rts);
326 
    // Extracts the labels of `problem` into `labels`.
331  static void getLabels(svm_problem* problem, std::vector<double>& labels);
332 
    // Performs cross-validation. The data is passed twice: as a libsvm problem
    // (`problem_ul`) and as SVMData (`problem_l`); `is_labeled` presumably
    // selects which one is used -- TODO confirm.
    // The three maps give, per parameter type, a start value, a step size and
    // an end value; the chosen parameters are written to `best_parameters`.
    // NOTE(review): the names suggest a grid search whose steps are added when
    // `additive_step_sizes` is true (and multiplied otherwise), and a double
    // result that is the best performance found -- confirm against the
    // implementation.
337  double performCrossValidation(svm_problem* problem_ul,
338  const SVMData& problem_l,
339  const bool is_labeled,
340  const std::map<SVM_parameter_type, double>& start_values_map,
341  const std::map<SVM_parameter_type, double>& step_sizes_map,
342  const std::map<SVM_parameter_type, double>& end_values_map,
343  Size number_of_partitions,
344  Size number_of_runs,
345  std::map<SVM_parameter_type, double>& best_parameters,
346  bool additive_step_sizes = true,
347  bool output = false,
348  String performances_file_name = "performances.txt",
349  bool mcc_as_performance_measure = false);
350 
351 
    // Returns a probability value for SVR models.
    // NOTE(review): exact semantics are not visible here -- see the libsvm docs.
361  double getSVRProbability();
362 
    // Oligo kernel for two sequences given as (int, double) pair vectors.
    // `gauss_table` is a precomputed lookup table; `max_distance` defaults to -1.
    // NOTE(review): -1 presumably means "no distance limit" -- confirm.
378  static double kernelOligo(const std::vector<std::pair<int, double> >& x,
379  const std::vector<std::pair<int, double> >& y,
380  const std::vector<double>& gauss_table,
381  int max_distance = -1);
382 
    // Oligo kernel for two sequences given as libsvm node arrays.
    // Defaults: sigma_square = 0, max_distance = 50.
390  static double kernelOligo(const svm_node* x, const svm_node* y, const std::vector<double>& gauss_table, double sigma_square = 0, Size max_distance = 50);
391 
    // Computes significance borders for `data`; the result pair is written to
    // `borders`. Defaults: confidence 0.95, 5 runs, 5 partitions, step size
    // 0.01, at most 1000000 iterations.
395  void getSignificanceBorders(svm_problem* data, std::pair<double, double>& borders, double confidence = 0.95, Size number_of_runs = 5, Size number_of_partitions = 5, double step_size = 0.01, Size max_iterations = 1000000);
396 
    // Same as above for SVMData input; the result pair is written to `sigmas`.
400  void getSignificanceBorders(const SVMData& data,
401  std::pair<double, double>& sigmas,
402  double confidence = 0.95,
403  Size number_of_runs = 5,
404  Size number_of_partitions = 5,
405  double step_size = 0.01,
406  Size max_iterations = 1000000);
407 
    // Returns a p-value for `point` given the two values `sigma1` and `sigma2`.
    // NOTE(review): presumably these are the borders produced by
    // getSignificanceBorders() -- confirm.
414  double getPValue(double sigma1, double sigma2, std::pair<double, double> point);
415 
    // Writes the decision values for `data` to `decision_values`.
425  void getDecisionValues(svm_problem* data, std::vector<double>& decision_values);
426 
    // Scales `data`; `max_scale_value` defaults to -1.
    // NOTE(review): the meaning of the -1 default is not visible here -- confirm.
433  void scaleData(svm_problem* data, Int max_scale_value = -1);
434 
    // Fills `gauss_table` for the given border length and sigma.
435  static void calculateGaussTable(Size border_length, double sigma, std::vector<double>& gauss_table);
436 
    // Computes a kernel matrix between two libsvm problems.
    // NOTE(review): ownership of the returned pointer is not visible here --
    // confirm who frees it.
444  svm_problem* computeKernelMatrix(svm_problem* problem1, svm_problem* problem2);
445 
    // Same as above for SVMData inputs.
453  svm_problem* computeKernelMatrix(const SVMData& problem1, const SVMData& problem2);
454 
    // Sets the training sample from a libsvm problem.
    // NOTE(review): whether the pointer is stored or the data copied is not
    // visible here -- confirm the lifetime requirements on `training_sample`.
459  void setTrainingSample(svm_problem* training_sample);
460 
    // Sets the training sample from SVMData (taken by non-const reference).
464  void setTrainingSample(SVMData& training_sample);
465 
    // Writes class probabilities and predicted labels for `problem` to
    // `probabilities` and `prediction_labels`.
475  void getSVCProbabilities(struct svm_problem* problem, std::vector<double>& probabilities, std::vector<double>& prediction_labels);
476 
    // Sets weights together with the labels they apply to.
    // NOTE(review): presumably weights[i] applies to weight_labels[i] -- confirm.
480  void setWeights(const std::vector<Int>& weight_labels, const std::vector<double>& weights);
481 
482 private:
    // Advances `actual_values` given start values, step sizes and end values.
    // NOTE(review): presumably returns false once the grid is exhausted --
    // confirm.
489  bool nextGrid_(const std::vector<double>& start_values,
490  const std::vector<double>& step_sizes,
491  const std::vector<double>& end_values,
492  const bool additive_step_sizes,
493  std::vector<double>& actual_values);
494 
    // Counts how many of `points` are enclosed for the values `m1` and `m2`.
    // NOTE(review): the enclosing criterion is not visible here -- confirm.
495  Size getNumberOfEnclosedPoints_(double m1, double m2, const std::vector<std::pair<double, double> >& points);
496 
    // Initializes the parameters.
500  void initParameters_();
501 
    // Takes a C string and leaves the parameter unnamed (unused).
    // NOTE(review): presumably a no-op print callback used to silence libsvm
    // output -- confirm.
507  static void printToVoid_(const char* /*s*/);
508 
    // the parameters for the svm
509  svm_parameter* param_;
    // the learned svm discriminant
510  svm_model* model_;
    // for the oligo kernel (amount of positional smearing)
511  double sigma_;
    // for the combined oligo kernel (amount of positional smearing)
512  std::vector<double> sigmas_;
    // lookup table for fast computation of the oligo kernel
513  std::vector<double> gauss_table_;
    // lookup table for fast computation of the combined oligo kernel
514  std::vector<std::vector<double> > gauss_tables_;
    // NOTE(review): the cross-reference index also lists the members
    // kernel_type_, border_length_, training_data_ and shuffler_, whose
    // declarations are missing from this listing.
    // The two pointers below start out as nullptr.
517  svm_problem* training_set_ = nullptr;
518  svm_problem* training_problem_ = nullptr;
521  };
522 
523 } // namespace OpenMS
524 
OpenMS::SVMWrapper::NU
the nu parameter for nu-SVR
Definition: SVMWrapper.h:100
OpenMS::SVMWrapper::KERNEL_TYPE
the kernel type
Definition: SVMWrapper.h:97
OpenMS::SVMWrapper::model_
svm_model * model_
the learned svm discriminant
Definition: SVMWrapper.h:510
TextFile.h
OpenMS::Math::RandomShuffler
Definition: MathFunctions.h:363
OpenMS::SVMWrapper::C
the C parameter of the svm
Definition: SVMWrapper.h:99
OpenMS::SVMData::sequences
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:60
Types.h
OpenMS::SVMWrapper::P
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:101
OpenMS::SVMWrapper::SVM_kernel_type
SVM_kernel_type
Kernel type.
Definition: SVMWrapper.h:109
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::SVMWrapper::SVM_parameter_type
SVM_parameter_type
Parameters for the svm to be set from outside.
Definition: SVMWrapper.h:94
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::SVMWrapper::training_data_
SVMData training_data_
the training set (different encoding)
Definition: SVMWrapper.h:519
OpenMS::SVMWrapper::border_length_
Size border_length_
the border length for the oligo kernel
Definition: SVMWrapper.h:516
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS::SVMWrapper::kernel_type_
Size kernel_type_
the actual kernel type
Definition: SVMWrapper.h:515
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::SVMWrapper::gauss_table_
std::vector< double > gauss_table_
lookup table for fast computation of the oligo kernel
Definition: SVMWrapper.h:513
OpenMS::SVMWrapper::SVM_TYPE
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:96
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
ProgressLogger.h
OpenMS::SVMWrapper::SIGMA
Definition: SVMWrapper.h:104
MathFunctions.h
OpenMS::SVMData::labels
std::vector< double > labels
Definition: SVMWrapper.h:61
OpenMS::SVMWrapper::shuffler_
Math::RandomShuffler shuffler_
random shuffler to create training partitions
Definition: SVMWrapper.h:520
OpenMS::SVMWrapper::GAMMA
the gamma parameter of the POLY, RBF and SIGMOID kernel
Definition: SVMWrapper.h:102
OpenMS::SVMData
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:58
OpenMS::SVMWrapper::PROBABILITY
Definition: SVMWrapper.h:103
OpenMS::SVMWrapper::param_
svm_parameter * param_
the parameters for the svm
Definition: SVMWrapper.h:509
OpenMS::SVMWrapper::sigma_
double sigma_
for the oligo kernel (amount of positional smearing)
Definition: SVMWrapper.h:511
OpenMS::SVMWrapper
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:83
OpenMS::SVMWrapper::sigmas_
std::vector< double > sigmas_
for the combined oligo kernel (amount of positional smearing)
Definition: SVMWrapper.h:512
OpenMS::Internal::operator==
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
OpenMS::StringUtils::number
static String number(double d, UInt n)
Definition: StringUtils.h:196
String.h
OpenMS::SVMWrapper::DEGREE
the degree for the polynomial kernel
Definition: SVMWrapper.h:98
File.h
OpenMS::SVMWrapper::gauss_tables_
std::vector< std::vector< double > > gauss_tables_
lookup table for fast computation of the combined oligo kernel
Definition: SVMWrapper.h:514