OpenMS  3.0.0
LibSVMEncoder.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Nico Pfeifer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
40 
41 #include <vector>
42 #include <utility>
43 
44 struct svm_problem;
45 struct svm_parameter;
46 struct svm_model;
47 
48 namespace OpenMS
49 {
58  class OPENMS_DLLAPI LibSVMEncoder
59  {
60 public:
62  LibSVMEncoder() = default;
64  ~LibSVMEncoder() = default;
65 
75  void encodeCompositionVector(const String & sequence, std::vector<std::pair<Int, double> > & encoded_vector, const String & allowed_characters = "ACDEFGHIKLMNPQRSTVWY");
76 
86  void encodeCompositionVectors(const std::vector<String> & sequences, const String & allowed_characters, std::vector<std::vector<std::pair<Int, double> > > & composition_vectors);
88  svm_node * encodeLibSVMVector(const std::vector<std::pair<Int, double> > & feature_vector);
89 
91  void encodeLibSVMVectors(const std::vector<std::vector<std::pair<Int, double> > > & feature_vectors, std::vector<svm_node *> & libsvm_vectors);
92 
94  svm_problem * encodeLibSVMProblem(const std::vector<svm_node *> & vectors,
95  std::vector<double> & labels);
96 
98  svm_problem * encodeLibSVMProblemWithCompositionVectors(const std::vector<String> & sequences,
99  std::vector<double> & labels,
100  const String & allowed_characters);
101 
107  svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector<String> & sequences,
108  std::vector<double> & labels,
109  const String & allowed_characters,
110  UInt maximum_sequence_length);
111 
117  svm_problem * encodeLibSVMProblemWithCompositionLengthAndWeightVectors(const std::vector<String> & sequences,
118  std::vector<double> & labels,
119  const String & allowed_characters);
120 
122  bool storeLibSVMProblem(const String & filename, const svm_problem * problem) const;
123 
125  svm_problem * loadLibSVMProblem(const String & filename);
126 
128  void encodeOligoBorders(String sequence,
129  UInt k_mer_length,
130  const String & allowed_characters,
131  UInt border_length,
132  std::vector<std::pair<Int, double> > & libsvm_vector,
133  bool strict = false,
134  bool unpaired = false,
135  bool length_encoding = false);
136 
138  svm_problem * encodeLibSVMProblemWithOligoBorderVectors(const std::vector<String> & sequences,
139  std::vector<double> & labels,
140  UInt k_mer_length,
141  const String & allowed_characters,
142  UInt border_length,
143  bool strict = false,
144  bool unpaired = false,
145  bool length_encoding = false);
146 
148  void encodeProblemWithOligoBorderVectors(const std::vector<AASequence> & sequences,
149  UInt k_mer_length,
150  const String & allowed_characters,
151  UInt border_length,
152  std::vector<std::vector<std::pair<Int, double> > > & vectors);
153 
160  void libSVMVectorToString(svm_node * vector, String & output);
161 
168  void libSVMVectorsToString(svm_problem * vector, String & output);
169 
176  void encodeOligo(const AASequence & sequence,
177  UInt k_mer_length,
178  const String & allowed_characters,
179  std::vector<std::pair<Int, double> > & values,
180  bool is_right_border = false);
181 
187  static void destroyProblem(svm_problem* &problem, bool free_nodes = true);
188 
189  static std::vector<double> predictPeptideRT(const std::vector<String> & sequences,
190  SVMWrapper& svm,
191  const String & allowed_characters = "ACDEFGHIKLMNPQRSTVWY",
192  UInt maximum_sequence_length = 50)
193  {
194  std::vector<double> predicted_retention_times;
195 
196  LibSVMEncoder encoder;
197  std::vector<double> temp_rts;
198  temp_rts.resize(sequences.size(), 0);
199  svm_problem * prediction_data =
201  temp_rts,
202  allowed_characters,
203  maximum_sequence_length);
204  svm.predict(prediction_data, predicted_retention_times);
205  LibSVMEncoder::destroyProblem(prediction_data);
206  return predicted_retention_times;
207  }
208 
209 private:
211  static bool cmpOligos_(std::pair<Int, double> a,
212  std::pair<Int, double> b);
213 
214  };
215 
216 } // namespace OpenMS
217 
OpenMS::String
A more convenient string class.
Definition: String.h:58
SVMWrapper.h
OpenMS::LibSVMEncoder
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:58
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::LibSVMEncoder::encodeLibSVMProblemWithCompositionAndLengthVectors
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::SVMWrapper
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:83
AASequence.h
OpenMS::SVMWrapper::predict
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
String.h
OpenMS::AASequence
Representation of a peptide/protein sequence.
Definition: AASequence.h:111
OpenMS::LibSVMEncoder::destroyProblem
static void destroyProblem(svm_problem *&problem, bool free_nodes=true)
frees all the memory of the svm_problem instance
OpenMS::LibSVMEncoder::predictPeptideRT
static std::vector< double > predictPeptideRT(const std::vector< String > &sequences, SVMWrapper &svm, const String &allowed_characters="ACDEFGHIKLMNPQRSTVWY", UInt maximum_sequence_length=50)
Definition: LibSVMEncoder.h:189