OpenMS  3.0.0
RTSimulation.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: Stephan Aiche, Chris Bielow$
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
39 
40 namespace OpenMS
41 {
    /**
      @brief Simulates/predicts retention times for peptides or peptide separation.

      Derives from DefaultParamHandler and overrides its updateMembers_() hook.

      NOTE(review): this is a rendered source listing. The original doc comments and
      the data-member declarations are not visible here; the comments in this block
      are reconstructed from the signatures and from the cross-reference index of
      this page, and should be confirmed against the real header.
    */
53  class OPENMS_DLLAPI RTSimulation :
54  public DefaultParamHandler
55  {
56 
57 
58 public:
62 
    /// Default constructor.
64  RTSimulation();
65 
68 
    /// Copy constructor.
70  RTSimulation(const RTSimulation& source);
71 
    /// Destructor (overrides the base-class destructor).
73  ~RTSimulation() override;
75 
    /// Copy-assignment operator.
77  RTSimulation& operator=(const RTSimulation& source);
78 
    /// Predicts retention times for @p features. The map is taken by non-const
    /// reference, so it is presumably modified in place — TODO confirm.
84  void predictRT(SimTypes::FeatureMapSim& features);
85 
    /// Counterpart of predictRT() for contaminants, judging by the name.
    /// NOTE(review): how contaminant RTs are chosen is not visible here.
89  void predictContaminantsRT(SimTypes::FeatureMapSim&);
90 
    /// Returns whether an RT column is in use.
    /// NOTE(review): presumably derived from a parameter — confirm in the implementation.
94  bool isRTColumnOn() const;
95 
    /// Wrapper around the SVM-based prediction, judging by the name.
    /// @param peptide_sequences          sequences to predict for (non-const reference)
    /// @param predicted_retention_times  presumably filled with one value per sequence — TODO confirm
97  void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
98 
    /// Returns the gradient time.
    /// NOTE(review): likely the "Total gradient time" member total_gradient_time_ — confirm.
99  SimTypes::SimCoordinateType getGradientTime() const;
100 
    /// Creates/sets up @p experiment (taken by non-const reference).
    /// NOTE(review): what exactly is created is not visible in this listing.
102  void createExperiment(SimTypes::MSSimExperiment& experiment);
103 
104 private:
    /// Sets the default parameters of this class (name-based; body not visible here).
106  void setDefaultParams_();
107 
    /// Handles the case without an RT column, judging by the name — TODO confirm behaviour.
109  void noRTColumn_(SimTypes::FeatureMapSim&);
110 
    /// Smooths the RT distortion of @p experiment, judging by the name — TODO confirm behaviour.
112  void smoothRTDistortion_(SimTypes::MSSimExperiment& experiment);
113 
    /// Computes "MT" values for @p features into @p predicted_retention_times.
    /// NOTE(review): "MT" presumably means migration time — confirm against the original docs.
121  void calculateMT_(SimTypes::FeatureMapSim& features, std::vector<double>& predicted_retention_times);
122 
    /// Provides charge contributions as String -> double maps for the C-terminus,
    /// the N-terminus, and basic and acidic amino acids (per the parameter names).
    /// All four maps are non-const references, presumably outputs — TODO confirm.
123  void getChargeContribution_(std::map<String, double>& q_cterm,
124  std::map<String, double>& q_nterm,
125  std::map<String, double>& q_aa_basic,
126  std::map<String, double>& q_aa_acidic);
127 
128  // MEMBERS:
129 
    // NOTE(review): the member declarations are elided from this listing. According to
    // the cross-reference index of this page they are (header line in parentheses):
    //   OpenMS::String              rt_model_file_          (131)
    //   SimTypes::SimCoordinateType total_gradient_time_    (134) Total gradient time
    //   SimTypes::SimCoordinateType gradient_min_           (139) gradient ranges
    //   SimTypes::SimCoordinateType gradient_max_           (141) Maximal observed gradient time
    //   SimTypes::SimCoordinateType rt_sampling_rate_       (144) bin size in rt dimension
    //   double                      egh_tau_location_       (147) EGH tau value
    //   double                      egh_tau_scale_          (149) EGH tau scale parameter of the lorentzian variation
    //   double                      egh_variance_location_  (152) EGH sigma value
    //   double                      egh_variance_scale_     (154) EGH sigma scale parameter of the lorentzian variation
130  // Name of the svm model file
132 
135 
137 
142 
145 
150 
155 
156 protected:
    // NOTE(review): per the cross-reference index, the random number generator member
    // SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_ is defined at header line 158.
159 
    /// Override of the base-class updateMembers_() hook.
    /// NOTE(review): presumably synchronizes the members with the parameters — confirm.
161  void updateMembers_() override;
162 
163  };
164 
165 }
166 
LogStream.h
DefaultParamHandler.h
OpenMS::RTSimulation::total_gradient_time_
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:134
OpenMS::SVMWrapper::NU
the nu parameter for nu-SVR
Definition: SVMWrapper.h:100
OpenMS::SVMWrapper::KERNEL_TYPE
the kernel type
Definition: SVMWrapper.h:97
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
OpenMS::StringUtils::toInt
static Int toInt(const String &this_s)
Definition: StringUtils.h:206
FileHandler.h
SimTypes.h
TextFile.h
OpenMS::RTSimulation::rt_sampling_rate_
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:144
Size
double
OpenMS::toString
const std::string & toString(const DriftTimeUnit value)
OpenMS::Param::setValue
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::SVMWrapper::C
the C parameter of the svm
Definition: SVMWrapper.h:99
OpenMS::PeptideHit::getSequence
const AASequence & getSequence() const
returns the peptide sequence without trailing or following spaces
OpenMS::SVMData::sequences
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:60
EnzymaticDigestion.h
OpenMS::SVMWrapper::P
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:101
OpenMS::RTSimulation
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:53
OpenMS::FASTAFile
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition: FASTAFile.h:60
OpenMS::FileHandler::getTypeByFileName
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
OpenMS::String
A more convenient string class.
Definition: String.h:58
ParamXMLFile.h
SVMWrapper.h
Int
OpenMS::PeptideIdentification::setRT
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
LibSVMEncoder.h
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
IdXMLFile.h
OpenMS::SVMWrapper::getSignificanceBorders
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'sigmas'
OpenMS::ParamXMLFile
The file counterpart of the Param class, used to load and store the Param data structure as paramXML.
Definition: ParamXMLFile.h:49
StatisticFunctions.h
OpenMS::SVMWrapper::saveModel
void saveModel(std::string modelFilename) const
saves the svm model
OpenMS::SimTypes::MutableSimRandomNumberGeneratorPtr
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
OpenMS::RTSimulation::egh_tau_scale_
double egh_tau_scale_
EGH tau scale parameter of the lorentzian variation.
Definition: RTSimulation.h:149
OpenMS::AASequence::toUnmodifiedString
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
OpenMS::RTSimulation::egh_variance_location_
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:152
OpenMS::SVMWrapper::getIntParameter
Int getIntParameter(SVM_parameter_type type)
You can get the actual int parameters of the svm.
OpenMS::LibSVMEncoder
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:58
OpenMS::SVMWrapper::setParameter
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::ParamValue::toBool
bool toBool() const
Conversion to bool.
OpenMS::PeptideIdentification::setHits
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::SVMData::store
bool store(const String &filename) const
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::FileTypes::TXT
any text format, which has only a loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:94
OpenMS::RTSimulation::egh_variance_scale_
double egh_variance_scale_
EGH sigma scale parameter of the lorentzian variation.
Definition: RTSimulation.h:154
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::SVMWrapper::SVM_TYPE
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:96
OpenMS::LibSVMEncoder::encodeLibSVMProblemWithCompositionAndLengthVectors
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
OpenMS::RTSimulation::rnd_gen_
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:158
FASTAFile.h
OpenMS::IDFilter::filterPeptidesByRTPredictPValue
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
OpenMS::SVMWrapper::SIGMA
Definition: SVMWrapper.h:104
OpenMS::IDFilter::keepBestPeptideHits
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
OpenMS::Math::meanSquareError
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:395
ProteinIdentification.h
OpenMS::SVMData::labels
std::vector< double > labels
Definition: SVMWrapper.h:61
OpenMS::String::split
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
OpenMS::Param::exists
bool exists(const std::string &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
OpenMS::RTSimulation::egh_tau_location_
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:147
OpenMS::SVMData
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:58
OpenMS::SVMWrapper::getPValue
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::SVMWrapper::train
Int train(struct svm_problem *problem)
trains the svm
OpenMS::SVMWrapper::loadModel
void loadModel(std::string modelFilename)
loads the model
OpenMS::ParamValue::toString
std::string toString(bool full_precision=true) const
Convert ParamValue to string.
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
OpenMS::SVMWrapper
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:83
OpenMS::SignedSize
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
OpenMS::StringUtils::toDouble
static double toDouble(const String &this_s)
Definition: StringUtils.h:216
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:98
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::RTSimulation::rt_model_file_
OpenMS::String rt_model_file_
Definition: RTSimulation.h:131
OpenMS::FASTAFile::load
void load(const String &filename, std::vector< FASTAEntry > &data) const
loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
OpenMS::SVMWrapper::predict
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
OpenMS::RTSimulation::gradient_max_
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:141
OpenMS::SVMWrapper::getDoubleParameter
double getDoubleParameter(SVM_parameter_type type)
You can get the actual double parameters of the svm.
OpenMS::SVMData::load
bool load(const String &filename)
OpenMS::SVMWrapper::BORDER_LENGTH
Definition: SVMWrapper.h:105
OpenMS::LibSVMEncoder::encodeProblemWithOligoBorderVectors
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
OpenMS::SVMWrapper::DEGREE
the degree for the polynomial kernel
Definition: SVMWrapper.h:98
OpenMS::StringUtils::trim
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:69
OpenMS::LibSVMEncoder::destroyProblem
static void destroyProblem(svm_problem *&problem, bool free_nodes=true)
frees all the memory of the svm_problem instance
IDFilter.h
OpenMS::ParamXMLFile::store
void store(const String &filename, const Param &param) const
Write XML file.
OpenMS::SVMWrapper::performCrossValidation
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
OpenMS::AASequence::fromString
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
OpenMS::IDFilter::filterHitsByScore
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:840
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
OpenMS::RTSimulation::gradient_min_
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:139
OpenMS::IDFilter::removeDecoyHits
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:940
OpenMS::SVMWrapper::setTrainingSample
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
OpenMS::ParamXMLFile::load
void load(const String &filename, Param &param)
Read XML file.
OpenMS::PeptideIdentification::setMZ
void setMZ(double mz)
sets the MZ of the MS2 spectrum
OpenMS::Param::getValue
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
OpenMS::TextFile::ConstIterator
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
OpenMS::SVMWrapper::OLIGO
Definition: SVMWrapper.h:111
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
TOPPBase.h
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:68
OpenMS::Math::pearsonCorrelationCoefficient
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:521
OpenMS::ParamValue::EMPTY
static const ParamValue EMPTY
Empty data value for comparisons.
Definition: ParamValue.h:60
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55
StringListUtils.h