 |
OpenMS
3.0.0
|
|
Go to the documentation of this file.
94 bool isRTColumnOn()
const;
97 void wrapSVM(std::vector<AASequence>& peptide_sequences, std::vector<double>& predicted_retention_times);
106 void setDefaultParams_();
123 void getChargeContribution_(std::map<String, double>& q_cterm,
124 std::map<String, double>& q_nterm,
125 std::map<String, double>& q_aa_basic,
126 std::map<String, double>& q_aa_acidic);
161 void updateMembers_()
override;
SimTypes::SimCoordinateType total_gradient_time_
Total gradient time.
Definition: RTSimulation.h:134
the nu parameter for nu-SVR
Definition: SVMWrapper.h:100
the kernel type
Definition: SVMWrapper.h:97
Base class for TOPP applications.
Definition: TOPPBase.h:147
static Int toInt(const String &this_s)
Definition: StringUtils.h:206
SimTypes::SimCoordinateType rt_sampling_rate_
bin size in rt dimension
Definition: RTSimulation.h:144
const std::string & toString(const DriftTimeUnit value)
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
the C parameter of the svm
Definition: SVMWrapper.h:99
const AASequence & getSequence() const
returns the peptide sequence without leading or trailing spaces
std::vector< std::vector< std::pair< Int, double > > > sequences
Definition: SVMWrapper.h:60
the epsilon parameter for epsilon-SVR
Definition: SVMWrapper.h:101
Simulates/Predicts retention times for peptides or peptide separation.
Definition: RTSimulation.h:53
This class serves for reading in and writing FASTA files. If the protein/gene sequence contains unusua...
Definition: FASTAFile.h:60
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
A more convenient string class.
Definition: String.h:58
void setRT(double rt)
sets the RT of the MS2 spectrum where the identification occurred
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
void getSignificanceBorders(svm_problem *data, std::pair< double, double > &borders, double confidence=0.95, Size number_of_runs=5, Size number_of_partitions=5, double step_size=0.01, Size max_iterations=1000000)
calculates the significance borders of the error model and stores them in 'borders'
The file pendant of the Param class used to load and store the param datastructure as paramXML.
Definition: ParamXMLFile.h:49
void saveModel(std::string modelFilename) const
saves the svm model
boost::shared_ptr< SimRandomNumberGenerator > MutableSimRandomNumberGeneratorPtr
Definition: SimTypes.h:174
double egh_tau_scale_
EGH tau scale parameter of the lorentzian variation.
Definition: RTSimulation.h:149
String toUnmodifiedString() const
returns the peptide as a string without any modifications (e.g., "PEPTIDER")
double egh_variance_location_
EGH sigma value.
Definition: RTSimulation.h:152
Int getIntParameter(SVM_parameter_type type)
You can get the current int parameters of the svm.
Serves for encoding sequences into feature vectors.
Definition: LibSVMEncoder.h:58
void setParameter(SVM_parameter_type type, Int value)
You can set the parameters of the svm:
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
bool toBool() const
Conversion to bool.
void setHits(const std::vector< PeptideHit > &hits)
Sets the peptide hits.
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
bool store(const String &filename) const
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
any text format, which has only a loose definition of what it actually contains – thus it is usually ha...
Definition: FileTypes.h:94
double egh_variance_scale_
EGH sigma scale parameter of the lorentzian variation.
Definition: RTSimulation.h:154
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
the svm type can be NU_SVR or EPSILON_SVR
Definition: SVMWrapper.h:96
svm_problem * encodeLibSVMProblemWithCompositionAndLengthVectors(const std::vector< String > &sequences, std::vector< double > &labels, const String &allowed_characters, UInt maximum_sequence_length)
creates composition vectors with additional length information for 'sequences' and stores them in Lib...
SimTypes::MutableSimRandomNumberGeneratorPtr rnd_gen_
Random number generator.
Definition: RTSimulation.h:158
static void filterPeptidesByRTPredictPValue(std::vector< PeptideIdentification > &peptides, const String &metavalue_key, double threshold=0.05)
Filters peptide identifications according to p-values from RTPredict.
Definition: SVMWrapper.h:104
static void keepBestPeptideHits(std::vector< PeptideIdentification > &peptides, bool strict=false)
Filters peptide identifications keeping only the single best-scoring hit per ID.
static double meanSquareError(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the mean square error for the values in [begin_a, end_a) and [begin_b, end_b)
Definition: StatisticFunctions.h:395
std::vector< double > labels
Definition: SVMWrapper.h:61
bool split(const char splitter, std::vector< String > &substrings, bool quote_protect=false) const
Splits a string into substrings using splitter as delimiter.
bool exists(const std::string &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
double egh_tau_location_
EGH tau value.
Definition: RTSimulation.h:147
Data structure used in SVMWrapper.
Definition: SVMWrapper.h:58
double getPValue(double sigma1, double sigma2, std::pair< double, double > point)
calculates a p-value for a given data point using the model parameters
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
Int train(struct svm_problem *problem)
trains the svm
void loadModel(std::string modelFilename)
loads the model
std::string toString(bool full_precision=true) const
Convert ParamValue to string.
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
Serves as a wrapper for the libsvm.
Definition: SVMWrapper.h:83
ptrdiff_t SignedSize
Signed Size type e.g. used as pointer difference.
Definition: Types.h:134
static double toDouble(const String &this_s)
Definition: StringUtils.h:216
A container for features.
Definition: FeatureMap.h:98
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::String rt_model_file_
Definition: RTSimulation.h:131
void load(const String &filename, std::vector< FASTAEntry > &data) const
loads a FASTA file given by 'filename' and stores the information in 'data'. This uses more RAM than r...
void predict(struct svm_problem *problem, std::vector< double > &predicted_labels)
predicts the labels using the trained model
SimTypes::SimCoordinateType gradient_max_
Maximal observed gradient time.
Definition: RTSimulation.h:141
double getDoubleParameter(SVM_parameter_type type)
You can get the current double parameters of the svm.
bool load(const String &filename)
Definition: SVMWrapper.h:105
void encodeProblemWithOligoBorderVectors(const std::vector< AASequence > &sequences, UInt k_mer_length, const String &allowed_characters, UInt border_length, std::vector< std::vector< std::pair< Int, double > > > &vectors)
creates oligo border vectors for 'sequences' and stores them in 'vectors'
the degree for the polynomial kernel
Definition: SVMWrapper.h:98
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
Management and storage of parameters / INI files.
Definition: Param.h:69
static void destroyProblem(svm_problem *&problem, bool free_nodes=true)
frees all the memory of the svm_problem instance
void store(const String &filename, const Param &param) const
Write XML file.
double performCrossValidation(svm_problem *problem_ul, const SVMData &problem_l, const bool is_labeled, const std::map< SVM_parameter_type, double > &start_values_map, const std::map< SVM_parameter_type, double > &step_sizes_map, const std::map< SVM_parameter_type, double > &end_values_map, Size number_of_partitions, Size number_of_runs, std::map< SVM_parameter_type, double > &best_parameters, bool additive_step_sizes=true, bool output=false, String performances_file_name="performances.txt", bool mcc_as_performance_measure=false)
Performs a CV for the data given by 'problem'.
static AASequence fromString(const String &s, bool permissive=true)
create AASequence object by parsing an OpenMS string
static void filterHitsByScore(std::vector< IdentificationType > &ids, double threshold_score)
Filters peptide or protein identifications according to the score of the hits.
Definition: IDFilter.h:840
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
SimTypes::SimCoordinateType gradient_min_
gradient ranges
Definition: RTSimulation.h:139
static void removeDecoyHits(std::vector< IdentificationType > &ids)
Removes hits annotated as decoys from peptide or protein identifications.
Definition: IDFilter.h:940
void setTrainingSample(svm_problem *training_sample)
This is used for being able to perform predictions with non libsvm standard kernels.
void load(const String &filename, Param &param)
Read XML file.
void setMZ(double mz)
sets the MZ of the MS2 spectrum
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
std::vector< String >::const_iterator ConstIterator
Non-mutable iterator.
Definition: TextFile.h:56
Definition: SVMWrapper.h:111
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Used to load and store idXML files.
Definition: IdXMLFile.h:68
static double pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b,...
Definition: StatisticFunctions.h:521
static const ParamValue EMPTY
Empty data value for comparisons.
Definition: ParamValue.h:60
Representation of a peptide hit.
Definition: PeptideHit.h:55