OpenMS  3.0.0
PercolatorFeatureSetHelper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Mathias Walzer $
32 // $Authors: Mathias Walzer, Matthew The $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <vector>
38 #include <cmath>
39 #include <string>
40 #include <map>
41 // #include <algorithm>
42 #include <limits>
43 
44 #include <OpenMS/CONCEPT/Types.h>
50 
51 namespace OpenMS
52 {
63  class OPENMS_DLLAPI PercolatorFeatureSetHelper
64  {
65 
66  public:
75  static void concatMULTISEPeptideIds(std::vector<PeptideIdentification>& all_peptide_ids, std::vector<PeptideIdentification>& new_peptide_ids, String search_engine);
76 
85  static void mergeMULTISEPeptideIds(std::vector<PeptideIdentification>& all_peptide_ids, std::vector<PeptideIdentification>& new_peptide_ids, String search_engine);
86 
94  static void mergeMULTISEProteinIds(std::vector<ProteinIdentification>& all_protein_ids, std::vector<ProteinIdentification>& new_protein_ids);
95 
96 
104  static void addMSGFFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);
105 
113  static void addXTANDEMFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);
114 
122  static void addCOMETFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);
123 
131  static void addMASCOTFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& feature_set);
132 
143  static void addMULTISEFeatures(std::vector<PeptideIdentification>& peptide_ids, StringList& search_engines_used, StringList& feature_set, bool complete_only = true, bool limits_imputation = false);
144 
153  static void addCONCATSEFeatures(std::vector<PeptideIdentification>& peptide_id_list, StringList& search_engines_used, StringList& feature_set);
154 
162  static void checkExtraFeatures(const std::vector<PeptideHit> &psms, StringList& extra_features);
163 
170  static void addMSFRAGGERFeatures(StringList& extra_features);
171 
172 
173  protected:
175  static double rescaleFragmentFeature_(double featureValue, int NumMatchedMainIons);
176 
178  static void assignDeltaScore_(std::vector<PeptideHit>& hits, String score_ref, String output_ref);
179 
181  static String getScanMergeKey_(std::vector<PeptideIdentification>::iterator it, std::vector<PeptideIdentification>::iterator start);
182 
185  {
186  inline bool operator() (const ProteinHit& h1, const ProteinHit& h2)
187  {
188  return (h1.getAccession() < h2.getAccession());
189  }
190  };
191 
194  {
195  inline bool operator() (const PeptideEvidence& h1, const PeptideEvidence& h2)
196  {
197  return (h1.getProteinAccession() < h2.getProteinAccession());
198  }
199  };
200 
201  };
202 
203 } //namespace OpenMS
204 
205 
OpenMS::ProteinIdentification::SearchParameters::digestion_enzyme
Protease digestion_enzyme
The cleavage site information in details (from ProteaseDB)
Definition: ProteinIdentification.h:273
OpenMS::PercolatorInfile::getScanIdentifier
static String getScanIdentifier(const PeptideIdentification &pid, size_t index)
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:64
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
OpenMS::MzIdentMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &poid, const std::vector< PeptideIdentification > &peid) const
Stores the identifications in a MzIdentML file.
FileHandler.h
FileTypes.h
Size
OpenMS::OSWFile::OSWLevel
OSWLevel
for Percolator data read/write operations
Definition: OSWFile.h:102
OpenMS::ProteinIdentification::getIndistinguishableProteins
const std::vector< ProteinGroup > & getIndistinguishableProteins() const
Returns the indistinguishable proteins.
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::PercolatorFeatureSetHelper::lq_PeptideEvidence
For accession dependent sorting of PeptideEvidences.
Definition: PercolatorFeatureSetHelper.h:193
Types.h
OpenMS::PercolatorFeatureSetHelper
Percolator feature set and integration helper.
Definition: PercolatorFeatureSetHelper.h:63
OpenMS::PercolatorFeatureSetHelper::mergeMULTISEProteinIds
static void mergeMULTISEProteinIds(std::vector< ProteinIdentification > &all_protein_ids, std::vector< ProteinIdentification > &new_protein_ids)
mergeMULTISEProteinIds
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:327
OpenMS::FileHandler::getTypeByFileName
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
OpenMS::MetaInfoInterface::getMetaValue
const DataValue & getMetaValue(const String &name, const DataValue &default_value=DataValue::EMPTY) const
Returns the value corresponding to a string, or a default value (default: DataValue::EMPTY) if not fo...
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::ProteinIdentification::ProteinGroup::accessions
std::vector< String > accessions
Accessions of (indistinguishable) proteins that belong to the same group.
Definition: ProteinIdentification.h:134
KDTree::operator!=
bool operator!=(_Iterator< _Val, _Ref, _Ptr > const &, _Iterator< _Val, _Ref, _Ptr > const &)
Definition: KDTree.h:824
OpenMS::OSWFile::readToPIN
static void readToPIN(const std::string &filename, const OSWFile::OSWLevel osw_level, std::ostream &pin_output, const double ipf_max_peakgroup_pep, const double ipf_max_transition_isotope_overlap, const double ipf_min_transition_sn)
Reads an OSW SQLite file and returns the data on MS1-, MS2- or transition-level as ostream (e....
Int
OSWFile.h
IdXMLFile.h
OpenMS::ProteinHit
Representation of a protein hit.
Definition: ProteinHit.h:58
OpenMS::DataValue::toString
String toString(bool full_precision=true) const
Conversion to String full_precision Controls number of fractional digits for all double types or list...
OpenMS::Constants::c
const double c
Definition: Constants.h:209
OpenMS::IDFilter::updateProteinGroups
static bool updateProteinGroups(std::vector< ProteinIdentification::ProteinGroup > &groups, const std::vector< ProteinHit > &hits)
Update protein groups after protein hits were filtered.
EnumHelpers.h
OpenMS::FileTypes::MZIDENTML
mzIdentML (HUPO PSI AnalysisXML followup format) (.mzid)
Definition: FileTypes.h:75
OpenMS::File::getUniqueName
static String getUniqueName(bool include_hostname=true)
Returns a string, consisting of date, time, hostname, process id, and a incrementing number....
OpenMS::Helpers::indexOf
Size indexOf(const ContainerType &cont, const typename ContainerType::value_type &val)
Definition: EnumHelpers.h:45
OPENMS_PRECONDITION
#define OPENMS_PRECONDITION(condition, message)
Precondition macro.
Definition: openms/include/OpenMS/CONCEPT/Macros.h:120
OpenMS::FileTypes::UNKNOWN
Unknown file extension.
Definition: FileTypes.h:58
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
Constants.h
OpenMS::OSWFile::names_of_oswlevel
static const std::array< std::string,(Size) OSWLevel::SIZE_OF_OSWLEVEL > names_of_oswlevel
Definition: OSWFile.h:109
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::ListUtils::concatenate
static String concatenate(const std::vector< T > &container, const String &glue="")
Concatenates all elements of the container and puts the glue string between elements.
Definition: ListUtils.h:209
OpenMS::ProteinIdentification::SearchParameters::charges
String charges
The allowed charges for the search.
Definition: ProteinIdentification.h:264
OpenMS::MzIdentMLFile
File adapter for MzIdentML files.
Definition: MzIdentMLFile.h:67
OpenMS::ProteinHit::getAccession
const String & getAccession() const
returns the accession of the protein
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
PercolatorInfile.h
OpenMS::StringUtils::remove
static String & remove(String &this_s, char what)
Definition: StringUtilsSimple.h:595
OpenMS::ProteinIdentification::SearchParameters::variable_modifications
std::vector< String > variable_modifications
Allowed variable modifications.
Definition: ProteinIdentification.h:267
OpenMS::MetaInfoInterface::setMetaValue
void setMetaValue(const String &name, const DataValue &value)
Sets the DataValue corresponding to a name.
ProteinIdentification.h
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:56
OpenMS::PeptideEvidence
Representation of a peptide evidence.
Definition: PeptideEvidence.h:50
OPENMS_LOG_DEBUG
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::String::toQString
QString toQString() const
Conversion to Qt QString.
CsvFile.h
OpenMS::ProteinIdentification::SearchParameters::fragment_mass_tolerance
double fragment_mass_tolerance
Mass tolerance of fragment ions (Dalton or ppm)
Definition: ProteinIdentification.h:269
OpenMS::PercolatorInfile::store
static void store(const String &pin_file, const std::vector< PeptideIdentification > &peptide_ids, const StringList &feature_set, const std::string &enz, int min_charge, int max_charge)
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::ProteinIdentification::SearchParameters::fixed_modifications
std::vector< String > fixed_modifications
Used fixed modifications.
Definition: ProteinIdentification.h:266
OpenMS::FileTypes::OSW
OpenSWATH OpenSWATH report (OSW) SQLite DB.
Definition: FileTypes.h:104
OpenMS::MzIdentMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &poid, std::vector< PeptideIdentification > &peid)
Loads the identifications from a MzIdentML file.
OpenMS::CsvFile
This class handles csv files. Currently only loading is implemented.
Definition: CsvFile.h:49
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
OpenMS::ProteinIdentification::SearchParameters::precursor_mass_tolerance
double precursor_mass_tolerance
Mass tolerance of precursor ions (Dalton or ppm)
Definition: ProteinIdentification.h:271
PercolatorFeatureSetHelper.h
OpenMS::StringUtils::toDouble
static double toDouble(const String &this_s)
Definition: StringUtils.h:216
OpenMS::File::TempDir
Class representing a temporary directory.
Definition: File.h:64
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::Internal::operator==
bool operator==(const IDBoostGraph::ProteinGroup &lhs, const IDBoostGraph::ProteinGroup &rhs)
OpenMS::MetaInfoInterface::metaValueExists
bool metaValueExists(const String &name) const
Returns whether an entry with the given name exists.
OpenMS::PercolatorFeatureSetHelper::lq_ProteinHit
For accession dependent sorting of ProteinHits.
Definition: PercolatorFeatureSetHelper.h:184
DataValue.h
OpenMS::TOPPBase::ExitCodes
ExitCodes
Exit codes.
Definition: TOPPBase.h:152
OpenMS::FileTypes::nameToType
static Type nameToType(const String &name)
IDFilter.h
OpenMS::PeptideEvidence::getProteinAccession
const String & getProteinAccession() const
get the protein accession the peptide matches to. If not available the empty string is returned.
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:465
OpenMS::StringUtils::substr
static String substr(const String &this_s, size_t pos, size_t n)
Definition: StringUtilsSimple.h:213
OpenMS::Exception::ElementNotFound
Element could not be found exception.
Definition: Exception.h:674
OpenMS::ProteinIdentification::ProteinGroup::probability
double probability
Probability of this group.
Definition: ProteinIdentification.h:131
PeptideIdentification.h
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:258
StandardTypes.h
OpenMS::OSWFile::writeFromPercolator
static void writeFromPercolator(const std::string &osw_filename, const OSWFile::OSWLevel osw_level, const std::map< std::string, PercolatorFeature > &features)
Updates an OpenSWATH OSW SQLite file with the MS1-, MS2- or transition-level results of Percolator.
File.h
MzIdentMLFile.h
TOPPBase.h
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:68
OpenMS::ProteinIdentification::ProteinGroup
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:117
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55