OpenMS  3.0.0
IDRipper.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg$
32 // $Authors: Immanuel Luhn, Leon Kuchenbecker$
33 // --------------------------------------------------------------------------
34 #pragma once
35 
39 
40 
41 namespace OpenMS
42 {
/// Ripping protein/peptide identification according to their file origin.
/// Derives from DefaultParamHandler (a base class for all classes handling default parameters).
/// NOTE(review): this listing elides the documented lines of the header, so some
/// members referenced elsewhere (see notes below) are not visible here.
52  class OPENMS_DLLAPI IDRipper :
53  public DefaultParamHandler
54  {
55 public:
 /// Possible input file encodings for the origin as used by different versions of IDMerger.
 /// SIZE_OF_ORIGIN_ANNOTATION_FORMAT is the enumerator count; it sizes names_of_OriginAnnotationFormat below.
57  enum OriginAnnotationFormat { FILE_ORIGIN = 0, MAP_INDEX = 1, ID_MERGE_INDEX = 2, UNKNOWN_OAF = 3, SIZE_OF_ORIGIN_ANNOTATION_FORMAT = 4 };
58 
 /// String representations for the OriginAnnotationFormat enum.
60  static const std::array<std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT> names_of_OriginAnnotationFormat;
61 
 /// Represents a set of IdentificationRuns.
63  struct OPENMS_DLLAPI IdentificationRuns
64  {
 /// Maps a unique index to every IdentificationRun string representation (getIdentifier()).
66  std::map<String, UInt> index_map;
 /// Maps the list of spectra data elements to every IdentificationRun index.
68  std::vector<StringList> spectra_data;
69 
 /// Constructs the set from a list of protein identification runs.
 /// NOTE(review): the definition is not visible here; presumably fills index_map and spectra_data from prot_ids - confirm.
71  IdentificationRuns(const std::vector<ProteinIdentification>& prot_ids);
72  };
73 
 /// Identifies an IDRipper output file.
 /// NOTE(review): the data members ident_run_idx, file_origin_idx, out_basename and
 /// origin_fullname (header lines 78-84) are elided from this listing.
75  struct OPENMS_DLLAPI RipFileIdentifier
76  {
85 
 /// Constructs a new RipFileIdentifier object.
 /// NOTE(review): the definition is not visible here; how the arguments determine the stored indices and names must be checked in the implementation.
87  RipFileIdentifier(const IDRipper::IdentificationRuns& id_runs, const PeptideIdentification& pep_id, const std::map<String, UInt>& file_origin_map, const IDRipper::OriginAnnotationFormat origin_annotation_fmt, bool split_ident_runs);
88 
 /// Presumably returns ident_run_idx, the numerical index of the source IdentificationRun - confirm.
90  UInt getIdentRunIdx();
91 
 /// Presumably returns file_origin_idx, the numerical index of the source file_origin / spectra_data element - confirm.
93  UInt getFileOriginIdx();
94 
 /// Presumably returns origin_fullname, the full length origin read from the file_origin / spectra_data element - confirm.
96  const String & getOriginFullname();
97 
 /// Presumably returns out_basename, the output basename derived from the file_origin / spectra_data element - confirm.
99  const String & getOutputBasename();
100  };
101 
 /// Comparator RipFileIdentifierIdxComparator: provides a 'less' operation for
 /// RipFileIdentifiers that ignores the out_basename and origin_fullname members.
 /// NOTE(review): the opening declaration line of this struct (header line 103) is elided from this listing.
104  {
105  bool operator()(const RipFileIdentifier& left, const RipFileIdentifier& right) const;
106  };
107 
 /// Represents the content of an IDRipper output file.
109  struct OPENMS_DLLAPI RipFileContent
110  {
 /// Protein identifications.
112  std::vector<ProteinIdentification> prot_idents;
 /// Peptide identifications.
114  std::vector<PeptideIdentification> pep_idents;
 /// Constructs a new RipFileContent object; both vectors are copied into the members.
116  RipFileContent(const std::vector<ProteinIdentification>& prot_idents, const std::vector<PeptideIdentification>& pep_idents)
117  : prot_idents(prot_idents), pep_idents(pep_idents) {}
 /// Presumably returns prot_idents (definition not visible here) - confirm.
119  const std::vector<ProteinIdentification> & getProteinIdentifications();
 /// Presumably returns pep_idents (definition not visible here) - confirm.
121  const std::vector<PeptideIdentification> & getPeptideIdentifications();
122  };
123 
 /// Represents the result of an IDRipper process, a map assigning file content to output file identifiers.
 /// Keys are ordered by RipFileIdentifierIdxComparator.
125  typedef std::map<RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator> RipFileMap;
126 
 /// Default constructor.
128  IDRipper();
129 
 /// Destructor (overrides the base class destructor).
131  ~IDRipper() override;
132 
 /// Ripping protein/peptide identification according to their file origin.
 /// The result is delivered through the RipFileMap out-parameter @p ripped.
 /// NOTE(review): the exact effect of @p numeric_filenames and @p split_ident_runs is not visible in this header - confirm in the implementation.
145  void rip(
146  RipFileMap& ripped,
147  std::vector<ProteinIdentification>& proteins,
148  std::vector<PeptideIdentification>& peptides,
149  bool numeric_filenames,
150  bool split_ident_runs);
151 
 /// Overload that takes two vectors (@p rfis, @p rfcs) in place of a RipFileMap.
 /// NOTE(review): presumably @p rfis and @p rfcs are filled in parallel, one entry per output file - confirm.
164  // Autowrap compatible wrapper for rip(RipFileMap,...)
165  void rip(
166  std::vector<RipFileIdentifier> & rfis,
167  std::vector<RipFileContent> & rfcs,
168  std::vector<ProteinIdentification>& proteins,
169  std::vector<PeptideIdentification>& peptides,
170  bool numeric_filenames,
171  bool split_ident_runs);
172 
173 private:
174  // Not implemented
 /// Copy constructor, declared private (see the "Not implemented" note above).
176  IDRipper(const IDRipper & rhs);
177 
178  // Not implemented
 /// Assignment operator, declared private (see the "Not implemented" note above).
180  IDRipper & operator=(const IDRipper & rhs);
181 
 // NOTE(review): the private helpers below are declared only; their behavior is
 // inferred from names and signatures and must be confirmed in the implementation.
 /// Presumably determines which OriginAnnotationFormat the peptide identifications use and fills @p file_origin_map - confirm.
183  OriginAnnotationFormat detectOriginAnnotationFormat_(std::map<String, UInt> & file_origin_map, const std::vector<PeptideIdentification> & peptide_idents);
 /// Presumably collects into @p result the entries of @p protein_hits that match @p protein_accessions - confirm.
185  void getProteinHits_(std::vector<ProteinHit> & result, const std::vector<ProteinHit> & protein_hits, const std::vector<String> & protein_accessions);
 /// Presumably collects into @p result the protein accessions referenced by @p peptide_hits - confirm.
187  void getProteinAccessions_(std::vector<String> & result, const std::vector<PeptideHit> & peptide_hits);
 /// Presumably looks up in @p prot_idents the ProteinIdentification belonging to @p pep_ident and stores it in @p result - confirm.
 /// Note that @p pep_ident is taken by value.
189  void getProteinIdentification_(ProteinIdentification & result, PeptideIdentification pep_ident, std::vector<ProteinIdentification> & prot_idents);
 /// Presumably records the output basename of @p rfi in @p basename_to_numeric; the meaning of the bool result is not visible here - confirm.
191  bool registerBasename_(std::map<String, std::pair<UInt, UInt> >& basename_to_numeric, const IDRipper::RipFileIdentifier& rfi);
 /// Presumably updates @p mode with @p new_value; the meaning of the bool result is not visible here - confirm.
193  bool setOriginAnnotationMode_(short& mode, short const new_value);
194  };
195 
196 } // namespace OpenMS
LogStream.h
DefaultParamHandler.h
OpenMS::Math::PosteriorErrorProbabilityModel::extractAndTransformScores
static std::map< String, std::vector< std::vector< double > > > extractAndTransformScores(const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const bool split_charge, const bool top_hits_only, const bool target_decoy_available, const double fdr_for_targets_smaller)
extract and transform score types to a range and score orientation that the PEP model can handle
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
OpenMS::StringUtils::toInt
static Int toInt(const String &this_s)
Definition: StringUtils.h:206
OpenMS::IDRipper::RipFileIdentifierIdxComparator
Provides a 'less' operation for RipFileIdentifiers that ignores the out_basename and origin_fullname ...
Definition: IDRipper.h:103
OpenMS::IDRipper::RipFileMap
std::map< RipFileIdentifier, RipFileContent, RipFileIdentifierIdxComparator > RipFileMap
Represents the result of an IDRipper process, a map assigning file content to output file identifiers...
Definition: IDRipper.h:125
IDRipper.h
OpenMS::Param::setValue
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::IDRipper::OriginAnnotationFormat
OriginAnnotationFormat
Possible input file encodings for the origin as used by different versions of IDMerger.
Definition: IDRipper.h:57
OpenMS::IDRipper::RipFileIdentifier
Identifies an IDRipper output file.
Definition: IDRipper.h:75
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::Exception::Precondition
Precondition failed exception.
Definition: Exception.h:157
Int
IdXMLFile.h
OpenMS::IDRipper::RipFileContent::RipFileContent
RipFileContent(const std::vector< ProteinIdentification > &prot_idents, const std::vector< PeptideIdentification > &pep_idents)
Constructs a new RipFileContent object.
Definition: IDRipper.h:116
OpenMS::IDRipper::RipFileIdentifier::file_origin_idx
UInt file_origin_idx
The numerical index of the source file_origin / spectra_data element.
Definition: IDRipper.h:80
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::IDRipper::IdentificationRuns::index_map
std::map< String, UInt > index_map
Maps a unique index to every IdentificationRun string representation (getIdentifier()).
Definition: IDRipper.h:66
OpenMS::Exception::InvalidParameter
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:339
OpenMS::IDRipper::RipFileIdentifier::out_basename
String out_basename
The output basename derived from the file_origin / spectra_data element.
Definition: IDRipper.h:82
OpenMS::IDRipper
Ripping protein/peptide identification according to their file origin.
Definition: IDRipper.h:52
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::IDRipper::RipFileIdentifier::ident_run_idx
UInt ident_run_idx
The numerical index of the source IdentificationRun.
Definition: IDRipper.h:78
ProteinIdentification.h
OpenMS::Math::PosteriorErrorProbabilityModel::updateScores
static void updateScores(const PosteriorErrorProbabilityModel &PEP_model, const String &search_engine, const Int charge, const bool prob_correct, const bool split_charge, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids, bool &unable_to_fit_data, bool &data_might_not_be_well_fit)
update score entries with PEP (or 1-PEP) estimates
OpenMS::IDRipper::names_of_OriginAnnotationFormat
static const std::array< std::string, SIZE_OF_ORIGIN_ANNOTATION_FORMAT > names_of_OriginAnnotationFormat
String representations for the OriginAnnotationFormat enum.
Definition: IDRipper.h:60
OpenMS::IDRipper::IdentificationRuns
Represents a set of IdentificationRuns.
Definition: IDRipper.h:63
OpenMS::Param::exists
bool exists(const std::string &key) const
Tests if a parameter is set (expecting its fully qualified name, e.g., TextExporter:1:proteins_only)
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::String::toQString
QString toQString() const
Conversion to Qt QString.
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
OpenMS::Math::PosteriorErrorProbabilityModel::fit
bool fit(std::vector< double > &search_engine_scores, const String &outlier_handling)
fits the distributions to the data points(search_engine_scores). Estimated parameters for the distrib...
OpenMS::IDRipper::rip
void rip(RipFileMap &ripped, std::vector< ProteinIdentification > &proteins, std::vector< PeptideIdentification > &peptides, bool numeric_filenames, bool split_ident_runs)
Ripping protein/peptide identification according to their file origin.
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::IDRipper::RipFileContent::prot_idents
std::vector< ProteinIdentification > prot_idents
Protein identifications.
Definition: IDRipper.h:112
OpenMS::ParamValue::toString
std::string toString(bool full_precision=true) const
Convert ParamValue to string.
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
OpenMS::Math::PosteriorErrorProbabilityModel::plotTargetDecoyEstimation
void plotTargetDecoyEstimation(std::vector< double > &target, std::vector< double > &decoy)
plots the estimated distribution against target and decoy hits
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::IDRipper::RipFileIdentifier::origin_fullname
String origin_fullname
The full length origin read from the file_origin / spectra_data element.
Definition: IDRipper.h:84
OpenMS::Param::copy
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
OpenMS::IDRipper::RipFileContent::pep_idents
std::vector< PeptideIdentification > pep_idents
Peptide identifications.
Definition: IDRipper.h:114
OpenMS::IDRipper::RipFileContent
Represents the content of an IDRipper output file.
Definition: IDRipper.h:109
OpenMS::StringUtils::trim
static String & trim(String &this_s)
Definition: StringUtilsSimple.h:229
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:69
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
PeptideIdentification.h
OpenMS::Param::getValue
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
OpenMS::Math::PosteriorErrorProbabilityModel
Implements a mixture model of the inverse gumbel and the gauss distribution or a gaussian mixture.
Definition: PosteriorErrorProbabilityModel.h:74
OpenMS::Param::remove
void remove(const std::string &key)
Remove the entry key or a section key (when suffix is ':')
PosteriorErrorProbabilityModel.h
File.h
TOPPBase.h
OpenMS::IDRipper::IdentificationRuns::spectra_data
std::vector< StringList > spectra_data
Maps the list of spectra data elements to every IdentificationRun index.
Definition: IDRipper.h:68
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:68