OpenMS  3.0.0
PepXMLFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Chris Bielow, Hendrik Weisser $
32 // $Authors: Chris Bielow, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 #include <OpenMS/FORMAT/XMLFile.h>
45 
46 #include <vector>
47 #include <map>
48 #include <set>
49 
50 
51 namespace OpenMS
52 {
63  class OPENMS_DLLAPI PepXMLFile : // Used to load and store PepXML files.
64  protected Internal::XMLHandler, // Base class for XML handlers (inherited as protected).
65  public Internal::XMLFile // Base class for loading/storing XML files that have a handler derived from XMLHandler.
66  {
67 public:
68 
70  PepXMLFile(); // Constructor taking no arguments.
71 
73  ~PepXMLFile() override; // Destructor; overrides a virtual base-class destructor.
74 
87  void load(const String& filename, // Overload that also takes a SpectrumMetaDataLookup (helper class for looking up spectrum meta data).
88  std::vector<ProteinIdentification>& proteins, // NOTE(review): non-const reference, presumably filled by load() -- confirm.
89  std::vector<PeptideIdentification>& peptides, // NOTE(review): non-const reference, presumably filled by load() -- confirm.
90  const String& experiment_name,
91  const SpectrumMetaDataLookup& lookup);
92 
99  void load(const String& filename, // Overload without a lookup object; experiment_name may be omitted.
100  std::vector<ProteinIdentification>& proteins,
101  std::vector<PeptideIdentification>& peptides,
102  const String& experiment_name = "");
103 
109  void store(const String& filename, std::vector<ProteinIdentification>& protein_ids, // Only filename, protein_ids and peptide_ids are mandatory.
110  std::vector<PeptideIdentification>& peptide_ids, const String& mz_file = "",
111  const String& mz_name = "", bool peptideprophet_analyzed = false, double rt_tolerance = 0.01);
112 
120  void keepNativeSpectrumName(bool keep) // Whether we should keep the native spectrum name of the pepXML.
121  {
122  keep_native_name_ = keep; // Stores the flag in the member keep_native_name_.
123  }
124 
126  void setPreferredFixedModifications(const std::vector<const ResidueModification*>& mods); // NOTE(review): presumably stored in preferred_fixed_modifications_ -- confirm in the implementation.
127 
129  void setPreferredVariableModifications(const std::vector<const ResidueModification*>& mods); // NOTE(review): presumably stored in preferred_variable_modifications_ -- confirm in the implementation.
130 
132  void setParseUnknownScores(bool parse_unknown_scores); // NOTE(review): presumably sets parse_unknown_scores_ -- confirm in the implementation.
133 
134 protected:
135 
137  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override; // Override of a base-class callback; only qname is a named parameter.
138 
140  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override; // Override of a base-class callback; receives qname and the attributes.
141 
142 private:
143 
145  void makeScanMap_(); // NOTE(review): presumably fills scan_map_ -- confirm in the implementation.
146 
148  void readRTMZCharge_(const xercesc::Attributes& attributes); // NOTE(review): name suggests it reads RT, m/z and charge from the attributes -- confirm.
149 
151  { // Body of the nested class AminoAcidModification; its opening line (source line 150) and several documented members are not shown in this listing.
152  private:
153 
155  double massdiff_;
156  double mass_;
160  bool is_protein_terminus_; // "true" if protein terminus, "false" if peptide terminus
162  std::vector<String> errors_;
164 
165  const ResidueModification* lookupModInPreferredMods_(const std::vector<const ResidueModification*>& preferred_fixed_mods, // NOTE(review): presumably searches the given list for a matching modification; matching rules are not visible here.
166  const String& aminoacid,
167  double massdiff,
168  const String& description,
169  const ResidueModification::TermSpecificity term_spec,
170  double tolerance);
171 
172  public:
173  AminoAcidModification() = delete; // Default construction is disabled.
174 
180  const String& aminoacid, const String& massdiff, const String& mass, // NOTE(review): presumably the parameter list of the AminoAcidModification constructor; its opening line is missing from this listing.
181  String variable, const String& description, String terminus, const String& protein_terminus,
182  const std::vector<const ResidueModification*>& preferred_fixed_mods,
183  const std::vector<const ResidueModification*>& preferred_var_mods,
184  double tolerance);
185 
186  AminoAcidModification(const AminoAcidModification& rhs) = default; // Compiler-generated copy constructor.
187 
188  virtual ~AminoAcidModification() = default; // Virtual, compiler-generated destructor.
189 
190  AminoAcidModification& operator=(const AminoAcidModification& rhs) = default; // Compiler-generated copy assignment.
191 
192  String toUnimodLikeString() const;
193 
194  const String& getDescription() const;
195 
196  bool isVariable() const;
197 
198  const ResidueModification* getRegisteredMod() const;
199 
200  double getMassDiff() const;
201 
202  double getMass() const;
203 
204  const String& getTerminus() const;
205 
206  const String& getAminoAcid() const;
207 
208  const std::vector<String>& getErrors() const;
209  };
210 
212  std::vector<ProteinIdentification>* proteins_; // Pointer to the list of identified proteins.
213 
215  std::vector<PeptideIdentification>* peptides_; // Pointer to the list of identified peptides.
216 
219 
222 
225 
231 
233  bool use_precursor_data_{};
234 
236  std::map<Size, Size> scan_map_; // Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
237 
240 
243 
246 
249 
251  bool search_summary_{};
252 
254  bool wrong_experiment_{};
255 
257  bool seen_experiment_{};
258 
260  bool checked_base_name_{};
261 
263  bool has_decoys_{};
264 
266  bool parse_unknown_scores_{};
267 
270 
273 
275  std::vector<std::vector<ProteinIdentification>::iterator> current_proteins_; // References to currently active ProteinIdentifications.
276 
279 
283 
286 
289 
292 
295 
297  double rt_{}, mz_{};
298 
300  Size scannr_{};
301 
303  Int charge_{};
304 
306  UInt search_id_{};
307 
310 
313 
315  double hydrogen_mass_{};
316 
318  std::vector<std::pair<const ResidueModification*, Size> > current_modifications_; // The modifications of the current peptide hit (position is 1-based).
319 
321  std::vector<AminoAcidModification> fixed_modifications_; // Fixed aminoacid modifications as parsed from the header.
322 
324  std::vector<AminoAcidModification> variable_modifications_; // Variable aminoacid modifications as parsed from the header.
325 
328  std::vector<const ResidueModification*> preferred_fixed_modifications_;
329 
332  std::vector<const ResidueModification*> preferred_variable_modifications_;
333 
335 
336  static const double mod_tol_; // Declared here only; the definition is not visible in this listing.
337  static const double xtandem_artificial_mod_tol_; // Declared here only; the definition is not visible in this listing.
338 
341  bool lookupAddFromHeader_(double modification_mass, // NOTE(review): presumably matches a mass/position against header_mods and reports success -- confirm in the implementation.
342  Size modification_position,
343  std::vector<AminoAcidModification> const& header_mods);
344 
345  //static std::vector<int> getIsotopeErrorsFromIntSetting_(int intSetting);
346  };
347 } // namespace OpenMS
OpenMS::PepXMLFile::native_spectrum_name_
String native_spectrum_name_
Several optional attributes of spectrum_query.
Definition: PepXMLFile.h:227
OpenMS::PepXMLFile::swath_assay_
String swath_assay_
Definition: PepXMLFile.h:229
XMLFile.h
OpenMS::PepXMLFile::AminoAcidModification::registered_mod_
const ResidueModification * registered_mod_
Definition: PepXMLFile.h:163
OpenMS::PepXMLFile::analysis_summary_
bool analysis_summary_
Are we currently in an "analysis_summary" element (should be skipped)?
Definition: PepXMLFile.h:242
OpenMS::ResidueModification
Representation of a modification on an amino acid residue.
Definition: ResidueModification.h:78
OpenMS::PepXMLFile::prot_id_
String prot_id_
Identifier linking PeptideIdentifications and ProteinIdentifications.
Definition: PepXMLFile.h:309
OpenMS::Internal::XMLHandler
Base class for XML handlers.
Definition: XMLHandler.h:323
OpenMS::PepXMLFile::enzyme_cuttingsite_
String enzyme_cuttingsite_
Definition: PepXMLFile.h:282
OpenMS::Element
Representation of an element.
Definition: Element.h:57
OpenMS::PepXMLFile::AminoAcidModification::is_variable_
bool is_variable_
Definition: PepXMLFile.h:157
OpenMS::PeptideHit::PepXMLAnalysisResult
Analysis Result (containing search engine / prophet results)
Definition: PeptideHit.h:175
OpenMS::PepXMLFile::scan_map_
std::map< Size, Size > scan_map_
Mapping between scan number in the pepXML file and index in the corresponding MSExperiment.
Definition: PepXMLFile.h:236
OpenMS::PepXMLFile::current_proteins_
std::vector< std::vector< ProteinIdentification >::iterator > current_proteins_
References to currently active ProteinIdentifications.
Definition: PepXMLFile.h:275
OpenMS::PepXMLFile::mod_tol_
static const double mod_tol_
Definition: PepXMLFile.h:336
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::PepXMLFile::AminoAcidModification::is_protein_terminus_
bool is_protein_terminus_
Definition: PepXMLFile.h:160
OpenMS::PepXMLFile::hydrogen_
Element hydrogen_
Hydrogen data (for mass types)
Definition: PepXMLFile.h:239
OpenMS::PepXMLFile::keepNativeSpectrumName
void keepNativeSpectrumName(bool keep)
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:120
OpenMS::PepXMLFile::enzyme_
String enzyme_
Enzyme name associated with the current identification run.
Definition: PepXMLFile.h:281
OpenMS::Size
size_t Size
Size type, e.g. used for variables that hold the result of size().
Definition: Types.h:127
OpenMS::PepXMLFile::peptide_hit_
PeptideHit peptide_hit_
PeptideHit instance currently being processed.
Definition: PepXMLFile.h:291
OpenMS::PepXMLFile::xtandem_artificial_mod_tol_
static const double xtandem_artificial_mod_tol_
Definition: PepXMLFile.h:337
OpenMS::PepXMLFile::AminoAcidModification
Definition: PepXMLFile.h:150
OpenMS::PepXMLFile::current_peptide_
PeptideIdentification current_peptide_
PeptideIdentification instance currently being processed.
Definition: PepXMLFile.h:285
XMLHandler.h
SpectrumMetaDataLookup.h
ResidueDB.h
OpenMS::PepXMLFile::AminoAcidModification::terminus_
String terminus_
Definition: PepXMLFile.h:159
OpenMS::PepXMLFile::date_
DateTime date_
Date the pepXML file was generated.
Definition: PepXMLFile.h:312
Element.h
OpenMS::PepXMLFile::params_
ProteinIdentification::SearchParameters params_
Search parameters of the current identification run.
Definition: PepXMLFile.h:278
OpenMS::PepXMLFile::proteins_
std::vector< ProteinIdentification > * proteins_
Pointer to the list of identified proteins.
Definition: PepXMLFile.h:212
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::PepXMLFile::search_engine_
String search_engine_
Set name of search engine.
Definition: PepXMLFile.h:224
OpenMS::PepXMLFile::lookup_
const SpectrumMetaDataLookup * lookup_
Pointer to wrapper for looking up spectrum meta data.
Definition: PepXMLFile.h:218
OpenMS::PepXMLFile::variable_modifications_
std::vector< AminoAcidModification > variable_modifications_
Variable aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:324
OpenMS::ResidueModification::TermSpecificity
TermSpecificity
Position where the modification is allowed to occur.
Definition: ResidueModification.h:97
OpenMS::PepXMLFile::preferred_variable_modifications_
std::vector< const ResidueModification * > preferred_variable_modifications_
Definition: PepXMLFile.h:332
OpenMS::PepXMLFile::AminoAcidModification::description_
String description_
Definition: PepXMLFile.h:158
OpenMS::PepXMLFile::AminoAcidModification::massdiff_
double massdiff_
Definition: PepXMLFile.h:155
OpenMS::PepXMLFile::AminoAcidModification::errors_
std::vector< String > errors_
Definition: PepXMLFile.h:162
OpenMS::PepXMLFile::decoy_prefix_
String decoy_prefix_
In case it has decoys, what is the prefix?
Definition: PepXMLFile.h:269
ProteinIdentification.h
OpenMS::PepXMLFile::search_score_summary_
bool search_score_summary_
Are we currently in a "search_score_summary" element (should be skipped)?
Definition: PepXMLFile.h:248
OpenMS::PepXMLFile::preferred_fixed_modifications_
std::vector< const ResidueModification * > preferred_fixed_modifications_
Definition: PepXMLFile.h:328
OpenMS::PepXMLFile::status_
String status_
Definition: PepXMLFile.h:230
OpenMS::PepXMLFile::AminoAcidModification::term_spec_
ResidueModification::TermSpecificity term_spec_
Definition: PepXMLFile.h:161
OpenMS::PepXMLFile::keep_native_name_
bool keep_native_name_
Whether we should keep the native spectrum name of the pepXML.
Definition: PepXMLFile.h:245
OpenMS::PepXMLFile::AminoAcidModification::mass_
double mass_
Definition: PepXMLFile.h:156
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::PepXMLFile::fixed_modifications_
std::vector< AminoAcidModification > fixed_modifications_
Fixed aminoacid modifications as parsed from the header.
Definition: PepXMLFile.h:321
OpenMS::SpectrumMetaDataLookup
Helper class for looking up spectrum meta data.
Definition: SpectrumMetaDataLookup.h:142
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::PepXMLFile::exp_name_
String exp_name_
Name of the associated experiment (filename of the data file, extension will be removed)
Definition: PepXMLFile.h:221
AASequence.h
OpenMS::PepXMLFile::current_base_name_
String current_base_name_
Current base name.
Definition: PepXMLFile.h:272
OpenMS::PepXMLFile::current_analysis_result_
PeptideHit::PepXMLAnalysisResult current_analysis_result_
Analysis result instance currently being processed.
Definition: PepXMLFile.h:288
OpenMS::PepXMLFile::experiment_label_
String experiment_label_
Definition: PepXMLFile.h:228
OpenMS::PepXMLFile::AminoAcidModification::aminoacid_
String aminoacid_
Definition: PepXMLFile.h:154
OpenMS::Internal::XMLFile
Base class for loading/storing XML files that have a handler derived from XMLHandler.
Definition: XMLFile.h:48
OpenMS::PepXMLFile::current_sequence_
String current_sequence_
Sequence of the current peptide hit.
Definition: PepXMLFile.h:294
OpenMS::PepXMLFile
Used to load and store PepXML files.
Definition: PepXMLFile.h:63
PeptideIdentification.h
OpenMS::ProteinIdentification::SearchParameters
Search parameters of the DB search.
Definition: ProteinIdentification.h:258
OpenMS::PepXMLFile::current_modifications_
std::vector< std::pair< const ResidueModification *, Size > > current_modifications_
The modifications of the current peptide hit (position is 1-based)
Definition: PepXMLFile.h:318
OpenMS::DateTime
DateTime Class.
Definition: DateTime.h:58
OpenMS::PepXMLFile::peptides_
std::vector< PeptideIdentification > * peptides_
Pointer to the list of identified peptides.
Definition: PepXMLFile.h:215
OpenMS::PeptideHit
Representation of a peptide hit.
Definition: PeptideHit.h:55