OpenMS  3.0.0
MzMLHandler.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Marc Sturm, Chris Bielow, Hannes Roest $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Helpers.h>
39 
41 
44 
47 
51 
52 #include <map>
53 
54 
55 //MISSING:
56 // - more than one selected ion per precursor (warning if more than one)
57 // - scanWindowList for each acquisition separately (currently for the whole spectrum only)
58 // - instrumentConfigurationRef attribute for scan (why should the instrument change between scans? - warning if used)
59 // - scanSettingsRef attribute for instrumentConfiguration tag (currently no information there because of missing mapping file entry - warning if used)
60 
61 // xs:id/xs:idref prefix list
62 // - sf_ru : sourceFile (run)
63 // - sf_sp : sourceFile (spectrum)
64 // - sf_pr : sourceFile (precursor)
65 // - sf_ac : sourceFile (acquisition)
66 // - sa : sample
67 // - ic : instrumentConfiguration
68 // - so_dp : software (data processing)
69 // - so_in : software (instrument)
70 // - dp_sp : dataProcessing (spectrum)
71 // - dp_bi : dataProcessing (binary data array)
72 // - dp_ch : dataProcessing (chromatogram)
73 
74 namespace OpenMS
75 {
76  namespace Interfaces
77  {
78  class IMSDataConsumer; // forward declaration: interface of a consumer of spectra and chromatograms; MzMLHandler only refers to it through a pointer
79  }
80 
81  namespace Internal
82  {
83  class MzMLValidator; // forward declaration: passed by const reference to the write*_ helpers and validateCV_() of MzMLHandler
84 
85  typedef PeakMap MapType; // experiment type the handler is constructed with
86  typedef MSSpectrum SpectrumType; // spectrum type used in the handler's signatures and members
88 
118  class OPENMS_DLLAPI MzMLHandler : // Handler for the mzML file format.
119  public XMLHandler // base class for XML handlers
120  {
121 public:
122 
125 
127  MzMLHandler(MapType& exp, const String& filename, const String& version, const ProgressLogger& logger); // takes a mutable experiment (presumably the loading case, cf. exp_ - TODO confirm)
128 
130  MzMLHandler(const MapType& exp, const String& filename, const String& version, const ProgressLogger& logger); // takes a read-only experiment (presumably the writing case, cf. cexp_ - TODO confirm)
131 
133  ~MzMLHandler() override;
135 
141 
143  void endElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname) override; // overrides the base-class callback; only qname is named, uri and local_name are unused parameters
144 
146  void startElement(const XMLCh* const /*uri*/, const XMLCh* const /*local_name*/, const XMLCh* const qname, const xercesc::Attributes& attributes) override; // overrides the base-class callback; receives the element's qname and attributes
147 
149  void characters(const XMLCh* const chars, const XMLSize_t length) override; // overrides the base-class callback for character data
150 
152  void writeTo(std::ostream& os) override; // overrides the base-class writeTo(); output target is the given stream
153 
155 
166 
168  void setOptions(const PeakFileOptions& opt); // sets the load/store options (PeakFileOptions)
169 
171  PeakFileOptions& getOptions(); // returns a mutable reference to the options
172 
174 
176  void getCounts(Size& spectra_counts, Size& chromatogram_counts); // results are delivered through the two reference out-parameters
177 
187 
189  void setMSDataConsumer(Interfaces::IMSDataConsumer* consumer); // NOTE(review): raw pointer; ownership/lifetime is not visible in this header - confirm against the implementation
191 
193  LOADDETAIL getLoadDetail() const override;
194 
196  void setLoadDetail(const LOADDETAIL d) override;
197 
198 protected:
199 
201  MzMLHandler(const String& filename, const String& version, const ProgressLogger& logger); // protected constructor that takes no experiment
202 
211 
213 
218 
225  void populateSpectraWithData_(); // no-argument overload; see the four-parameter overload below
226 
233  void populateChromatogramsWithData_(); // no-argument overload; see the four-parameter overload below
234 
240  void addSpectrumMetaData_(const std::vector<MzMLHandlerHelper::BinaryData>& input_data,
241  const Size n,
242  SpectrumType& spectrum) const; // const member: input_data is read-only, spectrum is the mutable target
243 
259  void populateSpectraWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
260  Size& length,
261  const PeakFileOptions& peak_file_options,
262  SpectrumType& spectrum); // overload working on explicitly passed binary data, options and target spectrum
263 
276  void populateChromatogramsWithData_(std::vector<MzMLHandlerHelper::BinaryData>& input_data,
277  Size& length,
278  const PeakFileOptions& peak_file_options,
279  ChromatogramType& chromatogram); // chromatogram counterpart of the populateSpectraWithData_ overload above
280 
282  void fillChromatogramData_();
283 
285  void handleCVParam_(const String& parent_parent_tag,
286  const String& parent_tag,
287  const String& accession,
288  const String& name,
289  const String& value,
290  const String& unit_accession = ""); // unit_accession is optional and defaults to the empty string
291 
293  void handleUserParam_(const String& parent_parent_tag,
294  const String& parent_tag,
295  const String& name,
296  const String& type,
297  const String& value,
298  const String& unit_accession = ""); // unit_accession is optional and defaults to the empty string
300 
306 
308  void writeHeader_(std::ostream& os,
309  const MapType& exp,
310  std::vector<std::vector< ConstDataProcessingPtr > >& dps,
311  const Internal::MzMLValidator& validator); // dps is a non-const reference, so it may be modified by the call
312 
313 
315  void writeSpectrum_(std::ostream& os,
316  const SpectrumType& spec,
317  Size spec_idx,
318  const Internal::MzMLValidator& validator,
319  bool renew_native_ids,
320  std::vector<std::vector< ConstDataProcessingPtr > >& dps);
321 
323  void writeChromatogram_(std::ostream& os,
324  const ChromatogramType& chromatogram,
325  Size chrom_idx,
326  const Internal::MzMLValidator& validator);
327 
328  template <typename ContainerT>
329  void writeContainerData_(std::ostream& os, const PeakFileOptions& pf_options_, const ContainerT& container, String array_type); // templated on the container type; array_type is passed by value
330 
343  template <typename DataType>
344  void writeBinaryDataArray_(std::ostream& os,
345  const PeakFileOptions& options,
346  std::vector<DataType>& data,
347  bool is32bit,
348  String array_type); // data is taken by non-const reference
349 
364  void writeBinaryFloatDataArray_(std::ostream& os,
365  const PeakFileOptions& options, // NOTE(review): source line 366 (one parameter) is missing from this listing
367  const Size spec_chrom_idx,
368  const Size array_idx,
369  bool is_spectrum,
370  const Internal::MzMLValidator& validator);
371 
373  void writeUserParam_(std::ostream& os, const MetaInfoInterface& meta, UInt indent, const String& path, const Internal::MzMLValidator& validator, const std::set<String>& exclude = {}) const; // exclude defaults to an empty set
374 
376  void writeSoftware_(std::ostream& os, const String& id, const Software& software, const Internal::MzMLValidator& validator);
377 
379  void writeSourceFile_(std::ostream& os, const String& id, const SourceFile& software, const Internal::MzMLValidator& validator); // NOTE(review): the SourceFile parameter is named 'software' - looks like a copy/paste leftover
380 
382  void writeDataProcessing_(std::ostream& os, const String& id, const std::vector< ConstDataProcessingPtr >& dps, const Internal::MzMLValidator& validator);
383 
385  void writePrecursor_(std::ostream& os, const Precursor& precursor, const Internal::MzMLValidator& validator);
386 
388  void writeProduct_(std::ostream& os, const Product& product, const Internal::MzMLValidator& validator);
389 
391  String writeCV_(const ControlledVocabulary::CVTerm& c, const DataValue& metaValue) const; // returns its result as a String instead of writing to a stream
392 
394  bool validateCV_(const ControlledVocabulary::CVTerm& c, const String& path, const Internal::MzMLValidator& validator) const; // const member; the mutable cached_terms_ member holds the combinations already checked here
395 
397  ControlledVocabulary::CVTerm getChildWithName_(const String& parent_accession, const String& name) const;
398 
400 
401  // MEMBERS
402 
404  MapType* exp_{ nullptr }; ///< mutable experiment pointer; nullptr by default
405 
407  const MapType* cexp_{ nullptr }; ///< read-only experiment pointer; nullptr by default
408 
411 
414  SpectrumType spec_; ///< presumably the spectrum currently being processed - TODO confirm
419  std::vector<BinaryData> bin_data_; ///< The spectrum data (or chromatogram data)
423  bool in_spectrum_list_{ false };
425  bool skip_spectrum_{ false };
427  bool skip_chromatogram_{ false };
429  bool rt_set_{ false };
433  std::map<String, std::vector<SemanticValidator::CVTerm> > ref_param_; ///< The referencing param groups: id => array (accession, value)
435  std::map<String, SourceFile> source_files_; ///< The source files: id => SourceFile
437  std::map<String, Sample> samples_; ///< The sample list: id => Sample
439  std::map<String, Software> software_; ///< The software list: id => Software
441  std::map<String, Instrument> instruments_; ///< The instrument list: id => Instrument
443  mutable std::map<std::pair<String, String>, bool> cached_terms_; ///< CV terms-path-combinations that have been checked in validateCV_(); mutable so the const validateCV_() can update it
445  std::map<String, std::vector< DataProcessingPtr > > processing_; ///< The data processing list: id => data processing steps
449  UInt selected_ion_count_{ 0 }; ///< presumably counts selected ions of the current precursor (cf. the MISSING note at the top of the file) - TODO confirm
450 
459  { // body of SpectrumData (data necessary to generate a single spectrum); header line 458 and member lines 461-462 are missing from this listing
460  std::vector<BinaryData> data;
463  };
464 
466  std::vector<SpectrumData> spectrum_data_; ///< Vector of spectrum data stored for later parallel processing
467 
476  { // body of ChromatogramData (data necessary to generate a single chromatogram); header line 475 and member lines 478-479 are missing from this listing
477  std::vector<BinaryData> data;
480  };
481 
483  std::vector<ChromatogramData> chromatogram_data_; ///< Vector of chromatogram data stored for later parallel processing
484 
486 
494  std::vector<std::pair<std::string, Int64> > spectra_offsets_; ///< Stores binary offsets for each <spectrum> tag
495  std::vector<std::pair<std::string, Int64> > chromatograms_offsets_; ///< Stores binary offsets for each <chromatogram> tag
496 
497 
500 
502  Interfaces::IMSDataConsumer* consumer_{ nullptr }; ///< consumer pointer, nullptr by default; see setMSDataConsumer()
503 
505  UInt scan_count_{ 0 };
506  UInt chromatogram_count_{ 0 };
507  Int scan_count_total_{ -1 }; ///< initialized to -1 (presumably "not yet known" - TODO confirm)
508  Int chrom_count_total_{ -1 }; ///< initialized to -1 (presumably "not yet known" - TODO confirm)
509 
510 
514 
515  };
516 
517  //--------------------------------------------------------------------------------
518 
519  } // namespace Internal
520 } // namespace OpenMS
521 
OpenMS::Software
Description of the software used for processing.
Definition: Software.h:48
OpenMS::Internal::MzMLHandler::logger_
const ProgressLogger & logger_
Progress logger.
Definition: MzMLHandler.h:499
OpenMS::Internal::MzMLHandler::samples_
std::map< String, Sample > samples_
The sample list: id => Sample.
Definition: MzMLHandler.h:437
OpenMS::Internal::MzMLHandler::mapping_
CVMappings mapping_
Definition: MzMLHandler.h:513
OpenMS::ControlledVocabulary
Representation of a controlled vocabulary.
Definition: ControlledVocabulary.h:54
OpenMS::Internal::XMLHandler
Base class for XML handlers.
Definition: XMLHandler.h:323
OpenMS::Interfaces::IMSDataConsumer
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:69
ControlledVocabulary.h
OpenMS::Internal::MzMLHandler::BinaryData
MzMLHandlerHelper::BinaryData BinaryData
Definition: MzMLHandler.h:212
OpenMS::Internal::MzMLHandler::current_id_
String current_id_
Id of the current list. Used for referencing param group, source file, sample, software,...
Definition: MzMLHandler.h:431
OpenMS::Internal::MapType
PeakMap MapType
XML handler for MzDataFile.
Definition: MzDataHandler.h:59
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::Internal::MzMLHandler::spectra_offsets_
std::vector< std::pair< std::string, Int64 > > spectra_offsets_
Stores binary offsets for each <spectrum> tag.
Definition: MzMLHandler.h:494
OpenMS::Internal::MzMLHandler::cv_
const ControlledVocabulary & cv_
Controlled vocabulary (psi-ms from OpenMS/share/OpenMS/CV/psi-ms.obo)
Definition: MzMLHandler.h:512
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::Constants::c
const double c
Definition: Constants.h:209
OpenMS::Internal::MzMLHandler::cached_terms_
std::map< std::pair< String, String >, bool > cached_terms_
CV terms-path-combinations that have been checked in validateCV_()
Definition: MzMLHandler.h:443
OpenMS::Internal::MzMLHandler::ref_param_
std::map< String, std::vector< SemanticValidator::CVTerm > > ref_param_
The referencing param groups: id => array (accession, value)
Definition: MzMLHandler.h:433
XMLHandler.h
OpenMS::Internal::MzMLHandler::SpectrumData::default_array_length
Size default_array_length
Definition: MzMLHandler.h:461
OpenMS::Internal::XMLHandler::LOADDETAIL
LOADDETAIL
Definition: XMLHandler.h:347
OpenMS::Internal::MzMLHandler::ChromatogramData
Data necessary to generate a single chromatogram.
Definition: MzMLHandler.h:475
OpenMS::CVMappings
Representation of controlled vocabulary mapping rules (for PSI formats)
Definition: CVMappings.h:56
OpenMS::Internal::MzMLHandler::options_
PeakFileOptions options_
Options that can be set for loading/storing.
Definition: MzMLHandler.h:410
OpenMS::Internal::MzMLHandler::instruments_
std::map< String, Instrument > instruments_
The instrument list: id => Instrument.
Definition: MzMLHandler.h:441
OpenMS::Precursor
Precursor meta information.
Definition: Precursor.h:58
Helpers.h
OpenMS::Internal::MzMLHandler::ChromatogramPeakType
MapType::ChromatogramPeakType ChromatogramPeakType
Chromatogram peak type.
Definition: MzMLHandler.h:206
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::Internal::MzMLHandlerHelper::BinaryData
Representation for binary data in mzML.
Definition: MzMLHandlerHelper.h:69
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
OpenMS::DataValue
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:58
CVMappings.h
ProgressLogger.h
OpenMS::Internal::MzMLHandler::ChromatogramData::chromatogram
ChromatogramType chromatogram
Definition: MzMLHandler.h:479
OpenMS::MetaInfoInterface
Interface for classes that can store arbitrary meta information (Type-Name-Value tuples).
Definition: MetaInfoInterface.h:60
OpenMS::ChromatogramPeak
A 1-dimensional raw data point or peak for chromatograms.
Definition: ChromatogramPeak.h:53
OpenMS::Internal::MzMLHandler::PeakType
MapType::PeakType PeakType
Peak type.
Definition: MzMLHandler.h:204
OpenMS::Internal::MzMLHandler::default_array_length_
Size default_array_length_
The default number of peaks in the current spectrum.
Definition: MzMLHandler.h:421
OpenMS::Internal::SpectrumType
MSSpectrum SpectrumType
Definition: MzDataHandler.h:60
OpenMS::Internal::MzMLHandler::SpectrumData::data
std::vector< BinaryData > data
Definition: MzMLHandler.h:460
OpenMS::Internal::MzMLValidator
Semantically validates mzML files.
Definition: MzMLValidator.h:49
OpenMS::Internal::MzMLHandler::SpectrumData::spectrum
SpectrumType spectrum
Definition: MzMLHandler.h:462
OpenMS::Internal::MzMLHandler::chromatogram_
ChromatogramType chromatogram_
The current chromatogram.
Definition: MzMLHandler.h:417
OpenMS::Internal::MzMLHandler::SpectrumType
MSSpectrum SpectrumType
Spectrum type.
Definition: MzMLHandler.h:208
SemanticValidator.h
OpenMS::Peak1D
A 1-dimensional raw data point or peak.
Definition: Peak1D.h:53
OpenMS::Internal::MzMLHandler::bin_data_
std::vector< BinaryData > bin_data_
The spectrum data (or chromatogram data)
Definition: MzMLHandler.h:419
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::Internal::MzMLHandler::source_files_
std::map< String, SourceFile > source_files_
The source files: id => SourceFile.
Definition: MzMLHandler.h:435
OpenMS::Internal::MzMLHandler::default_processing_
String default_processing_
id of the default data processing (used when no processing is defined)
Definition: MzMLHandler.h:447
PeakFileOptions.h
MSExperiment.h
OpenMS::ControlledVocabulary::CVTerm
Representation of a CV term.
Definition: ControlledVocabulary.h:60
OpenMS::Internal::ChromatogramType
MSChromatogram ChromatogramType
Definition: MzDataHandler.h:61
OpenMS::PeakFileOptions
Options for loading files containing peak data.
Definition: PeakFileOptions.h:47
OpenMS::MSChromatogram
The representation of a chromatogram.
Definition: MSChromatogram.h:53
OpenMS::SourceFile
Description of a file location, used to store the origin of (meta) data.
Definition: SourceFile.h:46
OpenMS::Internal::MzMLHandler::processing_
std::map< String, std::vector< DataProcessingPtr > > processing_
The data processing list: id => list of data processing steps.
Definition: MzMLHandler.h:445
OpenMS::Internal::MzMLHandler::chromatogram_data_
std::vector< ChromatogramData > chromatogram_data_
Vector of chromatogram data stored for later parallel processing.
Definition: MzMLHandler.h:483
OpenMS::Internal::MzMLHandler::ChromatogramType
MSChromatogram ChromatogramType
Chromatogram type.
Definition: MzMLHandler.h:210
OpenMS::Internal::MzMLHandler::chromatograms_offsets_
std::vector< std::pair< std::string, Int64 > > chromatograms_offsets_
Stores binary offsets for each <chromatogram> tag.
Definition: MzMLHandler.h:495
OpenMS::Product
Product meta information.
Definition: Product.h:48
OpenMS::Internal::MzMLHandler::software_
std::map< String, Software > software_
The software list: id => Software.
Definition: MzMLHandler.h:439
OpenMS::MSSpectrum
The representation of a 1D spectrum.
Definition: MSSpectrum.h:66
OpenMS::DataArrays::FloatDataArray
Float data array class.
Definition: DataArrays.h:45
StandardTypes.h
OpenMS::Internal::MzMLHandler::SpectrumData
Data necessary to generate a single spectrum.
Definition: MzMLHandler.h:458
OpenMS::Internal::MzMLHandler::spectrum_data_
std::vector< SpectrumData > spectrum_data_
Vector of spectrum data stored for later parallel processing.
Definition: MzMLHandler.h:466
OpenMS::Internal::MzMLHandler
Handler for mzML file format.
Definition: MzMLHandler.h:118
MzMLHandlerHelper.h