OpenMS  3.0.0
AccurateMassSearchEngine.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Erhan Kenar, Chris Bielow $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
38 #include <OpenMS/KERNEL/Feature.h>
42 #include <OpenMS/FORMAT/MzTab.h>
43 #include <OpenMS/FORMAT/MzTabM.h>
48 #include <OpenMS/SYSTEM/File.h>
50 
51 
52 #include <iosfwd>
53 #include <vector>
54 
55 namespace OpenMS
56 {
57  class OPENMS_DLLAPI AccurateMassSearchResult
58  {
59  public:
62 
65 
68 
71 
73  double getObservedMZ() const;
74 
76  void setObservedMZ(const double&);
77 
79  double getCalculatedMZ() const;
80 
82  void setCalculatedMZ(const double&);
83 
85  double getQueryMass() const;
86 
88  void setQueryMass(const double&);
89 
91  double getFoundMass() const;
92 
94  void setFoundMass(const double&);
95 
97  Int getCharge() const;
98 
100  void setCharge(const Int&);
101 
103  double getMZErrorPPM() const;
104 
106  void setMZErrorPPM(const double);
107 
109  double getObservedRT() const;
110 
112  void setObservedRT(const double& rt);
113 
115  double getObservedIntensity() const;
116 
118  void setObservedIntensity(const double&);
119 
121  std::vector<double> getIndividualIntensities() const;
122 
124  void setIndividualIntensities(const std::vector<double>&);
125 
126  Size getMatchingIndex() const;
127  void setMatchingIndex(const Size&);
128 
129  Size getSourceFeatureIndex() const;
130  void setSourceFeatureIndex(const Size&);
131 
132  const String& getFoundAdduct() const;
133  void setFoundAdduct(const String&);
134 
135  const String& getFormulaString() const;
136  void setEmpiricalFormula(const String&);
137 
138  const std::vector<String>& getMatchingHMDBids() const;
139  void setMatchingHMDBids(const std::vector<String>&);
140 
142  const std::vector<double>& getMasstraceIntensities() const;
143  void setMasstraceIntensities(const std::vector<double>&);
144 
145  double getIsotopesSimScore() const;
146  void setIsotopesSimScore(const double&);
147 
148  // debug/output functions
149  friend OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
150 
151 private:
153  double observed_mz_;
156  double db_mass_;
159  double observed_rt_;
161  std::vector<double> individual_intensities_;
164 
167  std::vector<String> matching_hmdb_ids_;
168 
169  std::vector<double> mass_trace_intensities_;
171  };
172 
173  OPENMS_DLLAPI std::ostream& operator<<(std::ostream& os, const AccurateMassSearchResult& amsr);
174 
206  class OPENMS_DLLAPI AccurateMassSearchEngine :
207  public DefaultParamHandler,
208  public ProgressLogger
209  {
210 public:
211 
213  static constexpr char search_engine_identifier[] = "AccurateMassSearchEngine";
214 
217 
219  ~AccurateMassSearchEngine() override;
220 
226  void queryByMZ(const double& observed_mz, const Int& observed_charge, const String& ion_mode, std::vector<AccurateMassSearchResult>& results, const EmpiricalFormula& observed_adduct = EmpiricalFormula()) const;
227  void queryByFeature(const Feature& feature, const Size& feature_index, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
228  void queryByConsensusFeature(const ConsensusFeature& cfeat, const Size& cf_index, const Size& number_of_maps, const String& ion_mode, std::vector<AccurateMassSearchResult>& results) const;
229 
232  void run(FeatureMap&, MzTab&) const;
233 
234  void run(FeatureMap&, MzTabM&) const;
235 
239  void run(ConsensusMap&, MzTab&) const;
240 
242  void init();
243 
244 protected:
245  void updateMembers_() override;
246 
247 private:
249 
250  // Derives the ion mode ("positive" or "negative") from the 'scan_polarity' meta value of the first element of @p map.
251  // Returns an empty String if the map is empty; throws Exception::InvalidParameter if the meta value is missing, ambiguous or unknown.
252  template <typename MAPTYPE> String resolveAutoMode_(const MAPTYPE& map) const
253  {
254  String ion_mode_internal;
255  String ion_mode_detect_msg = ""; // stays empty unless detection fails
256  if (map.size() > 0)
257  {
258  if (map[0].metaValueExists("scan_polarity"))
259  {
260  StringList pols = ListUtils::create<String>(String(map[0].getMetaValue("scan_polarity")), ';'); // ';'-separated list of polarities
261  if (pols.size() == 1 && !pols[0].empty())
262  {
263  pols[0].toLower();
264  if (pols[0] == "positive" || pols[0] == "negative")
265  {
266  ion_mode_internal = pols[0];
267  OPENMS_LOG_INFO << "Setting auto ion-mode to '" << ion_mode_internal << "' for file " << File::basename(map.getLoadedFilePath()) << std::endl;
268  }
269  else ion_mode_detect_msg = String("Meta value 'scan_polarity' contains unknown ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
270  }
271  else
272  { // zero or several polarities listed: cannot pick one
273  ion_mode_detect_msg = String("ambiguous ion mode: ") + String(map[0].getMetaValue("scan_polarity"));
274  }
275  }
276  else
277  {
278  ion_mode_detect_msg = String("Meta value 'scan_polarity' not found in (Consensus-)Feature map");
279  }
280  }
281  else
282  { // do nothing, since map is empty: not an error, the (empty) ion mode is returned as is
283  OPENMS_LOG_INFO << "Meta value 'scan_polarity' cannot be determined since (Consensus-)Feature map is empty!" << std::endl;
284  }
285 
286  if (!ion_mode_detect_msg.empty())
287  {
288  throw Exception::InvalidParameter(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, String("Auto ionization mode could not resolve ion mode of data (") + ion_mode_detect_msg + ")!");
289  }
290 
291  return ion_mode_internal;
292  }
293 
294  void parseMappingFile_(const StringList&);
295  void parseStructMappingFile_(const StringList&);
296  void parseAdductsFile_(const String& filename, std::vector<AdductInfo>& result);
297  void searchMass_(double neutral_query_mass, double diff_mass, std::pair<Size, Size>& hit_indices) const;
298 
300  void annotate_(const std::vector<AccurateMassSearchResult>&, BaseFeature&) const;
301 
303  std::vector<AccurateMassSearchResult> extractQueryResults_(const Feature& feature, const Size& feature_index, const String& ion_mode_internal, Size& dummy_count) const;
304 
306  void addMatchesToID_(
307  IdentificationData& id,
308  const std::vector<AccurateMassSearchResult>& amr,
309  const IdentificationData::InputFileRef& file_ref,
310  const IdentificationData::ScoreTypeRef& mass_error_ppm_score_ref,
311  const IdentificationData::ScoreTypeRef& mass_error_Da_score_ref,
313  BaseFeature& f) const;
314 
317  double computeCosineSim_(const std::vector<double>& x, const std::vector<double>& y) const;
318 
319  double computeIsotopePatternSimilarity_(const Feature& feat, const EmpiricalFormula& form) const;
320 
321  typedef std::vector<std::vector<AccurateMassSearchResult> > QueryResultsTable;
322 
323  void exportMzTab_(const QueryResultsTable& overall_results, const Size number_of_maps, MzTab& mztab_out, const std::vector<String>& file_locations) const;
324 
325  void exportMzTabM_(const FeatureMap& fmap, MzTabM& mztabm_out) const;
326 
328  typedef std::vector<std::vector<String> > MassIDMapping;
329  typedef std::map<String, std::vector<String> > HMDBPropsMapping;
330 
331  struct MappingEntry_ // element type of mass_mappings_; CompareEntryAndMass_ orders these by 'mass'
332  {
333  double mass;
334  std::vector<String> massIDs;
335  String formula; // restored from the cross-reference index of this listing (defined at line 335)
336  };
337  std::vector<MappingEntry_> mass_mappings_;
338 
339  struct CompareEntryAndMass_ // "less than by mass" functor; lives in the header so the compiler can inline it
340  {
341  double asMass(const MappingEntry_& entry) const // a mapping entry is represented by its stored mass
342  {
343  return entry.mass;
344  }
345 
346  double asMass(double mass) const // a plain mass is used unchanged
347  {
348  return mass;
349  }
350 
351  template <typename Lhs, typename Rhs> // each side may be a MappingEntry_ or a double
352  bool operator()(const Lhs& lhs, const Rhs& rhs) const
353  {
354  return asMass(lhs) < asMass(rhs);
355  }
356 
357  };
358 
359  HMDBPropsMapping hmdb_properties_mapping_; // members on lines 359-384 restored from the cross-reference index of this listing
360 
361  bool is_initialized_; // true if init() was called without any subsequent param changes
362 
363  bool legacyID_ = true;
364 
365  // parameter stuff
366  double mass_error_value_;
367  String mass_error_unit_;
368  String ion_mode_;
369  bool iso_similarity_;
370 
371  String pos_adducts_fname_;
372  String neg_adducts_fname_;
373 
374  StringList db_mapping_file_;
375  StringList db_struct_file_;
376 
377  std::vector<AdductInfo> pos_adducts_;
378  std::vector<AdductInfo> neg_adducts_;
379 
380  String database_name_;
381  String database_version_;
382  String database_location_;
383 
384  bool keep_unidentified_masses_;
385  };
386 
387 }
OpenMS::AccurateMassSearchResult::isotopes_sim_score_
double isotopes_sim_score_
Definition: AccurateMassSearchEngine.h:170
DefaultParamHandler.h
OpenMS::AccurateMassSearchResult::found_adduct_
String found_adduct_
Definition: AccurateMassSearchEngine.h:165
ConsensusXMLFile.h
MzTabFile.h
OpenMS::AccurateMassSearchEngine::CompareEntryAndMass_::asMass
double asMass(double t) const
Definition: AccurateMassSearchEngine.h:346
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
FileHandler.h
FileTypes.h
OpenMS::MzTabFile
File adapter for MzTab files.
Definition: MzTabFile.h:55
OpenMS::AccurateMassSearchEngine::pos_adducts_fname_
String pos_adducts_fname_
Definition: AccurateMassSearchEngine.h:371
OpenMS::AccurateMassSearchResult::observed_mz_
double observed_mz_
Stored information/results of DB query.
Definition: AccurateMassSearchEngine.h:153
MzTabM.h
OpenMS::OMSFile
This class supports reading and writing of OMS files.
Definition: OMSFile.h:48
OpenMS::AccurateMassSearchResult::charge_
Int charge_
Definition: AccurateMassSearchEngine.h:157
OpenMS::ConsensusXMLFile::store
void store(const String &filename, const ConsensusMap &consensus_map)
Stores a consensus map to file.
OpenMS::Param::setValue
void setValue(const std::string &key, const ParamValue &value, const std::string &description="", const std::vector< std::string > &tags=std::vector< std::string >())
Sets a value.
OpenMS::AccurateMassSearchEngine::MappingEntry_
Definition: AccurateMassSearchEngine.h:331
OpenMS::AccurateMassSearchEngine
An algorithm to search for exact mass matches from a spectrum against a database (e....
Definition: AccurateMassSearchEngine.h:206
OpenMS::AccurateMassSearchEngine::QueryResultsTable
std::vector< std::vector< AccurateMassSearchResult > > QueryResultsTable
Definition: AccurateMassSearchEngine.h:321
OpenMS::AccurateMassSearchResult::individual_intensities_
std::vector< double > individual_intensities_
Definition: AccurateMassSearchEngine.h:161
OpenMS::File::basename
static String basename(const String &file)
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:327
OpenMS::AccurateMassSearchEngine::CompareEntryAndMass_
Definition: AccurateMassSearchEngine.h:339
OpenMS::AccurateMassSearchResult::matching_hmdb_ids_
std::vector< String > matching_hmdb_ids_
Definition: AccurateMassSearchEngine.h:167
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::BaseFeature
A basic LC-MS feature.
Definition: BaseFeature.h:58
Feature.h
ConsensusMap.h
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:65
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::AccurateMassSearchEngine::mass_error_value_
double mass_error_value_
parameter stuff
Definition: AccurateMassSearchEngine.h:366
FeatureXMLFile.h
OpenMS::AccurateMassSearchEngine::neg_adducts_fname_
String neg_adducts_fname_
Definition: AccurateMassSearchEngine.h:372
OpenMS::MzTab
Data model of MzTab files. Please see the official MzTab specification at https://code....
Definition: MzTab.h:477
OpenMS::AccurateMassSearchEngine::init
void init()
parse database and adduct files
OpenMS::AccurateMassSearchResult::searched_mass_
double searched_mass_
Definition: AccurateMassSearchEngine.h:155
OpenMS::AccurateMassSearchEngine::database_version_
String database_version_
Definition: AccurateMassSearchEngine.h:381
OpenMS::AccurateMassSearchEngine::mass_mappings_
std::vector< MappingEntry_ > mass_mappings_
Definition: AccurateMassSearchEngine.h:337
OpenMS::AccurateMassSearchResult::matching_index_
Size matching_index_
Definition: AccurateMassSearchEngine.h:162
ListUtils.h
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
ConsensusFeature.h
OpenMS::AccurateMassSearchEngine::MappingEntry_::mass
double mass
Definition: AccurateMassSearchEngine.h:333
OpenMS::Exception::InvalidParameter
Exception indicating that an invalid parameter was handed over to an algorithm.
Definition: Exception.h:339
OpenMS::AccurateMassSearchEngine::pos_adducts_
std::vector< AdductInfo > pos_adducts_
Definition: AccurateMassSearchEngine.h:377
AdductInfo.h
MassTrace.h
OpenMS::AccurateMassSearchEngine::db_struct_file_
StringList db_struct_file_
Definition: AccurateMassSearchEngine.h:375
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OpenMS::AccurateMassSearchEngine::ion_mode_
String ion_mode_
Definition: AccurateMassSearchEngine.h:368
OMSFile.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::AccurateMassSearchEngine::is_initialized_
bool is_initialized_
true if init() was called without any subsequent parameter changes
Definition: AccurateMassSearchEngine.h:361
OpenMS::FileTypes::FEATUREXML
OpenMS feature file (.featureXML)
Definition: FileTypes.h:63
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
OpenMS::AccurateMassSearchEngine::MappingEntry_::massIDs
std::vector< String > massIDs
Definition: AccurateMassSearchEngine.h:334
OpenMS::AccurateMassSearchEngine::database_location_
String database_location_
Definition: AccurateMassSearchEngine.h:382
ProgressLogger.h
OpenMS::AccurateMassSearchEngine::keep_unidentified_masses_
bool keep_unidentified_masses_
Definition: AccurateMassSearchEngine.h:384
OpenMS::AccurateMassSearchResult::observed_intensity_
double observed_intensity_
Definition: AccurateMassSearchEngine.h:160
OpenMS::AccurateMassSearchEngine::MappingEntry_::formula
String formula
Definition: AccurateMassSearchEngine.h:335
OpenMS::IdentificationDataInternal::IteratorWrapper< InputFiles::iterator >
OpenMS::FeatureXMLFile::load
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
FeatureMap.h
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:56
OpenMS::AccurateMassSearchResult::source_feature_index_
Size source_feature_index_
Definition: AccurateMassSearchEngine.h:163
OpenMS::ConsensusFeature
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:69
OpenMS::AccurateMassSearchEngine::MassIDMapping
std::vector< std::vector< String > > MassIDMapping
private member variables
Definition: AccurateMassSearchEngine.h:328
OpenMS::AccurateMassSearchEngine::iso_similarity_
bool iso_similarity_
Definition: AccurateMassSearchEngine.h:369
OpenMS::MzTabFile::store
void store(const String &filename, const MzTab &mz_tab) const
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::OMSFile::store
void store(const String &filename, const IdentificationData &id_data)
Write out an IdentificationData object to SQL-based OMS file.
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::DefaultParamHandler::getDefaults
const Param & getDefaults() const
Non-mutable access to the default parameters.
MzTab.h
OpenMS::String::hasSuffix
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
OpenMS::operator<<
std::ostream & operator<<(std::ostream &os, const AccurateMassSearchResult &amsr)
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:82
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::FeatureXMLFile::store
void store(const String &filename, const FeatureMap &feature_map)
stores the map feature_map in file with name filename.
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
OpenMS::AccurateMassSearchResult::mass_trace_intensities_
std::vector< double > mass_trace_intensities_
Definition: AccurateMassSearchEngine.h:169
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
OpenMS::AccurateMassSearchEngine::CompareEntryAndMass_::operator()
bool operator()(T1 const &t1, T2 const &t2) const
Definition: AccurateMassSearchEngine.h:352
OpenMS::AccurateMassSearchEngine::mass_error_unit_
String mass_error_unit_
Definition: AccurateMassSearchEngine.h:367
OpenMS::IdentificationData
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:94
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:98
OpenMS::MzTabMFile
File adapter for MzTab-M files.
Definition: MzTabMFile.h:50
OpenMS::Feature
An LC-MS feature.
Definition: Feature.h:70
OpenMS::Param::copy
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
OpenMS::AccurateMassSearchEngine::neg_adducts_
std::vector< AdductInfo > neg_adducts_
Definition: AccurateMassSearchEngine.h:378
OpenMS::MzTabMFile::store
void store(const String &filename, const MzTabM &mztab_m) const
Store MzTabM file.
OpenMS::EmpiricalFormula
Representation of an empirical formula.
Definition: EmpiricalFormula.h:84
OpenMS::FeatureXMLFile
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:62
OpenMS::MzTabM
Data model of MzTab-M files Please see the MzTab-M specification at https://github....
Definition: MzTabM.h:233
OpenMS::AccurateMassSearchResult::db_mass_
double db_mass_
Definition: AccurateMassSearchEngine.h:156
String.h
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:69
OpenMS::AccurateMassSearchEngine::hmdb_properties_mapping_
HMDBPropsMapping hmdb_properties_mapping_
Definition: AccurateMassSearchEngine.h:359
MzTabMFile.h
OpenMS::AccurateMassSearchEngine::HMDBPropsMapping
std::map< String, std::vector< String > > HMDBPropsMapping
Definition: AccurateMassSearchEngine.h:329
OpenMS::AccurateMassSearchResult::empirical_formula_
String empirical_formula_
Definition: AccurateMassSearchEngine.h:166
OpenMS::AccurateMassSearchEngine::run
void run(FeatureMap &, MzTab &) const
OpenMS::AccurateMassSearchResult::observed_rt_
double observed_rt_
Definition: AccurateMassSearchEngine.h:159
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to use if information, e.g. a status, should be reported.
Definition: LogStream.h:465
AccurateMassSearchEngine.h
OpenMS::Param::getValue
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
OpenMS::AccurateMassSearchResult
Definition: AccurateMassSearchEngine.h:57
OpenMS::Param::remove
void remove(const std::string &key)
Remove the entry key or a section key (when suffix is ':')
OpenMS::AccurateMassSearchEngine::db_mapping_file_
StringList db_mapping_file_
Definition: AccurateMassSearchEngine.h:374
OpenMS::AccurateMassSearchResult::mz_error_ppm_
double mz_error_ppm_
Definition: AccurateMassSearchEngine.h:158
OpenMS::AccurateMassSearchResult::theoretical_mz_
double theoretical_mz_
Definition: AccurateMassSearchEngine.h:154
File.h
OpenMS::AccurateMassSearchEngine::database_name_
String database_name_
Definition: AccurateMassSearchEngine.h:380
TOPPBase.h
OpenMS::AccurateMassSearchEngine::CompareEntryAndMass_::asMass
double asMass(const MappingEntry_ &v) const
Definition: AccurateMassSearchEngine.h:341
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:58
OpenMS::AccurateMassSearchEngine::resolveAutoMode_
String resolveAutoMode_(const MAPTYPE &map) const
private member functions
Definition: AccurateMassSearchEngine.h:252