OpenMS  3.0.0
IDScoreSwitcherAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Julianus Pfeuffer $
32 // $Authors: Julianus Pfeuffer $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
41 
42 #include <vector>
43 #include <set>
44 
45 namespace OpenMS
46 {
47 
48  class OPENMS_DLLAPI IDScoreSwitcherAlgorithm:
49  public DefaultParamHandler
50  {
51  public:
53 
/// Coarse categories of identification scores that the main score can be switched to.
/// The concrete score names recognised for each category are listed in type_to_str_,
/// the expected score orientation in type_to_better_.
enum class ScoreType
{
  RAW,      ///< search-engine specific raw score (names such as "XTandem" or "Mascot")
  RAW_EVAL, ///< expectation-value style score (names such as "expect" or "E-Value")
  PP,       ///< posterior probability
  PEP,      ///< posterior error probability
  FDR,      ///< false discovery rate
  QVAL      ///< q-value
};
67 
69  bool isScoreType(const String& score_name, const ScoreType& type)
70  {
71  const std::set<String>& possible_types = type_to_str_[type];
72  return possible_types.find(score_name) != possible_types.end();
73  }
74 
77  template <typename IDType>
78  void switchScores(IDType& id, Size& counter)
79  {
80  for (typename std::vector<typename IDType::HitType>::iterator hit_it = id.getHits().begin();
81  hit_it != id.getHits().end(); ++hit_it, ++counter)
82  {
83  if (!hit_it->metaValueExists(new_score_))
84  {
85  std::stringstream msg;
86  msg << "Meta value '" << new_score_ << "' not found for " << *hit_it;
87  throw Exception::MissingInformation(__FILE__, __LINE__,
88  OPENMS_PRETTY_FUNCTION, msg.str());
89  }
90 
91  const String& old_score_meta = (old_score_.empty() ? id.getScoreType() :
92  old_score_);
93  const DataValue& dv = hit_it->getMetaValue(old_score_meta);
94  if (!dv.isEmpty()) // meta value for old score already exists
95  {
96  if (fabs((double(dv) - hit_it->getScore()) * 2.0 /
97  (double(dv) + hit_it->getScore())) > tolerance_)
98  {
99  std::stringstream msg;
100  msg << "Meta value '" << old_score_meta << "' already exists "
101  << "with a conflicting value for " << *hit_it;
102  throw Exception::InvalidValue(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
103  msg.str(), dv.toString());
104  } // else: values match, nothing to do
105  }
106  else
107  {
108  hit_it->setMetaValue(old_score_meta, hit_it->getScore());
109  }
110  hit_it->setScore(hit_it->getMetaValue(new_score_));
111  }
112  id.setScoreType(new_score_type_);
113  id.setHigherScoreBetter(higher_better_);
114  }
115 
119  void switchToGeneralScoreType(std::vector<PeptideIdentification>& id, ScoreType type, Size& counter)
120  {
121  if (id.empty()) return;
122  String t = findScoreType(id[0], type);
123  if (t.empty())
124  {
125  String msg = "First encountered ID does not have the requested score type.";
126  throw Exception::MissingInformation(__FILE__, __LINE__,
127  OPENMS_PRETTY_FUNCTION, msg);
128  }
129  else if (t == id[0].getScoreType())
130  {
131  // we assume that all the other peptide ids
132  // also already have the correct score set
133  return;
134  }
135 
136  if (t.hasSuffix("_score"))
137  {
138  new_score_type_ = t.chop(6);
139  }
140  else
141  {
142  new_score_type_ = t;
143  }
144  new_score_ = t;
145 
146  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
147  {
148  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
149  higher_better_ = type_to_better_[type];
150  }
151  for (auto& i : id)
152  {
153  switchScores(i, counter);
154  }
155  }
156 
160  void switchToGeneralScoreType(ConsensusMap& cmap, ScoreType type, Size& counter, bool unassigned_peptides_too = true)
161  {
162  String new_type = "";
163  for (const auto& f : cmap)
164  {
165  const auto& ids = f.getPeptideIdentifications();
166  if (!ids.empty())
167  {
168  new_type = findScoreType(ids[0], type);
169  if (new_type == ids[0].getScoreType())
170  {
171  return;
172  }
173  else
174  {
175  break;
176  }
177  }
178  }
179 
180  if (new_type.empty())
181  {
182  String msg = "First encountered ID does not have the requested score type.";
183  throw Exception::MissingInformation(__FILE__, __LINE__,
184  OPENMS_PRETTY_FUNCTION, msg);
185  }
186 
187  if (new_type.hasSuffix("_score"))
188  {
189  new_score_type_ = new_type.chop(6);
190  }
191  else
192  {
193  new_score_type_ = new_type;
194  }
195  new_score_ = new_type;
196 
197  if (type != ScoreType::RAW && higher_better_ != type_to_better_[type])
198  {
199  OPENMS_LOG_WARN << "Requested non-raw score type does not match the expected score direction. Correcting!\n";
200  higher_better_ = type_to_better_[type];
201  }
202 
203  const auto switchScoresSingle = [&counter,this](PeptideIdentification& id){switchScores(id,counter);};
204  cmap.applyFunctionOnPeptideIDs(switchScoresSingle, unassigned_peptides_too);
205  }
206 
207 
209  template <typename IDType>
211  {
212  const String& curr_score_type = id.getScoreType();
213  const std::set<String>& possible_types = type_to_str_[type];
214  if (possible_types.find(curr_score_type) != possible_types.end())
215  {
216  OPENMS_LOG_INFO << "Requested score type already set as main score: " + curr_score_type + "\n";
217  return curr_score_type;
218  }
219  else
220  {
221  if (id.getHits().empty())
222  {
223  OPENMS_LOG_WARN << "Identification entry used to check for alternative score was empty.\n";
224  return "";
225  }
226  const auto& hit = id.getHits()[0];
227  for (const auto& poss_str : possible_types)
228  {
229  if (hit.metaValueExists(poss_str)) return poss_str;
230  else if (hit.metaValueExists(poss_str + "_score")) return poss_str + "_score";
231  }
232  OPENMS_LOG_WARN << "Score of requested type not found in the UserParams of the checked ID object.\n";
233  return "";
234  }
235  }
236 
237  private:
238  void updateMembers_() override;
239 
241  const double tolerance_ = 1e-6;
242 
244  String new_score_, new_score_type_, old_score_;
246  bool higher_better_; // for the new scores, are higher ones better?
247 
249  std::map<ScoreType, std::set<String>> type_to_str_ =
250  {
251  {ScoreType::RAW, {"XTandem", "OMSSA", "SEQUEST:xcorr", "Mascot", "mvh"}},
252  //TODO find out reasonable raw scores for SES that provide E-Values as main score or see below
253  //TODO there is no test for spectraST idXML, so I don't know its score
254  //TODO check if we should combine RAW and RAW_EVAL:
255  // What if a SE does not have an e-value score (spectrast, OMSSA, crux/sequest, myrimatch),
256  // then you need additional if's/try's
257  {ScoreType::RAW_EVAL, {"expect", "SpecEValue", "E-Value", "evalue", "MS:1002053", "MS:1002257"}},
258  {ScoreType::PP, {"Posterior Probability"}},
259  {ScoreType::PEP, {"Posterior Error Probability", "pep", "MS:1001493"}}, // TODO add CV terms
260  {ScoreType::FDR, {"FDR", "fdr", "false discovery rate"}},
261  {ScoreType::QVAL, {"q-value", "qvalue", "MS:1001491", "q-Value", "qval"}}
262  };
263 
265  std::map<ScoreType, bool> type_to_better_ =
266  {
267  {ScoreType::RAW, true}, //TODO this might actually not always be true
268  {ScoreType::RAW_EVAL, false},
269  {ScoreType::PP, true},
270  {ScoreType::PEP, false},
271  {ScoreType::FDR, false},
272  {ScoreType::QVAL, false}
273  };
274  };
275 } // namespace OpenMS
IDScoreSwitcherAlgorithm.h
LogStream.h
DefaultParamHandler.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
OpenMS::IDScoreSwitcherAlgorithm::ScoreType
ScoreType
Definition: IDScoreSwitcherAlgorithm.h:58
Size
OpenMS::IDScoreSwitcherAlgorithm::higher_better_
bool higher_better_
will be set according to the algorithm parameters
Definition: IDScoreSwitcherAlgorithm.h:246
OpenMS::IdXMLFile::store
void store(const String &filename, const std::vector< ProteinIdentification > &protein_ids, const std::vector< PeptideIdentification > &peptide_ids, const String &document_id="")
Stores the data in an idXML file.
OpenMS::IDScoreSwitcherAlgorithm::switchToGeneralScoreType
void switchToGeneralScoreType(ConsensusMap &cmap, ScoreType type, Size &counter, bool unassigned_peptides_too=true)
Definition: IDScoreSwitcherAlgorithm.h:160
OpenMS::Exception::InvalidValue
Invalid value exception.
Definition: Exception.h:327
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::DataValue::isEmpty
bool isEmpty() const
Test if the value is empty.
Definition: DataValue.h:388
ConsensusMap.h
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
OpenMS::DataValue::toString
String toString(bool full_precision=true) const
Conversion to String full_precision Controls number of fractional digits for all double types or list...
OpenMS::IDScoreSwitcherAlgorithm::isScoreType
bool isScoreType(const String &score_name, const ScoreType &type)
Checks if the given score_name is of ScoreType type.
Definition: IDScoreSwitcherAlgorithm.h:69
OpenMS::IDScoreSwitcherAlgorithm::switchToGeneralScoreType
void switchToGeneralScoreType(std::vector< PeptideIdentification > &id, ScoreType type, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:119
OpenMS::IDScoreSwitcherAlgorithm::findScoreType
String findScoreType(IDType &id, IDScoreSwitcherAlgorithm::ScoreType type)
finds a certain score type in an ID and its metavalues if present, otherwise returns empty string
Definition: IDScoreSwitcherAlgorithm.h:210
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
OpenMS::ProteinIdentification
Representation of a protein identification run.
Definition: ProteinIdentification.h:70
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::IDScoreSwitcherAlgorithm::old_score_
String old_score_
Definition: IDScoreSwitcherAlgorithm.h:244
OpenMS::String::chop
String chop(Size n) const
Returns a substring where n characters were removed from the end of the string.
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
Exception.h
OpenMS::DataValue
Class to hold strings, numeric values, lists of strings and lists of numeric values.
Definition: DataValue.h:58
OpenMS::IDScoreSwitcherAlgorithm::switchScores
void switchScores(IDType &id, Size &counter)
Definition: IDScoreSwitcherAlgorithm.h:78
ProteinIdentification.h
OpenMS::IDScoreSwitcherAlgorithm
Definition: IDScoreSwitcherAlgorithm.h:48
OpenMS::String::hasSuffix
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:82
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
OpenMS::PeptideIdentification
Represents the peptide hits for a spectrum.
Definition: PeptideIdentification.h:63
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:186
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro if a information, e.g. a status should be reported.
Definition: LogStream.h:465
PeptideIdentification.h
TOPPBase.h
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:68