OpenMS  3.0.0
MapAlignmentAlgorithmIdentification.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Eva Lange, Clemens Groepl, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
47 
48 #include <cmath> // for "abs"
49 #include <limits> // for "max"
50 #include <map>
51 
52 namespace OpenMS
53 {
73  public DefaultParamHandler,
74  public ProgressLogger
75  {
76 public:
79 
82 
83  // Set a reference for the alignment
84  template <typename DataType> void setReference(DataType& data)
85  {
86  reference_.clear();
87  if (data.empty()) return; // empty input resets the reference
88  SeqToList rt_data;
89  // set these here because "checkParameters_" may not have been called yet:
90  use_feature_rt_ = param_.getValue("use_feature_rt").toBool();
91  score_cutoff_ = param_.getValue("score_cutoff").toBool();
92  score_type_ = (std::string)param_.getValue("score_type");
93  bool sorted = getRetentionTimes_(data, rt_data);
94  computeMedians_(rt_data, reference_, sorted);
95 
96  if (reference_.empty())
97  {
98  throw Exception::MissingInformation(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Could not extract retention time information from the reference file");
99  }
100  }
101 
111  template <typename DataType>
112  void align(std::vector<DataType>& data,
113  std::vector<TransformationDescription>& transformations,
114  Int reference_index = -1)
115  {
116  checkParameters_(data.size());
117  startProgress(0, 3, "aligning maps");
118 
119  reference_index_ = reference_index;
120  // is reference one of the input files?
121  bool use_internal_reference = (reference_index >= 0);
122  if (use_internal_reference)
123  {
124  if (reference_index >= Int(data.size()))
125  {
126  throw Exception::IndexOverflow(__FILE__, __LINE__,
127  OPENMS_PRETTY_FUNCTION,
128  reference_index, data.size());
129  }
130  setReference(data[reference_index]);
131  }
132 
133  // one set of RT data for each input map, except reference (if any):
134  std::vector<SeqToList> rt_data(data.size() - use_internal_reference);
135  bool all_sorted = true;
136  for (Size i = 0, j = 0; i < data.size(); ++i)
137  {
138  if ((reference_index >= 0) && (i == Size(reference_index)))
139  {
140  continue; // skip reference map, if any
141  }
142  all_sorted &= getRetentionTimes_(data[i], rt_data[j++]);
143  }
144  setProgress(1);
145 
146  computeTransformations_(rt_data, transformations, all_sorted);
147  setProgress(2);
148 
149  setProgress(3);
150  endProgress();
151  }
152 
153 protected:
154 
/// Type to store retention times given for individual peptide sequences
/// (key: sequence string, value: list of retention times).
156  typedef std::map<String, DoubleList> SeqToList;
157 
/// Type to store one representative retention time per peptide sequence
/// (key: sequence string, value: retention time).
159  typedef std::map<String, double> SeqToValue;
160 
163 
166 
169 
172 
175 
/// Minimum score to reach for a peptide to be considered; passed as the
/// second argument to "better_" when peptide hits are filtered.
177  double min_score_;
178 
181 
184 
/// Score comparison used for filtering, called as
/// "better_(hit_score, min_score_)". The default accepts every score; the
/// templated "getRetentionTimes_" reassigns it depending on "score_cutoff_"
/// and on whether higher scores are better.
186  bool (*better_) (double, double) = [](double, double) {return true;};
187 
/// Fills @p medians from the per-sequence retention time lists in
/// @p rt_data (declaration only; the definition is not part of this header).
/// "setReference" uses it to derive "reference_" and forwards the return
/// value of "getRetentionTimes_" as @p sorted.
/// NOTE(review): presumably @p sorted states that the lists in @p rt_data
/// are already sorted - confirm against the definition.
197  void computeMedians_(SeqToList& rt_data, SeqToValue& medians,
198  bool sorted = false);
199 
/// Overload collecting retention time data from a list of peptide
/// identifications into @p rt_data (declaration only). The templated
/// overload for feature/consensus maps calls it for the IDs of each feature
/// and for the unassigned IDs of a map.
/// NOTE(review): callers treat the return value as "RT lists are sorted" -
/// confirm against the definition.
208  bool getRetentionTimes_(std::vector<PeptideIdentification>& peptides,
209  SeqToList& rt_data);
210 
/// Overload taking an IdentificationData object as input and @p rt_data as
/// output (declaration only; the definition is not part of this header).
/// NOTE(review): presumably returns whether the collected RT lists are
/// sorted, like the other overloads - confirm against the definition.
219  // "id_data" can't be "const" here or template resolution will fail
220  bool getRetentionTimes_(IdentificationData& id_data, SeqToList& rt_data);
221 
/// Overload taking a peak map as input and @p rt_data as output
/// (declaration only; the definition is not part of this header).
/// NOTE(review): presumably returns whether the collected RT lists are
/// sorted, like the other overloads - confirm against the definition.
230  bool getRetentionTimes_(PeakMap& experiment, SeqToList& rt_data);
231 
246  template <typename MapType>
247  bool getRetentionTimes_(MapType& features, SeqToList& rt_data)
248  {
249  if (!score_cutoff_)
250  {
251  better_ = [](double, double)
252  {return true;};
253  }
254  else if (features[0].getPeptideIdentifications()[0].isHigherScoreBetter())
255  {
256  better_ = [](double a, double b)
257  { return a >= b; };
258  }
259  else
260  {
261  better_ = [](double a, double b)
262  { return a <= b; };
263  }
264 
265  for (typename MapType::Iterator feat_it = features.begin();
266  feat_it != features.end(); ++feat_it)
267  {
268  if (use_feature_rt_)
269  {
270  // find the peptide ID closest in RT to the feature centroid:
271  String sequence;
272  double rt_distance = std::numeric_limits<double>::max();
273  bool any_hit = false;
274  for (std::vector<PeptideIdentification>::iterator pep_it =
275  feat_it->getPeptideIdentifications().begin(); pep_it !=
276  feat_it->getPeptideIdentifications().end(); ++pep_it)
277  {
278  if (!pep_it->getHits().empty())
279  {
280  any_hit = true;
281  double current_distance = fabs(pep_it->getRT() -
282  feat_it->getRT());
283  if (current_distance < rt_distance)
284  {
285  pep_it->sort();
286  if (better_(pep_it->getHits()[0].getScore(), min_score_))
287  {
288  sequence = pep_it->getHits()[0].getSequence().toString();
289  rt_distance = current_distance;
290  }
291  }
292  }
293  }
294 
295  if (any_hit) rt_data[sequence].push_back(feat_it->getRT());
296  }
297  else
298  {
299  getRetentionTimes_(feat_it->getPeptideIdentifications(), rt_data);
300  }
301  }
302 
303  if (!use_feature_rt_ &&
304  param_.getValue("use_unassigned_peptides").toBool())
305  {
306  getRetentionTimes_(features.getUnassignedPeptideIdentifications(),
307  rt_data);
308  }
309 
310  // remove duplicates (can occur if a peptide ID was assigned to several
311  // features due to overlap or annotation tolerance):
312  for (SeqToList::iterator rt_it = rt_data.begin(); rt_it != rt_data.end();
313  ++rt_it)
314  {
315  DoubleList& rt_values = rt_it->second;
316  sort(rt_values.begin(), rt_values.end());
317  DoubleList::iterator it = unique(rt_values.begin(), rt_values.end());
318  rt_values.resize(it - rt_values.begin());
319  }
320  return true; // RTs were already sorted for duplicate detection
321  }
322 
/// Computes the transformations for the collected retention time data
/// (declaration only; the definition is not part of this header). "align"
/// passes one RT data set per non-reference input in @p rt_data, its own
/// output vector as @p transforms, and as @p sorted the combined return
/// values of "getRetentionTimes_".
/// NOTE(review): presumably @p sorted states that all RT lists are already
/// sorted - confirm against the definition.
330  void computeTransformations_(std::vector<SeqToList>& rt_data,
331  std::vector<TransformationDescription>&
332  transforms, bool sorted = false);
333 
/// Parameter check invoked at the start of "align" with the number of input
/// data sets as @p runs (declaration only; the definition is not part of
/// this header).
/// NOTE(review): a comment in "setReference" implies that this function also
/// sets "use_feature_rt_", "score_cutoff_" and "score_type_" - confirm.
341  void checkParameters_(const Size runs);
342 
/// Declaration only; neither the definition nor a call site is part of this
/// header.
/// NOTE(review): presumably obtains the reference retention times
/// ("reference_") - confirm against the definition.
348  void getReference_();
349 
/// Returns a score type reference for the given IdentificationData object
/// (declaration only; neither the definition nor a call site is part of this
/// header).
/// NOTE(review): presumably resolves "score_type_" against the score types
/// stored in @p id_data - confirm against the definition.
355  IdentificationData::ScoreTypeRef handleIdDataScoreType_(const IdentificationData& id_data);
356 
357 private:
358 
361 
364 
365  };
366 
367 } // namespace OpenMS
DefaultParamHandler.h
OpenMS::FileTypes::IDXML
OpenMS identification format (.idXML)
Definition: FileTypes.h:64
ConsensusXMLFile.h
OpenMS::ProgressLogger::setProgress
void setProgress(SignedSize value) const
Sets the current progress.
OpenMS::ExperimentalDesign
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:242
OpenMS::MapAlignmentAlgorithmTreeGuided::buildTree
static void buildTree(std::vector< FeatureMap > &feature_maps, std::vector< BinaryTreeNode > &tree, std::vector< std::vector< double >> &maps_ranges)
Extract RTs given for individual features of each map, calculate distances for each pair of maps and ...
OpenMS::ExperimentalDesignFile::load
static ExperimentalDesign load(const String &tsv_file, bool require_spectra_files)
Loads an experimental design from a tab-separated file.
OpenMS::MapAlignmentTransformer::transformRetentionTimes
static void transformRetentionTimes(PeakMap &msexp, const TransformationDescription &trafo, bool store_original_rt=false)
Applies the given transformation to a peak map.
Size
OpenMS::FeatureFileOptions
Options for loading files containing features.
Definition: FeatureFileOptions.h:46
OpenMS::OMSFile
This class supports reading and writing of OMS files.
Definition: OMSFile.h:48
double
OpenMS::Exception::IllegalArgument
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
OpenMS::MapAlignmentAlgorithmIdentification::SeqToValue
std::map< String, double > SeqToValue
Type to store one representative retention time per peptide sequence.
Definition: MapAlignmentAlgorithmIdentification.h:159
OpenMS::MapAlignmentAlgorithmIdentification::min_run_occur_
Size min_run_occur_
Minimum number of runs a peptide must occur in.
Definition: MapAlignmentAlgorithmIdentification.h:168
ExperimentalDesignFile.h
OpenMS::DataProcessing::ALIGNMENT
Retention time alignment of different maps.
Definition: DataProcessing.h:68
OpenMS::ExperimentalDesign::getFractionToMSFilesMapping
std::map< unsigned int, std::vector< String > > getFractionToMSFilesMapping() const
return fraction index to file paths (ordered by fraction_group)
OpenMS::MzMLFile::store
void store(const String &filename, const PeakMap &map) const
Stores a map in an MzML file.
OpenMS::FileTypes::MZML
MzML file (.mzML)
Definition: FileTypes.h:70
OpenMS::MzMLFile
File adapter for MzML files.
Definition: MzMLFile.h:57
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::MSExperiment::begin
Iterator begin()
Definition: MSExperiment.h:150
MzMLFile.h
OpenMS::DoubleList
std::vector< double > DoubleList
Vector of double precision real types.
Definition: ListUtils.h:62
Int
ConsensusMap.h
OpenMS::MSExperiment
In-Memory representation of a mass spectrometry run.
Definition: MSExperiment.h:70
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:65
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
IdXMLFile.h
FeatureXMLFile.h
ClusterAnalyzer.h
MapAlignmentAlgorithmTreeGuided.h
OpenMS::ProgressLogger::startProgress
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
OpenMS::MapAlignmentAlgorithmIdentification::setReference
void setReference(DataType &data)
Definition: MapAlignmentAlgorithmIdentification.h:84
TransformationDescription.h
OpenMS::MapAlignmentAlgorithmIdentification::getRetentionTimes_
bool getRetentionTimes_(MapType &features, SeqToList &rt_data)
Collect retention time data from peptide IDs contained in feature maps or consensus maps.
Definition: MapAlignmentAlgorithmIdentification.h:247
OPENMS_LOG_WARN
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged.
Definition: LogStream.h:460
MapAlignmentAlgorithmSpectrumAlignment.h
OpenMS::ProgressLogger::endProgress
void endProgress() const
Ends the progress display.
ListUtils.h
OpenMS::Exception::WrongParameterType
A parameter was accessed with the wrong type.
Definition: TOPPBase.h:102
OpenMS::DefaultParamHandler
A base class for all classes handling default parameters.
Definition: DefaultParamHandler.h:92
OpenMS::TransformationDescription::fitModel
void fitModel(const String &model_type, const Param &params=Param())
Fits a model to the data.
OpenMS::DateTime::now
static DateTime now()
Returns the current date and time.
OpenMS::IdXMLFile::load
void load(const String &filename, std::vector< ProteinIdentification > &protein_ids, std::vector< PeptideIdentification > &peptide_ids)
Loads the identifications of an idXML file without identifier.
OpenMS::MapAlignmentAlgorithmIdentification::use_feature_rt_
bool use_feature_rt_
Use feature RT instead of RT from best peptide ID in the feature?
Definition: MapAlignmentAlgorithmIdentification.h:171
OpenMS::Int
int Int
Signed integer type.
Definition: Types.h:102
OMSFile.h
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::MSExperiment::Iterator
std::vector< SpectrumType >::iterator Iterator
Mutable iterator.
Definition: MSExperiment.h:104
OpenMS::MSExperiment::getSize
UInt64 getSize() const
returns the total number of peaks
OpenMS::MapAlignmentAlgorithmSpectrumAlignment::align
virtual void align(std::vector< PeakMap > &, std::vector< TransformationDescription > &)
Align peak maps.
OpenMS::IdentificationDataInternal::ProcessingSoftware
Information about software used for data processing.
Definition: ProcessingSoftware.h:48
OpenMS::FileTypes::FEATUREXML
OpenMS feature file (.featureXML)
Definition: FileTypes.h:63
OpenMS::ProgressLogger
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:52
OpenMS::MapAlignmentAlgorithmIdentification::min_score_
double min_score_
Minimum score to reach for a peptide to be considered.
Definition: MapAlignmentAlgorithmIdentification.h:177
ProgressLogger.h
OpenMS::MzMLFile::load
void load(const String &filename, PeakMap &map)
Loads a map from a MzML file. Spectra and chromatograms are sorted by default (this can be disabled u...
OpenMS::ClusterAnalyzer
Bundles analyzing tools for a clustering (given as sequence of BinaryTreeNode's)
Definition: ClusterAnalyzer.h:51
TransformationXMLFile.h
OpenMS::FeatureXMLFile::loadSize
Size loadSize(const String &filename)
OpenMS::IdentificationDataInternal::IteratorWrapper< ProcessingSoftwares::iterator >
OpenMS::FeatureXMLFile::load
void load(const String &filename, FeatureMap &feature_map)
loads the file with name filename into map and calls updateRanges().
FeatureMap.h
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:56
OpenMS::MapAlignmentAlgorithmPoseClustering::align
void align(const FeatureMap &map, TransformationDescription &trafo)
MapAlignmentAlgorithmIdentification.h
OpenMS::MapAlignmentAlgorithmIdentification::align
void align(std::vector< DataType > &data, std::vector< TransformationDescription > &transformations, Int reference_index=-1)
Align feature maps, consensus maps, peak maps, or peptide identifications.
Definition: MapAlignmentAlgorithmIdentification.h:112
OpenMS::FeatureXMLFile::setOptions
void setOptions(const FeatureFileOptions &)
setter for options for loading/storing
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::MapAlignmentAlgorithmIdentification::score_type_
String score_type_
Score type to use for filtering.
Definition: MapAlignmentAlgorithmIdentification.h:183
OpenMS::MapAlignmentAlgorithmIdentification::use_adducts_
bool use_adducts_
Consider differently adducted IDs as different?
Definition: MapAlignmentAlgorithmIdentification.h:174
OpenMS::MapAlignmentAlgorithmPoseClustering
A map alignment algorithm based on pose clustering.
Definition: MapAlignmentAlgorithmPoseClustering.h:70
OpenMS::DefaultParamHandler::setParameters
void setParameters(const Param &param)
Sets the parameters.
OpenMS::DefaultParamHandler::getParameters
const Param & getParameters() const
Non-mutable access to the parameters.
OpenMS::MapAlignmentAlgorithmIdentification::score_cutoff_
bool score_cutoff_
Actually use the above defined score_cutoff? Needed since it is hard to define a non-cutting score fo...
Definition: MapAlignmentAlgorithmIdentification.h:180
OpenMS::MapAlignmentAlgorithmSpectrumAlignment
A map alignment algorithm based on spectrum similarity (dynamic programming).
Definition: MapAlignmentAlgorithmSpectrumAlignment.h:54
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:82
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::FeatureXMLFile::store
void store(const String &filename, const FeatureMap &feature_map)
stores the map feature_map in file with name filename.
OpenMS::Exception::BaseException::getName
const char * getName() const noexcept
Returns the name of the exception.
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
OpenMS::MapAlignmentAlgorithmIdentification::SeqToList
std::map< String, DoubleList > SeqToList
Type to store retention times given for individual peptide sequences.
Definition: MapAlignmentAlgorithmIdentification.h:156
OpenMS::MapAlignmentAlgorithmIdentification
A map alignment algorithm based on peptide identifications from MS2 spectra.
Definition: MapAlignmentAlgorithmIdentification.h:72
OpenMS::Exception::IndexOverflow
Int overflow exception.
Definition: Exception.h:245
OpenMS::ParamValue::toString
std::string toString(bool full_precision=true) const
Convert ParamValue to string.
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
ExperimentalDesign.h
OpenMS::MapAlignmentAlgorithmIdentification::reference_index_
Int reference_index_
Index of input file to use as reference (if any)
Definition: MapAlignmentAlgorithmIdentification.h:162
MSExperiment.h
OpenMS::IdentificationData
Representation of spectrum identification results and associated data.
Definition: IdentificationData.h:94
OpenMS::MapAlignmentAlgorithmIdentification::reference_
SeqToValue reference_
Reference retention times (per peptide sequence)
Definition: MapAlignmentAlgorithmIdentification.h:165
OpenMS::FeatureFileOptions::setLoadSubordinates
void setLoadSubordinates(bool sub)
OPENMS_LOG_ERROR
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
OpenMS::FeatureMap
A container for features.
Definition: FeatureMap.h:98
OpenMS::TransformationXMLFile
Used to load and store TransformationXML files.
Definition: TransformationXMLFile.h:56
OpenMS::Param::copy
Param copy(const std::string &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
OpenMS::MSExperiment::end
Iterator end()
Definition: MSExperiment.h:160
MapAlignmentAlgorithmPoseClustering.h
OpenMS::IdentificationDataInternal::InputFile
Information about input files that were processed.
Definition: include/OpenMS/METADATA/ID/InputFile.h:50
OpenMS::FeatureXMLFile
This class provides Input/Output functionality for feature maps.
Definition: FeatureXMLFile.h:62
OpenMS::MapAlignmentAlgorithmTreeGuided::computeTrafosByOriginalRT
void computeTrafosByOriginalRT(std::vector< FeatureMap > &feature_maps, FeatureMap &map_transformed, std::vector< TransformationDescription > &transformations, const std::vector< Size > &trafo_order)
Extract original RT ("original_RT" MetaInfo) and transformed RT for each feature to compute RT transf...
OpenMS::IdentificationDataInternal::ProcessingStep
Data processing step that is applied to the data (e.g. database search, PEP calculation,...
Definition: ProcessingStep.h:47
OpenMS::MSExperiment::updateRanges
void updateRanges() override
OpenMS::MapAlignmentAlgorithmTreeGuided
A map alignment algorithm based on peptide identifications from MS2 spectra.
Definition: MapAlignmentAlgorithmTreeGuided.h:70
MapAlignerBase.h
OpenMS::Param
Management and storage of parameters / INI files.
Definition: Param.h:69
IdentificationData.h
OpenMS::TransformationXMLFile::store
void store(String filename, const TransformationDescription &transformation)
Stores the data in a TransformationXML file.
OpenMS::Exception::MissingInformation
Not all required information provided.
Definition: Exception.h:186
OPENMS_LOG_INFO
#define OPENMS_LOG_INFO
Macro to be used if information, e.g. a status, should be reported.
Definition: LogStream.h:465
OpenMS::FeatureFileOptions::setLoadConvexHull
void setLoadConvexHull(bool convex)
PeptideIdentification.h
OpenMS::MapAlignmentAlgorithmTreeGuided::computeTransformedFeatureMaps
static void computeTransformedFeatureMaps(std::vector< FeatureMap > &feature_maps, const std::vector< TransformationDescription > &transformations)
Apply transformations on input maps.
OpenMS::Param::getValue
const ParamValue & getValue(const std::string &key) const
Returns a value of a parameter.
OpenMS::TransformationDescription
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:62
OpenMS::MapAlignmentAlgorithmPoseClustering::setReference
void setReference(const MapType &map)
Sets the reference for the alignment.
Definition: MapAlignmentAlgorithmPoseClustering.h:87
StandardTypes.h
OpenMS::DateTime
DateTime Class.
Definition: DateTime.h:58
OpenMS::FeatureXMLFile::getOptions
FeatureFileOptions & getOptions()
Mutable access to the options for loading/storing.
OpenMS::OMSFile::load
void load(const String &filename, IdentificationData &id_data)
Reads in an OMS file and constructs an IdentificationData object.
OpenMS::ProgressLogger::setLogType
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
OpenMS::ExperimentalDesign::sameNrOfMSFilesPerFraction
bool sameNrOfMSFilesPerFraction() const
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:58
OpenMS::IdXMLFile
Used to load and store idXML files.
Definition: IdXMLFile.h:68
OpenMS::ClusterAnalyzer::newickTree
String newickTree(const std::vector< BinaryTreeNode > &tree, const bool include_distance=false)
Returns the hierarchy described by a clustering tree as Newick-String.
OpenMS::FileTypes::OMS
OpenMS database file.
Definition: FileTypes.h:114
OpenMS::MapAlignmentAlgorithmTreeGuided::treeGuidedAlignment
void treeGuidedAlignment(const std::vector< BinaryTreeNode > &tree, std::vector< FeatureMap > &feature_maps_transformed, std::vector< std::vector< double >> &maps_ranges, FeatureMap &map_transformed, std::vector< Size > &trafo_order)
Align feature maps tree guided using align() of OpenMS::MapAlignmentAlgorithmIdentification and use T...