OpenMS  3.0.0
QTClusterFinder.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hendrik Weisser $
32 // $Authors: Steffen Sass, Hendrik Weisser $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
43 
44 #include <boost/heap/fibonacci_heap.hpp>
45 #include <unordered_map>
46 
47 #include <list>
48 #include <vector>
49 #include <unordered_set>
50 #include <utility> // for pair<>
51 
52 namespace OpenMS
53 {
54 
104  class OPENMS_DLLAPI QTClusterFinder :
105  public BaseGroupFinder
106  {
107  public:
108 
110  typedef std::unordered_map<
111  std::pair<OpenMS::GridFeature*, OpenMS::GridFeature*>,
112  double> PairDistances;
113 
115  typedef std::unordered_map<
116  const OpenMS::GridFeature*, std::unordered_set<Size> > ElementMapping;
117 
119  typedef boost::heap::fibonacci_heap<QTCluster> Heap;
120 
122 
123  private:
126 
128  bool use_IDs_;
129 
130  //TODO we could also bin by equal sized RT bins, or by a fixed RT size
131  //TODO could be made dependent on nr. of maps (e.g. with 5 maps you get [around] 4 diffs per ID already)
134 
136  double min_score_;
137 
142 
144  double max_diff_rt_;
145 
147  double max_diff_mz_;
148 
151 
154 
156  std::unordered_set<const OpenMS::GridFeature*> already_used_;
157 
160  std::map<double, double> bin_tolerances_;
161 
165  double getDistance_(const OpenMS::GridFeature* left, const
166  OpenMS::GridFeature* right);
167 
169  void setParameters_(double max_intensity, double max_mz);
170 
182  bool makeConsensusFeature_(Heap& cluster_heads,
183  ConsensusFeature& feature,
184  ElementMapping& element_mapping,
185  const Grid& grid,
186  const std::vector<Heap::handle_type>& handles);
187 
197  void computeClustering_(const Grid& grid,
198  Heap& cluster_heads,
199  std::vector<QTCluster::BulkData>& cluster_data,
200  std::vector<Heap::handle_type>& handles,
201  ElementMapping& element_mapping);
202 
209  void removeFromElementMapping_(const QTCluster& cluster,
210  ElementMapping& element_mapping);
211 
221  void createConsensusFeature_(ConsensusFeature& feature, const double quality,
222  const QTCluster::Elements& elements);
223 
242  void updateClustering_(ElementMapping& element_mapping,
243  const Grid& grid,
244  const QTCluster::Elements& elements,
245  Heap& cluster_heads,
246  const std::vector<Heap::handle_type>& handles,
247  Size best_id);
248 
250  template <typename MapType>
251  void run_(const std::vector<MapType>& input_maps, ConsensusMap& result_map);
252 
254  template <typename MapType>
255  void run_internal_(const std::vector<MapType>& input_maps,
256  ConsensusMap& result_map, bool do_progress);
257 
264  void addClusterElements_(const Grid& grid, QTCluster& cluster);
265 
269  bool distIsOutlier_(double dist, double rt);
270 
271 protected:
272 
273  enum
274  {
277  };
278 
279 public:
280 
282  QTClusterFinder();
283 
285  ~QTClusterFinder() override;
286 
288  static const String getProductName()
289  {
290  return "qt";
291  }
292 
300  void run(const std::vector<ConsensusMap>& input_maps,
301  ConsensusMap& result_map) override;
302 
310  void run(const std::vector<FeatureMap>& input_maps,
311  ConsensusMap& result_map);
312 
315  {
316  return new QTClusterFinder();
317  }
318  };
319 } // namespace OpenMS
320 
BaseGroupFinder.h
OpenMS::QTClusterFinder::PairDistances
std::unordered_map< std::pair< OpenMS::GridFeature *, OpenMS::GridFeature * >, double > PairDistances
Distances between pairs of grid features.
Definition: QTClusterFinder.h:112
HashGrid.h
OpenMS::String
A more convenient string class.
Definition: String.h:58
OpenMS::QTClusterFinder::noID_penalty_
double noID_penalty_
Definition: QTClusterFinder.h:141
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
FeatureDistance.h
OpenMS::QTCluster::Elements
std::vector< Element > Elements
Definition: QTCluster.h:129
OpenMS::QTClusterFinder::max_diff_rt_
double max_diff_rt_
Maximum RT difference.
Definition: QTClusterFinder.h:144
OpenMS::GridFeature
Representation of a feature in a hash grid.
Definition: GridFeature.h:52
OpenMS::QTClusterFinder
A variant of QT clustering for the detection of feature groups.
Definition: QTClusterFinder.h:104
QTCluster.h
OpenMS::DIM_UNIT::MZ
m/z
OpenMS::Peak2D::MZ
Mass-to-charge dimension id (1 if used as a const int)
Definition: Peak2D.h:76
OpenMS::QTClusterFinder::Grid
HashGrid< OpenMS::GridFeature * > Grid
Definition: QTClusterFinder.h:121
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::HashGrid
Container for (2-dimensional coordinate, value) pairs.
Definition: HashGrid.h:62
OpenMS::QTClusterFinder::already_used_
std::unordered_set< const OpenMS::GridFeature * > already_used_
Set of features already used.
Definition: QTClusterFinder.h:156
OpenMS::QTCluster
A representation of a QT cluster used for feature grouping.
Definition: QTCluster.h:107
ProgressLogger.h
OpenMS::ConsensusFeature
A consensus feature spanning multiple LC-MS/MS experiments.
Definition: ConsensusFeature.h:69
OpenMS::QTClusterFinder::ElementMapping
std::unordered_map< const OpenMS::GridFeature *, std::unordered_set< Size > > ElementMapping
Map to store which grid features are next to which clusters (saves the clusters ids)
Definition: QTClusterFinder.h:116
OpenMS::QTClusterFinder::create
static BaseGroupFinder * create()
Returns an instance of this class.
Definition: QTClusterFinder.h:314
OpenMS::FeatureDistance
A functor class for the calculation of distances between features or consensus features.
Definition: FeatureDistance.h:89
OpenMS::QTClusterFinder::num_maps_
Size num_maps_
Number of input maps.
Definition: QTClusterFinder.h:125
OpenMS::QTClusterFinder::min_nr_diffs_per_bin_
Size min_nr_diffs_per_bin_
Min. nr. of differences from matched IDs requested to calculate a linking tolerance per RT bin.
Definition: QTClusterFinder.h:133
OpenMS::QTClusterFinder::max_diff_mz_
double max_diff_mz_
Maximum m/z difference.
Definition: QTClusterFinder.h:147
OpenMS::QTClusterFinder::Heap
boost::heap::fibonacci_heap< QTCluster > Heap
Heap to efficiently find the best clusters.
Definition: QTClusterFinder.h:119
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:82
OpenMS::QTClusterFinder::bin_tolerances_
std::map< double, double > bin_tolerances_
Definition: QTClusterFinder.h:160
OpenMS::QTClusterFinder::feature_distance_
FeatureDistance feature_distance_
Feature distance functor.
Definition: QTClusterFinder.h:153
OpenMS::QTClusterFinder::use_IDs_
bool use_IDs_
Consider peptide identifications for grouping?
Definition: QTClusterFinder.h:128
OpenMS::QTClusterFinder::min_score_
double min_score_
Min. score for an ID to be considered for tolerance estimation.
Definition: QTClusterFinder.h:136
OpenMS::Peak2D::RT
Retention time dimension id (0 if used as a const int)
Definition: Peak2D.h:75
OpenMS::BaseGroupFinder
The base class of all element group finding algorithms.
Definition: BaseGroupFinder.h:61
GridFeature.h
OpenMS::DIM_UNIT::RT
RT in seconds.
OpenMS::QTClusterFinder::getProductName
static const String getProductName()
Returns the name of the product.
Definition: QTClusterFinder.h:288
OpenMS::QTClusterFinder::nr_partitions_
int nr_partitions_
Maximum m/z difference.
Definition: QTClusterFinder.h:150