OpenMS  3.0.0
GaussFilterAlgorithm.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest $
32 // $Authors: Eva Lange $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 #include <OpenMS/CONCEPT/Types.h>
40 #include <cmath>
41 #include <vector>
42 
43 namespace OpenMS
44 {
67 // #define DEBUG_FILTERING
68 
// Gaussian low-pass filter for profile data. The smoothing itself is done by
// the filter() members, which convolve the data with a kernel that is
// tabulated in coeffs_ at a step of spacing_.
// NOTE(review): listing line 75 (presumably the constructor declaration) is
// missing from this extract — confirm against the original header.
69  class OPENMS_DLLAPI GaussFilterAlgorithm
70  {
71 public:
74 
// Virtual destructor; only declared here, the definition is not in this file.
76  virtual ~GaussFilterAlgorithm();
77 
// Body of the spectrum overload of filter(): smooths the m/z / intensity
// arrays of `spectrum` and replaces them with the smoothed copies.
// Returns the result of the iterator-based filter(), i.e. true if at least one
// smoothed intensity is non-zero.
// NOTE(review): the signature line (listing line 81) and listing lines 84-85,
// which presumably declare `mz_array` and `intensity_array`, are missing from
// this extract — restore them before compiling.
82  {
83  // create new arrays for mz / intensity data and set their size
// both output arrays are sized after the m/z array; the filter reads one
// intensity per m/z value, so this assumes the intensity array is at least as
// long as the m/z array — TODO confirm
86  mz_array->data.resize(spectrum->getMZArray()->data.size());
87  intensity_array->data.resize(spectrum->getMZArray()->data.size());
88 
89  // apply the filter
90  bool ret_val = filter(
91  spectrum->getMZArray()->data.begin(),
92  spectrum->getMZArray()->data.end(),
93  spectrum->getIntensityArray()->data.begin(),
94  mz_array->data.begin(), intensity_array->data.begin()
95  );
96  // set the data of the spectrum to the new mz / int arrays
97  spectrum->setMZArray(mz_array);
98  spectrum->setIntensityArray(intensity_array);
99  return ret_val;
100  }
101 
// Body of the chromatogram overload of filter(): smooths the time / intensity
// arrays of `chromatogram` and replaces them with the smoothed copies.
// Returns the result of the iterator-based filter(), i.e. true if at least one
// smoothed intensity is non-zero.
// NOTE(review): the signature line (listing line 105) and listing lines
// 108-109, which presumably declare `rt_array` and `intensity_array`, are
// missing from this extract — restore them before compiling.
106  {
107  // create new arrays for rt / intensity data and set their size
// both output arrays are sized after the time array; the filter reads one
// intensity per time value, so this assumes the intensity array is at least as
// long as the time array — TODO confirm
110  rt_array->data.resize(chromatogram->getTimeArray()->data.size());
111  intensity_array->data.resize(chromatogram->getTimeArray()->data.size());
112 
113  // apply the filter
114  bool ret_val = filter(
115  chromatogram->getTimeArray()->data.begin(),
116  chromatogram->getTimeArray()->data.end(),
117  chromatogram->getIntensityArray()->data.begin(),
118  rt_array->data.begin(), intensity_array->data.begin()
119  );
120  // set the data of the chromatogram to the new rt / int arrays
121  chromatogram->setTimeArray(rt_array);
122  chromatogram->setIntensityArray(intensity_array);
123  return ret_val;
124  }
125 
131  template <typename ConstIterT, typename IterT>
132  bool filter(
133  ConstIterT mz_in_start,
134  ConstIterT mz_in_end,
135  ConstIterT int_in_start,
136  IterT mz_out,
137  IterT int_out)
138  {
139  bool found_signal = false;
140 
141  ConstIterT mz_it = mz_in_start;
142  ConstIterT int_it = int_in_start;
143  for (; mz_it != mz_in_end; mz_it++, int_it++)
144  {
145  // if ppm tolerance is used, calculate a reasonable width value for this m/z
146  if (use_ppm_tolerance_)
147  {
148  initialize((*mz_it) * ppm_tolerance_ * 10e-6, spacing_, ppm_tolerance_, use_ppm_tolerance_ );
149  }
150 
151  double new_int = integrate_(mz_it, int_it, mz_in_start, mz_in_end);
152 
153  // store new intensity and m/z into output iterator
154  *mz_out = *mz_it;
155  *int_out = new_int;
156  ++mz_out;
157  ++int_out;
158 
159  if (fabs(new_int) > 0) found_signal = true;
160  }
161  return found_signal;
162  }
163 
// Sets up the filter for the given parameters. Only declared here; the
// definition is not part of this file. The iterator-based filter() calls it
// for every data point when use_ppm_tolerance_ is set, passing a width derived
// from the current position.
// NOTE(review): presumably this recomputes sigma_ and the tabulated kernel
// coeffs_ — confirm against the implementation file.
164  void initialize(double gaussian_width, double spacing, double ppm_tolerance, bool use_ppm_tolerance);
165 
166 protected:
167 
// Pre-tabulated kernel coefficients; integrate_() indexes them by
// floor(distance / spacing_) and interpolates linearly in between.
169  std::vector<double> coeffs_;
// The standard deviation of the Gaussian (not read anywhere in this header).
171  double sigma_;
// The spacing of the pre-tabulated kernel coefficients.
173  double spacing_;
174 
175  // tolerance in ppm
178 
180  template <typename InputPeakIterator>
181  double integrate_(InputPeakIterator x /* mz */, InputPeakIterator y /* int */, InputPeakIterator first, InputPeakIterator last)
182  {
183  double v = 0.;
184  // norm the gaussian kernel area to one
185  double norm = 0.;
186  Size middle = coeffs_.size();
187 
188  double start_pos = (( (*x) - (middle * spacing_)) > (*first)) ? ((*x) - (middle * spacing_)) : (*first);
189  double end_pos = (( (*x) + (middle * spacing_)) < (*(last - 1))) ? ((*x) + (middle * spacing_)) : (*(last - 1));
190 
191  InputPeakIterator help_x = x;
192  InputPeakIterator help_y = y;
193 #ifdef DEBUG_FILTERING
194 
195  std::cout << "integrate from middle to start_pos " << *help_x << " until " << start_pos << std::endl;
196 #endif
197 
198  //integrate from middle to start_pos
199  while ((help_x != first) && (*(help_x - 1) > start_pos))
200  {
201  // search for the corresponding datapoint of help in the gaussian (take the left most adjacent point)
202  double distance_in_gaussian = fabs(*x - *help_x);
203  Size left_position = (Size)floor(distance_in_gaussian / spacing_);
204 
205  // search for the true left adjacent data point (because of rounding errors)
206  for (int j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
207  {
208  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
209  {
210  left_position -= j;
211  break;
212  }
213 
214  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
215  {
216  left_position += j;
217  break;
218  }
219  }
220 
221  // interpolate between the left and right data points in the gaussian to get the true value at position distance_in_gaussian
222  Size right_position = left_position + 1;
223  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
224  // check if the right data point in the gaussian exists
225  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
226  : coeffs_[left_position];
227 #ifdef DEBUG_FILTERING
228 
229  std::cout << "distance_in_gaussian " << distance_in_gaussian << std::endl;
230  std::cout << " right_position " << right_position << std::endl;
231  std::cout << " left_position " << left_position << std::endl;
232  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
233  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
234  std::cout << "interpolated value left " << coeffs_right << std::endl;
235 #endif
236 
237 
238  // search for the corresponding datapoint for (help-1) in the gaussian (take the left most adjacent point)
239  distance_in_gaussian = fabs((*x) - (*(help_x - 1)));
240  left_position = (Size)floor(distance_in_gaussian / spacing_);
241 
242  // search for the true left adjacent data point (because of rounding errors)
243  for (UInt j = 0; ((j < 3) && (distance(first, help_x - j) >= 0)); ++j)
244  {
245  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
246  {
247  left_position -= j;
248  break;
249  }
250 
251  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
252  {
253  left_position += j;
254  break;
255  }
256  }
257 
258  // start the interpolation for the true value in the gaussian
259  right_position = left_position + 1;
260  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
261  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
262  : coeffs_[left_position];
263 #ifdef DEBUG_FILTERING
264 
265  std::cout << " help_x-1 " << *(help_x - 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
266  std::cout << " right_position " << right_position << std::endl;
267  std::cout << " left_position " << left_position << std::endl;
268  std::cout << "coeffs_ at left_position " << coeffs_[left_position] << std::endl;
269  std::cout << "coeffs_ at right_position " << coeffs_[right_position] << std::endl;
270  std::cout << "interpolated value right " << coeffs_left << std::endl;
271 
272  std::cout << " intensity " << fabs(*(help_x - 1) - (*help_x)) / 2. << " * " << *(help_y - 1) << " * " << coeffs_left << " + " << *help_y << "* " << coeffs_right
273  << std::endl;
274 #endif
275 
276 
277  norm += fabs((*(help_x - 1)) - (*help_x)) / 2. * (coeffs_left + coeffs_right);
278 
279  v += fabs((*(help_x - 1)) - (*help_x)) / 2. * (*(help_y - 1) * coeffs_left + (*help_y) * coeffs_right);
280  --help_x;
281  --help_y;
282  }
283 
284 
285  //integrate from middle to end_pos
286  help_x = x;
287  help_y = y;
288 #ifdef DEBUG_FILTERING
289 
290  std::cout << "integrate from middle to endpos " << *help_x << " until " << end_pos << std::endl;
291 #endif
292 
293  while ((help_x != (last - 1)) && (*(help_x + 1) < end_pos))
294  {
295  // search for the corresponding datapoint for help in the gaussian (take the left most adjacent point)
296  double distance_in_gaussian = fabs((*x) - (*help_x));
297  int left_position = (UInt)floor(distance_in_gaussian / spacing_);
298 
299  // search for the true left adjacent data point (because of rounding errors)
300  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
301  {
302  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
303  {
304  left_position -= j;
305  break;
306  }
307 
308  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
309  {
310  left_position += j;
311  break;
312  }
313  }
314  // start the interpolation for the true value in the gaussian
315  Size right_position = left_position + 1;
316  double d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
317  double coeffs_left = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
318  : coeffs_[left_position];
319 
320 #ifdef DEBUG_FILTERING
321 
322  std::cout << " help " << *help_x << " distance_in_gaussian " << distance_in_gaussian << std::endl;
323  std::cout << " left_position " << left_position << std::endl;
324  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
325  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
326  std::cout << "interpolated value left " << coeffs_left << std::endl;
327 #endif
328 
329  // search for the corresponding datapoint for (help+1) in the gaussian (take the left most adjacent point)
330  distance_in_gaussian = fabs((*x) - (*(help_x + 1)));
331  left_position = (UInt)floor(distance_in_gaussian / spacing_);
332 
333  // search for the true left adjacent data point (because of rounding errors)
334  for (int j = 0; ((j < 3) && (distance(help_x + j, last - 1) >= 0)); ++j)
335  {
336  if (((left_position - j) * spacing_ <= distance_in_gaussian) && ((left_position - j + 1) * spacing_ >= distance_in_gaussian))
337  {
338  left_position -= j;
339  break;
340  }
341 
342  if (((left_position + j) * spacing_ < distance_in_gaussian) && ((left_position + j + 1) * spacing_ < distance_in_gaussian))
343  {
344  left_position += j;
345  break;
346  }
347  }
348 
349  // start the interpolation for the true value in the gaussian
350  right_position = left_position + 1;
351  d = fabs((left_position * spacing_) - distance_in_gaussian) / spacing_;
352  double coeffs_right = (right_position < middle) ? (1 - d) * coeffs_[left_position] + d * coeffs_[right_position]
353  : coeffs_[left_position];
354 #ifdef DEBUG_FILTERING
355 
356  std::cout << " (help + 1) " << *(help_x + 1) << " distance_in_gaussian " << distance_in_gaussian << std::endl;
357  std::cout << " left_position " << left_position << std::endl;
358  std::cout << "coeffs_ at right_position " << coeffs_[left_position] << std::endl;
359  std::cout << "coeffs_ at left_position " << coeffs_[right_position] << std::endl;
360  std::cout << "interpolated value right " << coeffs_right << std::endl;
361 
362  std::cout << " intensity " << fabs(*help_x - *(help_x + 1)) / 2.
363  << " * " << *help_y << " * " << coeffs_left << " + " << *(help_y + 1)
364  << "* " << coeffs_right
365  << std::endl;
366 #endif
367  norm += fabs((*help_x) - (*(help_x + 1)) ) / 2. * (coeffs_left + coeffs_right);
368 
369  v += fabs((*help_x) - (*(help_x + 1)) ) / 2. * ((*help_y) * coeffs_left + (*(help_y + 1)) * coeffs_right);
370  ++help_x;
371  ++help_y;
372  }
373 
374  if (v > 0)
375  {
376  return v / norm;
377  }
378  else
379  {
380  return 0;
381  }
382  }
383 
384  };
385 
386 } // namespace OpenMS
OpenMS::Interfaces::SpectrumPtr
boost::shared_ptr< Spectrum > SpectrumPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:236
DataStructures.h
Types.h
OpenSwath::norm
double norm(T beg, T end)
compute the Euclidean norm of the vector
Definition: StatsHelpers.h:57
OpenMS::Size
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
OpenMS::GaussFilterAlgorithm::use_ppm_tolerance_
bool use_ppm_tolerance_
Definition: GaussFilterAlgorithm.h:176
Constants.h
OpenMS::Interfaces::BinaryDataArrayPtr
boost::shared_ptr< BinaryDataArray > BinaryDataArrayPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:80
OpenMS::Interfaces::ChromatogramPtr
boost::shared_ptr< Chromatogram > ChromatogramPtr
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:156
OpenMS::GaussFilterAlgorithm::filter
bool filter(OpenMS::Interfaces::ChromatogramPtr chromatogram)
Smooths a chromatogram containing profile data.
Definition: GaussFilterAlgorithm.h:105
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::GaussFilterAlgorithm::sigma_
double sigma_
The standard deviation.
Definition: GaussFilterAlgorithm.h:171
OpenMS::GaussFilterAlgorithm::filter
bool filter(ConstIterT mz_in_start, ConstIterT mz_in_end, ConstIterT int_in_start, IterT mz_out, IterT int_out)
Smooths two parallel data arrays (positions and intensities).
Definition: GaussFilterAlgorithm.h:132
OpenMS::GaussFilterAlgorithm::coeffs_
std::vector< double > coeffs_
Coefficients.
Definition: GaussFilterAlgorithm.h:169
OpenMS::UInt
unsigned int UInt
Unsigned integer type.
Definition: Types.h:94
OpenMS::GaussFilterAlgorithm::ppm_tolerance_
double ppm_tolerance_
Definition: GaussFilterAlgorithm.h:177
OpenMS::GaussFilterAlgorithm
This class represents a Gaussian lowpass-filter which works on uniform as well as on non-uniform prof...
Definition: GaussFilterAlgorithm.h:69
OpenMS::Interfaces::BinaryDataArray
The datastructures used by the OpenSwath interfaces.
Definition: openms/include/OpenMS/INTERFACES/DataStructures.h:72
OpenMS::GaussFilterAlgorithm::filter
bool filter(OpenMS::Interfaces::SpectrumPtr spectrum)
Smooths a spectrum containing profile data.
Definition: GaussFilterAlgorithm.h:81
OpenMS::GaussFilterAlgorithm::integrate_
double integrate_(InputPeakIterator x, InputPeakIterator y, InputPeakIterator first, InputPeakIterator last)
Computes the convolution of the raw data at position x and the gaussian kernel.
Definition: GaussFilterAlgorithm.h:181
OpenMS::GaussFilterAlgorithm::spacing_
double spacing_
The spacing of the pre-tabulated kernel coefficients.
Definition: GaussFilterAlgorithm.h:173