OpenMS  3.0.0
TriqlerFile.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2022.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Timo Sachsenberg $
32 // $Authors: Timo Sachsenberg $
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
#include <OpenMS/FORMAT/TextFile.h>
#include <OpenMS/KERNEL/ConsensusMap.h>
#include <OpenMS/METADATA/ExperimentalDesign.h>

#include <map>
#include <set>
#include <tuple>
#include <unordered_map>
#include <utility>
#include <vector>
46 
47 namespace OpenMS
48 {
50  using IndProtGrps = std::vector<IndProtGrp>;
51 
57  class OPENMS_DLLAPI TriqlerFile
58  {
59  public:
61  TriqlerFile() = default;
63  ~TriqlerFile() = default;
64 
66  void storeLFQ(const String& filename,
67  const ConsensusMap &consensus_map,
68  const ExperimentalDesign& design,
69  const StringList& reannotate_filenames,
70  const String& condition);
71 
72  private:
75 
76  static const String na_string_;
77  static const char delim_ = ',';
78  static const char accdelim_ = ';';
79  static const char quote_ = '"';
80 
81  /*
82  * @brief: Struct to aggregate intermediate information from ConsensusFeature and ConsensusMap,
83  * such as filenames, intensities, retention times, labels and features (for further processing)
84  */
86  {
87  std::vector< std::vector< String > > consensus_feature_filenames; //< Filenames of ConsensusFeature
88  std::vector< std::vector< Intensity > > consensus_feature_intensities; //< Intensities of ConsensusFeature
89  std::vector< std::vector< Coordinate > > consensus_feature_retention_times; //< Retention times of ConsensusFeature
90  std::vector< std::vector< unsigned > > consensus_feature_labels; //< Labels of ConsensusFeature
91  std::vector<BaseFeature> features; //<s Features of ConsensusMap
92  };
93 
94  /*
95  * @brief: Aggregates information from ConsensusFeature and ConsensusMap,
96  * such as filenames, intensities, retention times, labels and features.
97  * Stores them in AggregatedConsensusInfo for later processing
98  */
99  TriqlerFile::AggregatedConsensusInfo aggregateInfo_(const ConsensusMap& consensus_map,
100  const std::vector<String>& spectra_paths);
101 
102  /*
103  * @brief: Internal function to check if condition exists in Experimental Design
104  */
105  static void checkConditionLFQ_(const ExperimentalDesign::SampleSection& sampleSection, const String& condition);
106 
107  /*
108  * In OpenMS, a run is split into multiple fractions.
109  */
110  static void assembleRunMap_(
111  std::map< std::pair< String, unsigned>, unsigned> &run_map,
112  const ExperimentalDesign &design);
113 
114  /*
115  * @brief checks two vectors for same content
116  */
117  static bool checkUnorderedContent_(const std::vector< String> &first, const std::vector< String > &second);
118 
119  OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
120  {
122  for (const OpenMS::Peak2D::IntensityType &intensity : intensities)
123  {
124  result += intensity;
125  }
126  return result;
127  }
128 
129  OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
130  {
131  return sumIntensity_(intensities) / intensities.size();
132  }
133 
135  {
136  public :
138  const String& run,
139  const String& condition,
140  const String& precursor_charge,
141  const String& search_score,
142  const String& intensity,
143  const String& sequence,
144  const String& accession
145  ): run_(run),
146  condition_(condition),
147  precursor_charge_(precursor_charge),
148  search_score_(search_score),
149  intensity_(intensity),
150  sequence_(sequence),
151  accession_(accession)
152  {}
153 
154  TriqlerLine_(TriqlerLine_&& m) = default;
155 
156  TriqlerLine_(const TriqlerLine_& m) = default;
157 
159  String toString() const;
160 
161  friend bool operator<(const TriqlerLine_ &l,
162  const TriqlerLine_ &r)
163  {
164  return std::tie(l.accession_, l.run_, l.condition_, l.precursor_charge_, l.intensity_, l.sequence_) <
165  std::tie(r.accession_, r.run_, r.condition_, r.precursor_charge_, r.intensity_, r.sequence_);
166  }
167 
168  private:
176  };
177 
178  using MapSequenceToLines_ = std::map<String, std::set<TriqlerLine_>>;
179  /*
180  * @brief Constructs the lines and adds them to the TextFile
181  * @param peptideseq_quantifyable Has to be a set (only) for deterministic ordered output
182  */
183  void constructFile_(TextFile& csv_out,
184  const std::set<String>& peptideseq_quantifyable,
185  const MapSequenceToLines_& peptideseq_to_line) const;
186 
187  /*
188  * @brief Constructs the accession to indist. group mapping
189  */
190  static std::unordered_map<OpenMS::String, const IndProtGrp* > getAccessionToGroupMap_(const IndProtGrps& ind_prots);
191 
192 
193  /*
194  * @brief Based on the evidence accession set in a PeptideHit, checks if is unique and therefore quantifyable
195  * in a group context.
196  *
197  */
198  bool isQuantifyable_(
199  const std::set<String>& accs,
200  const std::unordered_map<String, const IndProtGrp*>& accession_to_group) const;
201 
202  };
203 } // namespace OpenMS
ConsensusXMLFile.h
TriqlerFile.h
MzTabFile.h
OpenMS::TOPPBase
Base class for TOPP applications.
Definition: TOPPBase.h:147
OpenMS::ExperimentalDesign
Representation of an experimental design in OpenMS. Instances can be loaded with the ExperimentalDesi...
Definition: ExperimentalDesign.h:242
OpenMS::ExperimentalDesignFile::load
static ExperimentalDesign load(const String &tsv_file, bool require_spectra_files)
Loads an experimental design from a tabular separated file.
FileHandler.h
OpenMS::TriqlerFile::sumIntensity_
OpenMS::Peak2D::IntensityType sumIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: TriqlerFile.h:119
TextFile.h
OpenMS::IndProtGrps
std::vector< IndProtGrp > IndProtGrps
Definition: MSstatsFile.h:50
double
OpenMS::toString
const std::string & toString(const DriftTimeUnit value)
ExperimentalDesignFile.h
OpenMS::TriqlerFile::TriqlerLine_::precursor_charge_
String precursor_charge_
Definition: TriqlerFile.h:171
OpenMS::String
A more convenient string class.
Definition: String.h:58
ConsensusMap.h
OpenMS::FileTypes::CONSENSUSXML
OpenMS consensus map format (.consensusXML)
Definition: FileTypes.h:65
OpenMS::TextFile
This class provides some basic file handling methods for text files.
Definition: TextFile.h:46
FeatureXMLFile.h
OpenMS::TriqlerFile::TriqlerLine_::operator<
friend bool operator<(const TriqlerLine_ &l, const TriqlerLine_ &r)
Definition: TriqlerFile.h:161
OpenMS::TriqlerFile::storeLFQ
void storeLFQ(const String &filename, const ConsensusMap &consensus_map, const ExperimentalDesign &design, const StringList &reannotate_filenames, const String &condition)
store label free experiment
OpenMS::TriqlerFile::na_string_
static const String na_string_
Definition: TriqlerFile.h:76
OPENMS_LOG_FATAL_ERROR
#define OPENMS_LOG_FATAL_ERROR
Macro to be used if fatal error are reported (processing stops)
Definition: LogStream.h:450
OpenMS::TriqlerFile
File adapter for Triqler files.
Definition: TriqlerFile.h:57
OpenMS
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:47
OpenMS::TriqlerFile::meanIntensity_
OpenMS::Peak2D::IntensityType meanIntensity_(const std::set< OpenMS::Peak2D::IntensityType > &intensities) const
Definition: TriqlerFile.h:129
OpenMS::TriqlerFile::TriqlerLine_::condition_
String condition_
Definition: TriqlerFile.h:170
OpenMS::TriqlerFile::TriqlerLine_::sequence_
String sequence_
Definition: TriqlerFile.h:174
OpenMS::ExperimentalDesign::SampleSection
Definition: ExperimentalDesign.h:263
OpenMS::TriqlerFile::TriqlerLine_::accession_
String accession_
Definition: TriqlerFile.h:175
OpenMS::FileTypes::Type
Type
Actual file types enum.
Definition: FileTypes.h:56
OpenMS::TriqlerFile::Coordinate
OpenMS::Peak2D::CoordinateType Coordinate
Definition: TriqlerFile.h:74
OpenMS::TriqlerFile::AggregatedConsensusInfo::consensus_feature_retention_times
std::vector< std::vector< Coordinate > > consensus_feature_retention_times
Definition: TriqlerFile.h:89
OpenMS::FileHandler::getType
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
OpenMS::TriqlerFile::TriqlerLine_::run_
String run_
Definition: TriqlerFile.h:169
OpenMS::TriqlerFile::MapSequenceToLines_
std::map< String, std::set< TriqlerLine_ > > MapSequenceToLines_
Definition: TriqlerFile.h:178
MzTab.h
OpenMS::TriqlerFile::TriqlerLine_::TriqlerLine_
TriqlerLine_(const String &run, const String &condition, const String &precursor_charge, const String &search_score, const String &intensity, const String &sequence, const String &accession)
Definition: TriqlerFile.h:137
OpenMS::IndProtGrp
OpenMS::ProteinIdentification::ProteinGroup IndProtGrp
Definition: MSstatsFile.h:49
OpenMS::ConsensusMap
A container for consensus elements.
Definition: ConsensusMap.h:82
OpenMS::StringList
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
OpenMS::ConsensusXMLFile::load
void load(const String &filename, ConsensusMap &map)
Loads a consensus map from file and calls updateRanges.
OpenMS::TriqlerFile::TriqlerLine_::intensity_
String intensity_
Definition: TriqlerFile.h:173
main
int main(int argc, const char **argv)
Definition: INIFileEditor.cpp:71
ExperimentalDesign.h
OpenMS::ExperimentalDesign::getSampleSection
const ExperimentalDesign::SampleSection & getSampleSection() const
OpenMS::TriqlerFile::TriqlerLine_
Definition: TriqlerFile.h:134
OpenMS::TriqlerFile::AggregatedConsensusInfo::consensus_feature_intensities
std::vector< std::vector< Intensity > > consensus_feature_intensities
Definition: TriqlerFile.h:88
float
OpenMS::TriqlerFile::AggregatedConsensusInfo::consensus_feature_filenames
std::vector< std::vector< String > > consensus_feature_filenames
Definition: TriqlerFile.h:87
OpenMS::TriqlerFile::TriqlerLine_::search_score_
String search_score_
Definition: TriqlerFile.h:172
OpenMS::TriqlerFile::AggregatedConsensusInfo
Definition: TriqlerFile.h:85
File.h
OpenMS::TriqlerFile::AggregatedConsensusInfo::consensus_feature_labels
std::vector< std::vector< unsigned > > consensus_feature_labels
Definition: TriqlerFile.h:90
OpenMS::TriqlerFile::Intensity
OpenMS::Peak2D::IntensityType Intensity
Definition: TriqlerFile.h:73
TOPPBase.h
OpenMS::TriqlerFile::AggregatedConsensusInfo::features
std::vector< BaseFeature > features
Definition: TriqlerFile.h:91
OpenMS::ConsensusXMLFile
This class provides Input functionality for ConsensusMaps and Output functionality for alignments and...
Definition: ConsensusXMLFile.h:58
OpenMS::ProteinIdentification::ProteinGroup
Bundles multiple (e.g. indistinguishable) proteins in a group.
Definition: ProteinIdentification.h:117