IsoSpec  2.1.2
isoSpec++.cpp
1 /*
2  * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3  *
4  * This file is part of IsoSpec.
5  *
6  * IsoSpec is free software: you can redistribute it and/or modify
7  * it under the terms of the Simplified ("2-clause") BSD licence.
8  *
9  * IsoSpec is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  * You should have received a copy of the Simplified BSD Licence
14  * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15  */
16 
17 
18 #include "isoSpec++.h"
19 #include <cmath>
20 #include <algorithm>
21 #include <vector>
22 #include <cstdlib>
23 #include <cstring>
24 #include <unordered_map>
25 #include <queue>
26 #include <utility>
27 #include <iostream>
28 #include <iomanip>
29 #include <stdexcept>
30 #include <string>
31 #include <limits>
32 #include <memory>
33 #include <cassert>
34 #include <cctype>
35 #include "platform.h"
36 #include "conf.h"
37 #include "dirtyAllocator.h"
38 #include "operators.h"
39 #include "summator.h"
40 #include "marginalTrek++.h"
41 #include "misc.h"
42 #include "element_tables.h"
43 #include "fasta.h"
44 
45 
46 
47 namespace IsoSpec
48 {
49 
50 Iso::Iso() :
51 disowned(false),
52 dimNumber(0),
53 isotopeNumbers(new int[0]),
54 atomCounts(new int[0]),
55 confSize(0),
56 allDim(0),
57 marginals(new Marginal*[0])
58 {}
59 
60 
61 Iso::Iso(
62  int _dimNumber,
63  const int* _isotopeNumbers,
64  const int* _atomCounts,
65  const double* const * _isotopeMasses,
66  const double* const * _isotopeProbabilities
67 ) :
68 disowned(false),
69 dimNumber(_dimNumber),
70 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72 confSize(_dimNumber * sizeof(int)),
73 allDim(0),
74 marginals(nullptr)
75 {
76  for(int ii = 0; ii < dimNumber; ++ii)
77  allDim += isotopeNumbers[ii];
78 
79  std::unique_ptr<double[]> masses(new double[allDim]);
80  std::unique_ptr<double[]> probs(new double[allDim]);
81  size_t idx = 0;
82 
83  for(int ii = 0; ii < dimNumber; ++ii)
84  for(int jj = 0; jj < isotopeNumbers[ii]; ++jj)
85  {
86  masses[idx] = _isotopeMasses[ii][jj];
87  probs[idx] = _isotopeProbabilities[ii][jj];
88  ++idx;
89  }
90 
91  allDim = 0; // setupMarginals will recalculate it, assuming it's set to 0
92 
93  try{
94  setupMarginals(masses.get(), probs.get());
95  }
96  catch(...)
97  {
98  delete[] isotopeNumbers;
99  delete[] atomCounts;
100  // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
101  // However, this is not the fast code path and we can afford two unneeded instructions to keep
102  // some static analysis tools happy.
103  isotopeNumbers = nullptr;
104  atomCounts = nullptr;
105  throw;
106  }
107 }
108 
109 Iso::Iso(
110  int _dimNumber,
111  const int* _isotopeNumbers,
112  const int* _atomCounts,
113  const double* _isotopeMasses,
114  const double* _isotopeProbabilities
115 ) :
116 disowned(false),
117 dimNumber(_dimNumber),
118 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120 confSize(_dimNumber * sizeof(int)),
121 allDim(0),
122 marginals(nullptr)
123 {
124  try{
125  setupMarginals(_isotopeMasses, _isotopeProbabilities);
126  }
127  catch(...)
128  {
129  delete[] isotopeNumbers;
130  delete[] atomCounts;
131  // Since we're throwing in a constructor, the destructor won't run, and we don't need to NULL these.
132  // However, this is not the fast code path and we can afford two unneeded instructions to keep
133  // some static analysis tools happy.
134  isotopeNumbers = nullptr;
135  atomCounts = nullptr;
136  throw;
137  }
138 }
139 
140 Iso::Iso(Iso&& other) :
141 disowned(other.disowned),
142 dimNumber(other.dimNumber),
143 isotopeNumbers(other.isotopeNumbers),
144 atomCounts(other.atomCounts),
145 confSize(other.confSize),
146 allDim(other.allDim),
147 marginals(other.marginals)
148 {
149  other.disowned = true;
150 }
151 
152 
153 Iso::Iso(const Iso& other, bool fullcopy) :
154 disowned(!fullcopy),
155 dimNumber(other.dimNumber),
156 isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157 atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158 confSize(other.confSize),
159 allDim(other.allDim),
160 marginals(fullcopy ? new Marginal*[dimNumber] : other.marginals)
161 {
162  if(fullcopy)
163  {
164  for(int ii = 0; ii < dimNumber; ii++)
165  marginals[ii] = new Marginal(*other.marginals[ii]);
166  }
167 }
168 
169 Iso Iso::FromFASTA(const char* fasta, bool use_nominal_masses, bool add_water)
170 {
171  int atomCounts[6];
172 
173  parse_fasta(fasta, atomCounts);
174 
175  if(add_water)
176  {
177  atomCounts[1] += 2;
178  atomCounts[3] += 1;
179  }
180 
181  const int dimNr = atomCounts[5] > 0 ? 6 : 5;
182 
183  return Iso(dimNr, aa_isotope_numbers, atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
184 }
185 
186 inline void Iso::setupMarginals(const double* _isotopeMasses, const double* _isotopeProbabilities)
187 {
188  if (marginals == nullptr)
189  {
190  int ii = 0;
191  marginals = new Marginal*[dimNumber];
192  try
193  {
194  while(ii < dimNumber)
195  {
196  marginals[ii] = new Marginal(
197  &_isotopeMasses[allDim],
198  &_isotopeProbabilities[allDim],
199  isotopeNumbers[ii],
200  atomCounts[ii]
201  );
202  allDim += isotopeNumbers[ii];
203  ii++;
204  }
205  }
206  catch(...)
207  {
208  ii--;
209  while(ii >= 0)
210  {
211  delete marginals[ii];
212  ii--;
213  }
214  delete[] marginals;
215  marginals = nullptr;
216  throw;
217  }
218  }
219 }
220 
222 {
223  if(!disowned)
224  {
225  if (marginals != nullptr)
226  dealloc_table(marginals, dimNumber);
227  delete[] isotopeNumbers;
228  delete[] atomCounts;
229  }
230 }
231 
232 bool Iso::doMarginalsNeedSorting() const
233 {
234  int nontrivial_marginals = 0;
235  for(int ii = 0; ii < dimNumber; ii++)
236  {
237  if(marginals[ii]->get_isotopeNo() > 1)
238  nontrivial_marginals++;
239  if(nontrivial_marginals > 1)
240  return true;
241  }
242  return false;
243 }
244 
246 {
247  double mass = 0.0;
248  for (int ii = 0; ii < dimNumber; ii++)
249  mass += marginals[ii]->getLightestConfMass();
250  return mass;
251 }
252 
254 {
255  double mass = 0.0;
256  for (int ii = 0; ii < dimNumber; ii++)
257  mass += marginals[ii]->getHeaviestConfMass();
258  return mass;
259 }
260 
262 {
263  double mass = 0.0;
264  for (int ii = 0; ii < dimNumber; ii++)
265  mass += marginals[ii]->getMonoisotopicConfMass();
266  return mass;
267 }
268 
270 {
271  double lprob = 0.0;
272  for (int ii = 0; ii < dimNumber; ii++)
273  lprob += marginals[ii]->getSmallestLProb();
274  return lprob;
275 }
276 
277 double Iso::getModeMass() const
278 {
279  double mass = 0.0;
280  for (int ii = 0; ii < dimNumber; ii++)
281  mass += marginals[ii]->getModeMass();
282  return mass;
283 }
284 
285 double Iso::getModeLProb() const
286 {
287  double ret = 0.0;
288  for (int ii = 0; ii < dimNumber; ii++)
289  ret += marginals[ii]->getModeLProb();
290  return ret;
291 }
292 
294 {
295  double mass = 0.0;
296  for (int ii = 0; ii < dimNumber; ii++)
297  mass += marginals[ii]->getTheoreticalAverageMass();
298  return mass;
299 }
300 
301 double Iso::variance() const
302 {
303  double ret = 0.0;
304  for(int ii = 0; ii < dimNumber; ii++)
305  ret += marginals[ii]->variance();
306  return ret;
307 }
308 
309 
310 Iso::Iso(const char* formula, bool use_nominal_masses) :
311 disowned(false),
312 allDim(0),
313 marginals(nullptr)
314 {
315  std::vector<double> isotope_masses;
316  std::vector<double> isotope_probabilities;
317 
318  dimNumber = parse_formula(formula, isotope_masses, isotope_probabilities, &isotopeNumbers, &atomCounts, &confSize, use_nominal_masses);
319 
320  setupMarginals(isotope_masses.data(), isotope_probabilities.data());
321 }
322 
323 
324 void Iso::addElement(int atomCount, int noIsotopes, const double* isotopeMasses, const double* isotopeProbabilities)
325 {
326  Marginal* m = new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
327  realloc_append<int>(&isotopeNumbers, noIsotopes, dimNumber);
328  realloc_append<int>(&atomCounts, atomCount, dimNumber);
329  realloc_append<Marginal*>(&marginals, m, dimNumber);
330  dimNumber++;
331  confSize += sizeof(int);
332  allDim += noIsotopes;
333 }
334 
335 void Iso::saveMarginalLogSizeEstimates(double* priorities, double target_total_prob) const
336 {
337  /*
338  * We shall now use Gaussian approximations of the marginal multinomial distributions to estimate
339  * how many configurations we shall need to visit from each marginal. This should be approximately
340  * proportional to the volume of the optimal P-ellipsoid of the marginal, which, in turn is defined
341  * by the quantile function of the chi-square distribution plus some modifications.
342  *
343  * We're dropping the constant factor and the (monotonic) exp() transform - these will be used as keys
344  * for sorting, so only the ordering is important.
345  */
346 
347  double K = allDim - dimNumber;
348 
349  double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
350 
351  for(int ii = 0; ii < dimNumber; ii++)
352  priorities[ii] = marginals[ii]->getLogSizeEstimate(log_R2);
353 }
354 
355 unsigned int parse_formula(const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities, int** isotopeNumbers, int** atomCounts, unsigned int* confSize, bool use_nominal_masses)
356 {
357  // This function is NOT guaranteed to be secure against malicious input. It should be used only for debugging.
358  size_t slen = strlen(formula);
359  // Yes, it would be more elegant to use std::string here, but it's the only promiment place where it would be used in IsoSpec, and avoiding it here
360  // means we can run the whole thing through Clang's memory sanitizer without the need for instrumented libc++/libstdc++. That's worth messing with char pointers a
361  // little bit.
362  std::vector<std::pair<const char*, size_t> > elements;
363  std::vector<int> numbers;
364 
365  if(slen == 0)
366  throw std::invalid_argument("Invalid formula: can't be empty");
367 
368  if(!isdigit(formula[slen-1]))
369  throw std::invalid_argument("Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
370 
371  for(size_t ii = 0; ii < slen; ii++)
372  if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373  throw std::invalid_argument("Invalid formula: contains invalid (non-digit, non-alpha) character");
374 
375  size_t position = 0;
376 
377  while(position < slen)
378  {
379  size_t elem_end = position;
380  while(isalpha(formula[elem_end]))
381  elem_end++;
382  size_t digit_end = elem_end;
383  while(isdigit(formula[digit_end]))
384  digit_end++;
385  elements.emplace_back(&formula[position], elem_end-position);
386  numbers.push_back(std::stoi(&formula[elem_end]));
387  position = digit_end;
388  }
389 
390  std::vector<int> element_indexes;
391 
392  for (unsigned int i = 0; i < elements.size(); i++)
393  {
394  int idx = -1;
395  for(int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
396  {
397  if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
398  {
399  idx = j;
400  break;
401  }
402  }
403  if(idx < 0)
404  throw std::invalid_argument("Invalid formula");
405  element_indexes.push_back(idx);
406  }
407 
408  std::vector<int> _isotope_numbers;
409  const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
410 
411  for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
412  {
413  int num = 0;
414  int at_idx = *it;
415  int elem_ID = elem_table_ID[at_idx];
416  while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
417  {
418  isotope_masses.push_back(masses[at_idx]);
419  isotope_probabilities.push_back(elem_table_probability[at_idx]);
420  at_idx++;
421  num++;
422  }
423  _isotope_numbers.push_back(num);
424  }
425 
426  const unsigned int dimNumber = elements.size();
427 
428  *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429  *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430  *confSize = dimNumber * sizeof(int);
431 
432  return dimNumber;
433 }
434 
435 
436 /*
437  * ----------------------------------------------------------------------------------------------------------
438  */
439 
440 
441 
442 IsoGenerator::IsoGenerator(Iso&& iso, bool alloc_partials) :
443  Iso(std::move(iso)),
444  mode_lprob(getModeLProb()),
445  partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446  partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447  partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
448 {
449  for(int ii = 0; ii < dimNumber; ++ii)
450  marginals[ii]->ensureModeConf();
451  if(alloc_partials)
452  {
453  partialLProbs[dimNumber] = 0.0;
454  partialMasses[dimNumber] = 0.0;
455  partialProbs[dimNumber] = 1.0;
456  }
457 }
458 
459 
461 {
462  if(partialLProbs != nullptr)
463  delete[] partialLProbs;
464  if(partialMasses != nullptr)
465  delete[] partialMasses;
466  if(partialProbs != nullptr)
467  delete[] partialProbs;
468 }
469 
470 
471 
472 /*
473  * ----------------------------------------------------------------------------------------------------------
474  */
475 
476 static const double minsqrt = -1.3407796239501852e+154; // == constexpr(-sqrt(std::numeric_limits<double>::max()));
477 
478 IsoThresholdGenerator::IsoThresholdGenerator(Iso&& iso, double _threshold, bool _absolute, int tabSize, int hashSize, bool reorder_marginals)
479 : IsoGenerator(std::move(iso)),
480 Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
481 {
482  counter = new int[dimNumber];
483  maxConfsLPSum = new double[dimNumber-1];
484  marginalResultsUnsorted = new PrecalculatedMarginal*[dimNumber];
485 
486  empty = false;
487 
488  const bool marginalsNeedSorting = doMarginalsNeedSorting();
489 
490  for(int ii = 0; ii < dimNumber; ii++)
491  {
492  counter[ii] = 0;
493  marginalResultsUnsorted[ii] = new PrecalculatedMarginal(std::move(*(marginals[ii])),
494  Lcutoff - mode_lprob + marginals[ii]->fastGetModeLProb(),
495  marginalsNeedSorting,
496  tabSize,
497  hashSize);
498 
499  if(!marginalResultsUnsorted[ii]->inRange(0))
500  empty = true;
501  }
502 
503  if(reorder_marginals && dimNumber > 1)
504  {
505  OrderMarginalsBySizeDecresing<PrecalculatedMarginal> comparator(marginalResultsUnsorted);
506  int* tmpMarginalOrder = new int[dimNumber];
507 
508  for(int ii = 0; ii < dimNumber; ii++)
509  tmpMarginalOrder[ii] = ii;
510 
511  std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, comparator);
512  marginalResults = new PrecalculatedMarginal*[dimNumber];
513 
514  for(int ii = 0; ii < dimNumber; ii++)
515  marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
516 
517  marginalOrder = new int[dimNumber];
518  for(int ii = 0; ii < dimNumber; ii++)
519  marginalOrder[tmpMarginalOrder[ii]] = ii;
520 
521  delete[] tmpMarginalOrder;
522  }
523  else
524  {
525  marginalResults = marginalResultsUnsorted;
526  marginalOrder = nullptr;
527  }
528 
529  lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
530 
531  if(dimNumber > 1)
532  maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
533 
534  for(int ii = 1; ii < dimNumber-1; ii++)
535  maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
536 
537  lProbs_ptr = lProbs_ptr_start;
538 
539  partialLProbs_second = partialLProbs;
540  partialLProbs_second++;
541 
542  if(!empty)
543  {
544  recalc(dimNumber-1);
545  counter[0]--;
546  lProbs_ptr--;
547  }
548  else
549  {
551  lcfmsv = std::numeric_limits<double>::infinity();
552  }
553 }
554 
556 {
557  for(int ii = 0; ii < dimNumber; ii++)
558  {
559  counter[ii] = marginalResults[ii]->get_no_confs()-1;
560  partialLProbs[ii] = -std::numeric_limits<double>::infinity();
561  }
562  partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
563  lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
564 }
565 
567 {
568  if(empty)
569  return 0;
570 
571  if(dimNumber == 1)
572  return marginalResults[0]->get_no_confs();
573 
574  const double* lProbs_ptr_l = marginalResults[0]->get_lProbs_ptr() + marginalResults[0]->get_no_confs();
575 
576  std::unique_ptr<const double* []> lProbs_restarts(new const double*[dimNumber]);
577 
578  for(int ii = 0; ii < dimNumber; ii++)
579  lProbs_restarts[ii] = lProbs_ptr_l;
580 
581  size_t count = 0;
582 
583  while(*lProbs_ptr_l < lcfmsv)
584  lProbs_ptr_l--;
585 
586  while(true)
587  {
588  count += lProbs_ptr_l - lProbs_ptr_start + 1;
589 
590  int idx = 0;
591  int * cntr_ptr = counter;
592 
593  while(idx < dimNumber - 1)
594  {
595  *cntr_ptr = 0;
596  idx++;
597  cntr_ptr++;
598  (*cntr_ptr)++;
599 
600  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
601  if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
602  {
603  short_recalc(idx-1);
604  lProbs_ptr_l = lProbs_restarts[idx];
605  while(*lProbs_ptr_l < lcfmsv)
606  lProbs_ptr_l--;
607  for(idx--; idx > 0; idx--)
608  lProbs_restarts[idx] = lProbs_ptr_l;
609  break;
610  }
611  }
612  if(idx == dimNumber - 1)
613  {
614  reset();
615  return count;
616  }
617  }
618 }
619 
621 {
622  if(empty)
623  {
625  return;
626  }
627 
628  partialLProbs[dimNumber] = 0.0;
629 
630  memset(counter, 0, sizeof(int)*dimNumber);
631  recalc(dimNumber-1);
632  counter[0]--;
633 
634  lProbs_ptr = lProbs_ptr_start - 1;
635 }
636 
637 IsoThresholdGenerator::~IsoThresholdGenerator()
638 {
639  delete[] counter;
640  delete[] maxConfsLPSum;
641  if (marginalResultsUnsorted != marginalResults)
642  delete[] marginalResultsUnsorted;
643  dealloc_table(marginalResults, dimNumber);
644  if(marginalOrder != nullptr)
645  delete[] marginalOrder;
646 }
647 
648 
649 /*
650  * ------------------------------------------------------------------------------------------------------------------------
651  */
652 
653 
654 IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso, int tabSize, int hashSize, bool reorder_marginals, double t_prob_hint)
655 : IsoGenerator(std::move(iso))
656 {
657  counter = new int[dimNumber];
658  maxConfsLPSum = new double[dimNumber-1];
659  currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660  lastLThreshold = (std::numeric_limits<double>::min)();
661  marginalResultsUnsorted = new LayeredMarginal*[dimNumber];
662  resetPositions = new const double*[dimNumber];
663  marginalsNeedSorting = doMarginalsNeedSorting();
664 
665  memset(counter, 0, sizeof(int)*dimNumber);
666 
667  for(int ii = 0; ii < dimNumber; ii++)
668  marginalResultsUnsorted[ii] = new LayeredMarginal(std::move(*(marginals[ii])), tabSize, hashSize);
669 
670  if(reorder_marginals && dimNumber > 1)
671  {
672  double* marginal_priorities = new double[dimNumber];
673 
674  saveMarginalLogSizeEstimates(marginal_priorities, t_prob_hint);
675 
676  int* tmpMarginalOrder = new int[dimNumber];
677 
678  for(int ii = 0; ii < dimNumber; ii++)
679  tmpMarginalOrder[ii] = ii;
680 
681  TableOrder<double> TO(marginal_priorities);
682 
683  std::sort(tmpMarginalOrder, tmpMarginalOrder + dimNumber, TO);
684  marginalResults = new LayeredMarginal*[dimNumber];
685 
686  for(int ii = 0; ii < dimNumber; ii++)
687  marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
688 
689  marginalOrder = new int[dimNumber];
690  for(int ii = 0; ii < dimNumber; ii++)
691  marginalOrder[tmpMarginalOrder[ii]] = ii;
692 
693  delete[] tmpMarginalOrder;
694  delete[] marginal_priorities;
695  }
696  else
697  {
698  marginalResults = marginalResultsUnsorted;
699  marginalOrder = nullptr;
700  }
701 
702  lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr();
703 
704  if(dimNumber > 1)
705  maxConfsLPSum[0] = marginalResults[0]->fastGetModeLProb();
706 
707  for(int ii = 1; ii < dimNumber-1; ii++)
708  maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
709 
710  lProbs_ptr = lProbs_ptr_start;
711 
712  partialLProbs_second = partialLProbs;
713  partialLProbs_second++;
714 
715  counter[0]--;
716  lProbs_ptr--;
717  lastLThreshold = 10.0;
718  IsoLayeredGenerator::nextLayer(-0.00001);
719 }
720 
721 bool IsoLayeredGenerator::nextLayer(double offset)
722 {
723  size_t first_mrg_size = marginalResults[0]->get_no_confs();
724 
725  if(lastLThreshold < getUnlikeliestPeakLProb())
726  return false;
727 
728  lastLThreshold = currentLThreshold;
729  currentLThreshold += offset;
730 
731  for(int ii = 0; ii < dimNumber; ii++)
732  {
733  marginalResults[ii]->extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
734  counter[ii] = 0;
735  }
736 
737  lProbs_ptr_start = marginalResults[0]->get_lProbs_ptr(); // vector relocation might have happened
738 
739  lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
740 
741  for(int ii = 0; ii < dimNumber; ii++)
742  resetPositions[ii] = lProbs_ptr;
743 
744  recalc(dimNumber-1);
745 
746  return true;
747 }
748 
749 bool IsoLayeredGenerator::carry()
750 {
751  // If we reached this point, a carry is needed
752 
753  int idx = 0;
754 
755  int * cntr_ptr = counter;
756 
757  while(idx < dimNumber-1)
758  {
759  *cntr_ptr = 0;
760  idx++;
761  cntr_ptr++;
762  (*cntr_ptr)++;
763  partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
764  if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
765  {
766  partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->get_mass(counter[idx]);
767  partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->get_prob(counter[idx]);
768  recalc(idx-1);
769  lProbs_ptr = resetPositions[idx];
770 
771  while(*lProbs_ptr <= last_lcfmsv)
772  lProbs_ptr--;
773 
774  for(int ii = 0; ii < idx; ii++)
775  resetPositions[ii] = lProbs_ptr;
776 
777  return true;
778  }
779  }
780 
781  return false;
782 }
783 
784 
786 {
787  for(int ii = 0; ii < dimNumber; ii++)
788  {
789  counter[ii] = marginalResults[ii]->get_no_confs()-1;
790  partialLProbs[ii] = -std::numeric_limits<double>::infinity();
791  }
792  partialLProbs[dimNumber] = -std::numeric_limits<double>::infinity();
793  lProbs_ptr = lProbs_ptr_start + marginalResults[0]->get_no_confs()-1;
794 }
795 
796 IsoLayeredGenerator::~IsoLayeredGenerator()
797 {
798  delete[] counter;
799  delete[] maxConfsLPSum;
800  delete[] resetPositions;
801  if (marginalResultsUnsorted != marginalResults)
802  delete[] marginalResultsUnsorted;
803  dealloc_table(marginalResults, dimNumber);
804  if(marginalOrder != nullptr)
805  delete[] marginalOrder;
806 }
807 
808 
809 /*
810  * ------------------------------------------------------------------------------------------------------------------------
811  */
812 
813 
814 IsoOrderedGenerator::IsoOrderedGenerator(Iso&& iso, int _tabSize, int _hashSize) :
815 IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
816 {
817  partialLProbs = &currentLProb;
818  partialMasses = &currentMass;
819  partialProbs = &currentProb;
820 
821  marginalResults = new MarginalTrek*[dimNumber];
822 
823  for(int i = 0; i < dimNumber; i++)
824  marginalResults[i] = new MarginalTrek(std::move(*(marginals[i])), _tabSize, _hashSize);
825 
826  logProbs = new const pod_vector<double>*[dimNumber];
827  masses = new const pod_vector<double>*[dimNumber];
828  marginalConfs = new const pod_vector<int*>*[dimNumber];
829 
830  for(int i = 0; i < dimNumber; i++)
831  {
832  masses[i] = &marginalResults[i]->conf_masses();
833  logProbs[i] = &marginalResults[i]->conf_lprobs();
834  marginalConfs[i] = &marginalResults[i]->confs();
835  }
836 
837  topConf = allocator.newConf();
838  memset(
839  reinterpret_cast<char*>(topConf) + sizeof(double),
840  0,
841  sizeof(int)*dimNumber
842  );
843 
844  *(reinterpret_cast<double*>(topConf)) =
845  combinedSum(
846  getConf(topConf),
847  logProbs,
848  dimNumber
849  );
850 
851  pq.push(topConf);
852 }
853 
854 
856 {
857  dealloc_table<MarginalTrek*>(marginalResults, dimNumber);
858  delete[] logProbs;
859  delete[] masses;
860  delete[] marginalConfs;
861  partialLProbs = nullptr;
862  partialMasses = nullptr;
863  partialProbs = nullptr;
864 }
865 
866 
868 {
869  if(pq.size() < 1)
870  return false;
871 
872 
873  topConf = pq.top();
874  pq.pop();
875 
876  int* topConfIsoCounts = getConf(topConf);
877 
878  currentLProb = *(reinterpret_cast<double*>(topConf));
879  currentMass = combinedSum( topConfIsoCounts, masses, dimNumber );
880  currentProb = exp(currentLProb);
881 
882  ccount = -1;
883  for(int j = 0; j < dimNumber; ++j)
884  {
885  if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
886  {
887  if(ccount == -1)
888  {
889  topConfIsoCounts[j]++;
890  *(reinterpret_cast<double*>(topConf)) = combinedSum(topConfIsoCounts, logProbs, dimNumber);
891  pq.push(topConf);
892  topConfIsoCounts[j]--;
893  ccount = j;
894  }
895  else
896  {
897  void* acceptedCandidate = allocator.newConf();
898  int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
899  memcpy(acceptedCandidateIsoCounts, topConfIsoCounts, confSize);
900 
901  acceptedCandidateIsoCounts[j]++;
902 
903  *(reinterpret_cast<double*>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs, dimNumber);
904 
905  pq.push(acceptedCandidate);
906  }
907  }
908  if(topConfIsoCounts[j] > 0)
909  break;
910  }
911  if(ccount >=0)
912  topConfIsoCounts[ccount]++;
913 
914  return true;
915 }
916 
917 
918 /*
919  * ---------------------------------------------------------------------------------------------------
920  */
921 
922 
923 IsoStochasticGenerator::IsoStochasticGenerator(Iso&& iso, size_t no_molecules, double _precision, double _beta_bias) :
924 IsoGenerator(std::move(iso)),
925 ILG(std::move(*this)),
926 to_sample_left(no_molecules),
927 precision(_precision),
928 beta_bias(_beta_bias),
929 confs_prob(0.0),
930 chasing_prob(0.0)
931 {}
932 
933 /*
934  * ---------------------------------------------------------------------------------------------------
935  */
936 
937 
938 
939 
940 
941 } // namespace IsoSpec
942 
IsoSpec::Iso::allDim
int allDim
Definition: isoSpec++.h:67
IsoSpec::IsoGenerator::partialProbs
double * partialProbs
Definition: isoSpec++.h:191
IsoSpec::Iso::confSize
unsigned int confSize
Definition: isoSpec++.h:66
pod_vector< double >
IsoSpec::Iso::getLightestPeakMass
double getLightestPeakMass() const
Get the mass of the lightest peak in the isotopic distribution.
Definition: isoSpec++.cpp:245
IsoSpec::PrecalculatedMarginal
Precalculated Marginal class.
Definition: marginalTrek++.h:244
IsoSpec::Marginal
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:42
IsoSpec::OrderMarginalsBySizeDecresing
Definition: operators.h:141
IsoSpec::LayeredMarginal::get_no_confs
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
Definition: marginalTrek++.h:392
IsoSpec::IsoLayeredGenerator::recalc
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
Definition: isoSpec++.h:520
IsoSpec::Iso::dimNumber
int dimNumber
Definition: isoSpec++.h:63
IsoSpec::Iso::FromFASTA
static Iso FromFASTA(const char *fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C string.
Definition: isoSpec++.cpp:169
IsoSpec::PrecalculatedMarginal::get_lProbs_ptr
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.
Definition: marginalTrek++.h:308
IsoSpec::Marginal::fastGetModeLProb
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
Definition: marginalTrek++.h:124
IsoSpec::Iso::saveMarginalLogSizeEstimates
void saveMarginalLogSizeEstimates(double *priorities, double target_total_prob) const
Save estimates of logarithms of target sizes of marginals using Gaussian approximation into argument ...
Definition: isoSpec++.cpp:335
IsoSpec::IsoThresholdGenerator::count_confs
size_t count_confs()
Definition: isoSpec++.cpp:566
IsoSpec::Iso::getHeaviestPeakMass
double getHeaviestPeakMass() const
Get the mass of the heaviest peak in the isotopic distribution.
Definition: isoSpec++.cpp:253
IsoSpec
Definition: allocator.cpp:20
IsoSpec::Iso::variance
double variance() const
Get the theoretical variance of the distribution.
Definition: isoSpec++.cpp:301
IsoSpec::IsoOrderedGenerator::~IsoOrderedGenerator
virtual ~IsoOrderedGenerator()
Destructor.
Definition: isoSpec++.cpp:855
IsoSpec::PrecalculatedMarginal::get_lProb
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
Definition: marginalTrek++.h:288
IsoSpec::LayeredMarginal::get_mass
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
Definition: marginalTrek++.h:383
IsoSpec::Iso::getModeLProb
double getModeLProb() const
Get the log-probability of the mode-configuration (if there are many modes, they share this value).
Definition: isoSpec++.cpp:285
IsoSpec::Iso
The Iso class for the calculation of the isotopic distribution.
Definition: isoSpec++.h:49
IsoSpec::Iso::getUnlikeliestPeakLProb
double getUnlikeliestPeakLProb() const
Get the logprobability of the least probable subisotopologue.
Definition: isoSpec++.cpp:269
IsoSpec::Iso::~Iso
virtual ~Iso()
Destructor.
Definition: isoSpec++.cpp:221
IsoSpec::Iso::isotopeNumbers
int * isotopeNumbers
Definition: isoSpec++.h:64
IsoSpec::Iso::getMonoisotopicPeakMass
double getMonoisotopicPeakMass() const
Definition: isoSpec++.cpp:261
IsoSpec::IsoGenerator::partialMasses
double * partialMasses
Definition: isoSpec++.h:190
IsoSpec::IsoThresholdGenerator::reset
void reset()
Definition: isoSpec++.cpp:620
IsoSpec::IsoOrderedGenerator::advanceToNextConfiguration
bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
Definition: isoSpec++.cpp:867
IsoSpec::PrecalculatedMarginal::get_no_confs
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
Definition: marginalTrek++.h:328
IsoSpec::IsoGenerator::IsoGenerator
IsoGenerator(Iso &&iso, bool alloc_partials=true)
Move constructor.
Definition: isoSpec++.cpp:442
IsoSpec::Iso::marginals
Marginal ** marginals
Definition: isoSpec++.h:68
IsoSpec::IsoGenerator::partialLProbs
double * partialLProbs
Definition: isoSpec++.h:189
IsoSpec::LayeredMarginal::get_lProbs_ptr
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
Definition: marginalTrek++.h:386
IsoSpec::IsoGenerator::~IsoGenerator
virtual ~IsoGenerator()
Destructor.
Definition: isoSpec++.cpp:460
IsoSpec::Iso::addElement
void addElement(int atomCount, int noIsotopes, const double *isotopeMasses, const double *isotopeProbabilities)
Add an element to the molecule. Note: this method can only be used BEFORE Iso is used to construct an...
Definition: isoSpec++.cpp:324
IsoSpec::LayeredMarginal::get_prob
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
Definition: marginalTrek++.h:380
IsoSpec::LayeredMarginal::extend
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
Definition: marginalTrek++.cpp:529
IsoSpec::IsoThresholdGenerator::terminate_search
void terminate_search()
Block the subsequent search of isotopologues.
Definition: isoSpec++.cpp:555
IsoSpec::Iso::getModeMass
double getModeMass() const
Get the mass of the mode-configuration (if there are many modes, it is undefined which one will be se...
Definition: isoSpec++.cpp:277
IsoSpec::IsoGenerator
The generator of isotopologues.
Definition: isoSpec++.h:183
IsoSpec::MarginalTrek
The marginal distribution class (a subisotopologue).
Definition: marginalTrek++.h:175
IsoSpec::LayeredMarginal::get_lProb
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
Definition: marginalTrek++.h:377
IsoSpec::Iso::getTheoreticalAverageMass
double getTheoreticalAverageMass() const
Get the theoretical average mass of the molecule.
Definition: isoSpec++.cpp:293
IsoSpec::Iso::atomCounts
int * atomCounts
Definition: isoSpec++.h:65
IsoSpec::IsoLayeredGenerator::terminate_search
void terminate_search()
Block the subsequent search of isotopologues.
Definition: isoSpec++.cpp:785