18 #include "isoSpec++.h"
24 #include <unordered_map>
37 #include "dirtyAllocator.h"
38 #include "operators.h"
40 #include "marginalTrek++.h"
42 #include "element_tables.h"
53 isotopeNumbers(new int[0]),
54 atomCounts(new int[0]),
57 marginals(new Marginal*[0])
63 const int* _isotopeNumbers,
64 const int* _atomCounts,
65 const double*
const * _isotopeMasses,
66 const double*
const * _isotopeProbabilities
69 dimNumber(_dimNumber),
70 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
71 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
72 confSize(_dimNumber * sizeof(int)),
76 for(
int ii = 0; ii < dimNumber; ++ii)
77 allDim += isotopeNumbers[ii];
79 std::unique_ptr<double[]> masses(
new double[allDim]);
80 std::unique_ptr<double[]> probs(
new double[allDim]);
83 for(
int ii = 0; ii < dimNumber; ++ii)
84 for(
int jj = 0; jj < isotopeNumbers[ii]; ++jj)
86 masses[idx] = _isotopeMasses[ii][jj];
87 probs[idx] = _isotopeProbabilities[ii][jj];
94 setupMarginals(masses.get(), probs.get());
98 delete[] isotopeNumbers;
103 isotopeNumbers =
nullptr;
104 atomCounts =
nullptr;
111 const int* _isotopeNumbers,
112 const int* _atomCounts,
113 const double* _isotopeMasses,
114 const double* _isotopeProbabilities
117 dimNumber(_dimNumber),
118 isotopeNumbers(array_copy<int>(_isotopeNumbers, _dimNumber)),
119 atomCounts(array_copy<int>(_atomCounts, _dimNumber)),
120 confSize(_dimNumber * sizeof(int)),
125 setupMarginals(_isotopeMasses, _isotopeProbabilities);
141 disowned(other.disowned),
142 dimNumber(other.dimNumber),
143 isotopeNumbers(other.isotopeNumbers),
144 atomCounts(other.atomCounts),
145 confSize(other.confSize),
146 allDim(other.allDim),
147 marginals(other.marginals)
149 other.disowned =
true;
153 Iso::Iso(
const Iso& other,
bool fullcopy) :
155 dimNumber(other.dimNumber),
156 isotopeNumbers(fullcopy ? array_copy<int>(other.isotopeNumbers, dimNumber) : other.isotopeNumbers),
157 atomCounts(fullcopy ? array_copy<int>(other.atomCounts, dimNumber) : other.atomCounts),
158 confSize(other.confSize),
159 allDim(other.allDim),
160 marginals(fullcopy ? new
Marginal*[dimNumber] : other.marginals)
183 return Iso(dimNr, aa_isotope_numbers,
atomCounts, use_nominal_masses ? aa_elem_nominal_masses : aa_elem_masses, aa_elem_probabilities);
186 inline void Iso::setupMarginals(
const double* _isotopeMasses,
const double* _isotopeProbabilities)
198 &_isotopeProbabilities[
allDim],
232 bool Iso::doMarginalsNeedSorting()
const
234 int nontrivial_marginals = 0;
238 nontrivial_marginals++;
239 if(nontrivial_marginals > 1)
249 mass +=
marginals[ii]->getLightestConfMass();
257 mass +=
marginals[ii]->getHeaviestConfMass();
265 mass +=
marginals[ii]->getMonoisotopicConfMass();
273 lprob +=
marginals[ii]->getSmallestLProb();
310 Iso::Iso(
const char* formula,
bool use_nominal_masses) :
315 std::vector<double> isotope_masses;
316 std::vector<double> isotope_probabilities;
320 setupMarginals(isotope_masses.data(), isotope_probabilities.data());
324 void Iso::addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities)
326 Marginal* m =
new Marginal(isotopeMasses, isotopeProbabilities, noIsotopes, atomCount);
349 double log_R2 = log(InverseChiSquareCDF2(K, target_total_prob));
352 priorities[ii] =
marginals[ii]->getLogSizeEstimate(log_R2);
355 unsigned int parse_formula(
const char* formula, std::vector<double>& isotope_masses, std::vector<double>& isotope_probabilities,
int** isotopeNumbers,
int** atomCounts,
unsigned int* confSize,
bool use_nominal_masses)
358 size_t slen = strlen(formula);
362 std::vector<std::pair<const char*, size_t> > elements;
363 std::vector<int> numbers;
366 throw std::invalid_argument(
"Invalid formula: can't be empty");
368 if(!isdigit(formula[slen-1]))
369 throw std::invalid_argument(
"Invalid formula: every element must be followed by a number - write H2O1 and not H2O for water");
371 for(
size_t ii = 0; ii < slen; ii++)
372 if(!isdigit(formula[ii]) && !isalpha(formula[ii]))
373 throw std::invalid_argument(
"Invalid formula: contains invalid (non-digit, non-alpha) character");
377 while(position < slen)
379 size_t elem_end = position;
380 while(isalpha(formula[elem_end]))
382 size_t digit_end = elem_end;
383 while(isdigit(formula[digit_end]))
385 elements.emplace_back(&formula[position], elem_end-position);
386 numbers.push_back(std::stoi(&formula[elem_end]));
387 position = digit_end;
390 std::vector<int> element_indexes;
392 for (
unsigned int i = 0; i < elements.size(); i++)
395 for(
int j = 0; j < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES; j++)
397 if ((strlen(elem_table_symbol[j]) == elements[i].second) && (strncmp(elements[i].first, elem_table_symbol[j], elements[i].second) == 0))
404 throw std::invalid_argument(
"Invalid formula");
405 element_indexes.push_back(idx);
408 std::vector<int> _isotope_numbers;
409 const double* masses = use_nominal_masses ? elem_table_massNo : elem_table_mass;
411 for(std::vector<int>::iterator it = element_indexes.begin(); it != element_indexes.end(); ++it)
415 int elem_ID = elem_table_ID[at_idx];
416 while(at_idx < ISOSPEC_NUMBER_OF_ISOTOPIC_ENTRIES && elem_table_ID[at_idx] == elem_ID)
418 isotope_masses.push_back(masses[at_idx]);
419 isotope_probabilities.push_back(elem_table_probability[at_idx]);
423 _isotope_numbers.push_back(num);
426 const unsigned int dimNumber = elements.size();
428 *isotopeNumbers = array_copy<int>(_isotope_numbers.data(), dimNumber);
429 *atomCounts = array_copy<int>(numbers.data(), dimNumber);
430 *confSize = dimNumber *
sizeof(int);
444 mode_lprob(getModeLProb()),
445 partialLProbs(alloc_partials ? new double[dimNumber+1] : nullptr),
446 partialMasses(alloc_partials ? new double[dimNumber+1] : nullptr),
447 partialProbs(alloc_partials ? new double[dimNumber+1] : nullptr)
476 static const double minsqrt = -1.3407796239501852e+154;
478 IsoThresholdGenerator::IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute,
int tabSize,
int hashSize,
bool reorder_marginals)
480 Lcutoff(_threshold <= 0.0 ? minsqrt : (_absolute ? log(_threshold) : log(_threshold) + mode_lprob))
488 const bool marginalsNeedSorting = doMarginalsNeedSorting();
494 Lcutoff - mode_lprob +
marginals[ii]->fastGetModeLProb(),
495 marginalsNeedSorting,
499 if(!marginalResultsUnsorted[ii]->inRange(0))
506 int* tmpMarginalOrder =
new int[
dimNumber];
509 tmpMarginalOrder[ii] = ii;
511 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, comparator);
515 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
519 marginalOrder[tmpMarginalOrder[ii]] = ii;
521 delete[] tmpMarginalOrder;
525 marginalResults = marginalResultsUnsorted;
526 marginalOrder =
nullptr;
535 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
537 lProbs_ptr = lProbs_ptr_start;
540 partialLProbs_second++;
551 lcfmsv = std::numeric_limits<double>::infinity();
560 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
563 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
576 std::unique_ptr<const double* []> lProbs_restarts(
new const double*[
dimNumber]);
579 lProbs_restarts[ii] = lProbs_ptr_l;
583 while(*lProbs_ptr_l < lcfmsv)
588 count += lProbs_ptr_l - lProbs_ptr_start + 1;
591 int * cntr_ptr = counter;
604 lProbs_ptr_l = lProbs_restarts[idx];
605 while(*lProbs_ptr_l < lcfmsv)
607 for(idx--; idx > 0; idx--)
608 lProbs_restarts[idx] = lProbs_ptr_l;
630 memset(counter, 0,
sizeof(
int)*
dimNumber);
634 lProbs_ptr = lProbs_ptr_start - 1;
637 IsoThresholdGenerator::~IsoThresholdGenerator()
640 delete[] maxConfsLPSum;
641 if (marginalResultsUnsorted != marginalResults)
642 delete[] marginalResultsUnsorted;
643 dealloc_table(marginalResults,
dimNumber);
644 if(marginalOrder !=
nullptr)
645 delete[] marginalOrder;
654 IsoLayeredGenerator::IsoLayeredGenerator(Iso&& iso,
int tabSize,
int hashSize,
bool reorder_marginals,
double t_prob_hint)
655 : IsoGenerator(std::move(iso))
659 currentLThreshold = nextafter(mode_lprob, -std::numeric_limits<double>::infinity());
660 lastLThreshold = (std::numeric_limits<double>::min)();
661 marginalResultsUnsorted =
new LayeredMarginal*[
dimNumber];
662 resetPositions =
new const double*[
dimNumber];
663 marginalsNeedSorting = doMarginalsNeedSorting();
665 memset(counter, 0,
sizeof(
int)*
dimNumber);
668 marginalResultsUnsorted[ii] =
new LayeredMarginal(std::move(*(
marginals[ii])), tabSize, hashSize);
672 double* marginal_priorities =
new double[
dimNumber];
676 int* tmpMarginalOrder =
new int[
dimNumber];
679 tmpMarginalOrder[ii] = ii;
681 TableOrder<double> TO(marginal_priorities);
683 std::sort(tmpMarginalOrder, tmpMarginalOrder +
dimNumber, TO);
684 marginalResults =
new LayeredMarginal*[
dimNumber];
687 marginalResults[ii] = marginalResultsUnsorted[tmpMarginalOrder[ii]];
691 marginalOrder[tmpMarginalOrder[ii]] = ii;
693 delete[] tmpMarginalOrder;
694 delete[] marginal_priorities;
698 marginalResults = marginalResultsUnsorted;
699 marginalOrder =
nullptr;
708 maxConfsLPSum[ii] = maxConfsLPSum[ii-1] + marginalResults[ii]->fastGetModeLProb();
710 lProbs_ptr = lProbs_ptr_start;
713 partialLProbs_second++;
717 lastLThreshold = 10.0;
718 IsoLayeredGenerator::nextLayer(-0.00001);
721 bool IsoLayeredGenerator::nextLayer(
double offset)
723 size_t first_mrg_size = marginalResults[0]->
get_no_confs();
728 lastLThreshold = currentLThreshold;
729 currentLThreshold += offset;
733 marginalResults[ii]->
extend(currentLThreshold - mode_lprob + marginalResults[ii]->fastGetModeLProb(), marginalsNeedSorting);
739 lProbs_ptr = lProbs_ptr_start + first_mrg_size - 1;
742 resetPositions[ii] = lProbs_ptr;
749 bool IsoLayeredGenerator::carry()
755 int * cntr_ptr = counter;
764 if(
partialLProbs[idx] + maxConfsLPSum[idx-1] >= currentLThreshold)
769 lProbs_ptr = resetPositions[idx];
771 while(*lProbs_ptr <= last_lcfmsv)
774 for(
int ii = 0; ii < idx; ii++)
775 resetPositions[ii] = lProbs_ptr;
790 partialLProbs[ii] = -std::numeric_limits<double>::infinity();
793 lProbs_ptr = lProbs_ptr_start + marginalResults[0]->
get_no_confs()-1;
796 IsoLayeredGenerator::~IsoLayeredGenerator()
799 delete[] maxConfsLPSum;
800 delete[] resetPositions;
801 if (marginalResultsUnsorted != marginalResults)
802 delete[] marginalResultsUnsorted;
803 dealloc_table(marginalResults,
dimNumber);
804 if(marginalOrder !=
nullptr)
805 delete[] marginalOrder;
814 IsoOrderedGenerator::IsoOrderedGenerator(
Iso&& iso,
int _tabSize,
int _hashSize) :
815 IsoGenerator(std::move(iso), false), allocator(dimNumber, _tabSize)
832 masses[i] = &marginalResults[i]->conf_masses();
833 logProbs[i] = &marginalResults[i]->conf_lprobs();
834 marginalConfs[i] = &marginalResults[i]->confs();
837 topConf = allocator.newConf();
839 reinterpret_cast<char*
>(topConf) +
sizeof(
double),
844 *(
reinterpret_cast<double*
>(topConf)) =
857 dealloc_table<MarginalTrek*>(marginalResults,
dimNumber);
860 delete[] marginalConfs;
876 int* topConfIsoCounts = getConf(topConf);
878 currentLProb = *(
reinterpret_cast<double*
>(topConf));
879 currentMass = combinedSum( topConfIsoCounts, masses,
dimNumber );
880 currentProb = exp(currentLProb);
885 if(marginalResults[j]->probeConfigurationIdx(topConfIsoCounts[j] + 1))
889 topConfIsoCounts[j]++;
890 *(
reinterpret_cast<double*
>(topConf)) = combinedSum(topConfIsoCounts, logProbs,
dimNumber);
892 topConfIsoCounts[j]--;
897 void* acceptedCandidate = allocator.newConf();
898 int* acceptedCandidateIsoCounts = getConf(acceptedCandidate);
899 memcpy(acceptedCandidateIsoCounts, topConfIsoCounts,
confSize);
901 acceptedCandidateIsoCounts[j]++;
903 *(
reinterpret_cast<double*
>(acceptedCandidate)) = combinedSum(acceptedCandidateIsoCounts, logProbs,
dimNumber);
905 pq.push(acceptedCandidate);
908 if(topConfIsoCounts[j] > 0)
912 topConfIsoCounts[ccount]++;
923 IsoStochasticGenerator::IsoStochasticGenerator(
Iso&& iso,
size_t no_molecules,
double _precision,
double _beta_bias) :
925 ILG(std::move(*this)),
926 to_sample_left(no_molecules),
927 precision(_precision),
928 beta_bias(_beta_bias),