19 #include <unordered_map>
25 #include "dirtyAllocator.h"
27 #include "operators.h"
28 #include "marginalTrek++.h"
36 unsigned int parse_formula(
const char* formula,
37 std::vector<double>& isotope_masses,
38 std::vector<double>& isotope_probabilities,
41 unsigned int* confSize,
42 bool use_nominal_masses =
false);
49 class ISOSPEC_EXPORT_SYMBOL
Iso {
58 void setupMarginals(
const double* _isotopeMasses,
59 const double* _isotopeProbabilities);
70 bool doMarginalsNeedSorting()
const;
85 const int* _isotopeNumbers,
86 const int* _atomCounts,
87 const double* _isotopeMasses,
88 const double* _isotopeProbabilities
92 const int* _isotopeNumbers,
93 const int* _atomCounts,
94 const double*
const * _isotopeMasses,
95 const double*
const * _isotopeProbabilities
99 Iso(
const char* formula,
bool use_nominal_masses =
false);
102 inline Iso(
const std::string& formula,
bool use_nominal_masses =
false) :
Iso(formula.c_str(), use_nominal_masses) {}
113 static Iso FromFASTA(
const char* fasta,
bool use_nominal_masses =
false,
bool add_water =
true);
116 static inline Iso FromFASTA(
const std::string& fasta,
bool use_nominal_masses =
false,
bool add_water =
true) {
return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
122 Iso& operator=(
const Iso& other) =
delete;
129 Iso(
const Iso& other,
bool fullcopy);
135 double getLightestPeakMass()
const;
138 double getHeaviestPeakMass()
const;
145 double getMonoisotopicPeakMass()
const;
148 double getModeLProb()
const;
151 double getUnlikeliestPeakLProb()
const;
154 double getModeMass()
const;
157 double getTheoreticalAverageMass()
const;
160 double variance()
const;
163 double stddev()
const {
return sqrt(variance()); }
172 void addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities);
175 void saveMarginalLogSizeEstimates(
double* priorities,
double target_total_prob)
const;
186 const double mode_lprob;
198 virtual bool advanceToNextConfiguration() = 0;
204 virtual double lprob()
const {
return partialLProbs[0]; }
210 virtual double mass()
const {
return partialMasses[0]; }
216 virtual double prob()
const {
return partialProbs[0]; }
219 virtual void get_conf_signature(
int* space)
const = 0;
240 std::priority_queue<void*, pod_vector<void*>,
ConfOrder> pq;
255 bool advanceToNextConfiguration()
override final;
264 int* c = getConf(topConf);
269 for(
int ii = 0; ii < dimNumber; ii++)
271 memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*
sizeof(
int));
272 space += isotopeNumbers[ii];
299 double* maxConfsLPSum;
300 const double Lcutoff;
305 const double* lProbs_ptr;
306 const double* lProbs_ptr_start;
307 double* partialLProbs_second;
308 double partialLProbs_second_val, lcfmsv;
317 counter[0] = lProbs_ptr - lProbs_ptr_start;
318 if(marginalOrder !=
nullptr)
320 for(
int ii = 0; ii < dimNumber; ii++)
322 int jj = marginalOrder[ii];
323 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
324 space += isotopeNumbers[ii];
329 for(
int ii = 0; ii < dimNumber; ii++)
331 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
332 space += isotopeNumbers[ii];
346 IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute =
true,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true);
356 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
364 lProbs_ptr = lProbs_ptr_start;
366 int * cntr_ptr = counter;
368 while(idx < dimNumber-1)
376 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
377 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
379 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
380 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
391 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); }
392 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->
get_mass(lProbs_ptr - lProbs_ptr_start); }
393 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->
get_prob(lProbs_ptr - lProbs_ptr_start); }
396 void terminate_search();
408 size_t count_confs();
412 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
414 for(; idx > 0; idx--)
416 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
417 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
418 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
420 partialLProbs_second_val = *partialLProbs_second;
421 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
422 lcfmsv = Lcutoff - partialLProbs_second_val;
425 ISOSPEC_FORCE_INLINE
void short_recalc(
int idx)
427 for(; idx > 0; idx--)
428 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
429 partialLProbs_second_val = *partialLProbs_second;
430 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
431 lcfmsv = Lcutoff - partialLProbs_second_val;
443 double* maxConfsLPSum;
444 double currentLThreshold, lastLThreshold;
449 const double* lProbs_ptr;
450 const double* lProbs_ptr_start;
451 const double** resetPositions;
452 double* partialLProbs_second;
453 double partialLProbs_second_val, lcfmsv, last_lcfmsv;
454 bool marginalsNeedSorting;
463 counter[0] = lProbs_ptr - lProbs_ptr_start;
464 if(marginalOrder !=
nullptr)
466 for(
int ii = 0; ii < dimNumber; ii++)
468 int jj = marginalOrder[ii];
469 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
470 space += isotopeNumbers[ii];
475 for(
int ii = 0; ii < dimNumber; ii++)
477 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
478 space += isotopeNumbers[ii];
483 inline double get_currentLThreshold()
const {
return currentLThreshold; }
485 IsoLayeredGenerator(Iso&& iso,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true,
double t_prob_hint = 0.99);
487 ~IsoLayeredGenerator();
493 if(advanceToNextConfigurationWithinLayer())
495 }
while(IsoLayeredGenerator::nextLayer(-2.0));
499 ISOSPEC_FORCE_INLINE
bool advanceToNextConfigurationWithinLayer()
504 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
511 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); };
512 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->
get_mass(lProbs_ptr - lProbs_ptr_start); };
513 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->
get_prob(lProbs_ptr - lProbs_ptr_start); };
516 void terminate_search();
520 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
522 for(; idx > 0; idx--)
524 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
525 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
526 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
528 partialLProbs_second_val = *partialLProbs_second;
529 partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->
get_lProb(counter[0]);
530 lcfmsv = currentLThreshold - partialLProbs_second_val;
531 last_lcfmsv = lastLThreshold - partialLProbs_second_val;
534 bool nextLayer(
double offset);
545 size_t to_sample_left;
546 const double precision;
547 const double beta_bias;
550 size_t current_count;
555 ISOSPEC_FORCE_INLINE
size_t count()
const {
return current_count; }
557 ISOSPEC_FORCE_INLINE
double mass() const override final {
return ILG.
mass(); }
559 ISOSPEC_FORCE_INLINE
double prob() const override final {
return static_cast<double>(count()); }
561 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return log(
prob()); }
572 double curr_conf_prob_left, current_prob;
574 if(to_sample_left <= 0)
577 if(confs_prob < chasing_prob)
583 current_prob = ILG.
prob();
584 confs_prob += current_prob;
585 while(confs_prob <= chasing_prob)
588 current_prob = ILG.
prob();
589 confs_prob += current_prob;
591 if(to_sample_left <= 0)
593 curr_conf_prob_left = confs_prob - chasing_prob;
600 current_prob = ILG.
prob();
601 confs_prob += current_prob;
602 curr_conf_prob_left = current_prob;
605 double prob_left_to_1 = precision - chasing_prob;
606 double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
608 if(expected_confs <= beta_bias)
611 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
612 while(chasing_prob <= confs_prob)
616 if(to_sample_left == 0)
618 prob_left_to_1 = precision - chasing_prob;
619 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
621 if(current_count > 0)
627 size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1);
628 current_count += rbin;
629 to_sample_left -= rbin;
630 chasing_prob = confs_prob;
631 if(current_count > 0)