17 #include "fixedEnvelopes.h"
24 FixedEnvelope::FixedEnvelope(
const FixedEnvelope& other) :
25 _masses(array_copy<double>(other._masses, other._confs_no)),
26 _probs(array_copy<double>(other._probs, other._confs_no)),
27 _confs(array_copy<int>(other._confs, other._confs_no*other.allDim)),
28 _confs_no(other._confs_no),
30 sorted_by_mass(other.sorted_by_mass),
31 sorted_by_prob(other.sorted_by_prob),
32 total_prob(other.total_prob)
35 FixedEnvelope::FixedEnvelope(FixedEnvelope&& other) :
36 _masses(other._masses),
39 _confs_no(other._confs_no),
41 sorted_by_mass(other.sorted_by_mass),
42 sorted_by_prob(other.sorted_by_prob),
43 total_prob(other.total_prob)
45 other._masses =
nullptr;
46 other._probs =
nullptr;
47 other._confs =
nullptr;
49 other.total_prob = 0.0;
52 FixedEnvelope::FixedEnvelope(
double* in_masses,
double* in_probs,
size_t in_confs_no,
bool masses_sorted,
bool probs_sorted,
double _total_prob) :
56 _confs_no(in_confs_no),
58 sorted_by_mass(masses_sorted),
59 sorted_by_prob(probs_sorted),
60 total_prob(_total_prob)
63 FixedEnvelope FixedEnvelope::operator+(
const FixedEnvelope& other)
const
65 double* nprobs =
reinterpret_cast<double*
>(malloc(
sizeof(
double) * (_confs_no+other._confs_no)));
67 throw std::bad_alloc();
68 double* nmasses =
reinterpret_cast<double*
>(malloc(
sizeof(
double) * (_confs_no+other._confs_no)));
69 if(nmasses ==
nullptr)
72 throw std::bad_alloc();
75 memcpy(nprobs, _probs,
sizeof(
double) * _confs_no);
76 memcpy(nmasses, _masses,
sizeof(
double) * _confs_no);
78 memcpy(nprobs+_confs_no, other._probs,
sizeof(
double) * other._confs_no);
79 memcpy(nmasses+_confs_no, other._masses,
sizeof(
double) * other._confs_no);
81 return FixedEnvelope(nmasses, nprobs, _confs_no + other._confs_no);
84 FixedEnvelope FixedEnvelope::operator*(
const FixedEnvelope& other)
const
86 double* nprobs =
reinterpret_cast<double*
>(malloc(
sizeof(
double) * _confs_no * other._confs_no));
88 throw std::bad_alloc();
91 double* nmasses =
reinterpret_cast<double*
>(malloc(
sizeof(
double) * _confs_no * other._confs_no));
92 if(nmasses ==
nullptr)
95 throw std::bad_alloc();
100 for(
size_t ii = 0; ii < _confs_no; ii++)
101 for(
size_t jj = 0; jj < other._confs_no; jj++)
103 nprobs[tgt_idx] = _probs[ii] * other._probs[jj];
104 nmasses[tgt_idx] = _masses[ii] + other._masses[jj];
108 return FixedEnvelope(nmasses, nprobs, tgt_idx);
111 void FixedEnvelope::sort_by_mass()
118 sorted_by_mass =
true;
119 sorted_by_prob =
false;
123 void FixedEnvelope::sort_by_prob()
130 sorted_by_prob =
true;
131 sorted_by_mass =
false;
134 template<
typename T>
void reorder_array(T* arr,
size_t* order,
size_t size,
bool can_destroy =
false)
138 size_t* order_c =
new size_t[size];
139 memcpy(order_c, order,
sizeof(
size_t)*size);
143 for(
size_t ii = 0; ii < size; ii++)
144 while(order[ii] != ii)
146 std::swap(arr[ii], arr[order[ii]]);
147 std::swap(order[order[ii]], order[ii]);
154 void FixedEnvelope::sort_by(
double* order)
156 size_t* indices =
new size_t[_confs_no];
158 for(
size_t ii = 0; ii < _confs_no; ii++)
161 std::sort<size_t*>(indices, indices + _confs_no, TableOrder<double>(order));
163 size_t* inverse =
new size_t[_confs_no];
165 for(
size_t ii = 0; ii < _confs_no; ii++)
166 inverse[indices[ii]] = ii;
170 reorder_array(_masses, inverse, _confs_no);
171 reorder_array(_probs, inverse, _confs_no);
172 if(_confs !=
nullptr)
174 int* swapspace =
new int[allDim];
175 for(
size_t ii = 0; ii < _confs_no; ii++)
176 while(order[ii] != ii)
178 memcpy(swapspace, &_confs[ii*allDim], allDimSizeofInt);
179 memcpy(&_confs[ii*allDim], &_confs[inverse[ii]*allDim], allDimSizeofInt);
180 memcpy(&_confs[inverse[ii]*allDim], swapspace, allDimSizeofInt);
188 double FixedEnvelope::get_total_prob()
190 if(std::isnan(total_prob))
193 for(
size_t ii = 0; ii < _confs_no; ii++)
194 total_prob += _probs[ii];
199 void FixedEnvelope::scale(
double factor)
201 for(
size_t ii = 0; ii < _confs_no; ii++)
202 _probs[ii] *= factor;
203 total_prob *= factor;
206 void FixedEnvelope::normalize()
208 double tp = get_total_prob();
216 FixedEnvelope FixedEnvelope::LinearCombination(
const std::vector<const FixedEnvelope*>& spectra,
const std::vector<double>& intensities)
218 return LinearCombination(spectra.data(), intensities.data(), spectra.size());
221 FixedEnvelope FixedEnvelope::LinearCombination(
const FixedEnvelope*
const * spectra,
const double* intensities,
size_t size)
224 for(
size_t ii = 0; ii < size; ii++)
225 ret_size += spectra[ii]->_confs_no;
227 double* newprobs =
reinterpret_cast<double*
>(malloc(
sizeof(
double)*ret_size));
228 if(newprobs ==
nullptr)
229 throw std::bad_alloc();
230 double* newmasses =
reinterpret_cast<double*
>(malloc(
sizeof(
double)*ret_size));
231 if(newmasses ==
nullptr)
234 throw std::bad_alloc();
238 for(
size_t ii = 0; ii < size; ii++)
240 double mul = intensities[ii];
241 for(
size_t jj = 0; jj < spectra[ii]->_confs_no; jj++)
242 newprobs[jj+cntr] = spectra[ii]->_probs[jj] * mul;
243 memcpy(newmasses + cntr, spectra[ii]->_masses,
sizeof(
double) * spectra[ii]->_confs_no);
244 cntr += spectra[ii]->_confs_no;
246 return FixedEnvelope(newmasses, newprobs, cntr);
249 double FixedEnvelope::WassersteinDistance(FixedEnvelope& other)
252 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
253 throw std::logic_error(
"Spectra must be normalized before computing Wasserstein Distance");
255 if(_confs_no == 0 || other._confs_no == 0)
259 other.sort_by_mass();
262 size_t idx_other = 0;
264 double acc_prob = 0.0;
265 double last_point = 0.0;
268 while(idx_this < _confs_no && idx_other < other._confs_no)
270 if(_masses[idx_this] < other._masses[idx_other])
272 ret += (_masses[idx_this] - last_point) * std::abs(acc_prob);
273 acc_prob += _probs[idx_this];
274 last_point = _masses[idx_this];
279 ret += (other._masses[idx_other] - last_point) * std::abs(acc_prob);
280 acc_prob -= other._probs[idx_other];
281 last_point = other._masses[idx_other];
286 acc_prob = std::abs(acc_prob);
288 while(idx_this < _confs_no)
290 ret += (_masses[idx_this] - last_point) * acc_prob;
291 acc_prob -= _probs[idx_this];
292 last_point = _masses[idx_this];
296 while(idx_other < other._confs_no)
298 ret += (other._masses[idx_other] - last_point) * acc_prob;
299 acc_prob -= other._probs[idx_other];
300 last_point = other._masses[idx_other];
308 double FixedEnvelope::OrientedWassersteinDistance(FixedEnvelope& other)
311 if((get_total_prob()*0.999 > other.get_total_prob()) || (other.get_total_prob() > get_total_prob()*1.001))
312 throw std::logic_error(
"Spectra must be normalized before computing Wasserstein Distance");
314 if(_confs_no == 0 || other._confs_no == 0)
318 other.sort_by_mass();
321 size_t idx_other = 0;
323 double acc_prob = 0.0;
324 double last_point = 0.0;
327 while(idx_this < _confs_no && idx_other < other._confs_no)
329 if(_masses[idx_this] < other._masses[idx_other])
331 ret += (_masses[idx_this] - last_point) * acc_prob;
332 acc_prob += _probs[idx_this];
333 last_point = _masses[idx_this];
338 ret += (other._masses[idx_other] - last_point) * acc_prob;
339 acc_prob -= other._probs[idx_other];
340 last_point = other._masses[idx_other];
345 while(idx_this < _confs_no)
347 ret += (_masses[idx_this] - last_point) * acc_prob;
348 acc_prob -= _probs[idx_this];
349 last_point = _masses[idx_this];
353 while(idx_other < other._confs_no)
355 ret += (other._masses[idx_other] - last_point) * acc_prob;
356 acc_prob -= other._probs[idx_other];
357 last_point = other._masses[idx_other];
364 FixedEnvelope FixedEnvelope::bin(
double bin_width,
double middle)
373 ret.reallocate_memory<
false>(ISOSPEC_INIT_TABLE_SIZE);
374 ret.current_size = ISOSPEC_INIT_TABLE_SIZE;
378 double half_width = 0.5*bin_width;
379 double hwmm = half_width-middle;
381 while(ii < _confs_no)
383 double current_bin_middle = floor((_masses[ii]+hwmm)/bin_width)*bin_width + middle;
384 double current_bin_end = current_bin_middle + half_width;
385 double bin_prob = 0.0;
387 while(ii < _confs_no && _masses[ii] <= current_bin_end)
389 bin_prob += _probs[ii];
392 ret.store_conf(current_bin_middle, bin_prob);
398 template<
bool tgetConfs>
void FixedEnvelope::reallocate_memory(
size_t new_size)
401 _masses =
reinterpret_cast<double*
>(realloc(_masses, new_size *
sizeof(
double)));
402 if(_masses ==
nullptr)
403 throw std::bad_alloc();
404 tmasses = _masses + _confs_no;
406 _probs =
reinterpret_cast<double*
>(realloc(_probs, new_size *
sizeof(
double)));
407 if(_probs ==
nullptr)
408 throw std::bad_alloc();
409 tprobs = _probs + _confs_no;
411 constexpr_if(tgetConfs)
413 _confs =
reinterpret_cast<int*
>(realloc(_confs, new_size * allDimSizeofInt));
414 if(_confs ==
nullptr)
415 throw std::bad_alloc();
416 tconfs = _confs + (allDim * _confs_no);
420 void FixedEnvelope::slow_reallocate_memory(
size_t new_size)
423 _masses =
reinterpret_cast<double*
>(realloc(_masses, new_size *
sizeof(
double)));
424 if(_masses ==
nullptr)
425 throw std::bad_alloc();
426 tmasses = _masses + _confs_no;
428 _probs =
reinterpret_cast<double*
>(realloc(_probs, new_size *
sizeof(
double)));
429 if(_probs ==
nullptr)
430 throw std::bad_alloc();
431 tprobs = _probs + _confs_no;
433 if(_confs !=
nullptr)
435 _confs =
reinterpret_cast<int*
>(realloc(_confs, new_size * allDimSizeofInt));
436 if(_confs ==
nullptr)
437 throw std::bad_alloc();
438 tconfs = _confs + (allDim * _confs_no);
442 template<
bool tgetConfs>
void FixedEnvelope::threshold_init(Iso&& iso,
double threshold,
bool absolute)
444 IsoThresholdGenerator generator(std::move(iso), threshold, absolute);
446 size_t tab_size = generator.count_confs();
447 this->allDim = generator.getAllDim();
448 this->allDimSizeofInt = this->allDim *
sizeof(int);
450 this->reallocate_memory<tgetConfs>(tab_size);
452 double* ttmasses = this->_masses;
453 double* ttprobs = this->_probs;
454 ISOSPEC_MAYBE_UNUSED
int* ttconfs;
455 constexpr_if(tgetConfs)
458 while(generator.advanceToNextConfiguration())
460 *ttmasses = generator.mass(); ttmasses++;
461 *ttprobs = generator.prob(); ttprobs++;
462 constexpr_if(tgetConfs) { generator.get_conf_signature(ttconfs); ttconfs += allDim; }
465 this->_confs_no = tab_size;
468 template void FixedEnvelope::threshold_init<true>(Iso&& iso,
double threshold,
bool absolute);
469 template void FixedEnvelope::threshold_init<false>(Iso&& iso,
double threshold,
bool absolute);
472 template<
bool tgetConfs>
void FixedEnvelope::total_prob_init(Iso&& iso,
double target_total_prob,
bool optimize)
474 if(target_total_prob <= 0.0)
477 if(target_total_prob >= 1.0)
479 threshold_init<tgetConfs>(std::move(iso), 0.0,
true);
483 current_size = ISOSPEC_INIT_TABLE_SIZE;
485 IsoLayeredGenerator generator(std::move(iso), 1000, 1000,
true, std::min<double>(target_total_prob, 0.9999));
487 this->allDim = generator.getAllDim();
488 this->allDimSizeofInt = this->allDim*
sizeof(int);
491 this->reallocate_memory<tgetConfs>(ISOSPEC_INIT_TABLE_SIZE);
493 size_t last_switch = 0;
494 double prob_at_last_switch = 0.0;
495 double prob_so_far = 0.0;
498 const double sum_above = log1p(-target_total_prob) - 2.3025850929940455;
503 while(generator.advanceToNextConfigurationWithinLayer())
505 this->
template addConfILG<tgetConfs>(generator);
506 prob_so_far += *(tprobs-1);
507 if(prob_so_far >= target_total_prob)
511 while(generator.advanceToNextConfigurationWithinLayer())
512 this->
template addConfILG<tgetConfs>(generator);
519 if(prob_so_far >= target_total_prob)
522 last_switch = this->_confs_no;
523 prob_at_last_switch = prob_so_far;
525 layer_delta = sum_above - log1p(-prob_so_far);
526 layer_delta = (std::max)((std::min)(layer_delta, -0.1), -5.0);
527 }
while(generator.nextLayer(layer_delta));
529 if(!optimize || prob_so_far <= target_total_prob)
538 int* conf_swapspace =
nullptr;
539 constexpr_if(tgetConfs)
540 conf_swapspace =
reinterpret_cast<int*
>(malloc(this->allDimSizeofInt));
542 size_t start = last_switch;
543 size_t end = this->_confs_no;
544 double sum_to_start = prob_at_last_switch;
549 size_t len = end - start;
550 #if ISOSPEC_BUILDING_R
551 size_t pivot = len/2 + start;
553 size_t pivot = random_gen() % len + start;
557 double pprob = this->_probs[pivot];
558 swap<tgetConfs>(pivot, end-1, conf_swapspace);
560 double new_csum = sum_to_start;
562 size_t loweridx = start;
563 for(
size_t ii = start; ii < end-1; ii++)
564 if(this->_probs[ii] > pprob)
566 swap<tgetConfs>(ii, loweridx, conf_swapspace);
567 new_csum += this->_probs[loweridx];
571 swap<tgetConfs>(end-1, loweridx, conf_swapspace);
574 if(new_csum < target_total_prob)
576 start = loweridx + 1;
577 sum_to_start = new_csum + this->_probs[loweridx];
583 constexpr_if(tgetConfs)
584 free(conf_swapspace);
586 if(end <= current_size/2)
588 this->
template reallocate_memory<tgetConfs>(end);
590 this->_confs_no = end;
593 template void FixedEnvelope::total_prob_init<true>(Iso&& iso,
double target_total_prob,
bool optimize);
594 template void FixedEnvelope::total_prob_init<false>(Iso&& iso,
double target_total_prob,
bool optimize);
597 double FixedEnvelope::empiric_average_mass()
600 for(
size_t ii = 0; ii < _confs_no; ii++)
602 ret += _masses[ii] * _probs[ii];
604 return ret / get_total_prob();
607 double FixedEnvelope::empiric_variance()
610 double avg = empiric_average_mass();
611 for(
size_t ii = 0; ii < _confs_no; ii++)
613 double msq = _masses[ii] - avg;
614 ret += msq * msq * _probs[ii];
617 return ret / get_total_prob();