Isis 3 Programmer Reference
StatCumProbDistDynCalc.cpp
1
6/* SPDX-License-Identifier: CC0-1.0 */
7
8#include "StatCumProbDistDynCalc.h"
9
10#include <QDataStream>
11#include <QDebug>
12#include <QList>
13#include <QUuid>
14#include <QXmlStreamWriter>
15#include <QXmlStreamReader>
16
17#include <float.h>
18#include <math.h>
19#include <stdio.h>
20
21#include "IException.h"
22#include "IString.h"
23#include "Project.h"
24
25#include "Pvl.h"
26#include <iostream>
27
28namespace Isis {
29
30
38 : QObject(parent) {
39 initialize();
40 setQuantiles(nodes);
41 }
42
43
44 StatCumProbDistDynCalc::StatCumProbDistDynCalc(QXmlStreamReader *xmlReader, QObject *parent) {
45 initialize();
46 readStatistics(xmlReader);
47 }
48
49 void StatCumProbDistDynCalc::readStatistics(QXmlStreamReader *xmlReader) {
50 Q_ASSERT(xmlReader->name() == "statCumProbDistDynCalc");
51 while (xmlReader->readNextStartElement()) {
52 if (xmlReader->qualifiedName() == "numberCells") {
53 try {
54 m_numberCells = toDouble(xmlReader->readElementText());
55 }
56 catch (IException &e) {
57 m_numberCells = 0.0;
58 }
59 }
60 else if (xmlReader->qualifiedName() == "numberQuantiles") {
61 try {
62 m_numberQuantiles = toDouble(xmlReader->readElementText());
63 }
64 catch (IException &e) {
66 }
67 }
68 else if (xmlReader->qualifiedName() == "numberObservations") {
69 try {
70 m_numberObservations = toDouble(xmlReader->readElementText());
71 }
72 catch (IException &e) {
74 }
75 }
76 else if (xmlReader->qualifiedName() == "distributionData") {
77 m_quantiles.clear();
78 m_observationValues.clear();
81 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
82 while (xmlReader->readNextStartElement()) {
83 if (xmlReader->qualifiedName() == "quantileInfo") {
84 QStringRef quantile = xmlReader->attributes().value("quantile");
85 if (!quantile.isEmpty()) {
86 m_quantiles.append(quantile.toDouble());
87 }
88 QStringRef dataValue = xmlReader->attributes().value("dataValue");
89 if (!dataValue.isEmpty()) {
90 m_observationValues.append(dataValue.toDouble());
91 }
92 QStringRef idealNumObsBelowQuantile = xmlReader->attributes().value("idealNumObsBelowQuantile");
93 if (!idealNumObsBelowQuantile.isEmpty()) {
94 m_idealNumObsBelowQuantile.append(idealNumObsBelowQuantile.toDouble());
95 }
96 QStringRef actualNumObsBelowQuantile = xmlReader->attributes().value("actualNumObsBelowQuantile");
97
98 if (!actualNumObsBelowQuantile.isEmpty()) {
99 m_numObsBelowQuantile.append(actualNumObsBelowQuantile.toInt());
100 }
101 }
102 else {
103 xmlReader->skipCurrentElement();
104 }
105 }
106 }
107 }
108 else {
109 xmlReader->skipCurrentElement();
110 }
111 }
112 }
113
114
115 StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
116 : m_numberCells(other.m_numberCells),
117 m_numberQuantiles(other.m_numberQuantiles),
118 m_numberObservations(other.m_numberObservations),
119 m_quantiles(other.m_quantiles),
120 m_observationValues(other.m_observationValues),
121 m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
122 m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
123 }
124
125
126
132
133
134
135 StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
136
137 if (&other != this) {
138 m_numberCells = other.m_numberCells;
139 m_numberQuantiles = other.m_numberQuantiles;
140 m_numberObservations = other.m_numberObservations;
141 m_quantiles = other.m_quantiles;
142 m_observationValues = other.m_observationValues;
143 m_idealNumObsBelowQuantile = other.m_idealNumObsBelowQuantile;
144 m_numObsBelowQuantile = other.m_numObsBelowQuantile;
145 }
146 return *this;
147
148 }
149
150
151
165
166
167
168 void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
169 initialize();
170 if (nodes < 3) {
172 }
173 else {
174 m_numberQuantiles = nodes;
175 }
176
177 m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
178
179 double stepSize = 1.0 / (double)m_numberCells;
180 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
181 if (i == 0) {
182 m_quantiles.append( 0.0 );
183 }
184 else {
185 m_quantiles.append( m_quantiles[i-1] + stepSize );
186 } // essentially the same as i/numCells without having to cast i every time...
187 m_idealNumObsBelowQuantile.append(i+1);
188 m_numObsBelowQuantile.append(i+1);
189 }
190
191 }
192
193
194
205 validate();
206 //return the largest value so far
208 }
209
210
211
222 validate();
223 //return the smallest value so far
224 return m_observationValues[0];
225 }
226
227
228
243 double StatCumProbDistDynCalc::value(double cumProb) {
244 validate();
245 //given a cumProbability return the associate value
246
247 //if the requested cumProb is outside [0,1] return DBL_MAX
248 if (cumProb < 0.0 || cumProb > 1.0) {
249 IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
250 "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
251 throw IException(IException::Programmer, msg, _FILEINFO_);
252 }
253
254 //if the requested quantile is 0.0 return the lowest value
255 if (cumProb == 0.0) {
256 return m_observationValues[0];
257 }
258
259 //if the requested quantile is 1.0 return the largest value
260 if (cumProb == 1.0) {
262 }
263
264 // otherwise find the the node nearest the value
265 double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
266 // to the given probability
267 unsigned int index = 0;
268 for (int i = 1; i < int(m_numberQuantiles); i++) {
269 // if the given probability is closer to this quantile than the currently saved
270 // min distance, then replace
271 double dist = fabs(m_quantiles[i] - cumProb);
272// if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
273 if ( dist < minDistance ) {
274 minDistance = fabs(m_quantiles[i] - cumProb);
275 index = i;
276 }
277 else {
278 break; // we are getting farther from the given value
279 }
280 }// TODO: must be a better way to write this???
281
282 //get the coordinates of the three closest nodes, value as a function of cumProb
283 double x[3]; // m_observationValues values
284 double y[3]; // cumlative probilities
285
286 if (index == 0) { // the given prob is closest to the first quantile
287 y[0] = m_observationValues[0];
288 y[1] = m_observationValues[1];
289 y[2] = m_observationValues[2];
290
291 x[0] = m_quantiles[0];
292 x[1] = m_quantiles[1];
293 x[2] = m_quantiles[2];
294 }
295 else if (index == m_numberCells) { // the given prob is closest to the last quantile
296 y[0] = m_observationValues[index-2];
297 y[1] = m_observationValues[index-1];
298 y[2] = m_observationValues[index ];
299
300 x[0] = m_quantiles[index-2];
301 x[1] = m_quantiles[index-1];
302 x[2] = m_quantiles[index ];
303 }
304 else {
305 y[0] = m_observationValues[index-1];
306 y[1] = m_observationValues[index ];
307 y[2] = m_observationValues[index+1];
308
309 x[0] = m_quantiles[index-1];
310 x[1] = m_quantiles[index ];
311 x[2] = m_quantiles[index+1];
312 }
313
314 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
315 return m_observationValues[index]; // this should never happen,
316 // but just in case return the value of the nearest node
317 }
318
319 // try quadratic interpolation
320 double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
321 + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
322 + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
323
324 // check for reasonability of the quadratic interpolation
325
326 // TODO: better way to write this???
327 int i;
328 for (i = 0; i < 3; i++) {
329 if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
330 break;
331 }
332 }
333
334 // make sure things are strictly increasing
335 if (y[i] <= interp && y[i+1] >= interp) {
336 return interp;
337 }
338 else {
339 // if the quadratic wasn't reasonable return the linear interpolation
340 return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
341 }
342
343 }
344
345
346
360 double StatCumProbDistDynCalc::cumProb(double value) {
361 validate();
362 //given a value return the cumulative probility
363
364 if (value <= m_observationValues[0]) {
365 return 0.0; // if the value is less than or equal to the the current min,
366 // the best estimate is 0.0
367 }
369 return 1.0; // if the value is greater than or equal to the current max,
370 // the best estimate is 1.0
371 }
372
373 // otherwise, find the the node nearest to the given value
374 double minDistance = fabs(m_observationValues[0]-value);
375 unsigned int index = 0;
376 for (unsigned int i = 1; i < m_numberQuantiles; i++) {
377 double dist = fabs(m_observationValues[i]-value);
378 if ( dist < minDistance) {
379 minDistance = fabs(m_observationValues[i]-value);
380 index = i;
381 }
382 else {
383 break; // we are getting farther from the given value
384 }
385 }// TODO: must be a better way to write this???
386
387 //get the coordinates of the three closest nodes cumProb as a function of value
388 double x[3]; // m_observationValues values
389 double y[3]; // cumlative probilities
390
391 if (index == 0) {
392 x[0] = m_observationValues[0];
393 x[1] = m_observationValues[1];
394 x[2] = m_observationValues[2];
395
396 y[0] = m_quantiles[0];
397 y[1] = m_quantiles[1];
398 y[2] = m_quantiles[2];
399 }
400 else if (index == m_numberCells) {
401 x[0] = m_observationValues[index-2];
402 x[1] = m_observationValues[index-1];
403 x[2] = m_observationValues[index ];
404
405 y[0] = m_quantiles[index-2];
406 y[1] = m_quantiles[index-1];
407 y[2] = m_quantiles[index ];
408 }
409 else {
410 x[0] = m_observationValues[index-1];
411 x[1] = m_observationValues[index ];
412 x[2] = m_observationValues[index+1];
413
414 y[0] = m_quantiles[index-1];
415 y[1] = m_quantiles[index ];
416 y[2] = m_quantiles[index+1];
417 }
418
419 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
420 return m_quantiles[index]; // this should never happen,
421 // but just in case return the cumProb of the nearest node
422 }
423
424 //do a parabolic interpolation to find the probability at value
425 double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
426 + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
427 + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
428
429 //check for reasonability of the quadratic interpolation
430 // TODO: better way to write this???
431 int i;
432 for (i = 0; i < 3; i++) {
433 if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
434 break;
435 }
436
437 if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
438 return interp;
439 //if the quadratic wasn't reasonable return the linear interpolation
440 else {
441 return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
442 }
443
444 }
445
446
447
456 double temp = 0.0;
457
459 // in this phase, the algorithm is getting initial values
460 m_observationValues.append(obs);
462
463 // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
465 std::sort(m_observationValues.begin(), m_observationValues.end(), std::less<double>());
466 }
467 }
468 else { // m_numberObservations >= m_numberQuantiles
469
470 //incrementing the number of observations
473
474 // replace min or max with this observation, if appropriate
477 }
478 if (obs < m_observationValues[0]) {
479 m_observationValues[0] = obs;
480 temp = 1.0;
481 }
482
483 //estimated quantile positions are increased if obs <= estimated quantile value
484 for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
485 if (obs <= m_observationValues[i]) {
486 for (; i < (int)m_numberQuantiles-1; i++) {
487 m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
488 }
489 }
490 }
491
492 for (int i = 1; i < (int)m_numberQuantiles; i++) {
494 }
495
496 //calculate corrections to node positions (m_n) and values (m_observationValues)
497 int d;
498 // note that the loop does not edit the first or last positions
499 for (int i = 1; i < (int)m_numberCells; i++) {
500 //calculation of d[i] it is either 1, -1, or 0
502 if (fabs(temp)>1 && temp > 0.0) {
503 d = 1;
504 }
505 else if (fabs(temp)>1 && temp < 0.0) {
506 d = -1;
507 }
508 else {
509 continue;
510 }
511
512 // if the node can be moved by d without stepping on another node
513 if ( ( (d == 1)
515 || ( (d == -1)
516 && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
517 //calculate a new quantile value for the node from the parabolic formula
518 temp = m_observationValues[i]
519 + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
526
527 // it is necessary that the values of the quantiles be strictly increasing,
528 // if the parabolic formula perserves this so be it
529 if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
530 m_observationValues[i] = temp;
531 }
532 else {
533 // if the parabolic formula does not maintain
534 // that the values of the quantiles be strictly increasing,
535 // then use linear interpolation
537 + double(d)
540 }
541
542 // the position of the quantile
543 // (in terms of the number of observations <= quantile) is also adjusted
544 m_numObsBelowQuantile[i] += d;
545 }
546 }
547 }
548 }
549
550
551
552 void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
553
554 stream.writeStartElement("statCumProbDistDynCalc");
555 stream.writeTextElement("numberCells", toString(m_numberCells));
556 stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
557 stream.writeTextElement("numberObservations", toString(m_numberObservations));
558
559 stream.writeStartElement("distributionData");
560 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
561 stream.writeStartElement("quantileInfo");
562 // we need to write out high precision for minDistance calculations in value() and cumProb()
563 stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
564 stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
565 stream.writeAttribute("idealNumObsBelowQuantile",
567 stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
568 stream.writeEndElement(); // end observation
569 }
570 stream.writeEndElement(); // end observationData
571 stream.writeEndElement(); // end statCumProbDistDynCalc
572
573 }
574
575 QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
576 stream << (qint32)m_numberCells
577 << (qint32)m_numberQuantiles
578 << (qint32)m_numberObservations
579 << m_quantiles
583 return stream;
584 }
585
586 QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
587 QString id;
588 qint32 numCells, numQuantiles, numObservations;
589 stream >> numCells
590 >> numQuantiles
591 >> numObservations
592 >> m_quantiles
596
597 m_numberCells = (unsigned int)numCells;
598 m_numberQuantiles = (unsigned int)numQuantiles;
599 m_numberObservations = (unsigned int)numObservations;
600 return stream;
601 }
602
603
604
605 QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
606 return scpddc.write(stream);
607 }
608
609
610
611 QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
612 return scpddc.read(stream);
613 }
614
615 void StatCumProbDistDynCalc::validate() {
616 // if quantiles have not been set
617 if (m_numberQuantiles == 0) {
618 QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
619 "Number of cells = [" + toString(m_numberCells) + "].";
620 throw IException(IException::Programmer, msg, _FILEINFO_);
621 }
622
623 //if there isn't even as much data as there are quantiles to track
625 QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
626 "added [" + toString(m_numberObservations) + "] matches the number of "
627 "quantiles [" + toString(m_numberQuantiles)
628 + "] (i.e. number of nodes) selected.";
629 throw IException(IException::Programmer, msg, _FILEINFO_);
630 }
631
632 }
633}// end namespace Isis
Isis exception class.
Definition IException.h:91
@ Programmer
This error is for when a programmer made an API call that was illegal.
Definition IException.h:146
Adds specific functionality to C++ strings.
Definition IString.h:165
The main project for ipce.
Definition Project.h:287
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added.
double min()
Returns the maximum observation so far included in the dynamic calculation.
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the corresponding ...
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added.
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function.
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added.
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
double max()
Returns the maximum observation so far included in the dynamic calculation.
This is free and unencumbered software released into the public domain.
Definition Apollo.h:16
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition IString.cpp:211
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition IString.cpp:149
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.