Isis 3 Programmer Reference
StatCumProbDistDynCalc.cpp
1
6/* SPDX-License-Identifier: CC0-1.0 */
7
8#include "StatCumProbDistDynCalc.h"
9
10#include <QDataStream>
11#include <QDebug>
12#include <QList>
13#include <QUuid>
14#include <QXmlStreamWriter>
15
16#include <float.h>
17#include <math.h>
18#include <stdio.h>
19
20#include "IException.h"
21#include "IString.h"
22#include "Project.h"
23#include "XmlStackedHandlerReader.h"
24
25#include "Pvl.h"
26#include <iostream>
27
28namespace Isis {
29
30
38 : QObject(parent) {
39 initialize();
40 setQuantiles(nodes);
41 }
42
43
44// TODO: should project be const ???
46 XmlStackedHandlerReader *xmlReader,
47 QObject *parent) { // TODO: does xml stuff need project???
48 initialize();
49 xmlReader->pushContentHandler(new XmlHandler(this, project)); // TODO: does xml stuff need project???
50 }
51
52
53
54 StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
55 : m_numberCells(other.m_numberCells),
56 m_numberQuantiles(other.m_numberQuantiles),
57 m_numberObservations(other.m_numberObservations),
58 m_quantiles(other.m_quantiles),
59 m_observationValues(other.m_observationValues),
60 m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
61 m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
62 }
63
64
65
71
72
73
74 StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
75
76 if (&other != this) {
77 m_numberCells = other.m_numberCells;
78 m_numberQuantiles = other.m_numberQuantiles;
79 m_numberObservations = other.m_numberObservations;
80 m_quantiles = other.m_quantiles;
81 m_observationValues = other.m_observationValues;
82 m_idealNumObsBelowQuantile = other.m_idealNumObsBelowQuantile;
83 m_numObsBelowQuantile = other.m_numObsBelowQuantile;
84 }
85 return *this;
86
87 }
88
89
90
104
105
106
107 void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
108 initialize();
109 if (nodes < 3) {
111 }
112 else {
113 m_numberQuantiles = nodes;
114 }
115
116 m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
117
118 double stepSize = 1.0 / (double)m_numberCells;
119 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
120 if (i == 0) {
121 m_quantiles.append( 0.0 );
122 }
123 else {
124 m_quantiles.append( m_quantiles[i-1] + stepSize );
125 } // essentially the same as i/numCells without having to cast i every time...
126 m_idealNumObsBelowQuantile.append(i+1);
127 m_numObsBelowQuantile.append(i+1);
128 }
129
130 }
131
132
133
144 validate();
145 //return the largest value so far
147 }
148
149
150
161 validate();
162 //return the smallest value so far
163 return m_observationValues[0];
164 }
165
166
167
182 double StatCumProbDistDynCalc::value(double cumProb) {
183 validate();
184 //given a cumProbability return the associate value
185
186 //if the requested cumProb is outside [0,1] return DBL_MAX
187 if (cumProb < 0.0 || cumProb > 1.0) {
188 IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
189 "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
190 throw IException(IException::Programmer, msg, _FILEINFO_);
191 }
192
193 //if the requested quantile is 0.0 return the lowest value
194 if (cumProb == 0.0) {
195 return m_observationValues[0];
196 }
197
198 //if the requested quantile is 1.0 return the largest value
199 if (cumProb == 1.0) {
201 }
202
203 // otherwise find the the node nearest the value
204 double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
205 // to the given probability
206 unsigned int index = 0;
207 for (int i = 1; i < int(m_numberQuantiles); i++) {
208 // if the given probability is closer to this quantile than the currently saved
209 // min distance, then replace
210 double dist = fabs(m_quantiles[i] - cumProb);
211// if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
212 if ( dist < minDistance ) {
213 minDistance = fabs(m_quantiles[i] - cumProb);
214 index = i;
215 }
216 else {
217 break; // we are getting farther from the given value
218 }
219 }// TODO: must be a better way to write this???
220
221 //get the coordinates of the three closest nodes, value as a function of cumProb
222 double x[3]; // m_observationValues values
223 double y[3]; // cumlative probilities
224
225 if (index == 0) { // the given prob is closest to the first quantile
226 y[0] = m_observationValues[0];
227 y[1] = m_observationValues[1];
228 y[2] = m_observationValues[2];
229
230 x[0] = m_quantiles[0];
231 x[1] = m_quantiles[1];
232 x[2] = m_quantiles[2];
233 }
234 else if (index == m_numberCells) { // the given prob is closest to the last quantile
235 y[0] = m_observationValues[index-2];
236 y[1] = m_observationValues[index-1];
237 y[2] = m_observationValues[index ];
238
239 x[0] = m_quantiles[index-2];
240 x[1] = m_quantiles[index-1];
241 x[2] = m_quantiles[index ];
242 }
243 else {
244 y[0] = m_observationValues[index-1];
245 y[1] = m_observationValues[index ];
246 y[2] = m_observationValues[index+1];
247
248 x[0] = m_quantiles[index-1];
249 x[1] = m_quantiles[index ];
250 x[2] = m_quantiles[index+1];
251 }
252
253 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
254 return m_observationValues[index]; // this should never happen,
255 // but just in case return the value of the nearest node
256 }
257
258 // try quadratic interpolation
259 double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
260 + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
261 + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
262
263 // check for reasonability of the quadratic interpolation
264
265 // TODO: better way to write this???
266 int i;
267 for (i = 0; i < 3; i++) {
268 if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
269 break;
270 }
271 }
272
273 // make sure things are strictly increasing
274 if (y[i] <= interp && y[i+1] >= interp) {
275 return interp;
276 }
277 else {
278 // if the quadratic wasn't reasonable return the linear interpolation
279 return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
280 }
281
282 }
283
284
285
299 double StatCumProbDistDynCalc::cumProb(double value) {
300 validate();
301 //given a value return the cumulative probility
302
303 if (value <= m_observationValues[0]) {
304 return 0.0; // if the value is less than or equal to the the current min,
305 // the best estimate is 0.0
306 }
308 return 1.0; // if the value is greater than or equal to the current max,
309 // the best estimate is 1.0
310 }
311
312 // otherwise, find the the node nearest to the given value
313 double minDistance = fabs(m_observationValues[0]-value);
314 unsigned int index = 0;
315 for (unsigned int i = 1; i < m_numberQuantiles; i++) {
316 double dist = fabs(m_observationValues[i]-value);
317 if ( dist < minDistance) {
318 minDistance = fabs(m_observationValues[i]-value);
319 index = i;
320 }
321 else {
322 break; // we are getting farther from the given value
323 }
324 }// TODO: must be a better way to write this???
325
326 //get the coordinates of the three closest nodes cumProb as a function of value
327 double x[3]; // m_observationValues values
328 double y[3]; // cumlative probilities
329
330 if (index == 0) {
331 x[0] = m_observationValues[0];
332 x[1] = m_observationValues[1];
333 x[2] = m_observationValues[2];
334
335 y[0] = m_quantiles[0];
336 y[1] = m_quantiles[1];
337 y[2] = m_quantiles[2];
338 }
339 else if (index == m_numberCells) {
340 x[0] = m_observationValues[index-2];
341 x[1] = m_observationValues[index-1];
342 x[2] = m_observationValues[index ];
343
344 y[0] = m_quantiles[index-2];
345 y[1] = m_quantiles[index-1];
346 y[2] = m_quantiles[index ];
347 }
348 else {
349 x[0] = m_observationValues[index-1];
350 x[1] = m_observationValues[index ];
351 x[2] = m_observationValues[index+1];
352
353 y[0] = m_quantiles[index-1];
354 y[1] = m_quantiles[index ];
355 y[2] = m_quantiles[index+1];
356 }
357
358 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
359 return m_quantiles[index]; // this should never happen,
360 // but just in case return the cumProb of the nearest node
361 }
362
363 //do a parabolic interpolation to find the probability at value
364 double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
365 + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
366 + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
367
368 //check for reasonability of the quadratic interpolation
369 // TODO: better way to write this???
370 int i;
371 for (i = 0; i < 3; i++) {
372 if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
373 break;
374 }
375
376 if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
377 return interp;
378 //if the quadratic wasn't reasonable return the linear interpolation
379 else {
380 return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
381 }
382
383 }
384
385
386
395 double temp = 0.0;
396
398 // in this phase, the algorithm is getting initial values
399 m_observationValues.append(obs);
401
402 // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
404 qSort(m_observationValues.begin(), m_observationValues.end(), std::less<double>());
405 std::sort(m_observationValues.begin(), m_observationValues.end(), qLess<double>());
406 }
407 }
408 else { // m_numberObservations >= m_numberQuantiles
409
410 //incrementing the number of observations
413
414 // replace min or max with this observation, if appropriate
417 }
418 if (obs < m_observationValues[0]) {
419 m_observationValues[0] = obs;
420 temp = 1.0;
421 }
422
423 //estimated quantile positions are increased if obs <= estimated quantile value
424 for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
425 if (obs <= m_observationValues[i]) {
426 for (; i < (int)m_numberQuantiles-1; i++) {
427 m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
428 }
429 }
430 }
431
432 for (int i = 1; i < (int)m_numberQuantiles; i++) {
434 }
435
436 //calculate corrections to node positions (m_n) and values (m_observationValues)
437 int d;
438 // note that the loop does not edit the first or last positions
439 for (int i = 1; i < (int)m_numberCells; i++) {
440 //calculation of d[i] it is either 1, -1, or 0
442 if (fabs(temp)>1 && temp > 0.0) {
443 d = 1;
444 }
445 else if (fabs(temp)>1 && temp < 0.0) {
446 d = -1;
447 }
448 else {
449 continue;
450 }
451
452 // if the node can be moved by d without stepping on another node
453 if ( ( (d == 1)
455 || ( (d == -1)
456 && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
457 //calculate a new quantile value for the node from the parabolic formula
458 temp = m_observationValues[i]
459 + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
466
467 // it is necessary that the values of the quantiles be strictly increasing,
468 // if the parabolic formula perserves this so be it
469 if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
470 m_observationValues[i] = temp;
471 }
472 else {
473 // if the parabolic formula does not maintain
474 // that the values of the quantiles be strictly increasing,
475 // then use linear interpolation
477 + double(d)
480 }
481
482 // the position of the quantile
483 // (in terms of the number of observations <= quantile) is also adjusted
484 m_numObsBelowQuantile[i] += d;
485 }
486 }
487 }
488 }
489
490
491
492 void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
493
494 stream.writeStartElement("statCumProbDistDynCalc");
495 stream.writeTextElement("numberCells", toString(m_numberCells));
496 stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
497 stream.writeTextElement("numberObservations", toString(m_numberObservations));
498
499 stream.writeStartElement("distributionData");
500 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
501 stream.writeStartElement("quantileInfo");
502 // we need to write out high precision for minDistance calculations in value() and cumProb()
503 stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
504 stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
505 stream.writeAttribute("idealNumObsBelowQuantile",
507 stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
508 stream.writeEndElement(); // end observation
509 }
510 stream.writeEndElement(); // end observationData
511 stream.writeEndElement(); // end statCumProbDistDynCalc
512
513 }
514
515
516
517// TODO: should project be const ???
518 StatCumProbDistDynCalc::XmlHandler::XmlHandler(StatCumProbDistDynCalc *probabilityCalc,
519 Project *project) { // TODO: does xml stuff need project???
520 m_xmlHandlerCumProbCalc = probabilityCalc;
521 m_xmlHandlerProject = project; // TODO: does xml stuff need project???
522 m_xmlHandlerCharacters = "";
523 }
524
525
526
527 StatCumProbDistDynCalc::XmlHandler::~XmlHandler() {
528 // do not delete this pointer... we don't own it, do we??? passed into StatCumProbDistDynCalc constructor as pointer
529// delete m_xmlHandlerProject; // TODO: does xml stuff need project???
530 m_xmlHandlerProject = NULL;
531 }
532
533
534
535 bool StatCumProbDistDynCalc::XmlHandler::startElement(const QString &namespaceURI,
536 const QString &localName,
537 const QString &qName,
538 const QXmlAttributes &atts) {
539
540 m_xmlHandlerCharacters = "";
541 if (XmlStackedHandler::startElement(namespaceURI, localName, qName, atts)) {
542 if (qName == "quantileInfo") {
543
544 QString quantile = atts.value("quantile");
545 QString obsValue = atts.value("dataValue");
546 QString idealObs = atts.value("idealNumObsBelowQuantile");
547 QString actualObs = atts.value("actualNumObsBelowQuantile");
548
549 if (!quantile.isEmpty() && !obsValue.isEmpty()
550 && !idealObs.isEmpty() && !actualObs.isEmpty()) {
551 m_xmlHandlerCumProbCalc->m_quantiles.append(toDouble(quantile));
552 m_xmlHandlerCumProbCalc->m_observationValues.append(toDouble(obsValue));
553 m_xmlHandlerCumProbCalc->m_idealNumObsBelowQuantile.append(toDouble(idealObs));
554 m_xmlHandlerCumProbCalc->m_numObsBelowQuantile.append(toDouble(actualObs));
555 }
556 }
557
558 }
559 return true;
560 }
561
562
563
564 bool StatCumProbDistDynCalc::XmlHandler::characters(const QString &ch) {
565 m_xmlHandlerCharacters += ch;
566 return XmlStackedHandler::characters(ch);
567 }
568
569
570
571 bool StatCumProbDistDynCalc::XmlHandler::endElement(const QString &namespaceURI,
572 const QString &localName,
573 const QString &qName) {
574 if (!m_xmlHandlerCharacters.isEmpty()) {
575 if (qName == "numberCells") {
576 m_xmlHandlerCumProbCalc->m_numberCells = toInt(m_xmlHandlerCharacters);
577 }
578 else if (qName == "numberQuantiles") {
579 m_xmlHandlerCumProbCalc->m_numberQuantiles = toInt(m_xmlHandlerCharacters);
580 }
581 else if (qName == "numberObservations") {
582 m_xmlHandlerCumProbCalc->m_numberObservations = toInt(m_xmlHandlerCharacters);
583 }
584
585 m_xmlHandlerCharacters = "";
586 }
587 return XmlStackedHandler::endElement(namespaceURI, localName, qName);
588 }
589
590
591
592 QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
593 stream << (qint32)m_numberCells
594 << (qint32)m_numberQuantiles
595 << (qint32)m_numberObservations
596 << m_quantiles
600 return stream;
601 }
602
603
604
605 QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
606 QString id;
607 qint32 numCells, numQuantiles, numObservations;
608 stream >> numCells
609 >> numQuantiles
610 >> numObservations
611 >> m_quantiles
615
616 m_numberCells = (unsigned int)numCells;
617 m_numberQuantiles = (unsigned int)numQuantiles;
618 m_numberObservations = (unsigned int)numObservations;
619 return stream;
620 }
621
622
623
624 QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
625 return scpddc.write(stream);
626 }
627
628
629
630 QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
631 return scpddc.read(stream);
632 }
633
634 void StatCumProbDistDynCalc::validate() {
635 // if quantiles have not been set
636 if (m_numberQuantiles == 0) {
637 QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
638 "Number of cells = [" + toString(m_numberCells) + "].";
639 throw IException(IException::Programmer, msg, _FILEINFO_);
640 }
641
642 //if there isn't even as much data as there are quantiles to track
644 QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
645 "added [" + toString(m_numberObservations) + "] matches the number of "
646 "quantiles [" + toString(m_numberQuantiles)
647 + "] (i.e. number of nodes) selected.";
648 throw IException(IException::Programmer, msg, _FILEINFO_);
649 }
650
651 }
652}// end namespace Isis
Isis exception class.
Definition IException.h:91
@ Programmer
This error is for when a programmer made an API call that was illegal.
Definition IException.h:146
Adds specific functionality to C++ strings.
Definition IString.h:165
The main project for ipce.
Definition Project.h:289
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added.
double min()
Returns the maximum observation so far included in the dynamic calculation.
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the corresponding ...
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added.
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function.
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added.
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
double max()
Returns the maximum observation so far included in the dynamic calculation.
Manage a stack of content handlers for reading XML files.
This is free and unencumbered software released into the public domain.
Definition Apollo.h:16
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition IString.cpp:211
int toInt(const QString &string)
Global function to convert from a string to an integer.
Definition IString.cpp:93
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition IString.cpp:149
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.