Isis 3 Programmer Reference
StatCumProbDistDynCalc.cpp
1
6/* SPDX-License-Identifier: CC0-1.0 */
7
8#include "StatCumProbDistDynCalc.h"
9
10#include <QDataStream>
11#include <QDebug>
12#include <QList>
13#include <QUuid>
14#include <QXmlStreamWriter>
15
16#include <float.h>
17#include <math.h>
18#include <stdio.h>
19
20#include "IException.h"
21#include "IString.h"
22#include "Project.h"
23#include "XmlStackedHandlerReader.h"
24
25#include "Pvl.h"
26#include <iostream>
27
28namespace Isis {
29
30
38 : QObject(parent) {
39 initialize();
40 setQuantiles(nodes);
41 }
42
43
44// TODO: should project be const ???
46 XmlStackedHandlerReader *xmlReader,
47 QObject *parent) { // TODO: does xml stuff need project???
48 initialize();
49 xmlReader->pushContentHandler(new XmlHandler(this, project)); // TODO: does xml stuff need project???
50 }
51
52
53
54 StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
55 : m_numberCells(other.m_numberCells),
56 m_numberQuantiles(other.m_numberQuantiles),
57 m_numberObservations(other.m_numberObservations),
58 m_quantiles(other.m_quantiles),
59 m_observationValues(other.m_observationValues),
60 m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
61 m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
62 }
63
64
65
71
72
73
74 StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
75
76 if (&other != this) {
77 m_numberCells = other.m_numberCells;
78 m_numberQuantiles = other.m_numberQuantiles;
79 m_numberObservations = other.m_numberObservations;
80 m_quantiles = other.m_quantiles;
81 m_observationValues = other.m_observationValues;
82 m_idealNumObsBelowQuantile = other.m_idealNumObsBelowQuantile;
83 m_numObsBelowQuantile = other.m_numObsBelowQuantile;
84 }
85 return *this;
86
87 }
88
89
90
104
105
106
107 void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
108 initialize();
109 if (nodes < 3) {
111 }
112 else {
113 m_numberQuantiles = nodes;
114 }
115
116 m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
117
118 double stepSize = 1.0 / (double)m_numberCells;
119 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
120 if (i == 0) {
121 m_quantiles.append( 0.0 );
122 }
123 else {
124 m_quantiles.append( m_quantiles[i-1] + stepSize );
125 } // essentially the same as i/numCells without having to cast i every time...
126 m_idealNumObsBelowQuantile.append(i+1);
127 m_numObsBelowQuantile.append(i+1);
128 }
129
130 }
131
132
133
144 validate();
145 //return the largest value so far
147 }
148
149
150
161 validate();
162 //return the smallest value so far
163 return m_observationValues[0];
164 }
165
166
167
182 double StatCumProbDistDynCalc::value(double cumProb) {
183 validate();
184 //given a cumProbability return the associate value
185
186 //if the requested cumProb is outside [0,1] return DBL_MAX
187 if (cumProb < 0.0 || cumProb > 1.0) {
188 IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
189 "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
190 throw IException(IException::Programmer, msg, _FILEINFO_);
191 }
192
193 //if the requested quantile is 0.0 return the lowest value
194 if (cumProb == 0.0) {
195 return m_observationValues[0];
196 }
197
198 //if the requested quantile is 1.0 return the largest value
199 if (cumProb == 1.0) {
201 }
202
203 // otherwise find the the node nearest the value
204 double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
205 // to the given probability
206 unsigned int index = 0;
207 for (int i = 1; i < int(m_numberQuantiles); i++) {
208 // if the given probability is closer to this quantile than the currently saved
209 // min distance, then replace
210 double dist = fabs(m_quantiles[i] - cumProb);
211// if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
212 if ( dist < minDistance ) {
213 minDistance = fabs(m_quantiles[i] - cumProb);
214 index = i;
215 }
216 else {
217 break; // we are getting farther from the given value
218 }
219 }// TODO: must be a better way to write this???
220
221 //get the coordinates of the three closest nodes, value as a function of cumProb
222 double x[3]; // m_observationValues values
223 double y[3]; // cumlative probilities
224
225 if (index == 0) { // the given prob is closest to the first quantile
226 y[0] = m_observationValues[0];
227 y[1] = m_observationValues[1];
228 y[2] = m_observationValues[2];
229
230 x[0] = m_quantiles[0];
231 x[1] = m_quantiles[1];
232 x[2] = m_quantiles[2];
233 }
234 else if (index == m_numberCells) { // the given prob is closest to the last quantile
235 y[0] = m_observationValues[index-2];
236 y[1] = m_observationValues[index-1];
237 y[2] = m_observationValues[index ];
238
239 x[0] = m_quantiles[index-2];
240 x[1] = m_quantiles[index-1];
241 x[2] = m_quantiles[index ];
242 }
243 else {
244 y[0] = m_observationValues[index-1];
245 y[1] = m_observationValues[index ];
246 y[2] = m_observationValues[index+1];
247
248 x[0] = m_quantiles[index-1];
249 x[1] = m_quantiles[index ];
250 x[2] = m_quantiles[index+1];
251 }
252
253 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
254 return m_observationValues[index]; // this should never happen,
255 // but just in case return the value of the nearest node
256 }
257
258 // try quadratic interpolation
259 double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
260 + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
261 + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
262
263 // check for reasonability of the quadratic interpolation
264
265 // TODO: better way to write this???
266 int i;
267 for (i = 0; i < 3; i++) {
268 if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
269 break;
270 }
271 }
272
273 // make sure things are strictly increasing
274 if (y[i] <= interp && y[i+1] >= interp) {
275 return interp;
276 }
277 else {
278 // if the quadratic wasn't reasonable return the linear interpolation
279 return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
280 }
281
282 }
283
284
285
299 double StatCumProbDistDynCalc::cumProb(double value) {
300 validate();
301 //given a value return the cumulative probility
302
303 if (value <= m_observationValues[0]) {
304 return 0.0; // if the value is less than or equal to the the current min,
305 // the best estimate is 0.0
306 }
308 return 1.0; // if the value is greater than or equal to the current max,
309 // the best estimate is 1.0
310 }
311
312 // otherwise, find the the node nearest to the given value
313 double minDistance = fabs(m_observationValues[0]-value);
314 unsigned int index = 0;
315 for (unsigned int i = 1; i < m_numberQuantiles; i++) {
316 double dist = fabs(m_observationValues[i]-value);
317 if ( dist < minDistance) {
318 minDistance = fabs(m_observationValues[i]-value);
319 index = i;
320 }
321 else {
322 break; // we are getting farther from the given value
323 }
324 }// TODO: must be a better way to write this???
325
326 //get the coordinates of the three closest nodes cumProb as a function of value
327 double x[3]; // m_observationValues values
328 double y[3]; // cumlative probilities
329
330 if (index == 0) {
331 x[0] = m_observationValues[0];
332 x[1] = m_observationValues[1];
333 x[2] = m_observationValues[2];
334
335 y[0] = m_quantiles[0];
336 y[1] = m_quantiles[1];
337 y[2] = m_quantiles[2];
338 }
339 else if (index == m_numberCells) {
340 x[0] = m_observationValues[index-2];
341 x[1] = m_observationValues[index-1];
342 x[2] = m_observationValues[index ];
343
344 y[0] = m_quantiles[index-2];
345 y[1] = m_quantiles[index-1];
346 y[2] = m_quantiles[index ];
347 }
348 else {
349 x[0] = m_observationValues[index-1];
350 x[1] = m_observationValues[index ];
351 x[2] = m_observationValues[index+1];
352
353 y[0] = m_quantiles[index-1];
354 y[1] = m_quantiles[index ];
355 y[2] = m_quantiles[index+1];
356 }
357
358 if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
359 return m_quantiles[index]; // this should never happen,
360 // but just in case return the cumProb of the nearest node
361 }
362
363 //do a parabolic interpolation to find the probability at value
364 double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
365 + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
366 + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
367
368 //check for reasonability of the quadratic interpolation
369 // TODO: better way to write this???
370 int i;
371 for (i = 0; i < 3; i++) {
372 if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
373 break;
374 }
375
376 if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
377 return interp;
378 //if the quadratic wasn't reasonable return the linear interpolation
379 else {
380 return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
381 }
382
383 }
384
385
386
395 double temp = 0.0;
396
398 // in this phase, the algorithm is getting initial values
399 m_observationValues.append(obs);
401
402 // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
404 qSort(m_observationValues.begin(), m_observationValues.end(), qLess<double>());
405 }
406 }
407 else { // m_numberObservations >= m_numberQuantiles
408
409 //incrementing the number of observations
412
413 // replace min or max with this observation, if appropriate
416 }
417 if (obs < m_observationValues[0]) {
418 m_observationValues[0] = obs;
419 temp = 1.0;
420 }
421
422 //estimated quantile positions are increased if obs <= estimated quantile value
423 for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
424 if (obs <= m_observationValues[i]) {
425 for (; i < (int)m_numberQuantiles-1; i++) {
426 m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
427 }
428 }
429 }
430
431 for (int i = 1; i < (int)m_numberQuantiles; i++) {
433 }
434
435 //calculate corrections to node positions (m_n) and values (m_observationValues)
436 int d;
437 // note that the loop does not edit the first or last positions
438 for (int i = 1; i < (int)m_numberCells; i++) {
439 //calculation of d[i] it is either 1, -1, or 0
441 if (fabs(temp)>1 && temp > 0.0) {
442 d = 1;
443 }
444 else if (fabs(temp)>1 && temp < 0.0) {
445 d = -1;
446 }
447 else {
448 continue;
449 }
450
451 // if the node can be moved by d without stepping on another node
452 if ( ( (d == 1)
454 || ( (d == -1)
455 && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
456 //calculate a new quantile value for the node from the parabolic formula
457 temp = m_observationValues[i]
458 + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
465
466 // it is necessary that the values of the quantiles be strictly increasing,
467 // if the parabolic formula perserves this so be it
468 if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
469 m_observationValues[i] = temp;
470 }
471 else {
472 // if the parabolic formula does not maintain
473 // that the values of the quantiles be strictly increasing,
474 // then use linear interpolation
476 + double(d)
479 }
480
481 // the position of the quantile
482 // (in terms of the number of observations <= quantile) is also adjusted
483 m_numObsBelowQuantile[i] += d;
484 }
485 }
486 }
487 }
488
489
490
491 void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
492
493 stream.writeStartElement("statCumProbDistDynCalc");
494 stream.writeTextElement("numberCells", toString(m_numberCells));
495 stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
496 stream.writeTextElement("numberObservations", toString(m_numberObservations));
497
498 stream.writeStartElement("distributionData");
499 for (unsigned int i = 0; i < m_numberQuantiles; i++) {
500 stream.writeStartElement("quantileInfo");
501 // we need to write out high precision for minDistance calculations in value() and cumProb()
502 stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
503 stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
504 stream.writeAttribute("idealNumObsBelowQuantile",
506 stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
507 stream.writeEndElement(); // end observation
508 }
509 stream.writeEndElement(); // end observationData
510 stream.writeEndElement(); // end statCumProbDistDynCalc
511
512 }
513
514
515
516// TODO: should project be const ???
517 StatCumProbDistDynCalc::XmlHandler::XmlHandler(StatCumProbDistDynCalc *probabilityCalc,
518 Project *project) { // TODO: does xml stuff need project???
519 m_xmlHandlerCumProbCalc = probabilityCalc;
520 m_xmlHandlerProject = project; // TODO: does xml stuff need project???
521 m_xmlHandlerCharacters = "";
522 }
523
524
525
526 StatCumProbDistDynCalc::XmlHandler::~XmlHandler() {
527 // do not delete this pointer... we don't own it, do we??? passed into StatCumProbDistDynCalc constructor as pointer
528// delete m_xmlHandlerProject; // TODO: does xml stuff need project???
529 m_xmlHandlerProject = NULL;
530 }
531
532
533
534 bool StatCumProbDistDynCalc::XmlHandler::startElement(const QString &namespaceURI,
535 const QString &localName,
536 const QString &qName,
537 const QXmlAttributes &atts) {
538
539 m_xmlHandlerCharacters = "";
540 if (XmlStackedHandler::startElement(namespaceURI, localName, qName, atts)) {
541 if (qName == "quantileInfo") {
542
543 QString quantile = atts.value("quantile");
544 QString obsValue = atts.value("dataValue");
545 QString idealObs = atts.value("idealNumObsBelowQuantile");
546 QString actualObs = atts.value("actualNumObsBelowQuantile");
547
548 if (!quantile.isEmpty() && !obsValue.isEmpty()
549 && !idealObs.isEmpty() && !actualObs.isEmpty()) {
550 m_xmlHandlerCumProbCalc->m_quantiles.append(toDouble(quantile));
551 m_xmlHandlerCumProbCalc->m_observationValues.append(toDouble(obsValue));
552 m_xmlHandlerCumProbCalc->m_idealNumObsBelowQuantile.append(toDouble(idealObs));
553 m_xmlHandlerCumProbCalc->m_numObsBelowQuantile.append(toDouble(actualObs));
554 }
555 }
556
557 }
558 return true;
559 }
560
561
562
563 bool StatCumProbDistDynCalc::XmlHandler::characters(const QString &ch) {
564 m_xmlHandlerCharacters += ch;
565 return XmlStackedHandler::characters(ch);
566 }
567
568
569
570 bool StatCumProbDistDynCalc::XmlHandler::endElement(const QString &namespaceURI,
571 const QString &localName,
572 const QString &qName) {
573 if (!m_xmlHandlerCharacters.isEmpty()) {
574 if (qName == "numberCells") {
575 m_xmlHandlerCumProbCalc->m_numberCells = toInt(m_xmlHandlerCharacters);
576 }
577 else if (qName == "numberQuantiles") {
578 m_xmlHandlerCumProbCalc->m_numberQuantiles = toInt(m_xmlHandlerCharacters);
579 }
580 else if (qName == "numberObservations") {
581 m_xmlHandlerCumProbCalc->m_numberObservations = toInt(m_xmlHandlerCharacters);
582 }
583
584 m_xmlHandlerCharacters = "";
585 }
586 return XmlStackedHandler::endElement(namespaceURI, localName, qName);
587 }
588
589
590
591 QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
592 stream << (qint32)m_numberCells
593 << (qint32)m_numberQuantiles
594 << (qint32)m_numberObservations
595 << m_quantiles
599 return stream;
600 }
601
602
603
604 QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
605 QString id;
606 qint32 numCells, numQuantiles, numObservations;
607 stream >> numCells
608 >> numQuantiles
609 >> numObservations
610 >> m_quantiles
614
615 m_numberCells = (unsigned int)numCells;
616 m_numberQuantiles = (unsigned int)numQuantiles;
617 m_numberObservations = (unsigned int)numObservations;
618 return stream;
619 }
620
621
622
623 QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
624 return scpddc.write(stream);
625 }
626
627
628
629 QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
630 return scpddc.read(stream);
631 }
632
633 void StatCumProbDistDynCalc::validate() {
634 // if quantiles have not been set
635 if (m_numberQuantiles == 0) {
636 QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
637 "Number of cells = [" + toString(m_numberCells) + "].";
638 throw IException(IException::Programmer, msg, _FILEINFO_);
639 }
640
641 //if there isn't even as much data as there are quantiles to track
643 QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
644 "added [" + toString(m_numberObservations) + "] matches the number of "
645 "quantiles [" + toString(m_numberQuantiles)
646 + "] (i.e. number of nodes) selected.";
647 throw IException(IException::Programmer, msg, _FILEINFO_);
648 }
649
650 }
651}// end namespace Isis
Isis exception class.
Definition IException.h:91
@ Programmer
This error is for when a programmer made an API call that was illegal.
Definition IException.h:146
Adds specific functionality to C++ strings.
Definition IString.h:165
The main project for ipce.
Definition Project.h:289
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added.
double min()
Returns the maximum observation so far included in the dynamic calculation.
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the corresponding ...
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added.
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function.
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added.
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
double max()
Returns the maximum observation so far included in the dynamic calculation.
Manage a stack of content handlers for reading XML files.
This is free and unencumbered software released into the public domain.
Definition Apollo.h:16
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition IString.cpp:211
int toInt(const QString &string)
Global function to convert from a string to an integer.
Definition IString.cpp:93
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition IString.cpp:149
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.