Isis 3 Programmer Reference
StatCumProbDistDynCalc.cpp
1 
6 /* SPDX-License-Identifier: CC0-1.0 */
7 
8 #include "StatCumProbDistDynCalc.h"
9 
10 #include <QDataStream>
11 #include <QDebug>
12 #include <QList>
13 #include <QUuid>
14 #include <QXmlStreamWriter>
15 
16 #include <float.h>
17 #include <math.h>
18 #include <stdio.h>
19 
20 #include "IException.h"
21 #include "IString.h"
22 #include "Project.h"
23 #include "XmlStackedHandlerReader.h"
24 
25 #include "Pvl.h"
26 #include <iostream>
27 
28 namespace Isis {
29 
30 
38  : QObject(parent) {
39  initialize();
40  setQuantiles(nodes);
41  }
42 
43 
44 // TODO: should project be const ???
46  XmlStackedHandlerReader *xmlReader,
47  QObject *parent) { // TODO: does xml stuff need project???
48  initialize();
49  xmlReader->pushContentHandler(new XmlHandler(this, project)); // TODO: does xml stuff need project???
50  }
51 
52 
53 
54  StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
55  : m_numberCells(other.m_numberCells),
56  m_numberQuantiles(other.m_numberQuantiles),
57  m_numberObservations(other.m_numberObservations),
58  m_quantiles(other.m_quantiles),
59  m_observationValues(other.m_observationValues),
60  m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
61  m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
62  }
63 
64 
65 
70  }
71 
72 
73 
74  StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
75 
76  if (&other != this) {
80  m_quantiles = other.m_quantiles;
84  }
85  return *this;
86 
87  }
88 
89 
90 
99  m_quantiles.clear();
100  m_observationValues.clear();
102  m_numObsBelowQuantile.clear();
103  }
104 
105 
106 
107  void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
108  initialize();
109  if (nodes < 3) {
110  m_numberQuantiles = 3;
111  }
112  else {
113  m_numberQuantiles = nodes;
114  }
115 
116  m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
117 
118  double stepSize = 1.0 / (double)m_numberCells;
119  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
120  if (i == 0) {
121  m_quantiles.append( 0.0 );
122  }
123  else {
124  m_quantiles.append( m_quantiles[i-1] + stepSize );
125  } // essentially the same as i/numCells without having to cast i every time...
126  m_idealNumObsBelowQuantile.append(i+1);
127  m_numObsBelowQuantile.append(i+1);
128  }
129 
130  }
131 
132 
133 
144  validate();
145  //return the largest value so far
147  }
148 
149 
150 
161  validate();
162  //return the smallest value so far
163  return m_observationValues[0];
164  }
165 
166 
167 
182  double StatCumProbDistDynCalc::value(double cumProb) {
183  validate();
184  //given a cumProbability return the associate value
185 
186  //if the requested cumProb is outside [0,1] return DBL_MAX
187  if (cumProb < 0.0 || cumProb > 1.0) {
188  IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
189  "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
190  throw IException(IException::Programmer, msg, _FILEINFO_);
191  }
192 
193  //if the requested quantile is 0.0 return the lowest value
194  if (cumProb == 0.0) {
195  return m_observationValues[0];
196  }
197 
198  //if the requested quantile is 1.0 return the largest value
199  if (cumProb == 1.0) {
201  }
202 
203  // otherwise find the the node nearest the value
204  double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
205  // to the given probability
206  unsigned int index = 0;
207  for (int i = 1; i < int(m_numberQuantiles); i++) {
208  // if the given probability is closer to this quantile than the currently saved
209  // min distance, then replace
210  double dist = fabs(m_quantiles[i] - cumProb);
211 // if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
212  if ( dist < minDistance ) {
213  minDistance = fabs(m_quantiles[i] - cumProb);
214  index = i;
215  }
216  else {
217  break; // we are getting farther from the given value
218  }
219  }// TODO: must be a better way to write this???
220 
221  //get the coordinates of the three closest nodes, value as a function of cumProb
222  double x[3]; // m_observationValues values
223  double y[3]; // cumlative probilities
224 
225  if (index == 0) { // the given prob is closest to the first quantile
226  y[0] = m_observationValues[0];
227  y[1] = m_observationValues[1];
228  y[2] = m_observationValues[2];
229 
230  x[0] = m_quantiles[0];
231  x[1] = m_quantiles[1];
232  x[2] = m_quantiles[2];
233  }
234  else if (index == m_numberCells) { // the given prob is closest to the last quantile
235  y[0] = m_observationValues[index-2];
236  y[1] = m_observationValues[index-1];
237  y[2] = m_observationValues[index ];
238 
239  x[0] = m_quantiles[index-2];
240  x[1] = m_quantiles[index-1];
241  x[2] = m_quantiles[index ];
242  }
243  else {
244  y[0] = m_observationValues[index-1];
245  y[1] = m_observationValues[index ];
246  y[2] = m_observationValues[index+1];
247 
248  x[0] = m_quantiles[index-1];
249  x[1] = m_quantiles[index ];
250  x[2] = m_quantiles[index+1];
251  }
252 
253  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
254  return m_observationValues[index]; // this should never happen,
255  // but just in case return the value of the nearest node
256  }
257 
258  // try quadratic interpolation
259  double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
260  + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
261  + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
262 
263  // check for reasonability of the quadratic interpolation
264 
265  // TODO: better way to write this???
266  int i;
267  for (i = 0; i < 3; i++) {
268  if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
269  break;
270  }
271  }
272 
273  // make sure things are strictly increasing
274  if (y[i] <= interp && y[i+1] >= interp) {
275  return interp;
276  }
277  else {
278  // if the quadratic wasn't reasonable return the linear interpolation
279  return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
280  }
281 
282  }
283 
284 
285 
299  double StatCumProbDistDynCalc::cumProb(double value) {
300  validate();
301  //given a value return the cumulative probility
302 
303  if (value <= m_observationValues[0]) {
304  return 0.0; // if the value is less than or equal to the the current min,
305  // the best estimate is 0.0
306  }
307  else if (value >= m_observationValues[m_numberCells]) {
308  return 1.0; // if the value is greater than or equal to the current max,
309  // the best estimate is 1.0
310  }
311 
312  // otherwise, find the the node nearest to the given value
313  double minDistance = fabs(m_observationValues[0]-value);
314  unsigned int index = 0;
315  for (unsigned int i = 1; i < m_numberQuantiles; i++) {
316  double dist = fabs(m_observationValues[i]-value);
317  if ( dist < minDistance) {
318  minDistance = fabs(m_observationValues[i]-value);
319  index = i;
320  }
321  else {
322  break; // we are getting farther from the given value
323  }
324  }// TODO: must be a better way to write this???
325 
326  //get the coordinates of the three closest nodes cumProb as a function of value
327  double x[3]; // m_observationValues values
328  double y[3]; // cumlative probilities
329 
330  if (index == 0) {
331  x[0] = m_observationValues[0];
332  x[1] = m_observationValues[1];
333  x[2] = m_observationValues[2];
334 
335  y[0] = m_quantiles[0];
336  y[1] = m_quantiles[1];
337  y[2] = m_quantiles[2];
338  }
339  else if (index == m_numberCells) {
340  x[0] = m_observationValues[index-2];
341  x[1] = m_observationValues[index-1];
342  x[2] = m_observationValues[index ];
343 
344  y[0] = m_quantiles[index-2];
345  y[1] = m_quantiles[index-1];
346  y[2] = m_quantiles[index ];
347  }
348  else {
349  x[0] = m_observationValues[index-1];
350  x[1] = m_observationValues[index ];
351  x[2] = m_observationValues[index+1];
352 
353  y[0] = m_quantiles[index-1];
354  y[1] = m_quantiles[index ];
355  y[2] = m_quantiles[index+1];
356  }
357 
358  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
359  return m_quantiles[index]; // this should never happen,
360  // but just in case return the cumProb of the nearest node
361  }
362 
363  //do a parabolic interpolation to find the probability at value
364  double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
365  + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
366  + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
367 
368  //check for reasonability of the quadratic interpolation
369  // TODO: better way to write this???
370  int i;
371  for (i = 0; i < 3; i++) {
372  if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
373  break;
374  }
375 
376  if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
377  return interp;
378  //if the quadratic wasn't reasonable return the linear interpolation
379  else {
380  return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
381  }
382 
383  }
384 
385 
386 
395  double temp = 0.0;
396 
398  // in this phase, the algorithm is getting initial values
399  m_observationValues.append(obs);
401 
402  // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
404  qSort(m_observationValues.begin(), m_observationValues.end(), qLess<double>());
405  }
406  }
407  else { // m_numberObservations >= m_numberQuantiles
408 
409  //incrementing the number of observations
412 
413  // replace min or max with this observation, if appropriate
414  if (obs > m_observationValues[m_numberQuantiles-1]) {
416  }
417  if (obs < m_observationValues[0]) {
418  m_observationValues[0] = obs;
419  temp = 1.0;
420  }
421 
422  //estimated quantile positions are increased if obs <= estimated quantile value
423  for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
424  if (obs <= m_observationValues[i]) {
425  for (; i < (int)m_numberQuantiles-1; i++) {
426  m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
427  }
428  }
429  }
430 
431  for (int i = 1; i < (int)m_numberQuantiles; i++) {
433  }
434 
435  //calculate corrections to node positions (m_n) and values (m_observationValues)
436  int d;
437  // note that the loop does not edit the first or last positions
438  for (int i = 1; i < (int)m_numberCells; i++) {
439  //calculation of d[i] it is either 1, -1, or 0
441  if (fabs(temp)>1 && temp > 0.0) {
442  d = 1;
443  }
444  else if (fabs(temp)>1 && temp < 0.0) {
445  d = -1;
446  }
447  else {
448  continue;
449  }
450 
451  // if the node can be moved by d without stepping on another node
452  if ( ( (d == 1)
453  && (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i] > 1) )
454  || ( (d == -1)
455  && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
456  //calculate a new quantile value for the node from the parabolic formula
457  temp = m_observationValues[i]
458  + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
459  * ( (m_numObsBelowQuantile[i] - m_numObsBelowQuantile[i-1] + d)
465 
466  // it is necessary that the values of the quantiles be strictly increasing,
467  // if the parabolic formula perserves this so be it
468  if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
469  m_observationValues[i] = temp;
470  }
471  else {
472  // if the parabolic formula does not maintain
473  // that the values of the quantiles be strictly increasing,
474  // then use linear interpolation
476  + double(d)
479  }
480 
481  // the position of the quantile
482  // (in terms of the number of observations <= quantile) is also adjusted
483  m_numObsBelowQuantile[i] += d;
484  }
485  }
486  }
487  }
488 
489 
490 
491  void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
492 
493  stream.writeStartElement("statCumProbDistDynCalc");
494  stream.writeTextElement("numberCells", toString(m_numberCells));
495  stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
496  stream.writeTextElement("numberObservations", toString(m_numberObservations));
497 
498  stream.writeStartElement("distributionData");
499  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
500  stream.writeStartElement("quantileInfo");
501  // we need to write out high precision for minDistance calculations in value() and cumProb()
502  stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
503  stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
504  stream.writeAttribute("idealNumObsBelowQuantile",
506  stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
507  stream.writeEndElement(); // end observation
508  }
509  stream.writeEndElement(); // end observationData
510  stream.writeEndElement(); // end statCumProbDistDynCalc
511 
512  }
513 
514 
515 
516 // TODO: should project be const ???
517  StatCumProbDistDynCalc::XmlHandler::XmlHandler(StatCumProbDistDynCalc *probabilityCalc,
518  Project *project) { // TODO: does xml stuff need project???
519  m_xmlHandlerCumProbCalc = probabilityCalc;
520  m_xmlHandlerProject = project; // TODO: does xml stuff need project???
521  m_xmlHandlerCharacters = "";
522  }
523 
524 
525 
526  StatCumProbDistDynCalc::XmlHandler::~XmlHandler() {
527  // do not delete this pointer... we don't own it, do we??? passed into StatCumProbDistDynCalc constructor as pointer
528 // delete m_xmlHandlerProject; // TODO: does xml stuff need project???
529  m_xmlHandlerProject = NULL;
530  }
531 
532 
533 
534  bool StatCumProbDistDynCalc::XmlHandler::startElement(const QString &namespaceURI,
535  const QString &localName,
536  const QString &qName,
537  const QXmlAttributes &atts) {
538 
539  m_xmlHandlerCharacters = "";
540  if (XmlStackedHandler::startElement(namespaceURI, localName, qName, atts)) {
541  if (qName == "quantileInfo") {
542 
543  QString quantile = atts.value("quantile");
544  QString obsValue = atts.value("dataValue");
545  QString idealObs = atts.value("idealNumObsBelowQuantile");
546  QString actualObs = atts.value("actualNumObsBelowQuantile");
547 
548  if (!quantile.isEmpty() && !obsValue.isEmpty()
549  && !idealObs.isEmpty() && !actualObs.isEmpty()) {
550  m_xmlHandlerCumProbCalc->m_quantiles.append(toDouble(quantile));
551  m_xmlHandlerCumProbCalc->m_observationValues.append(toDouble(obsValue));
552  m_xmlHandlerCumProbCalc->m_idealNumObsBelowQuantile.append(toDouble(idealObs));
553  m_xmlHandlerCumProbCalc->m_numObsBelowQuantile.append(toDouble(actualObs));
554  }
555  }
556 
557  }
558  return true;
559  }
560 
561 
562 
563  bool StatCumProbDistDynCalc::XmlHandler::characters(const QString &ch) {
564  m_xmlHandlerCharacters += ch;
565  return XmlStackedHandler::characters(ch);
566  }
567 
568 
569 
570  bool StatCumProbDistDynCalc::XmlHandler::endElement(const QString &namespaceURI,
571  const QString &localName,
572  const QString &qName) {
573  if (!m_xmlHandlerCharacters.isEmpty()) {
574  if (qName == "numberCells") {
575  m_xmlHandlerCumProbCalc->m_numberCells = toInt(m_xmlHandlerCharacters);
576  }
577  else if (qName == "numberQuantiles") {
578  m_xmlHandlerCumProbCalc->m_numberQuantiles = toInt(m_xmlHandlerCharacters);
579  }
580  else if (qName == "numberObservations") {
581  m_xmlHandlerCumProbCalc->m_numberObservations = toInt(m_xmlHandlerCharacters);
582  }
583 
584  m_xmlHandlerCharacters = "";
585  }
586  return XmlStackedHandler::endElement(namespaceURI, localName, qName);
587  }
588 
589 
590 
591  QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
592  stream << (qint32)m_numberCells
593  << (qint32)m_numberQuantiles
594  << (qint32)m_numberObservations
595  << m_quantiles
599  return stream;
600  }
601 
602 
603 
604  QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
605  QString id;
606  qint32 numCells, numQuantiles, numObservations;
607  stream >> numCells
608  >> numQuantiles
609  >> numObservations
610  >> m_quantiles
614 
615  m_numberCells = (unsigned int)numCells;
616  m_numberQuantiles = (unsigned int)numQuantiles;
617  m_numberObservations = (unsigned int)numObservations;
618  return stream;
619  }
620 
621 
622 
623  QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
624  return scpddc.write(stream);
625  }
626 
627 
628 
629  QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
630  return scpddc.read(stream);
631  }
632 
633  void StatCumProbDistDynCalc::validate() {
634  // if quantiles have not been set
635  if (m_numberQuantiles == 0) {
636  QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
637  "Number of cells = [" + toString(m_numberCells) + "].";
638  throw IException(IException::Programmer, msg, _FILEINFO_);
639  }
640 
641  //if there isn't even as much data as there are quantiles to track
643  QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
644  "added [" + toString(m_numberObservations) + "] matches the number of "
645  "quantiles [" + toString(m_numberQuantiles)
646  + "] (i.e. number of nodes) selected.";
647  throw IException(IException::Programmer, msg, _FILEINFO_);
648  }
649 
650  }
651 }// end namespace Isis
Isis::operator<<
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.
Definition: Hillshade.cpp:314
Isis::StatCumProbDistDynCalc::initialize
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
Definition: StatCumProbDistDynCalc.cpp:97
Isis::StatCumProbDistDynCalc::min
double min()
Returns the maximum observation so far included in the dynamic calculation.
Definition: StatCumProbDistDynCalc.cpp:160
Isis::StatCumProbDistDynCalc::m_numberQuantiles
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
Definition: StatCumProbDistDynCalc.h:124
Project.h
Isis::StatCumProbDistDynCalc::~StatCumProbDistDynCalc
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
Definition: StatCumProbDistDynCalc.cpp:69
Isis::XmlStackedHandlerReader::pushContentHandler
virtual void pushContentHandler(XmlStackedHandler *newHandler)
Push a contentHandler and maybe continue parsing...
Definition: XmlStackedHandlerReader.cpp:55
Isis::StatCumProbDistDynCalc::m_numObsBelowQuantile
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
Definition: StatCumProbDistDynCalc.h:141
Isis::StatCumProbDistDynCalc::max
double max()
Returns the maximum observation so far included in the dynamic calculation.
Definition: StatCumProbDistDynCalc.cpp:143
Isis::toString
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition: IString.cpp:211
Isis::XmlStackedHandlerReader
Manage a stack of content handlers for reading XML files.
Definition: XmlStackedHandlerReader.h:30
Isis::Project
The main project for ipce.
Definition: Project.h:289
Isis::StatCumProbDistDynCalc
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
Definition: StatCumProbDistDynCalc.h:65
Isis::StatCumProbDistDynCalc::m_numberObservations
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added.
Definition: StatCumProbDistDynCalc.h:128
Isis::StatCumProbDistDynCalc::m_idealNumObsBelowQuantile
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the corresponding ...
Definition: StatCumProbDistDynCalc.h:136
Isis::toInt
int toInt(const QString &string)
Global function to convert from a string to an integer.
Definition: IString.cpp:93
Isis::StatCumProbDistDynCalc::m_observationValues
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added.
Definition: StatCumProbDistDynCalc.h:133
Isis::IException
Isis exception class.
Definition: IException.h:91
Isis::StatCumProbDistDynCalc::value
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
Definition: StatCumProbDistDynCalc.cpp:182
Isis::StatCumProbDistDynCalc::m_quantiles
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
Definition: StatCumProbDistDynCalc.h:131
Isis::toDouble
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition: IString.cpp:149
Isis::StatCumProbDistDynCalc::m_numberCells
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function.
Definition: StatCumProbDistDynCalc.h:121
Isis::IException::Programmer
@ Programmer
This error is for when a programmer made an API call that was illegal.
Definition: IException.h:146
Isis::StatCumProbDistDynCalc::addObs
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added.
Definition: StatCumProbDistDynCalc.cpp:394
Isis::StatCumProbDistDynCalc::StatCumProbDistDynCalc
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
Definition: StatCumProbDistDynCalc.cpp:37
Isis::IString
Adds specific functionality to C++ strings.
Definition: IString.h:165
QObject
Isis::operator>>
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
Definition: CSVReader.cpp:447
Isis::StatCumProbDistDynCalc::cumProb
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
Definition: StatCumProbDistDynCalc.cpp:299
Isis
This is free and unencumbered software released into the public domain.
Definition: Apollo.h:16