Isis 3.0 Programmer Reference
Home
StatCumProbDistDynCalc.cpp
Go to the documentation of this file.
1 
21 #include "StatCumProbDistDynCalc.h"
22 
23 #include <QDataStream>
24 #include <QDebug>
25 #include <QList>
26 #include <QUuid>
27 #include <QXmlStreamWriter>
28 
29 #include <float.h>
30 #include <math.h>
31 #include <stdio.h>
32 
33 #include "IException.h"
34 #include "IString.h"
35 #include "Project.h"
36 #include "XmlStackedHandlerReader.h"
37 
38 #include "Pvl.h"
39 #include <iostream>
40 
41 namespace Isis {
42 
43 
51  : QObject(parent) {
52  initialize();
53  setQuantiles(nodes);
54  }
55 
56 
57 // TODO: should project be const ???
59  XmlStackedHandlerReader *xmlReader,
60  QObject *parent) { // TODO: does xml stuff need project???
61  initialize();
62  xmlReader->pushContentHandler(new XmlHandler(this, project)); // TODO: does xml stuff need project???
63  }
64 
65 
66 
67  StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
68  : m_id(new QUuid(other.m_id->toString())),
69  m_numberCells(other.m_numberCells),
70  m_numberQuantiles(other.m_numberQuantiles),
71  m_numberObservations(other.m_numberObservations),
72  m_quantiles(other.m_quantiles),
73  m_observationValues(other.m_observationValues),
74  m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
75  m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
76  }
77 
78 
79 
84  delete m_id;
85  m_id = NULL;
86  }
87 
88 
89 
90  StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
91 
92  if (&other != this) {
93  delete m_id;
94  m_id = NULL;
95  m_id = new QUuid(other.m_id->toString());
96 
100  m_quantiles = other.m_quantiles;
104  }
105  return *this;
106 
107  }
108 
109 
110 
118  m_id = NULL;
120  m_quantiles.clear();
121  m_observationValues.clear();
123  m_numObsBelowQuantile.clear();
124  }
125 
126 
127 
128  void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
129  initialize();
130  m_id = new QUuid(QUuid::createUuid());
131 
132  if (nodes < 3) {
133  m_numberQuantiles = 3;
134  }
135  else {
136  m_numberQuantiles = nodes;
137  }
138 
139  m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
140 
141  double stepSize = 1.0 / (double)m_numberCells;
142  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
143  if (i == 0) {
144  m_quantiles.append( 0.0 );
145  }
146  else {
147  m_quantiles.append( m_quantiles[i-1] + stepSize );
148  } // essentially the same as i/numCells without having to cast i every time...
149  m_idealNumObsBelowQuantile.append(i+1);
150  m_numObsBelowQuantile.append(i+1);
151  }
152 
153  }
154 
155 
156 
167  validate();
168  //return the largest value so far
170  }
171 
172 
173 
184  validate();
185  //return the smallest value so far
186  return m_observationValues[0];
187  }
188 
189 
190 
205  double StatCumProbDistDynCalc::value(double cumProb) {
206  validate();
207  //given a cumProbability return the associate value
208 
209  //if the requested cumProb is outside [0,1] return DBL_MAX
210  if (cumProb < 0.0 || cumProb > 1.0) {
211  IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
212  "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
214  }
215 
216  //if the requested quantile is 0.0 return the lowest value
217  if (cumProb == 0.0) {
218  return m_observationValues[0];
219  }
220 
221  //if the requested quantile is 1.0 return the largest value
222  if (cumProb == 1.0) {
224  }
225 
226  // otherwise find the the node nearest the value
227  double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
228  // to the given probability
229  unsigned int index = 0;
230  for (int i = 1; i < int(m_numberQuantiles); i++) {
231  // if the given probability is closer to this quantile than the currently saved
232  // min distance, then replace
233  double dist = fabs(m_quantiles[i] - cumProb);
234 // if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
235  if ( dist < minDistance ) {
236  minDistance = fabs(m_quantiles[i] - cumProb);
237  index = i;
238  }
239  else {
240  break; // we are getting farther from the given value
241  }
242  }// TODO: must be a better way to write this???
243 
244  //get the coordinates of the three closest nodes, value as a function of cumProb
245  double x[3]; // m_observationValues values
246  double y[3]; // cumlative probilities
247 
248  if (index == 0) { // the given prob is closest to the first quantile
249  y[0] = m_observationValues[0];
250  y[1] = m_observationValues[1];
251  y[2] = m_observationValues[2];
252 
253  x[0] = m_quantiles[0];
254  x[1] = m_quantiles[1];
255  x[2] = m_quantiles[2];
256  }
257  else if (index == m_numberCells) { // the given prob is closest to the last quantile
258  y[0] = m_observationValues[index-2];
259  y[1] = m_observationValues[index-1];
260  y[2] = m_observationValues[index ];
261 
262  x[0] = m_quantiles[index-2];
263  x[1] = m_quantiles[index-1];
264  x[2] = m_quantiles[index ];
265  }
266  else {
267  y[0] = m_observationValues[index-1];
268  y[1] = m_observationValues[index ];
269  y[2] = m_observationValues[index+1];
270 
271  x[0] = m_quantiles[index-1];
272  x[1] = m_quantiles[index ];
273  x[2] = m_quantiles[index+1];
274  }
275 
276  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
277  return m_observationValues[index]; // this should never happen,
278  // but just in case return the value of the nearest node
279  }
280 
281  // try quadratic interpolation
282  double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
283  + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
284  + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
285 
286  // check for reasonability of the quadratic interpolation
287 
288  // TODO: better way to write this???
289  int i;
290  for (i = 0; i < 3; i++) {
291  if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
292  break;
293  }
294  }
295 
296  // make sure things are strictly increasing
297  if (y[i] <= interp && y[i+1] >= interp) {
298  return interp;
299  }
300  else {
301  // if the quadratic wasn't reasonable return the linear interpolation
302  return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
303  }
304 
305  }
306 
307 
308 
322  double StatCumProbDistDynCalc::cumProb(double value) {
323  validate();
324  //given a value return the cumulative probility
325 
326  if (value <= m_observationValues[0]) {
327  return 0.0; // if the value is less than or equal to the the current min,
328  // the best estimate is 0.0
329  }
330  else if (value >= m_observationValues[m_numberCells]) {
331  return 1.0; // if the value is greater than or equal to the current max,
332  // the best estimate is 1.0
333  }
334 
335  // otherwise, find the the node nearest to the given value
336  double minDistance = fabs(m_observationValues[0]-value);
337  unsigned int index = 0;
338  for (unsigned int i = 1; i < m_numberQuantiles; i++) {
339  double dist = fabs(m_observationValues[i]-value);
340  if ( dist < minDistance) {
341  minDistance = fabs(m_observationValues[i]-value);
342  index = i;
343  }
344  else {
345  break; // we are getting farther from the given value
346  }
347  }// TODO: must be a better way to write this???
348 
349  //get the coordinates of the three closest nodes cumProb as a function of value
350  double x[3]; // m_observationValues values
351  double y[3]; // cumlative probilities
352 
353  if (index == 0) {
354  x[0] = m_observationValues[0];
355  x[1] = m_observationValues[1];
356  x[2] = m_observationValues[2];
357 
358  y[0] = m_quantiles[0];
359  y[1] = m_quantiles[1];
360  y[2] = m_quantiles[2];
361  }
362  else if (index == m_numberCells) {
363  x[0] = m_observationValues[index-2];
364  x[1] = m_observationValues[index-1];
365  x[2] = m_observationValues[index ];
366 
367  y[0] = m_quantiles[index-2];
368  y[1] = m_quantiles[index-1];
369  y[2] = m_quantiles[index ];
370  }
371  else {
372  x[0] = m_observationValues[index-1];
373  x[1] = m_observationValues[index ];
374  x[2] = m_observationValues[index+1];
375 
376  y[0] = m_quantiles[index-1];
377  y[1] = m_quantiles[index ];
378  y[2] = m_quantiles[index+1];
379  }
380 
381  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
382  return m_quantiles[index]; // this should never happen,
383  // but just in case return the cumProb of the nearest node
384  }
385 
386  //do a parabolic interpolation to find the probability at value
387  double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
388  + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
389  + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
390 
391  //check for reasonability of the quadratic interpolation
392  // TODO: better way to write this???
393  int i;
394  for (i = 0; i < 3; i++) {
395  if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
396  break;
397  }
398 
399  if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
400  return interp;
401  //if the quadratic wasn't reasonable return the linear interpolation
402  else {
403  return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
404  }
405 
406  }
407 
408 
409 
418  double temp = 0.0;
419 
420  if (m_numberObservations < m_numberQuantiles) {
421  // in this phase, the algorithm is getting initial values
422  m_observationValues.append(obs);
424 
425  // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
426  if (m_numberObservations == m_numberQuantiles) {
427  qSort(m_observationValues.begin(), m_observationValues.end(), qLess<double>());
428  }
429  }
430  else { // m_numberObservations >= m_numberQuantiles
431 
432  //incrementing the number of observations
434  m_numObsBelowQuantile[m_numberQuantiles-1] = m_numberObservations;
435 
436  // replace min or max with this observation, if appropriate
437  if (obs > m_observationValues[m_numberQuantiles-1]) {
438  m_observationValues[m_numberQuantiles-1] = obs;
439  }
440  if (obs < m_observationValues[0]) {
441  m_observationValues[0] = obs;
442  temp = 1.0;
443  }
444 
445  //estimated quantile positions are increased if obs <= estimated quantile value
446  for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
447  if (obs <= m_observationValues[i]) {
448  for (; i < (int)m_numberQuantiles-1; i++) {
449  m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
450  }
451  }
452  }
453 
454  for (int i = 1; i < (int)m_numberQuantiles; i++) {
456  }
457 
458  //calculate corrections to node positions (m_n) and values (m_observationValues)
459  int d;
460  // note that the loop does not edit the first or last positions
461  for (int i = 1; i < (int)m_numberCells; i++) {
462  //calculation of d[i] it is either 1, -1, or 0
464  if (fabs(temp)>1 && temp > 0.0) {
465  d = 1;
466  }
467  else if (fabs(temp)>1 && temp < 0.0) {
468  d = -1;
469  }
470  else {
471  continue;
472  }
473 
474  // if the node can be moved by d without stepping on another node
475  if ( ( (d == 1)
476  && (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i] > 1) )
477  || ( (d == -1)
478  && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
479  //calculate a new quantile value for the node from the parabolic formula
480  temp = m_observationValues[i]
481  + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
482  * ( (m_numObsBelowQuantile[i] - m_numObsBelowQuantile[i-1] + d)
484  / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i])
485  + (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i] - d)
487  / (m_numObsBelowQuantile[i] - m_numObsBelowQuantile[i-1]) );
488 
489  // it is necessary that the values of the quantiles be strictly increasing,
490  // if the parabolic formula perserves this so be it
491  if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
492  m_observationValues[i] = temp;
493  }
494  else {
495  // if the parabolic formula does not maintain
496  // that the values of the quantiles be strictly increasing,
497  // then use linear interpolation
499  + double(d)
501  / (m_numObsBelowQuantile[i+d] - m_numObsBelowQuantile[i]);
502  }
503 
504  // the position of the quantile
505  // (in terms of the number of observations <= quantile) is also adjusted
506  m_numObsBelowQuantile[i] += d;
507  }
508  }
509  }
510  }
511 
512 
513 
514  void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
515 
516  stream.writeStartElement("statCumProbDistDynCalc");
517  stream.writeTextElement("id", m_id->toString());
518  stream.writeTextElement("numberCells", toString(m_numberCells));
519  stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
520  stream.writeTextElement("numberObservations", toString(m_numberObservations));
521 
522  stream.writeStartElement("distributionData");
523  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
524  stream.writeStartElement("quantileInfo");
525  // we need to write out high precision for minDistance calculations in value() and cumProb()
526  stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
527  stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
528  stream.writeAttribute("idealNumObsBelowQuantile",
530  stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
531  stream.writeEndElement(); // end observation
532  }
533  stream.writeEndElement(); // end observationData
534  stream.writeEndElement(); // end statCumProbDistDynCalc
535 
536  }
537 
538 
539 
540 // TODO: should project be const ???
541  StatCumProbDistDynCalc::XmlHandler::XmlHandler(StatCumProbDistDynCalc *probabilityCalc,
542  Project *project) { // TODO: does xml stuff need project???
543  m_xmlHandlerCumProbCalc = probabilityCalc;
544  m_xmlHandlerProject = project; // TODO: does xml stuff need project???
545  m_xmlHandlerCharacters = "";
546  }
547 
548 
549 
550  StatCumProbDistDynCalc::XmlHandler::~XmlHandler() {
551  // do not delete this pointer... we don't own it, do we??? passed into StatCumProbDistDynCalc constructor as pointer
552 // delete m_xmlHandlerProject; // TODO: does xml stuff need project???
553  m_xmlHandlerProject = NULL;
554  }
555 
556 
557 
558  bool StatCumProbDistDynCalc::XmlHandler::startElement(const QString &namespaceURI,
559  const QString &localName,
560  const QString &qName,
561  const QXmlAttributes &atts) {
562 
563  m_xmlHandlerCharacters = "";
564  if (XmlStackedHandler::startElement(namespaceURI, localName, qName, atts)) {
565  if (qName == "quantileInfo") {
566 
567  QString quantile = atts.value("quantile");
568  QString obsValue = atts.value("dataValue");
569  QString idealObs = atts.value("idealNumObsBelowQuantile");
570  QString actualObs = atts.value("actualNumObsBelowQuantile");
571 
572  if (!quantile.isEmpty() && !obsValue.isEmpty()
573  && !idealObs.isEmpty() && !actualObs.isEmpty()) {
574  m_xmlHandlerCumProbCalc->m_quantiles.append(toDouble(quantile));
575  m_xmlHandlerCumProbCalc->m_observationValues.append(toDouble(obsValue));
576  m_xmlHandlerCumProbCalc->m_idealNumObsBelowQuantile.append(toDouble(idealObs));
577  m_xmlHandlerCumProbCalc->m_numObsBelowQuantile.append(toDouble(actualObs));
578  }
579  }
580 
581  }
582  return true;
583  }
584 
585 
586 
587  bool StatCumProbDistDynCalc::XmlHandler::characters(const QString &ch) {
588  m_xmlHandlerCharacters += ch;
589  return XmlStackedHandler::characters(ch);
590  }
591 
592 
593 
594  bool StatCumProbDistDynCalc::XmlHandler::endElement(const QString &namespaceURI,
595  const QString &localName,
596  const QString &qName) {
597  if (!m_xmlHandlerCharacters.isEmpty()) {
598  if (qName == "id") {
599  m_xmlHandlerCumProbCalc->m_id = NULL;
600  m_xmlHandlerCumProbCalc->m_id = new QUuid(m_xmlHandlerCharacters);
601  }
602  else if (qName == "numberCells") {
603  m_xmlHandlerCumProbCalc->m_numberCells = toInt(m_xmlHandlerCharacters);
604  }
605  else if (qName == "numberQuantiles") {
606  m_xmlHandlerCumProbCalc->m_numberQuantiles = toInt(m_xmlHandlerCharacters);
607  }
608  else if (qName == "numberObservations") {
609  m_xmlHandlerCumProbCalc->m_numberObservations = toInt(m_xmlHandlerCharacters);
610  }
611 
612  m_xmlHandlerCharacters = "";
613  }
614  return XmlStackedHandler::endElement(namespaceURI, localName, qName);
615  }
616 
617 
618 
619  QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
620  stream << m_id->toString()
621  << (qint32)m_numberCells
622  << (qint32)m_numberQuantiles
623  << (qint32)m_numberObservations
624  << m_quantiles
628  return stream;
629  }
630 
631 
632 
633  QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
634  QString id;
635  qint32 numCells, numQuantiles, numObservations;
636  stream >> id
637  >> numCells
638  >> numQuantiles
639  >> numObservations
640  >> m_quantiles
644 
645  delete m_id;
646  m_id = NULL;
647  m_id = new QUuid(id);
648 
649  m_numberCells = (unsigned int)numCells;
650  m_numberQuantiles = (unsigned int)numQuantiles;
651  m_numberObservations = (unsigned int)numObservations;
652  return stream;
653  }
654 
655 
656 
657  QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
658  return scpddc.write(stream);
659  }
660 
661 
662 
663  QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
664  return scpddc.read(stream);
665  }
666 
667  void StatCumProbDistDynCalc::validate() {
668  // if quantiles have not been set
669  if (m_numberQuantiles == 0) {
670  QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
671  "Number of cells = [" + toString(m_numberCells) + "].";
672  throw IException(IException::Programmer, msg, _FILEINFO_);
673  }
674 
675  //if there isn't even as much data as there are quantiles to track
676  if (m_numberObservations < m_numberQuantiles) {
677  QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
678  "added [" + toString(m_numberObservations) + "] matches the number of "
679  "quantiles [" + toString(m_numberQuantiles)
680  + "] (i.e. number of nodes) selected.";
681  throw IException(IException::Programmer, msg, _FILEINFO_);
682  }
683 
684  }
685 }// end namespace Isis
double max()
Returns the maximum observation so far included in the dynamic calculation.
The main project for cnetsuite.
Definition: Project.h:105
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
int toInt(const QString &string)
Global function to convert from a string to an integer.
Definition: IString.cpp:108
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added...
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition: IString.cpp:226
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition: IString.cpp:164
This error is for when a programmer made an API call that was illegal.
Definition: IException.h:154
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function...
#define _FILEINFO_
Macro for the filename and line number.
Definition: IException.h:38
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
Definition: CSVReader.cpp:463
QUuid * m_id
A unique ID for this object (useful for others to reference this object when saving to disk)...
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added...
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added. ...
Isis exception class.
Definition: IException.h:99
Adds specific functionality to C++ strings.
Definition: IString.h:179
double min()
Returns the maximum observation so far included in the dynamic calculation.
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the correspond...
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.
Definition: Hillshade.cpp:308
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
his enables stack-based XML parsing of XML files.