Isis 3 Programmer Reference
StatCumProbDistDynCalc.cpp
Go to the documentation of this file.
1 
21 #include "StatCumProbDistDynCalc.h"
22 
23 #include <QDataStream>
24 #include <QDebug>
25 #include <QList>
26 #include <QUuid>
27 #include <QXmlStreamWriter>
28 
29 #include <float.h>
30 #include <math.h>
31 #include <stdio.h>
32 
33 #include "IException.h"
34 #include "IString.h"
35 #include "Project.h"
36 #include "XmlStackedHandlerReader.h"
37 
38 #include "Pvl.h"
39 #include <iostream>
40 
41 namespace Isis {
42 
43 
51  : QObject(parent) {
52  initialize();
53  setQuantiles(nodes);
54  }
55 
56 
57 // TODO: should project be const ???
59  XmlStackedHandlerReader *xmlReader,
60  QObject *parent) { // TODO: does xml stuff need project???
61  initialize();
62  xmlReader->pushContentHandler(new XmlHandler(this, project)); // TODO: does xml stuff need project???
63  }
64 
65 
66 
67  StatCumProbDistDynCalc::StatCumProbDistDynCalc(const StatCumProbDistDynCalc &other)
68  : m_numberCells(other.m_numberCells),
69  m_numberQuantiles(other.m_numberQuantiles),
70  m_numberObservations(other.m_numberObservations),
71  m_quantiles(other.m_quantiles),
72  m_observationValues(other.m_observationValues),
73  m_idealNumObsBelowQuantile(other.m_idealNumObsBelowQuantile),
74  m_numObsBelowQuantile(other.m_numObsBelowQuantile) {
75  }
76 
77 
78 
83  }
84 
85 
86 
87  StatCumProbDistDynCalc &StatCumProbDistDynCalc::operator=(const StatCumProbDistDynCalc &other) {
88 
89  if (&other != this) {
93  m_quantiles = other.m_quantiles;
97  }
98  return *this;
99 
100  }
101 
102 
103 
112  m_quantiles.clear();
113  m_observationValues.clear();
115  m_numObsBelowQuantile.clear();
116  }
117 
118 
119 
120  void StatCumProbDistDynCalc::setQuantiles(unsigned int nodes) {
121  initialize();
122  if (nodes < 3) {
123  m_numberQuantiles = 3;
124  }
125  else {
126  m_numberQuantiles = nodes;
127  }
128 
129  m_numberCells = m_numberQuantiles - 1; // there is one more border value then there are cells
130 
131  double stepSize = 1.0 / (double)m_numberCells;
132  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
133  if (i == 0) {
134  m_quantiles.append( 0.0 );
135  }
136  else {
137  m_quantiles.append( m_quantiles[i-1] + stepSize );
138  } // essentially the same as i/numCells without having to cast i every time...
139  m_idealNumObsBelowQuantile.append(i+1);
140  m_numObsBelowQuantile.append(i+1);
141  }
142 
143  }
144 
145 
146 
157  validate();
158  //return the largest value so far
160  }
161 
162 
163 
174  validate();
175  //return the smallest value so far
176  return m_observationValues[0];
177  }
178 
179 
180 
195  double StatCumProbDistDynCalc::value(double cumProb) {
196  validate();
197  //given a cumProbability return the associate value
198 
199  //if the requested cumProb is outside [0,1] return DBL_MAX
200  if (cumProb < 0.0 || cumProb > 1.0) {
201  IString msg = "Invalid cumulative probability [" + toString(cumProb) + "] passed in to "
202  "StatCumProbDistDynCalc::value(double cumProb). Must be on the domain [0, 1].";
204  }
205 
206  //if the requested quantile is 0.0 return the lowest value
207  if (cumProb == 0.0) {
208  return m_observationValues[0];
209  }
210 
211  //if the requested quantile is 1.0 return the largest value
212  if (cumProb == 1.0) {
214  }
215 
216  // otherwise find the the node nearest the value
217  double minDistance = fabs(m_quantiles[0] - cumProb); // distance from first quantile
218  // to the given probability
219  unsigned int index = 0;
220  for (int i = 1; i < int(m_numberQuantiles); i++) {
221  // if the given probability is closer to this quantile than the currently saved
222  // min distance, then replace
223  double dist = fabs(m_quantiles[i] - cumProb);
224 // if ( dist < minDistance || qFuzzyCompare(dist, minDistance)) {
225  if ( dist < minDistance ) {
226  minDistance = fabs(m_quantiles[i] - cumProb);
227  index = i;
228  }
229  else {
230  break; // we are getting farther from the given value
231  }
232  }// TODO: must be a better way to write this???
233 
234  //get the coordinates of the three closest nodes, value as a function of cumProb
235  double x[3]; // m_observationValues values
236  double y[3]; // cumlative probilities
237 
238  if (index == 0) { // the given prob is closest to the first quantile
239  y[0] = m_observationValues[0];
240  y[1] = m_observationValues[1];
241  y[2] = m_observationValues[2];
242 
243  x[0] = m_quantiles[0];
244  x[1] = m_quantiles[1];
245  x[2] = m_quantiles[2];
246  }
247  else if (index == m_numberCells) { // the given prob is closest to the last quantile
248  y[0] = m_observationValues[index-2];
249  y[1] = m_observationValues[index-1];
250  y[2] = m_observationValues[index ];
251 
252  x[0] = m_quantiles[index-2];
253  x[1] = m_quantiles[index-1];
254  x[2] = m_quantiles[index ];
255  }
256  else {
257  y[0] = m_observationValues[index-1];
258  y[1] = m_observationValues[index ];
259  y[2] = m_observationValues[index+1];
260 
261  x[0] = m_quantiles[index-1];
262  x[1] = m_quantiles[index ];
263  x[2] = m_quantiles[index+1];
264  }
265 
266  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
267  return m_observationValues[index]; // this should never happen,
268  // but just in case return the value of the nearest node
269  }
270 
271  // try quadratic interpolation
272  double interp = (cumProb-x[1]) * (cumProb-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
273  + (cumProb-x[0]) * (cumProb-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
274  + (cumProb-x[0]) * (cumProb-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
275 
276  // check for reasonability of the quadratic interpolation
277 
278  // TODO: better way to write this???
279  int i;
280  for (i = 0; i < 3; i++) {
281  if ( x[i] <= cumProb && x[i+1] >= cumProb) { // find the nodes on both sides of cumProb
282  break;
283  }
284  }
285 
286  // make sure things are strictly increasing
287  if (y[i] <= interp && y[i+1] >= interp) {
288  return interp;
289  }
290  else {
291  // if the quadratic wasn't reasonable return the linear interpolation
292  return ( (x[i] - cumProb) * y[i+1] + (x[i+1] - cumProb) * y[i] ) / (x[i] - x[i+1]);
293  }
294 
295  }
296 
297 
298 
312  double StatCumProbDistDynCalc::cumProb(double value) {
313  validate();
314  //given a value return the cumulative probility
315 
316  if (value <= m_observationValues[0]) {
317  return 0.0; // if the value is less than or equal to the the current min,
318  // the best estimate is 0.0
319  }
320  else if (value >= m_observationValues[m_numberCells]) {
321  return 1.0; // if the value is greater than or equal to the current max,
322  // the best estimate is 1.0
323  }
324 
325  // otherwise, find the the node nearest to the given value
326  double minDistance = fabs(m_observationValues[0]-value);
327  unsigned int index = 0;
328  for (unsigned int i = 1; i < m_numberQuantiles; i++) {
329  double dist = fabs(m_observationValues[i]-value);
330  if ( dist < minDistance) {
331  minDistance = fabs(m_observationValues[i]-value);
332  index = i;
333  }
334  else {
335  break; // we are getting farther from the given value
336  }
337  }// TODO: must be a better way to write this???
338 
339  //get the coordinates of the three closest nodes cumProb as a function of value
340  double x[3]; // m_observationValues values
341  double y[3]; // cumlative probilities
342 
343  if (index == 0) {
344  x[0] = m_observationValues[0];
345  x[1] = m_observationValues[1];
346  x[2] = m_observationValues[2];
347 
348  y[0] = m_quantiles[0];
349  y[1] = m_quantiles[1];
350  y[2] = m_quantiles[2];
351  }
352  else if (index == m_numberCells) {
353  x[0] = m_observationValues[index-2];
354  x[1] = m_observationValues[index-1];
355  x[2] = m_observationValues[index ];
356 
357  y[0] = m_quantiles[index-2];
358  y[1] = m_quantiles[index-1];
359  y[2] = m_quantiles[index ];
360  }
361  else {
362  x[0] = m_observationValues[index-1];
363  x[1] = m_observationValues[index ];
364  x[2] = m_observationValues[index+1];
365 
366  y[0] = m_quantiles[index-1];
367  y[1] = m_quantiles[index ];
368  y[2] = m_quantiles[index+1];
369  }
370 
371  if ( x[0] == x[1] || x[0] == x[2] || x[1] == x[2]) {
372  return m_quantiles[index]; // this should never happen,
373  // but just in case return the cumProb of the nearest node
374  }
375 
376  //do a parabolic interpolation to find the probability at value
377  double interp = (value-x[1]) * (value-x[2]) / (x[0]-x[1]) / (x[0]-x[2]) * y[0]
378  + (value-x[0]) * (value-x[2]) / (x[1]-x[0]) / (x[1]-x[2]) * y[1]
379  + (value-x[0]) * (value-x[1]) / (x[2]-x[0]) / (x[2]-x[1]) * y[2];
380 
381  //check for reasonability of the quadratic interpolation
382  // TODO: better way to write this???
383  int i;
384  for (i = 0; i < 3; i++) {
385  if ( x[i] <= value && x[i+1] >= value) //find the nodes on both sides of cumProb
386  break;
387  }
388 
389  if (y[i] <= interp && y[i+1] >= interp) //make sure things are strictly increasing
390  return interp;
391  //if the quadratic wasn't reasonable return the linear interpolation
392  else {
393  return ((x[i] - value) * y[i+1] + (x[i+1] - value) * y[i]) / (x[i] - x[i+1]);
394  }
395 
396  }
397 
398 
399 
408  double temp = 0.0;
409 
411  // in this phase, the algorithm is getting initial values
412  m_observationValues.append(obs);
414 
415  // if there are now m_numberQuantiles observations, then sort them from smallest to greatest
417  qSort(m_observationValues.begin(), m_observationValues.end(), qLess<double>());
418  }
419  }
420  else { // m_numberObservations >= m_numberQuantiles
421 
422  //incrementing the number of observations
425 
426  // replace min or max with this observation, if appropriate
427  if (obs > m_observationValues[m_numberQuantiles-1]) {
429  }
430  if (obs < m_observationValues[0]) {
431  m_observationValues[0] = obs;
432  temp = 1.0;
433  }
434 
435  //estimated quantile positions are increased if obs <= estimated quantile value
436  for (int i = 1; i < (int)m_numberQuantiles-1; i++) {
437  if (obs <= m_observationValues[i]) {
438  for (; i < (int)m_numberQuantiles-1; i++) {
439  m_numObsBelowQuantile[i]++; // all m_n's above the first >= obs get incremented
440  }
441  }
442  }
443 
444  for (int i = 1; i < (int)m_numberQuantiles; i++) {
446  }
447 
448  //calculate corrections to node positions (m_n) and values (m_observationValues)
449  int d;
450  // note that the loop does not edit the first or last positions
451  for (int i = 1; i < (int)m_numberCells; i++) {
452  //calculation of d[i] it is either 1, -1, or 0
454  if (fabs(temp)>1 && temp > 0.0) {
455  d = 1;
456  }
457  else if (fabs(temp)>1 && temp < 0.0) {
458  d = -1;
459  }
460  else {
461  continue;
462  }
463 
464  // if the node can be moved by d without stepping on another node
465  if ( ( (d == 1)
466  && (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i] > 1) )
467  || ( (d == -1)
468  && (m_numObsBelowQuantile[i-1] - m_numObsBelowQuantile[i] < -1) ) ) {
469  //calculate a new quantile value for the node from the parabolic formula
470  temp = m_observationValues[i]
471  + double(d) / (m_numObsBelowQuantile[i+1] - m_numObsBelowQuantile[i-1])
472  * ( (m_numObsBelowQuantile[i] - m_numObsBelowQuantile[i-1] + d)
478 
479  // it is necessary that the values of the quantiles be strictly increasing,
480  // if the parabolic formula perserves this so be it
481  if ( m_observationValues[i-1] < temp && temp < m_observationValues[i+1]) {// use qSort???
482  m_observationValues[i] = temp;
483  }
484  else {
485  // if the parabolic formula does not maintain
486  // that the values of the quantiles be strictly increasing,
487  // then use linear interpolation
489  + double(d)
492  }
493 
494  // the position of the quantile
495  // (in terms of the number of observations <= quantile) is also adjusted
496  m_numObsBelowQuantile[i] += d;
497  }
498  }
499  }
500  }
501 
502 
503 
504  void StatCumProbDistDynCalc::save(QXmlStreamWriter &stream, const Project *project) const { // TODO: does xml stuff need project???
505 
506  stream.writeStartElement("statCumProbDistDynCalc");
507  stream.writeTextElement("numberCells", toString(m_numberCells));
508  stream.writeTextElement("numberQuantiles", toString(m_numberQuantiles));
509  stream.writeTextElement("numberObservations", toString(m_numberObservations));
510 
511  stream.writeStartElement("distributionData");
512  for (unsigned int i = 0; i < m_numberQuantiles; i++) {
513  stream.writeStartElement("quantileInfo");
514  // we need to write out high precision for minDistance calculations in value() and cumProb()
515  stream.writeAttribute("quantile", toString(m_quantiles[i], 17));
516  stream.writeAttribute("dataValue", toString(m_observationValues[i], 17));
517  stream.writeAttribute("idealNumObsBelowQuantile",
519  stream.writeAttribute("actualNumObsBelowQuantile", toString(m_numObsBelowQuantile[i]));
520  stream.writeEndElement(); // end observation
521  }
522  stream.writeEndElement(); // end observationData
523  stream.writeEndElement(); // end statCumProbDistDynCalc
524 
525  }
526 
527 
528 
529 // TODO: should project be const ???
530  StatCumProbDistDynCalc::XmlHandler::XmlHandler(StatCumProbDistDynCalc *probabilityCalc,
531  Project *project) { // TODO: does xml stuff need project???
532  m_xmlHandlerCumProbCalc = probabilityCalc;
533  m_xmlHandlerProject = project; // TODO: does xml stuff need project???
534  m_xmlHandlerCharacters = "";
535  }
536 
537 
538 
539  StatCumProbDistDynCalc::XmlHandler::~XmlHandler() {
540  // do not delete this pointer... we don't own it, do we??? passed into StatCumProbDistDynCalc constructor as pointer
541 // delete m_xmlHandlerProject; // TODO: does xml stuff need project???
542  m_xmlHandlerProject = NULL;
543  }
544 
545 
546 
547  bool StatCumProbDistDynCalc::XmlHandler::startElement(const QString &namespaceURI,
548  const QString &localName,
549  const QString &qName,
550  const QXmlAttributes &atts) {
551 
552  m_xmlHandlerCharacters = "";
553  if (XmlStackedHandler::startElement(namespaceURI, localName, qName, atts)) {
554  if (qName == "quantileInfo") {
555 
556  QString quantile = atts.value("quantile");
557  QString obsValue = atts.value("dataValue");
558  QString idealObs = atts.value("idealNumObsBelowQuantile");
559  QString actualObs = atts.value("actualNumObsBelowQuantile");
560 
561  if (!quantile.isEmpty() && !obsValue.isEmpty()
562  && !idealObs.isEmpty() && !actualObs.isEmpty()) {
563  m_xmlHandlerCumProbCalc->m_quantiles.append(toDouble(quantile));
564  m_xmlHandlerCumProbCalc->m_observationValues.append(toDouble(obsValue));
565  m_xmlHandlerCumProbCalc->m_idealNumObsBelowQuantile.append(toDouble(idealObs));
566  m_xmlHandlerCumProbCalc->m_numObsBelowQuantile.append(toDouble(actualObs));
567  }
568  }
569 
570  }
571  return true;
572  }
573 
574 
575 
576  bool StatCumProbDistDynCalc::XmlHandler::characters(const QString &ch) {
577  m_xmlHandlerCharacters += ch;
578  return XmlStackedHandler::characters(ch);
579  }
580 
581 
582 
583  bool StatCumProbDistDynCalc::XmlHandler::endElement(const QString &namespaceURI,
584  const QString &localName,
585  const QString &qName) {
586  if (!m_xmlHandlerCharacters.isEmpty()) {
587  if (qName == "numberCells") {
588  m_xmlHandlerCumProbCalc->m_numberCells = toInt(m_xmlHandlerCharacters);
589  }
590  else if (qName == "numberQuantiles") {
591  m_xmlHandlerCumProbCalc->m_numberQuantiles = toInt(m_xmlHandlerCharacters);
592  }
593  else if (qName == "numberObservations") {
594  m_xmlHandlerCumProbCalc->m_numberObservations = toInt(m_xmlHandlerCharacters);
595  }
596 
597  m_xmlHandlerCharacters = "";
598  }
599  return XmlStackedHandler::endElement(namespaceURI, localName, qName);
600  }
601 
602 
603 
604  QDataStream &StatCumProbDistDynCalc::write(QDataStream &stream) const {
605  stream << (qint32)m_numberCells
606  << (qint32)m_numberQuantiles
607  << (qint32)m_numberObservations
608  << m_quantiles
612  return stream;
613  }
614 
615 
616 
617  QDataStream &StatCumProbDistDynCalc::read(QDataStream &stream) {
618  QString id;
619  qint32 numCells, numQuantiles, numObservations;
620  stream >> numCells
621  >> numQuantiles
622  >> numObservations
623  >> m_quantiles
627 
628  m_numberCells = (unsigned int)numCells;
629  m_numberQuantiles = (unsigned int)numQuantiles;
630  m_numberObservations = (unsigned int)numObservations;
631  return stream;
632  }
633 
634 
635 
636  QDataStream &operator<<(QDataStream &stream, const StatCumProbDistDynCalc &scpddc) {
637  return scpddc.write(stream);
638  }
639 
640 
641 
642  QDataStream &operator>>(QDataStream &stream, StatCumProbDistDynCalc &scpddc) {
643  return scpddc.read(stream);
644  }
645 
646  void StatCumProbDistDynCalc::validate() {
647  // if quantiles have not been set
648  if (m_numberQuantiles == 0) {
649  QString msg = "StatCumProbDistDynCalc will return no data until the quantiles have been set. "
650  "Number of cells = [" + toString(m_numberCells) + "].";
651  throw IException(IException::Programmer, msg, _FILEINFO_);
652  }
653 
654  //if there isn't even as much data as there are quantiles to track
656  QString msg = "StatCumProbDistDynCalc will return no data until the number of observations "
657  "added [" + toString(m_numberObservations) + "] matches the number of "
658  "quantiles [" + toString(m_numberQuantiles)
659  + "] (i.e. number of nodes) selected.";
660  throw IException(IException::Programmer, msg, _FILEINFO_);
661  }
662 
663  }
664 }// end namespace Isis
double max()
Returns the maximum observation so far included in the dynamic calculation.
The main project for ipce.
Definition: Project.h:289
StatCumProbDistDynCalc(unsigned int nodes=20, QObject *parent=0)
Construtor sets up the class to start recieving data.
double value(double cumProb)
Provides the value of the variable that has the given cumulative probility (according the current est...
~StatCumProbDistDynCalc()
Destroys StatCumProbDistDynCalc object.
int toInt(const QString &string)
Global function to convert from a string to an integer.
Definition: IString.cpp:108
QList< double > m_observationValues
The calculated values of the quantiles, note this is dynamically changing as observations are added...
QString toString(bool boolToConvert)
Global function to convert a boolean to a string.
Definition: IString.cpp:226
double toDouble(const QString &string)
Global function to convert from a string to a double.
Definition: IString.cpp:164
This error is for when a programmer made an API call that was illegal.
Definition: IException.h:162
unsigned int m_numberQuantiles
The number of quantiles being used to model the probility density function.
This class is used to approximate cumulative probibility distributions of a stream of observations wi...
unsigned int m_numberCells
The number of cells or histogram bins that are being used to model the probility density function...
#define _FILEINFO_
Macro for the filename and line number.
Definition: IException.h:40
std::istream & operator>>(std::istream &is, CSVReader &csv)
Input read operator for input stream sources.
Definition: CSVReader.cpp:463
virtual void pushContentHandler(XmlStackedHandler *newHandler)
Push a contentHandler and maybe continue parsing...
QList< double > m_quantiles
The target quantiles being modeled, between 0 and 1.
void initialize()
Inializer, resets the class to start its dynamic calculation anew.
QList< int > m_numObsBelowQuantile
The actual number of observations that are less than or equal to the value of the corresponding quant...
unsigned int m_numberObservations
The number of observations, note this is dynamically changing as observations are added...
void addObs(double obs)
Values for the estimated quantile positions are update as observations are added. ...
Isis exception class.
Definition: IException.h:107
Adds specific functionality to C++ strings.
Definition: IString.h:181
Namespace for ISIS/Bullet specific routines.
Definition: Apollo.h:31
double min()
Returns the maximum observation so far included in the dynamic calculation.
QList< double > m_idealNumObsBelowQuantile
The ideal number of observations that should be less than or equal to the value of the corresponding ...
QDebug operator<<(QDebug debug, const Hillshade &hillshade)
Print this class out to a QDebug object.
Definition: Hillshade.cpp:308
double cumProb(double value)
Provides the cumulative probility, that is, the proportion of the distribution that is less than or e...
Manage a stack of content handlers for reading XML files.