Isis 3 Programmer Reference
PvlTokenizer.cpp
1 
6 /* SPDX-License-Identifier: CC0-1.0 */
7 #include "PvlTokenizer.h"
8 
9 #include <sstream>
10 #include <fstream>
11 
12 #include <QDebug>
13 
14 #include "IException.h"
15 #include "IString.h"
16 #include "Message.h"
17 
18 using namespace std;
19 namespace Isis {
20 
22  PvlTokenizer::PvlTokenizer() {
23  Clear();
24  }
25 
27  PvlTokenizer::~PvlTokenizer() {
28  Clear();
29  }
30 
32  void PvlTokenizer::Clear() {
33  tokens.clear();
34  }
35 
48  void PvlTokenizer::Load(std::istream &stream, const QString &terminator) {
49  QString upTerminator(terminator.toUpper());
50  QString s;
51  int c;
52  bool newlineFound = false;
53 
54  while(true) {
55  newlineFound = SkipWhiteSpace(stream);
56  c = stream.peek();
57  ValidateCharacter(c);
58  if(c == EOF) return;
59 
60  if(c == '#') {
61  s = ReadComment(stream);
62  Isis::PvlToken t("_COMMENT_");
63  t.addValue(s);
64 
65  if(newlineFound || tokens.size() == 0 || tokens[tokens.size()-1].valueSize() == 0) {
66  // applies to next pvl item
67  tokens.push_back(t);
68  }
69  else {
70  // applies to previous pvl item
71  tokens.push_back(tokens[tokens.size()-1]);
72  tokens[tokens.size()-2] = t;
73  }
74 
75  continue;
76  }
77 
78  if(c == '/') {
79  c = stream.get();
80  c = stream.peek();
81  stream.unget();
82  ValidateCharacter(c);
83  if(c == '*') {
84  s = ReadComment(stream);
85  Isis::PvlToken t("_COMMENT_");
86  t.addValue(s);
87 
88  if(newlineFound || tokens.size() == 0 || tokens[tokens.size()-1].valueSize() == 0) {
89  // applies to next pvl item
90  tokens.push_back(t);
91  }
92  else {
93  // applies to previous pvl item
94  tokens.push_back(tokens[tokens.size()-1]);
95  tokens[tokens.size()-2] = t;
96  }
97 
98  continue;
99  }
100  }
101 
102  s = ReadToken(stream);
103  Isis::PvlToken t(s);
104 
105  if(t.keyUpper() == upTerminator) {
106  tokens.push_back(t);
107  return;
108  }
109 
110  SkipWhiteSpace(stream);
111  c = stream.peek();
112  ValidateCharacter(c);
113  if(c == EOF) {
114  tokens.push_back(t);
115  return;
116  }
117 
118  if(c != '=') {
119  tokens.push_back(t);
120  if(t.keyUpper() == upTerminator) return;
121  continue;
122  }
123 
124  stream.ignore();
125  SkipWhiteSpace(stream);
126 
127  c = stream.peek();
128  ValidateCharacter(c);
129  if(c == EOF) {
130  tokens.push_back(t);
131  return;
132  }
133 
134  if(c == '(') {
135  stream.ignore();
136  try {
137  s = ReadToParen(stream);
138  ParseCommaList(t, s);
139  }
140  catch(IException &e) {
141  QString message = Isis::Message::KeywordValueBad(t.key());
142  throw IException(e, IException::Unknown, message, _FILEINFO_);
143  }
144  tokens.push_back(t);
145  continue;
146  }
147 
148  if(c == '{') {
149  stream.ignore();
150  try {
151  s = ReadToBrace(stream);
152  ParseCommaList(t, s);
153  }
154  catch(IException &e) {
155  QString message = Isis::Message::KeywordValueBad(t.key());
156  throw IException(e, IException::Unknown, message, _FILEINFO_);
157  }
158  tokens.push_back(t);
159  continue;
160  }
161 
162  if(c == '"') {
163  stream.ignore();
164  try {
165  s = ReadToDoubleQuote(stream);
166  }
167  catch(IException &e) {
168  QString message = Isis::Message::KeywordValueBad(t.key());
169  throw IException(e, IException::Unknown, message, _FILEINFO_);
170  }
171  t.addValue(s);
172  tokens.push_back(t);
173  continue;
174  }
175 
176  if(c == '\'') {
177  stream.ignore();
178  try {
179  s = ReadToSingleQuote(stream);
180  }
181  catch(IException &e) {
182  QString message = Isis::Message::KeywordValueBad(t.key());
183  throw IException(IException::Unknown, message, _FILEINFO_);
184  }
185  t.addValue(s);
186  tokens.push_back(t);
187  continue;
188  }
189 
190 
191  s = ReadToken(stream);
192  t.addValue(s);
193  tokens.push_back(t);
194  continue;
195  }
196  }
197 
205  QString PvlTokenizer::ReadComment(std::istream &stream) {
206  QString s;
207  int c;
208 
209  c = stream.get();
210  while((c != '\r') && (c != '\n') && (c != '\0')) {
211  s += (char) c;
212  c = stream.peek();
213  ValidateCharacter(c);
214  if(c == EOF) return s;
215  c = stream.get();
216  }
217 
218  stream.unget();
219 
220  return s;
221  }
222 
233  QString PvlTokenizer::ReadToken(std::istream &stream) {
234  QString s;
235  int c;
236 
237  c = stream.get();
238  while((!isspace(c)) && (c != '\0') && (c != '=')) {
239  s += (char) c;
240  c = stream.peek();
241  ValidateCharacter(c);
242  if(c == EOF) return s;
243  c = stream.get();
244  }
245 
246  stream.unget();
247 
248  return s;
249  }
250 
257  bool PvlTokenizer::SkipWhiteSpace(std::istream &stream) {
258  bool foundNewline = false;
259  int c;
260 
261  c = stream.peek();
262  ValidateCharacter(c);
263  while((isspace(c)) || (c == '\0')) {
264  if(c == '\n') {
265  foundNewline = true;
266  }
267 
268  c = stream.get();
269  c = stream.peek();
270  ValidateCharacter(c);
271  }
272 
273  return foundNewline;
274  }
275 
276 
277  QString PvlTokenizer::ReadToDoubleQuote(std::istream &stream) {
278  QString s;
279  int c;
280 
281  do {
282  c = stream.get();
283  ValidateCharacter(c);
284  if(c == EOF) {
285  QString message = Isis::Message::MissingDelimiter('"', s);
286  throw IException(IException::Unknown, message, _FILEINFO_);
287  }
288  else if(c != '"') {
289  s += (char) c;
290  }
291  }
292  while(c != '"');
293 
294  int pos = s.indexOf(QRegExp("[\\n\\r]"));
295  while(pos != -1) {
296  QString first = s.mid(0, pos);
297  bool addspace = false;
298  if(first[pos-1] == ' ') addspace = true;
299  first = first.remove(QRegExp("[\\s]*$"));
300  QString second = s.mid(pos + 1);
301  if(second[0] == ' ') addspace = true;
302  if(second[0] == '\r') addspace = true;
303  if(second[0] == '\n') addspace = true;
304  second = second.remove(QRegExp("^[\\s]*"));
305  if(second[0] == ',') addspace = false;
306  s = first;
307  if(addspace) s += " ";
308  s += second;
309 
310  pos = s.indexOf(QRegExp("[\\n\\r]"));
311  }
312  return s;
313  }
314 
315  QString PvlTokenizer::ReadToSingleQuote(std::istream &stream) {
316  QString s;
317  int c;
318 
319  do {
320  c = stream.get();
321  ValidateCharacter(c);
322  if(c == EOF) {
323  QString message = Isis::Message::MissingDelimiter('\'', s);
324  throw IException(IException::Unknown, message, _FILEINFO_);
325  }
326  else if(c != '\'') {
327  s += (char) c;
328  }
329  }
330  while(c != '\'');
331 
332  int pos = s.indexOf(QRegExp("[\\n\\r]"));
333  while(pos != -1) {
334  QString first = s.mid(0, pos);
335  bool addspace = false;
336  if(first[pos-1] == ' ') addspace = true;
337  first = first.remove(QRegExp("[\\s]*$"));
338  QString second = s.mid(pos + 1);
339  if(second[0] == ' ') addspace = true;
340  if(second[0] == '\r') addspace = true;
341  if(second[0] == '\n') addspace = true;
342  second = second.remove(QRegExp("^[\\s]*"));
343  if(second[0] == ',') addspace = false;
344  s = first;
345  if(addspace) s += " ";
346  s += second;
347  pos = s.indexOf(QRegExp("[\\n\\r]"));
348  }
349 
350  return s;
351  }
352 
353  QString PvlTokenizer::ReadToParen(std::istream &stream) {
354  QString s;
355  int c;
356  int leftParenCount = 1;
357 
358  do {
359  c = stream.get();
360  ValidateCharacter(c);
361  if(c == EOF) {
362  QString message = Isis::Message::MissingDelimiter(')', s);
363  throw IException(IException::Unknown, message, _FILEINFO_);
364  }
365  else if(c == '"') {
366  try {
367  s += "\"" + ReadToDoubleQuote(stream) + "\"";
368  }
369  catch(IException &) {
370  QString message = Isis::Message::MissingDelimiter('"', s);
371  throw IException(IException::Unknown, message, _FILEINFO_);
372  }
373  }
374  else if(c == '\'') {
375  try {
376  s += "'" + ReadToSingleQuote(stream) + "'";
377  }
378  catch(IException &) {
379  QString message = Isis::Message::MissingDelimiter('\'', s);
380  throw IException(IException::Unknown, message, _FILEINFO_);
381  }
382  }
383  else if(c == ')') {
384  leftParenCount--;
385  if(leftParenCount > 0) s += (char) c;
386  }
387  else {
388  s += (char) c;
389  if(c == '(') leftParenCount++;
390  }
391  }
392  while(leftParenCount > 0);
393 
394  return s;
395  }
396 
397  QString PvlTokenizer::ReadToBrace(std::istream &stream) {
398  QString s;
399  int c;
400  int leftBraceCount = 1;
401 
402  do {
403  c = stream.get();
404  ValidateCharacter(c);
405  if(c == EOF) {
406  QString message = Isis::Message::MissingDelimiter('}', s);
407  throw IException(IException::Unknown, message, _FILEINFO_);
408  }
409  else if(c == '"') {
410  try {
411  s += "\"" + ReadToDoubleQuote(stream) + "\"";
412  }
413  catch(IException &e) {
414  QString message = Isis::Message::MissingDelimiter('"', s);
415  throw IException(IException::Unknown, message, _FILEINFO_);
416  }
417  }
418  else if(c == '\'') {
419  try {
420  s += "'" + ReadToSingleQuote(stream) + "'";
421  }
422  catch(IException &) {
423  QString message = Isis::Message::MissingDelimiter('\'', s);
424  throw IException(IException::Unknown, message, _FILEINFO_);
425  }
426  }
427  else if(c == '}') {
428  leftBraceCount--;
429  if(leftBraceCount > 0) s += (char) c;
430  }
431  else {
432  s += (char) c;
433  if(c == '{') leftBraceCount++;
434  }
435  }
436  while(leftBraceCount > 0);
437 
438  return s;
439  }
440 
449  void PvlTokenizer::ParseCommaList(Isis::PvlToken &t, const QString &cl) {
450  stringstream stream(cl.toLatin1().data());
451  int c;
452  QString s;
453 
454  do {
455  SkipWhiteSpace(stream);
456  c = stream.get();
457  if(c == '"') {
458  s += ReadToDoubleQuote(stream);
459  }
460  else if(c == '\'') {
461  s += ReadToSingleQuote(stream);
462  }
463  else if(c == '(') {
464  s += "(";
465  s += ReadToParen(stream);
466  s += ")";
467  }
468  else if(c == '{') {
469  s += "{";
470  s += ReadToBrace(stream);
471  s += "}";
472  }
473  else if(c == ',') {
474  t.addValue(s);
475  s.clear();
476  }
477  else if(c != EOF) {
478  s += (char) c;
479  }
480  }
481  while(c != EOF);
482 
483  t.addValue(s);
484  }
485 
486 
487  vector<Isis::PvlToken> & PvlTokenizer::GetTokenList() {
488  return tokens;
489  }
490 
496  void PvlTokenizer::ValidateCharacter(int c) {
497  if(c == EOF) return;
498  if(isprint(c)) return;
499  if(isspace(c)) return;
500  if(c == '\0') return;
501 
502  QString message = "ASCII data expected but found unprintable (binary) data";
503  throw IException(IException::Unknown, message, _FILEINFO_);
504  }
505 } // end namespace isis
Isis::Message::KeywordValueBad
QString KeywordValueBad(const QString &key)
This error should be used when a supplied keyword does not appear in the list (e.g....
Definition: KeywordValueBad.cpp:11
Isis::PvlToken::key
QString key() const
Returns the token keyword.
Definition: PvlToken.cpp:49
Isis::PvlToken
Container for Keyword-value pair.
Definition: PvlToken.h:38
Isis::IException
Isis exception class.
Definition: IException.h:91
std
Namespace for the standard library.
Isis::Message::MissingDelimiter
QString MissingDelimiter(const char delimiter)
This error should be used when a delimiter is missing.
Definition: MissingDelimiter.cpp:15
Isis::PvlToken::addValue
void addValue(const QString &v)
Adds a value to the value-vector.
Definition: PvlToken.cpp:82
Isis::PvlToken::keyUpper
QString keyUpper() const
Returns the token keyword in all uppercase characters.
Definition: PvlToken.cpp:58
Isis
This is free and unencumbered software released into the public domain.
Definition: Apollo.h:16