Isis 3 Programmer Reference
PvlTokenizer.cpp
1
6/* SPDX-License-Identifier: CC0-1.0 */
7#include "PvlTokenizer.h"
8
9#include <sstream>
10#include <fstream>
11
12#include <QDebug>
13
14#include "IException.h"
15#include "IString.h"
16#include "Message.h"
17
18using namespace std;
19namespace Isis {
20
25
30
33 tokens.clear();
34 }
35
48 void PvlTokenizer::Load(std::istream &stream, const QString &terminator) {
49 QString upTerminator(terminator.toUpper());
50 QString s;
51 int c;
52 bool newlineFound = false;
53
54 while(true) {
55 newlineFound = SkipWhiteSpace(stream);
56 c = stream.peek();
58 if(c == EOF) return;
59
60 if(c == '#') {
61 s = ReadComment(stream);
62 Isis::PvlToken t("_COMMENT_");
63 t.addValue(s);
64
65 if(newlineFound || tokens.size() == 0 || tokens[tokens.size()-1].valueSize() == 0) {
66 // applies to next pvl item
67 tokens.push_back(t);
68 }
69 else {
70 // applies to previous pvl item
71 tokens.push_back(tokens[tokens.size()-1]);
72 tokens[tokens.size()-2] = t;
73 }
74
75 continue;
76 }
77
78 if(c == '/') {
79 c = stream.get();
80 c = stream.peek();
81 stream.unget();
83 if(c == '*') {
84 s = ReadComment(stream);
85 Isis::PvlToken t("_COMMENT_");
86 t.addValue(s);
87
88 if(newlineFound || tokens.size() == 0 || tokens[tokens.size()-1].valueSize() == 0) {
89 // applies to next pvl item
90 tokens.push_back(t);
91 }
92 else {
93 // applies to previous pvl item
94 tokens.push_back(tokens[tokens.size()-1]);
95 tokens[tokens.size()-2] = t;
96 }
97
98 continue;
99 }
100 }
101
102 s = ReadToken(stream);
103 Isis::PvlToken t(s);
104
105 if(t.keyUpper() == upTerminator) {
106 tokens.push_back(t);
107 return;
108 }
109
110 SkipWhiteSpace(stream);
111 c = stream.peek();
113 if(c == EOF) {
114 tokens.push_back(t);
115 return;
116 }
117
118 if(c != '=') {
119 tokens.push_back(t);
120 if(t.keyUpper() == upTerminator) return;
121 continue;
122 }
123
124 stream.ignore();
125 SkipWhiteSpace(stream);
126
127 c = stream.peek();
129 if(c == EOF) {
130 tokens.push_back(t);
131 return;
132 }
133
134 if(c == '(') {
135 stream.ignore();
136 try {
137 s = ReadToParen(stream);
138 ParseCommaList(t, s);
139 }
140 catch(IException &e) {
141 QString message = Isis::Message::KeywordValueBad(t.key());
142 throw IException(e, IException::Unknown, message, _FILEINFO_);
143 }
144 tokens.push_back(t);
145 continue;
146 }
147
148 if(c == '{') {
149 stream.ignore();
150 try {
151 s = ReadToBrace(stream);
152 ParseCommaList(t, s);
153 }
154 catch(IException &e) {
155 QString message = Isis::Message::KeywordValueBad(t.key());
156 throw IException(e, IException::Unknown, message, _FILEINFO_);
157 }
158 tokens.push_back(t);
159 continue;
160 }
161
162 if(c == '"') {
163 stream.ignore();
164 try {
165 s = ReadToDoubleQuote(stream);
166 }
167 catch(IException &e) {
168 QString message = Isis::Message::KeywordValueBad(t.key());
169 throw IException(e, IException::Unknown, message, _FILEINFO_);
170 }
171 t.addValue(s);
172 tokens.push_back(t);
173 continue;
174 }
175
176 if(c == '\'') {
177 stream.ignore();
178 try {
179 s = ReadToSingleQuote(stream);
180 }
181 catch(IException &e) {
182 QString message = Isis::Message::KeywordValueBad(t.key());
183 throw IException(IException::Unknown, message, _FILEINFO_);
184 }
185 t.addValue(s);
186 tokens.push_back(t);
187 continue;
188 }
189
190
191 s = ReadToken(stream);
192 t.addValue(s);
193 tokens.push_back(t);
194 continue;
195 }
196 }
197
205 QString PvlTokenizer::ReadComment(std::istream &stream) {
206 QString s;
207 int c;
208
209 c = stream.get();
210 while((c != '\r') && (c != '\n') && (c != '\0')) {
211 s += (char) c;
212 c = stream.peek();
214 if(c == EOF) return s;
215 c = stream.get();
216 }
217
218 stream.unget();
219
220 return s;
221 }
222
233 QString PvlTokenizer::ReadToken(std::istream &stream) {
234 QString s;
235 int c;
236
237 c = stream.get();
238 while((!isspace(c)) && (c != '\0') && (c != '=')) {
239 s += (char) c;
240 c = stream.peek();
242 if(c == EOF) return s;
243 c = stream.get();
244 }
245
246 stream.unget();
247
248 return s;
249 }
250
257 bool PvlTokenizer::SkipWhiteSpace(std::istream &stream) {
258 bool foundNewline = false;
259 int c;
260
261 c = stream.peek();
263 while((isspace(c)) || (c == '\0')) {
264 if(c == '\n') {
265 foundNewline = true;
266 }
267
268 c = stream.get();
269 c = stream.peek();
271 }
272
273 return foundNewline;
274 }
275
276
277 QString PvlTokenizer::ReadToDoubleQuote(std::istream &stream) {
278 QString s;
279 int c;
280
281 do {
282 c = stream.get();
284 if(c == EOF) {
285 QString message = Isis::Message::MissingDelimiter('"', s);
286 throw IException(IException::Unknown, message, _FILEINFO_);
287 }
288 else if(c != '"') {
289 s += (char) c;
290 }
291 }
292 while(c != '"');
293
294 int pos = s.indexOf(QRegExp("[\\n\\r]"));
295 while(pos != -1) {
296 QString first = s.mid(0, pos);
297 bool addspace = false;
298 if(first[pos-1] == ' ') addspace = true;
299 first = first.remove(QRegExp("[\\s]*$"));
300 QString second = s.mid(pos + 1);
301 if(second[0] == ' ') addspace = true;
302 if(second[0] == '\r') addspace = true;
303 if(second[0] == '\n') addspace = true;
304 second = second.remove(QRegExp("^[\\s]*"));
305 if(second[0] == ',') addspace = false;
306 s = first;
307 if(addspace) s += " ";
308 s += second;
309
310 pos = s.indexOf(QRegExp("[\\n\\r]"));
311 }
312 return s;
313 }
314
315 QString PvlTokenizer::ReadToSingleQuote(std::istream &stream) {
316 QString s;
317 int c;
318
319 do {
320 c = stream.get();
322 if(c == EOF) {
323 QString message = Isis::Message::MissingDelimiter('\'', s);
324 throw IException(IException::Unknown, message, _FILEINFO_);
325 }
326 else if(c != '\'') {
327 s += (char) c;
328 }
329 }
330 while(c != '\'');
331
332 int pos = s.indexOf(QRegExp("[\\n\\r]"));
333 while(pos != -1) {
334 QString first = s.mid(0, pos);
335 bool addspace = false;
336 if(first[pos-1] == ' ') addspace = true;
337 first = first.remove(QRegExp("[\\s]*$"));
338 QString second = s.mid(pos + 1);
339 if(second[0] == ' ') addspace = true;
340 if(second[0] == '\r') addspace = true;
341 if(second[0] == '\n') addspace = true;
342 second = second.remove(QRegExp("^[\\s]*"));
343 if(second[0] == ',') addspace = false;
344 s = first;
345 if(addspace) s += " ";
346 s += second;
347 pos = s.indexOf(QRegExp("[\\n\\r]"));
348 }
349
350 return s;
351 }
352
353 QString PvlTokenizer::ReadToParen(std::istream &stream) {
354 QString s;
355 int c;
356 int leftParenCount = 1;
357
358 do {
359 c = stream.get();
361 if(c == EOF) {
362 QString message = Isis::Message::MissingDelimiter(')', s);
363 throw IException(IException::Unknown, message, _FILEINFO_);
364 }
365 else if(c == '"') {
366 try {
367 s += "\"" + ReadToDoubleQuote(stream) + "\"";
368 }
369 catch(IException &) {
370 QString message = Isis::Message::MissingDelimiter('"', s);
371 throw IException(IException::Unknown, message, _FILEINFO_);
372 }
373 }
374 else if(c == '\'') {
375 try {
376 s += "'" + ReadToSingleQuote(stream) + "'";
377 }
378 catch(IException &) {
379 QString message = Isis::Message::MissingDelimiter('\'', s);
380 throw IException(IException::Unknown, message, _FILEINFO_);
381 }
382 }
383 else if(c == ')') {
384 leftParenCount--;
385 if(leftParenCount > 0) s += (char) c;
386 }
387 else {
388 s += (char) c;
389 if(c == '(') leftParenCount++;
390 }
391 }
392 while(leftParenCount > 0);
393
394 return s;
395 }
396
397 QString PvlTokenizer::ReadToBrace(std::istream &stream) {
398 QString s;
399 int c;
400 int leftBraceCount = 1;
401
402 do {
403 c = stream.get();
405 if(c == EOF) {
406 QString message = Isis::Message::MissingDelimiter('}', s);
407 throw IException(IException::Unknown, message, _FILEINFO_);
408 }
409 else if(c == '"') {
410 try {
411 s += "\"" + ReadToDoubleQuote(stream) + "\"";
412 }
413 catch(IException &e) {
414 QString message = Isis::Message::MissingDelimiter('"', s);
415 throw IException(IException::Unknown, message, _FILEINFO_);
416 }
417 }
418 else if(c == '\'') {
419 try {
420 s += "'" + ReadToSingleQuote(stream) + "'";
421 }
422 catch(IException &) {
423 QString message = Isis::Message::MissingDelimiter('\'', s);
424 throw IException(IException::Unknown, message, _FILEINFO_);
425 }
426 }
427 else if(c == '}') {
428 leftBraceCount--;
429 if(leftBraceCount > 0) s += (char) c;
430 }
431 else {
432 s += (char) c;
433 if(c == '{') leftBraceCount++;
434 }
435 }
436 while(leftBraceCount > 0);
437
438 return s;
439 }
440
449 void PvlTokenizer::ParseCommaList(Isis::PvlToken &t, const QString &cl) {
450 stringstream stream(cl.toLatin1().data());
451 int c;
452 QString s;
453
454 do {
455 SkipWhiteSpace(stream);
456 c = stream.get();
457 if(c == '"') {
458 s += ReadToDoubleQuote(stream);
459 }
460 else if(c == '\'') {
461 s += ReadToSingleQuote(stream);
462 }
463 else if(c == '(') {
464 s += "(";
465 s += ReadToParen(stream);
466 s += ")";
467 }
468 else if(c == '{') {
469 s += "{";
470 s += ReadToBrace(stream);
471 s += "}";
472 }
473 else if(c == ',') {
474 t.addValue(s);
475 s.clear();
476 }
477 else if(c != EOF) {
478 s += (char) c;
479 }
480 }
481 while(c != EOF);
482
483 t.addValue(s);
484 }
485
486
487 vector<Isis::PvlToken> & PvlTokenizer::GetTokenList() {
488 return tokens;
489 }
490
497 if(c == EOF) return;
498 if(isprint(c)) return;
499 if(isspace(c)) return;
500 if(c == '\0') return;
501
502 QString message = "ASCII data expected but found unprintable (binary) data";
503 throw IException(IException::Unknown, message, _FILEINFO_);
504 }
505} // end namespace isis
Isis exception class.
Definition IException.h:91
@ Unknown
A type of error that cannot be classified as any of the other error types.
Definition IException.h:118
Container for Keyword-value pair.
Definition PvlToken.h:38
bool SkipWhiteSpace(std::istream &stream)
Skips over whitespace so long as it is not inside quotes.
void Clear()
Empties the token list.
~PvlTokenizer()
Destroys the Tokenizer object and token list.
QString ReadToken(std::istream &stream)
Reads and returns a token from the stream.
QString ReadComment(std::istream &stream)
Reads and returns a comment from the stream.
std::vector< Isis::PvlToken > tokens
The array of Tokens parsed out of the stream.
void ValidateCharacter(int c)
Make sure a character is a valid printable (non-control) character.
void ParseCommaList(Isis::PvlToken &t, const QString &cl)
This routine parses a QString containing a comma separated list.
PvlTokenizer()
Constructs a Tokenizer with an empty token list.
void Load(std::istream &stream, const QString &terminator="END")
Loads the Token list from a stream.
QString MissingDelimiter(const char delimiter)
This error should be used when a delimiter is missing.
QString KeywordValueBad(const QString &key)
This error should be used when a supplied keyword does not appear in the list (e.g....
This is free and unencumbered software released into the public domain.
Definition Apollo.h:16
Namespace for the standard library.