// Copyright Ferdinand Majerech 2011-2014.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
module dyaml.scanner;


import core.stdc.string;

import std.algorithm;
import std.array;
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;
import std.utf;

import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;

package:
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)

/// Characters that end a line, or '\0' marking the end of the stream.
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Line breaks plus the plain space character.
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// All YAML whitespace: space, tab, line breaks and end of stream.
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Whitespace that does not terminate a line.
alias isNonLinebreakWhitespace = among!(' ', '\t');

/// Characters that can never start a plain scalar (indicators and whitespace).
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
    '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
    '\r', '\u0085', '\u2028', '\u2029');

/// Non-alphanumeric characters permitted inside a tag URI.
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
    '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');

/// Space or a line break (YAML "ns" boundary characters).
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Line break characters (YAML "b" characters).
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');

/// Characters that delimit or need attention inside a flow scalar.
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');

/// Valid character of an anchor/alias name: any non-whitespace character except
/// flow indicators and the byte order mark.
alias isNSAnchorName = c => !c.isWhiteSpace && !c.among!('[', ']', '{', '}', ',', '\uFEFF');

/// Generates tokens from data provided by a Reader.
struct Scanner
{
    private:
        /// A simple key is a key that is not denoted by the '?' indicator.
        /// For example:
        ///   ---
        ///   block simple key: value
        ///   ? not a simple key:
        ///   : { flow simple key: value }
        /// We emit the KEY token before all keys, so when we find a potential simple
        /// key, we try to locate the corresponding ':' indicator. Simple keys should be
        /// limited to a single line and 1024 characters.
        static struct SimpleKey
        {
            /// Position of the key
            Mark mark;
            /// Index of the key token from start (first token scanned being 0).
            uint tokenIndex;
            /// Is this required to be a simple key?
            bool required;
            /// Is this struct "null" (invalid)?.
            bool isNull;
        }

        /// Block chomping types.
        enum Chomping
        {
            /// Strip all trailing line breaks. '-' indicator.
            strip,
            /// Line break of the last line is preserved, others discarded. Default.
            clip,
            /// All trailing line breaks are preserved. '+' indicator.
            keep
        }

        /// Reader used to read from a file/stream.
        Reader reader_;
        /// Are we done scanning?
        bool done_;

        /// Level of nesting in flow context. If 0, we're in block context.
        uint flowLevel_;
        /// Current indentation level.
        int indent_ = -1;
        /// Past indentation levels. Used as a stack.
        Appender!(int[]) indents_;

        /// Processed tokens not yet emitted. Used as a queue.
        Queue!Token tokens_;

        /// Number of tokens emitted through the getToken method.
        uint tokensTaken_;

        /// Can a simple key start at the current position? A simple key may start:
        /// - at the beginning of the line, not counting indentation spaces
        ///   (in block context),
        /// - after '{', '[', ',' (in the flow context),
        /// - after '?', ':', '-' (in the block context).
        /// In the block context, this flag also signifies if a block collection
        /// may start at the current position.
        bool allowSimpleKey_ = true;

        /// Possible simple keys indexed by flow levels.
        SimpleKey[] possibleSimpleKeys_;

    public:
        /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
        {
            // Queue the initial STREAM-START token immediately.
            reader_ = reader;
            fetchStreamStart();
        }

        /// Advance to the next token
        void popFront() @safe
        {
            ++tokensTaken_;
            tokens_.pop();
        }

        /// Return the current token without removing it from the queue.
        const(Token) front() @safe
        {
            enforce(!empty, "No token left to peek");
            return tokens_.peek();
        }

        /// Return whether there are any more tokens left.
        ///
        /// Note: scans ahead (fetching tokens into the queue) as a side effect.
        bool empty() @safe
        {
            while (needMoreTokens())
            {
                fetchToken();
            }
            return tokens_.empty;
        }

        /// Get or set the file name (returned by reference so it can be assigned).
        ref inout(string) name() inout @safe return pure nothrow @nogc
        {
            return reader_.name;
        }
        /// Get a mark from the current reader position
        Mark mark() const @safe pure nothrow @nogc
        {
            return reader_.mark;
        }

    private:
        /// Most scanning error messages have the same format; so build them with this
        /// function.
        string expected(T)(string expected, T found)
        {
            return text(expected, ", but found ", found);
        }

        /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
        bool needMoreTokens() @safe pure
        {
            if(done_)         { return false; }
            if(tokens_.empty) { return true; }

            /// The current token may be a potential simple key, so we need to look further.
            stalePossibleSimpleKeys();
            return nextPossibleSimpleKey() == tokensTaken_;
        }

        /// Fetch a token, adding it to tokens_.
        void fetchToken() @safe
        {
            // Eat whitespaces and comments until we reach the next token.
            scanToNextToken();

            // Remove obsolete possible simple keys.
            stalePossibleSimpleKeys();

            // Compare current indentation and column. It may add some tokens
            // and decrease the current indentation level.
            unwindIndent(reader_.column);

            // Get the next character.
            const dchar c = reader_.peekByte();

            // Fetch the token.
            if(c == '\0')            { return fetchStreamEnd();     }
            if(checkDirective())     { return fetchDirective();     }
            if(checkDocumentStart()) { return fetchDocumentStart(); }
            if(checkDocumentEnd())   { return fetchDocumentEnd();   }
            // Order of the following checks is NOT significant.
            switch(c)
            {
                case '[':  return fetchFlowSequenceStart();
                case '{':  return fetchFlowMappingStart();
                case ']':  return fetchFlowSequenceEnd();
                case '}':  return fetchFlowMappingEnd();
                case ',':  return fetchFlowEntry();
                case '!':  return fetchTag();
                case '\'': return fetchSingle();
                case '\"': return fetchDouble();
                case '*':  return fetchAlias();
                case '&':  return fetchAnchor();
                // '?', ':' and '-' only start tokens when followed by whitespace
                // (or, in flow context, unconditionally for '?'/':').
                case '?':  if(checkKey())        { return fetchKey();        } goto default;
                case ':':  if(checkValue())      { return fetchValue();      } goto default;
                case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
                // Block scalars are only allowed in block context.
                case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
                case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
                default:   if(checkPlain())      { return fetchPlain();      }
            }

            throw new ScannerException("While scanning for the next token, found character " ~
                                       "\'%s\', index %s that cannot start any token"
                                       .format(c, to!int(c)), reader_.mark);
        }


        /// Return the token number of the nearest possible simple key.
255 uint nextPossibleSimpleKey() @safe pure nothrow @nogc 256 { 257 uint minTokenNumber = uint.max; 258 foreach(k, ref simpleKey; possibleSimpleKeys_) 259 { 260 if(simpleKey.isNull) { continue; } 261 minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); 262 } 263 return minTokenNumber; 264 } 265 266 /// Remove entries that are no longer possible simple keys. 267 /// 268 /// According to the YAML specification, simple keys 269 /// - should be limited to a single line, 270 /// - should be no longer than 1024 characters. 271 /// Disabling this will allow simple keys of any length and 272 /// height (may cause problems if indentation is broken though). 273 void stalePossibleSimpleKeys() @safe pure 274 { 275 foreach(level, ref key; possibleSimpleKeys_) 276 { 277 if(key.isNull) { continue; } 278 if(key.mark.line != reader_.mark.line || reader_.mark.column - key.mark.column > 1024) 279 { 280 enforce(!key.required, 281 new ScannerException("While scanning a simple key, could not find expected ':'", 282 reader_.mark, "key started here", key.mark)); 283 key.isNull = true; 284 } 285 } 286 } 287 288 /// Check if the next token starts a possible simple key and if so, save its position. 289 /// 290 /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. 291 void savePossibleSimpleKey() @safe pure 292 { 293 // Check if a simple key is required at the current position. 294 const required = (flowLevel_ == 0 && indent_ == reader_.column); 295 assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~ 296 "the first token in the current line. Therefore it is always allowed."); 297 298 if(!allowSimpleKey_) { return; } 299 300 // The next token might be a simple key, so save its number and position. 
301 removePossibleSimpleKey(); 302 const tokenCount = tokensTaken_ + cast(uint)tokens_.length; 303 304 const line = reader_.line; 305 const column = reader_.column; 306 const key = SimpleKey(reader_.mark, tokenCount, required); 307 308 if(possibleSimpleKeys_.length <= flowLevel_) 309 { 310 const oldLength = possibleSimpleKeys_.length; 311 possibleSimpleKeys_.length = flowLevel_ + 1; 312 // Make sure all the empty keys are null 313 foreach (ref emptyKey; possibleSimpleKeys_[oldLength .. flowLevel_]) 314 { 315 emptyKey.isNull = true; 316 } 317 } 318 possibleSimpleKeys_[flowLevel_] = key; 319 } 320 321 /// Remove the saved possible key position at the current flow level. 322 void removePossibleSimpleKey() @safe pure 323 { 324 if(possibleSimpleKeys_.length <= flowLevel_) { return; } 325 326 if(!possibleSimpleKeys_[flowLevel_].isNull) 327 { 328 const key = possibleSimpleKeys_[flowLevel_]; 329 enforce(!key.required, 330 new ScannerException("While scanning a simple key, could not find expected ':'", 331 reader_.mark, "key started here", key.mark)); 332 possibleSimpleKeys_[flowLevel_].isNull = true; 333 } 334 } 335 336 /// Decrease indentation, removing entries in indents_. 337 /// 338 /// Params: column = Current column in the file/stream. 339 void unwindIndent(const int column) @safe 340 { 341 if(flowLevel_ > 0) 342 { 343 // In flow context, tokens should respect indentation. 344 // The condition should be `indent >= column` according to the spec. 345 // But this condition will prohibit intuitively correct 346 // constructions such as 347 // key : { 348 // } 349 350 // In the flow context, indentation is ignored. We make the scanner less 351 // restrictive than what the specification requires. 352 // if(pedantic_ && flowLevel_ > 0 && indent_ > column) 353 // { 354 // throw new ScannerException("Invalid intendation or unclosed '[' or '{'", 355 // reader_.mark) 356 // } 357 return; 358 } 359 360 // In block context, we may need to issue the BLOCK-END tokens. 
361 while(indent_ > column) 362 { 363 indent_ = indents_.data.back; 364 assert(indents_.data.length); 365 indents_.shrinkTo(indents_.data.length - 1); 366 tokens_.push(blockEndToken(reader_.mark, reader_.mark)); 367 } 368 } 369 370 /// Increase indentation if needed. 371 /// 372 /// Params: column = Current column in the file/stream. 373 /// 374 /// Returns: true if the indentation was increased, false otherwise. 375 bool addIndent(int column) @safe 376 { 377 if(indent_ >= column){return false;} 378 indents_ ~= indent_; 379 indent_ = column; 380 return true; 381 } 382 383 384 /// Add STREAM-START token. 385 void fetchStreamStart() @safe nothrow 386 { 387 tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding)); 388 } 389 390 ///Add STREAM-END token. 391 void fetchStreamEnd() @safe 392 { 393 //Set intendation to -1 . 394 unwindIndent(-1); 395 removePossibleSimpleKey(); 396 allowSimpleKey_ = false; 397 possibleSimpleKeys_.destroy; 398 399 tokens_.push(streamEndToken(reader_.mark, reader_.mark)); 400 done_ = true; 401 } 402 403 /// Add DIRECTIVE token. 404 void fetchDirective() @safe 405 { 406 // Set intendation to -1 . 407 unwindIndent(-1); 408 // Reset simple keys. 409 removePossibleSimpleKey(); 410 allowSimpleKey_ = false; 411 412 auto directive = scanDirective(); 413 tokens_.push(directive); 414 } 415 416 /// Add DOCUMENT-START or DOCUMENT-END token. 417 void fetchDocumentIndicator(TokenID id)() 418 if(id == TokenID.documentStart || id == TokenID.documentEnd) 419 { 420 // Set indentation to -1 . 421 unwindIndent(-1); 422 // Reset simple keys. Note that there can't be a block collection after '---'. 423 removePossibleSimpleKey(); 424 allowSimpleKey_ = false; 425 426 Mark startMark = reader_.mark; 427 reader_.forward(3); 428 tokens_.push(simpleToken!id(startMark, reader_.mark)); 429 } 430 431 /// Aliases to add DOCUMENT-START or DOCUMENT-END token. 
        alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart);
        alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd);

        /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        void fetchFlowCollectionStart(TokenID id)() @safe
        {
            // '[' and '{' may start a simple key.
            savePossibleSimpleKey();
            // Simple keys are allowed after '[' and '{'.
            allowSimpleKey_ = true;
            ++flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart);
        alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart);

        /// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        void fetchFlowCollectionEnd(TokenID id)()
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // No simple keys after ']' and '}'.
            allowSimpleKey_ = false;
            --flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd);
        alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd);

        /// Add FLOW-ENTRY token.
        void fetchFlowEntry() @safe
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after ','.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(flowEntryToken(startMark, reader_.mark));
        }

        /// Additional checks used in block context in fetchBlockEntry and fetchKey.
        ///
        /// Params: type = String representing the token type we might need to add.
        ///         id   = Token type we might need to add.
        void blockChecks(string type, TokenID id)()
        {
            enum context = type ~ " keys are not allowed here";
            // Are we allowed to start a key (not necessarily a simple one)?
            enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

            // A new key at a deeper column starts a new block collection.
            if(addIndent(reader_.column))
            {
                tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
            }
        }

        /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
        void fetchBlockEntry() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); }

            // It's an error for the block entry to occur in the flow context,
            // but we let the parser detect this.

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '-'.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(blockEntryToken(startMark, reader_.mark));
        }

        /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
        void fetchKey() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); }

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '?' in the block context.
            allowSimpleKey_ = (flowLevel_ == 0);

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(keyToken(startMark, reader_.mark));
        }

        /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
        void fetchValue() @safe
        {
            // Do we determine a simple key?
            if(possibleSimpleKeys_.length > flowLevel_ &&
               !possibleSimpleKeys_[flowLevel_].isNull)
            {
                const key = possibleSimpleKeys_[flowLevel_];
                assert(key.tokenIndex >= tokensTaken_);

                possibleSimpleKeys_[flowLevel_].isNull = true;
                Mark keyMark = key.mark;
                // Position in the queue where the KEY token belongs (the key's
                // token index relative to the tokens already emitted).
                const idx = key.tokenIndex - tokensTaken_;

                // Add KEY.
                // Manually inserting since tokens are immutable (need linked list).
                tokens_.insert(keyToken(keyMark, keyMark), idx);

                // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
                if(flowLevel_ == 0 && addIndent(key.mark.column))
                {
                    tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
                }

                // There cannot be two simple keys in a row.
                allowSimpleKey_ = false;
            }
            // Part of a complex key
            else
            {
                // We can start a complex value if and only if we can start a simple key.
                enforce(flowLevel_ > 0 || allowSimpleKey_,
                    new ScannerException("Mapping values are not allowed here", reader_.mark));

                // If this value starts a new block mapping, we need to add
                // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
                if(flowLevel_ == 0 && addIndent(reader_.column))
                {
                    tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
                }

                // Reset possible simple key on the current level.
                removePossibleSimpleKey();
                // Simple keys are allowed after ':' in the block context.
                allowSimpleKey_ = (flowLevel_ == 0);
            }

            // Add VALUE.
            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(valueToken(startMark, reader_.mark));
        }

        /// Add ALIAS or ANCHOR token.
        void fetchAnchor_(TokenID id)() @safe
            if(id == TokenID.alias_ || id == TokenID.anchor)
        {
            // ALIAS/ANCHOR could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after ALIAS/ANCHOR.
            allowSimpleKey_ = false;

            auto anchor = scanAnchor(id);
            tokens_.push(anchor);
        }

        /// Aliases to add ALIAS or ANCHOR token.
        alias fetchAlias = fetchAnchor_!(TokenID.alias_);
        alias fetchAnchor = fetchAnchor_!(TokenID.anchor);

        /// Add TAG token.
        void fetchTag() @safe
        {
            // TAG could start a simple key.
            savePossibleSimpleKey();
            // No simple keys after TAG.
            allowSimpleKey_ = false;

            tokens_.push(scanTag());
        }

        /// Add block SCALAR token.
        void fetchBlockScalar(ScalarStyle style)() @safe
            if(style == ScalarStyle.literal || style == ScalarStyle.folded)
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // A simple key may follow a block scalar.
            allowSimpleKey_ = true;

            auto blockScalar = scanBlockScalar(style);
            tokens_.push(blockScalar);
        }

        /// Aliases to add literal or folded block scalar.
        alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal);
        alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded);

        /// Add quoted flow SCALAR token.
        void fetchFlowScalar(ScalarStyle quotes)()
        {
            // A flow scalar could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after flow scalars.
            allowSimpleKey_ = false;

            // Scan and add SCALAR.
            auto scalar = scanFlowScalar(quotes);
            tokens_.push(scalar);
        }

        /// Aliases to add single or double quoted flow scalar.
        alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted);
        alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted);

        /// Add plain SCALAR token.
        void fetchPlain() @safe
        {
            // A plain scalar could be a simple key
            savePossibleSimpleKey();
            // No simple keys after plain scalars. But note that scanPlain() will
            // change this flag if the scan is finished at the beginning of the line.
            allowSimpleKey_ = false;
            // Scan and add SCALAR. May change allowSimpleKey_.
            auto plain = scanPlain();

            tokens_.push(plain);
        }

    pure:

        /// Check if the next token is DIRECTIVE:        ^ '%' ...
        bool checkDirective() @safe
        {
            return reader_.peekByte() == '%' && reader_.column == 0;
        }

        /// Check if the next token is DOCUMENT-START:   ^ '---' (' '|'\n')
        bool checkDocumentStart() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '-' &&
                   reader_.prefix(3) == "---" &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
        bool checkDocumentEnd() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '.' &&
                   reader_.prefix(3) == "..." &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
        bool checkBlockEntry() @safe
        {
            return !!reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is KEY(flow context):    '?'
        ///
        /// or KEY(block context):   '?' (' '|'\n')
        bool checkKey() @safe
        {
            return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
        }

        /// Check if the next token is VALUE(flow context):  ':'
        ///
        /// or VALUE(block context): ':' (' '|'\n')
        bool checkValue() @safe
        {
            return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is a plain scalar.
        ///
        /// A plain scalar may start with any non-space character except:
        ///   '-', '?', ':', ',', '[', ']', '{', '}',
        ///   '#', '&', '*', '!', '|', '>', '\'', '\"',
        ///   '%', '@', '`'.
        ///
        /// It may also start with
        ///   '-', '?', ':'
        /// if it is followed by a non-space character.
        ///
        /// Note that we limit the last rule to the block context (except the
        /// '-' character) because we want the flow context to be space
        /// independent.
        bool checkPlain() @safe
        {
            const c = reader_.peek();
            if(!c.isNonScalarStartCharacter)
            {
                return true;
            }
            return !reader_.peek(1).isWhiteSpace &&
                   (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
        }

        /// Move to the next non-space character.
        void findNextNonSpace() @safe
        {
            while(reader_.peekByte() == ' ') { reader_.forward(); }
        }

        /// Scan a string of alphanumeric or "-_" characters.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanAlphaNumericToSlice(string name)(ref char[] slice, const Mark startMark)
        {
            size_t length;
            dchar c = reader_.peek();
            while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); }

            // At least one valid character is required.
            enforce(length > 0, new ScannerException(expected("While scanning a " ~ name ~ ", expected alphanumeric, '-' or '_'", c),
                reader_.mark, name~" started here", startMark));

            slice ~= reader_.get(length);
        }

        /// Scan an anchor or alias name and return the scanned characters.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        char[] readAnchorAlias(const Mark startMark) @safe
        {
            size_t length;
            dchar c = reader_.peek();
            while (c.isNSAnchorName)
            {
                c = reader_.peek(++length);
            }

            // An empty anchor/alias name is an error.
            enforce(length > 0, new ScannerException(
                expected("While scanning an anchor or alias, expected a printable character besides '[', ']', '{', '}' and ','", c),
                reader_.mark, "started here", startMark));

            return reader_.get(length);
        }

        /// Scan and throw away all characters until next line break.
        void scanToNextBreak() @safe
        {
            while(!reader_.peek().isBreak) { reader_.forward(); }
        }

        /// Scan all characters until next line break.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanToNextBreakToSlice(ref char[] slice) @safe
        {
            uint length;
            while(!reader_.peek(length).isBreak)
            {
                ++length;
            }
            slice ~= reader_.get(length);
        }


        /// Move to next token in the file/stream.
        ///
        /// We ignore spaces, line breaks and comments.
        /// If we find a line break in the block context, we set
        /// `allowSimpleKey_` on.
        ///
        /// We do not yet support BOM inside the stream as the
        /// specification requires. Any such mark will be considered as a part
        /// of the document.
        void scanToNextToken() @safe
        {
            // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
            //   Tabs cannot precede tokens
            //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
            //   KEY(block), VALUE(block), BLOCK-ENTRY
            // So the checking code is
            //   if <TAB>:
            //       allowSimpleKey_ = false
            // We also need to add the check for `allowSimpleKey_ == true` to
            // `unwindIndent` before issuing BLOCK-END.
            // Scanners for block, flow, and plain scalars need to be modified.

            for(;;)
            {
                // All whitespace in flow context is ignored, even whitespace
                // not allowed in other contexts
                if (flowLevel_ > 0)
                {
                    while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); }
                }
                else
                {
                    findNextNonSpace();
                }
                // Skip a comment to the end of the line.
                if(reader_.peekByte() == '#') { scanToNextBreak(); }
                if(scanLineBreak() != '\0')
                {
                    if(flowLevel_ == 0) { allowSimpleKey_ = true; }
                }
                else
                {
                    // No line break consumed: we are at the next token.
                    break;
                }
            }
        }

        /// Scan directive token.
        Token scanDirective() @safe
        {
            Mark startMark = reader_.mark;
            // Skip the '%'.
            reader_.forward();

            // Scan directive name
            char[] name;
            scanDirectiveNameToSlice(name, startMark);

            char[] value;

            // Index where tag handle ends and suffix starts in a tag directive value.
            uint tagHandleEnd = uint.max;
            if(name == "YAML")     { scanYAMLDirectiveValueToSlice(value, startMark); }
            else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(value, startMark); }

            Mark endMark = reader_.mark;

            DirectiveType directive;
            if(name == "YAML")     { directive = DirectiveType.yaml; }
            else if(name == "TAG") { directive = DirectiveType.tag; }
            else
            {
                // Unknown directives are preserved as "reserved"; their
                // content up to the line break is discarded.
                directive = DirectiveType.reserved;
                scanToNextBreak();
            }

            scanDirectiveIgnoredLine(startMark);

            return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
        }

        /// Scan name of a directive token.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanDirectiveNameToSlice(ref char[] slice, const Mark startMark) @safe
        {
            // Scan directive name.
            scanAlphaNumericToSlice!"directive"(slice, startMark);

            // The name must be followed by whitespace or a line break.
            // NOTE(review): the error message mirrors the alphanumeric check above
            // rather than describing the expected separator — confirm against upstream
            // before changing, since tests may depend on the exact wording.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive, expected alphanumeric, '-' or '_'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe
        {
            findNextNonSpace();

            // Major version number.
            scanYAMLDirectiveNumberToSlice(slice, startMark);

            enforce(reader_.peekByte() == '.',
                new ScannerException(expected("While scanning a directive, expected digit or '.'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
            // Skip the '.'.
            reader_.forward();

            // Minor version number.
            slice ~= '.';
            scanYAMLDirectiveNumberToSlice(slice, startMark);

            // The version must be followed by whitespace or a line break.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive, expected digit or '.'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan a number from a YAML directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveNumberToSlice(ref char[] slice, const Mark startMark) @safe
        {
            enforce(isDigit(reader_.peek()),
                new ScannerException(expected("While scanning a directive, expected a digit", reader_.peek()),
                    reader_.mark, "directive started here", startMark));

            // Already found the first digit in the enforce(), so set length to 1.
            uint length = 1;
            while(reader_.peek(length).isDigit) { ++length; }

            slice ~= reader_.get(length);
        }

        /// Scan value of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// Returns: Length of tag handle (which is before tag prefix) in scanned data
        uint scanTagDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe
        {
            findNextNonSpace();
            // Remember where the handle starts so its length can be computed.
            const startLength = slice.length;
            scanTagDirectiveHandleToSlice(slice, startMark);
            const handleLength = cast(uint)(slice.length - startLength);
            findNextNonSpace();
            scanTagDirectivePrefixToSlice(slice, startMark);

            return handleLength;
        }

        /// Scan handle of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanTagDirectiveHandleToSlice(ref char[] slice, const Mark startMark) @safe
        {
            scanTagHandleToSlice!"directive"(slice, startMark);
            // The handle must be separated from the prefix by a space.
            enforce(reader_.peekByte() == ' ',
                new ScannerException(expected("While scanning a directive handle, expected ' '", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan prefix of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanTagDirectivePrefixToSlice(ref char[] slice, const Mark startMark) @safe
        {
            scanTagURIToSlice!"directive"(slice, startMark);
            // The prefix must be followed by whitespace or a line break.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive prefix, expected ' '", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan (and ignore) ignored line after a directive.
        void scanDirectiveIgnoredLine(const Mark startMark) @safe
        {
            findNextNonSpace();
            // A trailing comment is allowed; skip it.
            if(reader_.peekByte() == '#') { scanToNextBreak(); }
            enforce(reader_.peek().isBreak,
                new ScannerException(expected("While scanning a directive, expected a comment or a line break", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
            scanLineBreak();
        }


        /// Scan an alias or an anchor.
        ///
        /// The specification does not restrict characters for anchors and
        /// aliases. This may lead to problems, for instance, the document:
        ///   [ *alias, value ]
        /// can be interpreted in two ways, as
        ///   [ "value" ]
        /// and
        ///   [ *alias , "value" ]
        /// Therefore we restrict aliases to ASCII alphanumeric characters.
        Token scanAnchor(const TokenID id) @safe
        {
            const startMark = reader_.mark;
            reader_.forward(); // The */& character was only peeked, so we drop it now

            char[] value = readAnchorAlias(startMark);

            assert(!reader_.peek().isNSAnchorName, "Anchor/alias name not fully scanned");

            if(id == TokenID.alias_)
            {
                return aliasToken(startMark, reader_.mark, value);
            }
            if(id == TokenID.anchor)
            {
                return anchorToken(startMark, reader_.mark, value);
            }
            assert(false, "This code should never be reached");
        }

        /// Scan a tag token.
        Token scanTag() @safe
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);

            char[] slice;
            // Index where tag handle ends and tag suffix starts in the tag value
            // (slice) we will produce.
1028 uint handleEnd; 1029 1030 if(c == '<') 1031 { 1032 reader_.forward(2); 1033 1034 handleEnd = 0; 1035 scanTagURIToSlice!"tag"(slice, startMark); 1036 enforce(reader_.peekByte() == '>', 1037 new ScannerException(expected("While scanning a tag, expected a '>'", reader_.peek()), 1038 reader_.mark, "tag started here", startMark)); 1039 reader_.forward(); 1040 } 1041 else if(c.isWhiteSpace) 1042 { 1043 reader_.forward(); 1044 handleEnd = 0; 1045 slice ~= '!'; 1046 } 1047 else 1048 { 1049 uint length = 1; 1050 bool useHandle; 1051 1052 while(!c.isBreakOrSpace) 1053 { 1054 if(c == '!') 1055 { 1056 useHandle = true; 1057 break; 1058 } 1059 ++length; 1060 c = reader_.peek(length); 1061 } 1062 1063 if(useHandle) 1064 { 1065 scanTagHandleToSlice!"tag"(slice, startMark); 1066 handleEnd = cast(uint)slice.length; 1067 } 1068 else 1069 { 1070 reader_.forward(); 1071 slice ~= '!'; 1072 handleEnd = cast(uint)slice.length; 1073 } 1074 1075 scanTagURIToSlice!"tag"(slice, startMark); 1076 } 1077 1078 enforce(reader_.peek().isBreakOrSpace, 1079 new ScannerException(expected("While scanning a tag, expected a ' '", reader_.peek()), 1080 reader_.mark, "tag started here", startMark)); 1081 1082 return tagToken(startMark, reader_.mark, slice, handleEnd); 1083 } 1084 1085 /// Scan a block scalar token with specified style. 1086 Token scanBlockScalar(const ScalarStyle style) @safe 1087 { 1088 const startMark = reader_.mark; 1089 1090 // Scan the header. 1091 reader_.forward(); 1092 1093 const indicators = scanBlockScalarIndicators(startMark); 1094 1095 const chomping = indicators[0]; 1096 const increment = indicators[1]; 1097 scanBlockScalarIgnoredLine(startMark); 1098 1099 // Determine the indentation level and go to the first non-empty line. 1100 Mark endMark; 1101 uint indent = max(1, indent_ + 1); 1102 1103 char[] slice; 1104 // Used to strip the last line breaks written to the slice at the end of the 1105 // scalar, which may be needed based on chomping. 
1106 char[] newBreakSlice; 1107 // Read the first indentation/line breaks before the scalar. 1108 size_t startLen = newBreakSlice.length; 1109 if(increment == int.min) 1110 { 1111 auto indentation = scanBlockScalarIndentationToSlice(newBreakSlice); 1112 endMark = indentation[1]; 1113 indent = max(indent, indentation[0]); 1114 } 1115 else 1116 { 1117 indent += increment - 1; 1118 endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); 1119 } 1120 1121 // int.max means there's no line break (int.max is outside UTF-32). 1122 dchar lineBreak = cast(dchar)int.max; 1123 1124 // Scan the inner part of the block scalar. 1125 while(reader_.column == indent && reader_.peekByte() != '\0') 1126 { 1127 slice ~= newBreakSlice; 1128 const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); 1129 // This is where the 'interesting' non-whitespace data gets read. 1130 scanToNextBreakToSlice(slice); 1131 lineBreak = scanLineBreak(); 1132 1133 1134 // This transaction serves to rollback data read in the 1135 // scanBlockScalarBreaksToSlice() call. 1136 newBreakSlice = []; 1137 startLen = slice.length; 1138 // The line breaks should actually be written _after_ the if() block 1139 // below. We work around that by inserting 1140 endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); 1141 1142 // This will not run during the last iteration 1143 if(reader_.column == indent && reader_.peekByte() != '\0') 1144 { 1145 // Unfortunately, folding rules are ambiguous. 1146 1147 // This is the folding according to the specification: 1148 if(style == ScalarStyle.folded && lineBreak == '\n' && 1149 leadingNonSpace && !reader_.peekByte().among!(' ', '\t')) 1150 { 1151 // No breaks were scanned; no need to insert the space in the 1152 // middle of slice. 1153 if(startLen == slice.length + newBreakSlice.length) 1154 { 1155 newBreakSlice ~= ' '; 1156 } 1157 } 1158 else 1159 { 1160 // We need to insert in the middle of the slice in case any line 1161 // breaks were scanned. 
1162 newBreakSlice.insert(lineBreak, 0); 1163 } 1164 1165 ////this is Clark Evans's interpretation (also in the spec 1166 ////examples): 1167 // 1168 //if(style == ScalarStyle.folded && lineBreak == '\n') 1169 //{ 1170 // if(startLen == endLen) 1171 // { 1172 // if(!" \t"d.canFind(reader_.peekByte())) 1173 // { 1174 // reader_.sliceBuilder.write(' '); 1175 // } 1176 // else 1177 // { 1178 // chunks ~= lineBreak; 1179 // } 1180 // } 1181 //} 1182 //else 1183 //{ 1184 // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen); 1185 //} 1186 } 1187 else 1188 { 1189 break; 1190 } 1191 } 1192 1193 // If chompint is Keep, we keep (commit) the last scanned line breaks 1194 // (which are at the end of the scalar). Otherwise re remove them (end the 1195 // transaction). 1196 if(chomping == Chomping.keep) { slice ~= newBreakSlice; } 1197 if(chomping != Chomping.strip && lineBreak != int.max) 1198 { 1199 // If chomping is Keep, we keep the line break but the first line break 1200 // that isn't stripped (since chomping isn't Strip in this branch) must 1201 // be inserted _before_ the other line breaks. 1202 if(chomping == Chomping.keep) 1203 { 1204 slice.insert(lineBreak, startLen); 1205 } 1206 // If chomping is not Keep, discard the line break 1207 else 1208 { 1209 if (lineBreak != '\0') 1210 { 1211 slice ~= lineBreak; 1212 } 1213 } 1214 } 1215 1216 return scalarToken(startMark, endMark, slice, style); 1217 } 1218 1219 /// Scan chomping and indentation indicators of a scalar token. 1220 Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe 1221 { 1222 auto chomping = Chomping.clip; 1223 int increment = int.min; 1224 dchar c = reader_.peek(); 1225 1226 /// Indicators can be in any order. 
1227 if(getChomping(c, chomping)) 1228 { 1229 getIncrement(c, increment, startMark); 1230 } 1231 else 1232 { 1233 const gotIncrement = getIncrement(c, increment, startMark); 1234 if(gotIncrement) { getChomping(c, chomping); } 1235 } 1236 1237 enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), 1238 new ScannerException(expected("While scanning a block scalar, expected a chomping or indentation indicator", c), 1239 reader_.mark, "scalar started here", startMark)); 1240 1241 return tuple(chomping, increment); 1242 } 1243 1244 /// Get chomping indicator, if detected. Return false otherwise. 1245 /// 1246 /// Used in scanBlockScalarIndicators. 1247 /// 1248 /// Params: 1249 /// 1250 /// c = The character that may be a chomping indicator. 1251 /// chomping = Write the chomping value here, if detected. 1252 bool getChomping(ref dchar c, ref Chomping chomping) @safe 1253 { 1254 if(!c.among!('+', '-')) { return false; } 1255 chomping = c == '+' ? Chomping.keep : Chomping.strip; 1256 reader_.forward(); 1257 c = reader_.peek(); 1258 return true; 1259 } 1260 1261 /// Get increment indicator, if detected. Return false otherwise. 1262 /// 1263 /// Used in scanBlockScalarIndicators. 1264 /// 1265 /// Params: 1266 /// 1267 /// c = The character that may be an increment indicator. 1268 /// If an increment indicator is detected, this will be updated to 1269 /// the next character in the Reader. 1270 /// increment = Write the increment value here, if detected. 1271 /// startMark = Mark for error messages. 1272 bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe 1273 { 1274 if(!c.isDigit) { return false; } 1275 // Convert a digit to integer. 
1276 increment = c - '0'; 1277 assert(increment < 10 && increment >= 0, "Digit has invalid value"); 1278 1279 enforce(increment > 0, 1280 new ScannerException(expected("While scanning a block scalar, expected an indentation indicator in range 1-9", "0"), 1281 reader_.mark, "scalar started here", startMark)); 1282 1283 reader_.forward(); 1284 c = reader_.peek(); 1285 return true; 1286 } 1287 1288 /// Scan (and ignore) ignored line in a block scalar. 1289 void scanBlockScalarIgnoredLine(const Mark startMark) @safe 1290 { 1291 findNextNonSpace(); 1292 if(reader_.peekByte()== '#') { scanToNextBreak(); } 1293 1294 enforce(reader_.peek().isBreak, 1295 new ScannerException(expected("While scanning a block scalar, expected a comment or line break", reader_.peek()), 1296 reader_.mark, "scalar started here", startMark)); 1297 1298 scanLineBreak(); 1299 } 1300 1301 /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. 1302 /// 1303 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1304 /// characters into that slice. 1305 Tuple!(uint, Mark) scanBlockScalarIndentationToSlice(ref char[] slice) @safe 1306 { 1307 uint maxIndent; 1308 Mark endMark = reader_.mark; 1309 1310 while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) 1311 { 1312 if(reader_.peekByte() != ' ') 1313 { 1314 slice ~= scanLineBreak(); 1315 endMark = reader_.mark; 1316 continue; 1317 } 1318 reader_.forward(); 1319 maxIndent = max(reader_.column, maxIndent); 1320 } 1321 1322 return tuple(maxIndent, endMark); 1323 } 1324 1325 /// Scan line breaks at lower or specified indentation in a block scalar. 1326 /// 1327 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1328 /// characters into that slice. 
1329 Mark scanBlockScalarBreaksToSlice(ref char[] slice, const uint indent) @safe 1330 { 1331 Mark endMark = reader_.mark; 1332 1333 for(;;) 1334 { 1335 while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } 1336 if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } 1337 slice ~= scanLineBreak(); 1338 endMark = reader_.mark; 1339 } 1340 1341 return endMark; 1342 } 1343 1344 /// Scan a qouted flow scalar token with specified quotes. 1345 Token scanFlowScalar(const ScalarStyle quotes) @safe 1346 { 1347 const startMark = reader_.mark; 1348 const quote = reader_.get(); 1349 1350 char[] slice; 1351 1352 scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); 1353 1354 while(reader_.peek() != quote) 1355 { 1356 scanFlowScalarSpacesToSlice(slice, startMark); 1357 scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); 1358 } 1359 reader_.forward(); 1360 1361 return scalarToken(startMark, reader_.mark, slice, quotes); 1362 } 1363 1364 /// Scan nonspace characters in a flow scalar. 1365 /// 1366 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1367 /// characters into that slice. 
1368 void scanFlowScalarNonSpacesToSlice(ref char[] slice, const ScalarStyle quotes, const Mark startMark) 1369 @safe 1370 { 1371 for(;;) 1372 { 1373 dchar c = reader_.peek(); 1374 1375 size_t numCodePoints; 1376 while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } 1377 1378 if (numCodePoints > 0) { slice ~= reader_.get(numCodePoints); } 1379 1380 c = reader_.peek(); 1381 if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') 1382 { 1383 reader_.forward(2); 1384 slice ~= '\''; 1385 } 1386 else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || 1387 (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) 1388 { 1389 reader_.forward(); 1390 slice ~= c; 1391 } 1392 else if(quotes == ScalarStyle.doubleQuoted && c == '\\') 1393 { 1394 reader_.forward(); 1395 c = reader_.peek(); 1396 if(c.among!(escapes)) 1397 { 1398 reader_.forward(); 1399 // Escaping has been moved to Parser as it can't be done in 1400 // place (in a slice) in case of '\P' and '\L' (very uncommon, 1401 // but we don't want to break the spec) 1402 char[2] escapeSequence = ['\\', cast(char)c]; 1403 slice ~= escapeSequence; 1404 } 1405 else if(c.among!(escapeHexCodeList)) 1406 { 1407 const hexLength = dyaml.escapes.escapeHexLength(c); 1408 reader_.forward(); 1409 1410 foreach(i; 0 .. 
hexLength) { 1411 enforce(reader_.peek(i).isHexDigit, 1412 new ScannerException(expected("While scanning a double quoted scalar, expected an escape sequence of hexadecimal numbers", reader_.peek(i)), 1413 reader_.mark, "scalar started here", startMark)); 1414 } 1415 char[] hex = reader_.get(hexLength); 1416 1417 assert((hex.length > 0) && (hex.length <= 8), "Hex escape overflow"); 1418 1419 char[2] escapeStart = ['\\', cast(char) c]; 1420 slice ~= escapeStart; 1421 slice ~= hex; 1422 1423 } 1424 else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) 1425 { 1426 scanLineBreak(); 1427 scanFlowScalarBreaksToSlice(slice, startMark); 1428 } 1429 else 1430 { 1431 throw new ScannerException(text("While scanning a double quoted scalar, found unsupported escape character ", c), 1432 reader_.mark, "scalar started here", startMark); 1433 } 1434 } 1435 else { return; } 1436 } 1437 } 1438 1439 /// Scan space characters in a flow scalar. 1440 /// 1441 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1442 /// spaces into that slice. 1443 void scanFlowScalarSpacesToSlice(ref char[] slice, const Mark startMark) @safe 1444 { 1445 // Increase length as long as we see whitespace. 1446 size_t length; 1447 while(reader_.peekByte(length).among!(' ', '\t')) { ++length; } 1448 auto whitespaces = reader_.prefixBytes(length); 1449 1450 // Can check the last byte without striding because '\0' is ASCII 1451 const c = reader_.peek(length); 1452 enforce(c != '\0', 1453 new ScannerException("While scanning a quoted scalar, found unexpected end of buffer", 1454 reader_.mark, "scalar started here", startMark)); 1455 1456 // Spaces not followed by a line break. 1457 if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) 1458 { 1459 reader_.forward(length); 1460 slice ~= whitespaces; 1461 return; 1462 } 1463 1464 // There's a line break after the spaces. 
1465 reader_.forward(length); 1466 const lineBreak = scanLineBreak(); 1467 1468 if(lineBreak != '\n') { slice ~= lineBreak; } 1469 1470 // If we have extra line breaks after the first, scan them into the 1471 // slice. 1472 const bool extraBreaks = scanFlowScalarBreaksToSlice(slice, startMark); 1473 1474 // No extra breaks, one normal line break. Replace it with a space. 1475 if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } 1476 } 1477 1478 /// Scan line breaks in a flow scalar. 1479 /// 1480 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1481 /// line breaks into that slice. 1482 bool scanFlowScalarBreaksToSlice(ref char[] slice, const Mark startMark) @safe 1483 { 1484 // True if at least one line break was found. 1485 bool anyBreaks; 1486 for(;;) 1487 { 1488 // Instead of checking indentation, we check for document separators. 1489 const prefix = reader_.prefix(3); 1490 enforce(!(prefix == "---" || prefix == "...") || 1491 !reader_.peek(3).isWhiteSpace, 1492 new ScannerException("While scanning a quoted scalar, found unexpected document separator", 1493 reader_.mark, "scalar started here", startMark)); 1494 1495 // Skip any whitespaces. 1496 while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); } 1497 1498 // Encountered a non-whitespace non-linebreak character, so we're done. 1499 if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } 1500 1501 const lineBreak = scanLineBreak(); 1502 anyBreaks = true; 1503 slice ~= lineBreak; 1504 } 1505 return anyBreaks; 1506 } 1507 1508 /// Scan plain scalar token (no block, no quotes). 1509 Token scanPlain() @safe 1510 { 1511 // We keep track of the allowSimpleKey_ flag here. 
1512 // Indentation rules are loosed for the flow context 1513 const startMark = reader_.mark; 1514 Mark endMark = startMark; 1515 const indent = indent_ + 1; 1516 1517 // We allow zero indentation for scalars, but then we need to check for 1518 // document separators at the beginning of the line. 1519 // if(indent == 0) { indent = 1; } 1520 1521 char[] slice; 1522 1523 char[] newSpacesSlice; 1524 // Stop at a comment. 1525 while(reader_.peekByte() != '#') 1526 { 1527 // Scan the entire plain scalar. 1528 size_t length; 1529 dchar c = reader_.peek(length); 1530 for(;;) 1531 { 1532 const cNext = reader_.peek(length + 1); 1533 if(c.isWhiteSpace || 1534 (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) || 1535 (flowLevel_ > 0 && c == ':' && (cNext.isWhiteSpace || cNext.among!(',', '[', ']', '{', '}'))) || 1536 (flowLevel_ > 0 && c.among!(',', '[', ']', '{', '}'))) 1537 { 1538 break; 1539 } 1540 ++length; 1541 c = cNext; 1542 } 1543 1544 if(length == 0) { break; } 1545 1546 allowSimpleKey_ = false; 1547 1548 newSpacesSlice ~= reader_.get(length); 1549 1550 endMark = reader_.mark; 1551 1552 slice ~= newSpacesSlice; 1553 newSpacesSlice = []; 1554 1555 const startLength = slice.length; 1556 scanPlainSpacesToSlice(newSpacesSlice); 1557 if(startLength == slice.length + newSpacesSlice.length || 1558 (flowLevel_ == 0 && reader_.column < indent)) 1559 { 1560 break; 1561 } 1562 } 1563 1564 return scalarToken(startMark, endMark, slice, ScalarStyle.plain); 1565 } 1566 1567 /// Scan spaces in a plain scalar. 1568 /// 1569 /// Assumes that the caller is building a slice in Reader, and puts the spaces 1570 /// into that slice. 1571 void scanPlainSpacesToSlice(ref char[] slice) @trusted 1572 { 1573 // The specification is really confusing about tabs in plain scalars. 1574 // We just forbid them completely. Do not use tabs in YAML! 1575 1576 // Get as many plain spaces as there are. 
1577 size_t length; 1578 while(reader_.peekByte(length) == ' ') { ++length; } 1579 char[] whitespaces = reader_.prefixBytes(length); 1580 reader_.forward(length); 1581 1582 const dchar c = reader_.peek(); 1583 if(!c.isNSChar) 1584 { 1585 // We have spaces, but no newline. 1586 if(whitespaces.length > 0) { slice ~= whitespaces; } 1587 return; 1588 } 1589 1590 // Newline after the spaces (if any) 1591 const lineBreak = scanLineBreak(); 1592 allowSimpleKey_ = true; 1593 1594 static bool end(Reader reader_) @safe pure 1595 { 1596 const prefix = reader_.prefix(3); 1597 return ("---" == prefix || "..." == prefix) 1598 && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); 1599 } 1600 1601 if(end(reader_)) { return; } 1602 1603 bool extraBreaks; 1604 1605 char[] newSlice; 1606 if(lineBreak != '\n') { newSlice ~= lineBreak; } 1607 while(reader_.peek().isNSChar) 1608 { 1609 if(reader_.peekByte() == ' ') { reader_.forward(); } 1610 else 1611 { 1612 const lBreak = scanLineBreak(); 1613 extraBreaks = true; 1614 newSlice ~= lBreak; 1615 1616 if(end(reader_)) { return; } 1617 } 1618 } 1619 slice ~= newSlice; 1620 1621 // No line breaks, only a space. 1622 if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } 1623 } 1624 1625 /// Scan handle of a tag token. 1626 /// 1627 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1628 /// characters into that slice. 1629 void scanTagHandleToSlice(string name)(ref char[] slice, const Mark startMark) 1630 { 1631 dchar c = reader_.peek(); 1632 enum contextMsg = "While scanning a " ~ name ~ ", expected a !"; 1633 // should this be an assert? 
1634 enforce(c == '!', 1635 new ScannerException(expected(contextMsg, c), reader_.mark, "tag started here", startMark)); 1636 1637 uint length = 1; 1638 c = reader_.peek(length); 1639 if(c != ' ') 1640 { 1641 while(c.isAlphaNum || c.among!('-', '_')) 1642 { 1643 ++length; 1644 c = reader_.peek(length); 1645 } 1646 enforce(c == '!', 1647 new ScannerException(expected(contextMsg, c), reader_.mark(length), "tag started here", startMark)); 1648 ++length; 1649 } 1650 1651 slice ~= reader_.get(length); 1652 } 1653 1654 /// Scan URI in a tag token. 1655 /// 1656 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1657 /// characters into that slice. 1658 void scanTagURIToSlice(string name)(ref char[] slice, const Mark startMark) 1659 { 1660 // Note: we do not check if URI is well-formed. 1661 dchar c = reader_.peek(); 1662 const startLen = slice.length; 1663 { 1664 uint length; 1665 while(c.isAlphaNum || c.isURIChar) 1666 { 1667 if(c == '%') 1668 { 1669 auto chars = reader_.get(length); 1670 slice ~= chars; 1671 length = 0; 1672 scanURIEscapesToSlice!name(slice, startMark); 1673 } 1674 else { ++length; } 1675 c = reader_.peek(length); 1676 } 1677 if(length > 0) 1678 { 1679 auto chars = reader_.get(length); 1680 slice ~= chars; 1681 length = 0; 1682 } 1683 } 1684 // OK if we scanned something, error otherwise. 1685 enum contextMsg = "While parsing a " ~ name ~ ", expected a URI"; 1686 enforce(slice.length > startLen, 1687 new ScannerException(expected(contextMsg, c), reader_.mark, "tag started here", startMark)); 1688 } 1689 1690 // Not @nogc yet because std.utf.decode is not @nogc 1691 /// Scan URI escape sequences. 1692 /// 1693 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1694 /// characters into that slice. 1695 void scanURIEscapesToSlice(string name)(ref char[] slice, const Mark startMark) 1696 { 1697 import core.exception : UnicodeException; 1698 // URI escapes encode a UTF-8 string. 
We store UTF-8 code units here for 1699 // decoding into UTF-32. 1700 Appender!string buffer; 1701 1702 1703 enum contextMsg = "While scanning a " ~ name; 1704 while(reader_.peekByte() == '%') 1705 { 1706 reader_.forward(); 1707 char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)]; 1708 1709 enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit, 1710 new ScannerException(expected(contextMsg ~ ", expected a URI escape sequence of 2 hexadecimal numbers", nextByte), 1711 reader_.mark, "tag started here", startMark)); 1712 1713 buffer ~= nextByte[].to!ubyte(16); 1714 1715 reader_.forward(2); 1716 } 1717 try 1718 { 1719 foreach (dchar chr; buffer.data) 1720 { 1721 slice ~= chr; 1722 } 1723 } 1724 catch (UnicodeException) 1725 { 1726 throw new ScannerException(contextMsg ~ ", found invalid UTF-8 data encoded in URI escape sequence", 1727 reader_.mark, "tag started here", startMark); 1728 } 1729 } 1730 1731 1732 /// Scan a line break, if any. 1733 /// 1734 /// Transforms: 1735 /// '\r\n' : '\n' 1736 /// '\r' : '\n' 1737 /// '\n' : '\n' 1738 /// '\u0085' : '\n' 1739 /// '\u2028' : '\u2028' 1740 /// '\u2029 : '\u2029' 1741 /// no break : '\0' 1742 dchar scanLineBreak() @safe 1743 { 1744 // Fast path for ASCII line breaks. 
1745 const b = reader_.peekByte(); 1746 if(b < 0x80) 1747 { 1748 if(b == '\n' || b == '\r') 1749 { 1750 if(reader_.prefix(2) == "\r\n") { reader_.forward(2); } 1751 else { reader_.forward(); } 1752 return '\n'; 1753 } 1754 return '\0'; 1755 } 1756 1757 const c = reader_.peek(); 1758 if(c == '\x85') 1759 { 1760 reader_.forward(); 1761 return '\n'; 1762 } 1763 if(c == '\u2028' || c == '\u2029') 1764 { 1765 reader_.forward(); 1766 return c; 1767 } 1768 return '\0'; 1769 } 1770 } 1771 1772 // Issue 309 - https://github.com/dlang-community/D-YAML/issues/309 1773 @safe unittest 1774 { 1775 enum str = q"EOS 1776 exp: | 1777 foobar 1778 EOS".chomp; 1779 1780 auto r = Reader(cast(ubyte[])str.dup); 1781 auto s = Scanner(r); 1782 auto elems = s.map!"a.value".filter!"a.length > 0".array; 1783 assert(elems[1] == "foobar"); 1784 } 1785 1786 @safe unittest 1787 { 1788 import dyaml.loader : Loader; 1789 1790 const str = `test: key: value`; 1791 1792 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1793 assert(exc); 1794 assert(exc.message() == 1795 "Unable to load <unknown>: Mapping values are not allowed here\n" ~ 1796 "<unknown>:1,10"); 1797 } 1798 1799 @safe unittest 1800 { 1801 import dyaml.loader : Loader; 1802 1803 const str = `test: ? 
foo 1804 : bar`; 1805 1806 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1807 assert(exc); 1808 assert(exc.message() == 1809 "Unable to load <unknown>: Mapping keys are not allowed here\n" ~ 1810 "<unknown>:1,7"); 1811 } 1812 1813 @safe unittest 1814 { 1815 import dyaml.loader : Loader; 1816 1817 const str = `@`; 1818 1819 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1820 assert(exc); 1821 assert(exc.message() == 1822 "Unable to load <unknown>: While scanning for the next token, found character '@', index 64 that cannot start any token\n" ~ 1823 "<unknown>:1,1"); 1824 } 1825 1826 @safe unittest 1827 { 1828 import dyaml.loader : Loader; 1829 1830 const str = `foo: bar 1831 meh`; 1832 1833 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1834 assert(exc); 1835 assert(exc.message() == 1836 "Unable to load <unknown>: While scanning a simple key, could not find expected ':'\n" ~ 1837 "<unknown>:2,4\nkey started here: <unknown>:2,1"); 1838 } 1839 1840 @safe unittest 1841 { 1842 import dyaml.loader : Loader; 1843 1844 const str = `foo: &A bar 1845 *A ]`; 1846 1847 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1848 assert(exc); 1849 assert(exc.message() == 1850 "Unable to load <unknown>: While scanning a simple key, could not find expected ':'\n" ~ 1851 "<unknown>:2,4\nkey started here: <unknown>:2,1"); 1852 } 1853 1854 @safe unittest 1855 { 1856 import dyaml.loader : Loader; 1857 1858 const str = `foo: &[`; 1859 1860 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1861 assert(exc); 1862 assert(exc.message() == 1863 "Unable to load <unknown>: While scanning an anchor or alias, expected a printable character besides '[', ']', '{', '}' and ',', but found [\n" ~ 1864 "<unknown>:1,7\nstarted here: <unknown>:1,6"); 1865 } 1866 1867 @safe unittest 1868 { 1869 import dyaml.loader : Loader; 1870 1871 const str = `%?`; 1872 1873 
const exc = collectException!LoaderException(Loader.fromString(str).load()); 1874 assert(exc); 1875 assert(exc.message() == 1876 "Unable to load <unknown>: While scanning a directive, expected alphanumeric, '-' or '_', but found ?\n" ~ 1877 "<unknown>:1,2\ndirective started here: <unknown>:1,1"); 1878 } 1879 1880 @safe unittest 1881 { 1882 import dyaml.loader : Loader; 1883 1884 const str = `%b?`; 1885 1886 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1887 assert(exc); 1888 assert(exc.message() == 1889 "Unable to load <unknown>: While scanning a directive, expected alphanumeric, '-' or '_', but found ?\n" ~ 1890 "<unknown>:1,3\ndirective started here: <unknown>:1,1"); 1891 } 1892 1893 @safe unittest 1894 { 1895 import dyaml.loader : Loader; 1896 1897 const str = `%YAML 1?`; 1898 1899 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1900 assert(exc); 1901 assert(exc.message() == 1902 "Unable to load <unknown>: While scanning a directive, expected digit or '.', but found ?\n" ~ 1903 "<unknown>:1,8\ndirective started here: <unknown>:1,1"); 1904 } 1905 1906 @safe unittest 1907 { 1908 import dyaml.loader : Loader; 1909 1910 const str = `%YAML 1.1?`; 1911 1912 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1913 assert(exc); 1914 assert(exc.message() == 1915 "Unable to load <unknown>: While scanning a directive, expected digit or '.', but found ?\n" ~ 1916 "<unknown>:1,10\ndirective started here: <unknown>:1,1"); 1917 } 1918 1919 @safe unittest 1920 { 1921 import dyaml.loader : Loader; 1922 1923 const str = `%YAML ?`; 1924 1925 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1926 assert(exc); 1927 assert(exc.message() == 1928 "Unable to load <unknown>: While scanning a directive, expected a digit, but found ?\n" ~ 1929 "<unknown>:1,7\ndirective started here: <unknown>:1,1"); 1930 } 1931 1932 @safe unittest 1933 { 1934 import dyaml.loader : Loader; 1935 
1936 const str = `%TAG !a!<`; 1937 1938 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1939 assert(exc); 1940 assert(exc.message() == 1941 "Unable to load <unknown>: While scanning a directive handle, expected ' ', but found <\n" ~ 1942 "<unknown>:1,9\ndirective started here: <unknown>:1,1"); 1943 } 1944 1945 @safe unittest 1946 { 1947 import dyaml.loader : Loader; 1948 1949 const str = `%TAG !a! !>`; 1950 1951 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1952 assert(exc); 1953 assert(exc.message() == 1954 "Unable to load <unknown>: While scanning a directive prefix, expected ' ', but found >\n" ~ 1955 "<unknown>:1,11\ndirective started here: <unknown>:1,1"); 1956 } 1957 1958 @safe unittest 1959 { 1960 import dyaml.loader : Loader; 1961 1962 const str = `%YAML 1.0 ?`; 1963 1964 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1965 assert(exc); 1966 assert(exc.message() == 1967 "Unable to load <unknown>: While scanning a directive, expected a comment or a line break, but found ?\n" ~ 1968 "<unknown>:1,11\ndirective started here: <unknown>:1,1"); 1969 } 1970 1971 @safe unittest 1972 { 1973 import dyaml.loader : Loader; 1974 1975 const str = `foo: !<a#`; 1976 1977 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1978 assert(exc); 1979 assert(exc.message() == 1980 "Unable to load <unknown>: While scanning a tag, expected a '>', but found #\n" ~ 1981 "<unknown>:1,9\ntag started here: <unknown>:1,6"); 1982 } 1983 1984 @safe unittest 1985 { 1986 import dyaml.loader : Loader; 1987 1988 const str = `foo: !<a>#`; 1989 1990 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1991 assert(exc); 1992 assert(exc.message() == 1993 "Unable to load <unknown>: While scanning a tag, expected a ' ', but found #\n" ~ 1994 "<unknown>:1,10\ntag started here: <unknown>:1,6"); 1995 } 1996 1997 @safe unittest 1998 { 1999 import dyaml.loader : Loader; 
2000 2001 const str = `foo: !<#`; 2002 2003 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2004 assert(exc); 2005 assert(exc.message() == 2006 "Unable to load <unknown>: While parsing a tag, expected a URI, but found #\n" ~ 2007 "<unknown>:1,8\ntag started here: <unknown>:1,6"); 2008 } 2009 2010 @safe unittest 2011 { 2012 import dyaml.loader : Loader; 2013 2014 const str = `foo: |b`; 2015 2016 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2017 assert(exc); 2018 assert(exc.message() == 2019 "Unable to load <unknown>: While scanning a block scalar, expected a chomping or indentation indicator, but found b\n" ~ 2020 "<unknown>:1,7\nscalar started here: <unknown>:1,6"); 2021 } 2022 2023 @safe unittest 2024 { 2025 import dyaml.loader : Loader; 2026 2027 const str = `foo: |0`; 2028 2029 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2030 assert(exc); 2031 assert(exc.message() == 2032 "Unable to load <unknown>: While scanning a block scalar, expected an indentation indicator in range 1-9, but found 0\n" ~ 2033 "<unknown>:1,7\nscalar started here: <unknown>:1,6"); 2034 } 2035 2036 @safe unittest 2037 { 2038 import dyaml.loader : Loader; 2039 2040 const str = `"\x"`; 2041 2042 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2043 assert(exc); 2044 assert(exc.message() == 2045 "Unable to load <unknown>: While scanning a double quoted scalar, expected an escape sequence of hexadecimal numbers, but found \"\n" ~ 2046 "<unknown>:1,4\nscalar started here: <unknown>:1,1"); 2047 } 2048 2049 @safe unittest 2050 { 2051 import dyaml.loader : Loader; 2052 2053 const str = `"\:"`; 2054 2055 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2056 assert(exc); 2057 assert(exc.message() == 2058 "Unable to load <unknown>: While scanning a double quoted scalar, found unsupported escape character :\n" ~ 2059 "<unknown>:1,3\nscalar started here: 
<unknown>:1,1"); 2060 } 2061 2062 @safe unittest 2063 { 2064 import dyaml.loader : Loader; 2065 2066 const str = `"an unfinished scal`; 2067 2068 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2069 assert(exc); 2070 assert(exc.message() == 2071 "Unable to load <unknown>: While scanning a quoted scalar, found unexpected end of buffer\n" ~ 2072 "<unknown>:1,20\nscalar started here: <unknown>:1,1"); 2073 } 2074 2075 @safe unittest 2076 { 2077 import dyaml.loader : Loader; 2078 2079 const str = `"an unfinished scal 2080 ---`; 2081 2082 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2083 assert(exc); 2084 assert(exc.message() == 2085 "Unable to load <unknown>: While scanning a quoted scalar, found unexpected document separator\n" ~ 2086 "<unknown>:2,1\nscalar started here: <unknown>:1,1"); 2087 } 2088 2089 @safe unittest 2090 { 2091 import dyaml.loader : Loader; 2092 2093 const str = `Error: !a:!`; 2094 2095 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2096 assert(exc); 2097 assert(exc.message() == 2098 "Unable to load <unknown>: While scanning a tag, expected a !, but found :\n" ~ 2099 "<unknown>:1,10\ntag started here: <unknown>:1,8"); 2100 } 2101 2102 @safe unittest 2103 { 2104 import dyaml.loader : Loader; 2105 2106 const str = `Error: !e!tag%:)`; 2107 2108 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2109 assert(exc); 2110 assert(exc.message() == 2111 "Unable to load <unknown>: While scanning a tag, expected a URI escape sequence of 2 hexadecimal numbers, but found :)\n" ~ 2112 "<unknown>:1,15\ntag started here: <unknown>:1,8"); 2113 } 2114 2115 @safe unittest 2116 { 2117 import dyaml.loader : Loader; 2118 2119 const str = `Error: !e!tag%99%99`; 2120 2121 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2122 assert(exc); 2123 assert(exc.message() == 2124 "Unable to load <unknown>: While scanning a tag, found invalid 
UTF-8 data encoded in URI escape sequence\n" ~ 2125 "<unknown>:1,20\ntag started here: <unknown>:1,8"); 2126 } 2127 2128 private void insert(ref char[] slice, const dchar c, const size_t position) @safe pure 2129 in(position <= slice.length, text("Trying to insert after the end of the slice (", position, " > ", slice.length, ")")) 2130 { 2131 const point = position; 2132 const movedLength = slice.length - point; 2133 2134 // Encode c into UTF-8 2135 char[4] encodeBuf; 2136 if(c < 0x80) { encodeBuf[0] = cast(char)c; } 2137 const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); 2138 2139 slice.length += bytes; 2140 if(movedLength > 0) 2141 { 2142 copy(slice[point..point + movedLength * char.sizeof], 2143 slice[point + bytes .. point + bytes + movedLength * char.sizeof]); 2144 } 2145 slice[point .. point + bytes] = encodeBuf[0 .. bytes]; 2146 }