// Copyright Ferdinand Majerech 2011-2014.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
module dyaml.scanner;


import core.stdc.string;

import std.algorithm;
import std.array;
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;
import std.utf;

import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;

package:
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)

/// Characters that end a line, or '\0' marking the end of the stream.
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Line breaks plus the plain space character.
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// All YAML whitespace: space, tab, line breaks and end of stream.
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Whitespace that does not terminate a line.
alias isNonLinebreakWhitespace = among!(' ', '\t');

/// Characters that can never start a plain scalar (indicators and whitespace).
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
    '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
    '\r', '\u0085', '\u2028', '\u2029');

/// Non-alphanumeric characters permitted inside a tag URI.
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
    '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');

/// Space or a line break (YAML "ns" boundary characters).
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Line break characters (YAML "b" characters).
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');

/// Characters that delimit or need attention inside a flow scalar.
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');

/// Valid character of an anchor/alias name: any non-whitespace character except
/// flow indicators and the byte order mark.
alias isNSAnchorName = c => !c.isWhiteSpace && !c.among!('[', ']', '{', '}', ',', '\uFEFF');

/// Generates tokens from data provided by a Reader.
struct Scanner
{
    private:
        /// A simple key is a key that is not denoted by the '?' indicator.
        /// For example:
        ///   ---
        ///   block simple key: value
        ///   ? not a simple key:
        ///   : { flow simple key: value }
        /// We emit the KEY token before all keys, so when we find a potential simple
        /// key, we try to locate the corresponding ':' indicator. Simple keys should be
        /// limited to a single line and 1024 characters.
        static struct SimpleKey
        {
            /// Position of the key
            Mark mark;
            /// Index of the key token from start (first token scanned being 0).
            uint tokenIndex;
            /// Is this required to be a simple key?
            bool required;
            /// Is this struct "null" (invalid)?.
            bool isNull;
        }

        /// Block chomping types.
        enum Chomping
        {
            /// Strip all trailing line breaks. '-' indicator.
            strip,
            /// Line break of the last line is preserved, others discarded. Default.
            clip,
            /// All trailing line breaks are preserved. '+' indicator.
            keep
        }

        /// Reader used to read from a file/stream.
        Reader reader_;
        /// Are we done scanning?
        bool done_;

        /// Level of nesting in flow context. If 0, we're in block context.
        uint flowLevel_;
        /// Current indentation level.
        int indent_ = -1;
        /// Past indentation levels. Used as a stack.
        Appender!(int[]) indents_;

        /// Processed tokens not yet emitted. Used as a queue.
        Queue!Token tokens_;

        /// Number of tokens emitted through the getToken method.
        uint tokensTaken_;

        /// Can a simple key start at the current position? A simple key may start:
        /// - at the beginning of the line, not counting indentation spaces
        ///   (in block context),
        /// - after '{', '[', ',' (in the flow context),
        /// - after '?', ':', '-' (in the block context).
        /// In the block context, this flag also signifies if a block collection
        /// may start at the current position.
        bool allowSimpleKey_ = true;

        /// Possible simple keys indexed by flow levels.
        SimpleKey[] possibleSimpleKeys_;

    public:
        /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
        {
            // Queue the initial STREAM-START token immediately.
            reader_ = reader;
            fetchStreamStart();
        }

        /// Advance to the next token
        void popFront() @safe
        {
            ++tokensTaken_;
            tokens_.pop();
        }

        /// Return the current token without removing it from the queue.
        const(Token) front() @safe
        {
            enforce(!empty, "No token left to peek");
            return tokens_.peek();
        }

        /// Return whether there are any more tokens left.
        ///
        /// Note: scans ahead (fetching tokens into the queue) as a side effect.
        bool empty() @safe
        {
            while (needMoreTokens())
            {
                fetchToken();
            }
            return tokens_.empty;
        }

        /// Get or set the file name (returned by reference so it can be assigned).
        ref inout(string) name() inout @safe return pure nothrow @nogc
        {
            return reader_.name;
        }
        /// Get a mark from the current reader position
        Mark mark() const @safe pure nothrow @nogc
        {
            return reader_.mark;
        }

    private:
        /// Most scanning error messages have the same format; so build them with this
        /// function.
        string expected(T)(string expected, T found)
        {
            return text(expected, ", but found ", found);
        }

        /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
        bool needMoreTokens() @safe pure
        {
            if(done_)         { return false; }
            if(tokens_.empty) { return true; }

            /// The current token may be a potential simple key, so we need to look further.
            stalePossibleSimpleKeys();
            return nextPossibleSimpleKey() == tokensTaken_;
        }

        /// Fetch a token, adding it to tokens_.
        void fetchToken() @safe
        {
            // Eat whitespaces and comments until we reach the next token.
            scanToNextToken();

            // Remove obsolete possible simple keys.
            stalePossibleSimpleKeys();

            // Compare current indentation and column. It may add some tokens
            // and decrease the current indentation level.
            unwindIndent(reader_.column);

            // Get the next character.
            const dchar c = reader_.peekByte();

            // Fetch the token.
            if(c == '\0')            { return fetchStreamEnd();     }
            if(checkDirective())     { return fetchDirective();     }
            if(checkDocumentStart()) { return fetchDocumentStart(); }
            if(checkDocumentEnd())   { return fetchDocumentEnd();   }
            // Order of the following checks is NOT significant.
            switch(c)
            {
                case '[':  return fetchFlowSequenceStart();
                case '{':  return fetchFlowMappingStart();
                case ']':  return fetchFlowSequenceEnd();
                case '}':  return fetchFlowMappingEnd();
                case ',':  return fetchFlowEntry();
                case '!':  return fetchTag();
                case '\'': return fetchSingle();
                case '\"': return fetchDouble();
                case '*':  return fetchAlias();
                case '&':  return fetchAnchor();
                // '?', ':' and '-' only start tokens when followed by whitespace
                // (or, in flow context, unconditionally for '?'/':').
                case '?':  if(checkKey())        { return fetchKey();        } goto default;
                case ':':  if(checkValue())      { return fetchValue();      } goto default;
                case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
                // Block scalars are only allowed in block context.
                case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
                case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
                default:   if(checkPlain())      { return fetchPlain();      }
            }

            throw new ScannerException("While scanning for the next token, found character " ~
                                       "\'%s\', index %s that cannot start any token"
                                       .format(c, to!int(c)), reader_.mark);
        }


        /// Return the token number of the nearest possible simple key.
255 uint nextPossibleSimpleKey() @safe pure nothrow @nogc 256 { 257 uint minTokenNumber = uint.max; 258 foreach(k, ref simpleKey; possibleSimpleKeys_) 259 { 260 if(simpleKey.isNull) { continue; } 261 minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex); 262 } 263 return minTokenNumber; 264 } 265 266 /// Remove entries that are no longer possible simple keys. 267 /// 268 /// According to the YAML specification, simple keys 269 /// - should be limited to a single line, 270 /// - should be no longer than 1024 characters. 271 /// Disabling this will allow simple keys of any length and 272 /// height (may cause problems if indentation is broken though). 273 void stalePossibleSimpleKeys() @safe pure 274 { 275 foreach(level, ref key; possibleSimpleKeys_) 276 { 277 if(key.isNull) { continue; } 278 if(key.mark.line != reader_.mark.line || reader_.mark.column - key.mark.column > 1024) 279 { 280 enforce(!key.required, 281 new ScannerException("While scanning a simple key, could not find expected ':'", 282 reader_.mark, "key started here", key.mark)); 283 key.isNull = true; 284 } 285 } 286 } 287 288 /// Check if the next token starts a possible simple key and if so, save its position. 289 /// 290 /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'. 291 void savePossibleSimpleKey() @safe pure 292 { 293 // Check if a simple key is required at the current position. 294 const required = (flowLevel_ == 0 && indent_ == reader_.column); 295 assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~ 296 "the first token in the current line. Therefore it is always allowed."); 297 298 if(!allowSimpleKey_) { return; } 299 300 // The next token might be a simple key, so save its number and position. 
301 removePossibleSimpleKey(); 302 const tokenCount = tokensTaken_ + cast(uint)tokens_.length; 303 304 const line = reader_.line; 305 const column = reader_.column; 306 const key = SimpleKey(reader_.mark, tokenCount, required); 307 308 if(possibleSimpleKeys_.length <= flowLevel_) 309 { 310 const oldLength = possibleSimpleKeys_.length; 311 possibleSimpleKeys_.length = flowLevel_ + 1; 312 // Make sure all the empty keys are null 313 foreach (ref emptyKey; possibleSimpleKeys_[oldLength .. flowLevel_]) 314 { 315 emptyKey.isNull = true; 316 } 317 } 318 possibleSimpleKeys_[flowLevel_] = key; 319 } 320 321 /// Remove the saved possible key position at the current flow level. 322 void removePossibleSimpleKey() @safe pure 323 { 324 if(possibleSimpleKeys_.length <= flowLevel_) { return; } 325 326 if(!possibleSimpleKeys_[flowLevel_].isNull) 327 { 328 const key = possibleSimpleKeys_[flowLevel_]; 329 enforce(!key.required, 330 new ScannerException("While scanning a simple key, could not find expected ':'", 331 reader_.mark, "key started here", key.mark)); 332 possibleSimpleKeys_[flowLevel_].isNull = true; 333 } 334 } 335 336 /// Decrease indentation, removing entries in indents_. 337 /// 338 /// Params: column = Current column in the file/stream. 339 void unwindIndent(const int column) @safe 340 { 341 if(flowLevel_ > 0) 342 { 343 // In flow context, tokens should respect indentation. 344 // The condition should be `indent >= column` according to the spec. 345 // But this condition will prohibit intuitively correct 346 // constructions such as 347 // key : { 348 // } 349 350 // In the flow context, indentation is ignored. We make the scanner less 351 // restrictive than what the specification requires. 352 // if(pedantic_ && flowLevel_ > 0 && indent_ > column) 353 // { 354 // throw new ScannerException("Invalid intendation or unclosed '[' or '{'", 355 // reader_.mark) 356 // } 357 return; 358 } 359 360 // In block context, we may need to issue the BLOCK-END tokens. 
361 while(indent_ > column) 362 { 363 indent_ = indents_.data.back; 364 assert(indents_.data.length); 365 indents_.shrinkTo(indents_.data.length - 1); 366 tokens_.push(blockEndToken(reader_.mark, reader_.mark)); 367 } 368 } 369 370 /// Increase indentation if needed. 371 /// 372 /// Params: column = Current column in the file/stream. 373 /// 374 /// Returns: true if the indentation was increased, false otherwise. 375 bool addIndent(int column) @safe 376 { 377 if(indent_ >= column){return false;} 378 indents_ ~= indent_; 379 indent_ = column; 380 return true; 381 } 382 383 384 /// Add STREAM-START token. 385 void fetchStreamStart() @safe nothrow 386 { 387 tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding)); 388 } 389 390 ///Add STREAM-END token. 391 void fetchStreamEnd() @safe 392 { 393 //Set intendation to -1 . 394 unwindIndent(-1); 395 removePossibleSimpleKey(); 396 allowSimpleKey_ = false; 397 possibleSimpleKeys_.destroy; 398 399 tokens_.push(streamEndToken(reader_.mark, reader_.mark)); 400 done_ = true; 401 } 402 403 /// Add DIRECTIVE token. 404 void fetchDirective() @safe 405 { 406 // Set intendation to -1 . 407 unwindIndent(-1); 408 // Reset simple keys. 409 removePossibleSimpleKey(); 410 allowSimpleKey_ = false; 411 412 auto directive = scanDirective(); 413 tokens_.push(directive); 414 } 415 416 /// Add DOCUMENT-START or DOCUMENT-END token. 417 void fetchDocumentIndicator(TokenID id)() 418 if(id == TokenID.documentStart || id == TokenID.documentEnd) 419 { 420 // Set indentation to -1 . 421 unwindIndent(-1); 422 // Reset simple keys. Note that there can't be a block collection after '---'. 423 removePossibleSimpleKey(); 424 allowSimpleKey_ = false; 425 426 Mark startMark = reader_.mark; 427 reader_.forward(3); 428 tokens_.push(simpleToken!id(startMark, reader_.mark)); 429 } 430 431 /// Aliases to add DOCUMENT-START or DOCUMENT-END token. 
        alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart);
        alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd);

        /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        void fetchFlowCollectionStart(TokenID id)() @safe
        {
            // '[' and '{' may start a simple key.
            savePossibleSimpleKey();
            // Simple keys are allowed after '[' and '{'.
            allowSimpleKey_ = true;
            ++flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart);
        alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart);

        /// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        void fetchFlowCollectionEnd(TokenID id)()
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // No simple keys after ']' and '}'.
            allowSimpleKey_ = false;
            --flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd);
        alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd);

        /// Add FLOW-ENTRY token.
        void fetchFlowEntry() @safe
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after ','.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(flowEntryToken(startMark, reader_.mark));
        }

        /// Additional checks used in block context in fetchBlockEntry and fetchKey.
        ///
        /// Params: type = String representing the token type we might need to add.
        ///         id   = Token type we might need to add.
        void blockChecks(string type, TokenID id)()
        {
            enum context = type ~ " keys are not allowed here";
            // Are we allowed to start a key (not necessarily a simple one)?
            enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

            // A new key at a deeper column starts a new block collection.
            if(addIndent(reader_.column))
            {
                tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
            }
        }

        /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
        void fetchBlockEntry() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); }

            // It's an error for the block entry to occur in the flow context,
            // but we let the parser detect this.

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '-'.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(blockEntryToken(startMark, reader_.mark));
        }

        /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
        void fetchKey() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); }

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '?' in the block context.
            allowSimpleKey_ = (flowLevel_ == 0);

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(keyToken(startMark, reader_.mark));
        }

        /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
        void fetchValue() @safe
        {
            // Do we determine a simple key?
            if(possibleSimpleKeys_.length > flowLevel_ &&
               !possibleSimpleKeys_[flowLevel_].isNull)
            {
                const key = possibleSimpleKeys_[flowLevel_];
                assert(key.tokenIndex >= tokensTaken_);

                possibleSimpleKeys_[flowLevel_].isNull = true;
                Mark keyMark = key.mark;
                // Position in the queue where the KEY token belongs (the key's
                // token index relative to the tokens already emitted).
                const idx = key.tokenIndex - tokensTaken_;

                // Add KEY.
                // Manually inserting since tokens are immutable (need linked list).
                tokens_.insert(keyToken(keyMark, keyMark), idx);

                // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
                if(flowLevel_ == 0 && addIndent(key.mark.column))
                {
                    tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
                }

                // There cannot be two simple keys in a row.
                allowSimpleKey_ = false;
            }
            // Part of a complex key
            else
            {
                // We can start a complex value if and only if we can start a simple key.
                enforce(flowLevel_ > 0 || allowSimpleKey_,
                    new ScannerException("Mapping values are not allowed here", reader_.mark));

                // If this value starts a new block mapping, we need to add
                // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
                if(flowLevel_ == 0 && addIndent(reader_.column))
                {
                    tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
                }

                // Reset possible simple key on the current level.
                removePossibleSimpleKey();
                // Simple keys are allowed after ':' in the block context.
                allowSimpleKey_ = (flowLevel_ == 0);
            }

            // Add VALUE.
            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(valueToken(startMark, reader_.mark));
        }

        /// Add ALIAS or ANCHOR token.
        void fetchAnchor_(TokenID id)() @safe
            if(id == TokenID.alias_ || id == TokenID.anchor)
        {
            // ALIAS/ANCHOR could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after ALIAS/ANCHOR.
            allowSimpleKey_ = false;

            auto anchor = scanAnchor(id);
            tokens_.push(anchor);
        }

        /// Aliases to add ALIAS or ANCHOR token.
        alias fetchAlias = fetchAnchor_!(TokenID.alias_);
        alias fetchAnchor = fetchAnchor_!(TokenID.anchor);

        /// Add TAG token.
        void fetchTag() @safe
        {
            // TAG could start a simple key.
            savePossibleSimpleKey();
            // No simple keys after TAG.
            allowSimpleKey_ = false;

            tokens_.push(scanTag());
        }

        /// Add block SCALAR token.
        void fetchBlockScalar(ScalarStyle style)() @safe
            if(style == ScalarStyle.literal || style == ScalarStyle.folded)
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // A simple key may follow a block scalar.
            allowSimpleKey_ = true;

            auto blockScalar = scanBlockScalar(style);
            tokens_.push(blockScalar);
        }

        /// Aliases to add literal or folded block scalar.
        alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal);
        alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded);

        /// Add quoted flow SCALAR token.
        void fetchFlowScalar(ScalarStyle quotes)()
        {
            // A flow scalar could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after flow scalars.
            allowSimpleKey_ = false;

            // Scan and add SCALAR.
            auto scalar = scanFlowScalar(quotes);
            tokens_.push(scalar);
        }

        /// Aliases to add single or double quoted flow scalar.
        alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted);
        alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted);

        /// Add plain SCALAR token.
        void fetchPlain() @safe
        {
            // A plain scalar could be a simple key
            savePossibleSimpleKey();
            // No simple keys after plain scalars. But note that scanPlain() will
            // change this flag if the scan is finished at the beginning of the line.
            allowSimpleKey_ = false;
            // Scan and add SCALAR. May change allowSimpleKey_.
            auto plain = scanPlain();

            tokens_.push(plain);
        }

    pure:

        /// Check if the next token is DIRECTIVE:        ^ '%' ...
        bool checkDirective() @safe
        {
            return reader_.peekByte() == '%' && reader_.column == 0;
        }

        /// Check if the next token is DOCUMENT-START:   ^ '---' (' '|'\n')
        bool checkDocumentStart() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '-' &&
                   reader_.prefix(3) == "---" &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
        bool checkDocumentEnd() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '.' &&
                   reader_.prefix(3) == "..." &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
        bool checkBlockEntry() @safe
        {
            return !!reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is KEY(flow context):    '?'
        ///
        /// or KEY(block context):   '?' (' '|'\n')
        bool checkKey() @safe
        {
            return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
        }

        /// Check if the next token is VALUE(flow context):  ':'
        ///
        /// or VALUE(block context): ':' (' '|'\n')
        bool checkValue() @safe
        {
            return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is a plain scalar.
        ///
        /// A plain scalar may start with any non-space character except:
        ///   '-', '?', ':', ',', '[', ']', '{', '}',
        ///   '#', '&', '*', '!', '|', '>', '\'', '\"',
        ///   '%', '@', '`'.
        ///
        /// It may also start with
        ///   '-', '?', ':'
        /// if it is followed by a non-space character.
        ///
        /// Note that we limit the last rule to the block context (except the
        /// '-' character) because we want the flow context to be space
        /// independent.
        bool checkPlain() @safe
        {
            const c = reader_.peek();
            if(!c.isNonScalarStartCharacter)
            {
                return true;
            }
            return !reader_.peek(1).isWhiteSpace &&
                   (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
        }

        /// Move to the next non-space character.
        void findNextNonSpace() @safe
        {
            while(reader_.peekByte() == ' ') { reader_.forward(); }
        }

        /// Scan a string of alphanumeric or "-_" characters.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanAlphaNumericToSlice(string name)(ref char[] slice, const Mark startMark)
        {
            size_t length;
            dchar c = reader_.peek();
            while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); }

            // At least one valid character is required.
            enforce(length > 0, new ScannerException(expected("While scanning a " ~ name ~ ", expected alphanumeric, '-' or '_'", c),
                reader_.mark, name~" started here", startMark));

            slice ~= reader_.get(length);
        }

        /// Scan an anchor or alias name and return the scanned characters.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        char[] readAnchorAlias(const Mark startMark) @safe
        {
            size_t length;
            dchar c = reader_.peek();
            while (c.isNSAnchorName)
            {
                c = reader_.peek(++length);
            }

            // An empty anchor/alias name is an error.
            enforce(length > 0, new ScannerException(
                expected("While scanning an anchor or alias, expected a printable character besides '[', ']', '{', '}' and ','", c),
                reader_.mark, "started here", startMark));

            return reader_.get(length);
        }

        /// Scan and throw away all characters until next line break.
        void scanToNextBreak() @safe
        {
            while(!reader_.peek().isBreak) { reader_.forward(); }
        }

        /// Scan all characters until next line break.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanToNextBreakToSlice(ref char[] slice) @safe
        {
            uint length;
            while(!reader_.peek(length).isBreak)
            {
                ++length;
            }
            slice ~= reader_.get(length);
        }


        /// Move to next token in the file/stream.
        ///
        /// We ignore spaces, line breaks and comments.
        /// If we find a line break in the block context, we set
        /// `allowSimpleKey_` on.
        ///
        /// We do not yet support BOM inside the stream as the
        /// specification requires. Any such mark will be considered as a part
        /// of the document.
        void scanToNextToken() @safe
        {
            // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
            //   Tabs cannot precede tokens
            //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
            //   KEY(block), VALUE(block), BLOCK-ENTRY
            // So the checking code is
            //   if <TAB>:
            //       allowSimpleKey_ = false
            // We also need to add the check for `allowSimpleKey_ == true` to
            // `unwindIndent` before issuing BLOCK-END.
            // Scanners for block, flow, and plain scalars need to be modified.

            for(;;)
            {
                // All whitespace in flow context is ignored, even whitespace
                // not allowed in other contexts
                if (flowLevel_ > 0)
                {
                    while(reader_.peekByte().isNonLinebreakWhitespace) { reader_.forward(); }
                }
                else
                {
                    findNextNonSpace();
                }
                // Skip a comment to the end of the line.
                if(reader_.peekByte() == '#') { scanToNextBreak(); }
                if(scanLineBreak() != '\0')
                {
                    if(flowLevel_ == 0) { allowSimpleKey_ = true; }
                }
                else
                {
                    // No line break consumed: we are at the next token.
                    break;
                }
            }
        }

        /// Scan directive token.
        Token scanDirective() @safe
        {
            Mark startMark = reader_.mark;
            // Skip the '%'.
            reader_.forward();

            // Scan directive name
            char[] name;
            scanDirectiveNameToSlice(name, startMark);

            char[] value;

            // Index where tag handle ends and suffix starts in a tag directive value.
            uint tagHandleEnd = uint.max;
            if(name == "YAML")     { scanYAMLDirectiveValueToSlice(value, startMark); }
            else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(value, startMark); }

            Mark endMark = reader_.mark;

            DirectiveType directive;
            if(name == "YAML")     { directive = DirectiveType.yaml; }
            else if(name == "TAG") { directive = DirectiveType.tag; }
            else
            {
                // Unknown directives are preserved as "reserved"; their
                // content up to the line break is discarded.
                directive = DirectiveType.reserved;
                scanToNextBreak();
            }

            scanDirectiveIgnoredLine(startMark);

            return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
        }

        /// Scan name of a directive token.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanDirectiveNameToSlice(ref char[] slice, const Mark startMark) @safe
        {
            // Scan directive name.
            scanAlphaNumericToSlice!"directive"(slice, startMark);

            // The name must be followed by whitespace or a line break.
            // NOTE(review): the error message mirrors the alphanumeric check above
            // rather than describing the expected separator — confirm against upstream
            // before changing, since tests may depend on the exact wording.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive, expected alphanumeric, '-' or '_'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe
        {
            findNextNonSpace();

            // Major version number.
            scanYAMLDirectiveNumberToSlice(slice, startMark);

            enforce(reader_.peekByte() == '.',
                new ScannerException(expected("While scanning a directive, expected digit or '.'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
            // Skip the '.'.
            reader_.forward();

            // Minor version number.
            slice ~= '.';
            scanYAMLDirectiveNumberToSlice(slice, startMark);

            // The version must be followed by whitespace or a line break.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive, expected digit or '.'", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan a number from a YAML directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveNumberToSlice(ref char[] slice, const Mark startMark) @safe
        {
            enforce(isDigit(reader_.peek()),
                new ScannerException(expected("While scanning a directive, expected a digit", reader_.peek()),
                    reader_.mark, "directive started here", startMark));

            // Already found the first digit in the enforce(), so set length to 1.
            uint length = 1;
            while(reader_.peek(length).isDigit) { ++length; }

            slice ~= reader_.get(length);
        }

        /// Scan value of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// Returns: Length of tag handle (which is before tag prefix) in scanned data
        uint scanTagDirectiveValueToSlice(ref char[] slice, const Mark startMark) @safe
        {
            findNextNonSpace();
            // Remember where the handle starts so its length can be computed.
            const startLength = slice.length;
            scanTagDirectiveHandleToSlice(slice, startMark);
            const handleLength = cast(uint)(slice.length - startLength);
            findNextNonSpace();
            scanTagDirectivePrefixToSlice(slice, startMark);

            return handleLength;
        }

        /// Scan handle of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanTagDirectiveHandleToSlice(ref char[] slice, const Mark startMark) @safe
        {
            scanTagHandleToSlice!"directive"(slice, startMark);
            // The handle must be separated from the prefix by a space.
            enforce(reader_.peekByte() == ' ',
                new ScannerException(expected("While scanning a directive handle, expected ' '", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan prefix of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanTagDirectivePrefixToSlice(ref char[] slice, const Mark startMark) @safe
        {
            scanTagURIToSlice!"directive"(slice, startMark);
            // The prefix must be followed by whitespace or a line break.
            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                new ScannerException(expected("While scanning a directive prefix, expected ' '", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
        }

        /// Scan (and ignore) ignored line after a directive.
        void scanDirectiveIgnoredLine(const Mark startMark) @safe
        {
            findNextNonSpace();
            // A trailing comment is allowed; skip it.
            if(reader_.peekByte() == '#') { scanToNextBreak(); }
            enforce(reader_.peek().isBreak,
                new ScannerException(expected("While scanning a directive, expected a comment or a line break", reader_.peek()),
                    reader_.mark, "directive started here", startMark));
            scanLineBreak();
        }


        /// Scan an alias or an anchor.
        ///
        /// The specification does not restrict characters for anchors and
        /// aliases. This may lead to problems, for instance, the document:
        ///   [ *alias, value ]
        /// can be interpreted in two ways, as
        ///   [ "value" ]
        /// and
        ///   [ *alias , "value" ]
        /// Therefore we restrict aliases to ASCII alphanumeric characters.
        Token scanAnchor(const TokenID id) @safe
        {
            const startMark = reader_.mark;
            reader_.forward(); // The */& character was only peeked, so we drop it now

            char[] value = readAnchorAlias(startMark);

            assert(!reader_.peek().isNSAnchorName, "Anchor/alias name not fully scanned");

            if(id == TokenID.alias_)
            {
                return aliasToken(startMark, reader_.mark, value);
            }
            if(id == TokenID.anchor)
            {
                return anchorToken(startMark, reader_.mark, value);
            }
            assert(false, "This code should never be reached");
        }

        /// Scan a tag token.
        Token scanTag() @safe
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);

            char[] slice;
            // Index where tag handle ends and tag suffix starts in the tag value
            // (slice) we will produce.
1028 uint handleEnd; 1029 1030 if(c == '<') 1031 { 1032 reader_.forward(2); 1033 1034 handleEnd = 0; 1035 scanTagURIToSlice!"tag"(slice, startMark); 1036 enforce(reader_.peekByte() == '>', 1037 new ScannerException(expected("While scanning a tag, expected a '>'", reader_.peek()), 1038 reader_.mark, "tag started here", startMark)); 1039 reader_.forward(); 1040 } 1041 else if(c.isWhiteSpace) 1042 { 1043 reader_.forward(); 1044 handleEnd = 0; 1045 slice ~= '!'; 1046 } 1047 else 1048 { 1049 uint length = 1; 1050 bool useHandle; 1051 1052 while(!c.isBreakOrSpace) 1053 { 1054 if(c == '!') 1055 { 1056 useHandle = true; 1057 break; 1058 } 1059 ++length; 1060 c = reader_.peek(length); 1061 } 1062 1063 if(useHandle) 1064 { 1065 scanTagHandleToSlice!"tag"(slice, startMark); 1066 handleEnd = cast(uint)slice.length; 1067 } 1068 else 1069 { 1070 reader_.forward(); 1071 slice ~= '!'; 1072 handleEnd = cast(uint)slice.length; 1073 } 1074 1075 scanTagURIToSlice!"tag"(slice, startMark); 1076 } 1077 1078 enforce(reader_.peek().isBreakOrSpace, 1079 new ScannerException(expected("While scanning a tag, expected a ' '", reader_.peek()), 1080 reader_.mark, "tag started here", startMark)); 1081 1082 return tagToken(startMark, reader_.mark, slice, handleEnd); 1083 } 1084 1085 /// Scan a block scalar token with specified style. 1086 Token scanBlockScalar(const ScalarStyle style) @safe 1087 { 1088 const startMark = reader_.mark; 1089 1090 // Scan the header. 1091 reader_.forward(); 1092 1093 const indicators = scanBlockScalarIndicators(startMark); 1094 1095 const chomping = indicators[0]; 1096 const increment = indicators[1]; 1097 scanBlockScalarIgnoredLine(startMark); 1098 1099 // Determine the indentation level and go to the first non-empty line. 1100 Mark endMark; 1101 uint indent = max(1, indent_ + 1); 1102 1103 char[] slice; 1104 // Used to strip the last line breaks written to the slice at the end of the 1105 // scalar, which may be needed based on chomping. 
1106 char[] newBreakSlice; 1107 // Read the first indentation/line breaks before the scalar. 1108 size_t startLen = newBreakSlice.length; 1109 if(increment == int.min) 1110 { 1111 auto indentation = scanBlockScalarIndentationToSlice(newBreakSlice); 1112 endMark = indentation[1]; 1113 indent = max(indent, indentation[0]); 1114 } 1115 else 1116 { 1117 indent += increment - 1; 1118 endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); 1119 } 1120 1121 // int.max means there's no line break (int.max is outside UTF-32). 1122 dchar lineBreak = cast(dchar)int.max; 1123 1124 // Scan the inner part of the block scalar. 1125 while(reader_.column == indent && reader_.peekByte() != '\0') 1126 { 1127 slice ~= newBreakSlice; 1128 const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t'); 1129 // This is where the 'interesting' non-whitespace data gets read. 1130 scanToNextBreakToSlice(slice); 1131 lineBreak = scanLineBreak(); 1132 1133 1134 // This transaction serves to rollback data read in the 1135 // scanBlockScalarBreaksToSlice() call. 1136 newBreakSlice = []; 1137 startLen = slice.length; 1138 // The line breaks should actually be written _after_ the if() block 1139 // below. We work around that by inserting 1140 endMark = scanBlockScalarBreaksToSlice(newBreakSlice, indent); 1141 1142 // This will not run during the last iteration 1143 if(reader_.column == indent && reader_.peekByte() != '\0') 1144 { 1145 // Unfortunately, folding rules are ambiguous. 1146 1147 // This is the folding according to the specification: 1148 if(style == ScalarStyle.folded && lineBreak == '\n' && 1149 leadingNonSpace && !reader_.peekByte().among!(' ', '\t')) 1150 { 1151 // No breaks were scanned; no need to insert the space in the 1152 // middle of slice. 1153 if(startLen == slice.length + newBreakSlice.length) 1154 { 1155 newBreakSlice ~= ' '; 1156 } 1157 } 1158 else 1159 { 1160 // We need to insert in the middle of the slice in case any line 1161 // breaks were scanned. 
1162 newBreakSlice.insert(lineBreak, 0); 1163 } 1164 1165 ////this is Clark Evans's interpretation (also in the spec 1166 ////examples): 1167 // 1168 //if(style == ScalarStyle.folded && lineBreak == '\n') 1169 //{ 1170 // if(startLen == endLen) 1171 // { 1172 // if(!" \t"d.canFind(reader_.peekByte())) 1173 // { 1174 // reader_.sliceBuilder.write(' '); 1175 // } 1176 // else 1177 // { 1178 // chunks ~= lineBreak; 1179 // } 1180 // } 1181 //} 1182 //else 1183 //{ 1184 // reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen); 1185 //} 1186 } 1187 else 1188 { 1189 break; 1190 } 1191 } 1192 1193 // If chompint is Keep, we keep (commit) the last scanned line breaks 1194 // (which are at the end of the scalar). Otherwise re remove them (end the 1195 // transaction). 1196 if(chomping == Chomping.keep) { slice ~= newBreakSlice; } 1197 if(chomping != Chomping.strip && lineBreak != int.max) 1198 { 1199 // If chomping is Keep, we keep the line break but the first line break 1200 // that isn't stripped (since chomping isn't Strip in this branch) must 1201 // be inserted _before_ the other line breaks. 1202 if(chomping == Chomping.keep) 1203 { 1204 slice.insert(lineBreak, startLen); 1205 } 1206 // If chomping is not Keep, discard the line break 1207 else 1208 { 1209 if (lineBreak != '\0') 1210 { 1211 slice ~= lineBreak; 1212 } 1213 } 1214 } 1215 1216 return scalarToken(startMark, endMark, slice, style); 1217 } 1218 1219 /// Scan chomping and indentation indicators of a scalar token. 1220 Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe 1221 { 1222 auto chomping = Chomping.clip; 1223 int increment = int.min; 1224 dchar c = reader_.peek(); 1225 1226 /// Indicators can be in any order. 
1227 if(getChomping(c, chomping)) 1228 { 1229 getIncrement(c, increment, startMark); 1230 } 1231 else 1232 { 1233 const gotIncrement = getIncrement(c, increment, startMark); 1234 if(gotIncrement) { getChomping(c, chomping); } 1235 } 1236 1237 enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), 1238 new ScannerException(expected("While scanning a block scalar, expected a chomping or indentation indicator", c), 1239 reader_.mark, "scalar started here", startMark)); 1240 1241 return tuple(chomping, increment); 1242 } 1243 1244 /// Get chomping indicator, if detected. Return false otherwise. 1245 /// 1246 /// Used in scanBlockScalarIndicators. 1247 /// 1248 /// Params: 1249 /// 1250 /// c = The character that may be a chomping indicator. 1251 /// chomping = Write the chomping value here, if detected. 1252 bool getChomping(ref dchar c, ref Chomping chomping) @safe 1253 { 1254 if(!c.among!('+', '-')) { return false; } 1255 chomping = c == '+' ? Chomping.keep : Chomping.strip; 1256 reader_.forward(); 1257 c = reader_.peek(); 1258 return true; 1259 } 1260 1261 /// Get increment indicator, if detected. Return false otherwise. 1262 /// 1263 /// Used in scanBlockScalarIndicators. 1264 /// 1265 /// Params: 1266 /// 1267 /// c = The character that may be an increment indicator. 1268 /// If an increment indicator is detected, this will be updated to 1269 /// the next character in the Reader. 1270 /// increment = Write the increment value here, if detected. 1271 /// startMark = Mark for error messages. 1272 bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe 1273 { 1274 if(!c.isDigit) { return false; } 1275 // Convert a digit to integer. 
1276 increment = c - '0'; 1277 assert(increment < 10 && increment >= 0, "Digit has invalid value"); 1278 1279 enforce(increment > 0, 1280 new ScannerException(expected("While scanning a block scalar, expected an indentation indicator in range 1-9", "0"), 1281 reader_.mark, "scalar started here", startMark)); 1282 1283 reader_.forward(); 1284 c = reader_.peek(); 1285 return true; 1286 } 1287 1288 /// Scan (and ignore) ignored line in a block scalar. 1289 void scanBlockScalarIgnoredLine(const Mark startMark) @safe 1290 { 1291 findNextNonSpace(); 1292 if(reader_.peekByte()== '#') { scanToNextBreak(); } 1293 1294 enforce(reader_.peek().isBreak, 1295 new ScannerException(expected("While scanning a block scalar, expected a comment or line break", reader_.peek()), 1296 reader_.mark, "scalar started here", startMark)); 1297 1298 scanLineBreak(); 1299 } 1300 1301 /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. 1302 /// 1303 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1304 /// characters into that slice. 1305 Tuple!(uint, Mark) scanBlockScalarIndentationToSlice(ref char[] slice) @safe 1306 { 1307 uint maxIndent; 1308 Mark endMark = reader_.mark; 1309 1310 while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) 1311 { 1312 if(reader_.peekByte() != ' ') 1313 { 1314 slice ~= scanLineBreak(); 1315 endMark = reader_.mark; 1316 continue; 1317 } 1318 reader_.forward(); 1319 maxIndent = max(reader_.column, maxIndent); 1320 } 1321 1322 return tuple(maxIndent, endMark); 1323 } 1324 1325 /// Scan line breaks at lower or specified indentation in a block scalar. 1326 /// 1327 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1328 /// characters into that slice. 
1329 Mark scanBlockScalarBreaksToSlice(ref char[] slice, const uint indent) @safe 1330 { 1331 Mark endMark = reader_.mark; 1332 1333 for(;;) 1334 { 1335 while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } 1336 if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } 1337 slice ~= scanLineBreak(); 1338 endMark = reader_.mark; 1339 } 1340 1341 return endMark; 1342 } 1343 1344 /// Scan a qouted flow scalar token with specified quotes. 1345 Token scanFlowScalar(const ScalarStyle quotes) @safe 1346 { 1347 const startMark = reader_.mark; 1348 const quote = reader_.get(); 1349 1350 char[] slice; 1351 1352 scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); 1353 1354 while(reader_.peek() != quote) 1355 { 1356 scanFlowScalarSpacesToSlice(slice, startMark); 1357 scanFlowScalarNonSpacesToSlice(slice, quotes, startMark); 1358 } 1359 reader_.forward(); 1360 1361 return scalarToken(startMark, reader_.mark, slice, quotes); 1362 } 1363 1364 /// Scan nonspace characters in a flow scalar. 1365 /// 1366 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1367 /// characters into that slice. 
1368 void scanFlowScalarNonSpacesToSlice(ref char[] slice, const ScalarStyle quotes, const Mark startMark) 1369 @safe 1370 { 1371 for(;;) 1372 { 1373 dchar c = reader_.peek(); 1374 1375 size_t numCodePoints; 1376 while(!reader_.peek(numCodePoints).isFlowScalarBreakSpace) { ++numCodePoints; } 1377 1378 if (numCodePoints > 0) { slice ~= reader_.get(numCodePoints); } 1379 1380 c = reader_.peek(); 1381 if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'') 1382 { 1383 reader_.forward(2); 1384 slice ~= '\''; 1385 } 1386 else if((quotes == ScalarStyle.doubleQuoted && c == '\'') || 1387 (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\'))) 1388 { 1389 reader_.forward(); 1390 slice ~= c; 1391 } 1392 else if(quotes == ScalarStyle.doubleQuoted && c == '\\') 1393 { 1394 reader_.forward(); 1395 c = reader_.peek(); 1396 if(c.among!(escapes)) 1397 { 1398 reader_.forward(); 1399 // Escaping has been moved to Parser as it can't be done in 1400 // place (in a slice) in case of '\P' and '\L' (very uncommon, 1401 // but we don't want to break the spec) 1402 char[2] escapeSequence = ['\\', cast(char)c]; 1403 slice ~= escapeSequence; 1404 } 1405 else if(c.among!(escapeHexCodeList)) 1406 { 1407 const hexLength = dyaml.escapes.escapeHexLength(c); 1408 reader_.forward(); 1409 1410 foreach(i; 0 .. 
hexLength) { 1411 enforce(reader_.peek(i).isHexDigit, 1412 new ScannerException(expected("While scanning a double quoted scalar, expected an escape sequence of hexadecimal numbers", reader_.peek(i)), 1413 reader_.mark, "scalar started here", startMark)); 1414 } 1415 char[] hex = reader_.get(hexLength); 1416 1417 assert((hex.length > 0) && (hex.length <= 8), "Hex escape overflow"); 1418 1419 char[2] escapeStart = ['\\', cast(char) c]; 1420 slice ~= escapeStart; 1421 slice ~= hex; 1422 1423 } 1424 else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) 1425 { 1426 scanLineBreak(); 1427 scanFlowScalarBreaksToSlice(slice, startMark); 1428 } 1429 else 1430 { 1431 throw new ScannerException(text("While scanning a double quoted scalar, found unsupported escape character ", c), 1432 reader_.mark, "scalar started here", startMark); 1433 } 1434 } 1435 else { return; } 1436 } 1437 } 1438 1439 /// Scan space characters in a flow scalar. 1440 /// 1441 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1442 /// spaces into that slice. 1443 void scanFlowScalarSpacesToSlice(ref char[] slice, const Mark startMark) @safe 1444 { 1445 // Increase length as long as we see whitespace. 1446 size_t length; 1447 while(reader_.peekByte(length).among!(' ', '\t')) { ++length; } 1448 auto whitespaces = reader_.prefixBytes(length); 1449 1450 // Can check the last byte without striding because '\0' is ASCII 1451 const c = reader_.peek(length); 1452 enforce(c != '\0', 1453 new ScannerException("While scanning a quoted scalar, found unexpected end of buffer", 1454 reader_.mark, "scalar started here", startMark)); 1455 1456 // Spaces not followed by a line break. 1457 if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029')) 1458 { 1459 reader_.forward(length); 1460 slice ~= whitespaces; 1461 return; 1462 } 1463 1464 // There's a line break after the spaces. 
1465 reader_.forward(length); 1466 const lineBreak = scanLineBreak(); 1467 1468 if(lineBreak != '\n') { slice ~= lineBreak; } 1469 1470 // If we have extra line breaks after the first, scan them into the 1471 // slice. 1472 const bool extraBreaks = scanFlowScalarBreaksToSlice(slice, startMark); 1473 1474 // No extra breaks, one normal line break. Replace it with a space. 1475 if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } 1476 } 1477 1478 /// Scan line breaks in a flow scalar. 1479 /// 1480 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1481 /// line breaks into that slice. 1482 bool scanFlowScalarBreaksToSlice(ref char[] slice, const Mark startMark) @safe 1483 { 1484 // True if at least one line break was found. 1485 bool anyBreaks; 1486 for(;;) 1487 { 1488 // Instead of checking indentation, we check for document separators. 1489 const prefix = reader_.prefix(3); 1490 enforce(!(prefix == "---" || prefix == "...") || 1491 !reader_.peek(3).isWhiteSpace, 1492 new ScannerException("While scanning a quoted scalar, found unexpected document separator", 1493 reader_.mark, "scalar started here", startMark)); 1494 1495 // Skip any whitespaces. 1496 while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); } 1497 1498 // Encountered a non-whitespace non-linebreak character, so we're done. 1499 if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } 1500 1501 const lineBreak = scanLineBreak(); 1502 anyBreaks = true; 1503 slice ~= lineBreak; 1504 } 1505 return anyBreaks; 1506 } 1507 1508 /// Scan plain scalar token (no block, no quotes). 1509 Token scanPlain() @safe 1510 { 1511 // We keep track of the allowSimpleKey_ flag here. 
1512 // Indentation rules are loosed for the flow context 1513 const startMark = reader_.mark; 1514 Mark endMark = startMark; 1515 const indent = indent_ + 1; 1516 1517 // We allow zero indentation for scalars, but then we need to check for 1518 // document separators at the beginning of the line. 1519 // if(indent == 0) { indent = 1; } 1520 1521 char[] slice; 1522 1523 char[] newSpacesSlice; 1524 // Stop at a comment. 1525 while(reader_.peekByte() != '#') 1526 { 1527 // Scan the entire plain scalar. 1528 size_t length; 1529 dchar c = reader_.peek(length); 1530 for(;;) 1531 { 1532 const cNext = reader_.peek(length + 1); 1533 if(c.isWhiteSpace || 1534 (flowLevel_ == 0 && c == ':' && cNext.isWhiteSpace) || 1535 (flowLevel_ > 0 && c == ':' && (cNext.isWhiteSpace || cNext.among!(',', '[', ']', '{', '}'))) || 1536 (flowLevel_ > 0 && c.among!(',', '[', ']', '{', '}'))) 1537 { 1538 break; 1539 } 1540 ++length; 1541 c = cNext; 1542 } 1543 1544 if(length == 0) { break; } 1545 1546 allowSimpleKey_ = false; 1547 1548 newSpacesSlice ~= reader_.get(length); 1549 1550 endMark = reader_.mark; 1551 1552 slice ~= newSpacesSlice; 1553 newSpacesSlice = []; 1554 1555 const startLength = slice.length; 1556 scanPlainSpacesToSlice(newSpacesSlice); 1557 if(startLength == slice.length + newSpacesSlice.length || 1558 (flowLevel_ == 0 && reader_.column < indent)) 1559 { 1560 break; 1561 } 1562 } 1563 1564 return scalarToken(startMark, endMark, slice, ScalarStyle.plain); 1565 } 1566 1567 /// Scan spaces in a plain scalar. 1568 /// 1569 /// Assumes that the caller is building a slice in Reader, and puts the spaces 1570 /// into that slice. 1571 void scanPlainSpacesToSlice(ref char[] slice) @trusted 1572 { 1573 // The specification is really confusing about tabs in plain scalars. 1574 // We just forbid them completely. Do not use tabs in YAML! 1575 1576 // Get as many plain spaces as there are. 
1577 size_t length; 1578 while(reader_.peekByte(length) == ' ') { ++length; } 1579 char[] whitespaces = reader_.prefixBytes(length); 1580 reader_.forward(length); 1581 1582 const dchar c = reader_.peek(); 1583 if(!c.isNSChar) 1584 { 1585 // We have spaces, but no newline. 1586 if(whitespaces.length > 0) { slice ~= whitespaces; } 1587 return; 1588 } 1589 1590 // Newline after the spaces (if any) 1591 const lineBreak = scanLineBreak(); 1592 allowSimpleKey_ = true; 1593 1594 static bool end(Reader reader_) @safe pure 1595 { 1596 const prefix = reader_.prefix(3); 1597 return ("---" == prefix || "..." == prefix) 1598 && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'); 1599 } 1600 1601 if(end(reader_)) { return; } 1602 1603 bool extraBreaks; 1604 1605 char[] newSlice; 1606 if(lineBreak != '\n') { newSlice ~= lineBreak; } 1607 while(reader_.peek().isNSChar) 1608 { 1609 if(reader_.peekByte() == ' ') { reader_.forward(); } 1610 else 1611 { 1612 const lBreak = scanLineBreak(); 1613 extraBreaks = true; 1614 newSlice ~= lBreak; 1615 1616 if(end(reader_)) { return; } 1617 } 1618 } 1619 slice ~= newSlice; 1620 1621 // No line breaks, only a space. 1622 if(lineBreak == '\n' && !extraBreaks) { slice ~= ' '; } 1623 } 1624 1625 /// Scan handle of a tag token. 1626 /// 1627 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1628 /// characters into that slice. 1629 void scanTagHandleToSlice(string name)(ref char[] slice, const Mark startMark) 1630 { 1631 dchar c = reader_.peek(); 1632 enum contextMsg = "While scanning a " ~ name ~ ", expected a !"; 1633 // should this be an assert? 
1634 enforce(c == '!', 1635 new ScannerException(expected(contextMsg, c), reader_.mark, "tag started here", startMark)); 1636 1637 uint length = 1; 1638 c = reader_.peek(length); 1639 if(c != ' ') 1640 { 1641 while(c.isAlphaNum || c.among!('-', '_')) 1642 { 1643 ++length; 1644 c = reader_.peek(length); 1645 } 1646 enforce(c == '!', 1647 new ScannerException(expected(contextMsg, c), reader_.mark(length), "tag started here", startMark)); 1648 ++length; 1649 } 1650 1651 slice ~= reader_.get(length); 1652 } 1653 1654 /// Scan URI in a tag token. 1655 /// 1656 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1657 /// characters into that slice. 1658 void scanTagURIToSlice(string name)(ref char[] slice, const Mark startMark) 1659 { 1660 // Note: we do not check if URI is well-formed. 1661 dchar c = reader_.peek(); 1662 const startLen = slice.length; 1663 { 1664 uint length; 1665 while(c.isAlphaNum || c.isURIChar) 1666 { 1667 if(c == '%') 1668 { 1669 auto chars = reader_.get(length); 1670 slice ~= chars; 1671 length = 0; 1672 scanURIEscapesToSlice!name(slice, startMark); 1673 } 1674 else { ++length; } 1675 c = reader_.peek(length); 1676 } 1677 if(length > 0) 1678 { 1679 auto chars = reader_.get(length); 1680 slice ~= chars; 1681 length = 0; 1682 } 1683 } 1684 // OK if we scanned something, error otherwise. 1685 enum contextMsg = "While parsing a " ~ name ~ ", expected a URI"; 1686 enforce(slice.length > startLen, 1687 new ScannerException(expected(contextMsg, c), reader_.mark, "tag started here", startMark)); 1688 } 1689 1690 // Not @nogc yet because std.utf.decode is not @nogc 1691 /// Scan URI escape sequences. 1692 /// 1693 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1694 /// characters into that slice. 1695 void scanURIEscapesToSlice(string name)(ref char[] slice, const Mark startMark) 1696 { 1697 import core.exception : UnicodeException; 1698 // URI escapes encode a UTF-8 string. 
We store UTF-8 code units here for 1699 // decoding into UTF-32. 1700 Appender!string buffer; 1701 1702 1703 enum contextMsg = "While scanning a " ~ name; 1704 while(reader_.peekByte() == '%') 1705 { 1706 reader_.forward(); 1707 char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)]; 1708 1709 enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit, 1710 new ScannerException(expected(contextMsg ~ ", expected a URI escape sequence of 2 hexadecimal numbers", nextByte), 1711 reader_.mark, "tag started here", startMark)); 1712 1713 buffer ~= nextByte[].to!ubyte(16); 1714 1715 reader_.forward(2); 1716 } 1717 try 1718 { 1719 foreach (dchar chr; buffer.data) 1720 { 1721 slice ~= chr; 1722 } 1723 } 1724 catch (UnicodeException) 1725 { 1726 throw new ScannerException(contextMsg ~ ", found invalid UTF-8 data encoded in URI escape sequence", 1727 reader_.mark, "tag started here", startMark); 1728 } 1729 } 1730 1731 1732 /// Scan a line break, if any. 1733 /// 1734 /// Transforms: 1735 /// '\r\n' : '\n' 1736 /// '\r' : '\n' 1737 /// '\n' : '\n' 1738 /// '\u0085' : '\n' 1739 /// '\u2028' : '\u2028' 1740 /// '\u2029 : '\u2029' 1741 /// no break : '\0' 1742 dchar scanLineBreak() @safe 1743 { 1744 // Fast path for ASCII line breaks. 
1745 const b = reader_.peekByte(); 1746 if(b < 0x80) 1747 { 1748 if(b == '\n' || b == '\r') 1749 { 1750 if(reader_.prefix(2) == "\r\n") { reader_.forward(2); } 1751 else { reader_.forward(); } 1752 return '\n'; 1753 } 1754 return '\0'; 1755 } 1756 1757 const c = reader_.peek(); 1758 if(c == '\x85') 1759 { 1760 reader_.forward(); 1761 return '\n'; 1762 } 1763 if(c == '\u2028' || c == '\u2029') 1764 { 1765 reader_.forward(); 1766 return c; 1767 } 1768 return '\0'; 1769 } 1770 } 1771 1772 // Issue 309 - https://github.com/dlang-community/D-YAML/issues/309 1773 @safe unittest 1774 { 1775 enum str = q"EOS 1776 exp: | 1777 foobar 1778 EOS".chomp; 1779 1780 auto r = Reader(cast(ubyte[])str.dup); 1781 auto s = Scanner(r); 1782 auto elems = s.map!"a.value".filter!"a.length > 0".array; 1783 assert(elems[1] == "foobar"); 1784 } 1785 1786 @safe unittest 1787 { 1788 import dyaml.loader : Loader; 1789 1790 const str = `test: key: value`; 1791 1792 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1793 assert(exc); 1794 assert(exc.message() == 1795 "Unable to load <unknown>: Mapping values are not allowed here\n" ~ 1796 "<unknown>:1,10"); 1797 } 1798 1799 @safe unittest 1800 { 1801 import dyaml.loader : Loader; 1802 1803 const str = `test: ? 
foo 1804 : bar`; 1805 1806 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1807 assert(exc); 1808 assert(exc.message() == 1809 "Unable to load <unknown>: Mapping keys are not allowed here\n" ~ 1810 "<unknown>:1,7"); 1811 } 1812 1813 @safe unittest 1814 { 1815 import dyaml.loader : Loader; 1816 1817 const str = `@`; 1818 1819 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1820 assert(exc); 1821 assert(exc.message() == 1822 "Unable to load <unknown>: While scanning for the next token, found character '@', index 64 that cannot start any token\n" ~ 1823 "<unknown>:1,1"); 1824 } 1825 1826 @safe unittest 1827 { 1828 import dyaml.loader : Loader; 1829 1830 const str = `foo: bar 1831 meh`; 1832 1833 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1834 assert(exc); 1835 assert(exc.message() == 1836 "Unable to load <unknown>: While scanning a simple key, could not find expected ':'\n" ~ 1837 "<unknown>:2,4\nkey started here: <unknown>:2,1"); 1838 } 1839 1840 @safe unittest 1841 { 1842 import dyaml.loader : Loader; 1843 1844 const str = `foo: &A bar 1845 *A ]`; 1846 1847 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1848 assert(exc); 1849 assert(exc.message() == 1850 "Unable to load <unknown>: While scanning a simple key, could not find expected ':'\n" ~ 1851 "<unknown>:2,4\nkey started here: <unknown>:2,1"); 1852 } 1853 1854 @safe unittest 1855 { 1856 import dyaml.loader : Loader; 1857 1858 const str = `foo: &[`; 1859 1860 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1861 assert(exc); 1862 assert(exc.message() == 1863 "Unable to load <unknown>: While scanning an anchor or alias, expected a printable character besides '[', ']', '{', '}' and ',', but found [\n" ~ 1864 "<unknown>:1,7\nstarted here: <unknown>:1,6"); 1865 } 1866 1867 @safe unittest 1868 { 1869 import dyaml.loader : Loader; 1870 1871 const str = `%?`; 1872 1873 
const exc = collectException!LoaderException(Loader.fromString(str).load()); 1874 assert(exc); 1875 assert(exc.message() == 1876 "Unable to load <unknown>: While scanning a directive, expected alphanumeric, '-' or '_', but found ?\n" ~ 1877 "<unknown>:1,2\ndirective started here: <unknown>:1,1"); 1878 } 1879 1880 @safe unittest 1881 { 1882 import dyaml.loader : Loader; 1883 1884 const str = `%b?`; 1885 1886 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1887 assert(exc); 1888 assert(exc.message() == 1889 "Unable to load <unknown>: While scanning a directive, expected alphanumeric, '-' or '_', but found ?\n" ~ 1890 "<unknown>:1,3\ndirective started here: <unknown>:1,1"); 1891 } 1892 1893 @safe unittest 1894 { 1895 import dyaml.loader : Loader; 1896 1897 const str = `%YAML 1?`; 1898 1899 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1900 assert(exc); 1901 assert(exc.message() == 1902 "Unable to load <unknown>: While scanning a directive, expected digit or '.', but found ?\n" ~ 1903 "<unknown>:1,8\ndirective started here: <unknown>:1,1"); 1904 } 1905 1906 @safe unittest 1907 { 1908 import dyaml.loader : Loader; 1909 1910 const str = `%YAML 1.1?`; 1911 1912 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1913 assert(exc); 1914 assert(exc.message() == 1915 "Unable to load <unknown>: While scanning a directive, expected digit or '.', but found ?\n" ~ 1916 "<unknown>:1,10\ndirective started here: <unknown>:1,1"); 1917 } 1918 1919 @safe unittest 1920 { 1921 import dyaml.loader : Loader; 1922 1923 const str = `%YAML ?`; 1924 1925 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1926 assert(exc); 1927 assert(exc.message() == 1928 "Unable to load <unknown>: While scanning a directive, expected a digit, but found ?\n" ~ 1929 "<unknown>:1,7\ndirective started here: <unknown>:1,1"); 1930 } 1931 1932 @safe unittest 1933 { 1934 import dyaml.loader : Loader; 1935 
1936 const str = `%TAG !a!<`; 1937 1938 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1939 assert(exc); 1940 assert(exc.message() == 1941 "Unable to load <unknown>: While scanning a directive handle, expected ' ', but found <\n" ~ 1942 "<unknown>:1,9\ndirective started here: <unknown>:1,1"); 1943 } 1944 1945 @safe unittest 1946 { 1947 import dyaml.loader : Loader; 1948 1949 const str = `%TAG !a! !>`; 1950 1951 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1952 assert(exc); 1953 assert(exc.message() == 1954 "Unable to load <unknown>: While scanning a directive prefix, expected ' ', but found >\n" ~ 1955 "<unknown>:1,11\ndirective started here: <unknown>:1,1"); 1956 } 1957 1958 @safe unittest 1959 { 1960 import dyaml.loader : Loader; 1961 1962 const str = `%YAML 1.0 ?`; 1963 1964 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1965 assert(exc); 1966 assert(exc.message() == 1967 "Unable to load <unknown>: While scanning a directive, expected a comment or a line break, but found ?\n" ~ 1968 "<unknown>:1,11\ndirective started here: <unknown>:1,1"); 1969 } 1970 1971 @safe unittest 1972 { 1973 import dyaml.loader : Loader; 1974 1975 const str = `foo: !<a#`; 1976 1977 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1978 assert(exc); 1979 assert(exc.message() == 1980 "Unable to load <unknown>: While scanning a tag, expected a '>', but found #\n" ~ 1981 "<unknown>:1,9\ntag started here: <unknown>:1,6"); 1982 } 1983 1984 @safe unittest 1985 { 1986 import dyaml.loader : Loader; 1987 1988 const str = `foo: !<a>#`; 1989 1990 const exc = collectException!LoaderException(Loader.fromString(str).load()); 1991 assert(exc); 1992 assert(exc.message() == 1993 "Unable to load <unknown>: While scanning a tag, expected a ' ', but found #\n" ~ 1994 "<unknown>:1,10\ntag started here: <unknown>:1,6"); 1995 } 1996 1997 @safe unittest 1998 { 1999 import dyaml.loader : Loader; 
2000 2001 const str = `foo: !<#`; 2002 2003 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2004 assert(exc); 2005 assert(exc.message() == 2006 "Unable to load <unknown>: While parsing a tag, expected a URI, but found #\n" ~ 2007 "<unknown>:1,8\ntag started here: <unknown>:1,6"); 2008 } 2009 2010 @safe unittest 2011 { 2012 import dyaml.loader : Loader; 2013 2014 const str = `foo: |b`; 2015 2016 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2017 assert(exc); 2018 assert(exc.message() == 2019 "Unable to load <unknown>: While scanning a block scalar, expected a chomping or indentation indicator, but found b\n" ~ 2020 "<unknown>:1,7\nscalar started here: <unknown>:1,6"); 2021 } 2022 2023 @safe unittest 2024 { 2025 import dyaml.loader : Loader; 2026 2027 const str = `foo: |0`; 2028 2029 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2030 assert(exc); 2031 assert(exc.message() == 2032 "Unable to load <unknown>: While scanning a block scalar, expected an indentation indicator in range 1-9, but found 0\n" ~ 2033 "<unknown>:1,7\nscalar started here: <unknown>:1,6"); 2034 } 2035 2036 @safe unittest 2037 { 2038 import dyaml.loader : Loader; 2039 2040 const str = `"\x"`; 2041 2042 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2043 assert(exc); 2044 assert(exc.message() == 2045 "Unable to load <unknown>: While scanning a double quoted scalar, expected an escape sequence of hexadecimal numbers, but found \"\n" ~ 2046 "<unknown>:1,4\nscalar started here: <unknown>:1,1"); 2047 } 2048 2049 @safe unittest 2050 { 2051 import dyaml.loader : Loader; 2052 2053 const str = `"\:"`; 2054 2055 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2056 assert(exc); 2057 assert(exc.message() == 2058 "Unable to load <unknown>: While scanning a double quoted scalar, found unsupported escape character :\n" ~ 2059 "<unknown>:1,3\nscalar started here: 
<unknown>:1,1"); 2060 } 2061 2062 @safe unittest 2063 { 2064 import dyaml.loader : Loader; 2065 2066 const str = `"an unfinished scal`; 2067 2068 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2069 assert(exc); 2070 assert(exc.message() == 2071 "Unable to load <unknown>: While scanning a quoted scalar, found unexpected end of buffer\n" ~ 2072 "<unknown>:1,20\nscalar started here: <unknown>:1,1"); 2073 } 2074 2075 @safe unittest 2076 { 2077 import dyaml.loader : Loader; 2078 2079 const str = `"an unfinished scal 2080 ---`; 2081 2082 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2083 assert(exc); 2084 assert(exc.message() == 2085 "Unable to load <unknown>: While scanning a quoted scalar, found unexpected document separator\n" ~ 2086 "<unknown>:2,1\nscalar started here: <unknown>:1,1"); 2087 } 2088 2089 @safe unittest 2090 { 2091 import dyaml.loader : Loader; 2092 2093 const str = `Error: !a:!`; 2094 2095 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2096 assert(exc); 2097 assert(exc.message() == 2098 "Unable to load <unknown>: While scanning a tag, expected a !, but found :\n" ~ 2099 "<unknown>:1,10\ntag started here: <unknown>:1,8"); 2100 } 2101 2102 @safe unittest 2103 { 2104 import dyaml.loader : Loader; 2105 2106 const str = `Error: !e!tag%:)`; 2107 2108 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2109 assert(exc); 2110 assert(exc.message() == 2111 "Unable to load <unknown>: While scanning a tag, expected a URI escape sequence of 2 hexadecimal numbers, but found :)\n" ~ 2112 "<unknown>:1,15\ntag started here: <unknown>:1,8"); 2113 } 2114 2115 @safe unittest 2116 { 2117 import dyaml.loader : Loader; 2118 2119 const str = `Error: !e!tag%99%99`; 2120 2121 const exc = collectException!LoaderException(Loader.fromString(str).load()); 2122 assert(exc); 2123 assert(exc.message() == 2124 "Unable to load <unknown>: While scanning a tag, found invalid 
UTF-8 data encoded in URI escape sequence\n" ~ 2125 "<unknown>:1,20\ntag started here: <unknown>:1,8"); 2126 } 2127 2128 private void insert(ref char[] slice, const dchar c, const size_t position) @safe pure 2129 in(position <= slice.length, text("Trying to insert after the end of the slice (", position, " > ", slice.length, ")")) 2130 { 2131 const point = position; 2132 const movedLength = slice.length - point; 2133 2134 // Encode c into UTF-8 2135 char[4] encodeBuf; 2136 if(c < 0x80) { encodeBuf[0] = cast(char)c; } 2137 const size_t bytes = c < 0x80 ? 1 : encode(encodeBuf, c); 2138 2139 slice.length += bytes; 2140 if(movedLength > 0) 2141 { 2142 copy(slice[point..point + movedLength * char.sizeof], 2143 slice[point + bytes .. point + bytes + movedLength * char.sizeof]); 2144 } 2145 slice[point .. point + bytes] = encodeBuf[0 .. bytes]; 2146 }