// Copyright Ferdinand Majerech 2011-2014.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/// YAML scanner.
/// Code based on PyYAML: http://www.pyyaml.org
module dyaml.scanner;


import core.stdc.string;

import std.algorithm;
import std.array;
import std.conv;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.exception;
import std.string;
import std.typecons;
import std.traits : Unqual;
import std.utf;

import dyaml.escapes;
import dyaml.exception;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;

package:
/// Scanner produces tokens of the following types:
/// STREAM-START
/// STREAM-END
/// DIRECTIVE(name, value)
/// DOCUMENT-START
/// DOCUMENT-END
/// BLOCK-SEQUENCE-START
/// BLOCK-MAPPING-START
/// BLOCK-END
/// FLOW-SEQUENCE-START
/// FLOW-MAPPING-START
/// FLOW-SEQUENCE-END
/// FLOW-MAPPING-END
/// BLOCK-ENTRY
/// FLOW-ENTRY
/// KEY
/// VALUE
/// ALIAS(value)
/// ANCHOR(value)
/// TAG(value)
/// SCALAR(value, plain, style)

/// Characters that terminate a line (or the stream, via '\0').
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
    '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
    '\r', '\u0085', '\u2028', '\u2029');

alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
    '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');

alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');

alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');

alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');

/// Marked exception thrown at scanner errors.
///
/// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
    mixin MarkedExceptionCtors;
}

/// Generates tokens from data provided by a Reader.
struct Scanner
{
    private:
        /// A simple key is a key that is not denoted by the '?' indicator.
        /// For example:
        ///   ---
        ///   block simple key: value
        ///   ? not a simple key:
        ///   : { flow simple key: value }
        /// We emit the KEY token before all keys, so when we find a potential simple
        /// key, we try to locate the corresponding ':' indicator. Simple keys should be
        /// limited to a single line and 1024 characters.
        ///
        /// 16 bytes on 64-bit.
        static struct SimpleKey
        {
            /// Character index in reader where the key starts.
            uint charIndex = uint.max;
            /// Index of the key token from start (first token scanned being 0).
            uint tokenIndex;
            /// Line the key starts at.
            uint line;
            /// Column the key starts at.
            ushort column;
            /// Is this required to be a simple key?
            bool required;
            /// Is this struct "null" (invalid)?
            bool isNull;
        }

        /// Block chomping types.
        enum Chomping
        {
            /// Strip all trailing line breaks. '-' indicator.
            strip,
            /// Line break of the last line is preserved, others discarded. Default.
            clip,
            /// All trailing line breaks are preserved. '+' indicator.
            keep
        }

        /// Reader used to read from a file/stream.
        Reader reader_;
        /// Are we done scanning?
        bool done_;

        /// Level of nesting in flow context. If 0, we're in block context.
        uint flowLevel_;
        /// Current indentation level.
        int indent_ = -1;
        /// Past indentation levels. Used as a stack.
        Appender!(int[]) indents_;

        /// Processed tokens not yet emitted.
/// Used as a queue.
        Queue!Token tokens_;

        /// Number of tokens emitted through the getToken method.
        uint tokensTaken_;

        /// Can a simple key start at the current position? A simple key may start:
        /// - at the beginning of the line, not counting indentation spaces
        ///   (in block context),
        /// - after '{', '[', ',' (in the flow context),
        /// - after '?', ':', '-' (in the block context).
        /// In the block context, this flag also signifies if a block collection
        /// may start at the current position.
        bool allowSimpleKey_ = true;

        /// Possible simple keys indexed by flow levels.
        SimpleKey[] possibleSimpleKeys_;

    public:
        /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
        {
            reader_ = reader;
            fetchStreamStart();
        }

        /// Advance to the next token.
        void popFront() @safe
        {
            ++tokensTaken_;
            tokens_.pop();
        }

        /// Return the current token, without removing it from the queue.
        const(Token) front() @safe
        {
            enforce(!empty, "No token left to peek");
            return tokens_.peek();
        }

        /// Return whether there are any more tokens left.
        bool empty() @safe
        {
            while (needMoreTokens())
            {
                fetchToken();
            }
            return tokens_.empty;
        }

    private:
        /// Most scanning error messages have the same format; so build them with this
        /// function.
        string expected(T)(string expected, T found)
        {
            return text("expected ", expected, ", but found ", found);
        }

        /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
        bool needMoreTokens() @safe pure
        {
            if(done_)         { return false; }
            if(tokens_.empty) { return true; }

            /// The current token may be a potential simple key, so we need to look further.
            stalePossibleSimpleKeys();
            return nextPossibleSimpleKey() == tokensTaken_;
        }

        /// Fetch a token, adding it to tokens_.
        void fetchToken() @safe
        {
            // Eat whitespaces and comments until we reach the next token.
            scanToNextToken();

            // Remove obsolete possible simple keys.
            stalePossibleSimpleKeys();

            // Compare current indentation and column. It may add some tokens
            // and decrease the current indentation level.
            unwindIndent(reader_.column);

            // Get the next character.
            const dchar c = reader_.peekByte();

            // Fetch the token.
            if(c == '\0')            { return fetchStreamEnd();     }
            if(checkDirective())     { return fetchDirective();     }
            if(checkDocumentStart()) { return fetchDocumentStart(); }
            if(checkDocumentEnd())   { return fetchDocumentEnd();   }
            // Order of the following checks is NOT significant.
            switch(c)
            {
                case '[':  return fetchFlowSequenceStart();
                case '{':  return fetchFlowMappingStart();
                case ']':  return fetchFlowSequenceEnd();
                case '}':  return fetchFlowMappingEnd();
                case ',':  return fetchFlowEntry();
                case '!':  return fetchTag();
                case '\'': return fetchSingle();
                case '\"': return fetchDouble();
                case '*':  return fetchAlias();
                case '&':  return fetchAnchor();
                case '?':  if(checkKey())        { return fetchKey();        } goto default;
                case ':':  if(checkValue())      { return fetchValue();      } goto default;
                case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
                case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
                case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
                default:   if(checkPlain())      { return fetchPlain();      }
            }

            throw new ScannerException("While scanning for the next token, found character " ~
                                       "\'%s\', index %s that cannot start any token"
                                       .format(c, to!int(c)), reader_.mark);
        }


        /// Return the token number of the nearest possible simple key.
uint nextPossibleSimpleKey() @safe pure nothrow @nogc
        {
            uint minTokenNumber = uint.max;
            foreach(ref simpleKey; possibleSimpleKeys_)
            {
                if(simpleKey.isNull) { continue; }
                minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
            }
            return minTokenNumber;
        }

        /// Remove entries that are no longer possible simple keys.
        ///
        /// According to the YAML specification, simple keys
        /// - should be limited to a single line,
        /// - should be no longer than 1024 characters.
        /// Disabling this will allow simple keys of any length and
        /// height (may cause problems if indentation is broken though).
        void stalePossibleSimpleKeys() @safe pure
        {
            foreach(ref key; possibleSimpleKeys_)
            {
                if(key.isNull) { continue; }
                if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
                {
                    enforce(!key.required,
                            new ScannerException("While scanning a simple key",
                                                 Mark(key.line, key.column),
                                                 "could not find expected ':'", reader_.mark));
                    key.isNull = true;
                }
            }
        }

        /// Check if the next token starts a possible simple key and if so, save its position.
        ///
        /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
        void savePossibleSimpleKey() @safe pure
        {
            // Check if a simple key is required at the current position.
            const required = (flowLevel_ == 0 && indent_ == reader_.column);
            assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~
                   "the first token in the current line. Therefore it is always allowed.");

            if(!allowSimpleKey_) { return; }

            // The next token might be a simple key, so save its number and position.
            removePossibleSimpleKey();
            const tokenCount = tokensTaken_ + cast(uint)tokens_.length;

            const line   = reader_.line;
            const column = reader_.column;
            const key    = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
                                     cast(ushort)min(column, ushort.max), required);

            if(possibleSimpleKeys_.length <= flowLevel_)
            {
                const oldLength = possibleSimpleKeys_.length;
                possibleSimpleKeys_.length = flowLevel_ + 1;
                // No need to initialize the last element; it is assigned below.
                possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
            }
            possibleSimpleKeys_[flowLevel_] = key;
        }

        /// Remove the saved possible key position at the current flow level.
        void removePossibleSimpleKey() @safe pure
        {
            if(possibleSimpleKeys_.length <= flowLevel_) { return; }

            if(!possibleSimpleKeys_[flowLevel_].isNull)
            {
                const key = possibleSimpleKeys_[flowLevel_];
                enforce(!key.required,
                        new ScannerException("While scanning a simple key",
                                             Mark(key.line, key.column),
                                             "could not find expected ':'", reader_.mark));
                possibleSimpleKeys_[flowLevel_].isNull = true;
            }
        }

        /// Decrease indentation, removing entries in indents_.
        ///
        /// Params: column = Current column in the file/stream.
        void unwindIndent(const int column) @safe
        {
            if(flowLevel_ > 0)
            {
                // In flow context, tokens should respect indentation.
                // The condition should be `indent >= column` according to the spec.
                // But this condition will prohibit intuitively correct
                // constructions such as
                // key : {
                // }

                // In the flow context, indentation is ignored. We make the scanner less
                // restrictive than what the specification requires.
                // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
                // {
                //     throw new ScannerException("Invalid indentation or unclosed '[' or '{'",
                //                                reader_.mark)
                // }
                return;
            }

            // In block context, we may need to issue the BLOCK-END tokens.
            while(indent_ > column)
            {
                // Check the stack is non-empty *before* reading its back element.
                assert(indents_.data.length);
                indent_ = indents_.data.back;
                indents_.shrinkTo(indents_.data.length - 1);
                tokens_.push(blockEndToken(reader_.mark, reader_.mark));
            }
        }

        /// Increase indentation if needed.
        ///
        /// Params: column = Current column in the file/stream.
        ///
        /// Returns: true if the indentation was increased, false otherwise.
        bool addIndent(int column) @safe
        {
            if(indent_ >= column) { return false; }
            indents_ ~= indent_;
            indent_ = column;
            return true;
        }


        /// Add STREAM-START token.
        void fetchStreamStart() @safe nothrow
        {
            tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
        }

        /// Add STREAM-END token.
        void fetchStreamEnd() @safe
        {
            // Set indentation to -1.
            unwindIndent(-1);
            removePossibleSimpleKey();
            allowSimpleKey_ = false;
            possibleSimpleKeys_.destroy;

            tokens_.push(streamEndToken(reader_.mark, reader_.mark));
            done_ = true;
        }

        /// Add DIRECTIVE token.
        void fetchDirective() @safe
        {
            // Set indentation to -1.
            unwindIndent(-1);
            // Reset simple keys.
            removePossibleSimpleKey();
            allowSimpleKey_ = false;

            auto directive = scanDirective();
            tokens_.push(directive);
        }

        /// Add DOCUMENT-START or DOCUMENT-END token.
        void fetchDocumentIndicator(TokenID id)()
            if(id == TokenID.documentStart || id == TokenID.documentEnd)
        {
            // Set indentation to -1.
            unwindIndent(-1);
            // Reset simple keys. Note that there can't be a block collection after '---'.
            removePossibleSimpleKey();
            allowSimpleKey_ = false;

            Mark startMark = reader_.mark;
            reader_.forward(3);
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add DOCUMENT-START or DOCUMENT-END token.
        alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.documentStart);
        alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.documentEnd);

        /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        void fetchFlowCollectionStart(TokenID id)() @safe
        {
            // '[' and '{' may start a simple key.
            savePossibleSimpleKey();
            // Simple keys are allowed after '[' and '{'.
            allowSimpleKey_ = true;
            ++flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.flowSequenceStart);
        alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.flowMappingStart);

        /// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        void fetchFlowCollectionEnd(TokenID id)()
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // No simple keys after ']' and '}'.
            allowSimpleKey_ = false;
            --flowLevel_;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.flowSequenceEnd);
        alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.flowMappingEnd);

        /// Add FLOW-ENTRY token.
        void fetchFlowEntry() @safe
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after ','.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(flowEntryToken(startMark, reader_.mark));
        }

        /// Additional checks used in block context in fetchBlockEntry and fetchKey.
        ///
        /// Params: type = String representing the token type we might need to add.
        ///         id   = Token type we might need to add.
        void blockChecks(string type, TokenID id)()
        {
            enum context = type ~ " keys are not allowed here";
            // Are we allowed to start a key (not necessarily a simple one)?
            enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

            if(addIndent(reader_.column))
            {
                tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
            }
        }

        /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
        void fetchBlockEntry() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.blockSequenceStart)(); }

            // It's an error for the block entry to occur in the flow context,
            // but we let the parser detect this.

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '-'.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(blockEntryToken(startMark, reader_.mark));
        }

        /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
        void fetchKey() @safe
        {
            if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.blockMappingStart)(); }

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '?' in the block context.
            allowSimpleKey_ = (flowLevel_ == 0);

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(keyToken(startMark, reader_.mark));
        }

        /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
void fetchValue() @safe
        {
            // Do we determine a simple key?
            if(possibleSimpleKeys_.length > flowLevel_ &&
               !possibleSimpleKeys_[flowLevel_].isNull)
            {
                const key = possibleSimpleKeys_[flowLevel_];
                possibleSimpleKeys_[flowLevel_].isNull = true;
                Mark keyMark = Mark(key.line, key.column);
                const idx = key.tokenIndex - tokensTaken_;

                assert(idx >= 0);

                // Add KEY.
                // Manually inserting since tokens are immutable (need linked list).
                tokens_.insert(keyToken(keyMark, keyMark), idx);

                // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
                if(flowLevel_ == 0 && addIndent(key.column))
                {
                    tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
                }

                // There cannot be two simple keys in a row.
                allowSimpleKey_ = false;
            }
            // Part of a complex key
            else
            {
                // We can start a complex value if and only if we can start a simple key.
                enforce(flowLevel_ > 0 || allowSimpleKey_,
                        new ScannerException("Mapping values are not allowed here", reader_.mark));

                // If this value starts a new block mapping, we need to add
                // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
                if(flowLevel_ == 0 && addIndent(reader_.column))
                {
                    tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
                }

                // Reset possible simple key on the current level.
                removePossibleSimpleKey();
                // Simple keys are allowed after ':' in the block context.
                allowSimpleKey_ = (flowLevel_ == 0);
            }

            // Add VALUE.
            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(valueToken(startMark, reader_.mark));
        }

        /// Add ALIAS or ANCHOR token.
        void fetchAnchor_(TokenID id)() @safe
            if(id == TokenID.alias_ || id == TokenID.anchor)
        {
            // ALIAS/ANCHOR could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after ALIAS/ANCHOR.
            allowSimpleKey_ = false;

            auto anchor = scanAnchor(id);
            tokens_.push(anchor);
        }

        /// Aliases to add ALIAS or ANCHOR token.
        alias fetchAlias = fetchAnchor_!(TokenID.alias_);
        alias fetchAnchor = fetchAnchor_!(TokenID.anchor);

        /// Add TAG token.
        void fetchTag() @safe
        {
            // TAG could start a simple key.
            savePossibleSimpleKey();
            // No simple keys after TAG.
            allowSimpleKey_ = false;

            tokens_.push(scanTag());
        }

        /// Add block SCALAR token.
        void fetchBlockScalar(ScalarStyle style)() @safe
            if(style == ScalarStyle.literal || style == ScalarStyle.folded)
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // A simple key may follow a block scalar.
            allowSimpleKey_ = true;

            auto blockScalar = scanBlockScalar(style);
            tokens_.push(blockScalar);
        }

        /// Aliases to add literal or folded block scalar.
        alias fetchLiteral = fetchBlockScalar!(ScalarStyle.literal);
        alias fetchFolded = fetchBlockScalar!(ScalarStyle.folded);

        /// Add quoted flow SCALAR token.
        void fetchFlowScalar(ScalarStyle quotes)()
        {
            // A flow scalar could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after flow scalars.
            allowSimpleKey_ = false;

            // Scan and add SCALAR.
            auto scalar = scanFlowScalar(quotes);
            tokens_.push(scalar);
        }

        /// Aliases to add single or double quoted flow scalar.
        alias fetchSingle = fetchFlowScalar!(ScalarStyle.singleQuoted);
        alias fetchDouble = fetchFlowScalar!(ScalarStyle.doubleQuoted);

        /// Add plain SCALAR token.
        void fetchPlain() @safe
        {
            // A plain scalar could be a simple key
            savePossibleSimpleKey();
            // No simple keys after plain scalars. But note that scanPlain() will
            // change this flag if the scan is finished at the beginning of the line.
            allowSimpleKey_ = false;
            auto plain = scanPlain();

            // Scan and add SCALAR. May change allowSimpleKey_
            tokens_.push(plain);
        }

    pure:

        /// Check if the next token is DIRECTIVE:        ^ '%' ...
        bool checkDirective() @safe
        {
            return reader_.peekByte() == '%' && reader_.column == 0;
        }

        /// Check if the next token is DOCUMENT-START:   ^ '---' (' '|'\n')
        bool checkDocumentStart() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '-' &&
                   reader_.prefix(3) == "---" &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
        bool checkDocumentEnd() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            return reader_.column == 0 &&
                   reader_.peekByte() == '.' &&
                   reader_.prefix(3) == "..." &&
                   reader_.peek(3).isWhiteSpace;
        }

        /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
        bool checkBlockEntry() @safe
        {
            return !!reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is KEY(flow context):    '?'
        ///
        /// or KEY(block context):   '?' (' '|'\n')
        bool checkKey() @safe
        {
            return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
        }

        /// Check if the next token is VALUE(flow context):  ':'
        ///
        /// or VALUE(block context): ':' (' '|'\n')
        bool checkValue() @safe
        {
            return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
        }

        /// Check if the next token is a plain scalar.
        ///
        /// A plain scalar may start with any non-space character except:
        ///   '-', '?', ':', ',', '[', ']', '{', '}',
        ///   '#', '&', '*', '!', '|', '>', '\'', '\"',
        ///   '%', '@', '`'.
        ///
        /// It may also start with
        ///   '-', '?', ':'
        /// if it is followed by a non-space character.
        ///
        /// Note that we limit the last rule to the block context (except the
        /// '-' character) because we want the flow context to be space
        /// independent.
        bool checkPlain() @safe
        {
            const c = reader_.peek();
            if(!c.isNonScalarStartCharacter)
            {
                return true;
            }
            return !reader_.peek(1).isWhiteSpace &&
                   (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
        }

        /// Move to the next non-space character.
        void findNextNonSpace() @safe
        {
            while(reader_.peekByte() == ' ') { reader_.forward(); }
        }

        /// Scan a string of alphanumeric or "-_" characters.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanAlphaNumericToSlice(string name)(const Mark startMark)
        {
            size_t length;
            dchar c = reader_.peek();
            while(c.isAlphaNum || c.among!('-', '_')) { c = reader_.peek(++length); }

            enforce(length > 0, new ScannerException("While scanning " ~ name,
                    startMark, expected("alphanumeric, '-' or '_'", c), reader_.mark));

            reader_.sliceBuilder.write(reader_.get(length));
        }

        /// Scan and throw away all characters until next line break.
        void scanToNextBreak() @safe
        {
            while(!reader_.peek().isBreak) { reader_.forward(); }
        }

        /// Scan all characters until next line break.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanToNextBreakToSlice() @safe
        {
            uint length;
            while(!reader_.peek(length).isBreak)
            {
                ++length;
            }
            reader_.sliceBuilder.write(reader_.get(length));
        }


        /// Move to next token in the file/stream.
        ///
        /// We ignore spaces, line breaks and comments.
        /// If we find a line break in the block context, we set
        /// `allowSimpleKey_` on.
        ///
        /// We do not yet support BOM inside the stream as the
        /// specification requires. Any such mark will be considered as a part
        /// of the document.
        void scanToNextToken() @safe
        {
            // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
            //   Tabs cannot precede tokens
            //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
            //   KEY(block), VALUE(block), BLOCK-ENTRY
            // So the checking code is
            //   if <TAB>:
            //       allowSimpleKey_ = false
            // We also need to add the check for `allowSimpleKey_ == true` to
            // `unwindIndent` before issuing BLOCK-END.
            // Scanners for block, flow, and plain scalars need to be modified.

            for(;;)
            {
                findNextNonSpace();

                if(reader_.peekByte() == '#') { scanToNextBreak(); }
                if(scanLineBreak() != '\0')
                {
                    if(flowLevel_ == 0) { allowSimpleKey_ = true; }
                }
                else
                {
                    break;
                }
            }
        }

        /// Scan directive token.
        Token scanDirective() @safe
        {
            Mark startMark = reader_.mark;
            // Skip the '%'.
            reader_.forward();

            // Scan directive name
            reader_.sliceBuilder.begin();
            scanDirectiveNameToSlice(startMark);
            const name = reader_.sliceBuilder.finish();

            reader_.sliceBuilder.begin();

            // Index where tag handle ends and suffix starts in a tag directive value.
            uint tagHandleEnd = uint.max;
            if(name == "YAML")     { scanYAMLDirectiveValueToSlice(startMark); }
            else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
            char[] value = reader_.sliceBuilder.finish();

            Mark endMark = reader_.mark;

            DirectiveType directive;
            if(name == "YAML")     { directive = DirectiveType.yaml; }
            else if(name == "TAG") { directive = DirectiveType.tag; }
            else
            {
                directive = DirectiveType.reserved;
                scanToNextBreak();
            }

            scanDirectiveIgnoredLine(startMark);

            return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
        }

        /// Scan name of a directive token.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanDirectiveNameToSlice(const Mark startMark) @safe
        {
            // Scan directive name.
            scanAlphaNumericToSlice!"a directive"(startMark);

            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                    new ScannerException("While scanning a directive", startMark,
                                         expected("alphanumeric, '-' or '_'", reader_.peek()),
                                         reader_.mark));
        }

        /// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe
        {
            findNextNonSpace();

            scanYAMLDirectiveNumberToSlice(startMark);

            enforce(reader_.peekByte() == '.',
                    new ScannerException("While scanning a directive", startMark,
                                         expected("digit or '.'", reader_.peek()), reader_.mark));
            // Skip the '.'.
            reader_.forward();

            reader_.sliceBuilder.write('.');
            scanYAMLDirectiveNumberToSlice(startMark);

            enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
                    new ScannerException("While scanning a directive", startMark,
                                         expected("digit or '.'", reader_.peek()), reader_.mark));
        }

        /// Scan a number from a YAML directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe
        {
            enforce(isDigit(reader_.peek()),
                    new ScannerException("While scanning a directive", startMark,
                                         expected("digit", reader_.peek()), reader_.mark));

            // Already found the first digit in the enforce(), so set length to 1.
            uint length = 1;
            while(reader_.peek(length).isDigit) { ++length; }

            reader_.sliceBuilder.write(reader_.get(length));
        }

        /// Scan value of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// Returns: Length of tag handle (which is before tag prefix) in scanned data
        uint scanTagDirectiveValueToSlice(const Mark startMark) @safe
        {
            findNextNonSpace();
            const startLength = reader_.sliceBuilder.length;
            scanTagDirectiveHandleToSlice(startMark);
            const handleLength = cast(uint)(reader_.sliceBuilder.length - startLength);
            findNextNonSpace();
            scanTagDirectivePrefixToSlice(startMark);

            return handleLength;
        }

        /// Scan handle of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
930 void scanTagDirectiveHandleToSlice(const Mark startMark) @safe 931 { 932 scanTagHandleToSlice!"directive"(startMark); 933 enforce(reader_.peekByte() == ' ', 934 new ScannerException("While scanning a directive handle", startMark, 935 expected("' '", reader_.peek()), reader_.mark)); 936 } 937 938 /// Scan prefix of a tag directive. 939 /// 940 /// Assumes that the caller is building a slice in Reader, and puts the scanned 941 /// characters into that slice. 942 void scanTagDirectivePrefixToSlice(const Mark startMark) @safe 943 { 944 scanTagURIToSlice!"directive"(startMark); 945 enforce(reader_.peek().among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'), 946 new ScannerException("While scanning a directive prefix", startMark, 947 expected("' '", reader_.peek()), reader_.mark)); 948 } 949 950 /// Scan (and ignore) ignored line after a directive. 951 void scanDirectiveIgnoredLine(const Mark startMark) @safe 952 { 953 findNextNonSpace(); 954 if(reader_.peekByte() == '#') { scanToNextBreak(); } 955 enforce(reader_.peek().isBreak, 956 new ScannerException("While scanning a directive", startMark, 957 expected("comment or a line break", reader_.peek()), reader_.mark)); 958 scanLineBreak(); 959 } 960 961 962 /// Scan an alias or an anchor. 963 /// 964 /// The specification does not restrict characters for anchors and 965 /// aliases. This may lead to problems, for instance, the document: 966 /// [ *alias, value ] 967 /// can be interpteted in two ways, as 968 /// [ "value" ] 969 /// and 970 /// [ *alias , "value" ] 971 /// Therefore we restrict aliases to ASCII alphanumeric characters. 
    Token scanAnchor(const TokenID id) @safe
    {
        const startMark = reader_.mark;
        // '*' introduces an alias, '&' an anchor.
        const dchar i = reader_.get();

        reader_.sliceBuilder.begin();
        if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
        else         { scanAlphaNumericToSlice!"an anchor"(startMark); }
        // On error, value is discarded as we return immediately
        char[] value = reader_.sliceBuilder.finish();

        enum anchorCtx = "While scanning an anchor";
        enum aliasCtx  = "While scanning an alias";
        // The name must be terminated by whitespace or a flow indicator.
        enforce(reader_.peek().isWhiteSpace ||
            reader_.peekByte().among!('?', ':', ',', ']', '}', '%', '@'),
            new ScannerException(i == '*' ? aliasCtx : anchorCtx, startMark,
                expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark));

        if(id == TokenID.alias_)
        {
            return aliasToken(startMark, reader_.mark, value);
        }
        if(id == TokenID.anchor)
        {
            return anchorToken(startMark, reader_.mark, value);
        }
        assert(false, "This code should never be reached");
    }

    /// Scan a tag token.
    Token scanTag() @safe
    {
        const startMark = reader_.mark;
        dchar c = reader_.peek(1);

        reader_.sliceBuilder.begin();
        scope(failure) { reader_.sliceBuilder.finish(); }
        // Index where tag handle ends and tag suffix starts in the tag value
        // (slice) we will produce.
        uint handleEnd;

        if(c == '<')
        {
            // Verbatim tag: '!<...>'.
            reader_.forward(2);

            handleEnd = 0;
            scanTagURIToSlice!"tag"(startMark);
            enforce(reader_.peekByte() == '>',
                new ScannerException("While scanning a tag", startMark,
                    expected("'>'", reader_.peek()), reader_.mark));
            reader_.forward();
        }
        else if(c.isWhiteSpace)
        {
            // Lone '!': the non-specific tag.
            reader_.forward();
            handleEnd = 0;
            reader_.sliceBuilder.write('!');
        }
        else
        {
            uint length = 1;
            bool useHandle;

            // Look ahead for a second '!' to decide whether this is a
            // named handle ('!handle!suffix') or a primary handle ('!suffix').
            while(!c.isBreakOrSpace)
            {
                if(c == '!')
                {
                    useHandle = true;
                    break;
                }
                ++length;
                c = reader_.peek(length);
            }

            if(useHandle)
            {
                scanTagHandleToSlice!"tag"(startMark);
                handleEnd = cast(uint)reader_.sliceBuilder.length;
            }
            else
            {
                reader_.forward();
                reader_.sliceBuilder.write('!');
                handleEnd = cast(uint)reader_.sliceBuilder.length;
            }

            scanTagURIToSlice!"tag"(startMark);
        }

        // A tag must be terminated by a space, NUL, or a line break.
        enforce(reader_.peek().isBreakOrSpace,
            new ScannerException("While scanning a tag", startMark, expected("' '", reader_.peek()),
                reader_.mark));

        char[] slice = reader_.sliceBuilder.finish();
        return tagToken(startMark, reader_.mark, slice, handleEnd);
    }

    /// Scan a block scalar token with specified style.
    Token scanBlockScalar(const ScalarStyle style) @safe
    {
        const startMark = reader_.mark;

        // Scan the header.
        reader_.forward();

        const indicators = scanBlockScalarIndicators(startMark);

        const chomping  = indicators[0];
        const increment = indicators[1];
        scanBlockScalarIgnoredLine(startMark);

        // Determine the indentation level and go to the first non-empty line.
        Mark endMark;
        uint indent = max(1, indent_ + 1);

        reader_.sliceBuilder.begin();
        alias Transaction = SliceBuilder.Transaction;
        // Used to strip the last line breaks written to the slice at the end of the
        // scalar, which may be needed based on chomping.
        Transaction breaksTransaction = Transaction(&reader_.sliceBuilder);
        // Read the first indentation/line breaks before the scalar.
        size_t startLen = reader_.sliceBuilder.length;
        if(increment == int.min)
        {
            // No explicit indentation indicator: auto-detect from content.
            auto indentation = scanBlockScalarIndentationToSlice();
            endMark = indentation[1];
            indent  = max(indent, indentation[0]);
        }
        else
        {
            indent += increment - 1;
            endMark = scanBlockScalarBreaksToSlice(indent);
        }

        // int.max means there's no line break (int.max is outside UTF-32).
        dchar lineBreak = cast(dchar)int.max;

        // Scan the inner part of the block scalar.
        while(reader_.column == indent && reader_.peekByte() != '\0')
        {
            breaksTransaction.commit();
            const bool leadingNonSpace = !reader_.peekByte().among!(' ', '\t');
            // This is where the 'interesting' non-whitespace data gets read.
            scanToNextBreakToSlice();
            lineBreak = scanLineBreak();


            // This transaction serves to rollback data read in the
            // scanBlockScalarBreaksToSlice() call.
            breaksTransaction = Transaction(&reader_.sliceBuilder);
            startLen = reader_.sliceBuilder.length;
            // The line breaks should actually be written _after_ the if() block
            // below. We work around that by inserting the line break at startLen
            // (the position remembered just above) when needed.
            endMark = scanBlockScalarBreaksToSlice(indent);

            // This will not run during the last iteration (see the if() vs the
            // while()), hence breaksTransaction rollback (which happens after this
            // loop) will never roll back data written in this if() block.
            if(reader_.column == indent && reader_.peekByte() != '\0')
            {
                // Unfortunately, folding rules are ambiguous.

                // This is the folding according to the specification:
                if(style == ScalarStyle.folded && lineBreak == '\n' &&
                   leadingNonSpace && !reader_.peekByte().among!(' ', '\t'))
                {
                    // No breaks were scanned; no need to insert the space in the
                    // middle of slice.
                    if(startLen == reader_.sliceBuilder.length)
                    {
                        reader_.sliceBuilder.write(' ');
                    }
                }
                else
                {
                    // We need to insert in the middle of the slice in case any line
                    // breaks were scanned.
                    reader_.sliceBuilder.insert(lineBreak, startLen);
                }

                ////this is Clark Evans's interpretation (also in the spec
                ////examples):
                //
                //if(style == ScalarStyle.folded && lineBreak == '\n')
                //{
                //    if(startLen == endLen)
                //    {
                //        if(!" \t"d.canFind(reader_.peekByte()))
                //        {
                //            reader_.sliceBuilder.write(' ');
                //        }
                //        else
                //        {
                //            chunks ~= lineBreak;
                //        }
                //    }
                //}
                //else
                //{
                //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
                //}
            }
            else
            {
                break;
            }
        }

        // If chomping is Keep, we keep (commit) the last scanned line breaks
        // (which are at the end of the scalar). Otherwise we remove them (end the
        // transaction).
        if(chomping == Chomping.keep) { breaksTransaction.commit(); }
        else                          { breaksTransaction.end(); }
        if(chomping != Chomping.strip && lineBreak != int.max)
        {
            // If chomping is Keep, we keep the line break but the first line break
            // that isn't stripped (since chomping isn't Strip in this branch) must
            // be inserted _before_ the other line breaks.
            if(chomping == Chomping.keep)
            {
                reader_.sliceBuilder.insert(lineBreak, startLen);
            }
            // If chomping is not Keep, breaksTransaction was cancelled so we can
            // directly write the first line break (as it isn't stripped - chomping
            // is not Strip)
            else
            {
                reader_.sliceBuilder.write(lineBreak);
            }
        }

        char[] slice = reader_.sliceBuilder.finish();
        return scalarToken(startMark, endMark, slice, style);
    }

    /// Scan chomping and indentation indicators of a scalar token.
    Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
    {
        auto chomping = Chomping.clip;
        // int.min means "no indentation indicator given".
        int increment = int.min;
        dchar c       = reader_.peek();

        /// Indicators can be in any order.
        if(getChomping(c, chomping))
        {
            getIncrement(c, increment, startMark);
        }
        else
        {
            const gotIncrement = getIncrement(c, increment, startMark);
            if(gotIncrement) { getChomping(c, chomping); }
        }

        // The indicators must be followed by a space, NUL, or a line break.
        enforce(c.among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029'),
            new ScannerException("While scanning a block scalar", startMark,
                expected("chomping or indentation indicator", c), reader_.mark));

        return tuple(chomping, increment);
    }

    /// Get chomping indicator, if detected. Return false otherwise.
    ///
    /// Used in scanBlockScalarIndicators.
    ///
    /// Params:
    ///
    /// c        = The character that may be a chomping indicator.
    /// chomping = Write the chomping value here, if detected.
    bool getChomping(ref dchar c, ref Chomping chomping) @safe
    {
        if(!c.among!('+', '-')) { return false; }
        // '+' keeps trailing line breaks, '-' strips them.
        chomping = c == '+' ? Chomping.keep : Chomping.strip;
        reader_.forward();
        c = reader_.peek();
        return true;
    }

    /// Get increment indicator, if detected. Return false otherwise.
    ///
    /// Used in scanBlockScalarIndicators.
1252 /// 1253 /// Params: 1254 /// 1255 /// c = The character that may be an increment indicator. 1256 /// If an increment indicator is detected, this will be updated to 1257 /// the next character in the Reader. 1258 /// increment = Write the increment value here, if detected. 1259 /// startMark = Mark for error messages. 1260 bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe 1261 { 1262 if(!c.isDigit) { return false; } 1263 // Convert a digit to integer. 1264 increment = c - '0'; 1265 assert(increment < 10 && increment >= 0, "Digit has invalid value"); 1266 1267 enforce(increment > 0, 1268 new ScannerException("While scanning a block scalar", startMark, 1269 expected("indentation indicator in range 1-9", "0"), reader_.mark)); 1270 1271 reader_.forward(); 1272 c = reader_.peek(); 1273 return true; 1274 } 1275 1276 /// Scan (and ignore) ignored line in a block scalar. 1277 void scanBlockScalarIgnoredLine(const Mark startMark) @safe 1278 { 1279 findNextNonSpace(); 1280 if(reader_.peekByte()== '#') { scanToNextBreak(); } 1281 1282 enforce(reader_.peek().isBreak, 1283 new ScannerException("While scanning a block scalar", startMark, 1284 expected("comment or line break", reader_.peek()), reader_.mark)); 1285 1286 scanLineBreak(); 1287 } 1288 1289 /// Scan indentation in a block scalar, returning line breaks, max indent and end mark. 1290 /// 1291 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1292 /// characters into that slice. 
1293 Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe 1294 { 1295 uint maxIndent; 1296 Mark endMark = reader_.mark; 1297 1298 while(reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) 1299 { 1300 if(reader_.peekByte() != ' ') 1301 { 1302 reader_.sliceBuilder.write(scanLineBreak()); 1303 endMark = reader_.mark; 1304 continue; 1305 } 1306 reader_.forward(); 1307 maxIndent = max(reader_.column, maxIndent); 1308 } 1309 1310 return tuple(maxIndent, endMark); 1311 } 1312 1313 /// Scan line breaks at lower or specified indentation in a block scalar. 1314 /// 1315 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1316 /// characters into that slice. 1317 Mark scanBlockScalarBreaksToSlice(const uint indent) @safe 1318 { 1319 Mark endMark = reader_.mark; 1320 1321 for(;;) 1322 { 1323 while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); } 1324 if(!reader_.peek().among!('\n', '\r', '\u0085', '\u2028', '\u2029')) { break; } 1325 reader_.sliceBuilder.write(scanLineBreak()); 1326 endMark = reader_.mark; 1327 } 1328 1329 return endMark; 1330 } 1331 1332 /// Scan a qouted flow scalar token with specified quotes. 1333 Token scanFlowScalar(const ScalarStyle quotes) @safe 1334 { 1335 const startMark = reader_.mark; 1336 const quote = reader_.get(); 1337 1338 reader_.sliceBuilder.begin(); 1339 1340 scanFlowScalarNonSpacesToSlice(quotes, startMark); 1341 1342 while(reader_.peek() != quote) 1343 { 1344 scanFlowScalarSpacesToSlice(startMark); 1345 scanFlowScalarNonSpacesToSlice(quotes, startMark); 1346 } 1347 reader_.forward(); 1348 1349 auto slice = reader_.sliceBuilder.finish(); 1350 return scalarToken(startMark, reader_.mark, slice, quotes); 1351 } 1352 1353 /// Scan nonspace characters in a flow scalar. 1354 /// 1355 /// Assumes that the caller is building a slice in Reader, and puts the scanned 1356 /// characters into that slice. 
    void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
        @safe
    {
        for(;;)
        {
            dchar c = reader_.peek();

            size_t numCodePoints;
            // This is an optimized way of writing:
            // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
            outer: for(size_t oldSliceLength;;)
            {
                // This will not necessarily make slice 32 chars longer, as not all
                // code points are 1 char.
                const char[] slice = reader_.slice(numCodePoints + 32);
                // If the slice did not grow, the Reader ran out of data.
                enforce(slice.length != oldSliceLength,
                    new ScannerException("While reading a flow scalar", startMark,
                        "reached end of file", reader_.mark));

                for(size_t i = oldSliceLength; i < slice.length;)
                {
                    // slice is UTF-8 - need to decode
                    const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
                    if(ch.isFlowScalarBreakSpace) { break outer; }
                    ++numCodePoints;
                }
                oldSliceLength = slice.length;
            }

            // Copy the run of ordinary characters into the slice.
            reader_.sliceBuilder.write(reader_.get(numCodePoints));

            c = reader_.peek();
            if(quotes == ScalarStyle.singleQuoted && c == '\'' && reader_.peek(1) == '\'')
            {
                // '' inside a single-quoted scalar is an escaped single quote.
                reader_.forward(2);
                reader_.sliceBuilder.write('\'');
            }
            else if((quotes == ScalarStyle.doubleQuoted && c == '\'') ||
                    (quotes == ScalarStyle.singleQuoted && c.among!('"', '\\')))
            {
                // These characters have no special meaning in this quoting style.
                reader_.forward();
                reader_.sliceBuilder.write(c);
            }
            else if(quotes == ScalarStyle.doubleQuoted && c == '\\')
            {
                // Backslash escape sequence in a double-quoted scalar.
                reader_.forward();
                c = reader_.peek();
                if(c.among!(escapes))
                {
                    reader_.forward();
                    // Escaping has been moved to Parser as it can't be done in
                    // place (in a slice) in case of '\P' and '\L' (very uncommon,
                    // but we don't want to break the spec)
                    char[2] escapeSequence = ['\\', cast(char)c];
                    reader_.sliceBuilder.write(escapeSequence);
                }
                else if(c.among!(escapeHexCodeList))
                {
                    // \x, \u or \U: fixed-length hexadecimal escape.
                    const hexLength = dyaml.escapes.escapeHexLength(c);
                    reader_.forward();

                    foreach(i; 0 .. hexLength) {
                        enforce(reader_.peek(i).isHexDigit,
                            new ScannerException("While scanning a double quoted scalar", startMark,
                                expected("escape sequence of hexadecimal numbers",
                                    reader_.peek(i)), reader_.mark));
                    }
                    char[] hex = reader_.get(hexLength);

                    enforce((hex.length > 0) && (hex.length <= 8),
                        new ScannerException("While scanning a double quoted scalar", startMark,
                            "overflow when parsing an escape sequence of " ~
                            "hexadecimal numbers.", reader_.mark));

                    // Like above, decoding the escape is left to the Parser;
                    // we write the raw escape sequence into the slice.
                    char[2] escapeStart = ['\\', cast(char) c];
                    reader_.sliceBuilder.write(escapeStart);
                    reader_.sliceBuilder.write(hex);

                }
                else if(c.among!('\n', '\r', '\u0085', '\u2028', '\u2029'))
                {
                    // Escaped line break: line continuation.
                    scanLineBreak();
                    scanFlowScalarBreaksToSlice(startMark);
                }
                else
                {
                    throw new ScannerException("While scanning a double quoted scalar", startMark,
                        text("found unsupported escape character ", c),
                        reader_.mark);
                }
            }
            else { return; }
        }
    }

    /// Scan space characters in a flow scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// spaces into that slice.
    void scanFlowScalarSpacesToSlice(const Mark startMark) @safe
    {
        // Increase length as long as we see whitespace.
        size_t length;
        while(reader_.peekByte(length).among!(' ', '\t')) { ++length; }
        auto whitespaces = reader_.prefixBytes(length);

        // Can check the last byte without striding because '\0' is ASCII
        const c = reader_.peek(length);
        enforce(c != '\0',
            new ScannerException("While scanning a quoted scalar", startMark,
                "found unexpected end of buffer", reader_.mark));

        // Spaces not followed by a line break.
        if(!c.among!('\n', '\r', '\u0085', '\u2028', '\u2029'))
        {
            reader_.forward(length);
            reader_.sliceBuilder.write(whitespaces);
            return;
        }

        // There's a line break after the spaces.
        reader_.forward(length);
        const lineBreak = scanLineBreak();

        if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }

        // If we have extra line breaks after the first, scan them into the
        // slice.
        const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);

        // No extra breaks, one normal line break. Replace it with a space.
        if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
    }

    /// Scan line breaks in a flow scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// line breaks into that slice.
    bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe
    {
        // True if at least one line break was found.
        bool anyBreaks;
        for(;;)
        {
            // Instead of checking indentation, we check for document separators.
            const prefix = reader_.prefix(3);
            enforce(!(prefix == "---" || prefix == "...") ||
                !reader_.peek(3).isWhiteSpace,
                new ScannerException("While scanning a quoted scalar", startMark,
                    "found unexpected document separator", reader_.mark));

            // Skip any whitespaces.
            while(reader_.peekByte().among!(' ', '\t')) { reader_.forward(); }

            // Encountered a non-whitespace non-linebreak character, so we're done.
            if(!reader_.peek().among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029')) { break; }

            const lineBreak = scanLineBreak();
            anyBreaks = true;
            reader_.sliceBuilder.write(lineBreak);
        }
        return anyBreaks;
    }

    /// Scan plain scalar token (no block, no quotes).
    Token scanPlain() @safe
    {
        // We keep track of the allowSimpleKey_ flag here.
        // Indentation rules are loosened for the flow context
        const startMark = reader_.mark;
        Mark endMark = startMark;
        const indent = indent_ + 1;

        // We allow zero indentation for scalars, but then we need to check for
        // document separators at the beginning of the line.
        // if(indent == 0) { indent = 1; }

        reader_.sliceBuilder.begin();

        alias Transaction = SliceBuilder.Transaction;
        Transaction spacesTransaction;
        // Stop at a comment.
        while(reader_.peekByte() != '#')
        {
            // Scan the entire plain scalar.
            size_t length;
            dchar c = void;
            // Moved the if() out of the loop for optimization.
            if(flowLevel_ == 0)
            {
                // In the block context a plain scalar ends at whitespace or
                // at ': ' (colon followed by whitespace).
                c = reader_.peek(length);
                for(;;)
                {
                    const cNext = reader_.peek(length + 1);
                    if(c.isWhiteSpace ||
                       (c == ':' && cNext.isWhiteSpace))
                    {
                        break;
                    }
                    ++length;
                    c = cNext;
                }
            }
            else
            {
                // In the flow context flow indicators also terminate the scalar.
                for(;;)
                {
                    c = reader_.peek(length);
                    if(c.isWhiteSpace || c.among!(',', ':', '?', '[', ']', '{', '}'))
                    {
                        break;
                    }
                    ++length;
                }
            }

            // It's not clear what we should do with ':' in the flow context.
            enforce(flowLevel_ == 0 || c != ':' ||
               reader_.peek(length + 1).isWhiteSpace ||
               reader_.peek(length + 1).among!(',', '[', ']', '{', '}'),
                new ScannerException("While scanning a plain scalar", startMark,
                    "found unexpected ':' . Please check " ~
                    "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
                    reader_.mark));

            if(length == 0) { break; }

            allowSimpleKey_ = false;

            reader_.sliceBuilder.write(reader_.get(length));

            endMark = reader_.mark;

            spacesTransaction.commit();
            spacesTransaction = Transaction(&reader_.sliceBuilder);

            const startLength = reader_.sliceBuilder.length;
            scanPlainSpacesToSlice();
            // Stop if no spaces were consumed (no continuation) or if the
            // next line is not indented deeply enough (block context only).
            if(startLength == reader_.sliceBuilder.length ||
               (flowLevel_ == 0 && reader_.column < indent))
            {
                break;
            }
        }

        spacesTransaction.end();
        char[] slice = reader_.sliceBuilder.finish();

        return scalarToken(startMark, endMark, slice, ScalarStyle.plain);
    }

    /// Scan spaces in a plain scalar.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the spaces
    /// into that slice.
    void scanPlainSpacesToSlice() @safe
    {
        // The specification is really confusing about tabs in plain scalars.
        // We just forbid them completely. Do not use tabs in YAML!

        // Get as many plain spaces as there are.
        size_t length;
        while(reader_.peekByte(length) == ' ') { ++length; }
        char[] whitespaces = reader_.prefixBytes(length);
        reader_.forward(length);

        const dchar c = reader_.peek();
        if(!c.isNSChar)
        {
            // We have spaces, but no newline.
            if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
            return;
        }

        // Newline after the spaces (if any)
        const lineBreak = scanLineBreak();
        allowSimpleKey_ = true;

        // True when a document separator ('---' or '...') starts the next line.
        static bool end(Reader reader_) @safe pure
        {
            const prefix = reader_.prefix(3);
            return ("---" == prefix || "..." == prefix)
                    && reader_.peek(3).among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');
        }

        if(end(reader_)) { return; }

        bool extraBreaks;

        alias Transaction = SliceBuilder.Transaction;
        auto transaction = Transaction(&reader_.sliceBuilder);
        if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
        while(reader_.peek().isNSChar)
        {
            if(reader_.peekByte() == ' ') { reader_.forward(); }
            else
            {
                const lBreak = scanLineBreak();
                extraBreaks  = true;
                reader_.sliceBuilder.write(lBreak);

                // A document separator ends the scalar; the transaction is
                // not committed, discarding the breaks written above.
                if(end(reader_)) { return; }
            }
        }
        transaction.commit();

        // No line breaks, only a space.
        if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
    }

    /// Scan handle of a tag token.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// characters into that slice.
    void scanTagHandleToSlice(string name)(const Mark startMark)
    {
        dchar c = reader_.peek();
        enum contextMsg = "While scanning a " ~ name;
        // A handle always starts with '!'.
        enforce(c == '!',
            new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark));

        uint length = 1;
        c = reader_.peek(length);
        if(c != ' ')
        {
            // A named handle ('!name!') must end with a second '!'.
            while(c.isAlphaNum || c.among!('-', '_'))
            {
                ++length;
                c = reader_.peek(length);
            }
            enforce(c == '!',
                new ScannerException(contextMsg, startMark, expected("'!'", c), reader_.mark));
            ++length;
        }

        reader_.sliceBuilder.write(reader_.get(length));
    }

    /// Scan URI in a tag token.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// characters into that slice.
    void scanTagURIToSlice(string name)(const Mark startMark)
    {
        // Note: we do not check if URI is well-formed.
        dchar c = reader_.peek();
        const startLen = reader_.sliceBuilder.length;
        {
            uint length;
            while(c.isAlphaNum || c.isURIChar)
            {
                if(c == '%')
                {
                    // Flush the plain characters read so far, then decode
                    // the %XX escape sequence(s).
                    auto chars = reader_.get(length);
                    reader_.sliceBuilder.write(chars);
                    length = 0;
                    scanURIEscapesToSlice!name(startMark);
                }
                else { ++length; }
                c = reader_.peek(length);
            }
            if(length > 0)
            {
                auto chars = reader_.get(length);
                reader_.sliceBuilder.write(chars);
                length = 0;
            }
        }
        // OK if we scanned something, error otherwise.
        enum contextMsg = "While parsing a " ~ name;
        enforce(reader_.sliceBuilder.length > startLen,
            new ScannerException(contextMsg, startMark, expected("URI", c), reader_.mark));
    }

    // Not @nogc yet because std.utf.decode is not @nogc
    /// Scan URI escape sequences.
    ///
    /// Assumes that the caller is building a slice in Reader, and puts the scanned
    /// characters into that slice.
    void scanURIEscapesToSlice(string name)(const Mark startMark)
    {
        import core.exception : UnicodeException;
        // URI escapes encode a UTF-8 string. We store UTF-8 code units here for
        // decoding into UTF-32.
        Appender!string buffer;


        enum contextMsg = "While scanning a " ~ name;
        while(reader_.peekByte() == '%')
        {
            reader_.forward();
            // The two hex digits of a %XX escape.
            char[2] nextByte = [reader_.peekByte(), reader_.peekByte(1)];

            enforce(nextByte[0].isHexDigit && nextByte[1].isHexDigit,
                new ScannerException(contextMsg, startMark,
                    expected("URI escape sequence of 2 hexadecimal " ~
                        "numbers", nextByte), reader_.mark));

            buffer ~= nextByte[].to!ubyte(16);

            reader_.forward(2);
        }
        try
        {
            // foreach decodes the accumulated UTF-8 into code points;
            // invalid sequences throw UnicodeException.
            foreach (dchar chr; buffer.data)
            {
                reader_.sliceBuilder.write(chr);
            }
        }
        catch (UnicodeException)
        {
            throw new ScannerException(contextMsg, startMark,
                    "Invalid UTF-8 data encoded in URI escape sequence",
                    reader_.mark);
        }
    }


    /// Scan a line break, if any.
    ///
    /// Transforms:
    ///   '\r\n'   : '\n'
    ///   '\r'     : '\n'
    ///   '\n'     : '\n'
    ///   '\u0085' : '\n'
    ///   '\u2028' : '\u2028'
    ///   '\u2029' : '\u2029'
    ///   no break : '\0'
    dchar scanLineBreak() @safe
    {
        // Fast path for ASCII line breaks.
        const b = reader_.peekByte();
        if(b < 0x80)
        {
            if(b == '\n' || b == '\r')
            {
                // '\r\n' is consumed as a single break.
                if(reader_.prefix(2) == "\r\n") { reader_.forward(2); }
                else { reader_.forward(); }
                return '\n';
            }
            return '\0';
        }

        const c = reader_.peek();
        if(c == '\x85')
        {
            reader_.forward();
            return '\n';
        }
        if(c == '\u2028' || c == '\u2029')
        {
            reader_.forward();
            return c;
        }
        return '\0';
    }
}