1 
2 //          Copyright Ferdinand Majerech 2011-2014.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /// YAML scanner.
8 /// Code based on PyYAML: http://www.pyyaml.org
9 module dyaml.scanner;
10 
11 
import core.stdc.string;

import std.algorithm;
import std.array;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.container;
import std.conv;
import std.exception;
import std.string;
import std.traits : Unqual;
import std.typecons;

import dyaml.escapes;
import dyaml.exception;
import dyaml.fastcharsearch;
import dyaml.nogcutil;
import dyaml.queue;
import dyaml.reader;
import dyaml.style;
import dyaml.token;
32 
33 package:
34 /// Scanner produces tokens of the following types:
35 /// STREAM-START
36 /// STREAM-END
37 /// DIRECTIVE(name, value)
38 /// DOCUMENT-START
39 /// DOCUMENT-END
40 /// BLOCK-SEQUENCE-START
41 /// BLOCK-MAPPING-START
42 /// BLOCK-END
43 /// FLOW-SEQUENCE-START
44 /// FLOW-MAPPING-START
45 /// FLOW-SEQUENCE-END
46 /// FLOW-MAPPING-END
47 /// BLOCK-ENTRY
48 /// FLOW-ENTRY
49 /// KEY
50 /// VALUE
51 /// ALIAS(value)
52 /// ANCHOR(value)
53 /// TAG(value)
54 /// SCALAR(value, plain, style)
55 
56 
57 /// Marked exception thrown at scanner errors.
58 ///
59 /// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
    // Constructors (message/mark variants) are generated by this mixin;
    // see dyaml.exception for the declarations.
    mixin MarkedExceptionCtors;
}
64 
65 /// Generates tokens from data provided by a Reader.
66 final class Scanner
67 {
68     private:
69         /// A simple key is a key that is not denoted by the '?' indicator.
70         /// For example:
71         ///   ---
72         ///   block simple key: value
73         ///   ? not a simple key:
74         ///   : { flow simple key: value }
75         /// We emit the KEY token before all keys, so when we find a potential simple
76         /// key, we try to locate the corresponding ':' indicator. Simple keys should be
77         /// limited to a single line and 1024 characters.
78         ///
79         /// 16 bytes on 64-bit.
        static struct SimpleKey
        {
            /// Character index in reader where the key starts.
            uint charIndex = uint.max;
            /// Index of the key token from start (first token scanned being 0).
            uint tokenIndex;
            /// Line the key starts at.
            uint line;
            /// Column the key starts at.
            /// (Columns above ushort.max are clamped - see savePossibleSimpleKey().)
            ushort column;
            /// Is this required to be a simple key?
            bool required;
            /// Is this struct "null" (invalid)?.
            bool isNull;
        }
95 
96         /// Block chomping types.
        enum Chomping
        {
            /// Strip all trailing line breaks. '-' indicator.
            Strip,
            /// Line break of the last line is preserved, others discarded. Default.
            Clip,
            /// All trailing line breaks are preserved. '+' indicator.
            Keep
        }
106 
107         /// Reader used to read from a file/stream.
108         Reader reader_;
109         /// Are we done scanning?
110         bool done_;
111 
112         /// Level of nesting in flow context. If 0, we're in block context.
113         uint flowLevel_;
114         /// Current indentation level.
115         int indent_ = -1;
116         /// Past indentation levels. Used as a stack.
117         Array!int indents_;
118 
119         /// Processed tokens not yet emitted. Used as a queue.
120         Queue!Token tokens_;
121 
122         /// Number of tokens emitted through the getToken method.
123         uint tokensTaken_;
124 
125         /// Can a simple key start at the current position? A simple key may start:
126         /// - at the beginning of the line, not counting indentation spaces
127         ///       (in block context),
128         /// - after '{', '[', ',' (in the flow context),
129         /// - after '?', ':', '-' (in the block context).
130         /// In the block context, this flag also signifies if a block collection
131         /// may start at the current position.
132         bool allowSimpleKey_ = true;
133 
134         /// Possible simple keys indexed by flow levels.
135         SimpleKey[] possibleSimpleKeys_;
136 
137 
138         /// Set on error by nothrow/@nogc inner functions along with errorData_.
139         ///
140         /// Non-nothrow/GC-using caller functions can then throw an exception using
141         /// data stored in errorData_.
142         bool error_;
143 
144         /// Data for the exception to throw if error_ is true.
145         MarkedYAMLExceptionData errorData_;
146 
147         /// Error messages can be built in this buffer without using the GC.
148         ///
149         /// ScannerException (MarkedYAMLException) copies string data passed to its
150         /// constructor so it's safe to use slices of this buffer as parameters for
151         /// exceptions that may outlive the Scanner. The GC allocation when creating the
152         /// error message is removed, but the allocation when creating an exception is
153         /// not.
154         char[256] msgBuffer_;
155 
156         /// Used to detect if a character is any whitespace plus '\0'
157         mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029"d searchAllWhitespace;
158         /// Used to detect if a character is any line break plus '\0'
159         mixin FastCharSearch!"\0\n\r\u0085\u2028\u2029"d searchAllBreaks;
160 
161         /// Avoids compiler confusion of std.algorithm.canFind with FastCharSearch.
162         alias canFind = std.algorithm.canFind;
163 
164     public:
165         /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
        {
            // Store the reader and seed the token queue with STREAM-START.
            reader_   = reader;
            fetchStreamStart();
        }
172 
173         /// Destroy the scanner.
        @trusted ~this()
        {
            // Free the token queue and the indentation stack.
            tokens_.destroy();
            indents_.destroy();
            // Release simple key storage and drop the reader reference
            // (the reader is owned elsewhere; we only null our reference).
            possibleSimpleKeys_.destroy();
            possibleSimpleKeys_ = null;
            reader_ = null;
        }
182 
183         /// Check if the next token is one of specified types.
184         ///
185         /// If no types are specified, checks if any tokens are left.
186         ///
187         /// Params:  ids = Token IDs to check for.
188         ///
189         /// Returns: true if the next token is one of specified types, or if there are
190         ///          any tokens left if no types specified, false otherwise.
191         bool checkToken(const TokenID[] ids ...) @safe
192         {
193             // Check if the next token is one of specified types.
194             while(needMoreTokens()) { fetchToken(); }
195             if(!tokens_.empty)
196             {
197                 if(ids.length == 0) { return true; }
198                 else
199                 {
200                     const nextId = tokens_.peek().id;
201                     foreach(id; ids)
202                     {
203                         if(nextId == id) { return true; }
204                     }
205                 }
206             }
207             return false;
208         }
209 
210         /// Return the next token, but keep it in the queue.
211         ///
212         /// Must not be called if there are no tokens left.
213         ref const(Token) peekToken() @safe
214         {
215             while(needMoreTokens) { fetchToken(); }
216             if(!tokens_.empty)    { return tokens_.peek(); }
217             assert(false, "No token left to peek");
218         }
219 
220         /// Return the next token, removing it from the queue.
221         ///
222         /// Must not be called if there are no tokens left.
223         Token getToken() @safe
224         {
225             while(needMoreTokens){fetchToken();}
226             if(!tokens_.empty)
227             {
228                 ++tokensTaken_;
229                 return tokens_.pop();
230             }
231             assert(false, "No token left to get");
232         }
233 
234     private:
235         /// Build an error message in msgBuffer_ and return it as a string.
        string buildMsg(S ...)(S args) @trusted pure nothrow @nogc
        {
            // NOTE(review): the cast assumes printNoGC() returns a char[] slice
            // of msgBuffer_ that is not mutated afterwards - confirm in
            // dyaml.nogcutil. ScannerException copies the message, so slices of
            // msgBuffer_ are safe to pass to exceptions (see msgBuffer_ docs).
            return cast(string)msgBuffer_.printNoGC(args);
        }
240 
241         /// Most scanning error messages have the same format; so build them with this
242         /// function.
        string expected(T)(string expected, T found) @safe pure nothrow @nogc
        {
            // Builds into msgBuffer_, so the result is only valid until the
            // next buildMsg()/expected() call.
            return buildMsg("expected ", expected, ", but found ", found);
        }
247 
248         /// If error_ is true, throws a ScannerException constructed from errorData_ and
249         /// sets error_ to false.
250         void throwIfError() @safe pure
251         {
252             if(!error_) { return; }
253             error_ = false;
254             throw new ScannerException(errorData_);
255         }
256 
257         /// Called by internal nothrow/@nogc methods to set an error to be thrown by
258         /// their callers.
259         ///
260         /// See_Also: dyaml.exception.MarkedYamlException
        void error(string context, const Mark contextMark, string problem,
                   const Mark problemMark) @safe pure nothrow @nogc
        {
            // Only one error may be pending at any time; it must be thrown
            // (via throwIfError()) before another can be recorded.
            assert(error_ == false,
                   "Setting an error when there already is a not yet thrown error");
            error_     = true;
            errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
        }
269 
270         /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
        bool needMoreTokens() @safe pure
        {
            if(done_)         { return false; }
            if(tokens_.empty) { return true; }

            // The current token may be a potential simple key, so we need to
            // look further to find its matching ':' before emitting it.
            stalePossibleSimpleKeys();
            return nextPossibleSimpleKey() == tokensTaken_;
        }
280 
281         /// Fetch at token, adding it to tokens_.
        void fetchToken() @safe
        {
            // Eat whitespaces and comments until we reach the next token.
            scanToNextToken();

            // Remove obsolete possible simple keys.
            stalePossibleSimpleKeys();

            // Compare current indentation and column. It may add some tokens
            // and decrease the current indentation level.
            unwindIndent(reader_.column);

            // Get the next character (single byte; multi-byte starters fall
            // through to the plain-scalar check in the default branch).
            const dchar c = reader_.peekByte();

            // Fetch the token. The first four checks must run in this order:
            // '\0' means end of stream, and '%'/'---'/'...' are only special
            // at the start of a line/document.
            if(c == '\0')            { return fetchStreamEnd();     }
            if(checkDirective())     { return fetchDirective();     }
            if(checkDocumentStart()) { return fetchDocumentStart(); }
            if(checkDocumentEnd())   { return fetchDocumentEnd();   }
            // Order of the following checks is NOT significant.
            switch(c)
            {
                case '[':  return fetchFlowSequenceStart();
                case '{':  return fetchFlowMappingStart();
                case ']':  return fetchFlowSequenceEnd();
                case '}':  return fetchFlowMappingEnd();
                case ',':  return fetchFlowEntry();
                case '!':  return fetchTag();
                case '\'': return fetchSingle();
                case '\"': return fetchDouble();
                case '*':  return fetchAlias();
                case '&':  return fetchAnchor();
                // '?', ':' and '-' may also begin plain scalars, hence the
                // goto default when they do not start a key/value/entry.
                case '?':  if(checkKey())        { return fetchKey();        } goto default;
                case ':':  if(checkValue())      { return fetchValue();      } goto default;
                case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
                // Block scalars ('|', '>') are only valid in block context.
                case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
                case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
                default:   if(checkPlain())      { return fetchPlain();      }
            }

            throw new ScannerException("While scanning for the next token, found character "
                                       "\'%s\', index %s that cannot start any token"
                                       .format(c, to!int(c)), reader_.mark);
        }
327 
328 
329         /// Return the token number of the nearest possible simple key.
330         uint nextPossibleSimpleKey() @safe pure nothrow @nogc
331         {
332             uint minTokenNumber = uint.max;
333             foreach(k, ref simpleKey; possibleSimpleKeys_)
334             {
335                 if(simpleKey.isNull) { continue; }
336                 minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
337             }
338             return minTokenNumber;
339         }
340 
341         /// Remove entries that are no longer possible simple keys.
342         ///
343         /// According to the YAML specification, simple keys
344         /// - should be limited to a single line,
345         /// - should be no longer than 1024 characters.
346         /// Disabling this will allow simple keys of any length and
347         /// height (may cause problems if indentation is broken though).
        void stalePossibleSimpleKeys() @safe pure
        {
            foreach(level, ref key; possibleSimpleKeys_)
            {
                if(key.isNull) { continue; }
                // A simple key is stale once we moved to another line or more
                // than 1024 characters past its start.
                if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
                {
                    // A *required* key (e.g. after '---') that went stale without
                    // finding its ':' is a scanning error.
                    enforce(!key.required,
                            new ScannerException("While scanning a simple key",
                                                 Mark(key.line, key.column),
                                                 "could not find expected ':'", reader_.mark));
                    key.isNull = true;
                }
            }
        }
363 
364         /// Check if the next token starts a possible simple key and if so, save its position.
365         ///
366         /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
        void savePossibleSimpleKey() @safe pure
        {
            // Check if a simple key is required at the current position
            // (block context, exactly at the current indentation level).
            const required = (flowLevel_ == 0 && indent_ == reader_.column);
            assert(allowSimpleKey_ || !required, "A simple key is required only if it is "
                   "the first token in the current line. Therefore it is always allowed.");

            if(!allowSimpleKey_) { return; }

            // The next token might be a simple key, so save its number and position.
            removePossibleSimpleKey();
            // Index the key token will have once it reaches the queue.
            const tokenCount = tokensTaken_ + cast(uint)tokens_.length;

            const line   = reader_.line;
            const column = reader_.column;
            // Columns wider than ushort.max are clamped to fit SimpleKey.column.
            const key    = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
                                     cast(ushort)min(column, ushort.max), required);

            // Grow the per-flow-level key array on demand.
            if(possibleSimpleKeys_.length <= flowLevel_)
            {
                const oldLength = possibleSimpleKeys_.length;
                possibleSimpleKeys_.length = flowLevel_ + 1;
                //No need to initialize the last element, it's already done in the next line.
                possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
            }
            possibleSimpleKeys_[flowLevel_] = key;
        }
394 
395         /// Remove the saved possible key position at the current flow level.
396         void removePossibleSimpleKey() @safe pure
397         {
398             if(possibleSimpleKeys_.length <= flowLevel_) { return; }
399 
400             if(!possibleSimpleKeys_[flowLevel_].isNull)
401             {
402                 const key = possibleSimpleKeys_[flowLevel_];
403                 enforce(!key.required,
404                         new ScannerException("While scanning a simple key",
405                                              Mark(key.line, key.column),
406                                              "could not find expected ':'", reader_.mark));
407                 possibleSimpleKeys_[flowLevel_].isNull = true;
408             }
409         }
410 
411         /// Decrease indentation, removing entries in indents_.
412         ///
413         /// Params:  column = Current column in the file/stream.
        void unwindIndent(const int column) @trusted
        {
            if(flowLevel_ > 0)
            {
                // In flow context, tokens should respect indentation.
                // The condition should be `indent >= column` according to the spec.
                // But this condition will prohibit intuitively correct
                // constructions such as
                // key : {
                // }

                // In the flow context, indentation is ignored. We make the scanner less
                // restrictive than what the specification requires.
                // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
                // {
                //     throw new ScannerException("Invalid indentation or unclosed '[' or '{'",
                //                                reader_.mark)
                // }
                return;
            }

            // In block context, we may need to issue the BLOCK-END tokens.
            while(indent_ > column)
            {
                // Pop the previous indentation level and emit BLOCK-END for it.
                indent_ = indents_.back;
                indents_.length = indents_.length - 1;
                tokens_.push(blockEndToken(reader_.mark, reader_.mark));
            }
        }
443 
444         /// Increase indentation if needed.
445         ///
446         /// Params:  column = Current column in the file/stream.
447         ///
448         /// Returns: true if the indentation was increased, false otherwise.
449         bool addIndent(int column) @trusted
450         {
451             if(indent_ >= column){return false;}
452             indents_ ~= indent_;
453             indent_ = column;
454             return true;
455         }
456 
457 
458         /// Add STREAM-START token.
        void fetchStreamStart() @safe nothrow
        {
            // The STREAM-START token carries the encoding detected by the reader.
            tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
        }
463 
        /// Add STREAM-END token and mark scanning as done.
        void fetchStreamEnd() @safe
        {
            // Set indentation to -1, unwinding all remaining indentation levels.
            unwindIndent(-1);
            removePossibleSimpleKey();
            allowSimpleKey_ = false;
            possibleSimpleKeys_.destroy;

            tokens_.push(streamEndToken(reader_.mark, reader_.mark));
            done_ = true;
        }
476 
        /// Add DIRECTIVE token.
        void fetchDirective() @safe
        {
            // Set indentation to -1, unwinding all remaining indentation levels.
            unwindIndent(-1);
            // Reset simple keys.
            removePossibleSimpleKey();
            allowSimpleKey_ = false;

            auto directive = scanDirective();
            // scanDirective() records errors instead of throwing; rethrow them here.
            throwIfError();
            tokens_.push(directive);
        }
490 
491         /// Add DOCUMENT-START or DOCUMENT-END token.
492         void fetchDocumentIndicator(TokenID id)() @safe
493             if(id == TokenID.DocumentStart || id == TokenID.DocumentEnd)
494         {
495             // Set indentation to -1 .
496             unwindIndent(-1);
497             // Reset simple keys. Note that there can't be a block collection after '---'.
498             removePossibleSimpleKey();
499             allowSimpleKey_ = false;
500 
501             Mark startMark = reader_.mark;
502             reader_.forward(3);
503             tokens_.push(simpleToken!id(startMark, reader_.mark));
504         }
505 
506         /// Aliases to add DOCUMENT-START or DOCUMENT-END token.
507         alias fetchDocumentIndicator!(TokenID.DocumentStart) fetchDocumentStart;
508         alias fetchDocumentIndicator!(TokenID.DocumentEnd) fetchDocumentEnd;
509 
510         /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
511         void fetchFlowCollectionStart(TokenID id)() @trusted
512         {
513             // '[' and '{' may start a simple key.
514             savePossibleSimpleKey();
515             // Simple keys are allowed after '[' and '{'.
516             allowSimpleKey_ = true;
517             ++flowLevel_;
518 
519             Mark startMark = reader_.mark;
520             reader_.forward();
521             tokens_.push(simpleToken!id(startMark, reader_.mark));
522         }
523 
524         /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
525         alias fetchFlowCollectionStart!(TokenID.FlowSequenceStart) fetchFlowSequenceStart;
526         alias fetchFlowCollectionStart!(TokenID.FlowMappingStart) fetchFlowMappingStart;
527 
528         /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
529         void fetchFlowCollectionEnd(TokenID id)() @safe
530         {
531             // Reset possible simple key on the current level.
532             removePossibleSimpleKey();
533             // No simple keys after ']' and '}'.
534             allowSimpleKey_ = false;
535             --flowLevel_;
536 
537             Mark startMark = reader_.mark;
538             reader_.forward();
539             tokens_.push(simpleToken!id(startMark, reader_.mark));
540         }
541 
542         /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token/
543         alias fetchFlowCollectionEnd!(TokenID.FlowSequenceEnd) fetchFlowSequenceEnd;
544         alias fetchFlowCollectionEnd!(TokenID.FlowMappingEnd) fetchFlowMappingEnd;
545 
        /// Add FLOW-ENTRY token.
        void fetchFlowEntry() @safe
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after ','.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(flowEntryToken(startMark, reader_.mark));
        }
558 
559         /// Additional checks used in block context in fetchBlockEntry and fetchKey.
560         ///
561         /// Params:  type = String representing the token type we might need to add.
562         ///          id   = Token type we might need to add.
        void blockChecks(string type, TokenID id)() @safe
        {
            enum context = type ~ " keys are not allowed here";
            // Are we allowed to start a key (not necessarily a simple one)?
            enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

            // A deeper indentation starts a new block collection here.
            if(addIndent(reader_.column))
            {
                tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
            }
        }
574 
575         /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
        void fetchBlockEntry() @safe
        {
            // In block context, '-' may open a new block sequence.
            if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.BlockSequenceStart)(); }

            // It's an error for the block entry to occur in the flow context,
            // but we let the parser detect this.

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '-'.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(blockEntryToken(startMark, reader_.mark));
        }
592 
593         /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
        void fetchKey() @safe
        {
            // In block context, '?' may open a new block mapping.
            if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.BlockMappingStart)(); }

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '?' in the block context.
            allowSimpleKey_ = (flowLevel_ == 0);

            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(keyToken(startMark, reader_.mark));
        }
607 
608         /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
609         void fetchValue() @safe
610         {
611             //Do we determine a simple key?
612             if(possibleSimpleKeys_.length > flowLevel_ &&
613                !possibleSimpleKeys_[flowLevel_].isNull)
614             {
615                 const key = possibleSimpleKeys_[flowLevel_];
616                 possibleSimpleKeys_[flowLevel_].isNull = true;
617                 Mark keyMark = Mark(key.line, key.column);
618                 const idx = key.tokenIndex - tokensTaken_;
619 
620                 assert(idx >= 0);
621 
622                 // Add KEY.
623                 // Manually inserting since tokens are immutable (need linked list).
624                 tokens_.insert(keyToken(keyMark, keyMark), idx);
625 
626                 // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
627                 if(flowLevel_ == 0 && addIndent(key.column))
628                 {
629                     tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
630                 }
631 
632                 // There cannot be two simple keys in a row.
633                 allowSimpleKey_ = false;
634             }
635             // Part of a complex key
636             else
637             {
638                 // We can start a complex value if and only if we can start a simple key.
639                 enforce(flowLevel_ > 0 || allowSimpleKey_,
640                         new ScannerException("Mapping values are not allowed here", reader_.mark));
641 
642                 // If this value starts a new block mapping, we need to add
643                 // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
644                 if(flowLevel_ == 0 && addIndent(reader_.column))
645                 {
646                     tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
647                 }
648 
649                 // Reset possible simple key on the current level.
650                 removePossibleSimpleKey();
651                 // Simple keys are allowed after ':' in the block context.
652                 allowSimpleKey_ = (flowLevel_ == 0);
653             }
654 
655             // Add VALUE.
656             Mark startMark = reader_.mark;
657             reader_.forward();
658             tokens_.push(valueToken(startMark, reader_.mark));
659         }
660 
661         /// Add ALIAS or ANCHOR token.
662         void fetchAnchor_(TokenID id)() @trusted
663             if(id == TokenID.Alias || id == TokenID.Anchor)
664         {
665             // ALIAS/ANCHOR could be a simple key.
666             savePossibleSimpleKey();
667             // No simple keys after ALIAS/ANCHOR.
668             allowSimpleKey_ = false;
669 
670             auto anchor = scanAnchor(id);
671             throwIfError();
672             tokens_.push(anchor);
673         }
674 
675         /// Aliases to add ALIAS or ANCHOR token.
676         alias fetchAnchor_!(TokenID.Alias) fetchAlias;
677         alias fetchAnchor_!(TokenID.Anchor) fetchAnchor;
678 
679         /// Add TAG token.
680         void fetchTag() @trusted
681         {
682             //TAG could start a simple key.
683             savePossibleSimpleKey();
684             //No simple keys after TAG.
685             allowSimpleKey_ = false;
686 
687             tokens_.push(scanTag());
688             throwIfError();
689         }
690 
691         /// Add block SCALAR token.
692         void fetchBlockScalar(ScalarStyle style)() @trusted
693             if(style == ScalarStyle.Literal || style == ScalarStyle.Folded)
694         {
695             // Reset possible simple key on the current level.
696             removePossibleSimpleKey();
697             // A simple key may follow a block scalar.
698             allowSimpleKey_ = true;
699 
700             auto blockScalar = scanBlockScalar(style);
701             throwIfError();
702             tokens_.push(blockScalar);
703         }
704 
705         /// Aliases to add literal or folded block scalar.
706         alias fetchBlockScalar!(ScalarStyle.Literal) fetchLiteral;
707         alias fetchBlockScalar!(ScalarStyle.Folded) fetchFolded;
708 
709         /// Add quoted flow SCALAR token.
710         void fetchFlowScalar(ScalarStyle quotes)() @safe
711         {
712             // A flow scalar could be a simple key.
713             savePossibleSimpleKey();
714             // No simple keys after flow scalars.
715             allowSimpleKey_ = false;
716 
717             // Scan and add SCALAR.
718             auto scalar = scanFlowScalar(quotes);
719             throwIfError();
720             tokens_.push(scalar);
721         }
722 
723         /// Aliases to add single or double quoted block scalar.
724         alias fetchFlowScalar!(ScalarStyle.SingleQuoted) fetchSingle;
725         alias fetchFlowScalar!(ScalarStyle.DoubleQuoted) fetchDouble;
726 
727         /// Add plain SCALAR token.
        void fetchPlain() @safe
        {
            // A plain scalar could be a simple key
            savePossibleSimpleKey();
            // No simple keys after plain scalars. But note that scanPlain() will
            // change this flag if the scan is finished at the beginning of the line.
            allowSimpleKey_ = false;
            auto plain = scanPlain();
            // Errors recorded by scanPlain() are thrown here, before the push.
            throwIfError();

            // Add the SCALAR token.
            tokens_.push(plain);
        }
741 
742     pure nothrow @nogc:
743 
744         ///Check if the next token is DIRECTIVE:        ^ '%' ...
745         bool checkDirective() @safe
746         {
747             return reader_.peekByte() == '%' && reader_.column == 0;
748         }
749 
750         /// Check if the next token is DOCUMENT-START:   ^ '---' (' '|'\n')
751         bool checkDocumentStart() @safe
752         {
753             // Check one char first, then all 3, to prevent reading outside the buffer.
754             return reader_.column     == 0     &&
755                    reader_.peekByte() == '-'   &&
756                    reader_.prefix(3)  == "---" &&
757                    searchAllWhitespace.canFind(reader_.peek(3));
758         }
759 
760         /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
761         bool checkDocumentEnd() @safe
762         {
763             // Check one char first, then all 3, to prevent reading outside the buffer.
764             return reader_.column     == 0     &&
765                    reader_.peekByte() == '.'   &&
766                    reader_.prefix(3)  == "..." &&
767                    searchAllWhitespace.canFind(reader_.peek(3));
768         }
769 
770         /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
771         bool checkBlockEntry() @safe
772         {
773             return searchAllWhitespace.canFind(reader_.peek(1));
774         }
775 
776         /// Check if the next token is KEY(flow context):    '?'
777         ///
778         /// or KEY(block context):   '?' (' '|'\n')
779         bool checkKey() @safe
780         {
781             return (flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1)));
782         }
783 
784         /// Check if the next token is VALUE(flow context):  ':'
785         ///
786         /// or VALUE(block context): ':' (' '|'\n')
787         bool checkValue() @safe
788         {
789             return flowLevel_ > 0 || searchAllWhitespace.canFind(reader_.peek(1));
790         }
791 
792         /// Check if the next token is a plain scalar.
793         ///
794         /// A plain scalar may start with any non-space character except:
795         ///   '-', '?', ':', ',', '[', ']', '{', '}',
796         ///   '#', '&', '*', '!', '|', '>', '\'', '\"',
797         ///   '%', '@', '`'.
798         ///
799         /// It may also start with
800         ///   '-', '?', ':'
801         /// if it is followed by a non-space character.
802         ///
803         /// Note that we limit the last rule to the block context (except the
804         /// '-' character) because we want the flow context to be space
805         /// independent.
806         bool checkPlain() @safe
807         {
808             const c = reader_.peek();
809             mixin FastCharSearch!"-?:,[]{}#&*!|>\'\"%@` \t\0\n\r\u0085\u2028\u2029"d
810                 searchPlainNotFirstChar;
811             if(!searchPlainNotFirstChar.canFind(c))
812             {
813                 return true;
814             }
815             return !searchAllWhitespace.canFind(reader_.peek(1)) &&
816                    (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
817         }
818 
819         /// Move to the next non-space character.
820         void findNextNonSpace() @safe
821         {
822             while(reader_.peekByte() == ' ') { reader_.forward(); }
823         }
824 
825         /// Scan a string of alphanumeric or "-_" characters.
826         ///
827         /// Assumes that the caller is building a slice in Reader, and puts the scanned
828         /// characters into that slice.
829         ///
830         /// In case of an error, error_ is set. Use throwIfError() to handle this.
831         void scanAlphaNumericToSlice(string name)(const Mark startMark) @system
832         {
833             size_t length = 0;
834             dchar c = reader_.peek();
835             while(c.isAlphaNum || "-_"d.canFind(c)) { c = reader_.peek(++length); }
836 
837             if(length == 0)
838             {
839                 enum contextMsg = "While scanning " ~ name;
840                 error(contextMsg, startMark, expected("alphanumeric, '-' or '_'", c),
841                       reader_.mark);
842                 return;
843             }
844 
845             reader_.sliceBuilder.write(reader_.get(length));
846         }
847 
848         /// Scan and throw away all characters until next line break.
849         void scanToNextBreak() @safe
850         {
851             while(!searchAllBreaks.canFind(reader_.peek())) { reader_.forward(); }
852         }
853 
854         /// Scan all characters until next line break.
855         ///
856         /// Assumes that the caller is building a slice in Reader, and puts the scanned
857         /// characters into that slice.
858         void scanToNextBreakToSlice() @system
859         {
860             uint length = 0;
861             while(!searchAllBreaks.canFind(reader_.peek(length)))
862             {
863                 ++length;
864             }
865             reader_.sliceBuilder.write(reader_.get(length));
866         }
867 
868 
        /// Move to next token in the file/stream.
        ///
        /// We ignore spaces, line breaks and comments.
        /// If we find a line break in the block context, we set
        /// `allowSimpleKey_` on.
        ///
        /// We do not yet support BOM inside the stream as the
        /// specification requires. Any such mark will be considered as a part
        /// of the document.
        void scanToNextToken() @safe
        {
            // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
            //   Tabs cannot precede tokens
            //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
            //   KEY(block), VALUE(block), BLOCK-ENTRY
            // So the checking code is
            //   if <TAB>:
            //       allowSimpleKey_ = false
            // We also need to add the check for `allowSimpleKey_ == true` to
            // `unwindIndent` before issuing BLOCK-END.
            // Scanners for block, flow, and plain scalars need to be modified.

            for(;;)
            {
                findNextNonSpace();

                // Skip a trailing comment, if any.
                if(reader_.peekByte() == '#') { scanToNextBreak(); }
                // A line break was consumed - continue scanning from the next
                // line; in block context a simple key may follow the break.
                if(scanLineBreak() != '\0')
                {
                    if(flowLevel_ == 0) { allowSimpleKey_ = true; }
                }
                else
                {
                    // No more spaces/comments/breaks - a token starts here.
                    break;
                }
            }
        }
906 
        /// Scan directive token.
        Token scanDirective() @trusted
        {
            Mark startMark = reader_.mark;
            // Skip the '%'.
            reader_.forward();

            // Scan directive name (e.g. "YAML" or "TAG").
            reader_.sliceBuilder.begin();
            scanDirectiveNameToSlice(startMark);
            if(error_) { return Token.init; }
            const name = reader_.sliceBuilder.finish();

            reader_.sliceBuilder.begin();

            // Index where tag handle ends and suffix starts in a tag directive value.
            uint tagHandleEnd = uint.max;
            if(name == "YAML")     { scanYAMLDirectiveValueToSlice(startMark); }
            else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
            if(error_) { return Token.init; }
            char[] value = reader_.sliceBuilder.finish();

            Mark endMark = reader_.mark;

            // Unknown directive names are treated as reserved; the rest of the
            // line is skipped for those.
            DirectiveType directive;
            if(name == "YAML")     { directive = DirectiveType.YAML; }
            else if(name == "TAG") { directive = DirectiveType.TAG; }
            else
            {
                directive = DirectiveType.Reserved;
                scanToNextBreak();
            }

            // Consume any trailing comment and the line break after the directive.
            scanDirectiveIgnoredLine(startMark);
            if(error_) { return Token.init; }

            return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
        }
945 
946         /// Scan name of a directive token.
947         ///
948         /// Assumes that the caller is building a slice in Reader, and puts the scanned
949         /// characters into that slice.
950         ///
951         /// In case of an error, error_ is set. Use throwIfError() to handle this.
952         void scanDirectiveNameToSlice(const Mark startMark) @system
953         {
954             // Scan directive name.
955             scanAlphaNumericToSlice!"a directive"(startMark);
956             if(error_) { return; }
957 
958             if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
959             error("While scanning a directive", startMark,
960                   expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
961         }
962 
        /// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        void scanYAMLDirectiveValueToSlice(const Mark startMark) @system
        {
            findNextNonSpace();

            // Major version number.
            scanYAMLDirectiveNumberToSlice(startMark);
            if(error_) { return; }

            if(reader_.peekByte() != '.')
            {
                error("While scanning a directive", startMark,
                      expected("digit or '.'", reader_.peek()), reader_.mark);
                return;
            }
            // Skip the '.'.
            reader_.forward();

            // Minor version number.
            reader_.sliceBuilder.write('.');
            scanYAMLDirectiveNumberToSlice(startMark);
            if(error_) { return; }

            // The version must be followed by a space, NUL or a line break.
            if(!" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
            {
                error("While scanning a directive", startMark,
                      expected("digit or '.'", reader_.peek()), reader_.mark);
            }
        }
995 
996         /// Scan a number from a YAML directive.
997         ///
998         /// Assumes that the caller is building a slice in Reader, and puts the scanned
999         /// characters into that slice.
1000         ///
1001         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1002         void scanYAMLDirectiveNumberToSlice(const Mark startMark) @system
1003         {
1004             if(!isDigit(reader_.peek()))
1005             {
1006                 error("While scanning a directive", startMark,
1007                       expected("digit", reader_.peek()), reader_.mark);
1008                 return;
1009             }
1010 
1011             // Already found the first digit in the enforce(), so set length to 1.
1012             uint length = 1;
1013             while(reader_.peek(length).isDigit) { ++length; }
1014 
1015             reader_.sliceBuilder.write(reader_.get(length));
1016         }
1017 
        /// Scan value of a tag directive.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// Returns: Length of tag handle (which is before tag prefix) in scanned data
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        uint scanTagDirectiveValueToSlice(const Mark startMark) @system
        {
            findNextNonSpace();
            // The slice may already contain data; measure only what the handle adds.
            const startLength = reader_.sliceBuilder.length;
            scanTagDirectiveHandleToSlice(startMark);
            if(error_) { return uint.max; }
            const handleLength = cast(uint)(reader_.sliceBuilder.length  - startLength);
            findNextNonSpace();
            // The prefix is appended directly after the handle in the same slice.
            scanTagDirectivePrefixToSlice(startMark);

            return handleLength;
        }
1038 
1039         /// Scan handle of a tag directive.
1040         ///
1041         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1042         /// characters into that slice.
1043         ///
1044         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1045         void scanTagDirectiveHandleToSlice(const Mark startMark) @system
1046         {
1047             scanTagHandleToSlice!"directive"(startMark);
1048             if(error_) { return; }
1049             if(reader_.peekByte() == ' ') { return; }
1050             error("While scanning a directive handle", startMark,
1051                   expected("' '", reader_.peek()), reader_.mark);
1052         }
1053 
1054         /// Scan prefix of a tag directive.
1055         ///
1056         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1057         /// characters into that slice.
1058         ///
1059         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1060         void scanTagDirectivePrefixToSlice(const Mark startMark) @system
1061         {
1062             scanTagURIToSlice!"directive"(startMark);
1063             if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
1064             error("While scanning a directive prefix", startMark,
1065                   expected("' '", reader_.peek()), reader_.mark);
1066         }
1067 
1068         /// Scan (and ignore) ignored line after a directive.
1069         ///
1070         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1071         void scanDirectiveIgnoredLine(const Mark startMark) @safe
1072         {
1073             findNextNonSpace();
1074             if(reader_.peekByte() == '#') { scanToNextBreak(); }
1075             if(searchAllBreaks.canFind(reader_.peek()))
1076             {
1077                 scanLineBreak();
1078                 return;
1079             }
1080             error("While scanning a directive", startMark,
1081                   expected("comment or a line break", reader_.peek()), reader_.mark);
1082         }
1083 
1084 
        /// Scan an alias or an anchor.
        ///
        /// The specification does not restrict characters for anchors and
        /// aliases. This may lead to problems, for instance, the document:
        ///   [ *alias, value ]
        /// can be interpreted in two ways, as
        ///   [ "value" ]
        /// and
        ///   [ *alias , "value" ]
        /// Therefore we restrict aliases to ASCII alphanumeric characters.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanAnchor(const TokenID id) @trusted
        {
            const startMark = reader_.mark;
            // The indicator character: '*' for an alias, otherwise an anchor.
            const dchar i = reader_.get();

            reader_.sliceBuilder.begin();
            if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
            else         { scanAlphaNumericToSlice!"an anchor"(startMark); }
            // On error, value is discarded as we return immediately
            char[] value = reader_.sliceBuilder.finish();
            if(error_)   { return Token.init; }

            // The name must end at whitespace or one of the indicator characters
            // (those are all ASCII, so the cheaper byte peek suffices there).
            if(!searchAllWhitespace.canFind(reader_.peek()) &&
               !"?:,]}%@"d.canFind(reader_.peekByte()))
            {
                enum anchorCtx = "While scanning an anchor";
                enum aliasCtx  = "While scanning an alias";
                error(i == '*' ? aliasCtx : anchorCtx, startMark,
                      expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
                return Token.init;
            }

            if(id == TokenID.Alias)
            {
                return aliasToken(startMark, reader_.mark, value);
            }
            if(id == TokenID.Anchor)
            {
                return anchorToken(startMark, reader_.mark, value);
            }
            assert(false, "This code should never be reached");
        }
1129 
1130         /// Scan a tag token.
1131         ///
1132         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1133         Token scanTag() @trusted
1134         {
1135             const startMark = reader_.mark;
1136             dchar c = reader_.peek(1);
1137 
1138             reader_.sliceBuilder.begin();
1139             scope(failure) { reader_.sliceBuilder.finish(); }
1140             // Index where tag handle ends and tag suffix starts in the tag value
1141             // (slice) we will produce.
1142             uint handleEnd;
1143 
1144             mixin FastCharSearch!" \0\n\r\u0085\u2028\u2029"d search;
1145             if(c == '<')
1146             {
1147                 reader_.forward(2);
1148 
1149                 handleEnd = 0;
1150                 scanTagURIToSlice!"tag"(startMark);
1151                 if(error_) { return Token.init; }
1152                 if(reader_.peekByte() != '>')
1153                 {
1154                     error("While scanning a tag", startMark,
1155                           expected("'>'", reader_.peek()), reader_.mark);
1156                     return Token.init;
1157                 }
1158                 reader_.forward();
1159             }
1160             else if(searchAllWhitespace.canFind(c))
1161             {
1162                 reader_.forward();
1163                 handleEnd = 0;
1164                 reader_.sliceBuilder.write('!');
1165             }
1166             else
1167             {
1168                 uint length = 1;
1169                 bool useHandle = false;
1170 
1171                 while(!search.canFind(c))
1172                 {
1173                     if(c == '!')
1174                     {
1175                         useHandle = true;
1176                         break;
1177                     }
1178                     ++length;
1179                     c = reader_.peek(length);
1180                 }
1181 
1182                 if(useHandle)
1183                 {
1184                     scanTagHandleToSlice!"tag"(startMark);
1185                     handleEnd = cast(uint)reader_.sliceBuilder.length;
1186                     if(error_) { return Token.init; }
1187                 }
1188                 else
1189                 {
1190                     reader_.forward();
1191                     reader_.sliceBuilder.write('!');
1192                     handleEnd = cast(uint)reader_.sliceBuilder.length;
1193                 }
1194 
1195                 scanTagURIToSlice!"tag"(startMark);
1196                 if(error_) { return Token.init; }
1197             }
1198 
1199             if(search.canFind(reader_.peek()))
1200             {
1201                 char[] slice = reader_.sliceBuilder.finish();
1202                 return tagToken(startMark, reader_.mark, slice, handleEnd);
1203             }
1204 
1205             error("While scanning a tag", startMark, expected("' '", reader_.peek()),
1206                   reader_.mark);
1207             return Token.init;
1208         }
1209 
        /// Scan a block scalar token with specified style.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanBlockScalar(const ScalarStyle style) @trusted
        {
            const startMark = reader_.mark;

            // Scan the header: skip the '|' or '>' indicator.
            reader_.forward();

            const indicators = scanBlockScalarIndicators(startMark);
            if(error_) { return Token.init; }

            const chomping   = indicators[0];
            // increment is int.min when no indentation indicator was given.
            const increment  = indicators[1];
            scanBlockScalarIgnoredLine(startMark);
            if(error_) { return Token.init; }

            // Determine the indentation level and go to the first non-empty line.
            Mark endMark;
            uint indent = max(1, indent_ + 1);

            reader_.sliceBuilder.begin();
            alias Transaction = SliceBuilder.Transaction;
            // Used to strip the last line breaks written to the slice at the end of the
            // scalar, which may be needed based on chomping.
            Transaction breaksTransaction = Transaction(reader_.sliceBuilder);
            // Read the first indentation/line breaks before the scalar.
            size_t startLen = reader_.sliceBuilder.length;
            if(increment == int.min)
            {
                // No explicit indicator - detect indentation from the content.
                auto indentation = scanBlockScalarIndentationToSlice();
                endMark = indentation[1];
                indent  = max(indent, indentation[0]);
            }
            else
            {
                indent += increment - 1;
                endMark = scanBlockScalarBreaksToSlice(indent);
            }

            // int.max means there's no line break (int.max is outside UTF-32).
            dchar lineBreak = cast(dchar)int.max;

            // Scan the inner part of the block scalar.
            while(reader_.column == indent && reader_.peekByte() != '\0')
            {
                breaksTransaction.commit();
                const bool leadingNonSpace = !" \t"d.canFind(reader_.peekByte());
                // This is where the 'interesting' non-whitespace data gets read.
                scanToNextBreakToSlice();
                lineBreak = scanLineBreak();


                // This transaction serves to rollback data read in the
                // scanBlockScalarBreaksToSlice() call.
                breaksTransaction = Transaction(reader_.sliceBuilder);
                startLen = reader_.sliceBuilder.length;
                // The line breaks should actually be written _after_ the if() block
                // below. We work around that by inserting the line break into the
                // middle of the slice (see the sliceBuilder.insert() call below).
                endMark = scanBlockScalarBreaksToSlice(indent);

                // This will not run during the last iteration (see the if() vs the
                // while()), hence breaksTransaction rollback (which happens after this
                // loop) will never roll back data written in this if() block.
                if(reader_.column == indent && reader_.peekByte() != '\0')
                {
                    // Unfortunately, folding rules are ambiguous.

                    // This is the folding according to the specification:
                    if(style == ScalarStyle.Folded && lineBreak == '\n' &&
                       leadingNonSpace && !" \t"d.canFind(reader_.peekByte()))
                    {
                        // No breaks were scanned; no need to insert the space in the
                        // middle of slice.
                        if(startLen == reader_.sliceBuilder.length)
                        {
                            reader_.sliceBuilder.write(' ');
                        }
                    }
                    else
                    {
                        // We need to insert in the middle of the slice in case any line
                        // breaks were scanned.
                        reader_.sliceBuilder.insert(lineBreak, startLen);
                    }

                    ////this is Clark Evans's interpretation (also in the spec
                    ////examples):
                    //
                    //if(style == ScalarStyle.Folded && lineBreak == '\n')
                    //{
                    //    if(startLen == endLen)
                    //    {
                    //        if(!" \t"d.canFind(reader_.peekByte()))
                    //        {
                    //            reader_.sliceBuilder.write(' ');
                    //        }
                    //        else
                    //        {
                    //            chunks ~= lineBreak;
                    //        }
                    //    }
                    //}
                    //else
                    //{
                    //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
                    //}
                }
                else
                {
                    break;
                }
            }

            // If chomping is Keep, we keep (commit) the last scanned line breaks
            // (which are at the end of the scalar). Otherwise we remove them (end the
            // transaction).
            if(chomping == Chomping.Keep)  { breaksTransaction.commit(); }
            else                           { breaksTransaction.__dtor(); }
            if(chomping != Chomping.Strip && lineBreak != int.max)
            {
                // If chomping is Keep, we keep the line break but the first line break
                // that isn't stripped (since chomping isn't Strip in this branch) must
                // be inserted _before_ the other line breaks.
                if(chomping == Chomping.Keep)
                {
                    reader_.sliceBuilder.insert(lineBreak, startLen);
                }
                // If chomping is not Keep, breaksTransaction was cancelled so we can
                // directly write the first line break (as it isn't stripped - chomping
                // is not Strip)
                else
                {
                    reader_.sliceBuilder.write(lineBreak);
                }
            }

            char[] slice = reader_.sliceBuilder.finish();
            return scalarToken(startMark, endMark, slice, style);
        }
1351 
        /// Scan chomping and indentation indicators of a scalar token.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
        {
            auto chomping = Chomping.Clip;
            // int.min means "no explicit indentation indicator given".
            int increment = int.min;
            dchar c       = reader_.peek();

            /// Indicators can be in any order.
            if(getChomping(c, chomping))
            {
                getIncrement(c, increment, startMark);
                if(error_) { return tuple(Chomping.init, int.max); }
            }
            else
            {
                // No chomping indicator first; try increment, then chomping again.
                const gotIncrement = getIncrement(c, increment, startMark);
                if(error_)       { return tuple(Chomping.init, int.max); }
                if(gotIncrement) { getChomping(c, chomping); }
            }

            // Indicators must be followed by a space, NUL or a line break.
            if(" \0\n\r\u0085\u2028\u2029"d.canFind(c))
            {
                return tuple(chomping, increment);
            }
            error("While scanning a block scalar", startMark,
                  expected("chomping or indentation indicator", c), reader_.mark);
            return tuple(Chomping.init, int.max);
        }
1382 
1383         /// Get chomping indicator, if detected. Return false otherwise.
1384         ///
1385         /// Used in scanBlockScalarIndicators.
1386         ///
1387         /// Params:
1388         ///
1389         /// c        = The character that may be a chomping indicator.
1390         /// chomping = Write the chomping value here, if detected.
1391         bool getChomping(ref dchar c, ref Chomping chomping) @safe
1392         {
1393             if(!"+-"d.canFind(c)) { return false; }
1394             chomping = c == '+' ? Chomping.Keep : Chomping.Strip;
1395             reader_.forward();
1396             c = reader_.peek();
1397             return true;
1398         }
1399 
1400         /// Get increment indicator, if detected. Return false otherwise.
1401         ///
1402         /// Used in scanBlockScalarIndicators.
1403         ///
1404         /// Params:
1405         ///
1406         /// c         = The character that may be an increment indicator.
1407         ///             If an increment indicator is detected, this will be updated to
1408         ///             the next character in the Reader.
1409         /// increment = Write the increment value here, if detected.
1410         /// startMark = Mark for error messages.
1411         ///
1412         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1413         bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe
1414         {
1415             if(!c.isDigit) { return false; }
1416             // Convert a digit to integer.
1417             increment = c - '0';
1418             assert(increment < 10 && increment >= 0, "Digit has invalid value");
1419             if(increment > 0)
1420             {
1421                 reader_.forward();
1422                 c = reader_.peek();
1423                 return true;
1424             }
1425             error("While scanning a block scalar", startMark,
1426                   expected("indentation indicator in range 1-9", "0"), reader_.mark);
1427             return false;
1428         }
1429 
1430         /// Scan (and ignore) ignored line in a block scalar.
1431         ///
1432         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1433         void scanBlockScalarIgnoredLine(const Mark startMark) @safe
1434         {
1435             findNextNonSpace();
1436             if(reader_.peekByte()== '#') { scanToNextBreak(); }
1437 
1438             if(searchAllBreaks.canFind(reader_.peek()))
1439             {
1440                 scanLineBreak();
1441                 return;
1442             }
1443             error("While scanning a block scalar", startMark,
1444                   expected("comment or line break", reader_.peek()), reader_.mark);
1445         }
1446 
1447         /// Scan indentation in a block scalar, returning line breaks, max indent and end mark.
1448         ///
1449         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1450         /// characters into that slice.
1451         Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @system
1452         {
1453             uint maxIndent;
1454             Mark endMark = reader_.mark;
1455 
1456             while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
1457             {
1458                 if(reader_.peekByte() != ' ')
1459                 {
1460                     reader_.sliceBuilder.write(scanLineBreak());
1461                     endMark = reader_.mark;
1462                     continue;
1463                 }
1464                 reader_.forward();
1465                 maxIndent = max(reader_.column, maxIndent);
1466             }
1467 
1468             return tuple(maxIndent, endMark);
1469         }
1470 
1471         /// Scan line breaks at lower or specified indentation in a block scalar.
1472         ///
1473         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1474         /// characters into that slice.
1475         Mark scanBlockScalarBreaksToSlice(const uint indent) @trusted
1476         {
1477             Mark endMark = reader_.mark;
1478 
1479             for(;;)
1480             {
1481                 while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); }
1482                 if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))  { break; }
1483                 reader_.sliceBuilder.write(scanLineBreak());
1484                 endMark = reader_.mark;
1485             }
1486 
1487             return endMark;
1488         }
1489 
        /// Scan a quoted flow scalar token with specified quotes.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanFlowScalar(const ScalarStyle quotes) @trusted
        {
            const startMark = reader_.mark;
            // Consume the opening quote; the closing quote must match it.
            const quote     = reader_.get();

            reader_.sliceBuilder.begin();
            // Make sure the slice build is ended even on an error return.
            scope(exit) if(error_) { reader_.sliceBuilder.finish(); }

            scanFlowScalarNonSpacesToSlice(quotes, startMark);
            if(error_) { return Token.init; }

            // Alternate between space and non-space runs until the closing quote.
            while(reader_.peek() != quote)
            {
                scanFlowScalarSpacesToSlice(startMark);
                if(error_) { return Token.init; }
                scanFlowScalarNonSpacesToSlice(quotes, startMark);
                if(error_) { return Token.init; }
            }
            // Skip the closing quote.
            reader_.forward();

            auto slice = reader_.sliceBuilder.finish();
            return scalarToken(startMark, reader_.mark, slice, quotes);
        }
1516 
        /// Scan nonspace characters in a flow scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        ///
        /// Params:  quotes    = Quoting style (single or double) of the scalar.
        ///          startMark = Position where the scalar started, for error messages.
        void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
            @system
        {
            for(;;) with(ScalarStyle)
            {
                dchar c = reader_.peek();

                // Characters that terminate a run of plain content: whitespace,
                // end-of-buffer ('\0'), line breaks, quotes and backslash.
                mixin FastCharSearch!" \t\0\n\r\u0085\u2028\u2029\'\"\\"d search;

                size_t numCodePoints = 0;
                // This is an optimized way of writing:
                // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
                outer: for(size_t oldSliceLength;;)
                {
                    // This will not necessarily make slice 32 chars longer, as not all
                    // code points are 1 char.
                    const char[] slice = reader_.slice(numCodePoints + 32);
                    // Slice did not grow: no more input - EOF inside the scalar.
                    if(slice.length == oldSliceLength)
                    {
                        error("While reading a flow scalar", startMark,
                              "reached end of file", reader_.mark);
                        return;
                    }
                    // Only examine the bytes added since the previous iteration.
                    for(size_t i = oldSliceLength; i < slice.length;)
                    {
                        // slice is UTF-8 - need to decode
                        const ch = slice[i] < 0x80 ? slice[i++] : decodeValidUTF8NoGC(slice, i);
                        if(search.canFind(ch)) { break outer; }
                        ++numCodePoints;
                    }
                    oldSliceLength = slice.length;
                }

                // Copy the whole run of plain content into the slice at once.
                reader_.sliceBuilder.write(reader_.get(numCodePoints));

                c = reader_.peek();
                // In a single quoted scalar, '' is an escaped single quote.
                if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
                {
                    reader_.forward(2);
                    reader_.sliceBuilder.write('\'');
                }
                // Quote/backslash characters that are literal in this quoting style.
                else if((quotes == DoubleQuoted && c == '\'') ||
                        (quotes == SingleQuoted && "\"\\"d.canFind(c)))
                {
                    reader_.forward();
                    reader_.sliceBuilder.write(c);
                }
                // Backslash escape sequence (double quoted scalars only).
                else if(quotes == DoubleQuoted && c == '\\')
                {
                    reader_.forward();
                    c = reader_.peek();
                    // Single-character escapes such as \n, \t, \0 ...
                    if(dyaml.escapes.escapes.canFind(c))
                    {
                        reader_.forward();
                        // Escaping has been moved to Parser as it can't be done in
                        // place (in a slice) in case of '\P' and '\L' (very uncommon,
                        // but we don't want to break the spec)
                        char[2] escapeSequence = ['\\', cast(char)c];
                        reader_.sliceBuilder.write(escapeSequence);
                    }
                    // Hexadecimal escapes (e.g. \xXX, \uXXXX, \UXXXXXXXX).
                    else if(dyaml.escapes.escapeHexCodeList.canFind(c))
                    {
                        const hexLength = dyaml.escapes.escapeHexLength(c);
                        reader_.forward();

                        // Every character of the escape body must be a hex digit.
                        foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  expected("escape sequence of hexadecimal numbers",
                                           reader_.peek(i)), reader_.mark);
                            return;
                        }
                        // Write the escape through verbatim; Parser decodes it later.
                        char[] hex = reader_.get(hexLength);
                        char[2] escapeStart = ['\\', cast(char) c];
                        reader_.sliceBuilder.write(escapeStart);
                        reader_.sliceBuilder.write(hex);
                        bool overflow;
                        // Note: This is just error checking; Parser does the actual
                        //       escaping (otherwise we could accidentally create an
                        //       escape sequence here that wasn't in input, breaking the
                        //       escaping code in parser, which is in parser because it
                        //       can't always be done in place)
                        parseNoGC!int(hex, 16u, overflow);
                        if(overflow)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  "overflow when parsing an escape sequence of "
                                  "hexadecimal numbers.", reader_.mark);
                            return;
                        }
                    }
                    // Escaped line break: consume it and any following break lines.
                    else if("\n\r\u0085\u2028\u2029"d.canFind(c))
                    {
                        scanLineBreak();
                        scanFlowScalarBreaksToSlice(startMark);
                        if(error_) { return; }
                    }
                    else
                    {
                        error("While scanning a double quoted scalar", startMark,
                              buildMsg("found unsupported escape character", c),
                              reader_.mark);
                        return;
                    }
                }
                // Whitespace, line break or closing quote: caller takes over.
                else { return; }
            }
        }
1631 
        /// Scan space characters in a flow scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// spaces into that slice.
        ///
        /// Line breaks within the whitespace are folded: a single break becomes a
        /// space, extra breaks are preserved (YAML flow scalar folding).
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        void scanFlowScalarSpacesToSlice(const Mark startMark) @system
        {
            // Increase length as long as we see whitespace.
            size_t length = 0;
            while(" \t"d.canFind(reader_.peekByte(length))) { ++length; }
            // Grab the whitespace bytes before the reader moves past them.
            auto whitespaces = reader_.prefixBytes(length);

            // Can check the last byte without striding because '\0' is ASCII
            const c = reader_.peek(length);
            if(c == '\0')
            {
                error("While scanning a quoted scalar", startMark,
                      "found unexpected end of buffer", reader_.mark);
                return;
            }

            // Spaces not followed by a line break.
            if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
            {
                // Keep the whitespace verbatim - no folding needed.
                reader_.forward(length);
                reader_.sliceBuilder.write(whitespaces);
                return;
            }

            // There's a line break after the spaces.
            // The spaces themselves are dropped (folded away).
            reader_.forward(length);
            const lineBreak = scanLineBreak();

            // Non-'\n' breaks (LS/PS) are written through unchanged.
            if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }

            // If we have extra line breaks after the first, scan them into the
            // slice.
            const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
            if(error_) { return; }

            // No extra breaks, one normal line break. Replace it with a space.
            if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
        }
1676 
1677         /// Scan line breaks in a flow scalar.
1678         ///
1679         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1680         /// line breaks into that slice.
1681         ///
1682         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1683         bool scanFlowScalarBreaksToSlice(const Mark startMark) @system
1684         {
1685             // True if at least one line break was found.
1686             bool anyBreaks;
1687             for(;;)
1688             {
1689                 // Instead of checking indentation, we check for document separators.
1690                 const prefix = reader_.prefix(3);
1691                 if((prefix == "---" || prefix == "...") &&
1692                    searchAllWhitespace.canFind(reader_.peek(3)))
1693                 {
1694                     error("While scanning a quoted scalar", startMark,
1695                           "found unexpected document separator", reader_.mark);
1696                     return false;
1697                 }
1698 
1699                 // Skip any whitespaces.
1700                 while(" \t"d.canFind(reader_.peekByte())) { reader_.forward(); }
1701 
1702                 // Encountered a non-whitespace non-linebreak character, so we're done.
1703                 if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
1704 
1705                 const lineBreak = scanLineBreak();
1706                 anyBreaks = true;
1707                 reader_.sliceBuilder.write(lineBreak);
1708             }
1709             return anyBreaks;
1710         }
1711 
        /// Scan plain scalar token (no block, no quotes).
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanPlain() @trusted
        {
            // We keep track of the allowSimpleKey_ flag here.
            // Indentation rules are loosened for the flow context.
            const startMark = reader_.mark;
            Mark endMark = startMark;
            // A plain scalar must be indented deeper than the enclosing block.
            const indent = indent_ + 1;

            // We allow zero indentation for scalars, but then we need to check for
            // document separators at the beginning of the line.
            // if(indent == 0) { indent = 1; }

            reader_.sliceBuilder.begin();

            // Trailing whitespace scanned by scanPlainSpacesToSlice() is only kept
            // if more content follows; the transaction lets us discard it otherwise.
            alias Transaction = SliceBuilder.Transaction;
            Transaction spacesTransaction;
            // Stop at a comment.
            while(reader_.peekByte() != '#')
            {
                // Scan the entire plain scalar.
                size_t length = 0;
                dchar c = void;
                // Moved the if() out of the loop for optimization.
                if(flowLevel_ == 0)
                {
                    // Block context: scalar ends at whitespace or at ':' followed by
                    // whitespace (i.e. a mapping value indicator).
                    c = reader_.peek(length);
                    for(;;)
                    {
                        const cNext = reader_.peek(length + 1);
                        if(searchAllWhitespace.canFind(c) ||
                           (c == ':' && searchAllWhitespace.canFind(cNext)))
                        {
                            break;
                        }
                        ++length;
                        c = cNext;
                    }
                }
                else
                {
                    // Flow context: flow indicators also terminate the scalar.
                    for(;;)
                    {
                        c = reader_.peek(length);
                        if(searchAllWhitespace.canFind(c) || ",:?[]{}"d.canFind(c))
                        {
                            break;
                        }
                        ++length;
                    }
                }

                // It's not clear what we should do with ':' in the flow context.
                if(flowLevel_ > 0 && c == ':' &&
                   !searchAllWhitespace.canFind(reader_.peek(length + 1)) &&
                   !",[]{}"d.canFind(reader_.peek(length + 1)))
                {
                    // This is an error; throw the slice away.
                    spacesTransaction.commit();
                    reader_.sliceBuilder.finish();
                    reader_.forward(length);
                    error("While scanning a plain scalar", startMark,
                          "found unexpected ':' . Please check "
                          "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
                          reader_.mark);
                    return Token.init;
                }

                // Nothing scanned this iteration: the scalar is finished.
                if(length == 0) { break; }

                // A plain scalar can never be a simple key's ':' target.
                allowSimpleKey_ = false;

                reader_.sliceBuilder.write(reader_.get(length));

                endMark = reader_.mark;

                // The previous round's trailing whitespace is now known to be
                // followed by content - keep it and start a new transaction.
                spacesTransaction.commit();
                spacesTransaction = Transaction(reader_.sliceBuilder);

                const startLength = reader_.sliceBuilder.length;
                scanPlainSpacesToSlice(startMark);
                // Stop on no further whitespace, or on dedent in block context.
                if(startLength == reader_.sliceBuilder.length ||
                   (flowLevel_ == 0 && reader_.column < indent))
                {
                    break;
                }
            }

            // Destroying an uncommitted transaction discards trailing whitespace.
            spacesTransaction.__dtor();
            char[] slice = reader_.sliceBuilder.finish();

            return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
        }
1807 
        /// Scan spaces in a plain scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the spaces
        /// into that slice.
        ///
        /// Line breaks are folded like in flow scalars: one break becomes a space,
        /// additional breaks are preserved.
        void scanPlainSpacesToSlice(const Mark startMark) @system
        {
            // The specification is really confusing about tabs in plain scalars.
            // We just forbid them completely. Do not use tabs in YAML!

            // Get as many plain spaces as there are.
            size_t length = 0;
            while(reader_.peekByte(length) == ' ') { ++length; }
            // Grab the space bytes before moving the reader past them.
            char[] whitespaces = reader_.prefixBytes(length);
            reader_.forward(length);

            dchar c = reader_.peek();
            mixin FastCharSearch!" \n\r\u0085\u2028\u2029"d search;
            // No newline after the spaces (if any)
            // (Excluding ' ' so we can use the same FastCharSearch as below)
            if(!search.canFind(c) && c != ' ')
            {
                // We have spaces, but no newline.
                if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
                return;
            }

            // Newline after the spaces (if any)
            const lineBreak = scanLineBreak();
            // A line break means a following scalar could be a simple key.
            allowSimpleKey_ = true;

            // True if the reader is at a document separator ("---"/"...") followed
            // by whitespace - that ends the plain scalar.
            static bool end(Reader reader_) @safe pure nothrow @nogc
            {
                const prefix = reader_.prefix(3);
                return ("---" == prefix || "..." == prefix)
                        && " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
            }

            if(end(reader_)) { return; }

            bool extraBreaks = false;

            // Returning before commit() discards the scanned breaks - this happens
            // when a document separator terminates the scalar mid-whitespace.
            alias Transaction = SliceBuilder.Transaction;
            auto transaction = Transaction(reader_.sliceBuilder);
            // Non-'\n' breaks (LS/PS) are written through unchanged.
            if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
            while(search.canFind(reader_.peek()))
            {
                if(reader_.peekByte() == ' ') { reader_.forward(); }
                else
                {
                    const lBreak = scanLineBreak();
                    extraBreaks  = true;
                    reader_.sliceBuilder.write(lBreak);

                    if(end(reader_)) { return; }
                }
            }
            transaction.commit();

            // No line breaks, only a space.
            if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
        }
1869 
1870         /// Scan handle of a tag token.
1871         ///
1872         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1873         /// characters into that slice.
1874         ///
1875         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1876         void scanTagHandleToSlice(string name)(const Mark startMark) @system
1877         {
1878             dchar c = reader_.peek();
1879             enum contextMsg = "While scanning a " ~ name;
1880             if(c != '!')
1881             {
1882                 error(contextMsg, startMark, expected("'!'", c), reader_.mark);
1883                 return;
1884             }
1885 
1886             uint length = 1;
1887             c = reader_.peek(length);
1888             if(c != ' ')
1889             {
1890                 while(c.isAlphaNum || "-_"d.canFind(c))
1891                 {
1892                     ++length;
1893                     c = reader_.peek(length);
1894                 }
1895                 if(c != '!')
1896                 {
1897                     reader_.forward(length);
1898                     error(contextMsg, startMark, expected("'!'", c), reader_.mark);
1899                     return;
1900                 }
1901                 ++length;
1902             }
1903 
1904             reader_.sliceBuilder.write(reader_.get(length));
1905         }
1906 
        /// Scan URI in a tag token.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        void scanTagURIToSlice(string name)(const Mark startMark) @trusted
        {
            // Note: we do not check if URI is well-formed.
            dchar c = reader_.peek();
            const startLen = reader_.sliceBuilder.length;
            {
                uint length = 0;
                // Unreserved and reserved URI characters allowed in a tag.
                mixin FastCharSearch!"-;/?:@&=+$,_.!~*\'()[]%"d search;
                while(c.isAlphaNum || search.canFind(c))
                {
                    if(c == '%')
                    {
                        // Flush plain characters accumulated so far, then let the
                        // escape scanner decode the %XX sequence(s).
                        auto chars = reader_.get(length);
                        reader_.sliceBuilder.write(chars);
                        length = 0;
                        scanURIEscapesToSlice!name(startMark);
                        if(error_) { return; }
                    }
                    else { ++length; }
                    c = reader_.peek(length);
                }
                // Flush any trailing plain characters.
                if(length > 0)
                {
                    auto chars = reader_.get(length);
                    reader_.sliceBuilder.write(chars);
                    length = 0;
                }
            }
            // OK if we scanned something, error otherwise.
            if(reader_.sliceBuilder.length > startLen) { return; }

            enum contextMsg = "While parsing a " ~ name;
            error(contextMsg, startMark, expected("URI", c), reader_.mark);
        }
1947 
1948         // Not @nogc yet because std.utf.decode is not @nogc
1949         /// Scan URI escape sequences.
1950         ///
1951         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1952         /// characters into that slice.
1953         ///
1954         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1955         void scanURIEscapesToSlice(string name)(const Mark startMark) @system
1956         {
1957             // URI escapes encode a UTF-8 string. We store UTF-8 code units here for
1958             // decoding into UTF-32.
1959             char[4] bytes;
1960             size_t bytesUsed;
1961             Mark mark = reader_.mark;
1962 
1963             // Get one dchar by decoding data from bytes.
1964             //
1965             // This is probably slow, but simple and URI escapes are extremely uncommon
1966             // in YAML.
1967             //
1968             // Returns the number of bytes used by the dchar in bytes on success,
1969             // size_t.max on failure.
1970             static size_t getDchar(char[] bytes, Reader reader_)
1971             {
1972                 size_t nextChar;
1973                 dchar c;
1974                 if(bytes[0] < 0x80)
1975                 {
1976                     c = bytes[0];
1977                     ++nextChar;
1978                 }
1979                 else
1980                 {
1981                     const decoded = decodeUTF8NoGC!(No.validated)(bytes[], nextChar);
1982                     if(decoded.errorMessage !is null) { return size_t.max; }
1983                     c = decoded.decoded;
1984                 }
1985                 reader_.sliceBuilder.write(c);
1986                 if(bytes.length - nextChar > 0)
1987                 {
1988                     core.stdc..string.memmove(bytes.ptr, bytes.ptr + nextChar,
1989                                              bytes.length - nextChar);
1990                 }
1991                 return bytes.length - nextChar;
1992             }
1993 
1994             enum contextMsg = "While scanning a " ~ name;
1995             while(reader_.peekByte() == '%')
1996             {
1997                 reader_.forward();
1998                 if(bytesUsed == bytes.length)
1999                 {
2000                     bytesUsed = getDchar(bytes[], reader_);
2001                     if(bytesUsed == size_t.max)
2002                     {
2003                         error(contextMsg, startMark,
2004                                 "Invalid UTF-8 data encoded in URI escape sequence",
2005                                 reader_.mark);
2006                         return;
2007                     }
2008                 }
2009 
2010                 char b = 0;
2011                 uint mult = 16;
2012                 // Converting 2 hexadecimal digits to a byte.
2013                 foreach(k; 0 .. 2)
2014                 {
2015                     const dchar c = reader_.peek(k);
2016                     if(!c.isHexDigit)
2017                     {
2018                         auto msg = expected("URI escape sequence of 2 hexadecimal "
2019                                             "numbers", c);
2020                         error(contextMsg, startMark, msg, reader_.mark);
2021                         return;
2022                     }
2023 
2024                     uint digit;
2025                     if(c - '0' < 10)     { digit = c - '0'; }
2026                     else if(c - 'A' < 6) { digit = c - 'A'; }
2027                     else if(c - 'a' < 6) { digit = c - 'a'; }
2028                     else                 { assert(false); }
2029                     b += mult * digit;
2030                     mult /= 16;
2031                 }
2032                 bytes[bytesUsed++] = b;
2033 
2034                 reader_.forward(2);
2035             }
2036 
2037             bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
2038         }
2039 
2040 
2041         /// Scan a line break, if any.
2042         ///
2043         /// Transforms:
2044         ///   '\r\n'      :   '\n'
2045         ///   '\r'        :   '\n'
2046         ///   '\n'        :   '\n'
2047         ///   '\u0085'    :   '\n'
2048         ///   '\u2028'    :   '\u2028'
2049         ///   '\u2029     :   '\u2029'
2050         ///   no break    :   '\0'
2051         dchar scanLineBreak() @safe
2052         {
2053             // Fast path for ASCII line breaks.
2054             const b = reader_.peekByte();
2055             if(b < 0x80)
2056             {
2057                 if(b == '\n' || b == '\r')
2058                 {
2059                     if(reader_.prefix(2) == "\r\n") { reader_.forward(2); }
2060                     else { reader_.forward(); }
2061                     return '\n';
2062                 }
2063                 return '\0';
2064             }
2065 
2066             const c = reader_.peek();
2067             if(c == '\x85')
2068             {
2069                 reader_.forward();
2070                 return '\n';
2071             }
2072             if(c == '\u2028' || c == '\u2029')
2073             {
2074                 reader_.forward();
2075                 return c;
2076             }
2077             return '\0';
2078         }
2079 }
2080 
2081 private:
2082 
/// A nothrow function that converts a dchar[] to a string.
///
/// The conversion itself may throw in theory, but the input is always valid
/// UTF-32 here, so failures are turned into assertion errors.
string utf32To8(C)(C[] str) @safe pure nothrow
    if(is(Unqual!C == dchar))
{
    try
    {
        return str.to!string;
    }
    catch(ConvException e)
    {
        assert(false, "Unexpected invalid UTF-32 string");
    }
    catch(Exception e)
    {
        assert(false, "Unexpected exception during UTF-8 encoding");
    }
}