1 
2 //          Copyright Ferdinand Majerech 2011-2014.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /// YAML scanner.
8 /// Code based on PyYAML: http://www.pyyaml.org
9 module dyaml.scanner;
10 
11 
import core.stdc.string;

import std.algorithm;
import std.array;
import std.ascii : isAlphaNum, isDigit, isHexDigit;
import std.conv;
import std.exception;
import std.string;
import std.traits : Unqual;
import std.typecons;
import std.utf;
23 
24 import dyaml.escapes;
25 import dyaml.exception;
26 import dyaml.queue;
27 import dyaml.reader;
28 import dyaml.style;
29 import dyaml.token;
30 
31 package:
32 /// Scanner produces tokens of the following types:
33 /// STREAM-START
34 /// STREAM-END
35 /// DIRECTIVE(name, value)
36 /// DOCUMENT-START
37 /// DOCUMENT-END
38 /// BLOCK-SEQUENCE-START
39 /// BLOCK-MAPPING-START
40 /// BLOCK-END
41 /// FLOW-SEQUENCE-START
42 /// FLOW-MAPPING-START
43 /// FLOW-SEQUENCE-END
44 /// FLOW-MAPPING-END
45 /// BLOCK-ENTRY
46 /// FLOW-ENTRY
47 /// KEY
48 /// VALUE
49 /// ALIAS(value)
50 /// ANCHOR(value)
51 /// TAG(value)
52 /// SCALAR(value, plain, style)
53 
/// Matches YAML line break characters (plus NUL, used as the end-of-stream sentinel).
alias isBreak = among!('\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Matches a space, NUL, or any line break character.
alias isBreakOrSpace = among!(' ', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Matches space, tab, NUL, or any line break character.
alias isWhiteSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Matches characters that cannot start a plain scalar (YAML indicators and whitespace).
alias isNonScalarStartCharacter = among!('-', '?', ':', ',', '[', ']', '{', '}',
    '#', '&', '*', '!', '|', '>', '\'', '"', '%', '@', '`', ' ', '\t', '\0', '\n',
    '\r', '\u0085', '\u2028', '\u2029');

/// Matches non-alphanumeric characters allowed in URIs (used for tags/directives).
alias isURIChar = among!('-', ';', '/', '?', ':', '@', '&', '=', '+', '$', ',',
    '_', '.', '!', '~', '*', '\'', '(', ')', '[', ']', '%');

/// Matches space or any line break character.
alias isNSChar = among!(' ', '\n', '\r', '\u0085', '\u2028', '\u2029');

/// Matches any line break character.
alias isBChar = among!('\n', '\r', '\u0085', '\u2028', '\u2029');

/// Matches characters that end a run of ordinary characters in a flow scalar
/// (whitespace, breaks, quotes, and backslash).
alias isFlowScalarBreakSpace = among!(' ', '\t', '\0', '\n', '\r', '\u0085', '\u2028', '\u2029', '\'', '"', '\\');
72 
/// Marked exception thrown at scanner errors.
///
/// Carries context/problem messages with their Marks (line/column positions).
///
/// See_Also: MarkedYAMLException
class ScannerException : MarkedYAMLException
{
    // Generates the constructors used by Scanner (message+mark, and errorData_ forms).
    mixin MarkedExceptionCtors;
}
80 
81 /// Generates tokens from data provided by a Reader.
82 final class Scanner
83 {
84     private:
        /// A simple key is a key that is not denoted by the '?' indicator.
        /// For example:
        ///   ---
        ///   block simple key: value
        ///   ? not a simple key:
        ///   : { flow simple key: value }
        /// We emit the KEY token before all keys, so when we find a potential simple
        /// key, we try to locate the corresponding ':' indicator. Simple keys should be
        /// limited to a single line and 1024 characters.
        ///
        /// 16 bytes on 64-bit.
        static struct SimpleKey
        {
            /// Character index in reader where the key starts.
            uint charIndex = uint.max;
            /// Index of the key token from start (first token scanned being 0).
            uint tokenIndex;
            /// Line the key starts at.
            uint line;
            /// Column the key starts at (clamped to ushort.max when saved).
            ushort column;
            /// Is this required to be a simple key?
            bool required;
            /// Is this struct "null" (invalid)?.
            bool isNull;
        }
111 
        /// Block chomping types.
        ///
        /// Determine how trailing line breaks of a block scalar ('|' or '>')
        /// are handled, selected by the '-'/'+' indicators.
        enum Chomping
        {
            /// Strip all trailing line breaks. '-' indicator.
            Strip,
            /// Line break of the last line is preserved, others discarded. Default.
            Clip,
            /// All trailing line breaks are preserved. '+' indicator.
            Keep
        }
122 
        /// Reader used to read from a file/stream.
        Reader reader_;
        /// Are we done scanning?
        bool done_;

        /// Level of nesting in flow context. If 0, we're in block context.
        uint flowLevel_;
        /// Current indentation level.
        int indent_ = -1;
        /// Past indentation levels. Used as a stack.
        Appender!(int[]) indents_;

        /// Processed tokens not yet emitted. Used as a queue.
        Queue!Token tokens_;

        /// Number of tokens emitted through the getToken method.
        uint tokensTaken_;

        /// Can a simple key start at the current position? A simple key may start:
        /// - at the beginning of the line, not counting indentation spaces
        ///       (in block context),
        /// - after '{', '[', ',' (in the flow context),
        /// - after '?', ':', '-' (in the block context).
        /// In the block context, this flag also signifies if a block collection
        /// may start at the current position.
        bool allowSimpleKey_ = true;

        /// Possible simple keys indexed by flow levels.
        SimpleKey[] possibleSimpleKeys_;


        /// Set on error by nothrow/@nogc inner functions along with errorData_.
        ///
        /// Non-nothrow/GC-using caller functions can then throw an exception using
        /// data stored in errorData_.
        bool error_;

        /// Data for the exception to throw if error_ is true.
        MarkedYAMLExceptionData errorData_;

        /// Error messages can be built in this buffer without using the GC.
        ///
        /// ScannerException (MarkedYAMLException) copies string data passed to its
        /// constructor so it's safe to use slices of this buffer as parameters for
        /// exceptions that may outlive the Scanner. The GC allocation when creating the
        /// error message is removed, but the allocation when creating an exception is
        /// not.
        ///
        /// NOTE(review): buildMsg() currently builds messages with std.conv.text,
        /// which allocates; this buffer appears unused in the visible code — verify.
        char[256] msgBuffer_;
171 
172     public:
        /// Construct a Scanner using specified Reader.
        this(Reader reader) @safe nothrow
        {
            // Store the reader and queue the initial STREAM-START token so the
            // token stream always begins with it.
            reader_   = reader;
            fetchStreamStart();
        }
180 
181         /// Check if the next token is one of specified types.
182         ///
183         /// If no types are specified, checks if any tokens are left.
184         ///
185         /// Params:  ids = Token IDs to check for.
186         ///
187         /// Returns: true if the next token is one of specified types, or if there are
188         ///          any tokens left if no types specified, false otherwise.
189         bool checkToken(const TokenID[] ids ...) @safe
190         {
191             // Check if the next token is one of specified types.
192             while(needMoreTokens()) { fetchToken(); }
193             if(!tokens_.empty)
194             {
195                 if(ids.length == 0) { return true; }
196                 else
197                 {
198                     const nextId = tokens_.peek().id;
199                     foreach(id; ids)
200                     {
201                         if(nextId == id) { return true; }
202                     }
203                 }
204             }
205             return false;
206         }
207 
208         /// Return the next token, but keep it in the queue.
209         ///
210         /// Must not be called if there are no tokens left.
211         ref const(Token) peekToken() @safe
212         {
213             while(needMoreTokens) { fetchToken(); }
214             if(!tokens_.empty)    { return tokens_.peek(); }
215             assert(false, "No token left to peek");
216         }
217 
218         /// Return the next token, removing it from the queue.
219         ///
220         /// Must not be called if there are no tokens left.
221         Token getToken() @safe
222         {
223             while(needMoreTokens){fetchToken();}
224             if(!tokens_.empty)
225             {
226                 ++tokensTaken_;
227                 return tokens_.pop();
228             }
229             assert(false, "No token left to get");
230         }
231 
232     private:
233         /// Build an error message in msgBuffer_ and return it as a string.
234         string buildMsg(S ...)(S args)
235         {
236             try {
237                 return text(args);
238             }
239             catch (Exception)
240             {
241                 return "";
242             }
243         }
244 
        /// Most scanning error messages have the same format; so build them with this
        /// function.
        ///
        /// Params:  expected = Description of what the scanner expected to find.
        ///          found    = What was actually found (converted to text by buildMsg).
        ///
        /// Returns: The formatted message, or "" if formatting failed.
        string expected(T)(string expected, T found)
        {
            return buildMsg("expected ", expected, ", but found ", found);
        }
251 
252         /// If error_ is true, throws a ScannerException constructed from errorData_ and
253         /// sets error_ to false.
254         void throwIfError() @safe pure
255         {
256             if(!error_) { return; }
257             error_ = false;
258             throw new ScannerException(errorData_);
259         }
260 
        /// Called by internal nothrow/@nogc methods to set an error to be thrown by
        /// their callers.
        ///
        /// Params:  context     = Description of the construct being scanned.
        ///          contextMark = Position where that construct started.
        ///          problem     = Description of the specific problem.
        ///          problemMark = Position of the problem itself.
        ///
        /// See_Also: dyaml.exception.MarkedYamlException
        void error(string context, const Mark contextMark, string problem,
                   const Mark problemMark) @safe pure nothrow @nogc
        {
            // Only one error may be pending at a time; callers must run
            // throwIfError() before another error can be recorded.
            assert(error_ == false,
                   "Setting an error when there already is a not yet thrown error");
            error_     = true;
            errorData_ = MarkedYAMLExceptionData(context, contextMark, problem, problemMark);
        }
273 
274         /// Determine whether or not we need to fetch more tokens before peeking/getting a token.
275         bool needMoreTokens() @safe pure
276         {
277             if(done_)         { return false; }
278             if(tokens_.empty) { return true; }
279 
280             /// The current token may be a potential simple key, so we need to look further.
281             stalePossibleSimpleKeys();
282             return nextPossibleSimpleKey() == tokensTaken_;
283         }
284 
        /// Fetch a token, adding it to tokens_.
        ///
        /// Determines the token type from the next character and dispatches to the
        /// matching fetch* method.
        ///
        /// Throws: ScannerException if the next character cannot start any token.
        void fetchToken() @safe
        {
            // Eat whitespaces and comments until we reach the next token.
            scanToNextToken();

            // Remove obsolete possible simple keys.
            stalePossibleSimpleKeys();

            // Compare current indentation and column. It may add some tokens
            // and decrease the current indentation level.
            unwindIndent(reader_.column);

            // Get the next character.
            const dchar c = reader_.peekByte();

            // Fetch the token.
            if(c == '\0')            { return fetchStreamEnd();     }
            if(checkDirective())     { return fetchDirective();     }
            if(checkDocumentStart()) { return fetchDocumentStart(); }
            if(checkDocumentEnd())   { return fetchDocumentEnd();   }
            // Order of the following checks is NOT significant.
            switch(c)
            {
                case '[':  return fetchFlowSequenceStart();
                case '{':  return fetchFlowMappingStart();
                case ']':  return fetchFlowSequenceEnd();
                case '}':  return fetchFlowMappingEnd();
                case ',':  return fetchFlowEntry();
                case '!':  return fetchTag();
                case '\'': return fetchSingle();
                case '\"': return fetchDouble();
                case '*':  return fetchAlias();
                case '&':  return fetchAnchor();
                // '?', ':' and '-' may also begin a plain scalar; when they don't
                // start a token here, fall through to the plain scalar check.
                case '?':  if(checkKey())        { return fetchKey();        } goto default;
                case ':':  if(checkValue())      { return fetchValue();      } goto default;
                case '-':  if(checkBlockEntry()) { return fetchBlockEntry(); } goto default;
                // Block scalars ('|', '>') are only allowed in block context.
                case '|':  if(flowLevel_ == 0)   { return fetchLiteral();    } break;
                case '>':  if(flowLevel_ == 0)   { return fetchFolded();     } break;
                default:   if(checkPlain())      { return fetchPlain();      }
            }

            throw new ScannerException("While scanning for the next token, found character " ~
                                       "\'%s\', index %s that cannot start any token"
                                       .format(c, to!int(c)), reader_.mark);
        }
331 
332 
333         /// Return the token number of the nearest possible simple key.
334         uint nextPossibleSimpleKey() @safe pure nothrow @nogc
335         {
336             uint minTokenNumber = uint.max;
337             foreach(k, ref simpleKey; possibleSimpleKeys_)
338             {
339                 if(simpleKey.isNull) { continue; }
340                 minTokenNumber = min(minTokenNumber, simpleKey.tokenIndex);
341             }
342             return minTokenNumber;
343         }
344 
345         /// Remove entries that are no longer possible simple keys.
346         ///
347         /// According to the YAML specification, simple keys
348         /// - should be limited to a single line,
349         /// - should be no longer than 1024 characters.
350         /// Disabling this will allow simple keys of any length and
351         /// height (may cause problems if indentation is broken though).
352         void stalePossibleSimpleKeys() @safe pure
353         {
354             foreach(level, ref key; possibleSimpleKeys_)
355             {
356                 if(key.isNull) { continue; }
357                 if(key.line != reader_.line || reader_.charIndex - key.charIndex > 1024)
358                 {
359                     enforce(!key.required,
360                             new ScannerException("While scanning a simple key",
361                                                  Mark(key.line, key.column),
362                                                  "could not find expected ':'", reader_.mark));
363                     key.isNull = true;
364                 }
365             }
366         }
367 
        /// Check if the next token starts a possible simple key and if so, save its position.
        ///
        /// This function is called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
        void savePossibleSimpleKey() @safe pure
        {
            // Check if a simple key is required at the current position.
            const required = (flowLevel_ == 0 && indent_ == reader_.column);
            assert(allowSimpleKey_ || !required, "A simple key is required only if it is " ~
                   "the first token in the current line. Therefore it is always allowed.");

            if(!allowSimpleKey_) { return; }

            // The next token might be a simple key, so save its number and position.
            removePossibleSimpleKey();
            // Index this token will have: tokens already taken plus tokens queued.
            const tokenCount = tokensTaken_ + cast(uint)tokens_.length;

            const line   = reader_.line;
            const column = reader_.column;
            // Column is clamped to fit SimpleKey.column (ushort).
            const key    = SimpleKey(cast(uint)reader_.charIndex, tokenCount, line,
                                     cast(ushort)min(column, ushort.max), required);

            // Grow the per-flow-level array when entering a deeper level than seen so far.
            if(possibleSimpleKeys_.length <= flowLevel_)
            {
                const oldLength = possibleSimpleKeys_.length;
                possibleSimpleKeys_.length = flowLevel_ + 1;
                //No need to initialize the last element, it's already done in the next line.
                possibleSimpleKeys_[oldLength .. flowLevel_] = SimpleKey.init;
            }
            possibleSimpleKeys_[flowLevel_] = key;
        }
398 
399         /// Remove the saved possible key position at the current flow level.
400         void removePossibleSimpleKey() @safe pure
401         {
402             if(possibleSimpleKeys_.length <= flowLevel_) { return; }
403 
404             if(!possibleSimpleKeys_[flowLevel_].isNull)
405             {
406                 const key = possibleSimpleKeys_[flowLevel_];
407                 enforce(!key.required,
408                         new ScannerException("While scanning a simple key",
409                                              Mark(key.line, key.column),
410                                              "could not find expected ':'", reader_.mark));
411                 possibleSimpleKeys_[flowLevel_].isNull = true;
412             }
413         }
414 
415         /// Decrease indentation, removing entries in indents_.
416         ///
417         /// Params:  column = Current column in the file/stream.
418         void unwindIndent(const int column) @safe
419         {
420             if(flowLevel_ > 0)
421             {
422                 // In flow context, tokens should respect indentation.
423                 // The condition should be `indent >= column` according to the spec.
424                 // But this condition will prohibit intuitively correct
425                 // constructions such as
426                 // key : {
427                 // }
428 
429                 // In the flow context, indentation is ignored. We make the scanner less
430                 // restrictive than what the specification requires.
431                 // if(pedantic_ && flowLevel_ > 0 && indent_ > column)
432                 // {
433                 //     throw new ScannerException("Invalid intendation or unclosed '[' or '{'",
434                 //                                reader_.mark)
435                 // }
436                 return;
437             }
438 
439             // In block context, we may need to issue the BLOCK-END tokens.
440             while(indent_ > column)
441             {
442                 indent_ = indents_.data.back;
443                 assert(indents_.data.length);
444                 indents_.shrinkTo(indents_.data.length - 1);
445                 tokens_.push(blockEndToken(reader_.mark, reader_.mark));
446             }
447         }
448 
449         /// Increase indentation if needed.
450         ///
451         /// Params:  column = Current column in the file/stream.
452         ///
453         /// Returns: true if the indentation was increased, false otherwise.
454         bool addIndent(int column) @safe
455         {
456             if(indent_ >= column){return false;}
457             indents_ ~= indent_;
458             indent_ = column;
459             return true;
460         }
461 
462 
463         /// Add STREAM-START token.
464         void fetchStreamStart() @safe nothrow
465         {
466             tokens_.push(streamStartToken(reader_.mark, reader_.mark, reader_.encoding));
467         }
468 
        /// Add STREAM-END token and mark scanning as done.
        void fetchStreamEnd() @safe
        {
            // Set indentation to -1, closing all open block levels.
            unwindIndent(-1);
            removePossibleSimpleKey();
            allowSimpleKey_ = false;
            // No more keys can occur; release the simple key storage.
            possibleSimpleKeys_.destroy;

            tokens_.push(streamEndToken(reader_.mark, reader_.mark));
            done_ = true;
        }
481 
        /// Add DIRECTIVE token.
        void fetchDirective() @safe
        {
            // Set indentation to -1, closing all open block levels.
            unwindIndent(-1);
            // Reset simple keys.
            removePossibleSimpleKey();
            allowSimpleKey_ = false;

            // scanDirective() reports errors via error_; throw before queueing
            // so a failed scan leaves no bogus token behind.
            auto directive = scanDirective();
            throwIfError();
            tokens_.push(directive);
        }
495 
        /// Add DOCUMENT-START or DOCUMENT-END token.
        void fetchDocumentIndicator(TokenID id)()
            if(id == TokenID.DocumentStart || id == TokenID.DocumentEnd)
        {
            // Set indentation to -1 .
            unwindIndent(-1);
            // Reset simple keys. Note that there can't be a block collection after '---'.
            removePossibleSimpleKey();
            allowSimpleKey_ = false;

            Mark startMark = reader_.mark;
            // Skip the 3-character indicator ('---' or '...').
            reader_.forward(3);
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add DOCUMENT-START or DOCUMENT-END token.
        alias fetchDocumentStart = fetchDocumentIndicator!(TokenID.DocumentStart);
        alias fetchDocumentEnd = fetchDocumentIndicator!(TokenID.DocumentEnd);
514 
        /// Add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        void fetchFlowCollectionStart(TokenID id)() @safe
        {
            // '[' and '{' may start a simple key.
            savePossibleSimpleKey();
            // Simple keys are allowed after '[' and '{'.
            allowSimpleKey_ = true;
            // Entering a deeper flow context.
            ++flowLevel_;

            Mark startMark = reader_.mark;
            // Skip the 1-character indicator.
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
        alias fetchFlowSequenceStart = fetchFlowCollectionStart!(TokenID.FlowSequenceStart);
        alias fetchFlowMappingStart = fetchFlowCollectionStart!(TokenID.FlowMappingStart);
532 
        /// Add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        void fetchFlowCollectionEnd(TokenID id)()
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // No simple keys after ']' and '}'.
            allowSimpleKey_ = false;
            // Leaving the current flow context.
            --flowLevel_;

            Mark startMark = reader_.mark;
            // Skip the 1-character indicator.
            reader_.forward();
            tokens_.push(simpleToken!id(startMark, reader_.mark));
        }

        /// Aliases to add FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
        alias fetchFlowSequenceEnd = fetchFlowCollectionEnd!(TokenID.FlowSequenceEnd);
        alias fetchFlowMappingEnd = fetchFlowCollectionEnd!(TokenID.FlowMappingEnd);
550 
        /// Add FLOW-ENTRY token.
        void fetchFlowEntry() @safe
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after ','.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            // Skip the ',' indicator.
            reader_.forward();
            tokens_.push(flowEntryToken(startMark, reader_.mark));
        }
563 
        /// Additional checks used in block context in fetchBlockEntry and fetchKey.
        ///
        /// Params:  type = String representing the token type we might need to add.
        ///          id   = Token type we might need to add.
        ///
        /// Throws: ScannerException if a key is not allowed at the current position.
        void blockChecks(string type, TokenID id)()
        {
            enum context = type ~ " keys are not allowed here";
            // Are we allowed to start a key (not necessarily a simple one)?
            enforce(allowSimpleKey_, new ScannerException(context, reader_.mark));

            // Opening a new indentation level starts a new block collection.
            if(addIndent(reader_.column))
            {
                tokens_.push(simpleToken!id(reader_.mark, reader_.mark));
            }
        }
579 
        /// Add BLOCK-ENTRY token. Might add BLOCK-SEQUENCE-START in the process.
        void fetchBlockEntry() @safe
        {
            // In block context, '-' may also open a new block sequence.
            if(flowLevel_ == 0) { blockChecks!("Sequence", TokenID.BlockSequenceStart)(); }

            // It's an error for the block entry to occur in the flow context,
            // but we let the parser detect this.

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '-'.
            allowSimpleKey_ = true;

            Mark startMark = reader_.mark;
            // Skip the '-' indicator.
            reader_.forward();
            tokens_.push(blockEntryToken(startMark, reader_.mark));
        }
597 
        /// Add KEY token. Might add BLOCK-MAPPING-START in the process.
        void fetchKey() @safe
        {
            // In block context, '?' may also open a new block mapping.
            if(flowLevel_ == 0) { blockChecks!("Mapping", TokenID.BlockMappingStart)(); }

            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // Simple keys are allowed after '?' in the block context.
            allowSimpleKey_ = (flowLevel_ == 0);

            Mark startMark = reader_.mark;
            // Skip the '?' indicator.
            reader_.forward();
            tokens_.push(keyToken(startMark, reader_.mark));
        }
612 
        /// Add VALUE token. Might add KEY and/or BLOCK-MAPPING-START in the process.
        void fetchValue() @safe
        {
            //Do we determine a simple key?
            if(possibleSimpleKeys_.length > flowLevel_ &&
               !possibleSimpleKeys_[flowLevel_].isNull)
            {
                const key = possibleSimpleKeys_[flowLevel_];
                possibleSimpleKeys_[flowLevel_].isNull = true;
                Mark keyMark = Mark(key.line, key.column);
                // Position of the key's token relative to the queue head.
                const idx = key.tokenIndex - tokensTaken_;

                // NOTE(review): idx is unsigned, so this assert can never fire;
                // an inconsistency above would wrap around instead of going negative.
                assert(idx >= 0);

                // Add KEY.
                // Manually inserting since tokens are immutable (need linked list).
                tokens_.insert(keyToken(keyMark, keyMark), idx);

                // If this key starts a new block mapping, we need to add BLOCK-MAPPING-START.
                if(flowLevel_ == 0 && addIndent(key.column))
                {
                    tokens_.insert(blockMappingStartToken(keyMark, keyMark), idx);
                }

                // There cannot be two simple keys in a row.
                allowSimpleKey_ = false;
            }
            // Part of a complex key
            else
            {
                // We can start a complex value if and only if we can start a simple key.
                enforce(flowLevel_ > 0 || allowSimpleKey_,
                        new ScannerException("Mapping values are not allowed here", reader_.mark));

                // If this value starts a new block mapping, we need to add
                // BLOCK-MAPPING-START. It'll be detected as an error later by the parser.
                if(flowLevel_ == 0 && addIndent(reader_.column))
                {
                    tokens_.push(blockMappingStartToken(reader_.mark, reader_.mark));
                }

                // Reset possible simple key on the current level.
                removePossibleSimpleKey();
                // Simple keys are allowed after ':' in the block context.
                allowSimpleKey_ = (flowLevel_ == 0);
            }

            // Add VALUE.
            Mark startMark = reader_.mark;
            reader_.forward();
            tokens_.push(valueToken(startMark, reader_.mark));
        }
665 
        /// Add ALIAS or ANCHOR token.
        void fetchAnchor_(TokenID id)() @safe
            if(id == TokenID.Alias || id == TokenID.Anchor)
        {
            // ALIAS/ANCHOR could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after ALIAS/ANCHOR.
            allowSimpleKey_ = false;

            // Scan first; throw any scan error before queueing the token.
            auto anchor = scanAnchor(id);
            throwIfError();
            tokens_.push(anchor);
        }

        /// Aliases to add ALIAS or ANCHOR token.
        alias fetchAlias = fetchAnchor_!(TokenID.Alias);
        alias fetchAnchor = fetchAnchor_!(TokenID.Anchor);
683 
684         /// Add TAG token.
685         void fetchTag() @safe
686         {
687             //TAG could start a simple key.
688             savePossibleSimpleKey();
689             //No simple keys after TAG.
690             allowSimpleKey_ = false;
691 
692             tokens_.push(scanTag());
693             throwIfError();
694         }
695 
        /// Add block SCALAR token.
        void fetchBlockScalar(ScalarStyle style)() @safe
            if(style == ScalarStyle.Literal || style == ScalarStyle.Folded)
        {
            // Reset possible simple key on the current level.
            removePossibleSimpleKey();
            // A simple key may follow a block scalar.
            allowSimpleKey_ = true;

            // Scan first; throw any scan error before queueing the token.
            auto blockScalar = scanBlockScalar(style);
            throwIfError();
            tokens_.push(blockScalar);
        }

        /// Aliases to add literal or folded block scalar.
        alias fetchLiteral = fetchBlockScalar!(ScalarStyle.Literal);
        alias fetchFolded = fetchBlockScalar!(ScalarStyle.Folded);
713 
        /// Add quoted flow SCALAR token.
        void fetchFlowScalar(ScalarStyle quotes)()
        {
            // A flow scalar could be a simple key.
            savePossibleSimpleKey();
            // No simple keys after flow scalars.
            allowSimpleKey_ = false;

            // Scan and add SCALAR; throw any scan error before queueing.
            auto scalar = scanFlowScalar(quotes);
            throwIfError();
            tokens_.push(scalar);
        }

        /// Aliases to add single or double quoted flow scalar.
        alias fetchSingle = fetchFlowScalar!(ScalarStyle.SingleQuoted);
        alias fetchDouble = fetchFlowScalar!(ScalarStyle.DoubleQuoted);
731 
        /// Add plain SCALAR token.
        void fetchPlain() @safe
        {
            // A plain scalar could be a simple key
            savePossibleSimpleKey();
            // No simple keys after plain scalars. But note that scanPlain() will
            // change this flag if the scan is finished at the beginning of the line.
            allowSimpleKey_ = false;
            // Scan the scalar; errors are reported via error_ and thrown below.
            auto plain = scanPlain();
            throwIfError();

            // Add SCALAR.
            tokens_.push(plain);
        }
746 
747     pure:
748 
749         ///Check if the next token is DIRECTIVE:        ^ '%' ...
750         bool checkDirective() @safe
751         {
752             return reader_.peekByte() == '%' && reader_.column == 0;
753         }
754 
        /// Check if the next token is DOCUMENT-START:   ^ '---' (' '|'\n')
        bool checkDocumentStart() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            // The indicator must begin at column 0 and be followed by whitespace.
            return reader_.column     == 0     &&
                   reader_.peekByte() == '-'   &&
                   reader_.prefix(3)  == "---" &&
                   reader_.peek(3).isWhiteSpace;
        }
764 
        /// Check if the next token is DOCUMENT-END:     ^ '...' (' '|'\n')
        bool checkDocumentEnd() @safe
        {
            // Check one char first, then all 3, to prevent reading outside the buffer.
            // The indicator must begin at column 0 and be followed by whitespace.
            return reader_.column     == 0     &&
                   reader_.peekByte() == '.'   &&
                   reader_.prefix(3)  == "..." &&
                   reader_.peek(3).isWhiteSpace;
        }
774 
775         /// Check if the next token is BLOCK-ENTRY:      '-' (' '|'\n')
776         bool checkBlockEntry() @safe
777         {
778             return !!reader_.peek(1).isWhiteSpace;
779         }
780 
781         /// Check if the next token is KEY(flow context):    '?'
782         ///
783         /// or KEY(block context):   '?' (' '|'\n')
784         bool checkKey() @safe
785         {
786             return (flowLevel_ > 0 || reader_.peek(1).isWhiteSpace);
787         }
788 
789         /// Check if the next token is VALUE(flow context):  ':'
790         ///
791         /// or VALUE(block context): ':' (' '|'\n')
792         bool checkValue() @safe
793         {
794             return flowLevel_ > 0 || reader_.peek(1).isWhiteSpace;
795         }
796 
797         /// Check if the next token is a plain scalar.
798         ///
799         /// A plain scalar may start with any non-space character except:
800         ///   '-', '?', ':', ',', '[', ']', '{', '}',
801         ///   '#', '&', '*', '!', '|', '>', '\'', '\"',
802         ///   '%', '@', '`'.
803         ///
804         /// It may also start with
805         ///   '-', '?', ':'
806         /// if it is followed by a non-space character.
807         ///
808         /// Note that we limit the last rule to the block context (except the
809         /// '-' character) because we want the flow context to be space
810         /// independent.
811         bool checkPlain() @safe
812         {
813             const c = reader_.peek();
814             if(!c.isNonScalarStartCharacter)
815             {
816                 return true;
817             }
818             return !reader_.peek(1).isWhiteSpace &&
819                    (c == '-' || (flowLevel_ == 0 && (c == '?' || c == ':')));
820         }
821 
822         /// Move to the next non-space character.
823         void findNextNonSpace() @safe
824         {
825             while(reader_.peekByte() == ' ') { reader_.forward(); }
826         }
827 
828         /// Scan a string of alphanumeric or "-_" characters.
829         ///
830         /// Assumes that the caller is building a slice in Reader, and puts the scanned
831         /// characters into that slice.
832         ///
833         /// In case of an error, error_ is set. Use throwIfError() to handle this.
834         void scanAlphaNumericToSlice(string name)(const Mark startMark)
835         {
836             size_t length;
837             dchar c = reader_.peek();
838             while(c.isAlphaNum || "-_"d.canFind(c)) { c = reader_.peek(++length); }
839 
840             if(length == 0)
841             {
842                 enum contextMsg = "While scanning " ~ name;
843                 error(contextMsg, startMark, expected("alphanumeric, '-' or '_'", c),
844                       reader_.mark);
845                 return;
846             }
847 
848             reader_.sliceBuilder.write(reader_.get(length));
849         }
850 
851         /// Scan and throw away all characters until next line break.
852         void scanToNextBreak() @safe
853         {
854             while(!reader_.peek().isBreak) { reader_.forward(); }
855         }
856 
857         /// Scan all characters until next line break.
858         ///
859         /// Assumes that the caller is building a slice in Reader, and puts the scanned
860         /// characters into that slice.
861         void scanToNextBreakToSlice() @safe
862         {
863             uint length;
864             while(!reader_.peek(length).isBreak)
865             {
866                 ++length;
867             }
868             reader_.sliceBuilder.write(reader_.get(length));
869         }
870 
871 
872         /// Move to next token in the file/stream.
873         ///
874         /// We ignore spaces, line breaks and comments.
875         /// If we find a line break in the block context, we set
876         /// allowSimpleKey` on.
877         ///
878         /// We do not yet support BOM inside the stream as the
879         /// specification requires. Any such mark will be considered as a part
880         /// of the document.
881         void scanToNextToken() @safe
882         {
883             // TODO(PyYAML): We need to make tab handling rules more sane. A good rule is:
884             //   Tabs cannot precede tokens
885             //   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
886             //   KEY(block), VALUE(block), BLOCK-ENTRY
887             // So the checking code is
888             //   if <TAB>:
889             //       allowSimpleKey_ = false
890             // We also need to add the check for `allowSimpleKey_ == true` to
891             // `unwindIndent` before issuing BLOCK-END.
892             // Scanners for block, flow, and plain scalars need to be modified.
893 
894             for(;;)
895             {
896                 findNextNonSpace();
897 
898                 if(reader_.peekByte() == '#') { scanToNextBreak(); }
899                 if(scanLineBreak() != '\0')
900                 {
901                     if(flowLevel_ == 0) { allowSimpleKey_ = true; }
902                 }
903                 else
904                 {
905                     break;
906                 }
907             }
908         }
909 
        /// Scan directive token.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanDirective() @safe
        {
            Mark startMark = reader_.mark;
            // Skip the '%'.
            reader_.forward();

            // Scan directive name
            reader_.sliceBuilder.begin();
            scanDirectiveNameToSlice(startMark);
            if(error_) { return Token.init; }
            const name = reader_.sliceBuilder.finish();

            reader_.sliceBuilder.begin();

            // Index where tag handle ends and suffix starts in a tag directive value.
            uint tagHandleEnd = uint.max;
            // Only %YAML and %TAG have values to scan; other (reserved)
            // directives get an empty value.
            if(name == "YAML")     { scanYAMLDirectiveValueToSlice(startMark); }
            else if(name == "TAG") { tagHandleEnd = scanTagDirectiveValueToSlice(startMark); }
            if(error_) { return Token.init; }
            char[] value = reader_.sliceBuilder.finish();

            Mark endMark = reader_.mark;

            DirectiveType directive;
            if(name == "YAML")     { directive = DirectiveType.YAML; }
            else if(name == "TAG") { directive = DirectiveType.TAG; }
            else
            {
                directive = DirectiveType.Reserved;
                // The rest of a reserved directive's line is ignored.
                scanToNextBreak();
            }

            // Only a comment or a line break may follow the directive.
            scanDirectiveIgnoredLine(startMark);
            if(error_) { return Token.init; }

            return directiveToken(startMark, endMark, value, directive, tagHandleEnd);
        }
948 
949         /// Scan name of a directive token.
950         ///
951         /// Assumes that the caller is building a slice in Reader, and puts the scanned
952         /// characters into that slice.
953         ///
954         /// In case of an error, error_ is set. Use throwIfError() to handle this.
955         void scanDirectiveNameToSlice(const Mark startMark) @safe
956         {
957             // Scan directive name.
958             scanAlphaNumericToSlice!"a directive"(startMark);
959             if(error_) { return; }
960 
961             if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
962             error("While scanning a directive", startMark,
963                   expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
964         }
965 
966         /// Scan value of a YAML directive token. Returns major, minor version separated by '.'.
967         ///
968         /// Assumes that the caller is building a slice in Reader, and puts the scanned
969         /// characters into that slice.
970         ///
971         /// In case of an error, error_ is set. Use throwIfError() to handle this.
972         void scanYAMLDirectiveValueToSlice(const Mark startMark) @safe
973         {
974             findNextNonSpace();
975 
976             scanYAMLDirectiveNumberToSlice(startMark);
977             if(error_) { return; }
978 
979             if(reader_.peekByte() != '.')
980             {
981                 error("While scanning a directive", startMark,
982                       expected("digit or '.'", reader_.peek()), reader_.mark);
983                 return;
984             }
985             // Skip the '.'.
986             reader_.forward();
987 
988             reader_.sliceBuilder.write('.');
989             scanYAMLDirectiveNumberToSlice(startMark);
990             if(error_) { return; }
991 
992             if(!" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
993             {
994                 error("While scanning a directive", startMark,
995                       expected("digit or '.'", reader_.peek()), reader_.mark);
996             }
997         }
998 
999         /// Scan a number from a YAML directive.
1000         ///
1001         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1002         /// characters into that slice.
1003         ///
1004         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1005         void scanYAMLDirectiveNumberToSlice(const Mark startMark) @safe
1006         {
1007             if(!isDigit(reader_.peek()))
1008             {
1009                 error("While scanning a directive", startMark,
1010                       expected("digit", reader_.peek()), reader_.mark);
1011                 return;
1012             }
1013 
1014             // Already found the first digit in the enforce(), so set length to 1.
1015             uint length = 1;
1016             while(reader_.peek(length).isDigit) { ++length; }
1017 
1018             reader_.sliceBuilder.write(reader_.get(length));
1019         }
1020 
1021         /// Scan value of a tag directive.
1022         ///
1023         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1024         /// characters into that slice.
1025         ///
1026         /// Returns: Length of tag handle (which is before tag prefix) in scanned data
1027         ///
1028         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1029         uint scanTagDirectiveValueToSlice(const Mark startMark) @safe
1030         {
1031             findNextNonSpace();
1032             const startLength = reader_.sliceBuilder.length;
1033             scanTagDirectiveHandleToSlice(startMark);
1034             if(error_) { return uint.max; }
1035             const handleLength = cast(uint)(reader_.sliceBuilder.length  - startLength);
1036             findNextNonSpace();
1037             scanTagDirectivePrefixToSlice(startMark);
1038 
1039             return handleLength;
1040         }
1041 
1042         /// Scan handle of a tag directive.
1043         ///
1044         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1045         /// characters into that slice.
1046         ///
1047         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1048         void scanTagDirectiveHandleToSlice(const Mark startMark) @safe
1049         {
1050             scanTagHandleToSlice!"directive"(startMark);
1051             if(error_) { return; }
1052             if(reader_.peekByte() == ' ') { return; }
1053             error("While scanning a directive handle", startMark,
1054                   expected("' '", reader_.peek()), reader_.mark);
1055         }
1056 
1057         /// Scan prefix of a tag directive.
1058         ///
1059         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1060         /// characters into that slice.
1061         ///
1062         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1063         void scanTagDirectivePrefixToSlice(const Mark startMark) @safe
1064         {
1065             scanTagURIToSlice!"directive"(startMark);
1066             if(" \0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { return; }
1067             error("While scanning a directive prefix", startMark,
1068                   expected("' '", reader_.peek()), reader_.mark);
1069         }
1070 
1071         /// Scan (and ignore) ignored line after a directive.
1072         ///
1073         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1074         void scanDirectiveIgnoredLine(const Mark startMark) @safe
1075         {
1076             findNextNonSpace();
1077             if(reader_.peekByte() == '#') { scanToNextBreak(); }
1078             if(reader_.peek().isBreak)
1079             {
1080                 scanLineBreak();
1081                 return;
1082             }
1083             error("While scanning a directive", startMark,
1084                   expected("comment or a line break", reader_.peek()), reader_.mark);
1085         }
1086 
1087 
1088         /// Scan an alias or an anchor.
1089         ///
1090         /// The specification does not restrict characters for anchors and
1091         /// aliases. This may lead to problems, for instance, the document:
1092         ///   [ *alias, value ]
1093         /// can be interpteted in two ways, as
1094         ///   [ "value" ]
1095         /// and
1096         ///   [ *alias , "value" ]
1097         /// Therefore we restrict aliases to ASCII alphanumeric characters.
1098         ///
1099         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1100         Token scanAnchor(const TokenID id) @safe
1101         {
1102             const startMark = reader_.mark;
1103             const dchar i = reader_.get();
1104 
1105             reader_.sliceBuilder.begin();
1106             if(i == '*') { scanAlphaNumericToSlice!"an alias"(startMark); }
1107             else         { scanAlphaNumericToSlice!"an anchor"(startMark); }
1108             // On error, value is discarded as we return immediately
1109             char[] value = reader_.sliceBuilder.finish();
1110             if(error_)   { return Token.init; }
1111 
1112             if(!reader_.peek().isWhiteSpace &&
1113                !"?:,]}%@"d.canFind(reader_.peekByte()))
1114             {
1115                 enum anchorCtx = "While scanning an anchor";
1116                 enum aliasCtx  = "While scanning an alias";
1117                 error(i == '*' ? aliasCtx : anchorCtx, startMark,
1118                       expected("alphanumeric, '-' or '_'", reader_.peek()), reader_.mark);
1119                 return Token.init;
1120             }
1121 
1122             if(id == TokenID.Alias)
1123             {
1124                 return aliasToken(startMark, reader_.mark, value);
1125             }
1126             if(id == TokenID.Anchor)
1127             {
1128                 return anchorToken(startMark, reader_.mark, value);
1129             }
1130             assert(false, "This code should never be reached");
1131         }
1132 
        /// Scan a tag token.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanTag() @safe
        {
            const startMark = reader_.mark;
            dchar c = reader_.peek(1);

            reader_.sliceBuilder.begin();
            scope(failure) { reader_.sliceBuilder.finish(); }
            // Index where tag handle ends and tag suffix starts in the tag value
            // (slice) we will produce.
            uint handleEnd;

            // Verbatim tag '!<...>': the full URI is given, no handle.
            if(c == '<')
            {
                reader_.forward(2);

                handleEnd = 0;
                scanTagURIToSlice!"tag"(startMark);
                if(error_) { return Token.init; }
                if(reader_.peekByte() != '>')
                {
                    error("While scanning a tag", startMark,
                          expected("'>'", reader_.peek()), reader_.mark);
                    return Token.init;
                }
                reader_.forward();
            }
            // A lone '!' followed by whitespace: the non-specific tag.
            else if(c.isWhiteSpace)
            {
                reader_.forward();
                handleEnd = 0;
                reader_.sliceBuilder.write('!');
            }
            else
            {
                uint length = 1;
                bool useHandle;

                // Look ahead for a second '!', which would end a named handle
                // (as in '!handle!suffix').
                while(!c.isBreakOrSpace)
                {
                    if(c == '!')
                    {
                        useHandle = true;
                        break;
                    }
                    ++length;
                    c = reader_.peek(length);
                }

                if(useHandle)
                {
                    scanTagHandleToSlice!"tag"(startMark);
                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                    if(error_) { return Token.init; }
                }
                else
                {
                    // Primary handle '!' directly followed by the suffix.
                    reader_.forward();
                    reader_.sliceBuilder.write('!');
                    handleEnd = cast(uint)reader_.sliceBuilder.length;
                }

                scanTagURIToSlice!"tag"(startMark);
                if(error_) { return Token.init; }
            }

            // A tag must be followed by whitespace or a line break.
            if(reader_.peek().isBreakOrSpace)
            {
                char[] slice = reader_.sliceBuilder.finish();
                return tagToken(startMark, reader_.mark, slice, handleEnd);
            }

            error("While scanning a tag", startMark, expected("' '", reader_.peek()),
                  reader_.mark);
            return Token.init;
        }
1211 
        /// Scan a block scalar token with specified style.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanBlockScalar(const ScalarStyle style) @safe
        {
            const startMark = reader_.mark;

            // Scan the header.
            reader_.forward();

            const indicators = scanBlockScalarIndicators(startMark);
            if(error_) { return Token.init; }

            const chomping   = indicators[0];
            const increment  = indicators[1];
            scanBlockScalarIgnoredLine(startMark);
            if(error_) { return Token.init; }

            // Determine the indentation level and go to the first non-empty line.
            Mark endMark;
            uint indent = max(1, indent_ + 1);

            reader_.sliceBuilder.begin();
            alias Transaction = SliceBuilder.Transaction;
            // Used to strip the last line breaks written to the slice at the end of the
            // scalar, which may be needed based on chomping.
            Transaction breaksTransaction = Transaction(&reader_.sliceBuilder);
            // Read the first indentation/line breaks before the scalar.
            size_t startLen = reader_.sliceBuilder.length;
            // increment == int.min means no explicit indentation indicator was
            // given; auto-detect the indent from the first non-empty line.
            if(increment == int.min)
            {
                auto indentation = scanBlockScalarIndentationToSlice();
                endMark = indentation[1];
                indent  = max(indent, indentation[0]);
            }
            else
            {
                indent += increment - 1;
                endMark = scanBlockScalarBreaksToSlice(indent);
            }

            // int.max means there's no line break (int.max is outside UTF-32).
            dchar lineBreak = cast(dchar)int.max;

            // Scan the inner part of the block scalar.
            while(reader_.column == indent && reader_.peekByte() != '\0')
            {
                breaksTransaction.commit();
                const bool leadingNonSpace = !" \t"d.canFind(reader_.peekByte());
                // This is where the 'interesting' non-whitespace data gets read.
                scanToNextBreakToSlice();
                lineBreak = scanLineBreak();


                // This transaction serves to rollback data read in the
                // scanBlockScalarBreaksToSlice() call.
                breaksTransaction = Transaction(&reader_.sliceBuilder);
                startLen = reader_.sliceBuilder.length;
                // The line breaks should actually be written _after_ the if() block
                // below. We work around that by inserting
                endMark = scanBlockScalarBreaksToSlice(indent);

                // This will not run during the last iteration (see the if() vs the
                // while()), hence breaksTransaction rollback (which happens after this
                // loop) will never roll back data written in this if() block.
                if(reader_.column == indent && reader_.peekByte() != '\0')
                {
                    // Unfortunately, folding rules are ambiguous.

                    // This is the folding according to the specification:
                    if(style == ScalarStyle.Folded && lineBreak == '\n' &&
                       leadingNonSpace && !" \t"d.canFind(reader_.peekByte()))
                    {
                        // No breaks were scanned; no need to insert the space in the
                        // middle of slice.
                        if(startLen == reader_.sliceBuilder.length)
                        {
                            reader_.sliceBuilder.write(' ');
                        }
                    }
                    else
                    {
                        // We need to insert in the middle of the slice in case any line
                        // breaks were scanned.
                        reader_.sliceBuilder.insert(lineBreak, startLen);
                    }

                    ////this is Clark Evans's interpretation (also in the spec
                    ////examples):
                    //
                    //if(style == ScalarStyle.Folded && lineBreak == '\n')
                    //{
                    //    if(startLen == endLen)
                    //    {
                    //        if(!" \t"d.canFind(reader_.peekByte()))
                    //        {
                    //            reader_.sliceBuilder.write(' ');
                    //        }
                    //        else
                    //        {
                    //            chunks ~= lineBreak;
                    //        }
                    //    }
                    //}
                    //else
                    //{
                    //    reader_.sliceBuilder.insertBack(lineBreak, endLen - startLen);
                    //}
                }
                else
                {
                    break;
                }
            }

            // If chomping is Keep, we keep (commit) the last scanned line breaks
            // (which are at the end of the scalar). Otherwise we remove them (end the
            // transaction).
            if(chomping == Chomping.Keep)  { breaksTransaction.commit(); }
            else                           { breaksTransaction.end(); }
            if(chomping != Chomping.Strip && lineBreak != int.max)
            {
                // If chomping is Keep, we keep the line break but the first line break
                // that isn't stripped (since chomping isn't Strip in this branch) must
                // be inserted _before_ the other line breaks.
                if(chomping == Chomping.Keep)
                {
                    reader_.sliceBuilder.insert(lineBreak, startLen);
                }
                // If chomping is not Keep, breaksTransaction was cancelled so we can
                // directly write the first line break (as it isn't stripped - chomping
                // is not Strip)
                else
                {
                    reader_.sliceBuilder.write(lineBreak);
                }
            }

            char[] slice = reader_.sliceBuilder.finish();
            return scalarToken(startMark, endMark, slice, style);
        }
1353 
1354         /// Scan chomping and indentation indicators of a scalar token.
1355         ///
1356         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1357         Tuple!(Chomping, int) scanBlockScalarIndicators(const Mark startMark) @safe
1358         {
1359             auto chomping = Chomping.Clip;
1360             int increment = int.min;
1361             dchar c       = reader_.peek();
1362 
1363             /// Indicators can be in any order.
1364             if(getChomping(c, chomping))
1365             {
1366                 getIncrement(c, increment, startMark);
1367                 if(error_) { return tuple(Chomping.init, int.max); }
1368             }
1369             else
1370             {
1371                 const gotIncrement = getIncrement(c, increment, startMark);
1372                 if(error_)       { return tuple(Chomping.init, int.max); }
1373                 if(gotIncrement) { getChomping(c, chomping); }
1374             }
1375 
1376             if(" \0\n\r\u0085\u2028\u2029"d.canFind(c))
1377             {
1378                 return tuple(chomping, increment);
1379             }
1380             error("While scanning a block scalar", startMark,
1381                   expected("chomping or indentation indicator", c), reader_.mark);
1382             return tuple(Chomping.init, int.max);
1383         }
1384 
1385         /// Get chomping indicator, if detected. Return false otherwise.
1386         ///
1387         /// Used in scanBlockScalarIndicators.
1388         ///
1389         /// Params:
1390         ///
1391         /// c        = The character that may be a chomping indicator.
1392         /// chomping = Write the chomping value here, if detected.
1393         bool getChomping(ref dchar c, ref Chomping chomping) @safe
1394         {
1395             if(!"+-"d.canFind(c)) { return false; }
1396             chomping = c == '+' ? Chomping.Keep : Chomping.Strip;
1397             reader_.forward();
1398             c = reader_.peek();
1399             return true;
1400         }
1401 
1402         /// Get increment indicator, if detected. Return false otherwise.
1403         ///
1404         /// Used in scanBlockScalarIndicators.
1405         ///
1406         /// Params:
1407         ///
1408         /// c         = The character that may be an increment indicator.
1409         ///             If an increment indicator is detected, this will be updated to
1410         ///             the next character in the Reader.
1411         /// increment = Write the increment value here, if detected.
1412         /// startMark = Mark for error messages.
1413         ///
1414         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1415         bool getIncrement(ref dchar c, ref int increment, const Mark startMark) @safe
1416         {
1417             if(!c.isDigit) { return false; }
1418             // Convert a digit to integer.
1419             increment = c - '0';
1420             assert(increment < 10 && increment >= 0, "Digit has invalid value");
1421             if(increment > 0)
1422             {
1423                 reader_.forward();
1424                 c = reader_.peek();
1425                 return true;
1426             }
1427             error("While scanning a block scalar", startMark,
1428                   expected("indentation indicator in range 1-9", "0"), reader_.mark);
1429             return false;
1430         }
1431 
1432         /// Scan (and ignore) ignored line in a block scalar.
1433         ///
1434         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1435         void scanBlockScalarIgnoredLine(const Mark startMark) @safe
1436         {
1437             findNextNonSpace();
1438             if(reader_.peekByte()== '#') { scanToNextBreak(); }
1439 
1440             if(reader_.peek().isBreak)
1441             {
1442                 scanLineBreak();
1443                 return;
1444             }
1445             error("While scanning a block scalar", startMark,
1446                   expected("comment or line break", reader_.peek()), reader_.mark);
1447         }
1448 
1449         /// Scan indentation in a block scalar, returning line breaks, max indent and end mark.
1450         ///
1451         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1452         /// characters into that slice.
1453         Tuple!(uint, Mark) scanBlockScalarIndentationToSlice() @safe
1454         {
1455             uint maxIndent;
1456             Mark endMark = reader_.mark;
1457 
1458             while(" \n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))
1459             {
1460                 if(reader_.peekByte() != ' ')
1461                 {
1462                     reader_.sliceBuilder.write(scanLineBreak());
1463                     endMark = reader_.mark;
1464                     continue;
1465                 }
1466                 reader_.forward();
1467                 maxIndent = max(reader_.column, maxIndent);
1468             }
1469 
1470             return tuple(maxIndent, endMark);
1471         }
1472 
1473         /// Scan line breaks at lower or specified indentation in a block scalar.
1474         ///
1475         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1476         /// characters into that slice.
1477         Mark scanBlockScalarBreaksToSlice(const uint indent) @safe
1478         {
1479             Mark endMark = reader_.mark;
1480 
1481             for(;;)
1482             {
1483                 while(reader_.column < indent && reader_.peekByte() == ' ') { reader_.forward(); }
1484                 if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek()))  { break; }
1485                 reader_.sliceBuilder.write(scanLineBreak());
1486                 endMark = reader_.mark;
1487             }
1488 
1489             return endMark;
1490         }
1491 
1492         /// Scan a qouted flow scalar token with specified quotes.
1493         ///
1494         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1495         Token scanFlowScalar(const ScalarStyle quotes) @safe
1496         {
1497             const startMark = reader_.mark;
1498             const quote     = reader_.get();
1499 
1500             reader_.sliceBuilder.begin();
1501             scope(exit) if(error_) { reader_.sliceBuilder.finish(); }
1502 
1503             scanFlowScalarNonSpacesToSlice(quotes, startMark);
1504             if(error_) { return Token.init; }
1505 
1506             while(reader_.peek() != quote)
1507             {
1508                 scanFlowScalarSpacesToSlice(startMark);
1509                 if(error_) { return Token.init; }
1510                 scanFlowScalarNonSpacesToSlice(quotes, startMark);
1511                 if(error_) { return Token.init; }
1512             }
1513             reader_.forward();
1514 
1515             auto slice = reader_.sliceBuilder.finish();
1516             return scalarToken(startMark, reader_.mark, slice, quotes);
1517         }
1518 
        /// Scan nonspace characters in a flow scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the scanned
        /// characters into that slice.
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        void scanFlowScalarNonSpacesToSlice(const ScalarStyle quotes, const Mark startMark)
            @safe
        {
            for(;;) with(ScalarStyle)
            {
                dchar c = reader_.peek();

                // Number of code points (not bytes) in the run of "plain"
                // characters before the next break/space/special character.
                size_t numCodePoints;
                // This is an optimized way of writing:
                // while(!search.canFind(reader_.peek(numCodePoints))) { ++numCodePoints; }
                outer: for(size_t oldSliceLength;;)
                {
                    // This will not necessarily make slice 32 chars longer, as not all
                    // code points are 1 char.
                    const char[] slice = reader_.slice(numCodePoints + 32);
                    // If the slice stopped growing, we've exhausted the input
                    // without finding a terminating character.
                    if(slice.length == oldSliceLength)
                    {
                        error("While reading a flow scalar", startMark,
                              "reached end of file", reader_.mark);
                        return;
                    }
                    // Only scan the newly added bytes of the slice.
                    for(size_t i = oldSliceLength; i < slice.length;)
                    {
                        // slice is UTF-8 - need to decode
                        const ch = slice[i] < 0x80 ? slice[i++] : decode(slice, i);
                        if(ch.isFlowScalarBreakSpace) { break outer; }
                        ++numCodePoints;
                    }
                    oldSliceLength = slice.length;
                }

                // Copy the run of plain characters into the slice being built.
                reader_.sliceBuilder.write(reader_.get(numCodePoints));

                // c is now the break/space/special character that ended the run.
                c = reader_.peek();
                if(quotes == SingleQuoted && c == '\'' && reader_.peek(1) == '\'')
                {
                    // '' inside a single-quoted scalar is an escaped '.
                    reader_.forward(2);
                    reader_.sliceBuilder.write('\'');
                }
                else if((quotes == DoubleQuoted && c == '\'') ||
                        (quotes == SingleQuoted && "\"\\"d.canFind(c)))
                {
                    // The other style's quote/backslash has no special meaning here.
                    reader_.forward();
                    reader_.sliceBuilder.write(c);
                }
                else if(quotes == DoubleQuoted && c == '\\')
                {
                    // Backslash escape sequence in a double-quoted scalar.
                    reader_.forward();
                    c = reader_.peek();
                    if(dyaml.escapes.escapes.canFind(c))
                    {
                        reader_.forward();
                        // Escaping has been moved to Parser as it can't be done in
                        // place (in a slice) in case of '\P' and '\L' (very uncommon,
                        // but we don't want to break the spec)
                        char[2] escapeSequence = ['\\', cast(char)c];
                        reader_.sliceBuilder.write(escapeSequence);
                    }
                    else if(dyaml.escapes.escapeHexCodeList.canFind(c))
                    {
                        // \x, \u or \U escape: a fixed number of hex digits follows.
                        const hexLength = dyaml.escapes.escapeHexLength(c);
                        reader_.forward();

                        foreach(i; 0 .. hexLength) if(!reader_.peek(i).isHexDigit)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  expected("escape sequence of hexadecimal numbers",
                                           reader_.peek(i)), reader_.mark);
                            return;
                        }
                        char[] hex = reader_.get(hexLength);
                        char[2] escapeStart = ['\\', cast(char) c];
                        reader_.sliceBuilder.write(escapeStart);
                        reader_.sliceBuilder.write(hex);
                        // Note: This is just error checking; Parser does the actual
                        //       escaping (otherwise we could accidentally create an
                        //       escape sequence here that wasn't in input, breaking the
                        //       escaping code in parser, which is in parser because it
                        //       can't always be done in place)
                        try {
                            parse!int(hex, 16u);
                        }
                        catch (Exception)
                        {
                            error("While scanning a double quoted scalar", startMark,
                                  "overflow when parsing an escape sequence of " ~
                                  "hexadecimal numbers.", reader_.mark);
                            return;
                        }
                    }
                    else if("\n\r\u0085\u2028\u2029"d.canFind(c))
                    {
                        // Escaped line break: the break itself is discarded and any
                        // following breaks are scanned into the slice.
                        scanLineBreak();
                        scanFlowScalarBreaksToSlice(startMark);
                        if(error_) { return; }
                    }
                    else
                    {
                        error("While scanning a double quoted scalar", startMark,
                              buildMsg("found unsupported escape character", c),
                              reader_.mark);
                        return;
                    }
                }
                // A space or line break: the non-space run is over.
                else { return; }
            }
        }
1632 
1633         /// Scan space characters in a flow scalar.
1634         ///
1635         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1636         /// spaces into that slice.
1637         ///
1638         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1639         void scanFlowScalarSpacesToSlice(const Mark startMark) @safe
1640         {
1641             // Increase length as long as we see whitespace.
1642             size_t length;
1643             while(" \t"d.canFind(reader_.peekByte(length))) { ++length; }
1644             auto whitespaces = reader_.prefixBytes(length);
1645 
1646             // Can check the last byte without striding because '\0' is ASCII
1647             const c = reader_.peek(length);
1648             if(c == '\0')
1649             {
1650                 error("While scanning a quoted scalar", startMark,
1651                       "found unexpected end of buffer", reader_.mark);
1652                 return;
1653             }
1654 
1655             // Spaces not followed by a line break.
1656             if(!"\n\r\u0085\u2028\u2029"d.canFind(c))
1657             {
1658                 reader_.forward(length);
1659                 reader_.sliceBuilder.write(whitespaces);
1660                 return;
1661             }
1662 
1663             // There's a line break after the spaces.
1664             reader_.forward(length);
1665             const lineBreak = scanLineBreak();
1666 
1667             if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
1668 
1669             // If we have extra line breaks after the first, scan them into the
1670             // slice.
1671             const bool extraBreaks = scanFlowScalarBreaksToSlice(startMark);
1672             if(error_) { return; }
1673 
1674             // No extra breaks, one normal line break. Replace it with a space.
1675             if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
1676         }
1677 
1678         /// Scan line breaks in a flow scalar.
1679         ///
1680         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1681         /// line breaks into that slice.
1682         ///
1683         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1684         bool scanFlowScalarBreaksToSlice(const Mark startMark) @safe
1685         {
1686             // True if at least one line break was found.
1687             bool anyBreaks;
1688             for(;;)
1689             {
1690                 // Instead of checking indentation, we check for document separators.
1691                 const prefix = reader_.prefix(3);
1692                 if((prefix == "---" || prefix == "...") &&
1693                    reader_.peek(3).isWhiteSpace)
1694                 {
1695                     error("While scanning a quoted scalar", startMark,
1696                           "found unexpected document separator", reader_.mark);
1697                     return false;
1698                 }
1699 
1700                 // Skip any whitespaces.
1701                 while(" \t"d.canFind(reader_.peekByte())) { reader_.forward(); }
1702 
1703                 // Encountered a non-whitespace non-linebreak character, so we're done.
1704                 if(!"\n\r\u0085\u2028\u2029"d.canFind(reader_.peek())) { break; }
1705 
1706                 const lineBreak = scanLineBreak();
1707                 anyBreaks = true;
1708                 reader_.sliceBuilder.write(lineBreak);
1709             }
1710             return anyBreaks;
1711         }
1712 
        /// Scan plain scalar token (no block, no quotes).
        ///
        /// In case of an error, error_ is set. Use throwIfError() to handle this.
        Token scanPlain() @safe
        {
            // We keep track of the allowSimpleKey_ flag here.
            // Indentation rules are loosened for the flow context.
            const startMark = reader_.mark;
            Mark endMark = startMark;
            // A plain scalar must be indented deeper than its block context.
            const indent = indent_ + 1;

            // We allow zero indentation for scalars, but then we need to check for
            // document separators at the beginning of the line.
            // if(indent == 0) { indent = 1; }

            reader_.sliceBuilder.begin();

            alias Transaction = SliceBuilder.Transaction;
            // Covers the whitespace scanned after each chunk; it is only committed
            // when another chunk follows (whitespace between chunks is part of the
            // scalar, trailing whitespace is not).
            Transaction spacesTransaction;
            // Stop at a comment.
            while(reader_.peekByte() != '#')
            {
                // Scan the entire plain scalar.
                size_t length;
                dchar c = void;
                // Moved the if() out of the loop for optimization.
                if(flowLevel_ == 0)
                {
                    // Block context: the chunk ends at whitespace or at a ':'
                    // that is followed by whitespace.
                    c = reader_.peek(length);
                    for(;;)
                    {
                        const cNext = reader_.peek(length + 1);
                        if(c.isWhiteSpace ||
                           (c == ':' && cNext.isWhiteSpace))
                        {
                            break;
                        }
                        ++length;
                        c = cNext;
                    }
                }
                else
                {
                    // Flow context: flow indicators also terminate the chunk.
                    for(;;)
                    {
                        c = reader_.peek(length);
                        if(c.isWhiteSpace || ",:?[]{}"d.canFind(c))
                        {
                            break;
                        }
                        ++length;
                    }
                }

                // It's not clear what we should do with ':' in the flow context.
                if(flowLevel_ > 0 && c == ':' &&
                   !reader_.peek(length + 1).isWhiteSpace &&
                   !",[]{}"d.canFind(reader_.peek(length + 1)))
                {
                    // This is an error; throw the slice away.
                    spacesTransaction.commit();
                    reader_.sliceBuilder.finish();
                    reader_.forward(length);
                    error("While scanning a plain scalar", startMark,
                          "found unexpected ':' . Please check " ~
                          "http://pyyaml.org/wiki/YAMLColonInFlowContext for details.",
                          reader_.mark);
                    return Token.init;
                }

                // Empty chunk: the scalar is finished.
                if(length == 0) { break; }

                // A plain scalar can never directly precede a simple key.
                allowSimpleKey_ = false;

                reader_.sliceBuilder.write(reader_.get(length));

                endMark = reader_.mark;

                // Keep the whitespace scanned before this chunk (it is between
                // chunks), then open a new transaction for whitespace after it.
                spacesTransaction.commit();
                spacesTransaction = Transaction(&reader_.sliceBuilder);

                const startLength = reader_.sliceBuilder.length;
                scanPlainSpacesToSlice();
                // Stop if no whitespace was scanned (nothing follows), or if we
                // fell below the required indentation in block context.
                if(startLength == reader_.sliceBuilder.length ||
                   (flowLevel_ == 0 && reader_.column < indent))
                {
                    break;
                }
            }

            // End (without committing) the last transaction so trailing
            // whitespace is not included in the scalar.
            spacesTransaction.end();
            char[] slice = reader_.sliceBuilder.finish();

            return scalarToken(startMark, endMark, slice, ScalarStyle.Plain);
        }
1808 
        /// Scan spaces in a plain scalar.
        ///
        /// Assumes that the caller is building a slice in Reader, and puts the spaces
        /// into that slice.
        void scanPlainSpacesToSlice() @safe
        {
            // The specification is really confusing about tabs in plain scalars.
            // We just forbid them completely. Do not use tabs in YAML!

            // Get as many plain spaces as there are.
            size_t length;
            while(reader_.peekByte(length) == ' ') { ++length; }
            char[] whitespaces = reader_.prefixBytes(length);
            reader_.forward(length);

            const dchar c = reader_.peek();
            if(!c.isNSChar)
            {
                // We have spaces, but no newline.
                if(whitespaces.length > 0) { reader_.sliceBuilder.write(whitespaces); }
                return;
            }

            // Newline after the spaces (if any)
            const lineBreak = scanLineBreak();
            // After a line break, a simple key may start again.
            allowSimpleKey_ = true;

            // True if the reader is at a document separator ("---" or "...")
            // followed by whitespace.
            static bool end(Reader reader_) @safe pure
            {
                const prefix = reader_.prefix(3);
                return ("---" == prefix || "..." == prefix)
                        && " \t\0\n\r\u0085\u2028\u2029"d.canFind(reader_.peek(3));
            }

            if(end(reader_)) { return; }

            bool extraBreaks;

            alias Transaction = SliceBuilder.Transaction;
            // NOTE(review): the early returns below leave this transaction
            // uncommitted - presumably that reverts the breaks written to the
            // slice when a document separator is found; confirm in SliceBuilder.
            auto transaction = Transaction(&reader_.sliceBuilder);
            // Line folding: a lone '\n' becomes a space (handled below);
            // all other breaks are written out as-is.
            if(lineBreak != '\n') { reader_.sliceBuilder.write(lineBreak); }
            while(reader_.peek().isNSChar)
            {
                if(reader_.peekByte() == ' ') { reader_.forward(); }
                else
                {
                    const lBreak = scanLineBreak();
                    extraBreaks  = true;
                    reader_.sliceBuilder.write(lBreak);

                    if(end(reader_)) { return; }
                }
            }
            transaction.commit();

            // No line breaks, only a space.
            if(lineBreak == '\n' && !extraBreaks) { reader_.sliceBuilder.write(' '); }
        }
1867 
1868         /// Scan handle of a tag token.
1869         ///
1870         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1871         /// characters into that slice.
1872         ///
1873         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1874         void scanTagHandleToSlice(string name)(const Mark startMark)
1875         {
1876             dchar c = reader_.peek();
1877             enum contextMsg = "While scanning a " ~ name;
1878             if(c != '!')
1879             {
1880                 error(contextMsg, startMark, expected("'!'", c), reader_.mark);
1881                 return;
1882             }
1883 
1884             uint length = 1;
1885             c = reader_.peek(length);
1886             if(c != ' ')
1887             {
1888                 while(c.isAlphaNum || "-_"d.canFind(c))
1889                 {
1890                     ++length;
1891                     c = reader_.peek(length);
1892                 }
1893                 if(c != '!')
1894                 {
1895                     reader_.forward(length);
1896                     error(contextMsg, startMark, expected("'!'", c), reader_.mark);
1897                     return;
1898                 }
1899                 ++length;
1900             }
1901 
1902             reader_.sliceBuilder.write(reader_.get(length));
1903         }
1904 
1905         /// Scan URI in a tag token.
1906         ///
1907         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1908         /// characters into that slice.
1909         ///
1910         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1911         void scanTagURIToSlice(string name)(const Mark startMark)
1912         {
1913             // Note: we do not check if URI is well-formed.
1914             dchar c = reader_.peek();
1915             const startLen = reader_.sliceBuilder.length;
1916             {
1917                 uint length;
1918                 while(c.isAlphaNum || c.isURIChar)
1919                 {
1920                     if(c == '%')
1921                     {
1922                         auto chars = reader_.get(length);
1923                         reader_.sliceBuilder.write(chars);
1924                         length = 0;
1925                         scanURIEscapesToSlice!name(startMark);
1926                         if(error_) { return; }
1927                     }
1928                     else { ++length; }
1929                     c = reader_.peek(length);
1930                 }
1931                 if(length > 0)
1932                 {
1933                     auto chars = reader_.get(length);
1934                     reader_.sliceBuilder.write(chars);
1935                     length = 0;
1936                 }
1937             }
1938             // OK if we scanned something, error otherwise.
1939             if(reader_.sliceBuilder.length > startLen) { return; }
1940 
1941             enum contextMsg = "While parsing a " ~ name;
1942             error(contextMsg, startMark, expected("URI", c), reader_.mark);
1943         }
1944 
1945         // Not @nogc yet because std.utf.decode is not @nogc
1946         /// Scan URI escape sequences.
1947         ///
1948         /// Assumes that the caller is building a slice in Reader, and puts the scanned
1949         /// characters into that slice.
1950         ///
1951         /// In case of an error, error_ is set. Use throwIfError() to handle this.
1952         void scanURIEscapesToSlice(string name)(const Mark startMark)
1953         {
1954             // URI escapes encode a UTF-8 string. We store UTF-8 code units here for
1955             // decoding into UTF-32.
1956             char[4] bytes;
1957             size_t bytesUsed;
1958 
1959             // Get one dchar by decoding data from bytes.
1960             //
1961             // This is probably slow, but simple and URI escapes are extremely uncommon
1962             // in YAML.
1963             //
1964             // Returns the number of bytes used by the dchar in bytes on success,
1965             // size_t.max on failure.
1966             static size_t getDchar(char[] bytes, Reader reader_) @safe
1967             {
1968                 size_t nextChar;
1969                 dchar c;
1970                 if(bytes[0] < 0x80)
1971                 {
1972                     c = bytes[0];
1973                     ++nextChar;
1974                 }
1975                 else
1976                 {
1977                     c = decode(bytes[], nextChar);
1978                 }
1979                 reader_.sliceBuilder.write(c);
1980                 if(bytes.length - nextChar > 0)
1981                 {
1982                     copy(bytes[nextChar..bytes.length], bytes[0..bytes.length-nextChar]);
1983                 }
1984                 return bytes.length - nextChar;
1985             }
1986 
1987             enum contextMsg = "While scanning a " ~ name;
1988             while(reader_.peekByte() == '%')
1989             {
1990                 reader_.forward();
1991                 if(bytesUsed == bytes.length)
1992                 {
1993                     bytesUsed = getDchar(bytes[], reader_);
1994                     if(bytesUsed == size_t.max)
1995                     {
1996                         error(contextMsg, startMark,
1997                                 "Invalid UTF-8 data encoded in URI escape sequence",
1998                                 reader_.mark);
1999                         return;
2000                     }
2001                 }
2002 
2003                 char b = 0;
2004                 uint mult = 16;
2005                 // Converting 2 hexadecimal digits to a byte.
2006                 foreach(k; 0 .. 2)
2007                 {
2008                     const dchar c = reader_.peek(k);
2009                     if(!c.isHexDigit)
2010                     {
2011                         auto msg = expected("URI escape sequence of 2 hexadecimal " ~
2012                                             "numbers", c);
2013                         error(contextMsg, startMark, msg, reader_.mark);
2014                         return;
2015                     }
2016 
2017                     uint digit;
2018                     if(c - '0' < 10)     { digit = c - '0'; }
2019                     else if(c - 'A' < 6) { digit = c - 'A'; }
2020                     else if(c - 'a' < 6) { digit = c - 'a'; }
2021                     else                 { assert(false); }
2022                     b += mult * digit;
2023                     mult /= 16;
2024                 }
2025                 bytes[bytesUsed++] = b;
2026 
2027                 reader_.forward(2);
2028             }
2029 
2030             bytesUsed = getDchar(bytes[0 .. bytesUsed], reader_);
2031         }
2032 
2033 
2034         /// Scan a line break, if any.
2035         ///
2036         /// Transforms:
2037         ///   '\r\n'      :   '\n'
2038         ///   '\r'        :   '\n'
2039         ///   '\n'        :   '\n'
2040         ///   '\u0085'    :   '\n'
2041         ///   '\u2028'    :   '\u2028'
2042         ///   '\u2029     :   '\u2029'
2043         ///   no break    :   '\0'
2044         dchar scanLineBreak() @safe
2045         {
2046             // Fast path for ASCII line breaks.
2047             const b = reader_.peekByte();
2048             if(b < 0x80)
2049             {
2050                 if(b == '\n' || b == '\r')
2051                 {
2052                     if(reader_.prefix(2) == "\r\n") { reader_.forward(2); }
2053                     else { reader_.forward(); }
2054                     return '\n';
2055                 }
2056                 return '\0';
2057             }
2058 
2059             const c = reader_.peek();
2060             if(c == '\x85')
2061             {
2062                 reader_.forward();
2063                 return '\n';
2064             }
2065             if(c == '\u2028' || c == '\u2029')
2066             {
2067                 reader_.forward();
2068                 return c;
2069             }
2070             return '\0';
2071         }
2072 }
2073 
2074 private:
2075 
/// Convert a UTF-32 (dchar) array to a UTF-8 string without throwing.
///
/// Invalid UTF-32 input is treated as a program bug: conversion failures
/// trigger an assertion failure instead of propagating an exception.
string utf32To8(C)(C[] str)
    if(is(Unqual!C == dchar))
{
    try
    {
        return str.to!string;
    }
    catch(ConvException e)
    {
        assert(false, "Unexpected invalid UTF-32 string");
    }
    catch(Exception e)
    {
        assert(false, "Unexpected exception during UTF-8 encoding");
    }
}