1 
2 //          Copyright Ferdinand Majerech 2011.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /**
8  * Class that processes YAML mappings, sequences and scalars into nodes.
9  * This can be used to add custom data types. A tutorial can be found
10  * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
11  */
12 module dyaml.constructor;
13 
14 
15 import std.array;
16 import std.algorithm;
17 import std.base64;
18 import std.container;
19 import std.conv;
20 import std.datetime;
21 import std.exception;
22 import std.regex;
23 import std.string;
24 import std.typecons;
25 import std.utf;
26 
27 import dyaml.node;
28 import dyaml.exception;
29 import dyaml.style;
30 
31 package:
32 
33 /** Constructs YAML values.
34  *
35  * Each YAML scalar, sequence or mapping has a tag specifying its data type.
 * Constructor uses user-specifiable functions to create a node of desired
37  * data type from a scalar, sequence or mapping.
38  *
39  *
40  * Each of these functions is associated with a tag, and can process either
41  * a scalar, a sequence, or a mapping. The constructor passes each value to
42  * the function with corresponding tag, which then returns the resulting value
43  * that can be stored in a node.
44  *
 * If a tag has no known constructor function, the node is built directly from
 * the unprocessed value with that tag attached.
46  */
/*
 * Construct a node.
 *
 * Dispatches on the tag: each standard YAML tag is handled by the matching
 * construct* function, and any other tag produces a node holding the
 * unprocessed value. Applying a standard tag to the wrong kind of value
 * (e.g. a sequence tagged as bool) throws a ConstructorException.
 *
 * Params:  start = Start position of the node.
 *          end   = End position of the node (only used in error messages).
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node, with its start mark set to `start`.
 *
 * Throws:  ConstructorException if the tag does not fit the kind of value,
 *          plus whatever the individual construct* functions throw.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    // Which kind of YAML value this instantiation was handed.
    enum isScalar   = is(T == string);
    enum isSequence = is(T == Node[]);
    enum isMapping  = is(T == Node.Pair[]);

    // Reports a standard tag applied to the wrong kind of value.
    noreturn fail(string expectedKind, string targetType)()
    {
        enum text = "Error constructing " ~ T.stringof ~ ": Only " ~ expectedKind ~ " can be " ~ targetType;
        throw new ConstructorException(text, start, "end", end);
    }

    Node result;
    switch(tag)
    {
        // Tags that ignore the value entirely.
        case "tag:yaml.org,2002:null":
            result = Node(YAMLNull(), tag);
            break;
        case "tag:yaml.org,2002:merge":
            result = Node(YAMLMerge(), tag);
            break;

        // Tags that require a scalar.
        case "tag:yaml.org,2002:bool":
            static if(isScalar)
            {
                result = Node(constructBool(value, start, end), tag);
                break;
            }
            else fail!("scalars", "bools")();
        case "tag:yaml.org,2002:int":
            static if(isScalar)
            {
                result = Node(constructLong(value, start, end), tag);
                break;
            }
            else fail!("scalars", "ints")();
        case "tag:yaml.org,2002:float":
            static if(isScalar)
            {
                result = Node(constructReal(value, start, end), tag);
                break;
            }
            else fail!("scalars", "floats")();
        case "tag:yaml.org,2002:binary":
            static if(isScalar)
            {
                result = Node(constructBinary(value, start, end), tag);
                break;
            }
            else fail!("scalars", "binary data")();
        case "tag:yaml.org,2002:timestamp":
            static if(isScalar)
            {
                result = Node(constructTimestamp(value, start, end), tag);
                break;
            }
            else fail!("scalars", "timestamps")();
        case "tag:yaml.org,2002:str":
            static if(isScalar)
            {
                result = Node(constructString(value, start, end), tag);
                break;
            }
            else fail!("scalars", "strings")();
        case "tag:yaml.org,2002:value":
            static if(isScalar)
            {
                result = Node(constructString(value, start, end), tag);
                break;
            }
            else fail!("scalars", "values")();

        // Tags that require a sequence.
        case "tag:yaml.org,2002:omap":
            static if(isSequence)
            {
                result = Node(constructOrderedMap(value, start, end), tag);
                break;
            }
            else fail!("sequences", "ordered maps")();
        case "tag:yaml.org,2002:pairs":
            static if(isSequence)
            {
                result = Node(constructPairs(value, start, end), tag);
                break;
            }
            else fail!("sequences", "pairs")();
        case "tag:yaml.org,2002:seq":
            static if(isSequence)
            {
                result = Node(constructSequence(value, start, end), tag);
                break;
            }
            else fail!("sequences", "sequences")();

        // Tags that require a mapping.
        case "tag:yaml.org,2002:set":
            static if(isMapping)
            {
                result = Node(constructSet(value, start, end), tag);
                break;
            }
            else fail!("mappings", "sets")();
        case "tag:yaml.org,2002:map":
            static if(isMapping)
            {
                result = Node(constructMap(value, start, end), tag);
                break;
            }
            else fail!("mappings", "maps")();

        // Any other tag: keep the value as it is.
        default:
            result = Node(value, tag);
            break;
    }

    result.startMark_ = start;

    return result;
}
169 
170 private:
// Construct a boolean _node.
//
// Accepts "yes"/"true"/"on" and "no"/"false"/"off" in any letter case.
// Throws a ConstructorException carrying both marks for anything else.
bool constructBool(const string str, const Mark start, const Mark end) @safe
{
    switch(str.toLower())
    {
        case "yes", "true", "on":
            return true;
        case "no", "false", "off":
            return false;
        default:
            throw new ConstructorException("Invalid boolean value: " ~ str, start, "ending at", end);
    }
}

@safe unittest
{
    const from = Mark("unittest", 1, 0);
    const to   = Mark("unittest", 1, 3);
    auto thrown = collectException!ConstructorException(constructBool("foo", from, to));
    assert(thrown.msg == "Invalid boolean value: foo");
}
184 
// Construct an integer (long) _node.
//
// Underscores are ignored and an optional leading '+' or '-' is honoured.
// Supported notations: "0", binary ("0b..."), hexadecimal ("0x..."), octal
// (leading '0'), sexagesimal ("190:20:30") and plain decimal.
//
// Params:  str   = Scalar text to parse.
//          start = Start position of the scalar (for error reporting).
//          end   = End position of the scalar (for error reporting).
//
// Returns: The parsed value.
//
// Throws:  ConstructorException if the text is empty, has no digits after the
//          sign, does not start with a digit, or cannot be converted.
long constructLong(const string str, const Mark start, const Mark end) @safe
{
    string value = str.replace("_", "");

    // Must be checked before indexing: an empty scalar (or one made only of
    // underscores) would otherwise trigger a range violation on value[0].
    enforce(value != "", new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end));

    const char c = value[0];
    const long sign = c != '-' ? 1 : -1;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // Every supported notation starts with a digit once the sign is removed.
    // This also rejects a second sign ("--5"), which to!long would accept.
    enforce(value != "" && value[0] >= '0' && value[0] <= '9',
            new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end));

    long result;
    try
    {
        //Zero.
        if(value == "0")               {result = cast(long)0;}
        // The radix notations are parsed as long (not int) so that values
        // above int.max, such as 0xFFFF_FFFF, are accepted.
        //Binary.
        else if(value.startsWith("0b")){result = sign * to!long(value[2 .. $], 2);}
        //Hexadecimal.
        else if(value.startsWith("0x")){result = sign * to!long(value[2 .. $], 16);}
        //Octal.
        else if(value[0] == '0')       {result = sign * to!long(value, 8);}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            long val;
            long base = 1;
            // Rightmost group is units, each group to the left is worth 60x more.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!long(digit) * base;
                base *= 60;
            }
            result = sign * val;
        }
        //Decimal.
        else{result = sign * to!long(value);}
    }
    catch(ConvException e)
    {
        throw new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end);
    }

    return result;
}
@safe unittest
{
    string canonical   = "685230";
    string decimal     = "+685_230";
    string octal       = "02472256";
    string hexadecimal = "0x_0A_74_AE";
    string binary      = "0b1010_0111_0100_1010_1110";
    string sexagesimal = "190:20:30";

    assert(685230 == constructLong(canonical, Mark.init, Mark.init));
    assert(685230 == constructLong(decimal, Mark.init, Mark.init));
    assert(685230 == constructLong(octal, Mark.init, Mark.init));
    assert(685230 == constructLong(hexadecimal, Mark.init, Mark.init));
    assert(685230 == constructLong(binary, Mark.init, Mark.init));
    assert(685230 == constructLong(sexagesimal, Mark.init, Mark.init));
    assert(collectException!ConstructorException(constructLong("+", Mark.init, Mark.init)).msg == "Unable to parse integer value: +");
    assert(collectException!ConstructorException(constructLong("0xINVALID", Mark.init, Mark.init)).msg == "Unable to parse integer value: 0xINVALID");

    // Radix notations are not limited to the int range.
    assert(4_294_967_295L == constructLong("0xFFFF_FFFF", Mark.init, Mark.init));
    assert(-4_294_967_296L == constructLong("-0x1_0000_0000", Mark.init, Mark.init));
    // Empty input is reported as a ConstructorException, not a range violation.
    assert(collectException!ConstructorException(constructLong("", Mark.init, Mark.init)).msg == "Unable to parse integer value: ");
    assert(collectException!ConstructorException(constructLong("_", Mark.init, Mark.init)).msg == "Unable to parse integer value: _");
    // A doubled sign is invalid.
    assert(collectException!ConstructorException(constructLong("--5", Mark.init, Mark.init)).msg == "Unable to parse integer value: --5");
}
249 
// Construct a floating point (real) _node.
//
// Underscores are ignored, letter case is ignored and an optional leading
// '+' or '-' is honoured. Supported notations: ".inf", ".nan", sexagesimal
// ("190:20:30.15") and plain/exponential floating point.
//
// Params:  str   = Scalar text to parse.
//          start = Start position of the scalar (for error reporting).
//          end   = End position of the scalar (for error reporting).
//
// Returns: The parsed value.
//
// Throws:  ConstructorException if the text is empty, has nothing after the
//          sign, does not start with a digit or '.', or cannot be converted.
real constructReal(const string str, const Mark start, const Mark end) @safe
{
    string value = str.replace("_", "").toLower();

    // Must be checked before indexing: an empty scalar (or one made only of
    // underscores) would otherwise trigger a range violation on value[0].
    enforce(value != "",
            new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end));

    const char c = value[0];
    const real sign = c != '-' ? 1.0 : -1.0;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // Every supported notation starts with a digit or '.' once the sign is
    // removed. This keeps out the spellings to!real would otherwise accept
    // ("nan", "inf", "infinity") as well as a second sign ("--5").
    enforce(value != "" && (value[0] == '.' || (value[0] >= '0' && value[0] <= '9')),
            new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end));

    real result;
    try
    {
        //Infinity.
        if     (value == ".inf"){result = sign * real.infinity;}
        //Not a Number.
        else if(value == ".nan"){result = real.nan;}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            real val = 0.0;
            real base = 1.0;
            // Rightmost group is units, each group to the left is worth 60x more.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!real(digit) * base;
                base *= 60.0;
            }
            result = sign * val;
        }
        //Plain floating point.
        else{result = sign * to!real(value);}
    }
    catch(ConvException e)
    {
        throw new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end);
    }

    return result;
}
@safe unittest
{
    bool eq(real a, real b, real epsilon = 0.2) @safe
    {
        return a >= (b - epsilon) && a <= (b + epsilon);
    }

    string canonical   = "6.8523015e+5";
    string exponential = "685.230_15e+03";
    string fixed       = "685_230.15";
    string sexagesimal = "190:20:30.15";
    string negativeInf = "-.inf";
    string NaN         = ".NaN";

    assert(eq(685230.15, constructReal(canonical, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(exponential, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(fixed, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(sexagesimal, Mark.init, Mark.init)));
    assert(eq(-real.infinity, constructReal(negativeInf, Mark.init, Mark.init)));
    assert(to!string(constructReal(NaN, Mark.init, Mark.init)) == "nan");
    assert(collectException!ConstructorException(constructReal("+", Mark.init, Mark.init)).msg == "Unable to parse float value: \"+\"");
    assert(collectException!ConstructorException(constructReal("74.invalid", Mark.init, Mark.init)).msg == "Unable to parse float value: \"74.invalid\"");

    // Leading-dot fractions are still accepted.
    assert(eq(0.5, constructReal(".5", Mark.init, Mark.init), 0.001));
    // Empty input is reported as a ConstructorException, not a range violation.
    assert(collectException!ConstructorException(constructReal("", Mark.init, Mark.init)).msg == "Unable to parse float value: \"\"");
    // Spellings understood by to!real but not part of the YAML float syntax.
    assert(collectException!ConstructorException(constructReal("nan", Mark.init, Mark.init)).msg == "Unable to parse float value: \"nan\"");
    assert(collectException!ConstructorException(constructReal("infinity", Mark.init, Mark.init)).msg == "Unable to parse float value: \"infinity\"");
    assert(collectException!ConstructorException(constructReal("--5", Mark.init, Mark.init)).msg == "Unable to parse float value: \"--5\"");
}
316 
// Construct a binary (base64) _node.
//
// All ASCII white space (spaces, tabs, carriage returns, line feeds, ...) is
// removed before decoding, so base64 text that was wrapped or folded across
// several lines decodes the same as the unwrapped text.
//
// Params:  value = Base64 text.
//          start = Start position of the scalar (for error reporting).
//          end   = End position of the scalar (for error reporting).
//
// Returns: The decoded bytes.
//
// Throws:  ConstructorException if the remaining text is not valid base64.
ubyte[] constructBinary(const string value, const Mark start, const Mark end) @safe
{
    import std.ascii : isWhite;
    import std.array : array;

    try
    {
        return Base64.decode(value.representation.filter!(c => !isWhite(c)).array);
    }
    catch(Base64Exception e)
    {
        throw new ConstructorException("Unable to decode base64 value: " ~ e.msg, start, "ending at", end);
    }
}

@safe unittest
{
    auto test = "The Answer: 42".representation;
    char[] buffer;
    buffer.length = 256;
    string input = Base64.encode(test, buffer).idup;
    const value = constructBinary(input, Mark.init, Mark.init);
    assert(value == test);
    assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);

    // White space of any kind inside the base64 text is ignored.
    string wrapped = input[0 .. 8] ~ " \r\n\t" ~ input[8 .. $] ~ "\n";
    assert(constructBinary(wrapped, Mark.init, Mark.init) == test);

    // Characters outside the base64 alphabet are still an error.
    assert(collectException!ConstructorException(constructBinary("@@@@", Mark.init, Mark.init)) !is null);
}
344 
// Construct a timestamp (SysTime) _node.
//
// Parses "YYYY-M-D", optionally followed by a time ("T", "t" or blanks, then
// "H:MM:SS" with an optional fraction) and optionally by a time zone ("Z" or
// "+H", "-HH:MM", ...; blanks allowed before it). Without a time zone, or
// with "Z", the result is in UTC.
//
// Params:  str   = Scalar text to parse.
//          start = Start position of the scalar (for error reporting).
//          end   = End position of the scalar (for error reporting).
//
// Returns: The parsed point in time.
//
// Throws:  ConstructorException if the text does not start with a date or
//          any component is out of range.
SysTime constructTimestamp(const string str, const Mark start, const Mark end) @safe
{
    // Unparsed remainder of the input; shrinks as each part is consumed.
    string value = str;

    auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
    auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
    // The alternation is grouped so that both "Z" and a numeric offset are
    // anchored directly after the time (modulo blanks).
    auto TZRegexp  = regex("^[ \t]*(?:Z|([-+][0-9][0-9]?)(:[0-9][0-9])?)");

    try
    {
        // First, get year, month and day.
        auto matches = match(value, YMDRegexp);

        enforce(!matches.empty,
                new ConstructorException("Unable to parse timestamp value: " ~ str, start, "ending at", end));

        auto captures = matches.front.captures;
        const year  = to!int(captures[1]);
        const month = to!int(captures[2]);
        const day   = to!int(captures[3]);

        // If available, get hour, minute, second and fraction, if present.
        value = matches.front.post;
        matches  = match(value, HMSRegexp);
        if(matches.empty)
        {
            return SysTime(DateTime(year, month, day), UTC());
        }

        captures = matches.front.captures;
        const hour   = to!int(captures[1]);
        const minute = to!int(captures[2]);
        const second = to!int(captures[3]);

        // Fraction of a second in hectonanoseconds (10^-7 s). Computed from
        // the digits directly rather than through floating point, so ".10"
        // is exactly 1_000_000 and never 999_999.
        int hectonanosecond = 0;
        if(captures[4].length > 1)
        {
            // Drop the leading '.', keep at most 7 digits (the precision of
            // a hectonanosecond) and scale shorter fractions up.
            auto digits = captures[4][1 .. $];
            if(digits.length > 7) {digits = digits[0 .. 7];}
            hectonanosecond = to!int(digits);
            foreach(_; digits.length .. 7) {hectonanosecond *= 10;}
        }

        // If available, get timezone.
        value = matches.front.post;
        matches = match(value, TZRegexp);
        if(matches.empty || matches.front.captures[1].empty)
        {
            // No timezone, or an explicit "Z": UTC.
            return SysTime(DateTime(year, month, day, hour, minute, second),
                           hectonanosecond.dur!"hnsecs", UTC());
        }

        // We have a numeric offset, so parse it.
        captures = matches.front.captures;
        const sign      = captures[1][0] == '-' ? -1 : 1;
        const tzHours   = to!int(captures[1][1 .. $]);
        const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0;
        const tzOffset  = dur!"minutes"(sign * (60 * tzHours + tzMinutes));

        return SysTime(DateTime(year, month, day, hour, minute, second),
                       hectonanosecond.dur!"hnsecs",
                       new immutable SimpleTimeZone(tzOffset));
    }
    catch(ConvException e)
    {
        throw new ConstructorException("Unable to parse timestamp value " ~ str ~ " : " ~ e.msg,
                                       start, "ending at", end);
    }
    catch(DateTimeException e)
    {
        throw new ConstructorException("Invalid timestamp value " ~ str ~ " : " ~ e.msg,
                                       start, "ending at", end);
    }

    assert(false, "This code should never be reached");
}
@safe unittest
{
    string timestamp(string value)
    {
        return constructTimestamp(value, Mark.init, Mark.init).toISOString();
    }

    string canonical      = "2001-12-15T02:59:43.1Z";
    string iso8601        = "2001-12-14t21:59:43.10-05:00";
    string spaceSeparated = "2001-12-14 21:59:43.10 -5";
    string noTZ           = "2001-12-15 2:59:43.10";
    string noFraction     = "2001-12-15 2:59:43";
    string ymd            = "2002-12-14";

    assert(timestamp(canonical)      == "20011215T025943.1Z");
    // The fraction is parsed digit by digit, so it is exact.
    assert(timestamp(iso8601)        == "20011214T215943.1-05:00");
    assert(timestamp(spaceSeparated) == "20011214T215943.1-05:00");
    assert(timestamp(noTZ)           == "20011215T025943.1Z");
    assert(timestamp(noFraction)     == "20011215T025943Z");
    assert(timestamp(ymd)            == "20021214T000000Z");

    // "Z" separated from the time by blanks still means UTC.
    assert(timestamp("2001-12-15 2:59:43 Z") == "20011215T025943Z");
    // Digits beyond hectonanosecond precision are dropped.
    assert(timestamp("2001-12-15T02:59:43.123456789Z") == "20011215T025943.1234567Z");

    // Failures are reported as ConstructorException.
    assert(collectException!ConstructorException(constructTimestamp("not a date", Mark.init, Mark.init)).msg
           == "Unable to parse timestamp value: not a date");
    assert(collectException!ConstructorException(constructTimestamp("2001-13-40", Mark.init, Mark.init)) !is null);
}
443 
// Construct a string _node.
//
// A string scalar needs no conversion: the text is handed back as it is.
// The marks are accepted only so the signature matches the other
// construct* functions.
string constructString(const string str, const Mark start, const Mark end) @safe
{
    return str;
}
449 
// Convert a sequence of single-element mappings into a sequence of pairs.
//
// `type` names what is being constructed and only appears in the error
// message. Throws a ConstructorException, positioned at the offending node,
// for any node that is not a mapping with exactly one entry.
Node.Pair[] getPairs(string type)(const Node[] nodes) @safe
{
    enum complaint = "While constructing " ~ type ~ ", expected a mapping with single element";

    Node.Pair[] collected;
    collected.reserve(nodes.length);
    foreach(node; nodes)
    {
        // The length is only queried once the node is known to be a mapping.
        const isSingleEntryMapping = node.nodeID == NodeID.mapping && node.length == 1;
        if(!isSingleEntryMapping)
        {
            throw new ConstructorException(complaint, node.startMark);
        }

        collected ~= node.as!(Node.Pair[]);
    }

    return collected;
}
466 
// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node.
//
// Each input node must be a mapping with a single entry. Throws a
// ConstructorException naming both positions when a key occurs twice.
Node.Pair[] constructOrderedMap(const Node[] nodes, const Mark start, const Mark end) @safe
{
    auto entries = getPairs!"an ordered map"(nodes);

    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto seenKeys = new RedBlackTree!Node();
    foreach(ref entry; entries)
    {
        auto earlier = seenKeys.equalRange(entry.key);
        if(!earlier.empty)
        {
            throw new ConstructorException(
                "Duplicate entry in an ordered map", entry.key.startMark,
                "first occurrence here", earlier.front.startMark);
        }
        seenKeys.insert(entry.key);
    }
    return entries;
}
@safe unittest
{
    // Running line counter: every generated key gets the next line as its
    // start mark, so the expected positions below depend on call order.
    uint lines;

    // Single-entry mappings whose keys alternate between integer and string.
    Node[] alternateTypes(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string);
            pair.key.startMark_ = Mark("unittest", lines++, 0);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    // Single-entry mappings whose keys are all strings.
    Node[] sameType(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = Node.Pair(i.to!string, i);
            pair.key.startMark_ = Mark("unittest", lines++, 0);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    assert(collectException!ConstructorException(
               constructOrderedMap(alternateTypes(8) ~ alternateTypes(2), Mark.init, Mark.init)).message
           == "Duplicate entry in an ordered map\nunittest:9,1\nfirst occurrence here: unittest:1,1");
    assertNotThrown(constructOrderedMap(alternateTypes(8), Mark.init, Mark.init));
    assert(collectException!ConstructorException(
               constructOrderedMap(sameType(64) ~ sameType(16), Mark.init, Mark.init)).message
           == "Duplicate entry in an ordered map\nunittest:83,1\nfirst occurrence here: unittest:19,1");
    assert(collectException!ConstructorException(
               constructOrderedMap(alternateTypes(64) ~ alternateTypes(16), Mark.init, Mark.init)).message
           == "Duplicate entry in an ordered map\nunittest:163,1\nfirst occurrence here: unittest:99,1");
    assertNotThrown(constructOrderedMap(sameType(64), Mark.init, Mark.init));
    assertNotThrown(constructOrderedMap(alternateTypes(64), Mark.init, Mark.init));
    assert(collectException!ConstructorException(
               constructOrderedMap([Node([Node(1), Node(2)])], Mark.init, Mark.init)).message
           == "While constructing an ordered map, expected a mapping with single element\n<unknown>:1,1");
}
520 
// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node.
//
// Each input node must be a mapping with a single entry; unlike an ordered
// map, no duplicate check is made.
Node.Pair[] constructPairs(const Node[] nodes, const Mark start, const Mark end) @safe
{
    auto pairs = getPairs!"pairs"(nodes);
    return pairs;
}
526 
// Construct a set _node.
//
// A YAML set arrives as a mapping; its keys are the set members and its
// values are ignored.
//
// Params:  pairs = Entries of the mapping.
//          start = Start position of the node (unused).
//          end   = End position of the node (unused).
//
// Returns: The keys, in their original order.
//
// Throws:  ConstructorException, naming both positions, if a key occurs more
//          than once.
Node[] constructSet(const Node.Pair[] pairs, const Mark start, const Mark end) @safe
{
    // In future, the map here should be replaced with something with deterministic
    // memory allocation if possible.
    // Maps every key seen so far to the position where it first appeared, so
    // a duplicate can be reported together with its first occurrence.
    Mark[Node] seen;
    Node[] nodes;
    nodes.reserve(pairs.length);
    foreach(pair; pairs)
    {
        if(auto firstMark = pair.key in seen)
        {
            throw new ConstructorException("Duplicate entry in a set", pair.key.startMark,
                                           "first occurrence here", *firstMark);
        }
        seen[pair.key] = pair.key.startMark;
        nodes ~= pair.key;
    }

    return nodes;
}
@safe unittest
{
    Node.Pair[] set(uint length) @safe
    {
        Node.Pair[] pairs;
        foreach(long i; 0 .. length)
        {
            pairs ~= Node.Pair(i.to!string, YAMLNull());
        }

        return pairs;
    }

    auto DuplicatesShort   = set(8) ~ set(2);
    auto noDuplicatesShort = set(8);
    auto DuplicatesLong    = set(64) ~ set(4);
    auto noDuplicatesLong  = set(64);

    auto nodeDuplicatesShort   = DuplicatesShort.dup;
    auto nodeNoDuplicatesShort = noDuplicatesShort.dup;
    auto nodeDuplicatesLong    = DuplicatesLong.dup;
    auto nodeNoDuplicatesLong  = noDuplicatesLong.dup;

    assertThrown!ConstructorException(constructSet(nodeDuplicatesShort, Mark.init, Mark.init));
    assertNotThrown(constructSet(nodeNoDuplicatesShort, Mark.init, Mark.init));
    assertThrown!ConstructorException(constructSet(nodeDuplicatesLong, Mark.init, Mark.init));
    assertNotThrown(constructSet(nodeNoDuplicatesLong, Mark.init, Mark.init));

    // Every key is returned, in order.
    assert(constructSet(nodeNoDuplicatesShort, Mark.init, Mark.init).length == 8);
}
586 
// Construct a sequence (array) _node.
//
// A sequence needs no conversion: the nodes are handed back as they are.
// The marks are accepted only so the signature matches the other
// construct* functions.
Node[] constructSequence(Node[] nodes, const Mark start, const Mark end) @safe
{
    Node[] sequence = nodes;
    return sequence;
}
592 
// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node.
//
// Params:  pairs = Entries of the mapping.
//          start = Start position of the node (unused).
//          end   = End position of the node (unused).
//
// Returns: The same pairs, once they are known to have distinct keys.
//
// Throws:  ConstructorException, naming both positions, if a key occurs more
//          than once.
Node.Pair[] constructMap(Node.Pair[] pairs, const Mark start, const Mark end) @safe
{
    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto keys = new RedBlackTree!Node();
    foreach(ref pair; pairs)
    {
        // Looked up as a range (rather than with `in`) so that the position
        // of the earlier occurrence can be included in the error.
        auto earlier = keys.equalRange(pair.key);
        enforce(earlier.empty, new ConstructorException(
            "Duplicate entry in a map: " ~ pair.key.debugString(), pair.key.startMark,
            "first occurrence here", earlier.front.startMark));
        keys.insert(pair.key);
    }
    return pairs;
}
@safe unittest
{
    auto distinct = [Node.Pair("a", 1), Node.Pair("b", 2)];
    assertNotThrown(constructMap(distinct, Mark.init, Mark.init));
    assert(constructMap(distinct, Mark.init, Mark.init).length == 2);

    auto repeated = distinct ~ Node.Pair("a", 3);
    assertThrown!ConstructorException(constructMap(repeated, Mark.init, Mark.init));
}