1 
2 //          Copyright Ferdinand Majerech 2011.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /**
8  * Class that processes YAML mappings, sequences and scalars into nodes.
9  * This can be used to add custom data types. A tutorial can be found
10  * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
11  */
12 module dyaml.constructor;
13 
14 
15 import std.array;
16 import std.algorithm;
17 import std.base64;
18 import std.container;
19 import std.conv;
20 import std.datetime;
21 import std.exception;
22 import std.regex;
23 import std..string;
24 import std.typecons;
25 import std.utf;
26 
27 import dyaml.node;
28 import dyaml.exception;
29 import dyaml.style;
30 
31 package:
32 
// Exception raised when a YAML value cannot be turned into a node.
class ConstructorException : YAMLException
{
    /// Construct a ConstructorException.
    ///
    /// The textual form of both marks is appended to the message, each on
    /// its own line ("start: ..." and "end: ...").
    ///
    /// Params:  msg   = Error message.
    ///          start = Start position of the error context.
    ///          end   = End position of the error context.
    ///          file  = Forwarded to the base class constructor.
    ///          line  = Forwarded to the base class constructor.
    this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__)
        @safe pure nothrow
    {
        string context = "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString();
        super(msg ~ context, file, line);
    }
}
48 
/** Constructs YAML values.
 *
 * Each YAML scalar, sequence or mapping has a tag specifying its data type.
 * Constructor uses user-specifiable functions to create a node of the desired
 * data type from a scalar, sequence or mapping.
 *
 * Each of these functions is associated with a tag, and can process either
 * a scalar, a sequence, or a mapping. The constructor passes each value to
 * the function with the corresponding tag, which then returns the resulting
 * value that can be stored in a node.
 *
 * If a tag is detected with no known constructor function, it is considered an error.
 */
/*
 * Construct a node from a scalar, a sequence or a mapping.
 *
 * The tag selects the conversion applied to the value. Standard YAML tags that
 * require a particular kind of value (scalar, sequence or mapping) fail when
 * instantiated with a different kind. The null and merge tags ignore the value.
 * Any other tag stores the value in the node unchanged.
 *
 * Params:  start = Start position of the node; stored in the returned node.
 *          end   = End position of the node; used for error reporting.
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node.
 *
 * Throws:  ConstructorException if the value cannot be converted.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    // Compile-time classification of the value this instantiation handles.
    enum isScalar   = is(T == string);
    enum isSequence = is(T == Node[]);
    enum isMapping  = is(T == Node.Pair[]);

    Node result;
    try
    {
        switch(tag)
        {
            case "tag:yaml.org,2002:null":
                result = Node(YAMLNull(), tag);
                break;

            case "tag:yaml.org,2002:bool":
                static if(isScalar)
                {
                    result = Node(constructBool(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be bools");
                }

            case "tag:yaml.org,2002:int":
                static if(isScalar)
                {
                    result = Node(constructLong(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be ints");
                }

            case "tag:yaml.org,2002:float":
                static if(isScalar)
                {
                    result = Node(constructReal(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be floats");
                }

            case "tag:yaml.org,2002:binary":
                static if(isScalar)
                {
                    result = Node(constructBinary(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be binary data");
                }

            case "tag:yaml.org,2002:timestamp":
                static if(isScalar)
                {
                    result = Node(constructTimestamp(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be timestamps");
                }

            case "tag:yaml.org,2002:str":
                static if(isScalar)
                {
                    result = Node(constructString(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be strings");
                }

            case "tag:yaml.org,2002:value":
                static if(isScalar)
                {
                    result = Node(constructString(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only scalars can be values");
                }

            case "tag:yaml.org,2002:omap":
                static if(isSequence)
                {
                    result = Node(constructOrderedMap(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only sequences can be ordered maps");
                }

            case "tag:yaml.org,2002:pairs":
                static if(isSequence)
                {
                    result = Node(constructPairs(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only sequences can be pairs");
                }

            case "tag:yaml.org,2002:set":
                static if(isMapping)
                {
                    result = Node(constructSet(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only mappings can be sets");
                }

            case "tag:yaml.org,2002:seq":
                static if(isSequence)
                {
                    result = Node(constructSequence(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only sequences can be sequences");
                }

            case "tag:yaml.org,2002:map":
                static if(isMapping)
                {
                    result = Node(constructMap(value), tag);
                    break;
                }
                else
                {
                    throw new Exception("Only mappings can be maps");
                }

            case "tag:yaml.org,2002:merge":
                result = Node(YAMLMerge(), tag);
                break;

            default:
                // Unrecognized tag: keep the raw value under that tag.
                result = Node(value, tag);
                break;
        }
    }
    catch(Exception e)
    {
        // Re-throw with the node's position attached to the message.
        string typeName = typeid(T).toString();
        throw new ConstructorException("Error constructing " ~ typeName ~ ":\n" ~ e.msg,
                                       start, end);
    }

    result.startMark_ = start;
    return result;
}
188 
189 private:
// Construct a boolean _node.
//
// The comparison is case-insensitive. "yes", "true" and "on" give true;
// "no", "false" and "off" give false.
//
// Throws: Exception for any other text.
bool constructBool(const string str) @safe
{
    string lowered = str.toLower();
    switch(lowered)
    {
        case "yes", "true", "on":
            return true;
        case "no", "false", "off":
            return false;
        default:
            throw new Exception("Unable to parse boolean value: " ~ lowered);
    }
}
198 
// Construct an integer (long) _node.
//
// Underscores are ignored. After an optional leading '+' or '-', the text is
// read as one of: "0", binary ("0b..."), hexadecimal ("0x..."), octal
// (leading '0'), sexagesimal (colon-separated base 60 groups) or decimal.
//
// Throws: Exception if the scalar is empty or is not a valid integer.
long constructLong(const string str) @safe
{
    string value = str.replace("_", "");

    // Validate before indexing, so that an empty scalar (or one consisting only
    // of underscores) is reported as a parse error, not a range violation.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    const char c = value[0];
    const long sign = c != '-' ? 1 : -1;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // A sign with nothing after it is not a number.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    long result;
    try
    {
        //Zero.
        if(value == "0")               {result = cast(long)0;}
        // The radix forms are parsed as long (not int) so that the whole
        // range of the return type is accepted.
        //Binary.
        else if(value.startsWith("0b")){result = sign * to!long(value[2 .. $], 2);}
        //Hexadecimal.
        else if(value.startsWith("0x")){result = sign * to!long(value[2 .. $], 16);}
        //Octal.
        else if(value[0] == '0')       {result = sign * to!long(value, 8);}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            // Accumulate groups from the least significant (rightmost) one.
            long val;
            long base = 1;
            foreach_reverse(digit; value.split(":"))
            {
                val += to!long(digit) * base;
                base *= 60;
            }
            result = sign * val;
        }
        //Decimal.
        else{result = sign * to!long(value);}
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse integer value: " ~ value);
    }

    return result;
}
@safe unittest
{
    string canonical   = "685230";
    string decimal     = "+685_230";
    string octal       = "02472256";
    string hexadecimal = "0x_0A_74_AE";
    string binary      = "0b1010_0111_0100_1010_1110";
    string sexagesimal = "190:20:30";

    assert(685230 == constructLong(canonical));
    assert(685230 == constructLong(decimal));
    assert(685230 == constructLong(octal));
    assert(685230 == constructLong(hexadecimal));
    assert(685230 == constructLong(binary));
    assert(685230 == constructLong(sexagesimal));

    // Radix forms must accept values that do not fit into int.
    assert(4_294_967_295L  == constructLong("0xFFFF_FFFF"));
    assert(-4_294_967_296L == constructLong("-0x1_0000_0000"));
    assert(4_294_967_296L  == constructLong("04_00000_00000"));

    // Empty and sign-only scalars are parse errors.
    assertThrown!Exception(constructLong(""));
    assertThrown!Exception(constructLong("_"));
    assertThrown!Exception(constructLong("-"));
}
261 
// Construct a floating point (real) _node.
//
// Underscores are ignored and the text is lowercased. After an optional
// leading '+' or '-', the text is read as ".inf", ".nan", a sexagesimal number
// (colon-separated base 60 groups) or a plain floating point number.
//
// Throws: Exception if the scalar is empty or is not a valid float.
real constructReal(const string str) @safe
{
    string value = str.replace("_", "").toLower();

    // Validate before indexing, so that an empty scalar (or one consisting only
    // of underscores) is reported as a parse error, not a range violation.
    enforce(value != "", new Exception("Unable to parse float value: " ~ str));

    const char c = value[0];
    const real sign = c != '-' ? 1.0 : -1.0;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // Bare "nan"/"inf" spellings are rejected; only the dotted forms handled
    // below are accepted.
    enforce(value != "" && value != "nan" && value != "inf" && value != "-inf",
            new Exception("Unable to parse float value: " ~ value));

    real result;
    try
    {
        //Infinity.
        if     (value == ".inf"){result = sign * real.infinity;}
        //Not a Number.
        else if(value == ".nan"){result = real.nan;}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            // Accumulate groups from the least significant (rightmost) one.
            real val = 0.0;
            real base = 1.0;
            foreach_reverse(digit; value.split(":"))
            {
                val += to!real(digit) * base;
                base *= 60.0;
            }
            result = sign * val;
        }
        //Plain floating point.
        else{result = sign * to!real(value);}
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse float value: \"" ~ value ~ "\"");
    }

    return result;
}
@safe unittest
{
    bool eq(real a, real b, real epsilon = 0.2) @safe
    {
        return a >= (b - epsilon) && a <= (b + epsilon);
    }

    string canonical   = "6.8523015e+5";
    string exponential = "685.230_15e+03";
    string fixed       = "685_230.15";
    string sexagesimal = "190:20:30.15";
    string negativeInf = "-.inf";
    string NaN         = ".NaN";

    assert(eq(685230.15, constructReal(canonical)));
    assert(eq(685230.15, constructReal(exponential)));
    assert(eq(685230.15, constructReal(fixed)));
    assert(eq(685230.15, constructReal(sexagesimal)));
    assert(eq(-real.infinity, constructReal(negativeInf)));
    assert(to!string(constructReal(NaN)) == "nan");

    // Empty and sign-only scalars are parse errors.
    assertThrown!Exception(constructReal(""));
    assertThrown!Exception(constructReal("_"));
    assertThrown!Exception(constructReal("+"));
}
326 
// Construct a binary (base64) _node.
//
// Spaces, tabs, carriage returns and line feeds in the encoded text are
// skipped on every platform before decoding.
//
// Throws: Exception if the remaining text is not valid base64.
ubyte[] constructBinary(const string value) @safe
{
    // For an unknown reason, this must be nested to work (compiler bug?).
    import std.array : array;

    try
    {
        // The set of skipped characters is spelled out rather than taken from
        // the platform's newline sequence, so the result does not depend on
        // the operating system.
        return Base64.decode(value.representation.filter!(c => !" \t\r\n".canFind(c)).array);
    }
    catch(Base64Exception e)
    {
        throw new Exception("Unable to decode base64 value: " ~ e.msg);
    }
}

@safe unittest
{
    auto test = "The Answer: 42".representation;
    char[] buffer;
    buffer.length = 256;
    string input = Base64.encode(test, buffer).idup;
    const value = constructBinary(input);
    assert(value == test);
    assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);

    // White space and line breaks inside the encoded text are ignored.
    assert(constructBinary("VGhl IEFu\r\nc3dl\tcg==") == "The Answer".representation);
    // Characters outside the base64 alphabet are still an error.
    assertThrown!Exception(constructBinary("VGhl*IEFu"));
}
354 
// Construct a timestamp (SysTime) _node.
//
// Accepted layout: "YYYY-M-D", optionally followed by a time of day
// ("T", "t", space or tab separated, "H:MM:SS" with an optional fraction),
// optionally followed by "Z" or a UTC offset ("+H", "-HH:MM", ...).
// A date alone, a time without an offset, and "Z" all yield a UTC time.
//
// Throws: Exception if the text does not start with a date or holds values
//         that do not form a valid date/time.
SysTime constructTimestamp(const string str) @safe
{
    string value = str;

    auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
    auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
    // Both alternatives are grouped so that the anchor and the optional
    // leading white space apply to "Z" and to a numeric offset alike.
    auto TZRegexp  = regex("^[ \t]*(?:Z|([-+][0-9][0-9]?)(:[0-9][0-9])?)");

    try
    {
        // First, get year, month and day.
        auto matches = match(value, YMDRegexp);

        enforce(!matches.empty,
                new Exception("Unable to parse timestamp value: " ~ value));

        auto captures = matches.front.captures;
        const year  = to!int(captures[1]);
        const month = to!int(captures[2]);
        const day   = to!int(captures[3]);

        // If available, get hour, minute, second and fraction, if present.
        value = matches.front.post;
        matches  = match(value, HMSRegexp);
        if(matches.empty)
        {
            return SysTime(DateTime(year, month, day), UTC());
        }

        captures = matches.front.captures;
        const hour   = to!int(captures[1]);
        const minute = to!int(captures[2]);
        const second = to!int(captures[3]);

        // The fraction is converted digit-wise to hecto-nanoseconds (7 decimal
        // places) instead of going through floating point, which could turn
        // ".10" into ".0999999". Digits beyond the 7th are dropped.
        // captures[4] is either empty or a '.' followed by zero or more digits.
        int hectonanosecond = 0;
        if(captures[4].length > 1)
        {
            auto digits = captures[4][1 .. $];
            if(digits.length > 7)
            {
                digits = digits[0 .. 7];
            }
            hectonanosecond = to!int(digits);
            foreach(i; digits.length .. 7)
            {
                hectonanosecond *= 10;
            }
        }

        // If available, get timezone.
        value = matches.front.post;
        matches = match(value, TZRegexp);
        if(matches.empty || matches.front.captures[1].empty)
        {
            // No timezone, or an explicit "Z": the time is in UTC.
            return SysTime(DateTime(year, month, day, hour, minute, second),
                           hectonanosecond.dur!"hnsecs", UTC());
        }

        // We have a numeric offset, so parse it.
        captures = matches.front.captures;
        const sign      = captures[1][0] == '-' ? -1 : 1;
        const tzHours   = to!int(captures[1][1 .. $]);
        const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0;
        const tzOffset  = dur!"minutes"(sign * (60 * tzHours + tzMinutes));

        return SysTime(DateTime(year, month, day, hour, minute, second),
                       hectonanosecond.dur!"hnsecs",
                       new immutable SimpleTimeZone(tzOffset));
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg);
    }
    catch(DateTimeException e)
    {
        throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg);
    }

    assert(false, "This code should never be reached");
}
@safe unittest
{
    string timestamp(string value)
    {
        return constructTimestamp(value).toISOString();
    }

    string canonical      = "2001-12-15T02:59:43.1Z";
    string iso8601        = "2001-12-14t21:59:43.10-05:00";
    string spaceSeparated = "2001-12-14 21:59:43.10 -5";
    string noTZ           = "2001-12-15 2:59:43.10";
    string noFraction     = "2001-12-15 2:59:43";
    string ymd            = "2002-12-14";

    assert(timestamp(canonical)      == "20011215T025943.1Z");
    // The fraction is parsed digit-wise, so the results are exact.
    assert(timestamp(iso8601)        == "20011214T215943.1-05:00");
    assert(timestamp(spaceSeparated) == "20011214T215943.1-05:00");
    assert(timestamp(noTZ)           == "20011215T025943.1Z");
    assert(timestamp(noFraction)     == "20011215T025943Z");
    assert(timestamp(ymd)            == "20021214T000000Z");

    // All seven supported fraction digits are kept.
    assert(timestamp("2001-12-15 2:59:43.1234567") == "20011215T025943.1234567Z");
    // "Z" preceded by white space is UTC as well.
    assert(timestamp("2001-12-15 02:59:43 Z")      == "20011215T025943Z");
    // Text that does not start with a date is rejected.
    assertThrown!Exception(constructTimestamp("not a date"));
}
453 
// Construct a string _node.
//
// The scalar text is used as-is; no conversion is performed.
string constructString(const string str) @safe
{
    string text = str;
    return text;
}
459 
// Convert a sequence of single-element mappings into a sequence of pairs.
//
// Params:  type  = Name of the type being constructed; used in the error message.
//          nodes = Sequence elements, each expected to be a mapping of length 1.
//
// Throws: Exception if an element is not a mapping with exactly one entry.
Node.Pair[] getPairs(string type, const Node[] nodes) @safe
{
    Node.Pair[] result;
    result.reserve(nodes.length);
    foreach(element; nodes)
    {
        const isSingleEntryMapping =
            element.nodeID == NodeID.mapping && element.length == 1;
        if(!isSingleEntryMapping)
        {
            throw new Exception("While constructing " ~ type ~
                                ", expected a mapping with single element");
        }

        result ~= element.as!(Node.Pair[]);
    }

    return result;
}
476 
// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node.
//
// Throws: Exception if an element is not a single-entry mapping or if a key
//         occurs more than once.
Node.Pair[] constructOrderedMap(const Node[] nodes) @safe
{
    auto result = getPairs("ordered map", nodes);

    // Keys seen so far, used to detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto seenKeys = redBlackTree!Node();
    foreach(ref entry; result)
    {
        if(entry.key in seenKeys)
        {
            throw new Exception("Duplicate entry in an ordered map: "
                                ~ entry.key.debugString());
        }
        seenKeys.insert(entry.key);
    }
    return result;
}
@safe unittest
{
    // Single-pair mappings whose key/value types swap between consecutive
    // indices: odd indices use a string key with an integer value, even
    // indices use an integer key with a string value.
    Node[] alternateTypes(uint length) @safe
    {
        Node[] mappings;
        foreach(long index; 0 .. length)
        {
            auto entry = (index % 2) ? Node.Pair(index.to!string, index)
                                     : Node.Pair(index, index.to!string);
            mappings ~= Node([entry]);
        }
        return mappings;
    }

    // Single-pair mappings that all use a string key with an integer value.
    Node[] sameType(uint length) @safe
    {
        Node[] mappings;
        foreach(long index; 0 .. length)
        {
            auto entry = Node.Pair(index.to!string, index);
            mappings ~= Node([entry]);
        }
        return mappings;
    }

    // Inputs with repeated keys must be rejected.
    assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2)));
    assertThrown(constructOrderedMap(sameType(64) ~ sameType(16)));
    assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16)));

    // Inputs with unique keys must be accepted.
    assertNotThrown(constructOrderedMap(alternateTypes(8)));
    assertNotThrown(constructOrderedMap(sameType(64)));
    assertNotThrown(constructOrderedMap(alternateTypes(64)));
}
525 
// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node.
//
// Throws: Exception if an element is not a single-entry mapping.
Node.Pair[] constructPairs(const Node[] nodes) @safe
{
    auto result = getPairs("pairs", nodes);
    return result;
}
531 
// Construct a set _node.
//
// Returns the keys of the given pairs in their original order.
//
// Throws: Exception if a key occurs more than once.
Node[] constructSet(const Node.Pair[] pairs) @safe
{
    // Keys seen so far, used to detect duplicates; the stored value is unused.
    // In future, this should be replaced with something with deterministic
    // memory allocation if possible.
    ubyte[Node] seen;
    Node[] members;
    members.reserve(pairs.length);
    foreach(entry; pairs)
    {
        if((entry.key in seen) !is null)
        {
            throw new Exception("Duplicate entry in a set");
        }
        seen[entry.key] = 0;
        members ~= entry.key;
    }

    return members;
}
@safe unittest
{
    // Pairs with string keys "0" .. "length - 1" and null values.
    Node.Pair[] set(uint length) @safe
    {
        Node.Pair[] entries;
        foreach(long index; 0 .. length)
        {
            entries ~= Node.Pair(index.to!string, YAMLNull());
        }

        return entries;
    }

    auto shortWithDuplicates = set(8) ~ set(2);
    auto shortUnique         = set(8);
    auto longWithDuplicates  = set(64) ~ set(4);
    auto longUnique          = set(64);

    // Compares the keys of the pairs with the nodes, element by element.
    bool eq(Node.Pair[] a, Node[] b)
    {
        if(a.length != b.length)
        {
            return false;
        }
        foreach(index; 0 .. a.length)
        {
            if(a[index].key != b[index])
            {
                return false;
            }
        }
        return true;
    }

    // The function is exercised on copies of the generated inputs.
    auto shortWithDuplicatesCopy = shortWithDuplicates.dup;
    auto shortUniqueCopy         = shortUnique.dup;
    auto longWithDuplicatesCopy  = longWithDuplicates.dup;
    auto longUniqueCopy          = longUnique.dup;

    assertThrown(constructSet(shortWithDuplicatesCopy));
    assertNotThrown(constructSet(shortUniqueCopy));
    assertThrown(constructSet(longWithDuplicatesCopy));
    assertNotThrown(constructSet(longUniqueCopy));
}
591 
// Construct a sequence (array) _node.
//
// The nodes are returned unchanged.
Node[] constructSequence(Node[] nodes) @safe
{
    auto result = nodes;
    return result;
}
597 
// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node.
//
// The pairs are returned unchanged once all keys are known to be unique.
//
// Throws: Exception if a key occurs more than once.
Node.Pair[] constructMap(Node.Pair[] pairs) @safe
{
    // Keys seen so far, used to detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto seenKeys = redBlackTree!Node();
    foreach(ref entry; pairs)
    {
        if(entry.key in seenKeys)
        {
            throw new Exception("Duplicate entry in a map: " ~ entry.key.debugString());
        }
        seenKeys.insert(entry.key);
    }
    return pairs;
}