//          Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/**
 * Class that processes YAML mappings, sequences and scalars into nodes.
 * This can be used to add custom data types. A tutorial can be found
 * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
 */
module dyaml.constructor;


import std.array;
import std.algorithm;
import std.base64;
import std.container;
import std.conv;
import std.datetime;
import std.exception;
import std.regex;
import std.string;
import std.typecons;
import std.utf;

import dyaml.node;
import dyaml.exception;
import dyaml.style;

package:

// Exception thrown at constructor errors.
class ConstructorException : YAMLException
{
    /// Construct a ConstructorException.
    ///
    /// The start and end marks are appended to the message as text.
    ///
    /// Params:  msg   = Error message.
    ///          start = Start position of the error context.
    ///          end   = End position of the error context.
    ///          file  = Source file reported by the exception.
    ///          line  = Source line reported by the exception.
    this(string msg, Mark start, Mark end, string file = __FILE__, size_t line = __LINE__)
        @safe pure nothrow
    {
        super(msg ~ "\nstart: " ~ start.toString() ~ "\nend: " ~ end.toString(),
              file, line);
    }
}

/**
 * Construct a node.
 *
 * Each YAML scalar, sequence or mapping has a tag specifying its data type.
 * This function dispatches on that tag: for the standard "tag:yaml.org,2002:*"
 * tags the value is converted by the matching construct* function below; for
 * any other tag the node is built directly from the unconverted value.
 *
 * Params:  start = Start position of the node.
 *          end   = End position of the node (used for error reporting only).
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node, with its start mark set to start.
 *
 * Throws:  ConstructorException if the value kind does not fit the tag
 *          (e.g. a sequence tagged as int) or if conversion fails.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                      T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    Node newNode;
    try
    {
        // Every case either breaks out of the switch or throws; the static ifs
        // make sure only the conversion valid for T is ever instantiated.
        switch(tag)
        {
            case "tag:yaml.org,2002:null":
                newNode = Node(YAMLNull(), tag);
                break;
            case "tag:yaml.org,2002:bool":
                static if(is(T == string))
                {
                    newNode = Node(constructBool(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be bools");
            case "tag:yaml.org,2002:int":
                static if(is(T == string))
                {
                    newNode = Node(constructLong(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be ints");
            case "tag:yaml.org,2002:float":
                static if(is(T == string))
                {
                    newNode = Node(constructReal(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be floats");
            case "tag:yaml.org,2002:binary":
                static if(is(T == string))
                {
                    newNode = Node(constructBinary(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be binary data");
            case "tag:yaml.org,2002:timestamp":
                static if(is(T == string))
                {
                    newNode = Node(constructTimestamp(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be timestamps");
            case "tag:yaml.org,2002:str":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be strings");
            case "tag:yaml.org,2002:value":
                static if(is(T == string))
                {
                    newNode = Node(constructString(value), tag);
                    break;
                }
                else throw new Exception("Only scalars can be values");
            case "tag:yaml.org,2002:omap":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructOrderedMap(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be ordered maps");
            case "tag:yaml.org,2002:pairs":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructPairs(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be pairs");
            case "tag:yaml.org,2002:set":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructSet(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be sets");
            case "tag:yaml.org,2002:seq":
                static if(is(T == Node[]))
                {
                    newNode = Node(constructSequence(value), tag);
                    break;
                }
                else throw new Exception("Only sequences can be sequences");
            case "tag:yaml.org,2002:map":
                static if(is(T == Node.Pair[]))
                {
                    newNode = Node(constructMap(value), tag);
                    break;
                }
                else throw new Exception("Only mappings can be maps");
            case "tag:yaml.org,2002:merge":
                newNode = Node(YAMLMerge(), tag);
                break;
            default:
                // Unrecognized tag: keep the raw value, tagged as given.
                newNode = Node(value, tag);
                break;
        }
    }
    catch(Exception e)
    {
        // Re-throw with the node position attached.
        throw new ConstructorException("Error constructing " ~ typeid(T).toString()
                        ~ ":\n" ~ e.msg, start, end);
    }

    newNode.startMark_ = start;

    return newNode;
}

private:
// Construct a boolean _node.
//
// The comparison is case-insensitive. "yes", "true" and "on" give true;
// "no", "false" and "off" give false.
//
// Throws: Exception for any other text.
bool constructBool(const string str) @safe
{
    string value = str.toLower();
    if(value.among!("yes", "true", "on")){return true;}
    if(value.among!("no", "false", "off")){return false;}
    throw new Exception("Unable to parse boolean value: " ~ value);
}

// Construct an integer (long) _node.
//
// Underscores are ignored and an optional leading '+' or '-' is accepted.
// Recognized forms: "0", binary ("0b..."), hexadecimal ("0x..."), octal
// (leading '0'), sexagesimal (':'-separated, base 60) and plain decimal.
//
// Throws: Exception if the text is empty or is not a valid integer.
long constructLong(const string str) @safe
{
    string value = str.replace("_", "");
    // Must be checked before indexing: value[0] on an empty string raises a
    // RangeError, which is not an Exception and would escape constructNode.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    const char c = value[0];
    const long sign = c != '-' ? 1 : -1;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // A lone sign is not a number.
    enforce(value != "", new Exception("Unable to parse integer value: " ~ str));

    long result;
    try
    {
        //Zero.
        if(value == "0")               {result = 0;}
        //Binary.
        else if(value.startsWith("0b")){result = sign * to!long(value[2 .. $], 2);}
        //Hexadecimal.
        else if(value.startsWith("0x")){result = sign * to!long(value[2 .. $], 16);}
        //Octal.
        else if(value[0] == '0')       {result = sign * to!long(value, 8);}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            long val;
            long base = 1;
            // Rightmost group is the least significant base-60 digit.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!long(digit) * base;
                base *= 60;
            }
            result = sign * val;
        }
        //Decimal.
        else{result = sign * to!long(value);}
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse integer value: " ~ value);
    }

    return result;
}
@safe unittest
{
    string canonical   = "685230";
    string decimal     = "+685_230";
    string octal       = "02472256";
    string hexadecimal = "0x_0A_74_AE";
    string binary      = "0b1010_0111_0100_1010_1110";
    string sexagesimal = "190:20:30";

    assert(685230 == constructLong(canonical));
    assert(685230 == constructLong(decimal));
    assert(685230 == constructLong(octal));
    assert(685230 == constructLong(hexadecimal));
    assert(685230 == constructLong(binary));
    assert(685230 == constructLong(sexagesimal));

    // Non-decimal forms must cover the full long range, not just int.
    assert(4_294_967_296L == constructLong("0x1_0000_0000"));
    // Empty input and a lone sign are reported as ordinary exceptions.
    assertThrown(constructLong(""));
    assertThrown(constructLong("_"));
    assertThrown(constructLong("-"));
}

// Construct a floating point (real) _node.
//
// Underscores are ignored, the text is lowercased and an optional leading
// '+' or '-' is accepted. Recognized forms: ".inf", ".nan", sexagesimal
// (':'-separated, base 60) and plain floating point. Bare "nan" and "inf"
// (without the leading dot) are rejected.
//
// Throws: Exception if the text is empty or is not a valid float.
real constructReal(const string str) @safe
{
    string value = str.replace("_", "").toLower();
    // Must be checked before indexing: value[0] on an empty string raises a
    // RangeError, which is not an Exception and would escape constructNode.
    enforce(value != "", new Exception("Unable to parse float value: " ~ str));

    const char c = value[0];
    const real sign = c != '-' ? 1.0 : -1.0;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    enforce(value != "" && value != "nan" && value != "inf" && value != "-inf",
            new Exception("Unable to parse float value: " ~ value));

    real result;
    try
    {
        //Infinity.
        if     (value == ".inf"){result = sign * real.infinity;}
        //Not a Number.
        else if(value == ".nan"){result = real.nan;}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            real val = 0.0;
            real base = 1.0;
            // Rightmost group is the least significant base-60 digit.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!real(digit) * base;
                base *= 60.0;
            }
            result = sign * val;
        }
        //Plain floating point.
        else{result = sign * to!real(value);}
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse float value: \"" ~ value ~ "\"");
    }

    return result;
}
@safe unittest
{
    bool eq(real a, real b, real epsilon = 0.2) @safe
    {
        return a >= (b - epsilon) && a <= (b + epsilon);
    }

    string canonical   = "6.8523015e+5";
    string exponential = "685.230_15e+03";
    string fixed       = "685_230.15";
    string sexagesimal = "190:20:30.15";
    string negativeInf = "-.inf";
    string NaN         = ".NaN";

    assert(eq(685230.15, constructReal(canonical)));
    assert(eq(685230.15, constructReal(exponential)));
    assert(eq(685230.15, constructReal(fixed)));
    assert(eq(685230.15, constructReal(sexagesimal)));
    assert(eq(-real.infinity, constructReal(negativeInf)));
    assert(to!string(constructReal(NaN)) == "nan");

    // Empty input and a lone sign are reported as ordinary exceptions.
    assertThrown(constructReal(""));
    assertThrown(constructReal("_"));
    assertThrown(constructReal("+"));
}

// Construct a binary (base64) _node.
//
// Characters contained in std.ascii.newline are removed before decoding.
//
// Throws: Exception if the remaining text is not valid base64.
ubyte[] constructBinary(const string value) @safe
{
    import std.ascii : newline;
    import std.array : array;

    // For an unknown reason, this must be nested to work (compiler bug?).
    try
    {
        return Base64.decode(value.representation.filter!(c => !newline.canFind(c)).array);
    }
    catch(Base64Exception e)
    {
        throw new Exception("Unable to decode base64 value: " ~ e.msg);
    }
}

@safe unittest
{
    auto test = "The Answer: 42".representation;
    char[] buffer;
    buffer.length = 256;
    string input = Base64.encode(test, buffer).idup;
    const value = constructBinary(input);
    assert(value == test);
    assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);
}

// Construct a timestamp (SysTime) _node.
//
// The text is parsed in three steps: a mandatory year-month-day part, an
// optional time part (hours:minutes:seconds with optional fraction) and an
// optional timezone (either "Z" or a numeric offset such as "-05:00" or "-5").
//
// A date without a time yields midnight UTC. A time without a numeric offset
// (no timezone, or "Z") is taken as UTC.
//
// Throws: Exception if the text cannot be parsed or describes an invalid
//         date/time.
SysTime constructTimestamp(const string str) @safe
{
    string value = str;

    auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
    auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
    auto TZRegexp  = regex("^[ \t]*Z|([-+][0-9][0-9]?)(:[0-9][0-9])?");

    try
    {
        // First, get year, month and day.
        auto matches = match(value, YMDRegexp);

        enforce(!matches.empty,
                new Exception("Unable to parse timestamp value: " ~ value));

        auto captures = matches.front.captures;
        const year  = to!int(captures[1]);
        const month = to!int(captures[2]);
        const day   = to!int(captures[3]);

        // If available, get hour, minute, second and fraction, if present.
        value = matches.front.post;
        matches  = match(value, HMSRegexp);
        if(matches.empty)
        {
            return SysTime(DateTime(year, month, day), UTC());
        }

        captures = matches.front.captures;
        const hour            = to!int(captures[1]);
        const minute          = to!int(captures[2]);
        const second          = to!int(captures[3]);
        // The fraction capture includes its leading dot (or is empty), so
        // prefixing "0" gives a parseable real; scale it to 100 ns units.
        const hectonanosecond = cast(int)(to!real("0" ~ captures[4]) * 10_000_000);

        // If available, get timezone.
        value = matches.front.post;
        matches = match(value, TZRegexp);
        // Group 1 only participates when a numeric offset matched. It is empty
        // for the "Z" alternative, including when "Z" is preceded by blanks
        // (where the whole match is not exactly "Z").
        if(matches.empty || matches.front.captures[1].empty)
        {
            // No timezone offset: UTC.
            return SysTime(DateTime(year, month, day, hour, minute, second),
                           hectonanosecond.dur!"hnsecs", UTC());
        }

        // We have a timezone offset, so parse it.
        captures = matches.front.captures;
        const int sign  = (captures[1][0] == '-') ? -1 : 1;
        const tzHours   = to!int(captures[1][1 .. $]);
        // The minutes capture includes its leading ':'.
        const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0;
        const tzOffset  = dur!"minutes"(sign * (60 * tzHours + tzMinutes));

        return SysTime(DateTime(year, month, day, hour, minute, second),
                       hectonanosecond.dur!"hnsecs",
                       new immutable SimpleTimeZone(tzOffset));
    }
    catch(ConvException e)
    {
        throw new Exception("Unable to parse timestamp value " ~ value ~ " : " ~ e.msg);
    }
    catch(DateTimeException e)
    {
        throw new Exception("Invalid timestamp value " ~ value ~ " : " ~ e.msg);
    }

    assert(false, "This code should never be reached");
}
@safe unittest
{
    string timestamp(string value)
    {
        return constructTimestamp(value).toISOString();
    }

    string canonical      = "2001-12-15T02:59:43.1Z";
    string iso8601        = "2001-12-14t21:59:43.10-05:00";
    string spaceSeparated = "2001-12-14 21:59:43.10 -5";
    string noTZ           = "2001-12-15 2:59:43.10";
    string noFraction     = "2001-12-15 2:59:43";
    string ymd            = "2002-12-14";
    string spaceBeforeZ   = "2001-12-15T02:59:43.1 Z";

    assert(timestamp(canonical) == "20011215T025943.1Z");
    //avoiding float conversion errors
    assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" ||
           timestamp(iso8601) == "20011214T215943.1-05:00");
    assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" ||
           timestamp(spaceSeparated) == "20011214T215943.1-05:00");
    assert(timestamp(noTZ) == "20011215T025943.0999999Z" ||
           timestamp(noTZ) == "20011215T025943.1Z");
    assert(timestamp(noFraction) == "20011215T025943Z");
    assert(timestamp(ymd) == "20021214T000000Z");
    // "Z" separated by blanks is still UTC, not a zero-offset local zone.
    assert(timestamp(spaceBeforeZ) == timestamp(canonical));
}

// Construct a string _node.
//
// Returns the text unchanged.
string constructString(const string str) @safe
{
    return str;
}

// Convert a sequence of single-element mappings into a sequence of pairs.
//
// Params:  type  = Name of the type being constructed, used in error messages.
//          nodes = Nodes to convert; each must be a mapping of length 1.
//
// Throws: Exception if any node is not a mapping with exactly one element.
Node.Pair[] getPairs(string type, const Node[] nodes) @safe
{
    Node.Pair[] pairs;
    pairs.reserve(nodes.length);
    foreach(node; nodes)
    {
        enforce(node.nodeID == NodeID.mapping && node.length == 1,
                new Exception("While constructing " ~ type ~
                              ", expected a mapping with single element"));

        pairs ~= node.as!(Node.Pair[]);
    }

    return pairs;
}

// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node.
//
// Throws: Exception if a node is not a single-element mapping or if a key
//         occurs more than once.
Node.Pair[] constructOrderedMap(const Node[] nodes) @safe
{
    auto pairs = getPairs("ordered map", nodes);

    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto keys = new RedBlackTree!Node();
    foreach(ref pair; pairs)
    {
        enforce(!(pair.key in keys),
                new Exception("Duplicate entry in an ordered map: "
                              ~ pair.key.debugString()));
        keys.insert(pair.key);
    }
    return pairs;
}
@safe unittest
{
    Node[] alternateTypes(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    Node[] sameType(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = Node.Pair(i.to!string, i);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    assertThrown(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2)));
    assertNotThrown(constructOrderedMap(alternateTypes(8)));
    assertThrown(constructOrderedMap(sameType(64) ~ sameType(16)));
    assertThrown(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16)));
    assertNotThrown(constructOrderedMap(sameType(64)));
    assertNotThrown(constructOrderedMap(alternateTypes(64)));
}

// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node.
//
// Throws: Exception if a node is not a single-element mapping.
Node.Pair[] constructPairs(const Node[] nodes) @safe
{
    return getPairs("pairs", nodes);
}

// Construct a set _node.
//
// Returns the keys of the given pairs, in their original order; the values
// of the pairs are not used.
//
// Throws: Exception if a key occurs more than once.
Node[] constructSet(const Node.Pair[] pairs) @safe
{
    // In future, the map here should be replaced with something with deterministic
    // memory allocation if possible.
    // Detect duplicates.
    ubyte[Node] map;
    Node[] nodes;
    nodes.reserve(pairs.length);
    foreach(pair; pairs)
    {
        enforce((pair.key in map) is null, new Exception("Duplicate entry in a set"));
        map[pair.key] = 0;
        nodes ~= pair.key;
    }

    return nodes;
}
@safe unittest
{
    Node.Pair[] set(uint length) @safe
    {
        Node.Pair[] pairs;
        foreach(long i; 0 .. length)
        {
            pairs ~= Node.Pair(i.to!string, YAMLNull());
        }

        return pairs;
    }

    auto DuplicatesShort   = set(8) ~ set(2);
    auto noDuplicatesShort = set(8);
    auto DuplicatesLong    = set(64) ~ set(4);
    auto noDuplicatesLong  = set(64);

    bool eq(Node.Pair[] a, Node[] b)
    {
        if(a.length != b.length){return false;}
        foreach(i; 0 .. a.length)
        {
            if(a[i].key != b[i])
            {
                return false;
            }
        }
        return true;
    }

    auto nodeDuplicatesShort   = DuplicatesShort.dup;
    auto nodeNoDuplicatesShort = noDuplicatesShort.dup;
    auto nodeDuplicatesLong    = DuplicatesLong.dup;
    auto nodeNoDuplicatesLong  = noDuplicatesLong.dup;

    assertThrown(constructSet(nodeDuplicatesShort));
    assertNotThrown(constructSet(nodeNoDuplicatesShort));
    assertThrown(constructSet(nodeDuplicatesLong));
    assertNotThrown(constructSet(nodeNoDuplicatesLong));
}

// Construct a sequence (array) _node.
//
// Returns the nodes unchanged.
Node[] constructSequence(Node[] nodes) @safe
{
    return nodes;
}

// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node.
//
// Returns the pairs unchanged once they are known to be free of duplicates.
//
// Throws: Exception if a key occurs more than once.
Node.Pair[] constructMap(Node.Pair[] pairs) @safe
{
    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto keys = new RedBlackTree!Node();
    foreach(ref pair; pairs)
    {
        enforce(!(pair.key in keys),
                new Exception("Duplicate entry in a map: " ~ pair.key.debugString()));
        keys.insert(pair.key);
    }
    return pairs;
}