//          Copyright Ferdinand Majerech 2011.
// Distributed under the Boost Software License, Version 1.0.
//    (See accompanying file LICENSE_1_0.txt or copy at
//          http://www.boost.org/LICENSE_1_0.txt)

/**
 * Class that processes YAML mappings, sequences and scalars into nodes.
 * This can be used to add custom data types. A tutorial can be found
 * $(LINK2 https://dlang-community.github.io/D-YAML/, here).
 */
module dyaml.constructor;


import std.array;
import std.algorithm;
import std.base64;
import std.container;
import std.conv;
import std.datetime;
import std.exception;
import std.regex;
import std.string;
import std.typecons;
import std.utf;

import dyaml.node;
import dyaml.exception;
import dyaml.style;

package:

/**
 * Construct a node from a scalar, sequence or mapping value.
 *
 * Each YAML scalar, sequence or mapping has a tag specifying its data type.
 * The tag selects which construct* function converts the raw value:
 * the scalar tags (bool, int, float, binary, timestamp, str, value) require
 * a string, `omap`/`pairs`/`seq` require `Node[]` and `set`/`map` require
 * `Node.Pair[]`. The `null` and `merge` tags ignore the value. A value with
 * any other tag is stored in the node unchanged.
 *
 * Params:  start = Start position of the node.
 *          end   = End position of the node (only used in error messages).
 *          tag   = Tag (data type) of the node.
 *          value = Value to construct node from (string, nodes or pairs).
 *
 * Returns: Constructed node, with its start mark set to `start`.
 *
 * Throws:  ConstructorException if the kind of value does not fit the tag.
 *          Errors raised by the individual construct* functions propagate.
 */
Node constructNode(T)(const Mark start, const Mark end, const string tag,
                      T value) @safe
    if((is(T : string) || is(T == Node[]) || is(T == Node.Pair[])))
{
    Node newNode;

    // Raised when the tag demands a different kind of value than T.
    // Being `noreturn`, it also terminates the enclosing `case`.
    noreturn error(string a, string b)()
    {
        enum msg = "Error constructing " ~ T.stringof ~ ": Only " ~ a ~ " can be " ~ b;
        throw new ConstructorException(msg, start, "end", end);
    }

    switch(tag)
    {
        case "tag:yaml.org,2002:null":
            newNode = Node(YAMLNull(), tag);
            break;
        case "tag:yaml.org,2002:bool":
            static if(is(T == string))
            {
                newNode = Node(constructBool(value, start, end), tag);
                break;
            }
            else error!("scalars", "bools");
        case "tag:yaml.org,2002:int":
            static if(is(T == string))
            {
                newNode = Node(constructLong(value, start, end), tag);
                break;
            }
            else error!("scalars", "ints");
        case "tag:yaml.org,2002:float":
            static if(is(T == string))
            {
                newNode = Node(constructReal(value, start, end), tag);
                break;
            }
            else error!("scalars", "floats");
        case "tag:yaml.org,2002:binary":
            static if(is(T == string))
            {
                newNode = Node(constructBinary(value, start, end), tag);
                break;
            }
            else error!("scalars", "binary data");
        case "tag:yaml.org,2002:timestamp":
            static if(is(T == string))
            {
                newNode = Node(constructTimestamp(value, start, end), tag);
                break;
            }
            else error!("scalars", "timestamps");
        case "tag:yaml.org,2002:str":
            static if(is(T == string))
            {
                newNode = Node(constructString(value, start, end), tag);
                break;
            }
            else error!("scalars", "strings");
        case "tag:yaml.org,2002:value":
            static if(is(T == string))
            {
                newNode = Node(constructString(value, start, end), tag);
                break;
            }
            else error!("scalars", "values");
        case "tag:yaml.org,2002:omap":
            static if(is(T == Node[]))
            {
                newNode = Node(constructOrderedMap(value, start, end), tag);
                break;
            }
            else error!("sequences", "ordered maps");
        case "tag:yaml.org,2002:pairs":
            static if(is(T == Node[]))
            {
                newNode = Node(constructPairs(value, start, end), tag);
                break;
            }
            else error!("sequences", "pairs");
        case "tag:yaml.org,2002:set":
            static if(is(T == Node.Pair[]))
            {
                newNode = Node(constructSet(value, start, end), tag);
                break;
            }
            else error!("mappings", "sets");
        case "tag:yaml.org,2002:seq":
            static if(is(T == Node[]))
            {
                newNode = Node(constructSequence(value, start, end), tag);
                break;
            }
            else error!("sequences", "sequences");
        case "tag:yaml.org,2002:map":
            static if(is(T == Node.Pair[]))
            {
                newNode = Node(constructMap(value, start, end), tag);
                break;
            }
            else error!("mappings", "maps");
        case "tag:yaml.org,2002:merge":
            newNode = Node(YAMLMerge(), tag);
            break;
        default:
            // Unknown tag: keep the raw value and remember the tag.
            newNode = Node(value, tag);
            break;
    }

    newNode.startMark_ = start;

    return newNode;
}

private:
// Construct a boolean _node.
//
// Accepts yes/true/on and no/false/off in any letter case; anything else
// raises a ConstructorException carrying the start and end marks.
bool constructBool(const string str, const Mark start, const Mark end) @safe
{
    string value = str.toLower();
    if(value.among!("yes", "true", "on")){return true;}
    if(value.among!("no", "false", "off")){return false;}
    throw new ConstructorException("Invalid boolean value: " ~ str, start, "ending at", end);
}

@safe unittest
{
    assert(collectException!ConstructorException(constructBool("foo", Mark("unittest", 1, 0), Mark("unittest", 1, 3))).msg == "Invalid boolean value: foo");
}

// Construct an integer (long) _node.
//
// Underscores are ignored. After an optional sign the value may be "0",
// binary ("0b..."), hexadecimal ("0x..."), octal (leading '0'),
// sexagesimal (':'-separated base 60 groups) or plain decimal.
// Any value that cannot be parsed raises a ConstructorException.
long constructLong(const string str, const Mark start, const Mark end) @safe
{
    string value = str.replace("_", "");

    // Must be checked before looking at value[0]: an empty scalar (or one made
    // only of underscores) has to be reported as a parse error, not as an
    // out-of-bounds access.
    enforce(value != "", new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end));

    const char c = value[0];
    const long sign = c != '-' ? 1 : -1;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // A lone sign is not a number.
    enforce(value != "", new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end));

    long result;
    try
    {
        //Zero.
        if(value == "0")               {result = 0;}
        // The radix forms are parsed as long (not int) so that the whole
        // range of the result type is accepted.
        //Binary.
        else if(value.startsWith("0b")){result = sign * to!long(value[2 .. $], 2);}
        //Hexadecimal.
        else if(value.startsWith("0x")){result = sign * to!long(value[2 .. $], 16);}
        //Octal.
        else if(value[0] == '0')       {result = sign * to!long(value, 8);}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            long val;
            long base = 1;
            // Rightmost group is the least significant one.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!long(digit) * base;
                base *= 60;
            }
            result = sign * val;
        }
        //Decimal.
        else{result = sign * to!long(value);}
    }
    catch(ConvException e)
    {
        throw new ConstructorException("Unable to parse integer value: " ~ str, start, "ending at", end);
    }

    return result;
}
@safe unittest
{
    string canonical   = "685230";
    string decimal     = "+685_230";
    string octal       = "02472256";
    string hexadecimal = "0x_0A_74_AE";
    string binary      = "0b1010_0111_0100_1010_1110";
    string sexagesimal = "190:20:30";

    assert(685230 == constructLong(canonical, Mark.init, Mark.init));
    assert(685230 == constructLong(decimal, Mark.init, Mark.init));
    assert(685230 == constructLong(octal, Mark.init, Mark.init));
    assert(685230 == constructLong(hexadecimal, Mark.init, Mark.init));
    assert(685230 == constructLong(binary, Mark.init, Mark.init));
    assert(685230 == constructLong(sexagesimal, Mark.init, Mark.init));
    assert(collectException!ConstructorException(constructLong("+", Mark.init, Mark.init)).msg == "Unable to parse integer value: +");
    assert(collectException!ConstructorException(constructLong("0xINVALID", Mark.init, Mark.init)).msg == "Unable to parse integer value: 0xINVALID");

    // Values beyond the int range are accepted in every radix.
    assert(4_294_967_295L == constructLong("0xFFFF_FFFF", Mark.init, Mark.init));
    assert(-4_294_967_296L == constructLong("-0x1_0000_0000", Mark.init, Mark.init));
    assert(4_294_967_296L == constructLong("0b1_0000_0000_0000_0000_0000_0000_0000_0000", Mark.init, Mark.init));
    assert(8_589_934_592L == constructLong("0100000000000", Mark.init, Mark.init));

    // Empty input is a parse error rather than a range violation.
    assert(collectException!ConstructorException(constructLong("", Mark.init, Mark.init)).msg == "Unable to parse integer value: ");
    assert(collectException!ConstructorException(constructLong("_", Mark.init, Mark.init)).msg == "Unable to parse integer value: _");
}

// Construct a floating point (real) _node.
real constructReal(const string str, const Mark start, const Mark end) @safe
{
    // Underscores are ignored and matching is case-insensitive. After an
    // optional sign the value may be ".inf", ".nan", sexagesimal
    // (':'-separated base 60 groups) or a plain floating point literal.
    string value = str.replace("_", "").toLower();

    // Must be checked before looking at value[0]: an empty scalar has to be
    // reported as a parse error, not as an out-of-bounds access.
    enforce(value != "",
            new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end));

    const char c = value[0];
    const real sign = c != '-' ? 1.0 : -1.0;
    if(c == '-' || c == '+')
    {
        value = value[1 .. $];
    }

    // The spellings without the leading dot would be accepted by to!real
    // below, so they are rejected explicitly here.
    enforce(value != "" && value != "nan" && value != "inf" && value != "-inf",
            new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end));

    real result;
    try
    {
        //Infinity.
        if     (value == ".inf"){result = sign * real.infinity;}
        //Not a Number.
        else if(value == ".nan"){result = real.nan;}
        //Sexagesimal.
        else if(value.canFind(":"))
        {
            real val = 0.0;
            real base = 1.0;
            // Rightmost group is the least significant one.
            foreach_reverse(digit; value.split(":"))
            {
                val += to!real(digit) * base;
                base *= 60.0;
            }
            result = sign * val;
        }
        //Plain floating point.
        else{result = sign * to!real(value);}
    }
    catch(ConvException e)
    {
        throw new ConstructorException("Unable to parse float value: \"" ~ str ~ "\"", start, "ending at", end);
    }

    return result;
}
@safe unittest
{
    bool eq(real a, real b, real epsilon = 0.2) @safe
    {
        return a >= (b - epsilon) && a <= (b + epsilon);
    }

    string canonical   = "6.8523015e+5";
    string exponential = "685.230_15e+03";
    string fixed       = "685_230.15";
    string sexagesimal = "190:20:30.15";
    string negativeInf = "-.inf";
    string NaN         = ".NaN";

    assert(eq(685230.15, constructReal(canonical, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(exponential, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(fixed, Mark.init, Mark.init)));
    assert(eq(685230.15, constructReal(sexagesimal, Mark.init, Mark.init)));
    assert(eq(-real.infinity, constructReal(negativeInf, Mark.init, Mark.init)));
    assert(to!string(constructReal(NaN, Mark.init, Mark.init)) == "nan");
    assert(collectException!ConstructorException(constructReal("+", Mark.init, Mark.init)).msg == "Unable to parse float value: \"+\"");
    assert(collectException!ConstructorException(constructReal("74.invalid", Mark.init, Mark.init)).msg == "Unable to parse float value: \"74.invalid\"");

    // Empty input is a parse error rather than a range violation.
    assert(collectException!ConstructorException(constructReal("", Mark.init, Mark.init)).msg == "Unable to parse float value: \"\"");
}

// Construct a binary (base64) _node.
//
// All ASCII white space (line feeds, carriage returns, spaces, tabs) is
// removed before decoding, independent of the platform the code runs on.
// Invalid base64 raises a ConstructorException.
ubyte[] constructBinary(const string value, const Mark start, const Mark end) @safe
{
    import std.ascii : isWhite;
    import std.array : array;

    // For an unknown reason, this must be nested to work (compiler bug?).
    try
    {
        return Base64.decode(value.representation.filter!(c => !isWhite(c)).array);
    }
    catch(Base64Exception e)
    {
        throw new ConstructorException("Unable to decode base64 value: " ~ e.msg, start, "ending at", end);
    }
}

@safe unittest
{
    auto test = "The Answer: 42".representation;
    char[] buffer;
    buffer.length = 256;
    string input = Base64.encode(test, buffer).idup;
    const value = constructBinary(input, Mark.init, Mark.init);
    assert(value == test);
    assert(value == [84, 104, 101, 32, 65, 110, 115, 119, 101, 114, 58, 32, 52, 50]);

    // White space inside the encoded data is ignored on every platform.
    const wrapped = input[0 .. 8] ~ "\r\n \t" ~ input[8 .. $] ~ "\n";
    assert(constructBinary(wrapped, Mark.init, Mark.init) == test);
}

// Construct a timestamp (SysTime) _node.
//
// Accepts "YYYY-M-D", optionally followed by a time ("h:mm:ss" with an
// optional fraction, introduced by 'T', 't', space or tab) and an optional
// time zone ("Z" or "+h[h][:mm]" / "-h[h][:mm]"). Without a time zone the
// result is in UTC. Values that cannot be parsed raise a ConstructorException.
SysTime constructTimestamp(const string str, const Mark start, const Mark end) @safe
{
    // Remaining, not yet parsed part of the input.
    string value = str;

    auto YMDRegexp = regex("^([0-9][0-9][0-9][0-9])-([0-9][0-9]?)-([0-9][0-9]?)");
    auto HMSRegexp = regex("^[Tt \t]+([0-9][0-9]?):([0-9][0-9]):([0-9][0-9])(\\.[0-9]*)?");
    // The alternatives are grouped so that both the anchor and the optional
    // leading white space apply to "Z" as well as to a numeric offset.
    auto TZRegexp  = regex("^[ \t]*(?:Z|([-+][0-9][0-9]?)(:[0-9][0-9])?)");

    try
    {
        // First, get year, month and day.
        auto matches = match(value, YMDRegexp);

        enforce(!matches.empty,
                new ConstructorException("Unable to parse timestamp value: " ~ str, start, "ending at", end));

        auto captures = matches.front.captures;
        const year  = to!int(captures[1]);
        const month = to!int(captures[2]);
        const day   = to!int(captures[3]);

        // If available, get hour, minute, second and fraction, if present.
        value = matches.front.post;
        matches  = match(value, HMSRegexp);
        if(matches.empty)
        {
            return SysTime(DateTime(year, month, day), UTC());
        }

        captures = matches.front.captures;
        const hour   = to!int(captures[1]);
        const minute = to!int(captures[2]);
        const second = to!int(captures[3]);

        // Fraction of a second in units of 100ns (7 decimal digits), computed
        // with integers so that e.g. ".10" is exactly 1_000_000 and not
        // 999_999 because of floating point rounding.
        int hectonanosecond = 0;
        if(captures[4].length > 1)
        {
            string digits = captures[4][1 .. $];
            if(digits.length > 7){digits = digits[0 .. 7];}
            hectonanosecond = to!int(digits);
            foreach(padding; digits.length .. 7){hectonanosecond *= 10;}
        }

        // If available, get timezone.
        value = matches.front.post;
        matches = match(value, TZRegexp);
        if(matches.empty || matches.front.captures[1].empty)
        {
            // No timezone, or an explicit "Z": the time is in UTC.
            return SysTime(DateTime(year, month, day, hour, minute, second),
                           hectonanosecond.dur!"hnsecs", UTC());
        }

        // We have a numeric offset, so parse it.
        captures = matches.front.captures;
        const int sign  = captures[1][0] == '-' ? -1 : 1;
        const tzHours   = to!int(captures[1][1 .. $]);
        const tzMinutes = (!captures[2].empty) ? to!int(captures[2][1 .. $]) : 0;
        const tzOffset  = dur!"minutes"(sign * (60 * tzHours + tzMinutes));

        return SysTime(DateTime(year, month, day, hour, minute, second),
                       hectonanosecond.dur!"hnsecs",
                       new immutable SimpleTimeZone(tzOffset));
    }
    catch(ConvException e)
    {
        // Report the complete original value, not the unparsed remainder.
        throw new ConstructorException("Unable to parse timestamp value " ~ str ~ " : " ~ e.msg,
                                       start, "ending at", end);
    }
    catch(DateTimeException e)
    {
        throw new ConstructorException("Invalid timestamp value " ~ str ~ " : " ~ e.msg,
                                       start, "ending at", end);
    }

    assert(false, "This code should never be reached");
}
@safe unittest
{
    string timestamp(string value)
    {
        return constructTimestamp(value, Mark.init, Mark.init).toISOString();
    }

    string canonical      = "2001-12-15T02:59:43.1Z";
    string iso8601        = "2001-12-14t21:59:43.10-05:00";
    string spaceSeparated = "2001-12-14 21:59:43.10 -5";
    string noTZ           = "2001-12-15 2:59:43.10";
    string noFraction     = "2001-12-15 2:59:43";
    string ymd            = "2002-12-14";

    assert(timestamp(canonical) == "20011215T025943.1Z");
    //avoiding float conversion errors
    assert(timestamp(iso8601) == "20011214T215943.0999999-05:00" ||
           timestamp(iso8601) == "20011214T215943.1-05:00");
    assert(timestamp(spaceSeparated) == "20011214T215943.0999999-05:00" ||
           timestamp(spaceSeparated) == "20011214T215943.1-05:00");
    assert(timestamp(noTZ) == "20011215T025943.0999999Z" ||
           timestamp(noTZ) == "20011215T025943.1Z");
    assert(timestamp(noFraction) == "20011215T025943Z");
    assert(timestamp(ymd) == "20021214T000000Z");

    // "Z" separated from the time by white space still means UTC.
    assert(timestamp("2001-12-15 02:59:43 Z") == "20011215T025943Z");

    // Failures are ConstructorExceptions that mention the whole input.
    assert(collectException!ConstructorException(constructTimestamp("not a timestamp", Mark.init, Mark.init)).msg
           == "Unable to parse timestamp value: not a timestamp");
    assert(collectException!ConstructorException(constructTimestamp("2001-13-45", Mark.init, Mark.init)).msg
           .startsWith("Invalid timestamp value 2001-13-45 : "));
}

// Construct a string _node.
//
// A string scalar is stored as is; the marks are unused.
string constructString(const string str, const Mark start, const Mark end) @safe
{
    return str;
}

// Convert a sequence of single-element mappings into a sequence of pairs.
Node.Pair[] getPairs(string type)(const Node[] nodes) @safe
{
    // `type` only appears in the error message. Every node must be a mapping
    // with exactly one entry; otherwise a ConstructorException pointing at
    // that node is thrown.
    enum msg = "While constructing " ~ type ~ ", expected a mapping with single element";
    Node.Pair[] pairs;
    pairs.reserve(nodes.length);
    foreach(node; nodes)
    {
        enforce(node.nodeID == NodeID.mapping && node.length == 1,
                new ConstructorException(msg, node.startMark));

        pairs ~= node.as!(Node.Pair[]);
    }

    return pairs;
}

// Construct an ordered map (ordered sequence of key:value pairs without duplicates) _node.
//
// Throws a ConstructorException naming both occurrences when a key repeats.
Node.Pair[] constructOrderedMap(const Node[] nodes, const Mark start, const Mark end) @safe
{
    auto pairs = getPairs!"an ordered map"(nodes);

    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto keys = new RedBlackTree!Node();
    foreach(ref pair; pairs)
    {
        auto foundMatch = keys.equalRange(pair.key);
        // The exception argument of enforce is lazy, so foundMatch.front is
        // only evaluated when a previous occurrence really exists.
        enforce(foundMatch.empty, new ConstructorException(
            "Duplicate entry in an ordered map", pair.key.startMark,
            "first occurrence here", foundMatch.front.startMark));
        keys.insert(pair.key);
    }
    return pairs;
}
@safe unittest
{
    uint lines;
    Node[] alternateTypes(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = (i % 2) ? Node.Pair(i.to!string, i) : Node.Pair(i, i.to!string);
            pair.key.startMark_ = Mark("unittest", lines++, 0);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    Node[] sameType(uint length) @safe
    {
        Node[] pairs;
        foreach(long i; 0 .. length)
        {
            auto pair = Node.Pair(i.to!string, i);
            pair.key.startMark_ = Mark("unittest", lines++, 0);
            pairs ~= Node([pair]);
        }
        return pairs;
    }

    // The expected marks depend on the running `lines` counter, so the order
    // of the following checks matters.
    assert(collectException!ConstructorException(constructOrderedMap(alternateTypes(8) ~ alternateTypes(2), Mark.init, Mark.init)).message == "Duplicate entry in an ordered map\nunittest:9,1\nfirst occurrence here: unittest:1,1");
    assertNotThrown(constructOrderedMap(alternateTypes(8), Mark.init, Mark.init));
    assert(collectException!ConstructorException(constructOrderedMap(sameType(64) ~ sameType(16), Mark.init, Mark.init)).message == "Duplicate entry in an ordered map\nunittest:83,1\nfirst occurrence here: unittest:19,1");
    assert(collectException!ConstructorException(constructOrderedMap(alternateTypes(64) ~ alternateTypes(16), Mark.init, Mark.init)).message == "Duplicate entry in an ordered map\nunittest:163,1\nfirst occurrence here: unittest:99,1");
    assertNotThrown(constructOrderedMap(sameType(64), Mark.init, Mark.init));
    assertNotThrown(constructOrderedMap(alternateTypes(64), Mark.init, Mark.init));
    assert(collectException!ConstructorException(constructOrderedMap([Node([Node(1), Node(2)])], Mark.init, Mark.init)).message == "While constructing an ordered map, expected a mapping with single element\n<unknown>:1,1");
}

// Construct a pairs (ordered sequence of key: value pairs allowing duplicates) _node.
Node.Pair[] constructPairs(const Node[] nodes, const Mark start, const Mark end) @safe
{
    return getPairs!"pairs"(nodes);
}

// Construct a set _node.
//
// Returns the keys of the given pairs in their original order; the values are
// dropped. Throws a ConstructorException pointing at the repeated key when a
// key occurs more than once.
Node[] constructSet(const Node.Pair[] pairs, const Mark start, const Mark end) @safe
{
    // In future, the map here should be replaced with something with deterministic
    // memory allocation if possible.
    // Detect duplicates.
    ubyte[Node] map;
    Node[] nodes;
    nodes.reserve(pairs.length);
    foreach(pair; pairs)
    {
        enforce((pair.key in map) is null,
                new ConstructorException("Duplicate entry in a set", pair.key.startMark));
        map[pair.key] = 0;
        nodes ~= pair.key;
    }

    return nodes;
}
@safe unittest
{
    Node.Pair[] set(uint length) @safe
    {
        Node.Pair[] pairs;
        foreach(long i; 0 .. length)
        {
            pairs ~= Node.Pair(i.to!string, YAMLNull());
        }

        return pairs;
    }

    auto DuplicatesShort   = set(8) ~ set(2);
    auto noDuplicatesShort = set(8);
    auto DuplicatesLong    = set(64) ~ set(4);
    auto noDuplicatesLong  = set(64);

    bool eq(Node.Pair[] a, Node[] b)
    {
        if(a.length != b.length){return false;}
        foreach(i; 0 .. a.length)
        {
            if(a[i].key != b[i])
            {
                return false;
            }
        }
        return true;
    }

    auto nodeDuplicatesShort   = DuplicatesShort.dup;
    auto nodeNoDuplicatesShort = noDuplicatesShort.dup;
    auto nodeDuplicatesLong    = DuplicatesLong.dup;
    auto nodeNoDuplicatesLong  = noDuplicatesLong.dup;

    assertThrown!ConstructorException(constructSet(nodeDuplicatesShort, Mark.init, Mark.init));
    assertNotThrown(constructSet(nodeNoDuplicatesShort, Mark.init, Mark.init));
    assertThrown!ConstructorException(constructSet(nodeDuplicatesLong, Mark.init, Mark.init));
    assertNotThrown(constructSet(nodeNoDuplicatesLong, Mark.init, Mark.init));
}

// Construct a sequence (array) _node.
//
// The nodes are stored as is; the marks are unused.
Node[] constructSequence(Node[] nodes, const Mark start, const Mark end) @safe
{
    return nodes;
}

// Construct an unordered map (unordered set of key:value _pairs without duplicates) _node.
//
// Returns the pairs unchanged. Throws a ConstructorException pointing at the
// repeated key when a key occurs more than once.
Node.Pair[] constructMap(Node.Pair[] pairs, const Mark start, const Mark end) @safe
{
    //Detect duplicates.
    //TODO this should be replaced by something with deterministic memory allocation.
    auto keys = new RedBlackTree!Node();
    foreach(ref pair; pairs)
    {
        enforce(!(pair.key in keys),
                new ConstructorException("Duplicate entry in a map: " ~ pair.key.debugString(),
                                         pair.key.startMark));
        keys.insert(pair.key);
    }
    return pairs;
}