1 
2 //          Copyright Ferdinand Majerech 2011.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /**
8  * Implements a class that resolves YAML tags. This can be used to implicitly
9  * resolve tags for custom data types, removing the need to explicitly
10  * specify tags in YAML. A tutorial can be found
11  * $(LINK2 ../tutorials/custom_types.html, here).
12  *
13  * Code based on $(LINK2 http://www.pyyaml.org, PyYAML).
14  */
15 module dyaml.resolver;
16 
17 
18 import std.conv;
19 import std.regex;
20 import std.typecons;
21 import std.utf;
22 
23 import dyaml.node;
24 import dyaml.exception;
25 
26 
/// Type of `regexes`: the tag to resolve to, the regular expression a scalar
/// must match, and the characters such a scalar may start with.
private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars");

// Default implicit resolvers, registered in this order by
// `Resolver.withDefaultResolvers`.
//
// Escaping note: the `r` prefix makes only the literal it is attached to a
// WYSIWYG string; it does not extend over `~` concatenation. Inside `r"..."`
// a literal dot is written `\.`, inside an ordinary `"..."` literal it would
// have to be written `\\.`. Segments that need a real tab character in a
// character class are ordinary literals (`"\t"`), all others are WYSIWYG.
private immutable RegexType[] regexes = [
    RegexType("tag:yaml.org,2002:bool",
              regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~
                    r"|false|False|FALSE|on|On|ON|off|Off|OFF)$"),
              "yYnNtTfFoO"),
    // One alternative per line: decimal with optional exponent, decimal with
    // optional integer part, sexagesimal, infinity, not-a-number.
    RegexType("tag:yaml.org,2002:float",
              regex(r"^(?:[-+]?([0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?" ~
                    r"|[-+]?(?:[0-9][0-9_]*)?\.[0-9_]+(?:[eE][-+][0-9]+)?" ~
                    r"|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*" ~
                    r"|[-+]?\.(?:inf|Inf|INF)" ~
                    r"|\.(?:nan|NaN|NAN))$"),
              "-+0123456789."),
    // Binary, octal, decimal, hexadecimal and sexagesimal integers.
    RegexType("tag:yaml.org,2002:int",
              regex(r"^(?:[-+]?0b[0-1_]+" ~
                    r"|[-+]?0[0-7_]+" ~
                    r"|[-+]?(?:0|[1-9][0-9_]*)" ~
                    r"|[-+]?0x[0-9a-fA-F_]+" ~
                    r"|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"),
              "-+0123456789"),
    RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"),
    // The "\0" start character lets an empty scalar reach this resolver:
    // `Resolver.resolve` uses '\0' as the first character of an empty value.
    RegexType("tag:yaml.org,2002:null",
              regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"),
    // Either a plain date, or a date followed by 'T'/'t'/whitespace, a time
    // with optional fraction, and an optional time zone ('Z' or an offset).
    // The whole alternation is grouped so that `^` and `$` anchor both forms.
    RegexType("tag:yaml.org,2002:timestamp",
              regex(r"^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" ~
                    r"|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?" ~
                    "(?:[Tt]|[ \t]+)[0-9][0-9]?" ~
                    r":[0-9][0-9]:[0-9][0-9](?:\.[0-9]*)?" ~
                    "(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$"),
              "0123456789"),
    RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="),

    //The following resolver is only for documentation purposes. It cannot work
    //because plain scalars cannot start with '!', '&', or '*'.
    RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"),
];
67 
68 /**
69  * Resolves YAML tags (data types).
70  *
71  * Can be used to implicitly resolve custom data types of scalar values.
72  */
struct Resolver
{
    private:
        // Default tag to use for scalars.
        string defaultScalarTag_ = "tag:yaml.org,2002:str";
        // Default tag to use for sequences.
        string defaultSequenceTag_ = "tag:yaml.org,2002:seq";
        // Default tag to use for mappings.
        string defaultMappingTag_ = "tag:yaml.org,2002:map";

        /*
         * Arrays of scalar resolver tuples indexed by starting character of a scalar.
         *
         * Each tuple stores the tag to assign (index 0) and the regular
         * expression the scalar must match to get that tag (index 1).
         * Within one array, resolvers are kept in the order they were added.
         */
        Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_;

    package:
        /*
         * Construct a Resolver with every entry of the module-level `regexes`
         * table registered, in table order.
         */
        static auto withDefaultResolvers() @safe
        {
            Resolver resolver;
            foreach(pair; regexes)
            {
                resolver.addImplicitResolver(pair.tag, pair.regexp, pair.chars);
            }
            return resolver;
        }

    public:
        @disable bool opEquals(ref Resolver);
        @disable int opCmp(ref Resolver);

        /**
         * Add an implicit scalar resolver.
         *
         * If a scalar matches regexp and starts with any character in first,
         * its tag is set to tag. If a scalar matches more than one resolver,
         * the resolver that was added earliest wins. A Resolver that already
         * contains the default resolvers therefore prefers them over any
         * resolver added afterwards; a default-initialized Resolver contains
         * no resolvers at all.
         *
         * If a scalar is not resolved to anything, it is assigned the default
         * YAML tag for strings.
         *
         * Params:  tag    = Tag to resolve to.
         *          regexp = Regular expression the scalar must match to have this tag.
         *          first  = String of possible starting characters of the scalar.
         *
         */
        void addImplicitResolver(string tag, const Regex!char regexp, string first)
            pure @safe
        {
            foreach(const dchar c; first)
            {
                // Appending through the index expression creates the entry
                // for `c` if it does not exist yet, so no explicit check or
                // empty initialization is needed.
                yamlImplicitResolvers_[c] ~= tuple(tag, regexp);
            }
        }
        /// Resolve scalars starting with 'A' to !_tag
        @safe unittest
        {
            import std.file : remove, write;
            import std.regex : regex;
            import dyaml.loader : Loader;
            import dyaml.resolver : Resolver;

            write("example.yaml", "A");
            // Do not leave the example file behind once the test is done.
            scope(exit) remove("example.yaml");

            auto loader = Loader.fromFile("example.yaml");
            loader.resolver.addImplicitResolver("!tag", regex("A.*"), "A");

            auto node = loader.load();
            assert(node.tag == "!tag");
        }

    package:
        /**
         * Resolve tag of a node.
         *
         * Params:  kind     = Type of the node.
         *          tag      = Explicit tag of the node, if any.
         *          value    = Value of the node, if any.
         *          implicit = Should the node be implicitly resolved?
         *
         * If tag is neither null nor the non-specific tag "!", it is returned
         * unchanged. Otherwise sequences and mappings get their default tag,
         * and scalars get the tag of the first registered resolver whose
         * regular expression matches value, or the default scalar tag if
         * implicit is false or nothing matches.
         *
         * Returns: Resolved tag.
         */
        string resolve(const NodeID kind, const string tag, scope string value,
                    const bool implicit) @safe
        {
            import std.array : empty, front;

            // An explicit, specific tag always wins.
            if((tag !is null) && (tag != "!"))
            {
                return tag;
            }

            final switch (kind)
            {
                case NodeID.scalar:
                    if(!implicit)
                    {
                        return defaultScalarTag_;
                    }

                    // Resolvers are bucketed by the first character of the
                    // scalar; an empty scalar is looked up under '\0'.
                    const dchar first = value.empty ? '\0' : value.front;

                    // Single lookup: `in` yields a pointer to the bucket,
                    // or null when no resolver starts with this character.
                    if(auto candidates = first in yamlImplicitResolvers_)
                    {
                        // First matching regexp decides the tag.
                        foreach(resolver; *candidates)
                        {
                            // source/dyaml/resolver.d(192,35): Error: scope variable `__tmpfordtorXXX`
                            // assigned to non-scope parameter `this` calling
                            // `std.regex.RegexMatch!string.RegexMatch.~this`
                            bool isEmpty = () @trusted {
                                return match(value, resolver[1]).empty;
                            }();
                            if(!isEmpty)
                            {
                                return resolver[0];
                            }
                        }
                    }
                    return defaultScalarTag_;
                case NodeID.sequence:
                    return defaultSequenceTag_;
                case NodeID.mapping:
                    return defaultMappingTag_;
                case NodeID.invalid:
                    assert(false, "Cannot resolve an invalid node");
            }
        }
        @safe unittest
        {
            auto resolver = Resolver.withDefaultResolvers;

            // True if every value implicitly resolves to the given tag.
            bool tagMatch(string tag, string[] values) @safe
            {
                const string expected = tag;
                foreach(value; values)
                {
                    const string resolved = resolver.resolve(NodeID.scalar, null, value, true);
                    if(expected != resolved)
                    {
                        return false;
                    }
                }
                return true;
            }

            assert(tagMatch("tag:yaml.org,2002:bool",
                   ["yes", "NO", "True", "on"]));
            assert(tagMatch("tag:yaml.org,2002:float",
                   ["6.8523015e+5", "685.230_15e+03", "685_230.15",
                    "190:20:30.15", "-.inf", ".NaN"]));
            assert(tagMatch("tag:yaml.org,2002:int",
                   ["685230", "+685_230", "02472256", "0x_0A_74_AE",
                    "0b1010_0111_0100_1010_1110", "190:20:30"]));
            assert(tagMatch("tag:yaml.org,2002:merge", ["<<"]));
            assert(tagMatch("tag:yaml.org,2002:null", ["~", "null", ""]));
            assert(tagMatch("tag:yaml.org,2002:str",
                            ["abcd", "9a8b", "9.1adsf"]));
            assert(tagMatch("tag:yaml.org,2002:timestamp",
                   ["2001-12-15T02:59:43.1Z",
                   "2001-12-14t21:59:43.10-05:00",
                   "2001-12-14 21:59:43.10 -5",
                   "2001-12-15 2:59:43.10",
                   "2002-12-14"]));
            assert(tagMatch("tag:yaml.org,2002:value", ["="]));
            assert(tagMatch("tag:yaml.org,2002:yaml", ["!", "&", "*"]));

            // An explicit, specific tag is returned unchanged.
            assert(resolver.resolve(NodeID.scalar, "!custom", "123", true) == "!custom");
            // The non-specific tag "!" still goes through resolution.
            assert(resolver.resolve(NodeID.scalar, "!", "123", true)
                   == "tag:yaml.org,2002:int");
            // Without implicit resolution scalars get the default scalar tag.
            assert(resolver.resolve(NodeID.scalar, null, "123", false)
                   == "tag:yaml.org,2002:str");
            // Collections get their default tags.
            assert(resolver.resolve(NodeID.sequence, null, null, true)
                   == "tag:yaml.org,2002:seq");
            assert(resolver.resolve(NodeID.mapping, null, null, true)
                   == "tag:yaml.org,2002:map");
        }

        ///Returns: Default scalar tag.
        @property string defaultScalarTag()   const pure @safe nothrow {return defaultScalarTag_;}

        ///Returns: Default sequence tag.
        @property string defaultSequenceTag() const pure @safe nothrow {return defaultSequenceTag_;}

        ///Returns: Default mapping tag.
        @property string defaultMappingTag()  const pure @safe nothrow {return defaultMappingTag_;}
}