1 
2 //          Copyright Ferdinand Majerech 2011.
3 // Distributed under the Boost Software License, Version 1.0.
4 //    (See accompanying file LICENSE_1_0.txt or copy at
5 //          http://www.boost.org/LICENSE_1_0.txt)
6 
7 /**
8  * Implements a class that resolves YAML tags. This can be used to implicitly
9  * resolve tags for custom data types, removing the need to explicitly
10  * specify tags in YAML. A tutorial can be found
11  * $(LINK2 ../tutorials/custom_types.html, here).
12  *
13  * Code based on $(LINK2 http://www.pyyaml.org, PyYAML).
14  */
15 module dyaml.resolver;
16 
17 
18 import std.conv;
19 import std.regex;
20 import std.typecons;
21 import std.utf;
22 
23 import dyaml.node;
24 import dyaml.exception;
25 
26 
/// Type of `regexes`: the tag to resolve to, the pattern a scalar must match,
/// and the characters such a scalar may start with.
private alias RegexType = Tuple!(string, "tag", const Regex!char, "regexp", string, "chars");

/*
 * Table of the default implicit resolvers.
 *
 * Every pattern is assembled from raw (r"...") string pieces only, so a single
 * backslash is always a regex escape. Mixing raw and non-raw pieces is what
 * previously turned the intended `\.` of the first float alternative into
 * `\\.` (a literal backslash followed by any character).
 *
 * Each pattern is wrapped as ^(?: ... )$ so that both anchors apply to every
 * alternative, not just to the first and the last one.
 */
private immutable RegexType[] regexes = [
    RegexType("tag:yaml.org,2002:bool",
              regex(r"^(?:yes|Yes|YES|no|No|NO|true|True|TRUE" ~
                    r"|false|False|FALSE|on|On|ON|off|Off|OFF)$"),
              "yYnNtTfFoO"),
    RegexType("tag:yaml.org,2002:float",
                    // digits '.' [fraction] [exponent]
              regex(r"^(?:[-+]?(?:[0-9][0-9_]*)\.[0-9_]*(?:[eE][-+][0-9]+)?" ~
                    // [digits] '.' fraction [exponent]
                    r"|[-+]?(?:[0-9][0-9_]*)?\.[0-9_]+(?:[eE][-+][0-9]+)?" ~
                    // sexagesimal, e.g. 190:20:30.15
                    r"|[-+]?[0-9][0-9_]*(?::[0-5]?[0-9])+\.[0-9_]*" ~
                    // infinities and NaN
                    r"|[-+]?\.(?:inf|Inf|INF)" ~
                    r"|\.(?:nan|NaN|NAN))$"),
              "-+0123456789."),
    RegexType("tag:yaml.org,2002:int",
              regex(r"^(?:[-+]?0b[0-1_]+" ~
                    r"|[-+]?0[0-7_]+" ~
                    r"|[-+]?(?:0|[1-9][0-9_]*)" ~
                    r"|[-+]?0x[0-9a-fA-F_]+" ~
                    r"|[-+]?[1-9][0-9_]*(?::[0-5]?[0-9])+)$"),
              "-+0123456789"),
    RegexType("tag:yaml.org,2002:merge", regex(r"^<<$"), "<"),
    RegexType("tag:yaml.org,2002:null",
              regex(r"^$|^(?:~|null|Null|NULL)$"), "~nN\0"),
    RegexType("tag:yaml.org,2002:timestamp",
                    // plain date: YYYY-MM-DD
              regex(r"^(?:[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]" ~
                    // date, then 'T'/'t' or blanks, then H:MM:SS
                    r"|[0-9][0-9][0-9][0-9]-[0-9][0-9]?-[0-9][0-9]?" ~
                    r"(?:[Tt]|[ \t]+)[0-9][0-9]?:[0-9][0-9]:[0-9][0-9]" ~
                    // optional fraction of a second
                    r"(?:\.[0-9]*)?" ~
                    // optional blanks followed by 'Z' or a numeric UTC offset
                    r"(?:[ \t]*(?:Z|[-+][0-9][0-9]?(?::[0-9][0-9])?))?)$"),
              "0123456789"),
    RegexType("tag:yaml.org,2002:value", regex(r"^=$"), "="),

    //The following resolver is only for documentation purposes. It cannot work
    //because plain scalars cannot start with '!', '&', or '*'.
    RegexType("tag:yaml.org,2002:yaml", regex(r"^(?:!|&|\*)$"), "!&*"),
];
67 
/**
 * Resolves YAML tags (data types).
 *
 * Supplies the default tags for scalars, sequences and mappings, and can
 * implicitly assign a tag to a scalar whose text matches a registered
 * regular expression.
 */
struct Resolver
{
    private:
        // Tag given to scalars that no implicit resolver claims.
        string defaultScalarTag_ = "tag:yaml.org,2002:str";
        // Tag given to sequences that carry no specific tag.
        string defaultSequenceTag_ = "tag:yaml.org,2002:seq";
        // Tag given to mappings that carry no specific tag.
        string defaultMappingTag_ = "tag:yaml.org,2002:map";

        /*
         * Implicit scalar resolvers, keyed by the first character of a scalar.
         *
         * Every entry pairs a tag with the regular expression a scalar has to
         * match to receive that tag. Entries under one key are kept, and later
         * tried, in the order in which they were added.
         */
        Tuple!(string, const Regex!char)[][dchar] yamlImplicitResolvers_;

    package:
        // Build a Resolver preloaded with every entry of the module-level
        // `regexes` table, registered in table order.
        static auto withDefaultResolvers() @safe
        {
            Resolver result;
            foreach(entry; regexes)
            {
                result.addImplicitResolver(entry.tag, entry.regexp, entry.chars);
            }
            return result;
        }

    public:
        @disable bool opEquals(ref Resolver);
        @disable int opCmp(ref Resolver);

        /**
         * Add an implicit scalar resolver.
         *
         * A scalar that starts with one of the characters in first and matches
         * regexp gets its _tag set to tag. Resolvers are tried in the order
         * they were added, so when several of them match, the one added
         * earliest wins. Resolvers that are already registered (such as the
         * default ones) therefore take precedence over resolvers added later.
         *
         * A scalar that no resolver matches is assigned the default YAML _tag
         * for strings.
         *
         * Params:  tag    = Tag to resolve to.
         *          regexp = Regular expression the scalar must match to have this _tag.
         *          first  = String of possible starting characters of the scalar.
         *
         */
        void addImplicitResolver(string tag, const Regex!char regexp, string first)
            pure @safe
        {
            foreach(const dchar start; first)
            {
                if(auto bucket = start in yamlImplicitResolvers_)
                {
                    // Known starting character: append to its resolver list.
                    *bucket ~= tuple(tag, regexp);
                }
                else
                {
                    // First resolver registered for this starting character.
                    yamlImplicitResolvers_[start] = [tuple(tag, regexp)];
                }
            }
        }
        /// Resolve scalars starting with 'A' to !_tag
        @safe unittest
        {
            import std.file : write;
            import std.regex : regex;
            import dyaml.loader : Loader;
            import dyaml.resolver : Resolver;

            write("example.yaml", "A");

            auto yamlLoader = Loader.fromFile("example.yaml");
            yamlLoader.resolver.addImplicitResolver("!tag", regex("A.*"), "A");

            auto root = yamlLoader.load();
            assert(root.tag == "!tag");
        }

    package:
        /**
         * Resolve tag of a node.
         *
         * Params:  kind     = Type of the node.
         *          tag      = Explicit tag of the node, if any.
         *          value    = Value of the node, if any.
         *          implicit = Should the node be implicitly resolved?
         *
         * A tag that is neither null nor the non-specific tag "!" is returned
         * as is. Otherwise sequences and mappings get their default tags, and
         * scalars get the tag of the first matching implicit resolver (only
         * when implicit is true) or the default scalar tag.
         *
         * Returns: Resolved tag.
         */
        string resolve(const NodeID kind, const string tag, const string value,
                    const bool implicit) @safe
        {
            import std.array : empty, front;

            // An explicit, specific tag always wins.
            const bool hasSpecificTag = (tag !is null) && (tag != "!");
            if(hasSpecificTag)
            {
                return tag;
            }

            final switch (kind)
            {
                case NodeID.scalar:
                    if(implicit)
                    {
                        // Resolvers are indexed by the first character of the
                        // scalar; an empty scalar is looked up under '\0'.
                        const dchar lead = value.empty ? '\0' : value.front;

                        if(auto candidates = lead in yamlImplicitResolvers_)
                        {
                            // The first resolver whose regexp matches decides the tag.
                            foreach(candidate; *candidates)
                            {
                                if(!(match(value, candidate[1]).empty))
                                {
                                    return candidate[0];
                                }
                            }
                        }
                    }
                    return defaultScalarTag_;
                case NodeID.sequence:
                    return defaultSequenceTag_;
                case NodeID.mapping:
                    return defaultMappingTag_;
                case NodeID.invalid:
                    assert(false, "Cannot resolve an invalid node");
            }
        }
        @safe unittest
        {
            auto resolver = Resolver.withDefaultResolvers;

            // True if every scalar in values implicitly resolves to tag.
            bool tagMatch(string tag, string[] values) @safe
            {
                foreach(scalar; values)
                {
                    if(resolver.resolve(NodeID.scalar, null, scalar, true) != tag)
                    {
                        return false;
                    }
                }
                return true;
            }

            assert(tagMatch("tag:yaml.org,2002:bool",
                   ["yes", "NO", "True", "on"]));
            assert(tagMatch("tag:yaml.org,2002:float",
                   ["6.8523015e+5", "685.230_15e+03", "685_230.15",
                    "190:20:30.15", "-.inf", ".NaN"]));
            assert(tagMatch("tag:yaml.org,2002:int",
                   ["685230", "+685_230", "02472256", "0x_0A_74_AE",
                    "0b1010_0111_0100_1010_1110", "190:20:30"]));
            assert(tagMatch("tag:yaml.org,2002:merge", ["<<"]));
            assert(tagMatch("tag:yaml.org,2002:null", ["~", "null", ""]));
            assert(tagMatch("tag:yaml.org,2002:str",
                            ["abcd", "9a8b", "9.1adsf"]));
            assert(tagMatch("tag:yaml.org,2002:timestamp",
                   ["2001-12-15T02:59:43.1Z",
                   "2001-12-14t21:59:43.10-05:00",
                   "2001-12-14 21:59:43.10 -5",
                   "2001-12-15 2:59:43.10",
                   "2002-12-14"]));
            assert(tagMatch("tag:yaml.org,2002:value", ["="]));
            assert(tagMatch("tag:yaml.org,2002:yaml", ["!", "&", "*"]));
        }

        ///Returns: Default scalar tag.
        @property string defaultScalarTag() const pure @safe nothrow
        {
            return defaultScalarTag_;
        }

        ///Returns: Default sequence tag.
        @property string defaultSequenceTag() const pure @safe nothrow
        {
            return defaultSequenceTag_;
        }

        ///Returns: Default mapping tag.
        @property string defaultMappingTag() const pure @safe nothrow
        {
            return defaultMappingTag_;
        }
}