json5/
de.rs

1use pest::iterators::Pair;
2use pest::Parser as P;
3use pest_derive::Parser;
4use serde::de;
5use serde::forward_to_deserialize_any;
6use std::char;
7use std::collections::VecDeque;
8use std::f64;
9
10use crate::error::{self, Error, Result};
11
/// Parser type whose implementation is generated by `pest_derive` from the
/// grammar file `json5.pest`.
///
/// The `Rule` enum matched on throughout this file is presumably produced by
/// the same derive (standard `pest_derive` behavior) — confirm against the
/// grammar file.
#[derive(Parser)]
#[grammar = "json5.pest"]
struct Parser;
15
16/// Deserialize an instance of type `T` from a string of JSON5 text. Can fail if the input is
17/// invalid JSON5, or doesn’t match the structure of the target type.
18pub fn from_str<'a, T>(s: &'a str) -> Result<T>
19where
20    T: de::Deserialize<'a>,
21{
22    let mut deserializer = Deserializer::from_str(s)?;
23    T::deserialize(&mut deserializer)
24}
25
/// A serde deserializer positioned on a single node of the pest parse tree.
///
/// `pair` holds the node that has not been deserialized yet. The
/// `deserialize_*` methods that consume a value `take()` it out, leaving
/// `None` behind, so such a method must not be invoked twice on the same
/// instance.
pub struct Deserializer<'de> {
    pair: Option<Pair<'de, Rule>>,
}
29
30impl<'de> Deserializer<'de> {
31    /// Creates a JSON5 deserializer from a `&str`. This parses the input at construction time, so
32    /// can fail if the input is not valid JSON5.
33    pub fn from_str(input: &'de str) -> Result<Self> {
34        let pair = Parser::parse(Rule::text, input)?.next().unwrap();
35        Ok(Deserializer::from_pair(pair))
36    }
37
38    fn from_pair(pair: Pair<'de, Rule>) -> Self {
39        Deserializer { pair: Some(pair) }
40    }
41}
42
43impl<'de, 'a> de::Deserializer<'de> for &'a mut Deserializer<'de> {
44    type Error = Error;
45
46    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
47    where
48        V: de::Visitor<'de>,
49    {
50        let pair = self.pair.take().unwrap();
51        let span = pair.as_span();
52        let mut res = (move || match pair.as_rule() {
53            Rule::null => visitor.visit_unit(),
54            Rule::boolean => visitor.visit_bool(parse_bool(&pair)),
55            Rule::string | Rule::identifier => visitor.visit_string(parse_string(pair)?),
56            Rule::number => {
57                if is_int(pair.as_str()) {
58                    visitor.visit_i64(parse_integer(&pair)?)
59                } else {
60                    visitor.visit_f64(parse_number(&pair)?)
61                }
62            }
63            Rule::array => visitor.visit_seq(Seq::new(pair)),
64            Rule::object => visitor.visit_map(Map::new(pair)),
65            _ => unreachable!(),
66        })();
67        error::set_location(&mut res, &span);
68        res
69    }
70
71    fn deserialize_enum<V>(
72        self,
73        _name: &'static str,
74        _variants: &'static [&'static str],
75        visitor: V,
76    ) -> Result<V::Value>
77    where
78        V: de::Visitor<'de>,
79    {
80        let pair = self.pair.take().unwrap();
81        let span = pair.as_span();
82        let mut res = (move || visitor.visit_enum(Enum { pair }))();
83        error::set_location(&mut res, &span);
84        res
85    }
86
87    // The below will get us the right types, but won't necessarily give
88    // meaningful results if the source is out of the range of the target type.
89    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value>
90    where
91        V: de::Visitor<'de>,
92    {
93        let pair = self.pair.take().unwrap();
94        let span = pair.as_span();
95        let mut res = (move || visitor.visit_i8(parse_number(&pair)? as i8))();
96        error::set_location(&mut res, &span);
97        res
98    }
99
100    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value>
101    where
102        V: de::Visitor<'de>,
103    {
104        let pair = self.pair.take().unwrap();
105        let span = pair.as_span();
106        let mut res = (move || visitor.visit_i16(parse_number(&pair)? as i16))();
107        error::set_location(&mut res, &span);
108        res
109    }
110
111    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value>
112    where
113        V: de::Visitor<'de>,
114    {
115        let pair = self.pair.take().unwrap();
116        let span = pair.as_span();
117        let mut res = (move || visitor.visit_i32(parse_number(&pair)? as i32))();
118        error::set_location(&mut res, &span);
119        res
120    }
121
122    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value>
123    where
124        V: de::Visitor<'de>,
125    {
126        let pair = self.pair.take().unwrap();
127        let span = pair.as_span();
128        let mut res = (move || visitor.visit_i64(parse_number(&pair)? as i64))();
129        error::set_location(&mut res, &span);
130        res
131    }
132
133    fn deserialize_i128<V>(self, visitor: V) -> Result<V::Value>
134    where
135        V: de::Visitor<'de>,
136    {
137        let pair = self.pair.take().unwrap();
138        let span = pair.as_span();
139        let mut res = (move || visitor.visit_i128(parse_number(&pair)? as i128))();
140        error::set_location(&mut res, &span);
141        res
142    }
143
144    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value>
145    where
146        V: de::Visitor<'de>,
147    {
148        let pair = self.pair.take().unwrap();
149        let span = pair.as_span();
150        let mut res = (move || visitor.visit_u8(parse_number(&pair)? as u8))();
151        error::set_location(&mut res, &span);
152        res
153    }
154
155    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value>
156    where
157        V: de::Visitor<'de>,
158    {
159        let pair = self.pair.take().unwrap();
160        let span = pair.as_span();
161        let mut res = (move || visitor.visit_u16(parse_number(&pair)? as u16))();
162        error::set_location(&mut res, &span);
163        res
164    }
165
166    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value>
167    where
168        V: de::Visitor<'de>,
169    {
170        let pair = self.pair.take().unwrap();
171        let span = pair.as_span();
172        let mut res = (move || visitor.visit_u32(parse_number(&pair)? as u32))();
173        error::set_location(&mut res, &span);
174        res
175    }
176
177    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value>
178    where
179        V: de::Visitor<'de>,
180    {
181        let pair = self.pair.take().unwrap();
182        let span = pair.as_span();
183        let mut res = (move || visitor.visit_u64(parse_number(&pair)? as u64))();
184        error::set_location(&mut res, &span);
185        res
186    }
187
188    fn deserialize_u128<V>(self, visitor: V) -> Result<V::Value>
189    where
190        V: de::Visitor<'de>,
191    {
192        let pair = self.pair.take().unwrap();
193        let span = pair.as_span();
194        let mut res = (move || visitor.visit_u128(parse_number(&pair)? as u128))();
195        error::set_location(&mut res, &span);
196        res
197    }
198
199    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value>
200    where
201        V: de::Visitor<'de>,
202    {
203        let pair = self.pair.take().unwrap();
204        let span = pair.as_span();
205        let mut res = (move || visitor.visit_f32(parse_number(&pair)? as f32))();
206        error::set_location(&mut res, &span);
207        res
208    }
209
210    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value>
211    where
212        V: de::Visitor<'de>,
213    {
214        let pair = self.pair.take().unwrap();
215        let span = pair.as_span();
216        let mut res = (move || visitor.visit_f64(parse_number(&pair)?))();
217        error::set_location(&mut res, &span);
218        res
219    }
220
221    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
222    where
223        V: de::Visitor<'de>,
224    {
225        let pair = self.pair.take().unwrap();
226        let span = pair.as_span();
227        let mut res = (move || match pair.as_rule() {
228            Rule::null => visitor.visit_none(),
229            _ => visitor.visit_some(&mut Deserializer::from_pair(pair)),
230        })();
231        error::set_location(&mut res, &span);
232        res
233    }
234
235    fn deserialize_newtype_struct<V>(self, _name: &str, visitor: V) -> Result<V::Value>
236    where
237        V: de::Visitor<'de>,
238    {
239        let span = self.pair.as_ref().unwrap().as_span();
240        let mut res = (move || visitor.visit_newtype_struct(self))();
241        error::set_location(&mut res, &span);
242        res
243    }
244
245    forward_to_deserialize_any! {
246        bool char str string bytes byte_buf unit unit_struct seq
247        tuple tuple_struct map struct identifier ignored_any
248    }
249}
250
251fn parse_bool(pair: &Pair<'_, Rule>) -> bool {
252    match pair.as_str() {
253        "true" => true,
254        "false" => false,
255        _ => unreachable!(),
256    }
257}
258
259fn parse_string_component(pair: Pair<'_, Rule>) -> Result<String> {
260    let mut result = String::new();
261
262    let mut component_iter = pair.into_inner();
263    while let Some(component) = component_iter.next() {
264        match component.as_rule() {
265            Rule::char_literal => result.push_str(component.as_str()),
266            Rule::char_escape_sequence => result.push_str(parse_char_escape_sequence(&component)),
267            Rule::nul_escape_sequence => result.push_str("\u{0000}"),
268            Rule::hex_escape_sequence => {
269                let hex_escape = parse_hex(component.as_str())?;
270                match char::from_u32(hex_escape) {
271                    Some(c) => result.push(c),
272                    None => return Err(de::Error::custom("error parsing hex prefix")),
273                }
274            }
275            Rule::unicode_escape_sequence => {
276                match parse_hex(component.as_str())? {
277                    0xDC00..=0xDFFF => {
278                        // Expecting a low surrogate (trail surrogate)
279                        return Err(de::Error::custom("unexpected unicode trail surrogate"));
280                    }
281
282                    // Non-BMP characters are encoded as a sequence of to hex escapes,
283                    // representing UTF-16 surrogate
284                    rc1 @ 0xD800..=0xDBFF => {
285                        let rc2 = match component_iter.next() {
286                            Some(pc2) => match parse_hex(pc2.as_str())? {
287                                rc2 @ 0xDC00..=0xDFFF => rc2,
288                                _ => {
289                                    return Err(de::Error::custom(
290                                        "expecting unicode trail surrogate",
291                                    ))
292                                }
293                            },
294                            None => {
295                                // Missing a low surrogate (trail surrogate)
296                                return Err(de::Error::custom("missing unicode trail surrogate"));
297                            }
298                        };
299
300                        // Join together
301                        let rc = ((rc1 - 0xD800) << 10) | (rc2 - 0xDC00) + 0x1_0000;
302                        match char::from_u32(rc) {
303                            Some(c) => {
304                                result.push(c);
305                            }
306                            None => {
307                                return Err(de::Error::custom("invalid non-BMP unicode sequence"));
308                            }
309                        }
310                    }
311
312                    rc => match char::from_u32(rc) {
313                        Some(c) => {
314                            result.push(c);
315                        }
316                        None => {
317                            return Err(de::Error::custom("invalid unicode character"));
318                        }
319                    },
320                }
321            }
322
323            _ => unreachable!(),
324        }
325    }
326
327    Ok(result)
328}
329
330fn parse_string(pair: Pair<'_, Rule>) -> Result<String> {
331    let span = pair.as_span();
332    let mut res = parse_string_component(pair);
333    error::set_location(&mut res, &span);
334    res
335}
336
337fn parse_char_escape_sequence<'a>(pair: &'a Pair<'_, Rule>) -> &'a str {
338    match pair.as_str() {
339        "b" => "\u{0008}",
340        "f" => "\u{000C}",
341        "n" => "\n",
342        "r" => "\r",
343        "t" => "\t",
344        "v" => "\u{000B}",
345        c => c,
346    }
347}
348
349fn parse_number(pair: &Pair<'_, Rule>) -> Result<f64> {
350    match pair.as_str() {
351        "Infinity" => Ok(f64::INFINITY),
352        "-Infinity" => Ok(f64::NEG_INFINITY),
353        "NaN" | "-NaN" => Ok(f64::NAN),
354        s if is_hex_literal(s) => parse_hex(&s[2..]).map(f64::from),
355        s => {
356            if let Ok(r) = s.parse::<f64>() {
357                if r.is_finite() {
358                    Ok(r)
359                } else {
360                    Err(de::Error::custom("error parsing number: too large"))
361                }
362            } else {
363                Err(de::Error::custom("error parsing number"))
364            }
365        }
366    }
367}
368
369fn parse_integer(pair: &Pair<'_, Rule>) -> Result<i64> {
370    match pair.as_str() {
371        s if is_hex_literal(s) => Ok(parse_hex(&s[2..])? as i64),
372        s => s
373            .parse()
374            .or_else(|_| Err(de::Error::custom("error parsing integer"))),
375    }
376}
377
378fn is_int(s: &str) -> bool {
379    !s.contains('.')
380        && (is_hex_literal(s) || (!s.contains('e') && !s.contains('E')))
381        && !is_infinite(s)
382        && !is_nan(s)
383}
384
385fn parse_hex(s: &str) -> Result<u32> {
386    u32::from_str_radix(s, 16).or_else(|_| Err(de::Error::custom("error parsing hex")))
387}
388
/// Reports whether `s` starts with `0x` or `0X` and has at least one character
/// after that prefix.
///
/// Uses `starts_with` rather than slicing, so arbitrary (including non-ASCII)
/// input never panics on a char boundary.
fn is_hex_literal(s: &str) -> bool {
    s.len() > 2 && (s.starts_with("0x") || s.starts_with("0X"))
}
392
/// Reports whether `s` is the JSON5 infinity keyword, with or without an
/// explicit `+` or `-` sign.
fn is_infinite(s: &str) -> bool {
    match s {
        "Infinity" | "+Infinity" | "-Infinity" => true,
        _ => false,
    }
}
396
/// Reports whether `s` is the JSON5 not-a-number keyword, with or without an
/// explicit `+` or `-` sign.
fn is_nan(s: &str) -> bool {
    match s {
        "NaN" | "+NaN" | "-NaN" => true,
        _ => false,
    }
}
400
/// Sequence access over the inner pairs of an array pair.
///
/// Elements are popped from the front of `pairs` as they are deserialized.
struct Seq<'de> {
    pairs: VecDeque<Pair<'de, Rule>>,
}
404
405impl<'de> Seq<'de> {
406    pub fn new(pair: Pair<'de, Rule>) -> Self {
407        Self {
408            pairs: pair.into_inner().collect(),
409        }
410    }
411}
412
413impl<'de> de::SeqAccess<'de> for Seq<'de> {
414    type Error = Error;
415
416    fn size_hint(&self) -> Option<usize> {
417        Some(self.pairs.len())
418    }
419
420    fn next_element_seed<T>(&mut self, seed: T) -> Result<Option<T::Value>>
421    where
422        T: de::DeserializeSeed<'de>,
423    {
424        if let Some(pair) = self.pairs.pop_front() {
425            seed.deserialize(&mut Deserializer::from_pair(pair))
426                .map(Some)
427        } else {
428            Ok(None)
429        }
430    }
431}
432
/// Map access over the inner pairs of an object pair.
///
/// `pairs` is consumed from the front, alternately by `next_key_seed` and
/// `next_value_seed`; `size_hint` accordingly reports half its length.
struct Map<'de> {
    pairs: VecDeque<Pair<'de, Rule>>,
}
436
437impl<'de> Map<'de> {
438    pub fn new(pair: Pair<'de, Rule>) -> Self {
439        Self {
440            pairs: pair.into_inner().collect(),
441        }
442    }
443}
444
445impl<'de> de::MapAccess<'de> for Map<'de> {
446    type Error = Error;
447
448    fn size_hint(&self) -> Option<usize> {
449        Some(self.pairs.len() / 2)
450    }
451
452    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>>
453    where
454        K: de::DeserializeSeed<'de>,
455    {
456        if let Some(pair) = self.pairs.pop_front() {
457            seed.deserialize(&mut Deserializer::from_pair(pair))
458                .map(Some)
459        } else {
460            Ok(None)
461        }
462    }
463
464    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value>
465    where
466        V: de::DeserializeSeed<'de>,
467    {
468        seed.deserialize(&mut Deserializer::from_pair(
469            self.pairs.pop_front().unwrap(),
470        ))
471    }
472}
473
/// Enum access over a single pair; `variant_seed` accepts either a string pair
/// or an object pair and rejects everything else.
struct Enum<'de> {
    pair: Pair<'de, Rule>,
}
477
478impl<'de> de::EnumAccess<'de> for Enum<'de> {
479    type Error = Error;
480    type Variant = Variant<'de>;
481
482    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant)>
483    where
484        V: de::DeserializeSeed<'de>,
485    {
486        let span = self.pair.as_span();
487        let mut res = (move || match self.pair.as_rule() {
488            Rule::string => {
489                let tag = seed.deserialize(&mut Deserializer::from_pair(self.pair))?;
490                Ok((tag, Variant { pair: None }))
491            }
492            Rule::object => {
493                let mut pairs = self.pair.into_inner();
494
495                if let Some(tag_pair) = pairs.next() {
496                    let tag = seed.deserialize(&mut Deserializer::from_pair(tag_pair))?;
497                    Ok((tag, Variant { pair: pairs.next() }))
498                } else {
499                    Err(de::Error::custom("expected a nonempty object"))
500                }
501            }
502            _ => Err(de::Error::custom("expected a string or an object")),
503        })();
504        error::set_location(&mut res, &span);
505        res
506    }
507}
508
/// Payload of an enum variant: `Some(pair)` when a payload pair is available,
/// `None` when there is none (as for a variant given as a bare string).
struct Variant<'de> {
    pair: Option<Pair<'de, Rule>>,
}
512
513impl<'de, 'a> de::VariantAccess<'de> for Variant<'de> {
514    type Error = Error;
515
516    fn unit_variant(self) -> Result<()> {
517        Ok(())
518    }
519
520    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value>
521    where
522        T: de::DeserializeSeed<'de>,
523    {
524        seed.deserialize(&mut Deserializer::from_pair(self.pair.unwrap()))
525    }
526
527    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value>
528    where
529        V: de::Visitor<'de>,
530    {
531        match self.pair {
532            Some(pair) => match pair.as_rule() {
533                Rule::array => visitor.visit_seq(Seq::new(pair)),
534                _ => Err(de::Error::custom("expected an array")),
535            },
536            None => Err(de::Error::custom("expected an array")),
537        }
538    }
539
540    fn struct_variant<V>(self, _fields: &'static [&'static str], visitor: V) -> Result<V::Value>
541    where
542        V: de::Visitor<'de>,
543    {
544        match self.pair {
545            Some(pair) => match pair.as_rule() {
546                Rule::object => visitor.visit_map(Map::new(pair)),
547                _ => Err(de::Error::custom("expected an object")),
548            },
549            None => Err(de::Error::custom("expected an object")),
550        }
551    }
552}