serde_json/
read.rs

1use crate::error::{Error, ErrorCode, Result};
2use alloc::vec::Vec;
3use core::cmp;
4use core::mem;
5use core::ops::Deref;
6use core::str;
7
8#[cfg(feature = "std")]
9use crate::io;
10#[cfg(feature = "std")]
11use crate::iter::LineColIterator;
12
13#[cfg(feature = "raw_value")]
14use crate::raw::BorrowedRawDeserializer;
15#[cfg(all(feature = "raw_value", feature = "std"))]
16use crate::raw::OwnedRawDeserializer;
17#[cfg(all(feature = "raw_value", feature = "std"))]
18use alloc::string::String;
19#[cfg(feature = "raw_value")]
20use serde::de::Visitor;
21
22/// Trait used by the deserializer for iterating over input. This is manually
23/// "specialized" for iterating over `&[u8]`. Once feature(specialization) is
24/// stable we can use actual specialization.
25///
26/// This trait is sealed and cannot be implemented for types outside of
27/// `serde_json`.
28pub trait Read<'de>: private::Sealed {
29    #[doc(hidden)]
30    fn next(&mut self) -> Result<Option<u8>>;
31    #[doc(hidden)]
32    fn peek(&mut self) -> Result<Option<u8>>;
33
34    /// Only valid after a call to peek(). Discards the peeked byte.
35    #[doc(hidden)]
36    fn discard(&mut self);
37
38    /// Position of the most recent call to next().
39    ///
40    /// The most recent call was probably next() and not peek(), but this method
41    /// should try to return a sensible result if the most recent call was
42    /// actually peek() because we don't always know.
43    ///
44    /// Only called in case of an error, so performance is not important.
45    #[doc(hidden)]
46    fn position(&self) -> Position;
47
48    /// Position of the most recent call to peek().
49    ///
50    /// The most recent call was probably peek() and not next(), but this method
51    /// should try to return a sensible result if the most recent call was
52    /// actually next() because we don't always know.
53    ///
54    /// Only called in case of an error, so performance is not important.
55    #[doc(hidden)]
56    fn peek_position(&self) -> Position;
57
58    /// Offset from the beginning of the input to the next byte that would be
59    /// returned by next() or peek().
60    #[doc(hidden)]
61    fn byte_offset(&self) -> usize;
62
63    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64    /// string until the next quotation mark using the given scratch space if
65    /// necessary. The scratch space is initially empty.
66    #[doc(hidden)]
67    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70    /// string until the next quotation mark using the given scratch space if
71    /// necessary. The scratch space is initially empty.
72    ///
73    /// This function returns the raw bytes in the string with escape sequences
74    /// expanded but without performing unicode validation.
75    #[doc(hidden)]
76    fn parse_str_raw<'s>(
77        &'s mut self,
78        scratch: &'s mut Vec<u8>,
79    ) -> Result<Reference<'de, 's, [u8]>>;
80
81    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82    /// string until the next quotation mark but discards the data.
83    #[doc(hidden)]
84    fn ignore_str(&mut self) -> Result<()>;
85
86    /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87    /// Parses next hexadecimal sequence.
88    #[doc(hidden)]
89    fn decode_hex_escape(&mut self) -> Result<u16>;
90
91    /// Switch raw buffering mode on.
92    ///
93    /// This is used when deserializing `RawValue`.
94    #[cfg(feature = "raw_value")]
95    #[doc(hidden)]
96    fn begin_raw_buffering(&mut self);
97
98    /// Switch raw buffering mode off and provides the raw buffered data to the
99    /// given visitor.
100    #[cfg(feature = "raw_value")]
101    #[doc(hidden)]
102    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103    where
104        V: Visitor<'de>;
105
106    /// Whether StreamDeserializer::next needs to check the failed flag. True
107    /// for IoRead, false for StrRead and SliceRead which can track failure by
108    /// truncating their input slice to avoid the extra check on every next
109    /// call.
110    #[doc(hidden)]
111    const should_early_return_if_failed: bool;
112
113    /// Mark a persistent failure of StreamDeserializer, either by setting the
114    /// flag or by truncating the input data.
115    #[doc(hidden)]
116    fn set_failed(&mut self, failed: &mut bool);
117}
118
/// A line/column location within the input, as produced by
/// `Read::position` and `Read::peek_position` for error reporting.
pub struct Position {
    /// Line number of the location.
    pub line: usize,
    /// Column number of the location within its line.
    pub column: usize,
}
123
/// A shared reference that remembers which of two lifetimes it was borrowed
/// for: `'b` for data handed out as `Borrowed`, `'c` for data handed out as
/// `Copied`.
pub enum Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    /// Reference valid for `'b`.
    Borrowed(&'b T),
    /// Reference valid for `'c`.
    Copied(&'c T),
}

impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
where
    T: ?Sized + 'static,
{
    type Target = T;

    /// Yields the wrapped reference regardless of which variant holds it.
    fn deref(&self) -> &Self::Target {
        match *self {
            Reference::Borrowed(from_input) => from_input,
            Reference::Copied(from_scratch) => from_scratch,
        }
    }
}
145
/// JSON input source backed by a std::io input stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R>
where
    R: io::Read,
{
    /// Byte iterator over the reader; also the source of line, column and
    /// byte-offset information.
    iter: LineColIterator<io::Bytes<R>>,
    /// Byte that peek() has already pulled from `iter` but that has not been
    /// consumed yet.
    ch: Option<u8>,
    /// `Some` while raw buffering is switched on; collects every consumed
    /// byte.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
159
/// JSON input source backed by a slice of bytes.
//
// More efficient than the other input sources: peek() needs no mutation, and
// the line/col position is only computed once an error actually happens.
pub struct SliceRead<'a> {
    /// The input being read.
    slice: &'a [u8],
    /// Index of the *next* byte that next() or peek() will hand out.
    index: usize,
    /// Value of `index` at the moment raw buffering was switched on.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
171
172/// JSON input source that reads from a UTF-8 string.
173//
174// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
175pub struct StrRead<'a> {
176    delegate: SliceRead<'a>,
177    #[cfg(feature = "raw_value")]
178    data: &'a str,
179}
180
// Keeps users from implementing the Read trait: `Sealed` is a supertrait of
// `Read`, and it lives in this private module.
mod private {
    /// Supertrait used to seal `Read`.
    pub trait Sealed {}
}
185
186//////////////////////////////////////////////////////////////////////////////
187
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Create a JSON input source to read from a std::io input stream.
    ///
    /// serde_json does not buffer the input itself. For a source on which
    /// short reads are not efficient, such as a [`File`], apply your own
    /// buffering first. See [`std::io::BufReader`].
    ///
    /// [`File`]: std::fs::File
    pub fn new(reader: R) -> Self {
        let bytes = reader.bytes();
        Self {
            iter: LineColIterator::new(bytes),
            // Nothing has been peeked yet.
            ch: None,
            // Raw buffering starts switched off.
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
209
// Opts IoRead into the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R: io::Read> private::Sealed for IoRead<R> {}
212
#[cfg(feature = "std")]
impl<R> IoRead<R>
where
    R: io::Read,
{
    /// Reads string contents byte by byte into `scratch` until the closing
    /// quotation mark, then hands `scratch` to `result` for the final
    /// conversion.
    ///
    /// Backslash escapes are expanded through `parse_escape`. A raw control
    /// character (below 0x20) is an error when `validate` is true and is
    /// copied through verbatim otherwise.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            match tri!(next_or_eof(self)) {
                // Closing quotation mark: the string is complete.
                b'"' => return result(self, scratch),
                b'\\' => tri!(parse_escape(self, validate, scratch)),
                // With `"` and `\` handled above, the only bytes is_escape
                // still accepts here are control characters.
                ctrl if validate && is_escape(ctrl, true) => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                plain => scratch.push(plain),
            }
        }
    }
}
251
#[cfg(feature = "std")]
impl<'de, R> Read<'de> for IoRead<R>
where
    R: io::Read,
{
    /// Hands out the previously peeked byte if there is one, otherwise pulls a
    /// fresh byte from the underlying iterator. While raw buffering is on, the
    /// returned byte is also appended to the raw buffer.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(raw) = self.raw_buffer.as_mut() {
                raw.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Returns the next byte without consuming it. A byte pulled from the
    /// iterator is parked in `self.ch` until next() or discard() takes it.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(byte)) => self.ch = Some(byte),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the peeked byte, recording it in the raw buffer first when raw
    /// buffering is on.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(byte), Some(raw)) = (self.ch.take(), self.raw_buffer.as_mut()) {
            raw.push(byte);
        }
    }

    /// Line and column as tracked by the underlying LineColIterator.
    fn position(&self) -> Position {
        let (line, column) = (self.iter.line(), self.iter.col());
        Position { line, column }
    }

    fn peek_position(&self) -> Position {
        // peek() already advances the LineColIterator, so its current
        // position is the right answer here as well.
        self.position()
    }

    /// Bytes pulled from the iterator, minus one if a peeked byte is still
    /// waiting to be consumed.
    fn byte_offset(&self) -> usize {
        let pulled = self.iter.byte_offset();
        if self.ch.is_some() {
            pulled - 1
        } else {
            pulled
        }
    }

    /// Parses a string into `scratch` and validates it as UTF-8. The result
    /// is always `Reference::Copied`.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        let parsed = tri!(self.parse_str_bytes(scratch, true, as_str));
        Ok(Reference::Copied(parsed))
    }

    /// Parses a string into `scratch` without validation. The result is
    /// always `Reference::Copied`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        let parsed = tri!(self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)));
        Ok(Reference::Copied(parsed))
    }

    /// Consumes a string up to and including its closing quotation mark
    /// without storing it. Raw control characters are rejected.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            match tri!(next_or_eof(self)) {
                b'"' => return Ok(()),
                b'\\' => tri!(ignore_escape(self)),
                // With `"` and `\` handled above, is_escape only accepts
                // control characters here.
                ctrl if is_escape(ctrl, true) => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => {}
            }
        }
    }

    /// Reads four bytes and decodes them as hexadecimal digits.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut digits = [0u8; 4];
        for slot in &mut digits {
            *slot = tri!(next_or_eof(self));
        }
        let [a, b, c, d] = digits;
        if let Some(value) = decode_four_hex_digits(a, b, c, d) {
            Ok(value)
        } else {
            error(self, ErrorCode::InvalidEscape)
        }
    }

    /// Starts collecting consumed bytes into a fresh, empty raw buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops raw buffering and passes the collected bytes, checked to be
    /// UTF-8, to `visitor` as an owned string.
    ///
    /// Panics if raw buffering is not currently switched on.
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let collected = self.raw_buffer.take().unwrap();
        match String::from_utf8(collected) {
            Ok(text) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(text),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller's flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
407
408//////////////////////////////////////////////////////////////////////////////
409
410impl<'a> SliceRead<'a> {
411    /// Create a JSON input source to read from a slice of bytes.
412    pub fn new(slice: &'a [u8]) -> Self {
413        SliceRead {
414            slice,
415            index: 0,
416            #[cfg(feature = "raw_value")]
417            raw_buffering_start_index: 0,
418        }
419    }
420
421    fn position_of_index(&self, i: usize) -> Position {
422        let start_of_line = match memchr::memrchr(b'\n', &self.slice[..i]) {
423            Some(position) => position + 1,
424            None => 0,
425        };
426        Position {
427            line: 1 + memchr::memchr_iter(b'\n', &self.slice[..start_of_line]).count(),
428            column: i - start_of_line,
429        }
430    }
431
432    fn skip_to_escape(&mut self, forbid_control_characters: bool) {
433        // Immediately bail-out on empty strings and consecutive escapes (e.g. \u041b\u0435)
434        if self.index == self.slice.len()
435            || is_escape(self.slice[self.index], forbid_control_characters)
436        {
437            return;
438        }
439        self.index += 1;
440
441        let rest = &self.slice[self.index..];
442
443        if !forbid_control_characters {
444            self.index += memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
445            return;
446        }
447
448        // We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
449        // something akin to memchr3, but the memchr crate does not support this at the moment.
450        // Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
451        // than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
452        // benchmarks and it's cross-platform, so probably the right fit.
453        // [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
454
455        #[cfg(fast_arithmetic = "64")]
456        type Chunk = u64;
457        #[cfg(fast_arithmetic = "32")]
458        type Chunk = u32;
459
460        const STEP: usize = mem::size_of::<Chunk>();
461        const ONE_BYTES: Chunk = Chunk::MAX / 255; // 0x0101...01
462
463        for chunk in rest.chunks_exact(STEP) {
464            let chars = Chunk::from_le_bytes(chunk.try_into().unwrap());
465            let contains_ctrl = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
466            let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
467            let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
468            let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'\\'));
469            let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
470            let masked = (contains_ctrl | contains_quote | contains_backslash) & (ONE_BYTES << 7);
471            if masked != 0 {
472                // SAFETY: chunk is in-bounds for slice
473                self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
474                    + masked.trailing_zeros() as usize / 8;
475                return;
476            }
477        }
478
479        self.index += rest.len() / STEP * STEP;
480        self.skip_to_escape_slow();
481    }
482
483    #[cold]
484    #[inline(never)]
485    fn skip_to_escape_slow(&mut self) {
486        while self.index < self.slice.len() && !is_escape(self.slice[self.index], true) {
487            self.index += 1;
488        }
489    }
490
491    /// The big optimization here over IoRead is that if the string contains no
492    /// backslash escape sequences, the returned &str is a slice of the raw JSON
493    /// data so we avoid copying into the scratch space.
494    fn parse_str_bytes<'s, T, F>(
495        &'s mut self,
496        scratch: &'s mut Vec<u8>,
497        validate: bool,
498        result: F,
499    ) -> Result<Reference<'a, 's, T>>
500    where
501        T: ?Sized + 's,
502        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
503    {
504        // Index of the first byte not yet copied into the scratch space.
505        let mut start = self.index;
506
507        loop {
508            self.skip_to_escape(validate);
509            if self.index == self.slice.len() {
510                return error(self, ErrorCode::EofWhileParsingString);
511            }
512            match self.slice[self.index] {
513                b'"' => {
514                    if scratch.is_empty() {
515                        // Fast path: return a slice of the raw JSON without any
516                        // copying.
517                        let borrowed = &self.slice[start..self.index];
518                        self.index += 1;
519                        return result(self, borrowed).map(Reference::Borrowed);
520                    } else {
521                        scratch.extend_from_slice(&self.slice[start..self.index]);
522                        self.index += 1;
523                        return result(self, scratch).map(Reference::Copied);
524                    }
525                }
526                b'\\' => {
527                    scratch.extend_from_slice(&self.slice[start..self.index]);
528                    self.index += 1;
529                    tri!(parse_escape(self, validate, scratch));
530                    start = self.index;
531                }
532                _ => {
533                    self.index += 1;
534                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
535                }
536            }
537        }
538    }
539}
540
541impl<'a> private::Sealed for SliceRead<'a> {}
542
543impl<'a> Read<'a> for SliceRead<'a> {
544    #[inline]
545    fn next(&mut self) -> Result<Option<u8>> {
546        // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
547        // is about 10% slower.
548        Ok(if self.index < self.slice.len() {
549            let ch = self.slice[self.index];
550            self.index += 1;
551            Some(ch)
552        } else {
553            None
554        })
555    }
556
557    #[inline]
558    fn peek(&mut self) -> Result<Option<u8>> {
559        // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
560        // for some reason.
561        Ok(if self.index < self.slice.len() {
562            Some(self.slice[self.index])
563        } else {
564            None
565        })
566    }
567
568    #[inline]
569    fn discard(&mut self) {
570        self.index += 1;
571    }
572
573    fn position(&self) -> Position {
574        self.position_of_index(self.index)
575    }
576
577    fn peek_position(&self) -> Position {
578        // Cap it at slice.len() just in case the most recent call was next()
579        // and it returned the last byte.
580        self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
581    }
582
583    fn byte_offset(&self) -> usize {
584        self.index
585    }
586
587    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
588        self.parse_str_bytes(scratch, true, as_str)
589    }
590
591    fn parse_str_raw<'s>(
592        &'s mut self,
593        scratch: &'s mut Vec<u8>,
594    ) -> Result<Reference<'a, 's, [u8]>> {
595        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
596    }
597
598    fn ignore_str(&mut self) -> Result<()> {
599        loop {
600            self.skip_to_escape(true);
601            if self.index == self.slice.len() {
602                return error(self, ErrorCode::EofWhileParsingString);
603            }
604            match self.slice[self.index] {
605                b'"' => {
606                    self.index += 1;
607                    return Ok(());
608                }
609                b'\\' => {
610                    self.index += 1;
611                    tri!(ignore_escape(self));
612                }
613                _ => {
614                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
615                }
616            }
617        }
618    }
619
620    #[inline]
621    fn decode_hex_escape(&mut self) -> Result<u16> {
622        match self.slice[self.index..] {
623            [a, b, c, d, ..] => {
624                self.index += 4;
625                match decode_four_hex_digits(a, b, c, d) {
626                    Some(val) => Ok(val),
627                    None => error(self, ErrorCode::InvalidEscape),
628                }
629            }
630            _ => {
631                self.index = self.slice.len();
632                error(self, ErrorCode::EofWhileParsingString)
633            }
634        }
635    }
636
637    #[cfg(feature = "raw_value")]
638    fn begin_raw_buffering(&mut self) {
639        self.raw_buffering_start_index = self.index;
640    }
641
642    #[cfg(feature = "raw_value")]
643    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
644    where
645        V: Visitor<'a>,
646    {
647        let raw = &self.slice[self.raw_buffering_start_index..self.index];
648        let raw = match str::from_utf8(raw) {
649            Ok(raw) => raw,
650            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
651        };
652        visitor.visit_map(BorrowedRawDeserializer {
653            raw_value: Some(raw),
654        })
655    }
656
657    const should_early_return_if_failed: bool = false;
658
659    #[inline]
660    #[cold]
661    fn set_failed(&mut self, _failed: &mut bool) {
662        self.slice = &self.slice[..self.index];
663    }
664}
665
666//////////////////////////////////////////////////////////////////////////////
667
668impl<'a> StrRead<'a> {
669    /// Create a JSON input source to read from a UTF-8 string.
670    pub fn new(s: &'a str) -> Self {
671        StrRead {
672            delegate: SliceRead::new(s.as_bytes()),
673            #[cfg(feature = "raw_value")]
674            data: s,
675        }
676    }
677}
678
679impl<'a> private::Sealed for StrRead<'a> {}
680
681impl<'a> Read<'a> for StrRead<'a> {
682    #[inline]
683    fn next(&mut self) -> Result<Option<u8>> {
684        self.delegate.next()
685    }
686
687    #[inline]
688    fn peek(&mut self) -> Result<Option<u8>> {
689        self.delegate.peek()
690    }
691
692    #[inline]
693    fn discard(&mut self) {
694        self.delegate.discard();
695    }
696
697    fn position(&self) -> Position {
698        self.delegate.position()
699    }
700
701    fn peek_position(&self) -> Position {
702        self.delegate.peek_position()
703    }
704
705    fn byte_offset(&self) -> usize {
706        self.delegate.byte_offset()
707    }
708
709    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
710        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
711            // The deserialization input came in as &str with a UTF-8 guarantee,
712            // and the \u-escapes are checked along the way, so don't need to
713            // check here.
714            Ok(unsafe { str::from_utf8_unchecked(bytes) })
715        })
716    }
717
718    fn parse_str_raw<'s>(
719        &'s mut self,
720        scratch: &'s mut Vec<u8>,
721    ) -> Result<Reference<'a, 's, [u8]>> {
722        self.delegate.parse_str_raw(scratch)
723    }
724
725    fn ignore_str(&mut self) -> Result<()> {
726        self.delegate.ignore_str()
727    }
728
729    fn decode_hex_escape(&mut self) -> Result<u16> {
730        self.delegate.decode_hex_escape()
731    }
732
733    #[cfg(feature = "raw_value")]
734    fn begin_raw_buffering(&mut self) {
735        self.delegate.begin_raw_buffering();
736    }
737
738    #[cfg(feature = "raw_value")]
739    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
740    where
741        V: Visitor<'a>,
742    {
743        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
744        visitor.visit_map(BorrowedRawDeserializer {
745            raw_value: Some(raw),
746        })
747    }
748
749    const should_early_return_if_failed: bool = false;
750
751    #[inline]
752    #[cold]
753    fn set_failed(&mut self, failed: &mut bool) {
754        self.delegate.set_failed(failed);
755    }
756}
757
758//////////////////////////////////////////////////////////////////////////////
759
760impl<'de, R> private::Sealed for &mut R where R: Read<'de> {}
761
762impl<'de, R> Read<'de> for &mut R
763where
764    R: Read<'de>,
765{
766    fn next(&mut self) -> Result<Option<u8>> {
767        R::next(self)
768    }
769
770    fn peek(&mut self) -> Result<Option<u8>> {
771        R::peek(self)
772    }
773
774    fn discard(&mut self) {
775        R::discard(self);
776    }
777
778    fn position(&self) -> Position {
779        R::position(self)
780    }
781
782    fn peek_position(&self) -> Position {
783        R::peek_position(self)
784    }
785
786    fn byte_offset(&self) -> usize {
787        R::byte_offset(self)
788    }
789
790    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
791        R::parse_str(self, scratch)
792    }
793
794    fn parse_str_raw<'s>(
795        &'s mut self,
796        scratch: &'s mut Vec<u8>,
797    ) -> Result<Reference<'de, 's, [u8]>> {
798        R::parse_str_raw(self, scratch)
799    }
800
801    fn ignore_str(&mut self) -> Result<()> {
802        R::ignore_str(self)
803    }
804
805    fn decode_hex_escape(&mut self) -> Result<u16> {
806        R::decode_hex_escape(self)
807    }
808
809    #[cfg(feature = "raw_value")]
810    fn begin_raw_buffering(&mut self) {
811        R::begin_raw_buffering(self);
812    }
813
814    #[cfg(feature = "raw_value")]
815    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
816    where
817        V: Visitor<'de>,
818    {
819        R::end_raw_buffering(self, visitor)
820    }
821
822    const should_early_return_if_failed: bool = R::should_early_return_if_failed;
823
824    fn set_failed(&mut self, failed: &mut bool) {
825        R::set_failed(self, failed);
826    }
827}
828
829//////////////////////////////////////////////////////////////////////////////
830
831/// Marker for whether StreamDeserializer can implement FusedIterator.
832pub trait Fused: private::Sealed {}
833impl<'a> Fused for SliceRead<'a> {}
834impl<'a> Fused for StrRead<'a> {}
835
/// Tells whether `ch` interrupts a run of plain string bytes: a quotation
/// mark, a backslash, or, only when `including_control_characters` is set,
/// any byte below 0x20.
fn is_escape(ch: u8, including_control_characters: bool) -> bool {
    match ch {
        b'"' | b'\\' => true,
        _ => including_control_characters && ch < 0x20,
    }
}
839
840fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
841where
842    R: ?Sized + Read<'de>,
843{
844    match tri!(read.next()) {
845        Some(b) => Ok(b),
846        None => error(read, ErrorCode::EofWhileParsingString),
847    }
848}
849
850fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
851where
852    R: ?Sized + Read<'de>,
853{
854    match tri!(read.peek()) {
855        Some(b) => Ok(b),
856        None => error(read, ErrorCode::EofWhileParsingString),
857    }
858}
859
860fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861where
862    R: ?Sized + Read<'de>,
863{
864    let position = read.position();
865    Err(Error::syntax(reason, position.line, position.column))
866}
867
868fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
869    str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
870}
871
872/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
873/// the previous byte read was a backslash.
874fn parse_escape<'de, R: Read<'de>>(
875    read: &mut R,
876    validate: bool,
877    scratch: &mut Vec<u8>,
878) -> Result<()> {
879    let ch = tri!(next_or_eof(read));
880
881    match ch {
882        b'"' => scratch.push(b'"'),
883        b'\\' => scratch.push(b'\\'),
884        b'/' => scratch.push(b'/'),
885        b'b' => scratch.push(b'\x08'),
886        b'f' => scratch.push(b'\x0c'),
887        b'n' => scratch.push(b'\n'),
888        b'r' => scratch.push(b'\r'),
889        b't' => scratch.push(b'\t'),
890        b'u' => return parse_unicode_escape(read, validate, scratch),
891        _ => return error(read, ErrorCode::InvalidEscape),
892    }
893
894    Ok(())
895}
896
897/// Parses a JSON \u escape and appends it into the scratch space. Assumes `\u`
898/// has just been read.
899#[cold]
900fn parse_unicode_escape<'de, R: Read<'de>>(
901    read: &mut R,
902    validate: bool,
903    scratch: &mut Vec<u8>,
904) -> Result<()> {
905    let mut n = tri!(read.decode_hex_escape());
906
907    // Non-BMP characters are encoded as a sequence of two hex escapes,
908    // representing UTF-16 surrogates. If deserializing a utf-8 string the
909    // surrogates are required to be paired, whereas deserializing a byte string
910    // accepts lone surrogates.
911    if validate && n >= 0xDC00 && n <= 0xDFFF {
912        // XXX: This is actually a trailing surrogate.
913        return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
914    }
915
916    loop {
917        if n < 0xD800 || n > 0xDBFF {
918            // Every u16 outside of the surrogate ranges is guaranteed to be a
919            // legal char.
920            push_wtf8_codepoint(n as u32, scratch);
921            return Ok(());
922        }
923
924        // n is a leading surrogate, we now expect a trailing surrogate.
925        let n1 = n;
926
927        if tri!(peek_or_eof(read)) == b'\\' {
928            read.discard();
929        } else {
930            return if validate {
931                read.discard();
932                error(read, ErrorCode::UnexpectedEndOfHexEscape)
933            } else {
934                push_wtf8_codepoint(n1 as u32, scratch);
935                Ok(())
936            };
937        }
938
939        if tri!(peek_or_eof(read)) == b'u' {
940            read.discard();
941        } else {
942            return if validate {
943                read.discard();
944                error(read, ErrorCode::UnexpectedEndOfHexEscape)
945            } else {
946                push_wtf8_codepoint(n1 as u32, scratch);
947                // The \ prior to this byte started an escape sequence, so we
948                // need to parse that now. This recursive call does not blow the
949                // stack on malicious input because the escape is not \u, so it
950                // will be handled by one of the easy nonrecursive cases.
951                parse_escape(read, validate, scratch)
952            };
953        }
954
955        let n2 = tri!(read.decode_hex_escape());
956
957        if n2 < 0xDC00 || n2 > 0xDFFF {
958            if validate {
959                return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960            }
961            push_wtf8_codepoint(n1 as u32, scratch);
962            // If n2 is a leading surrogate, we need to restart.
963            n = n2;
964            continue;
965        }
966
967        // This value is in range U+10000..=U+10FFFF, which is always a valid
968        // codepoint.
969        let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
970        push_wtf8_codepoint(n, scratch);
971        return Ok(());
972    }
973}
974
/// Appends the WTF-8 encoding of the codepoint `n` to `scratch`.
///
/// This is a faster stand-in for `String::push` that writes straight into the
/// byte buffer. Unlike a `char`, `n` is allowed to be a surrogate
/// (U+D800..=U+DFFF); it is then encoded with the ordinary three-byte form.
///
/// # Panics
///
/// Panics if `n` is greater than U+10FFFF.
#[inline]
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    // Tag bits of a continuation byte and the mask selecting its six payload
    // bits.
    const CONT_TAG: u8 = 0b1000_0000;
    const CONT_MASK: u32 = 0b0011_1111;

    // ASCII is stored as a single byte, no encoding needed.
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    // Make room for the longest possible encoding up front so the raw writes
    // below always stay inside the allocation.
    scratch.reserve(4);

    // Continuation byte carrying the six bits of `n` that start at bit `shift`.
    let continuation = |shift: u32| ((n >> shift) & CONT_MASK) as u8 | CONT_TAG;

    // SAFETY: Thanks to the `reserve` call above, `scratch` owns at least 4
    // bytes of allocated but uninitialized memory past its last initialized
    // byte, and `tail` points at the first of them. Every branch that does not
    // panic writes exactly `written` consecutive bytes starting at `tail`, with
    // `written <= 4`, before the length is grown by that same amount.
    unsafe {
        let tail = scratch.as_mut_ptr().add(scratch.len());

        let written = if n < 0x800 {
            // Two bytes: 110xxxxx 10xxxxxx
            tail.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
            tail.add(1).write(continuation(0));
            2
        } else if n < 0x1_0000 {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            tail.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
            tail.add(1).write(continuation(6));
            tail.add(2).write(continuation(0));
            3
        } else if n < 0x11_0000 {
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            tail.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
            tail.add(1).write(continuation(12));
            tail.add(2).write(continuation(6));
            tail.add(3).write(continuation(0));
            4
        } else {
            // Values above U+10FFFF are not codepoints.
            unreachable!()
        };

        scratch.set_len(scratch.len() + written);
    }
}
1022
1023/// Parses a JSON escape sequence and discards the value. Assumes the previous
1024/// byte read was a backslash.
1025fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1026where
1027    R: ?Sized + Read<'de>,
1028{
1029    let ch = tri!(next_or_eof(read));
1030
1031    match ch {
1032        b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
1033        b'u' => {
1034            // At this point we don't care if the codepoint is valid. We just
1035            // want to consume it. We don't actually know what is valid or not
1036            // at this point, because that depends on if this string will
1037            // ultimately be parsed into a string or a byte buffer in the "real"
1038            // parse.
1039
1040            tri!(read.decode_hex_escape());
1041        }
1042        _ => {
1043            return error(read, ErrorCode::InvalidEscape);
1044        }
1045    }
1046
1047    Ok(())
1048}
1049
/// Returns the numeric value (0..=15) of the ASCII hex digit `val`, accepting
/// both upper- and lowercase letters, or `None` if `val` is not a hex digit.
///
/// This is the straightforward branchy decoder; it is `const` so it can be
/// evaluated at compile time.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
    if b'0' <= val && val <= b'9' {
        return Some(val - b'0');
    }
    if b'A' <= val && val <= b'F' {
        return Some(val - b'A' + 10);
    }
    if b'a' <= val && val <= b'f' {
        return Some(val - b'a' + 10);
    }
    None
}
1058
1059const fn build_hex_table(shift: usize) -> [i16; 256] {
1060    let mut table = [0; 256];
1061    let mut ch = 0;
1062    while ch < 256 {
1063        table[ch] = match decode_hex_val_slow(ch as u8) {
1064            Some(val) => (val as i16) << shift,
1065            None => -1,
1066        };
1067        ch += 1;
1068    }
1069    table
1070}
1071
/// Maps a byte to its hex digit value (0..=15), or to -1 if the byte is not an
/// ASCII hex digit.
static HEX0: [i16; 256] = build_hex_table(0);
/// Maps a byte to its hex digit value shifted left by 4 bits (i.e. placed in
/// the high nibble), or to -1 if the byte is not an ASCII hex digit.
static HEX1: [i16; 256] = build_hex_table(4);
1074
1075fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
1076    let a = HEX1[a as usize] as i32;
1077    let b = HEX0[b as usize] as i32;
1078    let c = HEX1[c as usize] as i32;
1079    let d = HEX0[d as usize] as i32;
1080
1081    let codepoint = ((a | b) << 8) | c | d;
1082
1083    // A single sign bit check.
1084    if codepoint >= 0 {
1085        Some(codepoint as u16)
1086    } else {
1087        None
1088    }
1089}
serde_json/read.rs

serde_json/
read.rs