1use crate::error::{Error, ErrorCode, Result};
2use alloc::vec::Vec;
3use core::cmp;
4use core::mem;
5use core::ops::Deref;
6use core::str;
7
8#[cfg(feature = "std")]
9use crate::io;
10#[cfg(feature = "std")]
11use crate::iter::LineColIterator;
12
13#[cfg(feature = "raw_value")]
14use crate::raw::BorrowedRawDeserializer;
15#[cfg(all(feature = "raw_value", feature = "std"))]
16use crate::raw::OwnedRawDeserializer;
17#[cfg(all(feature = "raw_value", feature = "std"))]
18use alloc::string::String;
19#[cfg(feature = "raw_value")]
20use serde::de::Visitor;
21
22pub trait Read<'de>: private::Sealed {
29 #[doc(hidden)]
30 fn next(&mut self) -> Result<Option<u8>>;
31 #[doc(hidden)]
32 fn peek(&mut self) -> Result<Option<u8>>;
33
34 #[doc(hidden)]
36 fn discard(&mut self);
37
38 #[doc(hidden)]
46 fn position(&self) -> Position;
47
48 #[doc(hidden)]
56 fn peek_position(&self) -> Position;
57
58 #[doc(hidden)]
61 fn byte_offset(&self) -> usize;
62
63 #[doc(hidden)]
67 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69 #[doc(hidden)]
76 fn parse_str_raw<'s>(
77 &'s mut self,
78 scratch: &'s mut Vec<u8>,
79 ) -> Result<Reference<'de, 's, [u8]>>;
80
81 #[doc(hidden)]
84 fn ignore_str(&mut self) -> Result<()>;
85
86 #[doc(hidden)]
89 fn decode_hex_escape(&mut self) -> Result<u16>;
90
91 #[cfg(feature = "raw_value")]
95 #[doc(hidden)]
96 fn begin_raw_buffering(&mut self);
97
98 #[cfg(feature = "raw_value")]
101 #[doc(hidden)]
102 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103 where
104 V: Visitor<'de>;
105
106 #[doc(hidden)]
111 const should_early_return_if_failed: bool;
112
113 #[doc(hidden)]
116 fn set_failed(&mut self, failed: &mut bool);
117}
118
/// A location within the input, expressed as a line and a column.
pub struct Position {
    /// Line number. The slice-based reader in this file computes it as one
    /// plus the number of newline bytes that precede the location.
    pub line: usize,
    /// Column. The slice-based reader in this file computes it as the byte
    /// distance from the start of the current line.
    pub column: usize,
}
123
/// A reference to parsed data that lives either in the original input or in a
/// temporary buffer.
pub enum Reference<'b, 'c, T: ?Sized + 'static> {
    /// The data is borrowed for the lifetime `'b` (the input's lifetime in the
    /// readers of this file).
    Borrowed(&'b T),
    /// The data is borrowed for the shorter-lived `'c` (the scratch buffer in
    /// the readers of this file).
    Copied(&'c T),
}
131
132impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
133where
134 T: ?Sized + 'static,
135{
136 type Target = T;
137
138 fn deref(&self) -> &Self::Target {
139 match *self {
140 Reference::Borrowed(b) => b,
141 Reference::Copied(c) => c,
142 }
143 }
144}
145
/// JSON input source that pulls bytes from an `io::Read` stream.
#[cfg(feature = "std")]
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
pub struct IoRead<R: io::Read> {
    /// Byte iterator over the reader that also tracks line, column and byte
    /// offset.
    iter: LineColIterator<io::Bytes<R>>,
    /// A byte that has been fetched by `peek` but not consumed yet.
    ch: Option<u8>,
    /// While raw buffering is active, accumulates every consumed byte.
    #[cfg(feature = "raw_value")]
    raw_buffer: Option<Vec<u8>>,
}
159
/// JSON input source that reads from a slice of bytes.
pub struct SliceRead<'a> {
    /// The complete input.
    slice: &'a [u8],
    /// Index of the next byte to read; everything before it has been
    /// consumed.
    index: usize,
    /// Value of `index` at the moment raw buffering was started.
    #[cfg(feature = "raw_value")]
    raw_buffering_start_index: usize,
}
171
172pub struct StrRead<'a> {
176 delegate: SliceRead<'a>,
177 #[cfg(feature = "raw_value")]
178 data: &'a str,
179}
180
/// Private module holding the marker supertrait used to seal the public
/// traits of this file.
mod private {
    /// Marker trait; it is only nameable from within this crate because the
    /// enclosing module is private.
    pub trait Sealed {}
}
185
#[cfg(feature = "std")]
impl<R: io::Read> IoRead<R> {
    /// Creates a JSON input source that reads from the given `io::Read`
    /// stream, one byte at a time via `reader.bytes()`.
    pub fn new(reader: R) -> Self {
        let iter = LineColIterator::new(reader.bytes());
        IoRead {
            iter,
            ch: None,
            #[cfg(feature = "raw_value")]
            raw_buffer: None,
        }
    }
}
209
// Allows `IoRead` to implement the sealed `Read` trait.
#[cfg(feature = "std")]
impl<R: io::Read> private::Sealed for IoRead<R> {}
212
#[cfg(feature = "std")]
impl<R: io::Read> IoRead<R> {
    /// Reads string contents into `scratch` until the closing quote, then
    /// passes the accumulated bytes to `result`.
    ///
    /// Escape sequences are resolved by `parse_escape`. When `validate` is
    /// true a raw control character (a byte below 0x20) is an error;
    /// otherwise it is copied through unchanged.
    fn parse_str_bytes<'s, T, F>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
        validate: bool,
        result: F,
    ) -> Result<T>
    where
        T: 's,
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
    {
        loop {
            let byte = tri!(next_or_eof(self));
            match byte {
                // Closing quote: the string is complete.
                b'"' => return result(self, scratch),
                b'\\' => tri!(parse_escape(self, validate, scratch)),
                // The only remaining escape-class bytes are control characters.
                _ if validate && is_escape(byte, true) => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => scratch.push(byte),
            }
        }
    }
}
251
#[cfg(feature = "std")]
impl<'de, R: io::Read> Read<'de> for IoRead<R> {
    /// Returns the pending peeked byte if there is one, otherwise pulls a
    /// fresh byte from the underlying iterator. While raw buffering is active
    /// the returned byte is also appended to the raw buffer.
    #[inline]
    fn next(&mut self) -> Result<Option<u8>> {
        let byte = match self.ch.take() {
            Some(peeked) => peeked,
            None => match self.iter.next() {
                Some(Ok(fresh)) => fresh,
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            },
        };
        #[cfg(feature = "raw_value")]
        {
            if let Some(buf) = &mut self.raw_buffer {
                buf.push(byte);
            }
        }
        Ok(Some(byte))
    }

    /// Returns the next byte without consuming it, fetching it from the
    /// underlying iterator and caching it in `self.ch` if necessary.
    #[inline]
    fn peek(&mut self) -> Result<Option<u8>> {
        if self.ch.is_none() {
            match self.iter.next() {
                Some(Ok(byte)) => self.ch = Some(byte),
                Some(Err(err)) => return Err(Error::io(err)),
                None => return Ok(None),
            }
        }
        Ok(self.ch)
    }

    /// Drops the cached peeked byte.
    #[cfg(not(feature = "raw_value"))]
    #[inline]
    fn discard(&mut self) {
        self.ch = None;
    }

    /// Drops the cached peeked byte, first appending it to the raw buffer if
    /// raw buffering is active.
    #[cfg(feature = "raw_value")]
    fn discard(&mut self) {
        if let (Some(ch), Some(buf)) = (self.ch.take(), self.raw_buffer.as_mut()) {
            buf.push(ch);
        }
    }

    /// Reports the line and column tracked by the underlying iterator.
    fn position(&self) -> Position {
        let line = self.iter.line();
        let column = self.iter.col();
        Position { line, column }
    }

    /// Same as `position`: a peeked byte has already been pulled through the
    /// line/column-tracking iterator.
    fn peek_position(&self) -> Position {
        self.position()
    }

    /// Byte offset reported by the underlying iterator, minus one if a byte
    /// has been peeked but not yet consumed.
    fn byte_offset(&self) -> usize {
        let fetched = self.iter.byte_offset();
        if self.ch.is_some() {
            fetched - 1
        } else {
            fetched
        }
    }

    /// Parses a string into `scratch` and validates it as UTF-8. The result
    /// always refers to `scratch`, never to the input.
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
        let parsed = tri!(self.parse_str_bytes(scratch, true, as_str));
        Ok(Reference::Copied(parsed))
    }

    /// Parses a string into `scratch` without validation and returns the raw
    /// bytes. The result always refers to `scratch`.
    fn parse_str_raw<'s>(
        &'s mut self,
        scratch: &'s mut Vec<u8>,
    ) -> Result<Reference<'de, 's, [u8]>> {
        let parsed = tri!(self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes)));
        Ok(Reference::Copied(parsed))
    }

    /// Skips to just past the closing quote of the current string. Raw
    /// control characters are rejected; escapes are skipped via
    /// `ignore_escape`.
    fn ignore_str(&mut self) -> Result<()> {
        loop {
            match tri!(next_or_eof(self)) {
                b'"' => return Ok(()),
                b'\\' => tri!(ignore_escape(self)),
                // The only remaining escape-class bytes are control characters.
                other if is_escape(other, true) => {
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
                }
                _ => {}
            }
        }
    }

    /// Reads the next four bytes and decodes them as hexadecimal digits.
    /// Running out of input or hitting a non-hex byte is an error.
    fn decode_hex_escape(&mut self) -> Result<u16> {
        let mut digits = [0u8; 4];
        for digit in &mut digits {
            *digit = tri!(next_or_eof(self));
        }
        let [a, b, c, d] = digits;
        if let Some(val) = decode_four_hex_digits(a, b, c, d) {
            Ok(val)
        } else {
            error(self, ErrorCode::InvalidEscape)
        }
    }

    /// Starts capturing consumed bytes into a fresh, empty buffer.
    #[cfg(feature = "raw_value")]
    fn begin_raw_buffering(&mut self) {
        self.raw_buffer = Some(Vec::new());
    }

    /// Stops capturing and passes the captured text, as an owned string, to
    /// `visitor`. Captured bytes that are not valid UTF-8 are an error.
    ///
    /// Panics if raw buffering is not active (the buffer is unwrapped).
    #[cfg(feature = "raw_value")]
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
    where
        V: Visitor<'de>,
    {
        let captured = self.raw_buffer.take().unwrap();
        match String::from_utf8(captured) {
            Ok(raw) => visitor.visit_map(OwnedRawDeserializer {
                raw_value: Some(raw),
            }),
            Err(_) => error(self, ErrorCode::InvalidUnicodeCodePoint),
        }
    }

    /// This reader reports failure through the caller's flag (see
    /// `set_failed`), so callers must check that flag.
    const should_early_return_if_failed: bool = true;

    /// Records the failure in the caller-provided flag.
    #[inline]
    #[cold]
    fn set_failed(&mut self, failed: &mut bool) {
        *failed = true;
    }
}
407
408impl<'a> SliceRead<'a> {
411 pub fn new(slice: &'a [u8]) -> Self {
413 SliceRead {
414 slice,
415 index: 0,
416 #[cfg(feature = "raw_value")]
417 raw_buffering_start_index: 0,
418 }
419 }
420
421 fn position_of_index(&self, i: usize) -> Position {
422 let start_of_line = match memchr::memrchr(b'\n', &self.slice[..i]) {
423 Some(position) => position + 1,
424 None => 0,
425 };
426 Position {
427 line: 1 + memchr::memchr_iter(b'\n', &self.slice[..start_of_line]).count(),
428 column: i - start_of_line,
429 }
430 }
431
432 fn skip_to_escape(&mut self, forbid_control_characters: bool) {
433 if self.index == self.slice.len()
435 || is_escape(self.slice[self.index], forbid_control_characters)
436 {
437 return;
438 }
439 self.index += 1;
440
441 let rest = &self.slice[self.index..];
442
443 if !forbid_control_characters {
444 self.index += memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
445 return;
446 }
447
448 #[cfg(fast_arithmetic = "64")]
456 type Chunk = u64;
457 #[cfg(fast_arithmetic = "32")]
458 type Chunk = u32;
459
460 const STEP: usize = mem::size_of::<Chunk>();
461 const ONE_BYTES: Chunk = Chunk::MAX / 255; for chunk in rest.chunks_exact(STEP) {
464 let chars = Chunk::from_le_bytes(chunk.try_into().unwrap());
465 let contains_ctrl = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
466 let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
467 let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
468 let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'\\'));
469 let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
470 let masked = (contains_ctrl | contains_quote | contains_backslash) & (ONE_BYTES << 7);
471 if masked != 0 {
472 self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
474 + masked.trailing_zeros() as usize / 8;
475 return;
476 }
477 }
478
479 self.index += rest.len() / STEP * STEP;
480 self.skip_to_escape_slow();
481 }
482
483 #[cold]
484 #[inline(never)]
485 fn skip_to_escape_slow(&mut self) {
486 while self.index < self.slice.len() && !is_escape(self.slice[self.index], true) {
487 self.index += 1;
488 }
489 }
490
491 fn parse_str_bytes<'s, T, F>(
495 &'s mut self,
496 scratch: &'s mut Vec<u8>,
497 validate: bool,
498 result: F,
499 ) -> Result<Reference<'a, 's, T>>
500 where
501 T: ?Sized + 's,
502 F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
503 {
504 let mut start = self.index;
506
507 loop {
508 self.skip_to_escape(validate);
509 if self.index == self.slice.len() {
510 return error(self, ErrorCode::EofWhileParsingString);
511 }
512 match self.slice[self.index] {
513 b'"' => {
514 if scratch.is_empty() {
515 let borrowed = &self.slice[start..self.index];
518 self.index += 1;
519 return result(self, borrowed).map(Reference::Borrowed);
520 } else {
521 scratch.extend_from_slice(&self.slice[start..self.index]);
522 self.index += 1;
523 return result(self, scratch).map(Reference::Copied);
524 }
525 }
526 b'\\' => {
527 scratch.extend_from_slice(&self.slice[start..self.index]);
528 self.index += 1;
529 tri!(parse_escape(self, validate, scratch));
530 start = self.index;
531 }
532 _ => {
533 self.index += 1;
534 return error(self, ErrorCode::ControlCharacterWhileParsingString);
535 }
536 }
537 }
538 }
539}
540
541impl<'a> private::Sealed for SliceRead<'a> {}
542
543impl<'a> Read<'a> for SliceRead<'a> {
544 #[inline]
545 fn next(&mut self) -> Result<Option<u8>> {
546 Ok(if self.index < self.slice.len() {
549 let ch = self.slice[self.index];
550 self.index += 1;
551 Some(ch)
552 } else {
553 None
554 })
555 }
556
557 #[inline]
558 fn peek(&mut self) -> Result<Option<u8>> {
559 Ok(if self.index < self.slice.len() {
562 Some(self.slice[self.index])
563 } else {
564 None
565 })
566 }
567
568 #[inline]
569 fn discard(&mut self) {
570 self.index += 1;
571 }
572
573 fn position(&self) -> Position {
574 self.position_of_index(self.index)
575 }
576
577 fn peek_position(&self) -> Position {
578 self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
581 }
582
583 fn byte_offset(&self) -> usize {
584 self.index
585 }
586
587 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
588 self.parse_str_bytes(scratch, true, as_str)
589 }
590
591 fn parse_str_raw<'s>(
592 &'s mut self,
593 scratch: &'s mut Vec<u8>,
594 ) -> Result<Reference<'a, 's, [u8]>> {
595 self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
596 }
597
598 fn ignore_str(&mut self) -> Result<()> {
599 loop {
600 self.skip_to_escape(true);
601 if self.index == self.slice.len() {
602 return error(self, ErrorCode::EofWhileParsingString);
603 }
604 match self.slice[self.index] {
605 b'"' => {
606 self.index += 1;
607 return Ok(());
608 }
609 b'\\' => {
610 self.index += 1;
611 tri!(ignore_escape(self));
612 }
613 _ => {
614 return error(self, ErrorCode::ControlCharacterWhileParsingString);
615 }
616 }
617 }
618 }
619
620 #[inline]
621 fn decode_hex_escape(&mut self) -> Result<u16> {
622 match self.slice[self.index..] {
623 [a, b, c, d, ..] => {
624 self.index += 4;
625 match decode_four_hex_digits(a, b, c, d) {
626 Some(val) => Ok(val),
627 None => error(self, ErrorCode::InvalidEscape),
628 }
629 }
630 _ => {
631 self.index = self.slice.len();
632 error(self, ErrorCode::EofWhileParsingString)
633 }
634 }
635 }
636
637 #[cfg(feature = "raw_value")]
638 fn begin_raw_buffering(&mut self) {
639 self.raw_buffering_start_index = self.index;
640 }
641
642 #[cfg(feature = "raw_value")]
643 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
644 where
645 V: Visitor<'a>,
646 {
647 let raw = &self.slice[self.raw_buffering_start_index..self.index];
648 let raw = match str::from_utf8(raw) {
649 Ok(raw) => raw,
650 Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
651 };
652 visitor.visit_map(BorrowedRawDeserializer {
653 raw_value: Some(raw),
654 })
655 }
656
657 const should_early_return_if_failed: bool = false;
658
659 #[inline]
660 #[cold]
661 fn set_failed(&mut self, _failed: &mut bool) {
662 self.slice = &self.slice[..self.index];
663 }
664}
665
666impl<'a> StrRead<'a> {
669 pub fn new(s: &'a str) -> Self {
671 StrRead {
672 delegate: SliceRead::new(s.as_bytes()),
673 #[cfg(feature = "raw_value")]
674 data: s,
675 }
676 }
677}
678
679impl<'a> private::Sealed for StrRead<'a> {}
680
681impl<'a> Read<'a> for StrRead<'a> {
682 #[inline]
683 fn next(&mut self) -> Result<Option<u8>> {
684 self.delegate.next()
685 }
686
687 #[inline]
688 fn peek(&mut self) -> Result<Option<u8>> {
689 self.delegate.peek()
690 }
691
692 #[inline]
693 fn discard(&mut self) {
694 self.delegate.discard();
695 }
696
697 fn position(&self) -> Position {
698 self.delegate.position()
699 }
700
701 fn peek_position(&self) -> Position {
702 self.delegate.peek_position()
703 }
704
705 fn byte_offset(&self) -> usize {
706 self.delegate.byte_offset()
707 }
708
709 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
710 self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
711 Ok(unsafe { str::from_utf8_unchecked(bytes) })
715 })
716 }
717
718 fn parse_str_raw<'s>(
719 &'s mut self,
720 scratch: &'s mut Vec<u8>,
721 ) -> Result<Reference<'a, 's, [u8]>> {
722 self.delegate.parse_str_raw(scratch)
723 }
724
725 fn ignore_str(&mut self) -> Result<()> {
726 self.delegate.ignore_str()
727 }
728
729 fn decode_hex_escape(&mut self) -> Result<u16> {
730 self.delegate.decode_hex_escape()
731 }
732
733 #[cfg(feature = "raw_value")]
734 fn begin_raw_buffering(&mut self) {
735 self.delegate.begin_raw_buffering();
736 }
737
738 #[cfg(feature = "raw_value")]
739 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
740 where
741 V: Visitor<'a>,
742 {
743 let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
744 visitor.visit_map(BorrowedRawDeserializer {
745 raw_value: Some(raw),
746 })
747 }
748
749 const should_early_return_if_failed: bool = false;
750
751 #[inline]
752 #[cold]
753 fn set_failed(&mut self, failed: &mut bool) {
754 self.delegate.set_failed(failed);
755 }
756}
757
758impl<'de, R> private::Sealed for &mut R where R: Read<'de> {}
761
762impl<'de, R> Read<'de> for &mut R
763where
764 R: Read<'de>,
765{
766 fn next(&mut self) -> Result<Option<u8>> {
767 R::next(self)
768 }
769
770 fn peek(&mut self) -> Result<Option<u8>> {
771 R::peek(self)
772 }
773
774 fn discard(&mut self) {
775 R::discard(self);
776 }
777
778 fn position(&self) -> Position {
779 R::position(self)
780 }
781
782 fn peek_position(&self) -> Position {
783 R::peek_position(self)
784 }
785
786 fn byte_offset(&self) -> usize {
787 R::byte_offset(self)
788 }
789
790 fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
791 R::parse_str(self, scratch)
792 }
793
794 fn parse_str_raw<'s>(
795 &'s mut self,
796 scratch: &'s mut Vec<u8>,
797 ) -> Result<Reference<'de, 's, [u8]>> {
798 R::parse_str_raw(self, scratch)
799 }
800
801 fn ignore_str(&mut self) -> Result<()> {
802 R::ignore_str(self)
803 }
804
805 fn decode_hex_escape(&mut self) -> Result<u16> {
806 R::decode_hex_escape(self)
807 }
808
809 #[cfg(feature = "raw_value")]
810 fn begin_raw_buffering(&mut self) {
811 R::begin_raw_buffering(self);
812 }
813
814 #[cfg(feature = "raw_value")]
815 fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
816 where
817 V: Visitor<'de>,
818 {
819 R::end_raw_buffering(self, visitor)
820 }
821
822 const should_early_return_if_failed: bool = R::should_early_return_if_failed;
823
824 fn set_failed(&mut self, failed: &mut bool) {
825 R::set_failed(self, failed);
826 }
827}
828
829pub trait Fused: private::Sealed {}
833impl<'a> Fused for SliceRead<'a> {}
834impl<'a> Fused for StrRead<'a> {}
835
/// Reports whether `ch` interrupts a plain run of string bytes: a quote, a
/// backslash, or (only if `including_control_characters` is set) a byte below
/// 0x20.
fn is_escape(ch: u8, including_control_characters: bool) -> bool {
    match ch {
        b'"' | b'\\' => true,
        0x00..=0x1F => including_control_characters,
        _ => false,
    }
}
839
840fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
841where
842 R: ?Sized + Read<'de>,
843{
844 match tri!(read.next()) {
845 Some(b) => Ok(b),
846 None => error(read, ErrorCode::EofWhileParsingString),
847 }
848}
849
850fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
851where
852 R: ?Sized + Read<'de>,
853{
854 match tri!(read.peek()) {
855 Some(b) => Ok(b),
856 None => error(read, ErrorCode::EofWhileParsingString),
857 }
858}
859
860fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861where
862 R: ?Sized + Read<'de>,
863{
864 let position = read.position();
865 Err(Error::syntax(reason, position.line, position.column))
866}
867
868fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
869 str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
870}
871
872fn parse_escape<'de, R: Read<'de>>(
875 read: &mut R,
876 validate: bool,
877 scratch: &mut Vec<u8>,
878) -> Result<()> {
879 let ch = tri!(next_or_eof(read));
880
881 match ch {
882 b'"' => scratch.push(b'"'),
883 b'\\' => scratch.push(b'\\'),
884 b'/' => scratch.push(b'/'),
885 b'b' => scratch.push(b'\x08'),
886 b'f' => scratch.push(b'\x0c'),
887 b'n' => scratch.push(b'\n'),
888 b'r' => scratch.push(b'\r'),
889 b't' => scratch.push(b'\t'),
890 b'u' => return parse_unicode_escape(read, validate, scratch),
891 _ => return error(read, ErrorCode::InvalidEscape),
892 }
893
894 Ok(())
895}
896
897#[cold]
900fn parse_unicode_escape<'de, R: Read<'de>>(
901 read: &mut R,
902 validate: bool,
903 scratch: &mut Vec<u8>,
904) -> Result<()> {
905 let mut n = tri!(read.decode_hex_escape());
906
907 if validate && n >= 0xDC00 && n <= 0xDFFF {
912 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
914 }
915
916 loop {
917 if n < 0xD800 || n > 0xDBFF {
918 push_wtf8_codepoint(n as u32, scratch);
921 return Ok(());
922 }
923
924 let n1 = n;
926
927 if tri!(peek_or_eof(read)) == b'\\' {
928 read.discard();
929 } else {
930 return if validate {
931 read.discard();
932 error(read, ErrorCode::UnexpectedEndOfHexEscape)
933 } else {
934 push_wtf8_codepoint(n1 as u32, scratch);
935 Ok(())
936 };
937 }
938
939 if tri!(peek_or_eof(read)) == b'u' {
940 read.discard();
941 } else {
942 return if validate {
943 read.discard();
944 error(read, ErrorCode::UnexpectedEndOfHexEscape)
945 } else {
946 push_wtf8_codepoint(n1 as u32, scratch);
947 parse_escape(read, validate, scratch)
952 };
953 }
954
955 let n2 = tri!(read.decode_hex_escape());
956
957 if n2 < 0xDC00 || n2 > 0xDFFF {
958 if validate {
959 return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960 }
961 push_wtf8_codepoint(n1 as u32, scratch);
962 n = n2;
964 continue;
965 }
966
967 let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
970 push_wtf8_codepoint(n, scratch);
971 return Ok(());
972 }
973}
974
/// Appends code point `n` to `scratch` using the UTF-8 bit layout (one to
/// four bytes). Surrogate values are not rejected; they are encoded like any
/// other three-byte code point.
///
/// Panics if `n` is 0x11_0000 or greater.
#[inline]
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    scratch.reserve(4);
    let old_len = scratch.len();

    // Continuation byte carrying the six bits of `n` that start at `shift`.
    let continuation = |shift: u32| ((n >> shift) & 0b0011_1111) as u8 | 0b1000_0000;

    // SAFETY: after `reserve(4)` there are at least four bytes of allocated
    // capacity past `old_len`, which is where `tail` points. Every reachable
    // arm writes exactly `encoded_len` (at most 4) bytes there before the
    // length is extended by that same amount.
    unsafe {
        let tail = scratch.as_mut_ptr().add(old_len);

        let encoded_len = match n {
            0..=0x7F => unreachable!(),
            0x80..=0x7FF => {
                tail.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
                tail.add(1).write(continuation(0));
                2
            }
            0x800..=0xFFFF => {
                tail.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
                tail.add(1).write(continuation(6));
                tail.add(2).write(continuation(0));
                3
            }
            0x1_0000..=0x10_FFFF => {
                tail.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
                tail.add(1).write(continuation(12));
                tail.add(2).write(continuation(6));
                tail.add(3).write(continuation(0));
                4
            }
            0x11_0000.. => unreachable!(),
        };

        scratch.set_len(old_len + encoded_len);
    }
}
1022
1023fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1026where
1027 R: ?Sized + Read<'de>,
1028{
1029 let ch = tri!(next_or_eof(read));
1030
1031 match ch {
1032 b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
1033 b'u' => {
1034 tri!(read.decode_hex_escape());
1041 }
1042 _ => {
1043 return error(read, ErrorCode::InvalidEscape);
1044 }
1045 }
1046
1047 Ok(())
1048}
1049
/// Returns the numeric value (0..=15) of an ASCII hexadecimal digit in either
/// case, or `None` if `val` is not a hexadecimal digit.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
    // Byte that maps to the value zero within the matching digit range.
    let zero_point = match val {
        b'0'..=b'9' => b'0',
        b'A'..=b'F' => b'A' - 10,
        b'a'..=b'f' => b'a' - 10,
        _ => return None,
    };
    Some(val - zero_point)
}
1058
1059const fn build_hex_table(shift: usize) -> [i16; 256] {
1060 let mut table = [0; 256];
1061 let mut ch = 0;
1062 while ch < 256 {
1063 table[ch] = match decode_hex_val_slow(ch as u8) {
1064 Some(val) => (val as i16) << shift,
1065 None => -1,
1066 };
1067 ch += 1;
1068 }
1069 table
1070}
1071
1072static HEX0: [i16; 256] = build_hex_table(0);
1073static HEX1: [i16; 256] = build_hex_table(4);
1074
1075fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
1076 let a = HEX1[a as usize] as i32;
1077 let b = HEX0[b as usize] as i32;
1078 let c = HEX1[c as usize] as i32;
1079 let d = HEX0[d as usize] as i32;
1080
1081 let codepoint = ((a | b) << 8) | c | d;
1082
1083 if codepoint >= 0 {
1085 Some(codepoint as u16)
1086 } else {
1087 None
1088 }
1089}