toml_edit/parser/
trivia.rs

1use std::ops::RangeInclusive;
2
3use winnow::combinator::alt;
4use winnow::combinator::empty;
5use winnow::combinator::eof;
6use winnow::combinator::fail;
7use winnow::combinator::opt;
8use winnow::combinator::peek;
9use winnow::combinator::repeat;
10use winnow::combinator::terminated;
11use winnow::prelude::*;
12use winnow::token::any;
13use winnow::token::one_of;
14use winnow::token::take_while;
15
16use crate::parser::prelude::*;
17
/// Reinterprets `bytes` as a `&str`, skipping UTF-8 validation in optimized builds.
///
/// When `debug_assertions` are enabled the bytes are validated and an invalid
/// sequence panics with `safety_justification` as the message; otherwise no
/// validation is performed at all.
///
/// # Safety
///
/// `bytes` must be valid UTF-8. `safety_justification` should state why the
/// caller knows that to be true.
pub(crate) unsafe fn from_utf8_unchecked<'b>(
    bytes: &'b [u8],
    safety_justification: &'static str,
) -> &'b str {
    if cfg!(debug_assertions) {
        // Catch problems more quickly when testing
        return std::str::from_utf8(bytes).expect(safety_justification);
    }

    // SAFETY: the caller guarantees `bytes` is valid UTF-8 (see `# Safety`).
    unsafe { std::str::from_utf8_unchecked(bytes) }
}
31
// wschar = ( %x20 /              ; Space
//            %x09 )              ; Horizontal tab
/// The two bytes that count as inline whitespace: space and horizontal tab.
pub(crate) const WSCHAR: (u8, u8) = (0x20, 0x09);
35
36// ws = *wschar
37pub(crate) fn ws<'i>(input: &mut Input<'i>) -> ModalResult<&'i str> {
38    take_while(0.., WSCHAR)
39        .map(|b| unsafe { from_utf8_unchecked(b, "`is_wschar` filters out on-ASCII") })
40        .parse_next(input)
41}
42
// non-ascii = %x80-D7FF / %xE000-10FFFF
// - ASCII is 0xxxxxxx
// - First byte for UTF-8 is 11xxxxxx
// - Subsequent UTF-8 bytes are 10xxxxxx
/// Byte-level stand-in for `non-ascii`: every byte with the high bit set,
/// which covers both leading and continuation bytes of multi-byte UTF-8.
pub(crate) const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
48
49// non-eol = %x09 / %x20-7E / non-ascii
50pub(crate) const NON_EOL: (u8, RangeInclusive<u8>, RangeInclusive<u8>) =
51    (0x09, 0x20..=0x7E, NON_ASCII);
52
// comment-start-symbol = %x23 ; #
/// The byte that opens a comment (`#`).
pub(crate) const COMMENT_START_SYMBOL: u8 = 0x23;
55
56// comment = comment-start-symbol *non-eol
57pub(crate) fn comment(input: &mut Input<'_>) -> ModalResult<()> {
58    (COMMENT_START_SYMBOL, take_while(0.., NON_EOL))
59        .void()
60        .parse_next(input)
61}
62
63// newline = ( %x0A /              ; LF
64//             %x0D.0A )           ; CRLF
65pub(crate) fn newline(input: &mut Input<'_>) -> ModalResult<()> {
66    dispatch! {any;
67        b'\n' => empty,
68        b'\r' => one_of(LF).void(),
69        _ => fail,
70    }
71    .parse_next(input)
72}
/// Line feed (`\n`).
pub(crate) const LF: u8 = 0x0A;
/// Carriage return (`\r`).
pub(crate) const CR: u8 = 0x0D;
75
76// ws-newline       = *( wschar / newline )
77pub(crate) fn ws_newline(input: &mut Input<'_>) -> ModalResult<()> {
78    repeat(
79        0..,
80        alt((newline.value(&b"\n"[..]), take_while(1.., WSCHAR))),
81    )
82    .map(|()| ())
83    .parse_next(input)
84}
85
86// ws-newlines      = newline *( wschar / newline )
87pub(crate) fn ws_newlines(input: &mut Input<'_>) -> ModalResult<()> {
88    (newline, ws_newline).void().parse_next(input)
89}
90
91// note: this rule is not present in the original grammar
92// ws-comment-newline = *( ws-newline-nonempty / comment )
93pub(crate) fn ws_comment_newline(input: &mut Input<'_>) -> ModalResult<()> {
94    let mut start = input.checkpoint();
95    loop {
96        let _ = ws.parse_next(input)?;
97
98        let next_token = opt(peek(any)).parse_next(input)?;
99        match next_token {
100            Some(b'#') => (comment, newline).void().parse_next(input)?,
101            Some(b'\n') => (newline).void().parse_next(input)?,
102            Some(b'\r') => (newline).void().parse_next(input)?,
103            _ => break,
104        }
105
106        let end = input.checkpoint();
107        if start == end {
108            break;
109        }
110        start = end;
111    }
112
113    Ok(())
114}
115
116// note: this rule is not present in the original grammar
117// line-ending = newline / eof
118pub(crate) fn line_ending(input: &mut Input<'_>) -> ModalResult<()> {
119    alt((newline.value("\n"), eof.value("")))
120        .void()
121        .parse_next(input)
122}
123
124// note: this rule is not present in the original grammar
125// line-trailing = ws [comment] skip-line-ending
126pub(crate) fn line_trailing(input: &mut Input<'_>) -> ModalResult<std::ops::Range<usize>> {
127    terminated((ws, opt(comment)).span(), line_ending).parse_next(input)
128}
129
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    /// Every sample consists solely of trivia, so `ws_comment_newline` must
    /// succeed and consume the input in full.
    #[test]
    fn trivia() {
        let cases = [
            "",
            " ",
            "\n",
            "\n# comment\n\n# comment2\n\n\n",
            "\n        ",
            "# comment\n# comment2\n\n\n   ",
        ];
        for input in cases {
            dbg!(input);
            let outcome = ws_comment_newline.take().parse(new_input(input));
            assert!(outcome.is_ok(), "{outcome:?}");
            let consumed = outcome.unwrap();
            assert_eq!(consumed, input.as_bytes());
        }
    }
}