1use std::borrow::Cow;
2use std::char;
3use std::ops::RangeInclusive;
4
5use winnow::combinator::alt;
6use winnow::combinator::cut_err;
7use winnow::combinator::delimited;
8use winnow::combinator::empty;
9use winnow::combinator::fail;
10use winnow::combinator::opt;
11use winnow::combinator::peek;
12use winnow::combinator::preceded;
13use winnow::combinator::repeat;
14use winnow::combinator::terminated;
15use winnow::combinator::trace;
16use winnow::prelude::*;
17use winnow::stream::Stream;
18use winnow::token::any;
19use winnow::token::none_of;
20use winnow::token::one_of;
21use winnow::token::take_while;
22
23use crate::parser::error::CustomError;
24use crate::parser::numbers::HEXDIG;
25use crate::parser::prelude::*;
26use crate::parser::trivia::{from_utf8_unchecked, newline, ws, ws_newlines, NON_ASCII, WSCHAR};
27
28pub(crate) fn string<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
32 trace(
33 "string",
34 alt((
35 ml_basic_string,
36 basic_string,
37 ml_literal_string,
38 literal_string.map(Cow::Borrowed),
39 )),
40 )
41 .parse_next(input)
42}
43
44pub(crate) fn basic_string<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
48 trace("basic-string", |input: &mut Input<'i>| {
49 let _ = one_of(QUOTATION_MARK).parse_next(input)?;
50
51 let mut c = Cow::Borrowed("");
52 if let Some(ci) = opt(basic_chars).parse_next(input)? {
53 c = ci;
54 }
55 while let Some(ci) = opt(basic_chars).parse_next(input)? {
56 c.to_mut().push_str(&ci);
57 }
58
59 let _ = cut_err(one_of(QUOTATION_MARK))
60 .context(StrContext::Label("basic string"))
61 .parse_next(input)?;
62
63 Ok(c)
64 })
65 .parse_next(input)
66}
67
/// The `"` byte (0x22) that opens and closes a basic string.
pub(crate) const QUOTATION_MARK: u8 = b'"';
70
71fn basic_chars<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
73 alt((
74 take_while(1.., BASIC_UNESCAPED)
77 .try_map(std::str::from_utf8)
78 .map(Cow::Borrowed),
79 escaped.map(|c| Cow::Owned(String::from(c))),
80 ))
81 .parse_next(input)
82}
83
/// Bytes that may appear verbatim (without a backslash escape) in a basic string.
///
/// The set is `WSCHAR`, `!` (0x21), 0x23..=0x5B, 0x5D..=0x7E and `NON_ASCII`. The two
/// gaps in the printable ASCII range are `"` (0x22) and `\` (0x5C), which terminate the
/// string and introduce an escape respectively.
pub(crate) const BASIC_UNESCAPED: (
    (u8, u8),
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
92
93fn escaped(input: &mut Input<'_>) -> ModalResult<char> {
95 preceded(ESCAPE, escape_seq_char).parse_next(input)
96}
97
/// The `\` byte (0x5C) that introduces an escape sequence.
pub(crate) const ESCAPE: u8 = b'\\';
100
101fn escape_seq_char(input: &mut Input<'_>) -> ModalResult<char> {
111 dispatch! {any;
112 b'b' => empty.value('\u{8}'),
113 b'f' => empty.value('\u{c}'),
114 b'n' => empty.value('\n'),
115 b'r' => empty.value('\r'),
116 b't' => empty.value('\t'),
117 b'u' => cut_err(hexescape::<4>).context(StrContext::Label("unicode 4-digit hex code")),
118 b'U' => cut_err(hexescape::<8>).context(StrContext::Label("unicode 8-digit hex code")),
119 b'\\' => empty.value('\\'),
120 b'"' => empty.value('"'),
121 _ => {
122 cut_err(fail::<_, char, _>)
123 .context(StrContext::Label("escape sequence"))
124 .context(StrContext::Expected(StrContextValue::CharLiteral('b')))
125 .context(StrContext::Expected(StrContextValue::CharLiteral('f')))
126 .context(StrContext::Expected(StrContextValue::CharLiteral('n')))
127 .context(StrContext::Expected(StrContextValue::CharLiteral('r')))
128 .context(StrContext::Expected(StrContextValue::CharLiteral('t')))
129 .context(StrContext::Expected(StrContextValue::CharLiteral('u')))
130 .context(StrContext::Expected(StrContextValue::CharLiteral('U')))
131 .context(StrContext::Expected(StrContextValue::CharLiteral('\\')))
132 .context(StrContext::Expected(StrContextValue::CharLiteral('"')))
133 }
134 }
135 .parse_next(input)
136}
137
138pub(crate) fn hexescape<const N: usize>(input: &mut Input<'_>) -> ModalResult<char> {
139 take_while(0..=N, HEXDIG)
140 .verify(|b: &[u8]| b.len() == N)
141 .map(|b: &[u8]| unsafe { from_utf8_unchecked(b, "`is_ascii_digit` filters out on-ASCII") })
142 .verify_map(|s| u32::from_str_radix(s, 16).ok())
143 .try_map(|h| char::from_u32(h).ok_or(CustomError::OutOfRange))
144 .parse_next(input)
145}
146
147fn ml_basic_string<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
152 trace(
153 "ml-basic-string",
154 delimited(
155 ML_BASIC_STRING_DELIM,
156 preceded(opt(newline), cut_err(ml_basic_body))
157 .context(StrContext::Label("multiline basic string")),
158 cut_err(ML_BASIC_STRING_DELIM).context(StrContext::Label("multiline basic string")),
159 ),
160 )
161 .parse_next(input)
162}
163
/// The `"""` delimiter that opens and closes a multi-line basic string.
pub(crate) const ML_BASIC_STRING_DELIM: &[u8] = b"\"\"\"";
166
167fn ml_basic_body<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
169 let mut c = Cow::Borrowed("");
170 if let Some(ci) = opt(mlb_content).parse_next(input)? {
171 c = ci;
172 }
173 while let Some(ci) = opt(mlb_content).parse_next(input)? {
174 c.to_mut().push_str(&ci);
175 }
176
177 while let Some(qi) = opt(mlb_quotes(none_of(b'\"').value(()))).parse_next(input)? {
178 if let Some(ci) = opt(mlb_content).parse_next(input)? {
179 c.to_mut().push_str(qi);
180 c.to_mut().push_str(&ci);
181 while let Some(ci) = opt(mlb_content).parse_next(input)? {
182 c.to_mut().push_str(&ci);
183 }
184 } else {
185 break;
186 }
187 }
188
189 if let Some(qi) = opt(mlb_quotes(ML_BASIC_STRING_DELIM.void())).parse_next(input)? {
190 c.to_mut().push_str(qi);
191 }
192
193 Ok(c)
194}
195
196fn mlb_content<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
199 alt((
200 take_while(1.., MLB_UNESCAPED)
203 .try_map(std::str::from_utf8)
204 .map(Cow::Borrowed),
205 mlb_escaped_nl.map(|_| Cow::Borrowed("")),
207 escaped.map(|c| Cow::Owned(String::from(c))),
208 newline.map(|_| Cow::Borrowed("\n")),
209 ))
210 .parse_next(input)
211}
212
213fn mlb_quotes<'i>(
215 mut term: impl ModalParser<Input<'i>, (), ContextError>,
216) -> impl ModalParser<Input<'i>, &'i str, ContextError> {
217 move |input: &mut Input<'i>| {
218 let start = input.checkpoint();
219 let res = terminated(b"\"\"", peek(term.by_ref()))
220 .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
221 .parse_next(input);
222
223 match res {
224 Err(winnow::error::ErrMode::Backtrack(_)) => {
225 input.reset(&start);
226 terminated(b"\"", peek(term.by_ref()))
227 .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
228 .parse_next(input)
229 }
230 res => res,
231 }
232 }
233}
234
/// Bytes that may appear verbatim (without a backslash escape) in a multi-line basic
/// string.
///
/// The set is `WSCHAR`, `!` (0x21), 0x23..=0x5B, 0x5D..=0x7E and `NON_ASCII`, i.e. the
/// same members as `BASIC_UNESCAPED`. `"` (0x22) and `\` (0x5C) are excluded; newlines
/// are not part of this set either and are handled separately by `mlb_content`.
pub(crate) const MLB_UNESCAPED: (
    (u8, u8),
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
243
244fn mlb_escaped_nl(input: &mut Input<'_>) -> ModalResult<()> {
250 repeat(1.., (ESCAPE, ws, ws_newlines))
251 .map(|()| ())
252 .value(())
253 .parse_next(input)
254}
255
256pub(crate) fn literal_string<'i>(input: &mut Input<'i>) -> ModalResult<&'i str> {
260 trace(
261 "literal-string",
262 delimited(
263 APOSTROPHE,
264 cut_err(take_while(0.., LITERAL_CHAR)),
265 cut_err(APOSTROPHE),
266 )
267 .try_map(std::str::from_utf8)
268 .context(StrContext::Label("literal string")),
269 )
270 .parse_next(input)
271}
272
/// The `'` byte (0x27) that opens and closes a literal string.
pub(crate) const APOSTROPHE: u8 = b'\'';
275
/// Bytes allowed inside a single-line literal string.
///
/// The set is tab (0x9), 0x20..=0x26, 0x28..=0x7E and `NON_ASCII`. The only gap in the
/// printable ASCII range is `'` (0x27), which terminates the string.
pub(crate) const LITERAL_CHAR: (
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
283
284fn ml_literal_string<'i>(input: &mut Input<'i>) -> ModalResult<Cow<'i, str>> {
289 trace(
290 "ml-literal-string",
291 delimited(
292 (ML_LITERAL_STRING_DELIM, opt(newline)),
293 cut_err(ml_literal_body.map(|t| {
294 if t.contains("\r\n") {
295 Cow::Owned(t.replace("\r\n", "\n"))
296 } else {
297 Cow::Borrowed(t)
298 }
299 }))
300 .context(StrContext::Label("multiline literal string")),
301 cut_err(ML_LITERAL_STRING_DELIM).context(StrContext::Label("multiline literal string")),
302 ),
303 )
304 .parse_next(input)
305}
306
/// The `'''` delimiter that opens and closes a multi-line literal string.
pub(crate) const ML_LITERAL_STRING_DELIM: &[u8] = b"'''";
309
310fn ml_literal_body<'i>(input: &mut Input<'i>) -> ModalResult<&'i str> {
312 (
313 repeat(0.., mll_content).map(|()| ()),
314 repeat(
315 0..,
316 (
317 mll_quotes(none_of(APOSTROPHE).value(())),
318 repeat(1.., mll_content).map(|()| ()),
319 ),
320 )
321 .map(|()| ()),
322 opt(mll_quotes(ML_LITERAL_STRING_DELIM.void())),
323 )
324 .take()
325 .try_map(std::str::from_utf8)
326 .parse_next(input)
327}
328
329fn mll_content(input: &mut Input<'_>) -> ModalResult<u8> {
331 alt((one_of(MLL_CHAR), newline.value(b'\n'))).parse_next(input)
332}
333
/// Bytes allowed (other than newlines) inside a multi-line literal string.
///
/// The set is tab (0x9), 0x20..=0x26, 0x28..=0x7E and `NON_ASCII`, i.e. the same
/// members as `LITERAL_CHAR`. `'` (0x27) is excluded here; apostrophes inside the body
/// are handled by `mll_quotes`.
const MLL_CHAR: (
    u8,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
341
342fn mll_quotes<'i>(
344 mut term: impl ModalParser<Input<'i>, (), ContextError>,
345) -> impl ModalParser<Input<'i>, &'i str, ContextError> {
346 move |input: &mut Input<'i>| {
347 let start = input.checkpoint();
348 let res = terminated(b"''", peek(term.by_ref()))
349 .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
350 .parse_next(input);
351
352 match res {
353 Err(winnow::error::ErrMode::Backtrack(_)) => {
354 input.reset(&start);
355 terminated(b"'", peek(term.by_ref()))
356 .map(|b| unsafe { from_utf8_unchecked(b, "`bytes` out non-ASCII") })
357 .parse_next(input)
358 }
359 res => res,
360 }
361 }
362}
363
// Unit tests for the string parsers. Every case goes through the top-level `string`
// parser, so the alternative ordering is exercised as well as the individual forms.
#[cfg(test)]
#[cfg(feature = "parse")]
#[cfg(feature = "display")]
mod test {
    use super::*;

    // A basic string mixing escaped quotes, `\t`, `\n`, a 4-digit `\u` escape and an
    // 8-digit `\U` escape.
    #[test]
    fn basic_string() {
        let input =
            r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#;
        let expected = "I\'m a string. \"You can quote me\". Name\tJosé\nLocation\tSF. \u{2070E}";
        let parsed = string.parse(new_input(input));
        assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
    }

    // Multi-line basic strings: the newline after the opening delimiter is dropped,
    // escaped quotes and backslashes are decoded, and unterminated inputs are rejected.
    #[test]
    fn ml_basic_string() {
        let cases = [
            (
                r#""""
Roses are red
Violets are blue""""#,
                r#"Roses are red
Violets are blue"#,
            ),
            (r#"""" \""" """"#, " \"\"\" "),
            (r#"""" \\""""#, " \\"),
        ];

        for &(input, expected) in &cases {
            let parsed = string.parse(new_input(input));
            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
        }

        // Neither input contains a complete closing delimiter.
        let invalid_cases = [r#"""" """#, r#"""" \""""#];

        for input in &invalid_cases {
            let parsed = string.parse(new_input(input));
            assert!(parsed.is_err());
        }
    }

    // A backslash at the end of a line swallows the newline and the whitespace that
    // follows it, so all of these inputs collapse to a single line (or to nothing).
    #[test]
    fn ml_basic_string_escape_ws() {
        let inputs = [
            r#""""
The quick brown \


 fox jumps over \
 the lazy dog.""""#,
            r#""""\
 The quick brown \
 fox jumps over \
 the lazy dog.\
 """"#,
        ];
        for input in &inputs {
            let expected = "The quick brown fox jumps over the lazy dog.";
            let parsed = string.parse(new_input(input));
            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
        }
        let empties = [
            r#""""\
 """"#,
            r#""""
\
 \
""""#,
        ];
        for input in &empties {
            let expected = "";
            let parsed = string.parse(new_input(input));
            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
        }
    }

    // Literal strings are returned verbatim: the expected value is the input with its
    // first and last byte (the apostrophes) removed.
    #[test]
    fn literal_string() {
        let inputs = [
            r"'C:\Users\nodejs\templates'",
            r"'\\ServerX\admin$\system32\'",
            r#"'Tom "Dubs" Preston-Werner'"#,
            r"'<\i\c*\s*>'",
        ];

        for input in &inputs {
            let expected = &input[1..input.len() - 1];
            let parsed = string.parse(new_input(input));
            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
        }
    }

    // Multi-line literal strings: the expected value is the input without its
    // three-byte delimiters, including apostrophes that touch the delimiters.
    #[test]
    fn ml_literal_string() {
        let inputs = [
            r"'''I [dw]on't need \d{2} apples'''",
            r#"''''one_quote''''"#,
        ];
        for input in &inputs {
            let expected = &input[3..input.len() - 3];
            let parsed = string.parse(new_input(input));
            assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
        }

        let input = r#"'''
The first newline is
trimmed in raw strings.
 All other whitespace
 is preserved.
'''"#;
        // Skip four bytes at the front: the delimiter plus the trimmed first newline.
        let expected = &input[4..input.len() - 3];
        let parsed = string.parse(new_input(input));
        assert_eq!(parsed.as_deref(), Ok(expected), "Parsing {input:?}");
    }
}