| 1 | //! Extensions to the parsing API with niche applicability. | 
| 2 |  | 
|---|
| 3 | use crate::buffer::Cursor; | 
|---|
| 4 | use crate::error::Result; | 
|---|
| 5 | use crate::parse::{inner_unexpected, ParseBuffer, Unexpected}; | 
|---|
| 6 | use proc_macro2::extra::DelimSpan; | 
|---|
| 7 | use proc_macro2::Delimiter; | 
|---|
| 8 | use std::cell::Cell; | 
|---|
| 9 | use std::mem; | 
|---|
| 10 | use std::rc::Rc; | 
|---|
| 11 |  | 
|---|
/// Extensions to the `ParseStream` API to support speculative parsing.
pub trait Speculative {
    /// Move this parse stream forward to wherever a forked parse stream
    /// currently stands.
    ///
    /// This is the inverse of [`ParseStream::fork`]. The usual sequence is to
    /// fork a parse stream, parse speculatively on the fork, and then advance
    /// the original stream to the fork in order to "commit" whatever the fork
    /// consumed back onto the main stream.
    ///
    /// Avoid this whenever you can, because it limits the ability to generate
    /// useful errors. Even so, it is often the only way to parse syntax of
    /// the form `A* B*` for arbitrary syntax `A` and `B`. The difficulty is
    /// that when the fork fails to parse an `A`, there is no way to know
    /// whether the user meant to write an `A` and made a syntax error, or
    /// whether the `A`s are finished and it is time to start parsing `B`s.
    /// Use with care.
    ///
    /// Note as well that when `A` is a subset of `B`, `A* B*` can be parsed
    /// by parsing `B*` and then removing the leading members of `A` from the
    /// repetition, which sidesteps the downsides associated with speculative
    /// parsing entirely.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about the possibility of making the colons in
    /// the turbofish syntax like `path::to::<T>` optional, by accepting
    /// `path::to<T>` in expression position. Specifically, according to [RFC
    /// 2544], [`PathSegment`] parsing should always try to consume a
    /// following `<` token as the start of generic arguments, and reset to
    /// the `<` if that fails (e.g. the token is acting as a less-than
    /// operator).
    ///
    /// That is exactly the "fork, try, commit" kind of parsing behavior which
    /// [`ParseStream::fork`] discourages. With `advance_to`, the
    /// speculatively parsed content does not have to be parsed a second time.
    ///
    /// In syn, this change in behavior can be implemented by replacing only
    /// the `Parse` implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// The main cost of this style of speculative parsing is in error
    /// presentation. Even if the lookahead is the "correct" parse, the error
    /// that is shown is that of the "fallback" parse. Staying with the
    /// turbofish example above, take the following unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// If this is parsed as generic arguments, we can provide the error
    /// message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// but when it is parsed with the speculative parsing shown above, the
    /// failure to parse generic arguments makes the parser fall back to
    /// treating the `<` as a less-than, after which it tries to interpret the
    /// `&'a` as the start of a labelled loop, resulting in the much less
    /// helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// Various heuristics can mitigate this (two examples: show both forks'
    /// parse errors, or show the one that consumed more tokens). But when you
    /// control the grammar, sticking to something that can be parsed LL(3),
    /// without the LL(*) speculative parsing that this method makes possible,
    /// makes displaying reasonable errors much simpler.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// This method performs a cheap, fixed amount of work that does not
    /// depend on how far apart the two streams are positioned.
    ///
    /// # Panics
    ///
    /// The forked stream passed to `advance_to` must have been obtained by
    /// forking `self`. Attempting to advance to any other stream will cause a
    /// panic.
    fn advance_to(&self, fork: &Self);
}
|---|
| 166 |  | 
|---|
| 167 | impl<'a> Speculative for ParseBuffer<'a> { | 
|---|
| 168 | fn advance_to(&self, fork: &Self) { | 
|---|
| 169 | if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { | 
|---|
| 170 | panic!( "fork was not derived from the advancing parse stream"); | 
|---|
| 171 | } | 
|---|
| 172 |  | 
|---|
| 173 | let (self_unexp, self_sp) = inner_unexpected(self); | 
|---|
| 174 | let (fork_unexp, fork_sp) = inner_unexpected(fork); | 
|---|
| 175 | if !Rc::ptr_eq(&self_unexp, &fork_unexp) { | 
|---|
| 176 | match (fork_sp, self_sp) { | 
|---|
| 177 | // Unexpected set on the fork, but not on `self`, copy it over. | 
|---|
| 178 | (Some((span, delimiter)), None) => { | 
|---|
| 179 | self_unexp.set(Unexpected::Some(span, delimiter)); | 
|---|
| 180 | } | 
|---|
| 181 | // Unexpected unset. Use chain to propagate errors from fork. | 
|---|
| 182 | (None, None) => { | 
|---|
| 183 | fork_unexp.set(Unexpected::Chain(self_unexp)); | 
|---|
| 184 |  | 
|---|
| 185 | // Ensure toplevel 'unexpected' tokens from the fork don't | 
|---|
| 186 | // bubble up the chain by replacing the root `unexpected` | 
|---|
| 187 | // pointer, only 'unexpected' tokens from existing group | 
|---|
| 188 | // parsers should bubble. | 
|---|
| 189 | fork.unexpected | 
|---|
| 190 | .set(Some(Rc::new(Cell::new(Unexpected::None)))); | 
|---|
| 191 | } | 
|---|
| 192 | // Unexpected has been set on `self`. No changes needed. | 
|---|
| 193 | (_, Some(_)) => {} | 
|---|
| 194 | } | 
|---|
| 195 | } | 
|---|
| 196 |  | 
|---|
| 197 | // See comment on `cell` in the struct definition. | 
|---|
| 198 | self.cell | 
|---|
| 199 | .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); | 
|---|
| 200 | } | 
|---|
| 201 | } | 
|---|
| 202 |  | 
|---|
| 203 | /// Extensions to the `ParseStream` API to support manipulating invisible | 
|---|
| 204 | /// delimiters the same as if they were visible. | 
|---|
| 205 | pub trait AnyDelimiter { | 
|---|
| 206 | /// Returns the delimiter, the span of the delimiter token, and the nested | 
|---|
| 207 | /// contents for further parsing. | 
|---|
| 208 | fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; | 
|---|
| 209 | } | 
|---|
| 210 |  | 
|---|
| 211 | impl<'a> AnyDelimiter for ParseBuffer<'a> { | 
|---|
| 212 | fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { | 
|---|
| 213 | self.step(|cursor: StepCursor<'_, 'a>| { | 
|---|
| 214 | if let Some((content: Cursor<'_>, delimiter: Delimiter, span: DelimSpan, rest: Cursor<'_>)) = cursor.any_group() { | 
|---|
| 215 | let scope: Span = span.close(); | 
|---|
| 216 | let nested: Cursor<'_> = crate::parse::advance_step_cursor(proof:cursor, to:content); | 
|---|
| 217 | let unexpected: Rc | >= crate::parse::get_unexpected(self); |  | 
|---|
| 218 | let content: ParseBuffer<'_> = crate::parse::new_parse_buffer(scope, cursor:nested, unexpected); | 
|---|
| 219 | Ok(((delimiter, span, content), rest)) | 
|---|
| 220 | } else { | 
|---|
| 221 | Err(cursor.error(message: "expected any delimiter")) | 
|---|
| 222 | } | 
|---|
| 223 | }) | 
|---|
| 224 | } | 
|---|
| 225 | } | 
|---|
| 226 |  | 
|---|