diff options
| author | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-11-19 15:11:18 +1100 |
|---|---|---|
| committer | Nicholas Nethercote <n.nethercote@gmail.com> | 2024-11-19 18:04:01 +1100 |
| commit | 2c7c3697dbaac3e0599aa0e7cd3581822caec17b (patch) | |
| tree | 0109ee2517ae4591b590ad3c6c6432b73c8a38c6 /compiler/rustc_lexer | |
| parent | df29f9b0c3a2277c2eb2de2e0bf4f4b3329dbd00 (diff) | |
| download | rust-2c7c3697dbaac3e0599aa0e7cd3581822caec17b.tar.gz rust-2c7c3697dbaac3e0599aa0e7cd3581822caec17b.zip | |
Improve `TokenKind` comments.
- Improve wording.
- Use backticks consistently for examples.
Diffstat (limited to 'compiler/rustc_lexer')
| -rw-r--r-- | compiler/rustc_lexer/src/lib.rs | 80 |
1 file changed, 39 insertions, 41 deletions
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 7f221098364..c01dad810c4 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -57,11 +57,10 @@ impl Token { /// Enum representing common lexeme types. #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum TokenKind { - // Multi-char tokens: - /// "// comment" + /// A line comment, e.g. `// comment`. LineComment { doc_style: Option<DocStyle> }, - /// `/* block comment */` + /// A block comment, e.g. `/* block comment */`. /// /// Block comments can be recursive, so a sequence like `/* /* */` /// will not be considered terminated and will result in a parsing error. @@ -70,18 +69,17 @@ pub enum TokenKind { /// Any whitespace character sequence. Whitespace, - /// "ident" or "continue" - /// - /// At this step, keywords are also considered identifiers. + /// An identifier or keyword, e.g. `ident` or `continue`. Ident, - /// Like the above, but containing invalid unicode codepoints. + /// An identifier that is invalid because it contains emoji. InvalidIdent, - /// "r#ident" + /// A raw identifier, e.g. "r#ident". RawIdent, - /// An unknown prefix, like `foo#`, `foo'`, `foo"`. + /// An unknown literal prefix, like `foo#`, `foo'`, `foo"`. Excludes + /// literal prefixes that contain emoji, which are considered "invalid". /// /// Note that only the /// prefix (`foo`) is included in the token, not the separator (which is @@ -93,11 +91,12 @@ pub enum TokenKind { /// An unknown prefix in a lifetime, like `'foo#`. /// - /// Note that like above, only the `'` and prefix are included in the token + /// Like `UnknownPrefix`, only the `'` and prefix are included in the token /// and not the separator. UnknownPrefixLifetime, - /// `'r#lt`, which in edition < 2021 is split into several tokens: `'r # lt`. + /// A raw lifetime, e.g. `'r#foo`. In edition < 2021 it will be split into + /// several tokens: `'r` and `#` and `foo`. 
RawLifetime, /// Similar to the above, but *always* an error on every edition. This is used @@ -110,70 +109,69 @@ pub enum TokenKind { /// Split into the component tokens on older editions. GuardedStrPrefix, - /// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid + /// Literals, e.g. `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid /// suffix, but may be present here on string and float literals. Users of /// this type will need to check for and reject that case. /// /// See [LiteralKind] for more details. Literal { kind: LiteralKind, suffix_start: u32 }, - /// "'a" + /// A lifetime, e.g. `'a`. Lifetime { starts_with_number: bool }, - // One-char tokens: - /// ";" + /// `;` Semi, - /// "," + /// `,` Comma, - /// "." + /// `.` Dot, - /// "(" + /// `(` OpenParen, - /// ")" + /// `)` CloseParen, - /// "{" + /// `{` OpenBrace, - /// "}" + /// `}` CloseBrace, - /// "[" + /// `[` OpenBracket, - /// "]" + /// `]` CloseBracket, - /// "@" + /// `@` At, - /// "#" + /// `#` Pound, - /// "~" + /// `~` Tilde, - /// "?" + /// `?` Question, - /// ":" + /// `:` Colon, - /// "$" + /// `$` Dollar, - /// "=" + /// `=` Eq, - /// "!" + /// `!` Bang, - /// "<" + /// `<` Lt, - /// ">" + /// `>` Gt, - /// "-" + /// `-` Minus, - /// "&" + /// `&` And, - /// "|" + /// `|` Or, - /// "+" + /// `+` Plus, - /// "*" + /// `*` Star, - /// "/" + /// `/` Slash, - /// "^" + /// `^` Caret, - /// "%" + /// `%` Percent, /// Unknown token, not expected by the lexer, e.g. "№" |
