about summary refs log tree commit diff
diff options
context:
space:
mode:
author Deadbeef <ent3rm4n@gmail.com> 2023-07-16 18:59:05 +0000
committer Deadbeef <ent3rm4n@gmail.com> 2023-07-23 06:54:07 +0000
commit df9bd80d74b72aacf336d4f1a4e44ddaff2757ba (patch)
tree 797e68a699837e18a1f452f681ba279199ff7d61
parent cec34a43b1b14f4e39363f3b283d7ac4f593ee81 (diff)
download rust-df9bd80d74b72aacf336d4f1a4e44ddaff2757ba.tar.gz
rust-df9bd80d74b72aacf336d4f1a4e44ddaff2757ba.zip
reimplement C string literals
-rw-r--r-- compiler/rustc_lexer/src/cursor.rs 4
-rw-r--r-- compiler/rustc_lexer/src/lib.rs 7
-rw-r--r-- compiler/rustc_parse/src/lexer/mod.rs 26
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs 3
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr 25
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr 31
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs bin 760 -> 623 bytes
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr bin 4477 -> 674 bytes
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs 3
-rw-r--r-- tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr 38
10 files changed, 48 insertions, 89 deletions
diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index eceef59802e..aba7f95487e 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -24,6 +24,10 @@ impl<'a> Cursor<'a> {
         }
     }
 
+    pub fn as_str(&self) -> &'a str {
+        self.chars.as_str()
+    }
+
     /// Returns the last eaten symbol (or `'\0'` in release builds).
     /// (For debug assertions only.)
     pub(crate) fn prev(&self) -> char {
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 29335a8c0f4..d511d2b1280 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -367,6 +367,13 @@ impl Cursor<'_> {
                 Some(|terminated| Byte { terminated }),
             ),
 
+            // c-string literal, raw c-string literal or identifier.
+            'c' => self.c_or_byte_string(
+                |terminated| CStr { terminated },
+                |n_hashes| RawCStr { n_hashes },
+                None,
+            ),
+
             // Identifier (this should be checked after other variant that can
             // start as identifier).
             c if is_id_start(c) => self.ident_or_unknown_prefix(),
diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index c6e6b46e455..cfcc2ec42fa 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -9,8 +9,8 @@ use rustc_ast::tokenstream::TokenStream;
 use rustc_ast::util::unicode::contains_text_flow_control_chars;
 use rustc_errors::{error_code, Applicability, Diagnostic, DiagnosticBuilder, StashKey};
 use rustc_lexer::unescape::{self, EscapeError, Mode};
-use rustc_lexer::Cursor;
 use rustc_lexer::{Base, DocStyle, RawStrError};
+use rustc_lexer::{Cursor, LiteralKind};
 use rustc_session::lint::builtin::{
     RUST_2021_PREFIXES_INCOMPATIBLE_SYNTAX, TEXT_DIRECTION_CODEPOINT_IN_COMMENT,
 };
@@ -118,6 +118,7 @@ impl<'a> StringReader<'a> {
         let mut swallow_next_invalid = 0;
         // Skip trivial (whitespace & comments) tokens
         loop {
+            let str_before = self.cursor.as_str();
             let token = self.cursor.advance_token();
             let start = self.pos;
             self.pos = self.pos + BytePos(token.len);
@@ -203,6 +204,29 @@ impl<'a> StringReader<'a> {
                         .push(span);
                     token::Ident(sym, false)
                 }
+                // split up (raw) c string literals to an ident and a string literal when edition < 2021.
+                rustc_lexer::TokenKind::Literal {
+                    kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
+                    suffix_start: _,
+                } if !self.mk_sp(start, self.pos).edition().at_least_rust_2021() => {
+                    let prefix_len = match kind {
+                        LiteralKind::CStr { .. } => 1,
+                        LiteralKind::RawCStr { .. } => 2,
+                        _ => unreachable!(),
+                    };
+
+                    // reset the state so that only the prefix ("c" or "cr")
+                    // was consumed.
+                    let lit_start = start + BytePos(prefix_len);
+                    self.pos = lit_start;
+                    self.cursor = Cursor::new(&str_before[prefix_len as usize..]);
+
+                    self.report_unknown_prefix(start);
+                    let sym = nfc_normalize(self.str_from(start));
+                    let prefix_span = self.mk_sp(start, lit_start);
+                    self.sess.symbol_gallery.insert(sym, prefix_span);
+                    return (Token::new(token::Ident(sym, false), prefix_span), preceded_by_whitespace);
+                }
                 rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
                     let suffix_start = start + BytePos(suffix_start);
                     let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
index 3fc5fd481ea..5037396000b 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.rs
@@ -1,5 +1,4 @@
-// FIXME(c_str_literals): This should be `run-pass`
-// known-bug: #113333
+// run-pass
 // edition: 2021
 
 #![feature(c_str_literals)]
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr
deleted file mode 100644
index 571c319d8c5..00000000000
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/basic.stderr
+++ /dev/null
@@ -1,25 +0,0 @@
-error: prefix `c` is unknown
-  --> $DIR/basic.rs:8:27
-   |
-LL |     assert_eq!(b"test\0", c"test".to_bytes_with_nul());
-   |                           ^ unknown prefix
-   |
-   = note: prefixed identifiers and literals are reserved since Rust 2021
-help: consider inserting whitespace here
-   |
-LL |     assert_eq!(b"test\0", c "test".to_bytes_with_nul());
-   |                            +
-
-error: no rules expected the token `"test"`
-  --> $DIR/basic.rs:8:28
-   |
-LL |     assert_eq!(b"test\0", c"test".to_bytes_with_nul());
-   |                            -^^^^^
-   |                            |
-   |                            no rules expected this token in macro call
-   |                            help: missing comma here
-   |
-   = note: while trying to match sequence start
-
-error: aborting due to 2 previous errors
-
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
index 8de36ca4a6e..ea666e43308 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/gate.stderr
@@ -1,32 +1,21 @@
-error: prefix `c` is unknown
+error[E0658]: `c".."` literals are experimental
   --> $DIR/gate.rs:10:5
    |
 LL |     c"foo";
-   |     ^ unknown prefix
+   |     ^^^^^^
    |
-   = note: prefixed identifiers and literals are reserved since Rust 2021
-help: consider inserting whitespace here
-   |
-LL |     c "foo";
-   |      +
+   = note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
+   = help: add `#![feature(c_str_literals)]` to the crate attributes to enable
 
-error: prefix `c` is unknown
+error[E0658]: `c".."` literals are experimental
   --> $DIR/gate.rs:13:8
    |
 LL |     m!(c"test");
-   |        ^ unknown prefix
-   |
-   = note: prefixed identifiers and literals are reserved since Rust 2021
-help: consider inserting whitespace here
+   |        ^^^^^^^
    |
-LL |     m!(c "test");
-   |         +
-
-error: expected one of `!`, `.`, `::`, `;`, `?`, `{`, `}`, or an operator, found `"foo"`
-  --> $DIR/gate.rs:10:6
-   |
-LL |     c"foo";
-   |      ^^^^^ expected one of 8 possible tokens
+   = note: see issue #105723 <https://github.com/rust-lang/rust/issues/105723> for more information
+   = help: add `#![feature(c_str_literals)]` to the crate attributes to enable
 
-error: aborting due to 3 previous errors
+error: aborting due to 2 previous errors
 
+For more information about this error, try `rustc --explain E0658`.
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
index 96945f125da..369173e2318 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.rs
Binary files differ
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
index 2226c7aa6a9..82d9f9cb320 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/no-nuls.stderr
Binary files differ
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
index 066505c23df..380445d7a7f 100644
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
+++ b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.rs
@@ -1,5 +1,4 @@
-// FIXME(c_str_literals): This should be `run-pass`
-// known-bug: #113333
+// run-pass
 // edition: 2021
 
 #![feature(c_str_literals)]
diff --git a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr b/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr
deleted file mode 100644
index 47361fb61d2..00000000000
--- a/tests/ui/rfcs/rfc-3348-c-string-literals/non-ascii.stderr
+++ /dev/null
@@ -1,38 +0,0 @@
-error: prefix `c` is unknown
-  --> $DIR/non-ascii.rs:9:9
-   |
-LL |         c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
-   |         ^ unknown prefix
-   |
-   = note: prefixed identifiers and literals are reserved since Rust 2021
-help: consider inserting whitespace here
-   |
-LL |         c "\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
-   |          +
-
-error: out of range hex escape
-  --> $DIR/non-ascii.rs:9:11
-   |
-LL |         c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
-   |           ^^^^ must be a character in the range [\x00-\x7f]
-
-error: out of range hex escape
-  --> $DIR/non-ascii.rs:9:15
-   |
-LL |         c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
-   |               ^^^^ must be a character in the range [\x00-\x7f]
-
-error: no rules expected the token `"\xEF\x80🦀\u{1F980}"`
-  --> $DIR/non-ascii.rs:9:10
-   |
-LL |         c"\xEF\x80🦀\u{1F980}".to_bytes_with_nul(),
-   |          -^^^^^^^^^^^^^^^^^^^^
-   |          |
-   |          no rules expected this token in macro call
-   |          help: missing comma here
-   |
-note: while trying to match `,`
-  --> $SRC_DIR/core/src/macros/mod.rs:LL:COL
-
-error: aborting due to 4 previous errors
-