Introduce rustc_lexer

The idea here is to make a reusable library out of the existing rust-lexer, by separating pure lexing from rustc-specific concerns such as spans, error reporting, and interning. So, rustc_lexer operates directly on `&str`, produces simple tokens which are a pair of type-tag and a bit of original text, and does not report errors, instead storing them as flags on the token.
author: Aleksey Kladov <aleksey.kladov@gmail.com> 2019-05-06 11:53:40 +0300
committer: Aleksey Kladov <aleksey.kladov@gmail.com> 2019-07-20 21:12:34 +0300
commit: 395ee0b79f23b90593b01dd0a78451b8c93b0aa6 (patch)
tree: 46544580dab78a9ce0e2ad59231c65709955ba2d /src/libsyntax/parse/lexer/unicode_chars.rs
parent: 95b1fe560d2bd8472f250fb8cfd2168520a58405 (diff)
download: rust-395ee0b79f23b90593b01dd0a78451b8c93b0aa6.tar.gz
rust-395ee0b79f23b90593b01dd0a78451b8c93b0aa6.zip
1 file changed, 17 insertions, 24 deletions
diff --git a/src/libsyntax/parse/lexer/unicode_chars.rs b/src/libsyntax/parse/lexer/unicode_chars.rs
index 6a870685938..e51657c0f13 100644
--- a/src/libsyntax/parse/lexer/unicode_chars.rs
+++ b/src/libsyntax/parse/lexer/unicode_chars.rs
@@ -3,7 +3,7 @@
 
 use super::StringReader;
 use errors::{Applicability, DiagnosticBuilder};
-use syntax_pos::{Pos, Span, NO_EXPANSION};
+use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
 
 #[rustfmt::skip] // for line breaks
 const UNICODE_ARRAY: &[(char, &str, char)] = &[
@@ -327,6 +327,7 @@ const ASCII_ARRAY: &[(char, &str)] = &[
 
 crate fn check_for_substitution<'a>(
     reader: &StringReader<'a>,
+    pos: BytePos,
     ch: char,
     err: &mut DiagnosticBuilder<'a>,
 ) -> bool {
@@ -335,19 +336,19 @@ crate fn check_for_substitution<'a>(
         None => return false,
     };
 
-    let span = Span::new(reader.pos, reader.next_pos, NO_EXPANSION);
+    let span = Span::new(pos, pos + Pos::from_usize(ch.len_utf8()), NO_EXPANSION);
 
     let ascii_name = match ASCII_ARRAY.iter().find(|&&(c, _)| c == ascii_char) {
         Some((_ascii_char, ascii_name)) => ascii_name,
         None => {
             let msg = format!("substitution character not found for '{}'", ch);
             reader.sess.span_diagnostic.span_bug_no_panic(span, &msg);
-            return false
-        },
+            return false;
+        }
     };
 
     // special help suggestion for "directed" double quotes
-    if let Some(s) = reader.peek_delimited('“', '”') {
+    if let Some(s) = peek_delimited(&reader.src[reader.src_index(pos)..], '“', '”') {
         let msg = format!(
             "Unicode characters '“' (Left Double Quotation Mark) and \
              '”' (Right Double Quotation Mark) look like '{}' ({}), but are not",
@@ -355,8 +356,8 @@ crate fn check_for_substitution<'a>(
         );
         err.span_suggestion(
             Span::new(
-                reader.pos,
-                reader.next_pos + Pos::from_usize(s.len()) + Pos::from_usize('”'.len_utf8()),
+                pos,
+                pos + Pos::from_usize('“'.len_utf8() + s.len() + '”'.len_utf8()),
                 NO_EXPANSION,
             ),
             &msg,
@@ -368,26 +369,18 @@ crate fn check_for_substitution<'a>(
             "Unicode character '{}' ({}) looks like '{}' ({}), but it is not",
             ch, u_name, ascii_char, ascii_name
         );
-        err.span_suggestion(
-            span,
-            &msg,
-            ascii_char.to_string(),
-            Applicability::MaybeIncorrect,
-        );
+        err.span_suggestion(span, &msg, ascii_char.to_string(), Applicability::MaybeIncorrect);
     }
     true
 }
 
-impl StringReader<'_> {
-    /// Immutably extract string if found at current position with given delimiters
-    fn peek_delimited(&self, from_ch: char, to_ch: char) -> Option<&str> {
-        let tail = &self.src[self.src_index(self.pos)..];
-        let mut chars = tail.chars();
-        let first_char = chars.next()?;
-        if first_char != from_ch {
-            return None;
-        }
-        let last_char_idx = chars.as_str().find(to_ch)?;
-        Some(&chars.as_str()[..last_char_idx])
+/// Extract string if found at current position with given delimiters
+fn peek_delimited(text: &str, from_ch: char, to_ch: char) -> Option<&str> {
+    let mut chars = text.chars();
+    let first_char = chars.next()?;
+    if first_char != from_ch {
+        return None;
     }
+    let last_char_idx = chars.as_str().find(to_ch)?;
+    Some(&chars.as_str()[..last_char_idx])
 }
author	Aleksey Kladov <aleksey.kladov@gmail.com>	2019-05-06 11:53:40 +0300
committer	Aleksey Kladov <aleksey.kladov@gmail.com>	2019-07-20 21:12:34 +0300
commit	395ee0b79f23b90593b01dd0a78451b8c93b0aa6 (patch)
tree	46544580dab78a9ce0e2ad59231c65709955ba2d /src/libsyntax/parse/lexer/unicode_chars.rs
parent	95b1fe560d2bd8472f250fb8cfd2168520a58405 (diff)
download	rust-395ee0b79f23b90593b01dd0a78451b8c93b0aa6.tar.gz rust-395ee0b79f23b90593b01dd0a78451b8c93b0aa6.zip