diff options
| author | Malo Jaffré <jaffre.malo@gmail.com> | 2017-08-17 20:02:13 +0200 |
|---|---|---|
| committer | Malo Jaffré <jaffre.malo@gmail.com> | 2017-08-17 20:03:32 +0200 |
| commit | d4e0e5228111cd47294342a60b5f8af44c65e206 (patch) | |
| tree | 5364e51b1733e40ff74fb64564ff76e2603e2778 /src/libsyntax/parse/lexer | |
| parent | dd39ecf368a3cdb937e129f36a2a342d0c9358f0 (diff) | |
| download | rust-d4e0e5228111cd47294342a60b5f8af44c65e206.tar.gz rust-d4e0e5228111cd47294342a60b5f8af44c65e206.zip | |
Accept underscores in unicode escapes
Fixes #43692.
Diffstat (limited to 'src/libsyntax/parse/lexer')
| -rw-r--r-- | src/libsyntax/parse/lexer/mod.rs | 87 |
1 file changed, 47 insertions, 40 deletions
diff --git a/src/libsyntax/parse/lexer/mod.rs b/src/libsyntax/parse/lexer/mod.rs index 527d2e41396..a80b7a112b0 100644 --- a/src/libsyntax/parse/lexer/mod.rs +++ b/src/libsyntax/parse/lexer/mod.rs @@ -963,60 +963,67 @@ impl<'a> StringReader<'a> { true } - /// Scan over a \u{...} escape + /// Scan over a `\u{...}` escape /// - /// At this point, we have already seen the \ and the u, the { is the current character. We - /// will read at least one digit, and up to 6, and pass over the }. + /// At this point, we have already seen the `\` and the `u`, the `{` is the current character. + /// We will read a hex number (with `_` separators), with 1 to 6 actual digits, + /// and pass over the `}`. fn scan_unicode_escape(&mut self, delim: char) -> bool { self.bump(); // past the { let start_bpos = self.pos; - let mut count = 0; - let mut accum_int = 0; let mut valid = true; - while !self.ch_is('}') && count <= 6 { - let c = match self.ch { - Some(c) => c, - None => { - panic!(self.fatal_span_(start_bpos, - self.pos, - "unterminated unicode escape (found EOF)")); - } - }; - accum_int *= 16; - accum_int += c.to_digit(16).unwrap_or_else(|| { - if c == delim { - panic!(self.fatal_span_(self.pos, - self.next_pos, - "unterminated unicode escape (needed a `}`)")); - } else { - self.err_span_char(self.pos, - self.next_pos, - "invalid character in unicode escape", - c); - } - valid = false; - 0 - }); - self.bump(); - count += 1; + if let Some('_') = self.ch { + // disallow leading `_` + self.err_span_(self.pos, + self.next_pos, + "invalid start of unicode escape"); + valid = false; } + let count = self.scan_digits(16, 16); + if count > 6 { self.err_span_(start_bpos, self.pos, - "overlong unicode escape (can have at most 6 hex digits)"); + "overlong unicode escape (must have at most 6 hex digits)"); valid = false; } - - if valid && (char::from_u32(accum_int).is_none() || count == 0) { - self.err_span_(start_bpos, - self.pos, - "invalid unicode character escape"); - valid = false; + 
loop { + match self.ch { + Some('}') => { + if valid && count == 0 { + self.err_span_(start_bpos, + self.pos, + "empty unicode escape (must have at least 1 hex digit)"); + valid = false; + } + self.bump(); // past the ending `}` + break; + }, + Some(c) => { + if c == delim { + self.err_span_(self.pos, + self.pos, + "unterminated unicode escape (needed a `}`)"); + valid = false; + break; + } else if valid { + self.err_span_char(start_bpos, + self.pos, + "invalid character in unicode escape", + c); + valid = false; + } + }, + None => { + panic!(self.fatal_span_(start_bpos, + self.pos, + "unterminated unicode escape (found EOF)")); + } + } + self.bump(); } - - self.bump(); // past the ending } valid } |
