diff options
| author | Gary Linscott <glinscott@gmail.com> | 2013-07-10 17:06:16 -0400 |
|---|---|---|
| committer | Gary Linscott <glinscott@gmail.com> | 2013-07-11 14:23:15 -0400 |
| commit | 5aee5a11e3d4807c6df190e33cc6c4dc81ef7ea3 (patch) | |
| tree | 7a5607a93faa47e6e9b7e519a13b58ce380d3455 /src/libstd | |
| parent | 179637304a5d913a7ea2dc13cffed6e53f342377 (diff) | |
| download | rust-5aee5a11e3d4807c6df190e33cc6c4dc81ef7ea3.tar.gz rust-5aee5a11e3d4807c6df190e33cc6c4dc81ef7ea3.zip | |
Optimize is_utf8
Manually unroll the multibyte loops, and add a fast path for single-byte chars.
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/str.rs | 24 |
1 file changed, 16 insertions, 8 deletions
```diff
diff --git a/src/libstd/str.rs b/src/libstd/str.rs
index bc59164637e..1d8a2d404a7 100644
--- a/src/libstd/str.rs
+++ b/src/libstd/str.rs
@@ -596,17 +596,25 @@ pub fn is_utf8(v: &[u8]) -> bool {
     let mut i = 0u;
     let total = v.len();
     while i < total {
-        let mut chsize = utf8_char_width(v[i]);
-        if chsize == 0u { return false; }
-        if i + chsize > total { return false; }
-        i += 1u;
-        while chsize > 1u {
-            if v[i] & 192u8 != TAG_CONT_U8 { return false; }
+        if v[i] < 128u8 {
             i += 1u;
-            chsize -= 1u;
+        } else {
+            let w = utf8_char_width(v[i]);
+            if w == 0u { return false; }
+
+            let nexti = i + w;
+            if nexti > total { return false; }
+
+            if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
+            if w > 2 {
+                if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
+                if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
+            }
+
+            i = nexti;
         }
     }
-    return true;
+    true
 }
 
 /// Determines if a vector of `u16` contains valid UTF-16
```
