auto merge of #7114 : pnkfelix/rust/issue3961-fix-whitespace-detection, r=brson

r? (yes, the review request is back, now that I got it building against incom... I mean master!) (Attempting to port from orphaned pull request #6764.) Fix for #3961. Also includes a test case to illustrate the issues. (All of the entries that say "should align" should align with each other, and the four lines near the end that say "compare _", for _ in {A,B,C,D}, should line up with each other.) Before applying this change set: -- the "(should align)" entries are all over the place, and the form-feed/line-feed whitespace is not cut out as one might or might not expect; -- compare B and D do not match A and C. (To be honest, it's hard to really say what the right behavior is here, and people who are expecting a particular behavior out of a pretty printer in these cases may well get burned.)
author: bors <bors@rust-lang.org> 2013-06-15 20:40:14 -0700
committer: bors <bors@rust-lang.org> 2013-06-15 20:40:14 -0700
commit: b9119edc55287bb1a9b5609bdef84001c3341e22 (patch)
tree: 77af5c825954526dfb8cddf081e37c27fcdc0e93
parent: c989b79127c5062df0a64d8c383de93c82a3d9b7 (diff)
parent: 876f6deb4af73d3a6a9845c8ca0a9edff0e25989 (diff)
download: rust-b9119edc55287bb1a9b5609bdef84001c3341e22.tar.gz
rust-b9119edc55287bb1a9b5609bdef84001c3341e22.zip
3 files changed, 248 insertions, 14 deletions
diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs
index 68473f11537..b7bb1b3bc53 100644
--- a/src/libsyntax/parse/comments.rs
+++ b/src/libsyntax/parse/comments.rs
@@ -197,26 +197,35 @@ fn read_line_comments(rdr: @mut StringReader, code_to_the_left: bool,
     }
 }
 
-// FIXME #3961: This is not the right way to convert string byte
-// offsets to characters.
-fn all_whitespace(s: &str, begin: uint, end: uint) -> bool {
-    let mut i: uint = begin;
-    while i != end {
-        if !is_whitespace(s[i] as char) { return false; } i += 1u;
+// Returns None if the first col chars of s contain a non-whitespace char.
+// Otherwise returns Some(k) where k is first char offset after that leading
+// whitespace.  Note k may be outside bounds of s.
+fn all_whitespace(s: &str, col: CharPos) -> Option<uint> {
+    let len = s.len();
+    let mut col = col.to_uint();
+    let mut cursor: uint = 0;
+    while col > 0 && cursor < len {
+        let r: str::CharRange = s.char_range_at(cursor);
+        if !r.ch.is_whitespace() {
+            return None;
+        }
+        cursor = r.next;
+        col -= 1;
     }
-    return true;
+    return Some(cursor);
 }
 
 fn trim_whitespace_prefix_and_push_line(lines: &mut ~[~str],
                                         s: ~str, col: CharPos) {
     let len = s.len();
-    // FIXME #3961: Doing bytewise comparison and slicing with CharPos
-    let col = col.to_uint();
-    let s1 = if all_whitespace(s, 0, uint::min(len, col)) {
-        if col < len {
-            s.slice(col, len).to_owned()
-        } else {  ~"" }
-    } else { s };
+    let s1 = match all_whitespace(s, col) {
+        Some(col) => {
+            if col < len {
+                s.slice(col, len).to_owned()
+            } else {  ~"" }
+        }
+        None => s,
+    };
     debug!("pushing line: %s", s1);
     lines.push(s1);
 }
diff --git a/src/test/pretty/block-comment-wchar.pp b/src/test/pretty/block-comment-wchar.pp
new file mode 100644
index 00000000000..911de166e8f
--- /dev/null
+++ b/src/test/pretty/block-comment-wchar.pp
@@ -0,0 +1,116 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is meant as a test case for Issue 3961.
+//
+// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
+//
+// pp-exact:block-comment-wchar.pp
+fn f() {
+    fn nested() {
+        /*
+          Spaced2
+        */
+        /*
+          Spaced10
+        */
+        /*
+          Tabbed8+2
+        */
+        /*
+          CR8+2
+        */
+    }
+    /*
+      Spaced2:                       (prefixed so start of space aligns with comment)
+    */
+    /*
+    		Tabbed2: (more indented b/c *start* of space will align with comment)
+    */
+    /*
+      Spaced6:                       (Alignment removed and realigning spaces inserted)
+    */
+    /*
+      Tabbed4+2:                     (Alignment removed and realigning spaces inserted)
+    */
+
+    /*
+      VT4+2:                         (should align)
+    */
+    /*
+      FF4+2:                         (should align)
+    */
+    /*
+      CR4+2:                         (should align)
+    */
+    /*
+    // (NEL deliberately omitted)
+    */
+    /*
+      Ogham Space Mark 4+2:          (should align)
+    */
+    /*
+      Mongolian Vowel Separator 4+2: (should align)
+    */
+    /*
+      Four-per-em space 4+2:         (should align)
+    */
+
+    /*
+      Mongolian Vowel Sep   count 1: (should align)
+      Mongolian Vowel Sep   count 2: (should align)
+      Mongolian Vowel Sep   count 3: (should align)
+      Mongolian Vowel Sep   count 4: (should align)
+      Mongolian Vowel Sep   count 5: (should align)
+      Mongolian Vowel Sep   count 6: (should align)
+      Mongolian Vowel Sep   count 7: (should align)
+      Mongolian Vowel Sep   count 8: (should align)
+      Mongolian Vowel Sep   count 9: (should align)
+      Mongolian Vowel Sep   count A: (should align)
+      Mongolian Vowel Sep   count B: (should align)
+      Mongolian Vowel Sep   count C: (should align)
+      Mongolian Vowel Sep   count D: (should align)
+      Mongolian Vowel Sep   count E: (should align)
+      Mongolian Vowel Sep   count F: (should align)
+    */
+
+
+
+    /* */
+
+    /*
+      Hello from offset 6
+      Space 6+2:                     compare A
+      Mongolian Vowel Separator 6+2: compare B
+    */
+
+    /*᠎*/
+
+    /*
+      Hello from another offset 6 with wchars establishing column offset
+      Space 6+2:                     compare C
+      Mongolian Vowel Separator 6+2: compare D
+    */
+}
+
+fn main() {
+    // Taken from http://en.wikipedia.org/wiki/Whitespace_character
+    let chars =
+        ['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+         // '\x85', // for some reason Rust thinks NEL isn't whitespace
+         '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
+         '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
+         '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
+     // <= bugs in pretty-printer?
+    for chars.each |c| {
+        let ws = c.is_whitespace();
+        println(fmt!("%? %?" , c , ws));
+    }
+}
diff --git a/src/test/pretty/block-comment-wchar.rs b/src/test/pretty/block-comment-wchar.rs
new file mode 100644
index 00000000000..d8a820542a7
--- /dev/null
+++ b/src/test/pretty/block-comment-wchar.rs
@@ -0,0 +1,109 @@
+// Copyright 2013 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// This is meant as a test case for Issue 3961.
+//
+// Test via: rustc --pretty normal src/test/pretty/block-comment-wchar.rs
+//
+// pp-exact:block-comment-wchar.pp
+fn f() {
+    fn nested() {
+        /*
+  Spaced2
+        */
+        /*
+          Spaced10
+        */
+        /*
+								  Tabbed8+2
+        */
+        /*
+
  CR8+2
+        */
+    }
+    /*
+  Spaced2:                       (prefixed so start of space aligns with comment)
+    */
+    /*
+		Tabbed2: (more indented b/c *start* of space will align with comment)
+    */
+    /*
+      Spaced6:                       (Alignment removed and realigning spaces inserted)
+    */
+    /*
+				  Tabbed4+2:                     (Alignment removed and realigning spaces inserted)
+    */
+
+    /*
+  VT4+2:                         (should align)
+    */
+    /*
+  FF4+2:                         (should align)
+    */
+    /*
+
  CR4+2:                         (should align)
+    */
+    /*
+    // (NEL deliberately omitted)
+    */
+    /*
+      Ogham Space Mark 4+2:          (should align)
+    */
+    /*
+᠎᠎᠎᠎  Mongolian Vowel Separator 4+2: (should align)
+    */
+    /*
+      Four-per-em space 4+2:         (should align)
+    */
+
+    /*
+   ᠎  Mongolian Vowel Sep   count 1: (should align)
+  ᠎   Mongolian Vowel Sep   count 2: (should align)
+  ᠎᠎  Mongolian Vowel Sep   count 3: (should align)
+ ᠎    Mongolian Vowel Sep   count 4: (should align)
+ ᠎ ᠎  Mongolian Vowel Sep   count 5: (should align)
+ ᠎᠎   Mongolian Vowel Sep   count 6: (should align)
+ ᠎᠎᠎  Mongolian Vowel Sep   count 7: (should align)
+᠎     Mongolian Vowel Sep   count 8: (should align)
+᠎  ᠎  Mongolian Vowel Sep   count 9: (should align)
+᠎ ᠎   Mongolian Vowel Sep   count A: (should align)
+᠎ ᠎᠎  Mongolian Vowel Sep   count B: (should align)
+᠎᠎    Mongolian Vowel Sep   count C: (should align)
+᠎᠎ ᠎  Mongolian Vowel Sep   count D: (should align)
+᠎᠎᠎   Mongolian Vowel Sep   count E: (should align)
+᠎᠎᠎᠎  Mongolian Vowel Sep   count F: (should align)
+    */
+
+
+/* */ /*
+        Hello from offset 6
+        Space 6+2:                     compare A
+᠎᠎᠎᠎᠎᠎  Mongolian Vowel Separator 6+2: compare B
+      */
+/*᠎*/ /*
+        Hello from another offset 6 with wchars establishing column offset
+        Space 6+2:                     compare C
+᠎᠎᠎᠎᠎᠎  Mongolian Vowel Separator 6+2: compare D
+      */
+}
+
+fn main() {
+    // Taken from http://en.wikipedia.org/wiki/Whitespace_character
+    let chars =
+        ['\x0A', '\x0B', '\x0C', '\x0D', '\x20',
+         // '\x85', // for some reason Rust thinks NEL isn't whitespace
+         '\xA0', '\u1680', '\u180E', '\u2000', '\u2001', '\u2002', '\u2003',
+         '\u2004', '\u2005', '\u2006', '\u2007', '\u2008', '\u2009', '\u200A',
+         '\u2028', '\u2029', '\u202F', '\u205F', '\u3000'];
+    for chars.each |c| {
+        let ws = c.is_whitespace();
+        println(fmt!("%? %?", c , ws)); // <= bugs in pretty-printer?
+    }
+}
author	bors <bors@rust-lang.org>	2013-06-15 20:40:14 -0700
committer	bors <bors@rust-lang.org>	2013-06-15 20:40:14 -0700
commit	b9119edc55287bb1a9b5609bdef84001c3341e22 (patch)
tree	77af5c825954526dfb8cddf081e37c27fcdc0e93
parent	c989b79127c5062df0a64d8c383de93c82a3d9b7 (diff)
parent	876f6deb4af73d3a6a9845c8ca0a9edff0e25989 (diff)
download	rust-b9119edc55287bb1a9b5609bdef84001c3341e22.tar.gz rust-b9119edc55287bb1a9b5609bdef84001c3341e22.zip