about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorbors <bors@rust-lang.org>2020-05-26 01:43:40 +0000
committerbors <bors@rust-lang.org>2020-05-26 01:43:40 +0000
commit9eedd138ee22147111a885d6948fb050d9849bf4 (patch)
tree632e765c1e9e7cfa86312316324e318c9eddad9f /src
parent698c5c6d95218735afebdada8a518ab66e0e9213 (diff)
parenta93d31603f80e16a185cda3377c328ae85273325 (diff)
downloadrust-9eedd138ee22147111a885d6948fb050d9849bf4.tar.gz
rust-9eedd138ee22147111a885d6948fb050d9849bf4.zip
Auto merge of #71487 - rcoh:71471-shebang, r=petrochenkov
Fix bug in shebang handling

Shebang handling was too agressive in stripping out the first line in cases where it is actually _not_ a shebang, but instead, valid rust (#70528). This is a second attempt at resolving this issue (the first attempt was reverted, for, among other reasons, causing an ICE in certain cases (#71372, #71471).

The behavior is now codified by a number of UI tests, but simply:
For the first line to be a shebang, the following must all be true:
1. The line must start with `#!`
2. The line must contain a non-whitespace character after `#!`
3. The next character in the file, ignoring comments & whitespace must not be `[`

I believe this is a strict superset of what we used to allow, so perhaps a crater run is unnecessary, but probably not a terrible idea.

Fixes #70528
Diffstat (limited to 'src')
-rw-r--r--src/librustc_lexer/src/lib.rs26
-rw-r--r--src/librustc_lexer/src/tests.rs57
-rw-r--r--src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs2
-rw-r--r--src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr8
-rw-r--r--src/test/ui/parser/shebang/multiline-attrib.rs7
-rw-r--r--src/test/ui/parser/shebang/regular-attrib.rs5
-rw-r--r--src/test/ui/parser/shebang/shebang-and-attrib.rs9
-rw-r--r--src/test/ui/parser/shebang/shebang-comment.rs6
-rw-r--r--src/test/ui/parser/shebang/shebang-must-start-file.rs6
-rw-r--r--src/test/ui/parser/shebang/shebang-must-start-file.stderr8
-rw-r--r--src/test/ui/parser/shebang/sneaky-attrib.rs16
-rw-r--r--src/test/ui/parser/shebang/valid-shebang.rs6
-rw-r--r--src/tools/tidy/src/style.rs5
13 files changed, 154 insertions, 7 deletions
diff --git a/src/librustc_lexer/src/lib.rs b/src/librustc_lexer/src/lib.rs
index e44feee9660..fe6785de009 100644
--- a/src/librustc_lexer/src/lib.rs
+++ b/src/librustc_lexer/src/lib.rs
@@ -236,16 +236,28 @@ pub enum Base {
 }
 
 /// `rustc` allows files to have a shebang, e.g. "#!/usr/bin/rustrun",
-/// but shebang isn't a part of rust syntax, so this function
-/// skips the line if it starts with a shebang ("#!").
-/// Line won't be skipped if it represents a valid Rust syntax
-/// (e.g. "#![deny(missing_docs)]").
+/// but shebang isn't a part of rust syntax.
 pub fn strip_shebang(input: &str) -> Option<usize> {
-    debug_assert!(!input.is_empty());
-    if !input.starts_with("#!") || input.starts_with("#![") {
+    let first_line = input.lines().next()?;
+    // A shebang is intentionally loosely defined as `#! [non whitespace]` on the first line.
+    let could_be_shebang =
+        first_line.starts_with("#!") && first_line[2..].contains(|c| !is_whitespace(c));
+    if !could_be_shebang {
         return None;
     }
-    Some(input.find('\n').unwrap_or(input.len()))
+    let non_whitespace_tokens = tokenize(input).map(|tok| tok.kind).filter(|tok|
+        !matches!(tok, TokenKind::LineComment | TokenKind::BlockComment { .. } | TokenKind::Whitespace)
+    );
+    let prefix = [TokenKind::Pound, TokenKind::Not, TokenKind::OpenBracket];
+    let starts_with_attribute = non_whitespace_tokens.take(3).eq(prefix.iter().copied());
+    if starts_with_attribute {
+        // If the file starts with #![ then it's definitely not a shebang -- it couldn't be
+        // a rust program since a Rust program can't start with `[`
+        None
+    } else {
+        // It's a #!... and there isn't a `[` in sight, must be a shebang
+        Some(first_line.len())
+    }
 }
 
 /// Parses the first token from the provided input string.
diff --git a/src/librustc_lexer/src/tests.rs b/src/librustc_lexer/src/tests.rs
index 06fc159fe25..725799374fc 100644
--- a/src/librustc_lexer/src/tests.rs
+++ b/src/librustc_lexer/src/tests.rs
@@ -145,4 +145,61 @@ mod tests {
             }),
         );
     }
+
+    #[test]
+    fn test_valid_shebang() {
+        // https://github.com/rust-lang/rust/issues/70528
+        let input = "#!/usr/bin/rustrun\nlet x = 5;";
+        assert_eq!(strip_shebang(input), Some(18));
+    }
+
+    #[test]
+    fn test_invalid_shebang_valid_rust_syntax() {
+        // https://github.com/rust-lang/rust/issues/70528
+        let input = "#!    [bad_attribute]";
+        assert_eq!(strip_shebang(input), None);
+    }
+
+    #[test]
+    fn test_shebang_second_line() {
+        // Because shebangs are interpreted by the kernel, they must be on the first line
+        let input = "\n#!/bin/bash";
+        assert_eq!(strip_shebang(input), None);
+    }
+
+    #[test]
+    fn test_shebang_space() {
+        let input = "#!    /bin/bash";
+        assert_eq!(strip_shebang(input), Some(input.len()));
+    }
+
+    #[test]
+    fn test_shebang_empty_shebang() {
+        let input = "#!    \n[attribute(foo)]";
+        assert_eq!(strip_shebang(input), None);
+    }
+
+    #[test]
+    fn test_invalid_shebang_comment() {
+        let input = "#!//bin/ami/a/comment\n[";
+        assert_eq!(strip_shebang(input), None)
+    }
+
+    #[test]
+    fn test_invalid_shebang_another_comment() {
+        let input = "#!/*bin/ami/a/comment*/\n[attribute";
+        assert_eq!(strip_shebang(input), None)
+    }
+
+    #[test]
+    fn test_shebang_valid_rust_after() {
+        let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
+        assert_eq!(strip_shebang(input), Some(23))
+    }
+
+    #[test]
+    fn test_shebang_followed_by_attrib() {
+        let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
+        assert_eq!(strip_shebang(input), Some(19));
+    }
 }
diff --git a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs
new file mode 100644
index 00000000000..a2505180884
--- /dev/null
+++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.rs
@@ -0,0 +1,2 @@
+
+#!B //~ expected `[`, found `B`
diff --git a/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr
new file mode 100644
index 00000000000..896a9dc83d8
--- /dev/null
+++ b/src/test/ui/parser/shebang/issue-71471-ignore-tidy.stderr
@@ -0,0 +1,8 @@
+error: expected `[`, found `B`
+  --> $DIR/issue-71471-ignore-tidy.rs:2:3
+   |
+LL | #!B
+   |   ^ expected `[`
+
+error: aborting due to previous error
+
diff --git a/src/test/ui/parser/shebang/multiline-attrib.rs b/src/test/ui/parser/shebang/multiline-attrib.rs
new file mode 100644
index 00000000000..931c94c7fba
--- /dev/null
+++ b/src/test/ui/parser/shebang/multiline-attrib.rs
@@ -0,0 +1,7 @@
+#!
+[allow(unused_variables)]
+// check-pass
+
+fn main() {
+    let x = 5;
+}
diff --git a/src/test/ui/parser/shebang/regular-attrib.rs b/src/test/ui/parser/shebang/regular-attrib.rs
new file mode 100644
index 00000000000..ca8fb0830ff
--- /dev/null
+++ b/src/test/ui/parser/shebang/regular-attrib.rs
@@ -0,0 +1,5 @@
+#![allow(unused_variables)]
+// check-pass
+fn main() {
+    let x = 5;
+}
diff --git a/src/test/ui/parser/shebang/shebang-and-attrib.rs b/src/test/ui/parser/shebang/shebang-and-attrib.rs
new file mode 100644
index 00000000000..61b89c655a3
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-and-attrib.rs
@@ -0,0 +1,9 @@
+#!/usr/bin/env run-cargo-script
+
+// check-pass
+#![allow(unused_variables)]
+
+
+fn main() {
+    let x = 5;
+}
diff --git a/src/test/ui/parser/shebang/shebang-comment.rs b/src/test/ui/parser/shebang/shebang-comment.rs
new file mode 100644
index 00000000000..2b1ab0c574d
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-comment.rs
@@ -0,0 +1,6 @@
+#!//bin/bash
+
+// check-pass
+fn main() {
+    println!("a valid shebang (that is also a rust comment)")
+}
diff --git a/src/test/ui/parser/shebang/shebang-must-start-file.rs b/src/test/ui/parser/shebang/shebang-must-start-file.rs
new file mode 100644
index 00000000000..e0392572dc8
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-must-start-file.rs
@@ -0,0 +1,6 @@
+// something on the first line for tidy
+#!/bin/bash  //~ expected `[`, found `/`
+
+fn main() {
+    println!("ok!");
+}
diff --git a/src/test/ui/parser/shebang/shebang-must-start-file.stderr b/src/test/ui/parser/shebang/shebang-must-start-file.stderr
new file mode 100644
index 00000000000..50543e8bdb8
--- /dev/null
+++ b/src/test/ui/parser/shebang/shebang-must-start-file.stderr
@@ -0,0 +1,8 @@
+error: expected `[`, found `/`
+  --> $DIR/shebang-must-start-file.rs:2:3
+   |
+LL | #!/bin/bash
+   |   ^ expected `[`
+
+error: aborting due to previous error
+
diff --git a/src/test/ui/parser/shebang/sneaky-attrib.rs b/src/test/ui/parser/shebang/sneaky-attrib.rs
new file mode 100644
index 00000000000..b406cc3aa13
--- /dev/null
+++ b/src/test/ui/parser/shebang/sneaky-attrib.rs
@@ -0,0 +1,16 @@
+#!//bin/bash
+
+
+// This could not possibly be a shebang & also a valid rust file, since a Rust file
+// can't start with `[`
+/*
+    [ (mixing comments to also test that we ignore both types of comments)
+
+ */
+
+[allow(unused_variables)]
+
+// check-pass
+fn main() {
+    let x = 5;
+}
diff --git a/src/test/ui/parser/shebang/valid-shebang.rs b/src/test/ui/parser/shebang/valid-shebang.rs
new file mode 100644
index 00000000000..e480d3da3fc
--- /dev/null
+++ b/src/test/ui/parser/shebang/valid-shebang.rs
@@ -0,0 +1,6 @@
+#!/usr/bin/env run-cargo-script
+
+// check-pass
+fn main() {
+    println!("Hello World!");
+}
diff --git a/src/tools/tidy/src/style.rs b/src/tools/tidy/src/style.rs
index 4247fcb3b7f..396d6c0cfcd 100644
--- a/src/tools/tidy/src/style.rs
+++ b/src/tools/tidy/src/style.rs
@@ -174,6 +174,11 @@ pub fn check(path: &Path, bad: &mut bool) {
 
         let can_contain =
             contents.contains("// ignore-tidy-") || contents.contains("# ignore-tidy-");
+        // Enable testing ICE's that require specific (untidy)
+        // file formats easily eg. `issue-1234-ignore-tidy.rs`
+        if filename.contains("ignore-tidy") {
+            return;
+        }
         let mut skip_cr = contains_ignore_directive(can_contain, &contents, "cr");
         let mut skip_undocumented_unsafe =
             contains_ignore_directive(can_contain, &contents, "undocumented-unsafe");