about summary refs log tree commit diff
path: root/src/libstd
diff options
context:
space:
mode:
authorKevin Ballard <kevin@sb.org>2013-08-25 19:18:32 -0700
committerKevin Ballard <kevin@sb.org>2013-10-15 20:10:10 -0700
commitb41391306692c82d9b44721a47833eb809e0cbf1 (patch)
tree9037866337af058ed5dc7e7a62be72ac898e623e /src/libstd
parente97d61672b3f31e4d54589bed20286aca02bf42b (diff)
downloadrust-b41391306692c82d9b44721a47833eb809e0cbf1.tar.gz
rust-b41391306692c82d9b44721a47833eb809e0cbf1.zip
path2: Start reimplementing in terms of ~[u8] instead of ~str
As documented in #7225, we cannot rely on paths being representable in
utf-8. Specifically, Linux allows anything (besides NUL) in a path.
Redesign GenericPath in light of this.

PosixPath hasn't been reimplemented yet for ~[u8].
Diffstat (limited to 'src/libstd')
-rw-r--r--src/libstd/path2.rs445
1 files changed, 357 insertions, 88 deletions
diff --git a/src/libstd/path2.rs b/src/libstd/path2.rs
index a98fcc40e18..45ce6e94ae4 100644
--- a/src/libstd/path2.rs
+++ b/src/libstd/path2.rs
@@ -19,8 +19,8 @@ use iterator::{AdditiveIterator, Extendable, Iterator};
 use option::{Option, None, Some};
 use str;
 use str::{OwnedStr, Str, StrSlice, StrVector};
-use to_str::ToStr;
 use util;
+use vec;
 use vec::{ImmutableVector, OwnedVector};
 
 /// Typedef for the platform-native path type
@@ -40,10 +40,16 @@ pub type ComponentIter<'self> = PosixComponentIter<'self>;
 /// Iterator that yields successive components of a PosixPath
 type PosixComponentIter<'self> = str::CharSplitIterator<'self, char>;
 
+// Condition that is raised when a NUL is found in a byte vector given to a Path function
+condition! {
+    // this should be a &[u8] but there's a lifetime issue
+    null_byte: ~[u8] -> ~[u8];
+}
+
 /// Represents a POSIX file path
 #[deriving(Clone, DeepClone)]
 pub struct PosixPath {
-    priv repr: ~str, // assumed to never be empty
+    priv repr: ~[u8], // assumed to never be empty or contain NULs
     priv sepidx: Option<uint> // index of the final separator in repr
 }
 
@@ -55,111 +61,262 @@ impl Eq for PosixPath {
 
 impl FromStr for PosixPath {
     fn from_str(s: &str) -> Option<PosixPath> {
-        Some(PosixPath::new(s))
+        let v = s.as_bytes();
+        if contains_nul(v) {
+            None
+        } else {
+            Some(unsafe { GenericPathUnsafe::from_vec_unchecked(v) })
+        }
     }
 }
 
 /// A trait that represents the generic operations available on paths
-pub trait GenericPath: Clone {
+pub trait GenericPath: Clone + GenericPathUnsafe {
+    /// Creates a new Path from a byte vector.
+    /// The resulting Path will always be normalized.
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the path contains a NUL.
+    #[inline]
+    fn from_vec(path: &[u8]) -> Self {
+        if contains_nul(path) {
+            let path = self::null_byte::cond.raise(path.to_owned());
+            assert!(!contains_nul(path));
+            unsafe { GenericPathUnsafe::from_vec_unchecked(path) }
+        } else {
+            unsafe { GenericPathUnsafe::from_vec_unchecked(path) }
+        }
+    }
+
     /// Creates a new Path from a string.
-    /// The resulting path will always be normalized.
-    fn from_str(path: &str) -> Self;
-
-    /// Returns the path as a string
-    fn as_str<'a>(&'a self) -> &'a str;
-
-    /// Returns the directory component of `self`, as a string (with no trailing separator).
-    /// If `self` has no directory component, returns ".".
-    fn dirname<'a>(&'a self) -> &'a str;
-    /// Returns the file component of `self`, as a string.
-    /// If `self` represents the root of the file hierarchy, returns the empty string.
-    /// If `self` is ".", returns the empty string.
-    fn filename<'a>(&'a self) -> &'a str;
-    /// Returns the stem of the filename of `self`, as a string.
+    /// The resulting Path will always be normalized.
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the path contains a NUL.
+    #[inline]
+    fn from_str(path: &str) -> Self {
+        GenericPath::from_vec(path.as_bytes())
+    }
+
+    /// Creates a new Path from a CString.
+    /// The resulting Path will always be normalized.
+    #[inline]
+    fn from_c_str(path: CString) -> Self {
+        // CStrings can't contain NULs
+        unsafe { GenericPathUnsafe::from_vec_unchecked(path.as_bytes()) }
+    }
+
+    /// Returns the path as a string, if possible.
+    /// If the path is not representable in utf-8, this returns None.
+    #[inline]
+    fn as_str<'a>(&'a self) -> Option<&'a str> {
+        str::from_bytes_slice_opt(self.as_vec())
+    }
+
+    /// Returns the path as a byte vector
+    fn as_vec<'a>(&'a self) -> &'a [u8];
+
+    /// Returns the directory component of `self`, as a byte vector (with no trailing separator).
+    /// If `self` has no directory component, returns ['.'].
+    fn dirname<'a>(&'a self) -> &'a [u8];
+    /// Returns the directory component of `self`, as a string, if possible.
+    /// See `dirname` for details.
+    #[inline]
+    fn dirname_str<'a>(&'a self) -> Option<&'a str> {
+        str::from_bytes_slice_opt(self.dirname())
+    }
+    /// Returns the file component of `self`, as a byte vector.
+    /// If `self` represents the root of the file hierarchy, returns the empty vector.
+    /// If `self` is ".", returns the empty vector.
+    fn filename<'a>(&'a self) -> &'a [u8];
+    /// Returns the file component of `self`, as a string, if possible.
+    /// See `filename` for details.
+    #[inline]
+    fn filename_str<'a>(&'a self) -> Option<&'a str> {
+        str::from_bytes_slice_opt(self.filename())
+    }
+    /// Returns the stem of the filename of `self`, as a byte vector.
     /// The stem is the portion of the filename just before the last '.'.
     /// If there is no '.', the entire filename is returned.
-    fn filestem<'a>(&'a self) -> &'a str {
+    fn filestem<'a>(&'a self) -> &'a [u8] {
         let name = self.filename();
-        match name.rfind('.') {
+        let dot = '.' as u8;
+        match name.rposition_elem(&dot) {
             None | Some(0) => name,
-            Some(1) if name == ".." => name,
+            Some(1) if name == bytes!("..") => name,
             Some(pos) => name.slice_to(pos)
         }
     }
-    /// Returns the extension of the filename of `self`, as a string option.
+    /// Returns the stem of the filename of `self`, as a string, if possible.
+    /// See `filestem` for details.
+    #[inline]
+    fn filestem_str<'a>(&'a self) -> Option<&'a str> {
+        str::from_bytes_slice_opt(self.filestem())
+    }
+    /// Returns the extension of the filename of `self`, as an optional byte vector.
     /// The extension is the portion of the filename just after the last '.'.
     /// If there is no extension, None is returned.
-    /// If the filename ends in '.', the empty string is returned.
-    fn extension<'a>(&'a self) -> Option<&'a str> {
+    /// If the filename ends in '.', the empty vector is returned.
+    fn extension<'a>(&'a self) -> Option<&'a [u8]> {
         let name = self.filename();
-        match name.rfind('.') {
+        let dot = '.' as u8;
+        match name.rposition_elem(&dot) {
             None | Some(0) => None,
-            Some(1) if name == ".." => None,
+            Some(1) if name == bytes!("..") => None,
             Some(pos) => Some(name.slice_from(pos+1))
         }
     }
+    /// Returns the extension of the filename of `self`, as a string, if possible.
+    /// See `extension` for details.
+    #[inline]
+    fn extension_str<'a>(&'a self) -> Option<&'a str> {
+        self.extension().chain(|v| str::from_bytes_slice_opt(v))
+    }
 
-    /// Replaces the directory portion of the path with the given string.
+    /// Replaces the directory portion of the path with the given byte vector.
     /// If `self` represents the root of the filesystem hierarchy, the last path component
-    /// of the given string becomes the filename.
-    fn set_dirname(&mut self, dirname: &str);
+    /// of the given byte vector becomes the filename.
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the dirname contains a NUL.
+    #[inline]
+    fn set_dirname(&mut self, dirname: &[u8]) {
+        if contains_nul(dirname) {
+            let dirname = self::null_byte::cond.raise(dirname.to_owned());
+            assert!(!contains_nul(dirname));
+            unsafe { self.set_dirname_unchecked(dirname) }
+        } else {
+            unsafe { self.set_dirname_unchecked(dirname) }
+        }
+    }
+    /// Replaces the directory portion of the path with the given string.
+    /// See `set_dirname` for details.
+    #[inline]
+    fn set_dirname_str(&mut self, dirname: &str) {
+        self.set_dirname(dirname.as_bytes())
+    }
+    /// Replaces the filename portion of the path with the given byte vector.
+    /// If the replacement name is [], this is equivalent to popping the path.
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the filename contains a NUL.
+    #[inline]
+    fn set_filename(&mut self, filename: &[u8]) {
+        if contains_nul(filename) {
+            let filename = self::null_byte::cond.raise(filename.to_owned());
+            assert!(!contains_nul(filename));
+            unsafe { self.set_filename_unchecked(filename) }
+        } else {
+            unsafe { self.set_filename_unchecked(filename) }
+        }
+    }
     /// Replaces the filename portion of the path with the given string.
-    /// If the replacement name is "", this is equivalent to popping the path.
-    fn set_filename(&mut self, filename: &str);
-    /// Replaces the filestem with the given string.
+    /// See `set_filename` for details.
+    #[inline]
+    fn set_filename_str(&mut self, filename: &str) {
+        self.set_filename(filename.as_bytes())
+    }
+    /// Replaces the filestem with the given byte vector.
     /// If there is no extension in `self` (or `self` has no filename), this is equivalent
-    /// to `set_filename`. Otherwise, if the given string is "", the extension (including
-    /// the preceding ".") becomes the new filename.
-    fn set_filestem(&mut self, filestem: &str) {
+    /// to `set_filename`. Otherwise, if the given byte vector is [], the extension (including
+    /// the preceding '.') becomes the new filename.
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the filestem contains a NUL.
+    fn set_filestem(&mut self, filestem: &[u8]) {
         // borrowck is being a pain here
         let val = {
             let name = self.filename();
             if !name.is_empty() {
-                match name.rfind('.') {
+                let dot = '.' as u8;
+                match name.rposition_elem(&dot) {
                     None | Some(0) => None,
                     Some(idx) => {
-                        let mut s = str::with_capacity(filestem.len() + name.len() - idx);
-                        s.push_str(filestem);
-                        s.push_str(name.slice_from(idx));
-                        Some(s)
+                        let mut v;
+                        if contains_nul(filestem) {
+                            let filestem = self::null_byte::cond.raise(filestem.to_owned());
+                            assert!(!contains_nul(filestem));
+                            v = vec::with_capacity(filestem.len() + name.len() - idx);
+                            v.push_all(filestem);
+                        } else {
+                            v = vec::with_capacity(filestem.len() + name.len() - idx);
+                            v.push_all(filestem);
+                        }
+                        v.push_all(name.slice_from(idx));
+                        Some(v)
                     }
                 }
             } else { None }
         };
         match val {
             None => self.set_filename(filestem),
-            Some(s) => self.set_filename(s)
+            Some(v) => unsafe { self.set_filename_unchecked(v) }
         }
     }
-    /// Replaces the extension with the given string.
+    /// Replaces the filestem with the given string.
+    /// See `set_filestem` for details.
+    #[inline]
+    fn set_filestem_str(&mut self, filestem: &str) {
+        self.set_filestem(filestem.as_bytes())
+    }
+    /// Replaces the extension with the given byte vector.
     /// If there is no extension in `self`, this adds one.
-    /// If the given string is "", this removes the extension.
+    /// If the given byte vector is [], this removes the extension.
     /// If `self` has no filename, this is a no-op.
-    fn set_extension(&mut self, extension: &str) {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the extension contains a NUL.
+    fn set_extension(&mut self, extension: &[u8]) {
         // borrowck causes problems here too
         let val = {
             let name = self.filename();
             if !name.is_empty() {
-                match name.rfind('.') {
+                let dot = '.' as u8;
+                match name.rposition_elem(&dot) {
                     None | Some(0) => {
                         if extension.is_empty() {
                             None
                         } else {
-                            let mut s = str::with_capacity(name.len() + extension.len() + 1);
-                            s.push_str(name);
-                            s.push_char('.');
-                            s.push_str(extension);
-                            Some(s)
+                            let mut v;
+                            if contains_nul(extension) {
+                                let extension = self::null_byte::cond.raise(extension.to_owned());
+                                assert!(!contains_nul(extension));
+                                v = vec::with_capacity(name.len() + extension.len() + 1);
+                                v.push_all(name);
+                                v.push(dot);
+                                v.push_all(extension);
+                            } else {
+                                v = vec::with_capacity(name.len() + extension.len() + 1);
+                                v.push_all(name);
+                                v.push(dot);
+                                v.push_all(extension);
+                            }
+                            Some(v)
                         }
                     }
                     Some(idx) => {
                         if extension.is_empty() {
                             Some(name.slice_to(idx).to_owned())
                         } else {
-                            let mut s = str::with_capacity(idx + extension.len() + 1);
-                            s.push_str(name.slice_to(idx+1));
-                            s.push_str(extension);
-                            Some(s)
+                            let mut v;
+                            if contains_nul(extension) {
+                                let extension = self::null_byte::cond.raise(extension.to_owned());
+                                assert!(!contains_nul(extension));
+                                v = vec::with_capacity(idx + extension.len() + 1);
+                                v.push_all(name.slice_to(idx+1));
+                                v.push_all(extension);
+                            } else {
+                                v = vec::with_capacity(idx + extension.len() + 1);
+                                v.push_all(name.slice_to(idx+1));
+                                v.push_all(extension);
+                            }
+                            Some(v)
                         }
                     }
                 }
@@ -167,73 +324,163 @@ pub trait GenericPath: Clone {
         };
         match val {
             None => (),
-            Some(s) => self.set_filename(s)
+            Some(v) => unsafe { self.set_filename_unchecked(v) }
         }
     }
+    /// Replaces the extension with the given string.
+    /// See `set_extension` for details.
+    #[inline]
+    fn set_extension_str(&mut self, extension: &str) {
+        self.set_extension(extension.as_bytes())
+    }
 
-    /// Returns a new Path constructed by replacing the dirname with the given string.
+    /// Returns a new Path constructed by replacing the dirname with the given byte vector.
     /// See `set_dirname` for details.
-    fn with_dirname(&self, dirname: &str) -> Self {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the dirname contains a NUL.
+    #[inline]
+    fn with_dirname(&self, dirname: &[u8]) -> Self {
         let mut p = self.clone();
         p.set_dirname(dirname);
         p
     }
-    /// Returns a new Path constructed by replacing the filename with the given string.
+    /// Returns a new Path constructed by replacing the dirname with the given string.
+    /// See `set_dirname` for details.
+    #[inline]
+    fn with_dirname_str(&self, dirname: &str) -> Self {
+        self.with_dirname(dirname.as_bytes())
+    }
+    /// Returns a new Path constructed by replacing the filename with the given byte vector.
     /// See `set_filename` for details.
-    fn with_filename(&self, filename: &str) -> Self {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the filename contains a NUL.
+    #[inline]
+    fn with_filename(&self, filename: &[u8]) -> Self {
         let mut p = self.clone();
         p.set_filename(filename);
         p
     }
-    /// Returns a new Path constructed by setting the filestem to the given string.
+    /// Returns a new Path constructed by replacing the filename with the given string.
+    /// See `set_filename` for details.
+    #[inline]
+    fn with_filename_str(&self, filename: &str) -> Self {
+        self.with_filename(filename.as_bytes())
+    }
+    /// Returns a new Path constructed by setting the filestem to the given byte vector.
     /// See `set_filestem` for details.
-    fn with_filestem(&self, filestem: &str) -> Self {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the filestem contains a NUL.
+    #[inline]
+    fn with_filestem(&self, filestem: &[u8]) -> Self {
         let mut p = self.clone();
         p.set_filestem(filestem);
         p
     }
-    /// Returns a new Path constructed by setting the extension to the given string.
+    /// Returns a new Path constructed by setting the filestem to the given string.
+    /// See `set_filestem` for details.
+    #[inline]
+    fn with_filestem_str(&self, filestem: &str) -> Self {
+        self.with_filestem(filestem.as_bytes())
+    }
+    /// Returns a new Path constructed by setting the extension to the given byte vector.
     /// See `set_extension` for details.
-    fn with_extension(&self, extension: &str) -> Self {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the extension contains a NUL.
+    #[inline]
+    fn with_extension(&self, extension: &[u8]) -> Self {
         let mut p = self.clone();
         p.set_extension(extension);
         p
     }
-
+    /// Returns a new Path constructed by setting the extension to the given string.
+    /// See `set_extension` for details.
+    #[inline]
+    fn with_extension_str(&self, extension: &str) -> Self {
+        self.with_extension(extension.as_bytes())
+    }
 
     /// Returns the directory component of `self`, as a Path.
     /// If `self` represents the root of the filesystem hierarchy, returns `self`.
     fn dir_path(&self) -> Self {
-        GenericPath::from_str(self.dirname())
+        GenericPath::from_vec(self.dirname())
     }
     /// Returns the file component of `self`, as a relative Path.
     /// If `self` represents the root of the filesystem hierarchy, returns None.
     fn file_path(&self) -> Option<Self> {
         match self.filename() {
-            "" => None,
-            s => Some(GenericPath::from_str(s))
+            [] => None,
+            v => Some(GenericPath::from_vec(v))
         }
     }
 
-    /// Pushes a path (as a string) onto `self`.
+    /// Pushes a path (as a byte vector) onto `self`.
     /// If the argument represents an absolute path, it replaces `self`.
-    fn push(&mut self, path: &str);
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the path contains a NUL.
+    #[inline]
+    fn push(&mut self, path: &[u8]) {
+        if contains_nul(path) {
+            let path = self::null_byte::cond.raise(path.to_owned());
+            assert!(!contains_nul(path));
+            unsafe { self.push_unchecked(path) }
+        } else {
+            unsafe { self.push_unchecked(path) }
+        }
+    }
+    /// Pushes a path (as a string) onto `self.
+    /// See `push` for details.
+    #[inline]
+    fn push_str(&mut self, path: &str) {
+        self.push(path.as_bytes())
+    }
     /// Pushes a Path onto `self`.
     /// If the argument represents an absolute path, it replaces `self`.
-    fn push_path(&mut self, path: &Self);
+    #[inline]
+    fn push_path(&mut self, path: &Self) {
+        self.push(path.as_vec())
+    }
     /// Pops the last path component off of `self` and returns it.
     /// If `self` represents the root of the file hierarchy, None is returned.
-    fn pop_opt(&mut self) -> Option<~str>;
+    fn pop_opt(&mut self) -> Option<~[u8]>;
+    /// Pops the last path component off of `self` and returns it as a string, if possible.
+    /// `self` will still be modified even if None is returned.
+    /// See `pop_opt` for details.
+    #[inline]
+    fn pop_opt_str(&mut self) -> Option<~str> {
+        self.pop_opt().chain(|v| str::from_bytes_owned_opt(v))
+    }
 
-    /// Returns a new Path constructed by joining `self` with the given path (as a string).
+    /// Returns a new Path constructed by joining `self` with the given path (as a byte vector).
     /// If the given path is absolute, the new Path will represent just that.
-    fn join(&self, path: &str) -> Self {
+    ///
+    /// # Failure
+    ///
+    /// Raises the `null_byte` condition if the path contains a NUL.
+    #[inline]
+    fn join(&self, path: &[u8]) -> Self {
         let mut p = self.clone();
         p.push(path);
         p
     }
+    /// Returns a new Path constructed by joining `self` with the given path (as a string).
+    /// See `join` for details.
+    #[inline]
+    fn join_str(&self, path: &str) -> Self {
+        self.join(path.as_bytes())
+    }
     /// Returns a new Path constructed by joining `self` with the given path.
     /// If the given path is absolute, the new Path will represent just that.
+    #[inline]
     fn join_path(&self, path: &Self) -> Self {
         let mut p = self.clone();
         p.push_path(path);
@@ -255,22 +502,42 @@ pub trait GenericPath: Clone {
     fn path_relative_from(&self, base: &Self) -> Option<Self>;
 }
 
-impl ToStr for PosixPath {
-    #[inline]
-    fn to_str(&self) -> ~str {
-        self.as_str().to_owned()
-    }
+/// A trait that represents the unsafe operations on GenericPaths
+pub trait GenericPathUnsafe {
+    /// Creates a new Path from a byte vector without checking for null bytes.
+    /// The resulting Path will always be normalized.
+    unsafe fn from_vec_unchecked(path: &[u8]) -> Self;
+
+    /// Replaces the directory portion of the path with the given byte vector without
+    /// checking for null bytes.
+    /// See `set_dirname` for details.
+    unsafe fn set_dirname_unchecked(&mut self, dirname: &[u8]);
+
+    /// Replaces the filename portion of the path with the given byte vector without
+    /// checking for null bytes.
+    /// See `set_filename` for details.
+    unsafe fn set_filename_unchecked(&mut self, filename: &[u8]);
+
+    /// Pushes a path onto `self` without checking for null bytes.
+    /// See `push` for details.
+    unsafe fn push_unchecked(&mut self, path: &[u8]);
+}
+
+#[inline(always)]
+fn contains_nul(v: &[u8]) -> bool {
+    v.iter().any(|&x| x == 0)
 }
 
 impl ToCStr for PosixPath {
     #[inline]
     fn to_c_str(&self) -> CString {
-        self.as_str().to_c_str()
+        // The Path impl guarantees no internal NUL
+        unsafe { self.as_vec().to_c_str_unchecked() }
     }
 
     #[inline]
     unsafe fn to_c_str_unchecked(&self) -> CString {
-        self.as_str().to_c_str_unchecked()
+        self.as_vec().to_c_str_unchecked()
     }
 }
 
@@ -583,24 +850,26 @@ fn normalize_helper<'a, Sep: str::CharEq>(s: &'a str, is_abs: bool, sep: Sep) ->
 /// Various POSIX helpers
 pub mod posix {
     /// The standard path separator character
-    pub static sep: char = '/';
+    pub static sep: u8 = '/' as u8;
 
-    /// Returns whether the given char is a path separator
+    /// Returns whether the given byte is a path separator
     #[inline]
-    pub fn is_sep(u: char) -> bool {
-        u == sep
+    pub fn is_sep(u: &u8) -> bool {
+        *u == sep
     }
 }
 
 /// Various Windows helpers
 pub mod windows {
     /// The standard path separator character
-    pub static sep: char = '\\';
+    pub static sep: u8 = '\\' as u8;
+    /// The alternative path separator character
+    pub static sep2: u8 = '/' as u8;
 
-    /// Returns whether the given char is a path separator (both / and \)
+    /// Returns whether the given byte is a path separator
     #[inline]
-    pub fn is_sep(u: char) -> bool {
-        u == sep || u == '/'
+    pub fn is_sep(u: &u8) -> bool {
+        *u == sep || *u == sep2
     }
 }