Auto merge of #25416 - kballard:ffi-cstr-to-str-convenience, r=alexcrichton

This was motivated by http://www.evanmiller.org/a-taste-of-rust.html. A common problem when working with FFI right now is converting from raw C strings into `&str` or `String`. Right now you're required to say something like let cstr = unsafe { CStr::from_ptr(ptr) }; let result = str::from_utf8(cstr.to_bytes()); This is slightly awkward, and is not particularly intuitive for people who haven't used the ffi module before. We can do a bit better by providing some convenience methods on CStr: fn to_str(&self) -> Result<&str, str::Utf8Error> fn to_string_lossy(&self) -> Cow<str> This will make it immediately apparent to new users of CStr how to get a string from a raw C string, so they can say: let s = unsafe { CStr::from_ptr(ptr).to_string_lossy() };
author: bors <bors@rust-lang.org> 2015-05-23 11:12:02 +0000
committer: bors <bors@rust-lang.org> 2015-05-23 11:12:02 +0000
commit: 4c2ebc3947568f5ca11c4e8a97538296b4f78285 (patch)
tree: 63e1f3a8bdb342abacb432405355deefe586b555 /src/libstd
parent: 8bc80ba9fc8a297e0633a4f6e7fad1c333cbe8b4 (diff)
parent: d0b5eb35a26a92b3845e90ac340a6422893c447b (diff)
download: rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.tar.gz
rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.zip
1 files changed, 72 insertions, 0 deletions
diff --git a/src/libstd/ffi/c_str.rs b/src/libstd/ffi/c_str.rs
index b83a8efe1d0..8c066b3dc2e 100644
--- a/src/libstd/ffi/c_str.rs
+++ b/src/libstd/ffi/c_str.rs
@@ -10,6 +10,7 @@
 
 #![unstable(feature = "std_misc")]
 
+use borrow::Cow;
 use convert::{Into, From};
 use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering};
 use error::Error;
@@ -22,6 +23,7 @@ use ops::Deref;
 use option::Option::{self, Some, None};
 use result::Result::{self, Ok, Err};
 use slice;
+use str;
 use string::String;
 use vec::Vec;
 
@@ -113,6 +115,26 @@ pub struct CString {
 ///     work(&s);
 /// }
 /// ```
+///
+/// Converting a foreign C string into a Rust `String`
+///
+/// ```no_run
+/// # #![feature(libc,cstr_to_str)]
+/// extern crate libc;
+/// use std::ffi::CStr;
+///
+/// extern { fn my_string() -> *const libc::c_char; }
+///
+/// fn my_string_safe() -> String {
+///     unsafe {
+///         CStr::from_ptr(my_string()).to_string_lossy().into_owned()
+///     }
+/// }
+///
+/// fn main() {
+///     println!("string: {}", my_string_safe());
+/// }
+/// ```
 #[derive(Hash)]
 #[stable(feature = "rust1", since = "1.0.0")]
 pub struct CStr {
@@ -327,6 +349,39 @@ impl CStr {
     pub fn to_bytes_with_nul(&self) -> &[u8] {
         unsafe { mem::transmute::<&[libc::c_char], &[u8]>(&self.inner) }
     }
+
+    /// Yields a `&str` slice if the `CStr` contains valid UTF-8.
+    ///
+    /// This function will calculate the length of this string and check for
+    /// UTF-8 validity, and then return the `&str` if it's valid.
+    ///
+    /// > **Note**: This method is currently implemented to check for validity
+    /// > after a 0-cost cast, but it is planned to alter its definition in the
+    /// > future to perform the length calculation in addition to the UTF-8
+    /// > check whenever this method is called.
+    #[unstable(feature = "cstr_to_str", reason = "recently added")]
+    pub fn to_str(&self) -> Result<&str, str::Utf8Error> {
+        // NB: When CStr is changed to perform the length check in .to_bytes() instead of in
+        // from_ptr(), it may be worth considering if this should be rewritten to do the UTF-8
+        // check inline with the length calculation instead of doing it afterwards.
+        str::from_utf8(self.to_bytes())
+    }
+
+    /// Converts a `CStr` into a `Cow<str>`.
+    ///
+    /// This function will calculate the length of this string (which normally
+    /// requires a linear amount of work to be done) and then return the
+    /// resulting slice as a `Cow<str>`, replacing any invalid UTF-8 sequences
+    /// with `U+FFFD REPLACEMENT CHARACTER`.
+    ///
+    /// > **Note**: This method is currently implemented to check for validity
+    /// > after a 0-cost cast, but it is planned to alter its definition in the
+    /// > future to perform the length calculation in addition to the UTF-8
+    /// > check whenever this method is called.
+    #[unstable(feature = "cstr_to_str", reason = "recently added")]
+    pub fn to_string_lossy(&self) -> Cow<str> {
+        String::from_utf8_lossy(self.to_bytes())
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
@@ -355,6 +410,7 @@ mod tests {
     use prelude::v1::*;
     use super::*;
     use libc;
+    use borrow::Cow::{Borrowed, Owned};
 
     #[test]
     fn c_to_rust() {
@@ -404,4 +460,20 @@ mod tests {
             assert_eq!(s.to_bytes_with_nul(), b"12\0");
         }
     }
+
+    #[test]
+    fn to_str() {
+        let data = b"123\xE2\x80\xA6\0";
+        let ptr = data.as_ptr() as *const libc::c_char;
+        unsafe {
+            assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
+            assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
+        }
+        let data = b"123\xE2\0";
+        let ptr = data.as_ptr() as *const libc::c_char;
+        unsafe {
+            assert!(CStr::from_ptr(ptr).to_str().is_err());
+            assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
+        }
+    }
 }
author	bors <bors@rust-lang.org>	2015-05-23 11:12:02 +0000
committer	bors <bors@rust-lang.org>	2015-05-23 11:12:02 +0000
commit	4c2ebc3947568f5ca11c4e8a97538296b4f78285 (patch)
tree	63e1f3a8bdb342abacb432405355deefe586b555 /src/libstd
parent	8bc80ba9fc8a297e0633a4f6e7fad1c333cbe8b4 (diff)
parent	d0b5eb35a26a92b3845e90ac340a6422893c447b (diff)
download	rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.tar.gz rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.zip