diff options
| author | bors <bors@rust-lang.org> | 2015-05-23 11:12:02 +0000 |
|---|---|---|
| committer | bors <bors@rust-lang.org> | 2015-05-23 11:12:02 +0000 |
| commit | 4c2ebc3947568f5ca11c4e8a97538296b4f78285 (patch) | |
| tree | 63e1f3a8bdb342abacb432405355deefe586b555 /src/libstd | |
| parent | 8bc80ba9fc8a297e0633a4f6e7fad1c333cbe8b4 (diff) | |
| parent | d0b5eb35a26a92b3845e90ac340a6422893c447b (diff) | |
| download | rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.tar.gz rust-4c2ebc3947568f5ca11c4e8a97538296b4f78285.zip | |
Auto merge of #25416 - kballard:ffi-cstr-to-str-convenience, r=alexcrichton
This was motivated by http://www.evanmiller.org/a-taste-of-rust.html.
A common problem when working with FFI is converting from raw
C strings into `&str` or `String`. Right now you're required to say
something like:
    let cstr = unsafe { CStr::from_ptr(ptr) };
    let result = str::from_utf8(cstr.to_bytes());
This is slightly awkward, and is not particularly intuitive for people
who haven't used the ffi module before. We can do a bit better by
providing some convenience methods on CStr:
    fn to_str(&self) -> Result<&str, str::Utf8Error>
    fn to_string_lossy(&self) -> Cow<str>
This will make it immediately apparent to new users of CStr how to get a
string from a raw C string, so they can say:
    let s = unsafe { CStr::from_ptr(ptr).to_string_lossy() };
Diffstat (limited to 'src/libstd')
| -rw-r--r-- | src/libstd/ffi/c_str.rs | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/src/libstd/ffi/c_str.rs b/src/libstd/ffi/c_str.rs index b83a8efe1d0..8c066b3dc2e 100644 --- a/src/libstd/ffi/c_str.rs +++ b/src/libstd/ffi/c_str.rs @@ -10,6 +10,7 @@ #![unstable(feature = "std_misc")] +use borrow::Cow; use convert::{Into, From}; use cmp::{PartialEq, Eq, PartialOrd, Ord, Ordering}; use error::Error; @@ -22,6 +23,7 @@ use ops::Deref; use option::Option::{self, Some, None}; use result::Result::{self, Ok, Err}; use slice; +use str; use string::String; use vec::Vec; @@ -113,6 +115,26 @@ pub struct CString { /// work(&s); /// } /// ``` +/// +/// Converting a foreign C string into a Rust `String` +/// +/// ```no_run +/// # #![feature(libc,cstr_to_str)] +/// extern crate libc; +/// use std::ffi::CStr; +/// +/// extern { fn my_string() -> *const libc::c_char; } +/// +/// fn my_string_safe() -> String { +/// unsafe { +/// CStr::from_ptr(my_string()).to_string_lossy().into_owned() +/// } +/// } +/// +/// fn main() { +/// println!("string: {}", my_string_safe()); +/// } +/// ``` #[derive(Hash)] #[stable(feature = "rust1", since = "1.0.0")] pub struct CStr { @@ -327,6 +349,39 @@ impl CStr { pub fn to_bytes_with_nul(&self) -> &[u8] { unsafe { mem::transmute::<&[libc::c_char], &[u8]>(&self.inner) } } + + /// Yields a `&str` slice if the `CStr` contains valid UTF-8. + /// + /// This function will calculate the length of this string and check for + /// UTF-8 validity, and then return the `&str` if it's valid. + /// + /// > **Note**: This method is currently implemented to check for validity + /// > after a 0-cost cast, but it is planned to alter its definition in the + /// > future to perform the length calculation in addition to the UTF-8 + /// > check whenever this method is called. 
+ #[unstable(feature = "cstr_to_str", reason = "recently added")] + pub fn to_str(&self) -> Result<&str, str::Utf8Error> { + // NB: When CStr is changed to perform the length check in .to_bytes() instead of in + // from_ptr(), it may be worth considering if this should be rewritten to do the UTF-8 + // check inline with the length calculation instead of doing it afterwards. + str::from_utf8(self.to_bytes()) + } + + /// Converts a `CStr` into a `Cow<str>`. + /// + /// This function will calculate the length of this string (which normally + /// requires a linear amount of work to be done) and then return the + /// resulting slice as a `Cow<str>`, replacing any invalid UTF-8 sequences + /// with `U+FFFD REPLACEMENT CHARACTER`. + /// + /// > **Note**: This method is currently implemented to check for validity + /// > after a 0-cost cast, but it is planned to alter its definition in the + /// > future to perform the length calculation in addition to the UTF-8 + /// > check whenever this method is called. + #[unstable(feature = "cstr_to_str", reason = "recently added")] + pub fn to_string_lossy(&self) -> Cow<str> { + String::from_utf8_lossy(self.to_bytes()) + } } #[stable(feature = "rust1", since = "1.0.0")] @@ -355,6 +410,7 @@ mod tests { use prelude::v1::*; use super::*; use libc; + use borrow::Cow::{Borrowed, Owned}; #[test] fn c_to_rust() { @@ -404,4 +460,20 @@ mod tests { assert_eq!(s.to_bytes_with_nul(), b"12\0"); } } + + #[test] + fn to_str() { + let data = b"123\xE2\x80\xA6\0"; + let ptr = data.as_ptr() as *const libc::c_char; + unsafe { + assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…")); + assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…")); + } + let data = b"123\xE2\0"; + let ptr = data.as_ptr() as *const libc::c_char; + unsafe { + assert!(CStr::from_ptr(ptr).to_str().is_err()); + assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}"))); + } + } } |
