diff options
| author | Graydon Hoare <graydon@mozilla.com> | 2011-12-13 16:25:51 -0800 |
|---|---|---|
| committer | Graydon Hoare <graydon@mozilla.com> | 2011-12-13 16:34:50 -0800 |
| commit | fa9ad984fb2f013baebdbe01a42baa3b9101dd84 (patch) | |
| tree | 49115690e45ca322337b93f25308cd618f85b013 /src/libcore | |
| parent | 32087f5c2a35bf8050067c22a57fd60269633a60 (diff) | |
| download | rust-fa9ad984fb2f013baebdbe01a42baa3b9101dd84.tar.gz rust-fa9ad984fb2f013baebdbe01a42baa3b9101dd84.zip | |
Copy first batch of material from libstd to libcore.
Diffstat (limited to 'src/libcore')
| -rw-r--r-- | src/libcore/bool.rs | 134 | ||||
| -rw-r--r-- | src/libcore/box.rs | 20 | ||||
| -rw-r--r-- | src/libcore/char.rs | 150 | ||||
| -rw-r--r-- | src/libcore/comm.rs | 184 | ||||
| -rw-r--r-- | src/libcore/core.rc | 41 | ||||
| -rw-r--r-- | src/libcore/ctypes.rs | 146 | ||||
| -rw-r--r-- | src/libcore/either.rs | 89 | ||||
| -rw-r--r-- | src/libcore/extfmt.rs | 453 | ||||
| -rw-r--r-- | src/libcore/float.rs | 343 | ||||
| -rw-r--r-- | src/libcore/int.rs | 189 | ||||
| -rw-r--r-- | src/libcore/option.rs | 93 | ||||
| -rw-r--r-- | src/libcore/ptr.rs | 52 | ||||
| -rw-r--r-- | src/libcore/result.rs | 112 | ||||
| -rw-r--r-- | src/libcore/str.rs | 962 | ||||
| -rw-r--r-- | src/libcore/sys.rs | 96 | ||||
| -rw-r--r-- | src/libcore/task.rs | 357 | ||||
| -rw-r--r-- | src/libcore/u32.rs | 27 | ||||
| -rw-r--r-- | src/libcore/u64.rs | 88 | ||||
| -rw-r--r-- | src/libcore/u8.rs | 68 | ||||
| -rw-r--r-- | src/libcore/uint.rs | 254 | ||||
| -rw-r--r-- | src/libcore/unsafe.rs | 41 | ||||
| -rw-r--r-- | src/libcore/vec.rs | 835 |
22 files changed, 4734 insertions, 0 deletions
diff --git a/src/libcore/bool.rs b/src/libcore/bool.rs new file mode 100644 index 00000000000..0fc869d1b41 --- /dev/null +++ b/src/libcore/bool.rs @@ -0,0 +1,134 @@ +// -*- rust -*- + +/* +Module: bool + +Classic Boolean logic reified as ADT +*/ + +export t; +export not, and, or, xor, implies; +export eq, ne, is_true, is_false; +export from_str, to_str, all_values, to_bit; + +/* +Type: t + +The type of boolean logic values +*/ +type t = bool; + +/* Function: not + +Negation/Inverse +*/ +pure fn not(v: t) -> t { !v } + +/* Function: and + +Conjunction +*/ +pure fn and(a: t, b: t) -> t { a && b } + +/* Function: or + +Disjunction +*/ +pure fn or(a: t, b: t) -> t { a || b } + +/* +Function: xor + +Exclusive or, i.e. `or(and(a, not(b)), and(not(a), b))` +*/ +pure fn xor(a: t, b: t) -> t { (a && !b) || (!a && b) } + +/* +Function: implies + +Implication in the logic, i.e. from `a` follows `b` +*/ +pure fn implies(a: t, b: t) -> t { !a || b } + +/* +Predicate: eq + +Returns: + +true if truth values `a` and `b` are indistinguishable in the logic +*/ +pure fn eq(a: t, b: t) -> bool { a == b } + +/* +Predicate: ne + +Returns: + +true if truth values `a` and `b` are distinguishable in the logic +*/ +pure fn ne(a: t, b: t) -> bool { a != b } + +/* +Predicate: is_true + +Returns: + +true if `v` represents truth in the logic +*/ +pure fn is_true(v: t) -> bool { v } + +/* +Predicate: is_false + +Returns: + +true if `v` represents falsehood in the logic +*/ +pure fn is_false(v: t) -> bool { !v } + +/* +Function: from_str + +Parse logic value from `s` +*/ +pure fn from_str(s: str) -> t { + alt s { + "true" { true } + "false" { false } + } +} + +/* +Function: to_str + +Convert `v` into a string +*/ +pure fn to_str(v: t) -> str { if v { "true" } else { "false" } } + +/* +Function: all_values + +Iterates over all truth values by passing them to `blk` +in an unspecified order +*/ +fn all_values(blk: block(v: t)) { + blk(true); + blk(false); +} + +/* +Function: to_bit + +Returns: + +An u8 whose first bit is set if `if_true(v)` holds +*/ +fn to_bit(v: t) -> u8 { if v { 1u8 } else { 0u8 } } + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/box.rs b/src/libcore/box.rs new file mode 100644 index 00000000000..6682fe17284 --- /dev/null +++ b/src/libcore/box.rs @@ -0,0 +1,20 @@ +/* +Module: box +*/ + + +export ptr_eq; + +/* +Function: ptr_eq + +Determine if two shared boxes point to the same object +*/ +fn ptr_eq<T>(a: @T, b: @T) -> bool { + // FIXME: ptr::addr_of + unsafe { + let a_ptr: uint = unsafe::reinterpret_cast(a); + let b_ptr: uint = unsafe::reinterpret_cast(b); + ret a_ptr == b_ptr; + } +} diff --git a/src/libcore/char.rs b/src/libcore/char.rs new file mode 100644 index 00000000000..5ff7681ae8a --- /dev/null +++ b/src/libcore/char.rs @@ -0,0 +1,150 @@ +/* +Module: char + +Utilities for manipulating the char type +*/ + +/* +Function: is_whitespace + +Indicates whether a character is whitespace. + +Whitespace characters include space (U+0020), tab (U+0009), line feed +(U+000A), carriage return (U+000D), and a number of less common +ASCII and unicode characters. +*/ +pure fn is_whitespace(c: char) -> bool { + const ch_space: char = '\u0020'; + const ch_ogham_space_mark: char = '\u1680'; + const ch_mongolian_vowel_sep: char = '\u180e'; + const ch_en_quad: char = '\u2000'; + const ch_em_quad: char = '\u2001'; + const ch_en_space: char = '\u2002'; + const ch_em_space: char = '\u2003'; + const ch_three_per_em_space: char = '\u2004'; + const ch_four_per_em_space: char = '\u2005'; + const ch_six_per_em_space: char = '\u2006'; + const ch_figure_space: char = '\u2007'; + const ch_punctuation_space: char = '\u2008'; + const ch_thin_space: char = '\u2009'; + const ch_hair_space: char = '\u200a'; + const ch_narrow_no_break_space: char = '\u202f'; + const ch_medium_mathematical_space: char = '\u205f'; + const ch_ideographic_space: char = '\u3000'; + const ch_line_separator: char = '\u2028'; + const ch_paragraph_separator: char = '\u2029'; + const ch_character_tabulation: char = '\u0009'; + const ch_line_feed: char = '\u000a'; + const ch_line_tabulation: char = '\u000b'; + const ch_form_feed: char = '\u000c'; + const ch_carriage_return: char = '\u000d'; + const ch_next_line: char = '\u0085'; + const ch_no_break_space: char = '\u00a0'; + + if c == ch_space { + true + } else if c == ch_ogham_space_mark { + true + } else if c == ch_mongolian_vowel_sep { + true + } else if c == ch_en_quad { + true + } else if c == ch_em_quad { + true + } else if c == ch_en_space { + true + } else if c == ch_em_space { + true + } else if c == ch_three_per_em_space { + true + } else if c == ch_four_per_em_space { + true + } else if c == ch_six_per_em_space { + true + } else if c == ch_figure_space { + true + } else if c == ch_punctuation_space { + true + } else if c == ch_thin_space { + true + } else if c == ch_hair_space { + true + } else if c == ch_narrow_no_break_space { + true + } else if c == ch_medium_mathematical_space { + true + } else if c == ch_ideographic_space { + true + } else if c == ch_line_tabulation { + true + } else if c == ch_paragraph_separator { + true + } else if c == ch_character_tabulation { + true + } else if c == ch_line_feed { + true + } else if c == ch_line_tabulation { + true + } else if c == ch_form_feed { + true + } else if c == ch_carriage_return { + true + } else if c == ch_next_line { + true + } else if c == ch_no_break_space { true } else { false } +} + +/* + Function: to_digit + + Convert a char to the corresponding digit. + + Parameters: + c - a char, either '0' to '9', 'a' to 'z' or 'A' to 'Z' + + Returns: + If `c` is between '0' and '9', the corresponding value between 0 and 9. + If `c` is 'a' or 'A', 10. If `c` is 'b' or 'B', 11, etc. + + Safety note: + This function fails if `c` is not a valid char +*/ +pure fn to_digit(c: char) -> u8 unsafe { + alt maybe_digit(c) { + option::some(x) { x } + option::none. { fail; } + } +} + +/* + Function: to_digit + + Convert a char to the corresponding digit. Returns none when the + character is not a valid hexadecimal digit. +*/ +fn maybe_digit(c: char) -> option::t<u8> { + alt c { + '0' to '9' { option::some(c as u8 - ('0' as u8)) } + 'a' to 'z' { option::some(c as u8 + 10u8 - ('a' as u8)) } + 'A' to 'Z' { option::some(c as u8 + 10u8 - ('A' as u8)) } + _ { option::none } + } +} + +/* + Function: cmp + + Compare two chars. + + Parameters: + a - a char + b - a char + + Returns: + -1 if a<b, 0 if a==b, +1 if a>b +*/ +fn cmp(a: char, b: char) -> int { + ret if b > a { -1 } + else if b < a { 1 } + else { 0 } +} \ No newline at end of file diff --git a/src/libcore/comm.rs b/src/libcore/comm.rs new file mode 100644 index 00000000000..e00205a522b --- /dev/null +++ b/src/libcore/comm.rs @@ -0,0 +1,184 @@ +/* +Module: comm + +Communication between tasks + +Communication between tasks is facilitated by ports (in the receiving task), +and channels (in the sending task). Any number of channels may feed into a +single port. + +Ports and channels may only transmit values of unique types; that is, +values that are statically guaranteed to be accessed by a single +'owner' at a time. Unique types include scalars, vectors, strings, +and records, tags, tuples and unique boxes (~T) thereof. Most notably, +shared boxes (@T) may not be transmitted across channels. + +Example: + +> use std::{task, comm, io}; +> +> let p = comm::port(); +> task::spawn(comm::chan(p), fn (c: chan<str>) { +> comm::send(c, "Hello, World"); +> }); +> +> io::println(comm::recv(p)); + +*/ + +import sys; +import task; + +export send; +export recv; +export chan; +export port; + +#[abi = "cdecl"] +native mod rustrt { + type void; + type rust_port; + + fn chan_id_send<send T>(t: *sys::type_desc, + target_task: task::task, target_port: port_id, + data: T) -> ctypes::uintptr_t; + + fn new_port(unit_sz: uint) -> *rust_port; + fn del_port(po: *rust_port); + fn rust_port_detach(po: *rust_port); + fn get_port_id(po: *rust_port) -> port_id; + fn rust_port_size(po: *rust_port) -> ctypes::size_t; + fn port_recv(dptr: *uint, po: *rust_port, + yield: *ctypes::uintptr_t, + killed: *ctypes::uintptr_t); +} + +#[abi = "rust-intrinsic"] +native mod rusti { + fn call_with_retptr<send T>(&&f: fn@(*uint)) -> T; +} + +type port_id = int; + +// It's critical that this only have one variant, so it has a record +// layout, and will work in the rust_task structure in task.rs. +/* +Type: chan + +A communication endpoint that can send messages. Channels send +messages to ports. + +Each channel is bound to a port when the channel is constructed, so +the destination port for a channel must exist before the channel +itself. + +Channels are weak: a channel does not keep the port it is bound to alive. +If a channel attempts to send data to a dead port that data will be silently +dropped. + +Channels may be duplicated and themselves transmitted over other channels. +*/ +tag chan<send T> { + chan_t(task::task, port_id); +} + +resource port_ptr<send T>(po: *rustrt::rust_port) { + // Once the port is detached it's guaranteed not to receive further + // messages + rustrt::rust_port_detach(po); + // Drain the port so that all the still-enqueued items get dropped + while rustrt::rust_port_size(po) > 0u { + // FIXME: For some reason if we don't assign to something here + // we end up with invalid reads in the drop glue. + let _t = recv_::<T>(po); + } + rustrt::del_port(po); +} + +/* +Type: port + +A communication endpoint that can receive messages. Ports receive +messages from channels. + +Each port has a unique per-task identity and may not be replicated or +transmitted. If a port value is copied, both copies refer to the same port. + +Ports may be associated with multiple <chan>s. +*/ +tag port<send T> { port_t(@port_ptr<T>); } + +/* +Function: send + +Sends data over a channel. + +The sent data is moved into the channel, whereupon the caller loses access +to it. +*/ +fn send<send T>(ch: chan<T>, -data: T) { + let chan_t(t, p) = ch; + let res = rustrt::chan_id_send(sys::get_type_desc::<T>(), t, p, data); + if res != 0u unsafe { + // Data sent successfully + unsafe::leak(data); + } + task::yield(); +} + +/* +Function: port + +Constructs a port. +*/ +fn port<send T>() -> port<T> { + port_t(@port_ptr(rustrt::new_port(sys::size_of::<T>()))) +} + +/* +Function: recv + +Receive from a port. + +If no data is available on the port then the task will block until data +becomes available. +*/ +fn recv<send T>(p: port<T>) -> T { recv_(***p) } + +// Receive on a raw port pointer +fn recv_<send T>(p: *rustrt::rust_port) -> T { + // FIXME: Due to issue 1185 we can't use a return pointer when + // calling C code, and since we can't create our own return + // pointer on the stack, we're going to call a little intrinsic + // that will grab the value of the return pointer, then call this + // function, which we will then use to call the runtime. + fn recv(dptr: *uint, port: *rustrt::rust_port, + yield: *ctypes::uintptr_t, + killed: *ctypes::uintptr_t) unsafe { + rustrt::port_recv(dptr, port, yield, killed); + } + let yield = 0u; + let yieldp = ptr::addr_of(yield); + let killed = 0u; + let killedp = ptr::addr_of(killed); + let res = rusti::call_with_retptr(bind recv(_, p, yieldp, killedp)); + if killed != 0u { + fail "killed"; + } + if yield != 0u { + // Data isn't available yet, so res has not been initialized. + task::yield(); + } + ret res; +} + +/* +Function: chan + +Constructs a channel. + +The channel is bound to the port used to construct it. +*/ +fn chan<send T>(p: port<T>) -> chan<T> { + chan_t(task::get_task(), rustrt::get_port_id(***p)) +} diff --git a/src/libcore/core.rc b/src/libcore/core.rc index 90cd1d145f0..6805c5b30c7 100644 --- a/src/libcore/core.rc +++ b/src/libcore/core.rc @@ -7,6 +7,47 @@ #[license = "BSD"]; #[crate_type = "lib"]; +export box, char, float, int, str, ptr, uint, u8, u32, u64, vec, bool; +export either, option, result; +export ctypes, sys, unsafe, comm, task; +export extfmt; + +// Built-in-type support modules + +mod box; +mod char; +mod float; +mod int; +mod str; +mod ptr; +mod uint; +mod u8; +mod u32; +mod u64; +mod vec; +mod bool; + + +// Ubiquitous-utility-type modules + +mod either; +mod option; +mod result; + + +// Runtime and language-primitive support + +mod ctypes; +mod sys; +mod unsafe; +mod comm; +mod task; + +// Compiler support modules + +mod extfmt; + + // Local Variables: // mode: rust; // fill-column: 78; diff --git a/src/libcore/ctypes.rs b/src/libcore/ctypes.rs new file mode 100644 index 00000000000..509eb3ef057 --- /dev/null +++ b/src/libcore/ctypes.rs @@ -0,0 +1,146 @@ +/* +Module: ctypes + +Definitions useful for C interop +*/ + +/* +FIXME: Add a test that uses some native code to verify these sizes, +which are not obviously correct for all potential platforms. +*/ + +/* +Type: c_int + +A signed integer with the same size as a C `int` +*/ +type c_int = i32; + +/* +Type: c_uint + +An unsigned integer with the same size as a C `unsigned int` +*/ +type c_uint = u32; + +/* +Type: long + +A signed integer with the same size as a C `long` +*/ +type long = int; + +/* +Type: unsigned + +An unsigned integer with the same size as a C `unsigned int` +*/ +type unsigned = u32; + +/* +Type: ulong + +An unsigned integer with the same size as a C `unsigned long` +*/ +type ulong = uint; + +/* +Type: intptr_t + +A signed integer with the same size as a pointer. This is +guaranteed to always be the same type as a Rust `int` +*/ +type intptr_t = uint; // FIXME: int + +/* +Type: uintptr_t + +An unsigned integer with the same size as a pointer. This is +guaranteed to always be the same type as a Rust `uint`. +*/ +type uintptr_t = uint; +type uint32_t = u32; + +/* +Type: void + +A type, a pointer to which can be used as C `void *` + +Note that this does not directly correspond to the C `void` type, +which is an incomplete type. Using pointers to this type +when interoperating with C void pointers can help in documentation. +*/ +type void = int; + +// machine type equivalents of rust int, uint, float + +/* +Type: m_int + +FIXME: What C type does this represent? +*/ +#[cfg(target_arch="x86")] +type m_int = i32; +#[cfg(target_arch="x86_64")] +type m_int = i64; + +/* +Type: m_uint + +FIXME: What C type does this represent? +*/ +#[cfg(target_arch="x86")] +type m_uint = u32; +#[cfg(target_arch="x86_64")] +type m_uint = u64; + +// This *must* match with "import m_float = fXX" in std::math per arch +/* +Type: m_float + +FIXME: What C type does this represent? +*/ +type m_float = f64; + +/* +Type: size_t + +An unsigned integer corresponding to the C `size_t` +*/ +type size_t = uint; + +/* +Type: ssize_t + +A signed integer correpsonding to the C `ssize_t` +*/ +type ssize_t = int; + +/* +Type: off_t + +An unsigned integer corresponding to the C `off_t` +*/ +type off_t = uint; + +/* +Type: fd_t + +A type that can be used for C file descriptors +*/ +type fd_t = i32; // not actually a C type, but should be. + +/* +Type: pid_t + +A type for representing process ID's, corresponding to C `pid_t` +*/ +type pid_t = i32; + +// enum is implementation-defined, but is 32-bits in practice +/* +Type: enum + +An unsigned integer with the same size as a C enum +*/ +type enum = u32; diff --git a/src/libcore/either.rs b/src/libcore/either.rs new file mode 100644 index 00000000000..89d47b20746 --- /dev/null +++ b/src/libcore/either.rs @@ -0,0 +1,89 @@ +/* +Module: either + +A type that represents one of two alternatives +*/ + + +/* +Tag: t + +The either type +*/ +tag t<T, U> { + /* Variant: left */ + left(T); + /* Variant: right */ + right(U); +} + +/* Section: Operations */ + +/* +Function: either + +Applies a function based on the given either value + +If `value` is left(T) then `f_left` is applied to its contents, if +`value` is right(U) then `f_right` is applied to its contents, and +the result is returned. +*/ +fn either<T, U, + V>(f_left: block(T) -> V, f_right: block(U) -> V, value: t<T, U>) -> + V { + alt value { left(l) { f_left(l) } right(r) { f_right(r) } } +} + +/* +Function: lefts + +Extracts from a vector of either all the left values. +*/ +fn lefts<copy T, U>(eithers: [t<T, U>]) -> [T] { + let result: [T] = []; + for elt: t<T, U> in eithers { + alt elt { left(l) { result += [l]; } _ {/* fallthrough */ } } + } + ret result; +} + +/* +Function: rights + +Extracts from a vector of either all the right values +*/ +fn rights<T, copy U>(eithers: [t<T, U>]) -> [U] { + let result: [U] = []; + for elt: t<T, U> in eithers { + alt elt { right(r) { result += [r]; } _ {/* fallthrough */ } } + } + ret result; +} + +/* +Function: partition + +Extracts from a vector of either all the left values and right values + +Returns a structure containing a vector of left values and a vector of +right values. +*/ +fn partition<copy T, copy U>(eithers: [t<T, U>]) + -> {lefts: [T], rights: [U]} { + let lefts: [T] = []; + let rights: [U] = []; + for elt: t<T, U> in eithers { + alt elt { left(l) { lefts += [l]; } right(r) { rights += [r]; } } + } + ret {lefts: lefts, rights: rights}; +} + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: +// diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs new file mode 100644 index 00000000000..14a014e891a --- /dev/null +++ b/src/libcore/extfmt.rs @@ -0,0 +1,453 @@ +/* +Syntax Extension: fmt + +Format a string + +The 'fmt' extension is modeled on the posix printf system. + +A posix conversion ostensibly looks like this + +> %[parameter][flags][width][.precision][length]type + +Given the different numeric type bestiary we have, we omit the 'length' +parameter and support slightly different conversions for 'type' + +> %[parameter][flags][width][.precision]type + +we also only support translating-to-rust a tiny subset of the possible +combinations at the moment. + +Example: + +log #fmt("hello, %s!", "world"); + +*/ + +import option::{some, none}; + + +/* + * We have a 'ct' (compile-time) module that parses format strings into a + * sequence of conversions. From those conversions AST fragments are built + * that call into properly-typed functions in the 'rt' (run-time) module. + * Each of those run-time conversion functions accepts another conversion + * description that specifies how to format its output. + * + * The building of the AST is currently done in a module inside the compiler, + * but should migrate over here as the plugin interface is defined. + */ + +// Functions used by the fmt extension at compile time +mod ct { + tag signedness { signed; unsigned; } + tag caseness { case_upper; case_lower; } + tag ty { + ty_bool; + ty_str; + ty_char; + ty_int(signedness); + ty_bits; + ty_hex(caseness); + ty_octal; + ty_float; + // FIXME: More types + } + tag flag { + flag_left_justify; + flag_left_zero_pad; + flag_space_for_sign; + flag_sign_always; + flag_alternate; + } + tag count { + count_is(int); + count_is_param(int); + count_is_next_param; + count_implied; + } + + // A formatted conversion from an expression to a string + type conv = + {param: option::t<int>, + flags: [flag], + width: count, + precision: count, + ty: ty}; + + + // A fragment of the output sequence + tag piece { piece_string(str); piece_conv(conv); } + type error_fn = fn@(str) -> ! ; + + fn parse_fmt_string(s: str, error: error_fn) -> [piece] { + let pieces: [piece] = []; + let lim = str::byte_len(s); + let buf = ""; + fn flush_buf(buf: str, &pieces: [piece]) -> str { + if str::byte_len(buf) > 0u { + let piece = piece_string(buf); + pieces += [piece]; + } + ret ""; + } + let i = 0u; + while i < lim { + let curr = str::substr(s, i, 1u); + if str::eq(curr, "%") { + i += 1u; + if i >= lim { + error("unterminated conversion at end of string"); + } + let curr2 = str::substr(s, i, 1u); + if str::eq(curr2, "%") { + i += 1u; + } else { + buf = flush_buf(buf, pieces); + let rs = parse_conversion(s, i, lim, error); + pieces += [rs.piece]; + i = rs.next; + } + } else { buf += curr; i += 1u; } + } + buf = flush_buf(buf, pieces); + ret pieces; + } + fn peek_num(s: str, i: uint, lim: uint) -> + option::t<{num: uint, next: uint}> { + if i >= lim { ret none; } + let c = s[i]; + if !('0' as u8 <= c && c <= '9' as u8) { ret option::none; } + let n = c - ('0' as u8) as uint; + ret alt peek_num(s, i + 1u, lim) { + none. { some({num: n, next: i + 1u}) } + some(next) { + let m = next.num; + let j = next.next; + some({num: n * 10u + m, next: j}) + } + }; + } + fn parse_conversion(s: str, i: uint, lim: uint, error: error_fn) -> + {piece: piece, next: uint} { + let parm = parse_parameter(s, i, lim); + let flags = parse_flags(s, parm.next, lim); + let width = parse_count(s, flags.next, lim); + let prec = parse_precision(s, width.next, lim); + let ty = parse_type(s, prec.next, lim, error); + ret {piece: + piece_conv({param: parm.param, + flags: flags.flags, + width: width.count, + precision: prec.count, + ty: ty.ty}), + next: ty.next}; + } + fn parse_parameter(s: str, i: uint, lim: uint) -> + {param: option::t<int>, next: uint} { + if i >= lim { ret {param: none, next: i}; } + let num = peek_num(s, i, lim); + ret alt num { + none. { {param: none, next: i} } + some(t) { + let n = t.num; + let j = t.next; + if j < lim && s[j] == '$' as u8 { + {param: some(n as int), next: j + 1u} + } else { {param: none, next: i} } + } + }; + } + fn parse_flags(s: str, i: uint, lim: uint) -> + {flags: [flag], next: uint} { + let noflags: [flag] = []; + if i >= lim { ret {flags: noflags, next: i}; } + + // FIXME: This recursion generates illegal instructions if the return + // value isn't boxed. Only started happening after the ivec conversion + fn more_(f: flag, s: str, i: uint, lim: uint) -> + @{flags: [flag], next: uint} { + let next = parse_flags(s, i + 1u, lim); + let rest = next.flags; + let j = next.next; + let curr: [flag] = [f]; + ret @{flags: curr + rest, next: j}; + } + let more = bind more_(_, s, i, lim); + let f = s[i]; + ret if f == '-' as u8 { + *more(flag_left_justify) + } else if f == '0' as u8 { + *more(flag_left_zero_pad) + } else if f == ' ' as u8 { + *more(flag_space_for_sign) + } else if f == '+' as u8 { + *more(flag_sign_always) + } else if f == '#' as u8 { + *more(flag_alternate) + } else { {flags: noflags, next: i} }; + } + fn parse_count(s: str, i: uint, lim: uint) -> {count: count, next: uint} { + ret if i >= lim { + {count: count_implied, next: i} + } else if s[i] == '*' as u8 { + let param = parse_parameter(s, i + 1u, lim); + let j = param.next; + alt param.param { + none. { {count: count_is_next_param, next: j} } + some(n) { {count: count_is_param(n), next: j} } + } + } else { + let num = peek_num(s, i, lim); + alt num { + none. { {count: count_implied, next: i} } + some(num) { + {count: count_is(num.num as int), next: num.next} + } + } + }; + } + fn parse_precision(s: str, i: uint, lim: uint) -> + {count: count, next: uint} { + ret if i >= lim { + {count: count_implied, next: i} + } else if s[i] == '.' as u8 { + let count = parse_count(s, i + 1u, lim); + + + // If there were no digits specified, i.e. the precision + // was ".", then the precision is 0 + alt count.count { + count_implied. { {count: count_is(0), next: count.next} } + _ { count } + } + } else { {count: count_implied, next: i} }; + } + fn parse_type(s: str, i: uint, lim: uint, error: error_fn) -> + {ty: ty, next: uint} { + if i >= lim { error("missing type in conversion"); } + let tstr = str::substr(s, i, 1u); + // TODO: Do we really want two signed types here? + // How important is it to be printf compatible? + let t = + if str::eq(tstr, "b") { + ty_bool + } else if str::eq(tstr, "s") { + ty_str + } else if str::eq(tstr, "c") { + ty_char + } else if str::eq(tstr, "d") || str::eq(tstr, "i") { + ty_int(signed) + } else if str::eq(tstr, "u") { + ty_int(unsigned) + } else if str::eq(tstr, "x") { + ty_hex(case_lower) + } else if str::eq(tstr, "X") { + ty_hex(case_upper) + } else if str::eq(tstr, "t") { + ty_bits + } else if str::eq(tstr, "o") { + ty_octal + } else if str::eq(tstr, "f") { + ty_float + } else { error("unknown type in conversion: " + tstr) }; + ret {ty: t, next: i + 1u}; + } +} + + +// Functions used by the fmt extension at runtime. For now there are a lot of +// decisions made a runtime. If it proves worthwhile then some of these +// conditions can be evaluated at compile-time. For now though it's cleaner to +// implement it this way, I think. +mod rt { + tag flag { + flag_left_justify; + flag_left_zero_pad; + flag_space_for_sign; + flag_sign_always; + flag_alternate; + + + // FIXME: This is a hack to avoid creating 0-length vec exprs, + // which have some difficulty typechecking currently. See + // comments in front::extfmt::make_flags + flag_none; + } + tag count { count_is(int); count_implied; } + tag ty { ty_default; ty_bits; ty_hex_upper; ty_hex_lower; ty_octal; } + + // FIXME: May not want to use a vector here for flags; + // instead just use a bool per flag + type conv = {flags: [flag], width: count, precision: count, ty: ty}; + + fn conv_int(cv: conv, i: int) -> str { + let radix = 10u; + let prec = get_int_precision(cv); + let s = int_to_str_prec(i, radix, prec); + if 0 <= i { + if have_flag(cv.flags, flag_sign_always) { + s = "+" + s; + } else if have_flag(cv.flags, flag_space_for_sign) { + s = " " + s; + } + } + ret pad(cv, s, pad_signed); + } + fn conv_uint(cv: conv, u: uint) -> str { + let prec = get_int_precision(cv); + let rs = + alt cv.ty { + ty_default. { uint_to_str_prec(u, 10u, prec) } + ty_hex_lower. { uint_to_str_prec(u, 16u, prec) } + ty_hex_upper. { str::to_upper(uint_to_str_prec(u, 16u, prec)) } + ty_bits. { uint_to_str_prec(u, 2u, prec) } + ty_octal. { uint_to_str_prec(u, 8u, prec) } + }; + ret pad(cv, rs, pad_unsigned); + } + fn conv_bool(cv: conv, b: bool) -> str { + let s = if b { "true" } else { "false" }; + // run the boolean conversion through the string conversion logic, + // giving it the same rules for precision, etc. + + ret conv_str(cv, s); + } + fn conv_char(cv: conv, c: char) -> str { + ret pad(cv, str::from_char(c), pad_nozero); + } + fn conv_str(cv: conv, s: str) -> str { + // For strings, precision is the maximum characters + // displayed + + // FIXME: substr works on bytes, not chars! + let unpadded = + alt cv.precision { + count_implied. { s } + count_is(max) { + if max as uint < str::char_len(s) { + str::substr(s, 0u, max as uint) + } else { s } + } + }; + ret pad(cv, unpadded, pad_nozero); + } + fn conv_float(cv: conv, f: float) -> str { + let (to_str, digits) = alt cv.precision { + count_is(c) { (float::to_str_exact, c as uint) } + count_implied. { (float::to_str, 6u) } + }; + let s = to_str(f, digits); + if 0.0 <= f { + if have_flag(cv.flags, flag_sign_always) { + s = "+" + s; + } else if have_flag(cv.flags, flag_space_for_sign) { + s = " " + s; + } + } + ret pad(cv, s, pad_signed); + } + + // Convert an int to string with minimum number of digits. If precision is + // 0 and num is 0 then the result is the empty string. + fn int_to_str_prec(num: int, radix: uint, prec: uint) -> str { + ret if num < 0 { + "-" + uint_to_str_prec(-num as uint, radix, prec) + } else { uint_to_str_prec(num as uint, radix, prec) }; + } + + // Convert a uint to string with a minimum number of digits. If precision + // is 0 and num is 0 then the result is the empty string. Could move this + // to uint: but it doesn't seem all that useful. + fn uint_to_str_prec(num: uint, radix: uint, prec: uint) -> str { + ret if prec == 0u && num == 0u { + "" + } else { + let s = uint::to_str(num, radix); + let len = str::char_len(s); + if len < prec { + let diff = prec - len; + let pad = str_init_elt('0', diff); + pad + s + } else { s } + }; + } + fn get_int_precision(cv: conv) -> uint { + ret alt cv.precision { + count_is(c) { c as uint } + count_implied. { 1u } + }; + } + + // FIXME: This might be useful in str: but needs to be utf8 safe first + fn str_init_elt(c: char, n_elts: uint) -> str { + let svec = vec::init_elt::<u8>(c as u8, n_elts); + + ret str::unsafe_from_bytes(svec); + } + tag pad_mode { pad_signed; pad_unsigned; pad_nozero; } + fn pad(cv: conv, s: str, mode: pad_mode) -> str { + let uwidth; + alt cv.width { + count_implied. { ret s; } + count_is(width) { + // FIXME: Maybe width should be uint + + uwidth = width as uint; + } + } + let strlen = str::char_len(s); + if uwidth <= strlen { ret s; } + let padchar = ' '; + let diff = uwidth - strlen; + if have_flag(cv.flags, flag_left_justify) { + let padstr = str_init_elt(padchar, diff); + ret s + padstr; + } + let might_zero_pad = false; + let signed = false; + alt mode { + pad_nozero. { + // fallthrough + + } + pad_signed. { might_zero_pad = true; signed = true; } + pad_unsigned. { might_zero_pad = true; } + } + fn have_precision(cv: conv) -> bool { + ret alt cv.precision { count_implied. { false } _ { true } }; + } + let zero_padding = false; + if might_zero_pad && have_flag(cv.flags, flag_left_zero_pad) && + !have_precision(cv) { + padchar = '0'; + zero_padding = true; + } + let padstr = str_init_elt(padchar, diff); + // This is completely heinous. If we have a signed value then + // potentially rip apart the intermediate result and insert some + // zeros. It may make sense to convert zero padding to a precision + // instead. + + if signed && zero_padding && str::byte_len(s) > 0u { + let head = s[0]; + if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { + let headstr = str::unsafe_from_bytes([head]); + let bytelen = str::byte_len(s); + let numpart = str::substr(s, 1u, bytelen - 1u); + ret headstr + padstr + numpart; + } + } + ret padstr + s; + } + fn have_flag(flags: [flag], f: flag) -> bool { + for candidate: flag in flags { if candidate == f { ret true; } } + ret false; + } +} +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/float.rs b/src/libcore/float.rs new file mode 100644 index 00000000000..015dceb1dc1 --- /dev/null +++ b/src/libcore/float.rs @@ -0,0 +1,343 @@ +/* +Module: float +*/ + +/** + * Section: String Conversions + */ + +/* +Function: to_str_common + +Converts a float to a string + +Parameters: + +num - The float value +digits - The number of significant digits +exact - Whether to enforce the exact number of significant digits +*/ +fn to_str_common(num: float, digits: uint, exact: bool) -> str { + let (num, accum) = num < 0.0 ? (-num, "-") : (num, ""); + let trunc = num as uint; + let frac = num - (trunc as float); + accum += uint::str(trunc); + if frac == 0.0 || digits == 0u { ret accum; } + accum += "."; + let i = digits; + let epsilon = 1. / pow_uint_to_uint_as_float(10u, i); + while i > 0u && (frac >= epsilon || exact) { + frac *= 10.0; + epsilon *= 10.0; + let digit = frac as uint; + accum += uint::str(digit); + frac -= digit as float; + i -= 1u; + } + ret accum; + +} + +/* +Function: to_str + +Converts a float to a string with exactly the number of provided significant +digits + +Parameters: + +num - The float value +digits - The number of significant digits +*/ +fn to_str_exact(num: float, digits: uint) -> str { + to_str_common(num, digits, true) +} + +/* +Function: to_str + +Converts a float to a string with a maximum number of significant digits + +Parameters: + +num - The float value +digits - The number of significant digits +*/ +fn to_str(num: float, digits: uint) -> str { + to_str_common(num, digits, false) +} + +/* +Function: from_str + +Convert a string to a float + +This function accepts strings such as +* "3.14" +* "+3.14", equivalent to "3.14" +* "-3.14" +* "2.5E10", or equivalently, "2.5e10" +* "2.5E-10" +* "", or, equivalently, "." (understood as 0) +* "5." +* ".5", or, equivalently, "0.5" + +Leading and trailing whitespace are ignored. + +Parameters: + +num - A string, possibly empty. + +Returns: + +<NaN> If the string did not represent a valid number. +Otherwise, the floating-point number represented [num]. +*/ +fn from_str(num: str) -> float { + let num = str::trim(num); + + let pos = 0u; //Current byte position in the string. + //Used to walk the string in O(n). + let len = str::byte_len(num); //Length of the string, in bytes. + + if len == 0u { ret 0.; } + let total = 0f; //Accumulated result + let c = 'z'; //Latest char. + + //The string must start with one of the following characters. + alt str::char_at(num, 0u) { + '-' | '+' | '0' to '9' | '.' {} + _ { ret NaN; } + } + + //Determine if first char is '-'/'+'. Set [pos] and [neg] accordingly. + let neg = false; //Sign of the result + alt str::char_at(num, 0u) { + '-' { + neg = true; + pos = 1u; + } + '+' { + pos = 1u; + } + _ {} + } + + //Examine the following chars until '.', 'e', 'E' + while(pos < len) { + let char_range = str::char_range_at(num, pos); + c = char_range.ch; + pos = char_range.next; + alt c { + '0' to '9' { + total = total * 10f; + total += ((c as int) - ('0' as int)) as float; + } + '.' | 'e' | 'E' { + break; + } + _ { + ret NaN; + } + } + } + + if c == '.' {//Examine decimal part + let decimal = 1.f; + while(pos < len) { + let char_range = str::char_range_at(num, pos); + c = char_range.ch; + pos = char_range.next; + alt c { + '0' | '1' | '2' | '3' | '4' | '5' | '6'| '7' | '8' | '9' { + decimal /= 10.f; + total += (((c as int) - ('0' as int)) as float)*decimal; + } + 'e' | 'E' { + break; + } + _ { + ret NaN; + } + } + } + } + + if (c == 'e') | (c == 'E') {//Examine exponent + let exponent = 0u; + let neg_exponent = false; + if(pos < len) { + let char_range = str::char_range_at(num, pos); + c = char_range.ch; + alt c { + '+' { + pos = char_range.next; + } + '-' { + pos = char_range.next; + neg_exponent = true; + } + _ {} + } + while(pos < len) { + let char_range = str::char_range_at(num, pos); + c = char_range.ch; + alt c { + '0' | '1' | '2' | '3' | '4' | '5' | '6'| '7' | '8' | '9' { + exponent *= 10u; + exponent += ((c as uint) - ('0' as uint)); + } + _ { + break; + } + } + pos = char_range.next; + } + let multiplier = pow_uint_to_uint_as_float(10u, exponent); + //Note: not [int::pow], otherwise, we'll quickly + //end up with a nice overflow + if neg_exponent { + total = total / multiplier; + } else { + total = total * multiplier; + } + } else { + ret NaN; + } + } + + if(pos < len) { + ret NaN; + } else { + if(neg) { + total *= -1f; + } + ret total; + } +} + +/** + * Section: Arithmetics + */ + +/* +Function: pow_uint_to_uint_as_float + +Compute the exponentiation of an integer by another integer as a float. + +Parameters: +x - The base. +pow - The exponent. + +Returns: +<NaN> of both `x` and `pow` are `0u`, otherwise `x^pow`. +*/ +fn pow_uint_to_uint_as_float(x: uint, pow: uint) -> float { + if x == 0u { + if pow == 0u { + ret NaN; + } + ret 0.; + } + let my_pow = pow; + let total = 1f; + let multiplier = x as float; + while (my_pow > 0u) { + if my_pow % 2u == 1u { + total = total * multiplier; + } + my_pow /= 2u; + multiplier *= multiplier; + } + ret total; +} + + +/* Const: NaN */ +const NaN: float = 0./0.; + +/* Const: infinity */ +const infinity: float = 1./0.; + +/* Const: neg_infinity */ +const neg_infinity: float = -1./0.; + +/* Predicate: isNaN */ +pure fn isNaN(f: float) -> bool { f != f } + +/* Function: add */ +pure fn add(x: float, y: float) -> float { ret x + y; } + +/* Function: sub */ +pure fn sub(x: float, y: float) -> float { ret x - y; } + +/* Function: mul */ +pure fn mul(x: float, y: float) -> float { ret x * y; } + +/* Function: div */ +pure fn div(x: float, y: float) -> float { ret x / y; } + +/* Function: rem */ +pure fn rem(x: float, y: float) -> float { ret x % y; } + +/* Predicate: lt */ +pure fn lt(x: float, y: float) -> bool { ret x < y; } + +/* Predicate: le */ +pure fn le(x: float, y: float) -> bool { ret x <= y; } + +/* Predicate: eq */ +pure fn eq(x: float, y: float) -> bool { ret x == y; } + +/* Predicate: ne */ +pure fn ne(x: float, y: float) -> bool { ret x != y; } + +/* Predicate: ge */ +pure fn ge(x: float, y: float) -> bool { ret x >= y; } + +/* Predicate: gt */ +pure fn gt(x: float, y: float) -> bool { ret x > y; } + +/* +Predicate: positive + +Returns true if `x` is a positive number, including +0.0 and +Infinity. + */ +pure fn positive(x: float) -> bool { ret x > 0. || (1./x) == infinity; } + +/* +Predicate: negative + +Returns true if `x` is a negative number, including -0.0 and -Infinity. + */ +pure fn negative(x: float) -> bool { ret x < 0. || (1./x) == neg_infinity; } + +/* +Predicate: nonpositive + +Returns true if `x` is a negative number, including -0.0 and -Infinity. +(This is the same as `float::negative`.) +*/ +pure fn nonpositive(x: float) -> bool { + ret x < 0. || (1./x) == neg_infinity; +} + +/* +Predicate: nonnegative + +Returns true if `x` is a positive number, including +0.0 and +Infinity. +(This is the same as `float::positive`.) +*/ +pure fn nonnegative(x: float) -> bool { + ret x > 0. || (1./x) == infinity; +} + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: +// diff --git a/src/libcore/int.rs b/src/libcore/int.rs new file mode 100644 index 00000000000..2ab299800ad --- /dev/null +++ b/src/libcore/int.rs @@ -0,0 +1,189 @@ +/* +Module: int +*/ + +/* +Const: max_value + +The maximum value of an integer +*/ +// FIXME: Find another way to access the machine word size in a const expr +#[cfg(target_arch="x86")] +const max_value: int = (-1 << 31)-1; + +#[cfg(target_arch="x86_64")] +const max_value: int = (-1 << 63)-1; + +/* +Const: min_value + +The minumum value of an integer +*/ +#[cfg(target_arch="x86")] +const min_value: int = -1 << 31; + +#[cfg(target_arch="x86_64")] +const min_value: int = -1 << 63; + +/* Function: add */ +pure fn add(x: int, y: int) -> int { ret x + y; } + +/* Function: sub */ +pure fn sub(x: int, y: int) -> int { ret x - y; } + +/* Function: mul */ +pure fn mul(x: int, y: int) -> int { ret x * y; } + +/* Function: div */ +pure fn div(x: int, y: int) -> int { ret x / y; } + +/* Function: rem */ +pure fn rem(x: int, y: int) -> int { ret x % y; } + +/* Predicate: lt */ +pure fn lt(x: int, y: int) -> bool { ret x < y; } + +/* Predicate: le */ +pure fn le(x: int, y: int) -> bool { ret x <= y; } + +/* Predicate: eq */ +pure fn eq(x: int, y: int) -> bool { ret x == y; } + +/* Predicate: ne */ +pure fn ne(x: int, y: int) -> bool { ret x != y; } + +/* Predicate: ge */ +pure fn ge(x: int, y: int) -> bool { ret x >= y; } + +/* Predicate: gt */ +pure fn gt(x: int, y: int) -> bool { ret x > y; } + +/* Predicate: positive */ +pure fn positive(x: int) -> bool { ret x > 0; } + +/* Predicate: negative */ +pure fn negative(x: int) -> bool { ret x < 0; } + +/* Predicate: nonpositive */ +pure fn nonpositive(x: int) -> bool { ret x <= 0; } + +/* Predicate: nonnegative */ +pure fn nonnegative(x: int) -> bool { ret x >= 0; } + + +// FIXME: Make sure this works with negative integers. +/* +Function: hash + +Produce a uint suitable for use in a hash table +*/ +fn hash(x: int) -> uint { ret x as uint; } + +/* +Function: range + +Iterate over the range [`lo`..`hi`) +*/ +fn range(lo: int, hi: int, it: block(int)) { + let i = lo; + while i < hi { it(i); i += 1; } +} + +/* +Function: parse_buf + +Parse a buffer of bytes + +Parameters: + +buf - A byte buffer +radix - The base of the number + +Failure: + +buf must not be empty +*/ +fn parse_buf(buf: [u8], radix: uint) -> int { + if vec::len::<u8>(buf) == 0u { + log_err "parse_buf(): buf is empty"; + fail; + } + let i = vec::len::<u8>(buf) - 1u; + let start = 0u; + let power = 1; + + if buf[0] == ('-' as u8) { + power = -1; + start = 1u; + } + let n = 0; + while true { + let digit = char::to_digit(buf[i] as char); + if (digit as uint) >= radix { + fail; + } + n += (digit as int) * power; + power *= radix as int; + if i <= start { ret n; } + i -= 1u; + } + fail; +} + +/* +Function: from_str + +Parse a string to an int + +Failure: + +s must not be empty +*/ +fn from_str(s: str) -> int { parse_buf(str::bytes(s), 10u) } + +/* +Function: to_str + +Convert to a string in a given base +*/ +fn to_str(n: int, radix: uint) -> str { + assert (0u < radix && radix <= 16u); + ret if n < 0 { + "-" + uint::to_str(-n as uint, radix) + } else { uint::to_str(n as uint, radix) }; +} + +/* +Function: str + +Convert to a string +*/ +fn str(i: int) -> str { ret to_str(i, 10u); } + +/* +Function: pow + +Returns `base` raised to the power of `exponent` +*/ +fn pow(base: int, exponent: uint) -> int { + if exponent == 0u { ret 1; } //Not mathemtically true if [base == 0] + if base == 0 { ret 0; } + let my_pow = exponent; + let acc = 1; + let multiplier = base; + while(my_pow > 0u) { + if my_pow % 2u == 1u { + acc *= multiplier; + } + my_pow /= 2u; + multiplier *= multiplier; + } + ret acc; +} +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/option.rs b/src/libcore/option.rs new file mode 100644 index 00000000000..403cb47f47e --- /dev/null +++ b/src/libcore/option.rs @@ -0,0 +1,93 @@ +/* +Module: option + +Represents the presence or absence of a value. + +Every option<T> value can either be some(T) or none. Where in other languages +you might use a nullable type, in Rust you would use an option type. +*/ + +/* +Tag: t + +The option type +*/ +tag t<T> { + /* Variant: none */ + none; + /* Variant: some */ + some(T); +} + +/* Section: Operations */ + +/* +Function: get + +Gets the value out of an option + +Failure: + +Fails if the value equals `none`. +*/ +fn get<copy T>(opt: t<T>) -> T { + alt opt { some(x) { ret x; } none. { fail "option none"; } } +} + +/* +*/ +fn map<T, U>(f: block(T) -> U, opt: t<T>) -> t<U> { + alt opt { some(x) { some(f(x)) } none. { none } } +} + +/* +Function: is_none + +Returns true if the option equals none +*/ +pure fn is_none<T>(opt: t<T>) -> bool { + alt opt { none. { true } some(_) { false } } +} + +/* +Function: is_some + +Returns true if the option contains some value +*/ +pure fn is_some<T>(opt: t<T>) -> bool { !is_none(opt) } + +/* +Function: from_maybe + +Returns the contained value or a default +*/ +fn from_maybe<T>(def: T, opt: t<T>) -> T { + alt opt { some(x) { x } none. { def } } +} + +/* +Function: maybe + +Applies a function to the contained value or returns a default +*/ +fn maybe<T, U>(def: U, f: block(T) -> U, opt: t<T>) -> U { + alt opt { none. { def } some(t) { f(t) } } +} + +// FIXME: Can be defined in terms of the above when/if we have const bind. +/* +Function: may + +Performs an operation on the contained value or does nothing +*/ +fn may<T>(f: block(T), opt: t<T>) { + alt opt { none. {/* nothing */ } some(t) { f(t); } } +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/ptr.rs b/src/libcore/ptr.rs new file mode 100644 index 00000000000..0372b17cdf1 --- /dev/null +++ b/src/libcore/ptr.rs @@ -0,0 +1,52 @@ +/* +Module: ptr + +Unsafe pointer utility functions +*/ +#[abi = "rust-intrinsic"] +native mod rusti { + fn addr_of<T>(val: T) -> *T; + fn ptr_offset<T>(ptr: *T, count: uint) -> *T; +} + +/* +Function: addr_of + +Get an unsafe pointer to a value +*/ +fn addr_of<T>(val: T) -> *T { ret rusti::addr_of(val); } + +/* +Function: mut_addr_of + +Get an unsafe mutable pointer to a value +*/ +fn mut_addr_of<T>(val: T) -> *mutable T unsafe { + ret unsafe::reinterpret_cast(rusti::addr_of(val)); +} + +/* +Function: offset + +Calculate the offset from a pointer +*/ +fn offset<T>(ptr: *T, count: uint) -> *T { + ret rusti::ptr_offset(ptr, count); +} + +/* +Function: mut_offset + +Calculate the offset from a mutable pointer +*/ +fn mut_offset<T>(ptr: *mutable T, count: uint) -> *mutable T { + ret rusti::ptr_offset(ptr as *T, count) as *mutable T; +} + + +/* +Function: null + +Create an unsafe null pointer +*/ +fn null<T>() -> *T unsafe { ret unsafe::reinterpret_cast(0u); } diff --git a/src/libcore/result.rs b/src/libcore/result.rs new file mode 100644 index 00000000000..550f53470bb --- /dev/null +++ b/src/libcore/result.rs @@ -0,0 +1,112 @@ +/* +Module: result + +A type representing either success or failure +*/ + +/* Section: Types */ + +/* +Tag: t + +The result type +*/ +tag t<T, U> { + /* + Variant: ok + + Contains the result value + */ + ok(T); + /* + Variant: err + + Contains the error value + */ + err(U); +} + +/* Section: Operations */ + +/* +Function: get + +Get the value out of a successful result + +Failure: + +If the result is an error +*/ +fn get<T, U>(res: t<T, U>) -> T { + alt res { + ok(t) { t } + err(_) { + // FIXME: Serialize the error value + // and include it in the fail message + fail "get called on error result"; + } + } +} + +/* +Function: get_err + +Get the value out of an error result + +Failure: + +If the result is not an error +*/ +fn get_err<T, U>(res: t<T, U>) -> U { + alt res { + err(u) { u } + ok(_) { + fail "get_error called on ok result"; + } + } +} + +/* +Function: success + +Returns true if the result is <ok> +*/ +fn success<T, U>(res: t<T, U>) -> bool { + alt res { + ok(_) { true } + err(_) { false } + } +} + +/* +Function: failure + +Returns true if the result is <error> +*/ +fn failure<T, U>(res: t<T, U>) -> bool { + !success(res) +} + +/* +Function: chain + +Call a function based on a previous result + +If `res` is <ok> then the value is extracted and passed to `op` whereupon +`op`s result is returned. if `res` is <err> then it is immediately returned. +This function can be used to compose the results of two functions. + +Example: + +> let res = chain(read_file(file), { |buf| +> ok(parse_buf(buf)) +> }) + +*/ +fn chain<T, copy U, copy V>(res: t<T, V>, op: block(T) -> t<U, V>) + -> t<U, V> { + alt res { + ok(t) { op(t) } + err(e) { err(e) } + } +} diff --git a/src/libcore/str.rs b/src/libcore/str.rs new file mode 100644 index 00000000000..31c3516b862 --- /dev/null +++ b/src/libcore/str.rs @@ -0,0 +1,962 @@ +/* +Module: str + +String manipulation. +*/ + +export eq, lteq, hash, is_empty, is_not_empty, is_whitespace, byte_len, + byte_len_range, index, + rindex, find, starts_with, ends_with, substr, slice, split, concat, + connect, to_upper, replace, char_slice, trim_left, trim_right, trim, + unshift_char, shift_char, pop_char, push_char, is_utf8, from_chars, + to_chars, char_len, char_len_range, char_at, bytes, is_ascii, + shift_byte, pop_byte, + unsafe_from_byte, unsafe_from_bytes, from_char, char_range_at, + str_from_cstr, sbuf, as_buf, push_byte, utf8_char_width, safe_slice, + contains, iter_chars, loop_chars, loop_chars_sub, + escape; + +#[abi = "cdecl"] +native mod rustrt { + fn rust_str_push(&s: str, ch: u8); +} + +/* +Function: eq + +Bytewise string equality +*/ +fn eq(&&a: str, &&b: str) -> bool { a == b } + +/* +Function: lteq + +Bytewise less than or equal +*/ +fn lteq(&&a: str, &&b: str) -> bool { a <= b } + +/* +Function: hash + +String hash function +*/ +fn hash(&&s: str) -> uint { + // djb hash. + // FIXME: replace with murmur. + + let u: uint = 5381u; + for c: u8 in s { u *= 33u; u += c as uint; } + ret u; +} + +// UTF-8 tags and ranges +const tag_cont_u8: u8 = 128u8; +const tag_cont: uint = 128u; +const max_one_b: uint = 128u; +const tag_two_b: uint = 192u; +const max_two_b: uint = 2048u; +const tag_three_b: uint = 224u; +const max_three_b: uint = 65536u; +const tag_four_b: uint = 240u; +const max_four_b: uint = 2097152u; +const tag_five_b: uint = 248u; +const max_five_b: uint = 67108864u; +const tag_six_b: uint = 252u; + +/* +Function: is_utf8 + +Determines if a vector uf bytes contains valid UTF-8 +*/ +fn is_utf8(v: [u8]) -> bool { + let i = 0u; + let total = vec::len::<u8>(v); + while i < total { + let chsize = utf8_char_width(v[i]); + if chsize == 0u { ret false; } + if i + chsize > total { ret false; } + i += 1u; + while chsize > 1u { + if v[i] & 192u8 != tag_cont_u8 { ret false; } + i += 1u; + chsize -= 1u; + } + } + ret true; +} + +/* +Function: is_ascii + +Determines if a string contains only ASCII characters +*/ +fn is_ascii(s: str) -> bool { + let i: uint = byte_len(s); + while i > 0u { i -= 1u; if s[i] & 128u8 != 0u8 { ret false; } } + ret true; +} + +/* +Predicate: is_empty + +Returns true if the string has length 0 +*/ +pure fn is_empty(s: str) -> bool { for c: u8 in s { ret false; } ret true; } + +/* +Predicate: is_not_empty + +Returns true if the string has length greater than 0 +*/ +pure fn is_not_empty(s: str) -> bool { !is_empty(s) } + +/* +Function: is_whitespace + +Returns true if the string contains only whitespace +*/ +fn is_whitespace(s: str) -> bool { + let i = 0u; + let len = char_len(s); + while i < len { + // FIXME: This is not how char_at works + if !char::is_whitespace(char_at(s, i)) { ret false; } + i += 1u; + } + ret true; +} + +/* +Function: byte_len + +Returns the length in bytes of a string +*/ +fn byte_len(s: str) -> uint unsafe { + let v: [u8] = unsafe::reinterpret_cast(s); + let vlen = vec::len(v); + unsafe::leak(v); + // There should always be a null terminator + assert (vlen > 0u); + ret vlen - 1u; +} + +/* +Function: byte_len_range + +As byte_len but for a substring + +Parameters: +s - A string +byte_offset - The byte offset at which to start in the string +char_len - The number of chars (not bytes!) in the range + +Returns: +The number of bytes in the substring starting at `byte_offset` and +containing `char_len` chars. + +Safety note: + +This function fails if `byte_offset` or `char_len` do not represent +valid positions in `s` +*/ +fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint { + let i = byte_offset; + let chars = 0u; + while chars < char_len { + let chsize = utf8_char_width(s[i]); + assert (chsize > 0u); + i += chsize; + chars += 1u; + } + ret i - byte_offset; +} + +/* +Function: bytes + +Converts a string to a vector of bytes. The result vector is not +null-terminated. +*/ +fn bytes(s: str) -> [u8] unsafe { + let v = unsafe::reinterpret_cast(s); + let vcopy = vec::slice(v, 0u, vec::len(v) - 1u); + unsafe::leak(v); + ret vcopy; +} + +/* +Function: unsafe_from_bytes + +Converts a vector of bytes to a string. Does not verify that the +vector contains valid UTF-8. +*/ +fn unsafe_from_bytes(v: [const u8]) -> str unsafe { + let vcopy: [u8] = v + [0u8]; + let scopy: str = unsafe::reinterpret_cast(vcopy); + unsafe::leak(vcopy); + ret scopy; +} + +/* +Function: unsafe_from_byte + +Converts a byte to a string. Does not verify that the byte is +valid UTF-8. +*/ +fn unsafe_from_byte(u: u8) -> str { unsafe_from_bytes([u]) } + +fn push_utf8_bytes(&s: str, ch: char) { + let code = ch as uint; + let bytes = + if code < max_one_b { + [code as u8] + } else if code < max_two_b { + [code >> 6u & 31u | tag_two_b as u8, code & 63u | tag_cont as u8] + } else if code < max_three_b { + [code >> 12u & 15u | tag_three_b as u8, + code >> 6u & 63u | tag_cont as u8, code & 63u | tag_cont as u8] + } else if code < max_four_b { + [code >> 18u & 7u | tag_four_b as u8, + code >> 12u & 63u | tag_cont as u8, + code >> 6u & 63u | tag_cont as u8, code & 63u | tag_cont as u8] + } else if code < max_five_b { + [code >> 24u & 3u | tag_five_b as u8, + code >> 18u & 63u | tag_cont as u8, + code >> 12u & 63u | tag_cont as u8, + code >> 6u & 63u | tag_cont as u8, code & 63u | tag_cont as u8] + } else { + [code >> 30u & 1u | tag_six_b as u8, + code >> 24u & 63u | tag_cont as u8, + code >> 18u & 63u | tag_cont as u8, + code >> 12u & 63u | tag_cont as u8, + code >> 6u & 63u | tag_cont as u8, code & 63u | tag_cont as u8] + }; + push_bytes(s, bytes); +} + +/* +Function: from_char + +Convert a char to a string +*/ +fn from_char(ch: char) -> str { + let buf = ""; + push_utf8_bytes(buf, ch); + ret buf; +} + +/* +Function: from_chars + +Convert a vector of chars to a string +*/ +fn from_chars(chs: [char]) -> str { + let buf = ""; + for ch: char in chs { push_utf8_bytes(buf, ch); } + ret buf; +} + +/* +Function: utf8_char_width + +FIXME: What does this function do? +*/ +fn utf8_char_width(b: u8) -> uint { + let byte: uint = b as uint; + if byte < 128u { ret 1u; } + if byte < 192u { + ret 0u; // Not a valid start byte + + } + if byte < 224u { ret 2u; } + if byte < 240u { ret 3u; } + if byte < 248u { ret 4u; } + if byte < 252u { ret 5u; } + ret 6u; +} + +/* +Function: char_range_at + +Pluck a character out of a string and return the index of the next character. +This function can be used to iterate over the unicode characters of a string. + +Example: + +> let s = "Clam chowder, hot sauce, pork rinds"; +> let i = 0; +> while i < len(s) { +> let {ch, next} = char_range_at(s, i); +> log ch; +> i = next; +> } + +Parameters: + +s - The string +i - The byte offset of the char to extract + +Returns: + +A record {ch: char, next: uint} containing the char value and the byte +index of the next unicode character. + +Failure: + +If `i` is greater than or equal to the length of the string. +If `i` is not the index of the beginning of a valid UTF-8 character. +*/ +fn char_range_at(s: str, i: uint) -> {ch: char, next: uint} { + let b0 = s[i]; + let w = utf8_char_width(b0); + assert (w != 0u); + if w == 1u { ret {ch: b0 as char, next: i + 1u}; } + let val = 0u; + let end = i + w; + let i = i + 1u; + while i < end { + let byte = s[i]; + assert (byte & 192u8 == tag_cont_u8); + val <<= 6u; + val += byte & 63u8 as uint; + i += 1u; + } + // Clunky way to get the right bits from the first byte. Uses two shifts, + // the first to clip off the marker bits at the left of the byte, and then + // a second (as uint) to get it to the right position. + val += (b0 << (w + 1u as u8) as uint) << ((w - 1u) * 6u - w - 1u); + ret {ch: val as char, next: i}; +} + +/* +Function: char_at + +Pluck a character out of a string +*/ +fn char_at(s: str, i: uint) -> char { ret char_range_at(s, i).ch; } + +/* +Function: iter_chars + +Iterate over the characters in a string +*/ + +fn iter_chars(s: str, it: block(char)) { + let pos = 0u, len = byte_len(s); + while (pos < len) { + let {ch, next} = char_range_at(s, pos); + pos = next; + it(ch); + } +} + +/* +Function: loop_chars + +Loop through a string, char by char + +Parameters: +s - A string to traverse. It may be empty. +it - A block to execute with each consecutive character of `s`. +Return `true` to continue, `false` to stop. + +Returns: + +`true` If execution proceeded correctly, `false` if it was interrupted, +that is if `it` returned `false` at any point. + */ +fn loop_chars(s: str, it: block(char) -> bool) -> bool{ + ret loop_chars_sub(s, 0u, byte_len(s), it); +} + +/* +Function: loop_chars_sub + +Loop through a substring, char by char + +Parameters: +s - A string to traverse. It may be empty. +byte_offset - The byte offset at which to start in the string. +byte_len - The number of bytes to traverse in the string +it - A block to execute with each consecutive character of `s`. +Return `true` to continue, `false` to stop. + +Returns: + +`true` If execution proceeded correctly, `false` if it was interrupted, +that is if `it` returned `false` at any point. + +Safety note: +- This function does not check whether the substring is valid. +- This function fails if `byte_offset` or `byte_len` do not + represent valid positions inside `s` + */ +fn loop_chars_sub(s: str, byte_offset: uint, byte_len: uint, + it: block(char) -> bool) -> bool { + let i = byte_offset; + let result = true; + while i < byte_len { + let {ch, next} = char_range_at(s, i); + if !it(ch) {result = false; break;} + i = next; + } + ret result; +} + + +/* +Function: char_len + +Count the number of unicode characters in a string +*/ +fn char_len(s: str) -> uint { + ret char_len_range(s, 0u, byte_len(s)); +} + +/* +Function: char_len_range + +As char_len but for a slice of a string + +Parameters: + s - A valid string + byte_start - The position inside `s` where to start counting in bytes. + byte_len - The number of bytes of `s` to take into account. + +Returns: + The number of Unicode characters in `s` in +segment [byte_start, byte_start+len( . + +Safety note: +- This function does not check whether the substring is valid. +- This function fails if `byte_offset` or `byte_len` do not + represent valid positions inside `s` +*/ +fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint { + let i = byte_start; + let len = 0u; + while i < byte_len { + let chsize = utf8_char_width(s[i]); + assert (chsize > 0u); + len += 1u; + i += chsize; + } + assert (i == byte_len); + ret len; +} + +/* +Function: to_chars + +Convert a string to a vector of characters +*/ +fn to_chars(s: str) -> [char] { + let buf: [char] = []; + let i = 0u; + let len = byte_len(s); + while i < len { + let cur = char_range_at(s, i); + buf += [cur.ch]; + i = cur.next; + } + ret buf; +} + +/* +Function: push_char + +Append a character to a string +*/ +fn push_char(&s: str, ch: char) { s += from_char(ch); } + +/* +Function: pop_char + +Remove the final character from a string and return it. + +Failure: + +If the string does not contain any characters. +*/ +fn pop_char(&s: str) -> char { + let end = byte_len(s); + while end > 0u && s[end - 1u] & 192u8 == tag_cont_u8 { end -= 1u; } + assert (end > 0u); + let ch = char_at(s, end - 1u); + s = substr(s, 0u, end - 1u); + ret ch; +} + +/* +Function: shift_char + +Remove the first character from a string and return it. + +Failure: + +If the string does not contain any characters. +*/ +fn shift_char(&s: str) -> char { + let r = char_range_at(s, 0u); + s = substr(s, r.next, byte_len(s) - r.next); + ret r.ch; +} + +/* +Function: unshift_char + +Prepend a char to a string +*/ +fn unshift_char(&s: str, ch: char) { s = from_char(ch) + s; } + +/* +Function: index + +Returns the index of the first matching byte. Returns -1 if +no match is found. +*/ +fn index(s: str, c: u8) -> int { + let i: int = 0; + for k: u8 in s { if k == c { ret i; } i += 1; } + ret -1; +} + +/* +Function: rindex + +Returns the index of the last matching byte. Returns -1 +if no match is found. +*/ +fn rindex(s: str, c: u8) -> int { + let n: int = byte_len(s) as int; + while n >= 0 { if s[n] == c { ret n; } n -= 1; } + ret n; +} + +/* +Function: find + +Finds the index of the first matching substring. +Returns -1 if `haystack` does not contain `needle`. + +Parameters: + +haystack - The string to look in +needle - The string to look for + +Returns: + +The index of the first occurance of `needle`, or -1 if not found. +*/ +fn find(haystack: str, needle: str) -> int { + let haystack_len: int = byte_len(haystack) as int; + let needle_len: int = byte_len(needle) as int; + if needle_len == 0 { ret 0; } + fn match_at(haystack: str, needle: str, i: int) -> bool { + let j: int = i; + for c: u8 in needle { if haystack[j] != c { ret false; } j += 1; } + ret true; + } + let i: int = 0; + while i <= haystack_len - needle_len { + if match_at(haystack, needle, i) { ret i; } + i += 1; + } + ret -1; +} + +/* +Function: contains + +Returns true if one string contains another + +Parameters: + +haystack - The string to look in +needle - The string to look for +*/ +fn contains(haystack: str, needle: str) -> bool { + 0 <= find(haystack, needle) +} + +/* +Function: starts_with + +Returns true if one string starts with another + +Parameters: + +haystack - The string to look in +needle - The string to look for +*/ +fn starts_with(haystack: str, needle: str) -> bool { + let haystack_len: uint = byte_len(haystack); + let needle_len: uint = byte_len(needle); + if needle_len == 0u { ret true; } + if needle_len > haystack_len { ret false; } + ret eq(substr(haystack, 0u, needle_len), needle); +} + +/* +Function: ends_with + +Returns true if one string ends with another + +haystack - The string to look in +needle - The string to look for +*/ +fn ends_with(haystack: str, needle: str) -> bool { + let haystack_len: uint = byte_len(haystack); + let needle_len: uint = byte_len(needle); + ret if needle_len == 0u { + true + } else if needle_len > haystack_len { + false + } else { + eq(substr(haystack, haystack_len - needle_len, needle_len), + needle) + }; +} + +/* +Function: substr + +Take a substring of another. Returns a string containing `len` bytes +starting at byte offset `begin`. + +This function is not unicode-safe. + +Failure: + +If `begin` + `len` is is greater than the byte length of the string +*/ +fn substr(s: str, begin: uint, len: uint) -> str { + ret slice(s, begin, begin + len); +} + +/* +Function: slice + +Takes a bytewise slice from a string. Returns the substring from +[`begin`..`end`). + +This function is not unicode-safe. + +Failure: + +- If begin is greater than end. +- If end is greater than the length of the string. +*/ +fn slice(s: str, begin: uint, end: uint) -> str unsafe { + // FIXME: Typestate precondition + assert (begin <= end); + assert (end <= byte_len(s)); + + let v: [u8] = unsafe::reinterpret_cast(s); + let v2 = vec::slice(v, begin, end); + unsafe::leak(v); + v2 += [0u8]; + let s2: str = unsafe::reinterpret_cast(v2); + unsafe::leak(v2); + ret s2; +} + +/* +Function: safe_slice +*/ +fn safe_slice(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str { + // would need some magic to make this a precondition + assert (end <= byte_len(s)); + ret slice(s, begin, end); +} + +/* +Function: shift_byte + +Removes the first byte from a string and returns it. + +This function is not unicode-safe. +*/ +fn shift_byte(&s: str) -> u8 { + let len = byte_len(s); + assert (len > 0u); + let b = s[0]; + s = substr(s, 1u, len - 1u); + ret b; +} + +/* +Function: pop_byte + +Removes the last byte from a string and returns it. + +This function is not unicode-safe. +*/ +fn pop_byte(&s: str) -> u8 { + let len = byte_len(s); + assert (len > 0u); + let b = s[len - 1u]; + s = substr(s, 0u, len - 1u); + ret b; +} + +/* +Function: push_byte + +Appends a byte to a string. + +This function is not unicode-safe. +*/ +fn push_byte(&s: str, b: u8) { rustrt::rust_str_push(s, b); } + +/* +Function: push_bytes + +Appends a vector of bytes to a string. + +This function is not unicode-safe. +*/ +fn push_bytes(&s: str, bytes: [u8]) { + for byte in bytes { rustrt::rust_str_push(s, byte); } +} + +/* +Function: split + +Split a string at each occurance of a given separator + +Returns: + +A vector containing all the strings between each occurance of the separator +*/ +fn split(s: str, sep: u8) -> [str] { + let v: [str] = []; + let accum: str = ""; + let ends_with_sep: bool = false; + for c: u8 in s { + if c == sep { + v += [accum]; + accum = ""; + ends_with_sep = true; + } else { accum += unsafe_from_byte(c); ends_with_sep = false; } + } + if byte_len(accum) != 0u || ends_with_sep { v += [accum]; } + ret v; +} + +/* +Function: concat + +Concatenate a vector of strings +*/ +fn concat(v: [str]) -> str { + let s: str = ""; + for ss: str in v { s += ss; } + ret s; +} + +/* +Function: connect + +Concatenate a vector of strings, placing a given separator between each +*/ +fn connect(v: [str], sep: str) -> str { + let s: str = ""; + let first: bool = true; + for ss: str in v { + if first { first = false; } else { s += sep; } + s += ss; + } + ret s; +} + +// FIXME: This only handles ASCII +/* +Function: to_upper + +Convert a string to uppercase +*/ +fn to_upper(s: str) -> str { + let outstr = ""; + let ascii_a = 'a' as u8; + let ascii_z = 'z' as u8; + let diff = 32u8; + for byte: u8 in s { + let next; + if ascii_a <= byte && byte <= ascii_z { + next = byte - diff; + } else { next = byte; } + push_byte(outstr, next); + } + ret outstr; +} + +// FIXME: This is super-inefficient +/* +Function: replace + +Replace all occurances of one string with another + +Parameters: + +s - The string containing substrings to replace +from - The string to replace +to - The replacement string + +Returns: + +The original string with all occurances of `from` replaced with `to` +*/ +fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str { + // FIXME (694): Shouldn't have to check this + check (is_not_empty(from)); + if byte_len(s) == 0u { + ret ""; + } else if starts_with(s, from) { + ret to + replace(slice(s, byte_len(from), byte_len(s)), from, to); + } else { + ret unsafe_from_byte(s[0]) + + replace(slice(s, 1u, byte_len(s)), from, to); + } +} + +// FIXME: Also not efficient +/* +Function: char_slice + +Unicode-safe slice. Returns a slice of the given string containing +the characters in the range [`begin`..`end`). `begin` and `end` are +character indexes, not byte indexes. + +Failure: + +- If begin is greater than end +- If end is greater than the character length of the string +*/ +fn char_slice(s: str, begin: uint, end: uint) -> str { + from_chars(vec::slice(to_chars(s), begin, end)) +} + +/* +Function: trim_left + +Returns a string with leading whitespace removed. +*/ +fn trim_left(s: str) -> str { + fn count_whities(s: [char]) -> uint { + let i = 0u; + while i < vec::len(s) { + if !char::is_whitespace(s[i]) { break; } + i += 1u; + } + ret i; + } + let chars = to_chars(s); + let whities = count_whities(chars); + ret from_chars(vec::slice(chars, whities, vec::len(chars))); +} + +/* +Function: trim_right + +Returns a string with trailing whitespace removed. +*/ +fn trim_right(s: str) -> str { + fn count_whities(s: [char]) -> uint { + let i = vec::len(s); + while 0u < i { + if !char::is_whitespace(s[i - 1u]) { break; } + i -= 1u; + } + ret i; + } + let chars = to_chars(s); + let whities = count_whities(chars); + ret from_chars(vec::slice(chars, 0u, whities)); +} + +/* +Function: trim + +Returns a string with leading and trailing whitespace removed +*/ +fn trim(s: str) -> str { trim_left(trim_right(s)) } + +/* +Type: sbuf + +An unsafe buffer of bytes. Corresponds to a C char pointer. +*/ +type sbuf = *u8; + +// NB: This is intentionally unexported because it's easy to misuse (there's +// no guarantee that the string is rooted). Instead, use as_buf below. +unsafe fn buf(s: str) -> sbuf { + let saddr = ptr::addr_of(s); + let vaddr: *[u8] = unsafe::reinterpret_cast(saddr); + let buf = vec::to_ptr(*vaddr); + ret buf; +} + +/* +Function: as_buf + +Work with the byte buffer of a string. Allows for unsafe manipulation +of strings, which is useful for native interop. + +Example: + +> let s = str::as_buf("PATH", { |path_buf| libc::getenv(path_buf) }); + +*/ +fn as_buf<T>(s: str, f: block(sbuf) -> T) -> T unsafe { + let buf = buf(s); f(buf) +} + +/* +Function: str_from_cstr + +Create a Rust string from a null-terminated C string +*/ +unsafe fn str_from_cstr(cstr: sbuf) -> str { + let res = ""; + let start = cstr; + let curr = start; + let i = 0u; + while *curr != 0u8 { + push_byte(res, *curr); + i += 1u; + curr = ptr::offset(start, i); + } + ret res; +} + +/* +Function: escape_char + +Escapes a single character. +*/ +fn escape_char(c: char) -> str { + alt c { + '"' { "\\\"" } + '\\' { "\\\\" } + '\n' { "\\n" } + '\t' { "\\t" } + '\r' { "\\r" } + // FIXME: uncomment this when extfmt is moved to core + // in a snapshot. + // '\x00' to '\x1f' { #fmt["\\x%02x", c as uint] } + v { from_char(c) } + } +} + +/* +Function: escape + +Escapes special characters inside the string, making it safe for transfer. +*/ +fn escape(s: str) -> str { + let r = ""; + loop_chars(s, { |c| r += escape_char(c); true }); + r +} diff --git a/src/libcore/sys.rs b/src/libcore/sys.rs new file mode 100644 index 00000000000..3b4a3b8c643 --- /dev/null +++ b/src/libcore/sys.rs @@ -0,0 +1,96 @@ +/* +Module: sys + +Misc low level stuff +*/ +tag type_desc = { + first_param: **ctypes::c_int, + size: ctypes::size_t, + align: ctypes::size_t + // Remaining fields not listed +}; + +#[abi = "cdecl"] +native mod rustrt { + // Explicitly re-export native stuff we want to be made + // available outside this crate. Otherwise it's + // visible-in-crate, but not re-exported. + fn last_os_error() -> str; + fn refcount<T>(t: @T) -> uint; + fn do_gc(); + fn unsupervise(); +} + +#[abi = "rust-intrinsic"] +native mod rusti { + fn get_type_desc<T>() -> *type_desc; +} + +/* +Function: get_type_desc + +Returns a pointer to a type descriptor. Useful for calling certain +function in the Rust runtime or otherwise performing dark magick. +*/ +fn get_type_desc<T>() -> *type_desc { + ret rusti::get_type_desc::<T>(); +} + +/* +Function: last_os_error + +Get a string representing the platform-dependent last error +*/ +fn last_os_error() -> str { + ret rustrt::last_os_error(); +} + +/* +Function: size_of + +Returns the size of a type +*/ +fn size_of<T>() -> uint unsafe { + ret (*get_type_desc::<T>()).size; +} + +/* +Function: align_of + +Returns the alignment of a type +*/ +fn align_of<T>() -> uint unsafe { + ret (*get_type_desc::<T>()).align; +} + +/* +Function: refcount + +Returns the refcount of a shared box +*/ +fn refcount<T>(t: @T) -> uint { + ret rustrt::refcount::<T>(t); +} + +/* +Function: do_gc + +Force a garbage collection +*/ +fn do_gc() -> () { + ret rustrt::do_gc(); +} + +// FIXME: There's a wrapper for this in the task module and this really +// just belongs there +fn unsupervise() -> () { + ret rustrt::unsupervise(); +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/task.rs b/src/libcore/task.rs new file mode 100644 index 00000000000..31f01cf8384 --- /dev/null +++ b/src/libcore/task.rs @@ -0,0 +1,357 @@ +/* +Module: task + +Task management. + +An executing Rust program consists of a tree of tasks, each with their own +stack, and sole ownership of their allocated heap data. Tasks communicate +with each other using ports and channels. + +When a task fails, that failure will propagate to its parent (the task +that spawned it) and the parent will fail as well. The reverse is not +true: when a parent task fails its children will continue executing. When +the root (main) task fails, all tasks fail, and then so does the entire +process. + +A task may remove itself from this failure propagation mechanism by +calling the <unsupervise> function, after which failure will only +result in the termination of that task. + +Tasks may execute in parallel and are scheduled automatically by the runtime. + +Example: + +> spawn("Hello, World", fn (&&msg: str) { +> log msg; +> }); + +*/ +import cast = unsafe::reinterpret_cast; +import comm; +import option::{some, none}; +import option = option::t; +import ptr; + +export task; +export joinable_task; +export sleep; +export yield; +export task_notification; +export join; +export unsupervise; +export pin; +export unpin; +export set_min_stack; +export task_result; +export tr_success; +export tr_failure; +export get_task; +export spawn; +export spawn_notify; +export spawn_joinable; + +#[abi = "rust-intrinsic"] +native mod rusti { + // these must run on the Rust stack so that they can swap stacks etc: + fn task_sleep(task: *rust_task, time_in_us: uint, &killed: bool); +} + +#[link_name = "rustrt"] +#[abi = "cdecl"] +native mod rustrt { + // these can run on the C stack: + fn pin_task(); + fn unpin_task(); + fn get_task_id() -> task_id; + fn rust_get_task() -> *rust_task; + + fn set_min_stack(stack_size: uint); + + fn new_task() -> task_id; + fn drop_task(task_id: *rust_task); + fn get_task_pointer(id: task_id) -> *rust_task; + + fn migrate_alloc(alloc: *u8, target: task_id); + + fn start_task(id: task, closure: *u8); + +} + +/* Section: Types */ + +type rust_task = + {id: task, + mutable notify_enabled: int, + mutable notify_chan: comm::chan<task_notification>, + mutable stack_ptr: *u8}; + +resource rust_task_ptr(task: *rust_task) { rustrt::drop_task(task); } + +type task_id = int; + +/* +Type: task + +A handle to a task +*/ +type task = task_id; + +/* +Type: joinable_task + +A task that sends notification upon termination +*/ +type joinable_task = (task, comm::port<task_notification>); + +/* +Tag: task_result + +Indicates the manner in which a task exited +*/ +tag task_result { + /* Variant: tr_success */ + tr_success; + /* Variant: tr_failure */ + tr_failure; +} + +/* +Tag: task_notification + +Message sent upon task exit to indicate normal or abnormal termination +*/ +tag task_notification { + /* Variant: exit */ + exit(task, task_result); +} + +/* Section: Operations */ + +/* +Type: get_task + +Retreives a handle to the currently executing task +*/ +fn get_task() -> task { rustrt::get_task_id() } + +/* +Function: sleep + +Hints the scheduler to yield this task for a specified ammount of time. + +Parameters: + +time_in_us - maximum number of microseconds to yield control for +*/ +fn sleep(time_in_us: uint) { + let task = rustrt::rust_get_task(); + let killed = false; + // FIXME: uncomment this when extfmt is moved to core + // in a snapshot. + // log #fmt("yielding for %u us", time_in_us); + rusti::task_sleep(task, time_in_us, killed); + if killed { + fail "killed"; + } +} + +/* +Function: yield + +Yield control to the task scheduler + +The scheduler may schedule another task to execute. +*/ +fn yield() { sleep(1u) } + +/* +Function: join + +Wait for a child task to exit + +The child task must have been spawned with <spawn_joinable>, which +produces a notification port that the child uses to communicate its +exit status. + +Returns: + +A task_result indicating whether the task terminated normally or failed +*/ +fn join(task_port: joinable_task) -> task_result { + let (id, port) = task_port; + alt comm::recv::<task_notification>(port) { + exit(_id, res) { + if _id == id { + ret res + } else { + // FIXME: uncomment this when extfmt is moved to core + // in a snapshot. + // fail #fmt["join received id %d, expected %d", _id, id] + fail; + } + } + } +} + +/* +Function: unsupervise + +Detaches this task from its parent in the task tree + +An unsupervised task will not propagate its failure up the task tree +*/ +fn unsupervise() { ret sys::unsupervise(); } + +/* +Function: pin + +Pins the current task and future child tasks to a single scheduler thread +*/ +fn pin() { rustrt::pin_task(); } + +/* +Function: unpin + +Unpin the current task and future child tasks +*/ +fn unpin() { rustrt::unpin_task(); } + +/* +Function: set_min_stack + +Set the minimum stack size (in bytes) for tasks spawned in the future. + +This function has global effect and should probably not be used. +*/ +fn set_min_stack(stack_size: uint) { rustrt::set_min_stack(stack_size); } + +/* +Function: spawn + +Creates and executes a new child task + +Sets up a new task with its own call stack and schedules it to be executed. +Upon execution the new task will call function `f` with the provided +argument `data`. + +Function `f` is a bare function, meaning it may not close over any data, as do +shared functions (fn@) and lambda blocks. `data` must be a uniquely owned +type; it is moved into the new task and thus can no longer be accessed +locally. + +Parameters: + +data - A unique-type value to pass to the new task +f - A function to execute in the new task + +Returns: + +A handle to the new task +*/ +fn spawn<send T>(-data: T, f: fn(T)) -> task { + spawn_inner(data, f, none) +} + +/* +Function: spawn_notify + +Create and execute a new child task, requesting notification upon its +termination + +Immediately before termination, either on success or failure, the spawned +task will send a <task_notification> message on the provided channel. +*/ +fn spawn_notify<send T>(-data: T, f: fn(T), + notify: comm::chan<task_notification>) -> task { + spawn_inner(data, f, some(notify)) +} + +/* +Function: spawn_joinable + +Create and execute a task which can later be joined with the <join> function + +This is a convenience wrapper around spawn_notify which, when paired +with <join> can be easily used to spawn a task then wait for it to +complete. +*/ +fn spawn_joinable<send T>(-data: T, f: fn(T)) -> joinable_task { + let p = comm::port::<task_notification>(); + let id = spawn_notify(data, f, comm::chan::<task_notification>(p)); + ret (id, p); +} + +// FIXME: To transition from the unsafe spawn that spawns a shared closure to +// the safe spawn that spawns a bare function we're going to write +// barefunc-spawn on top of unsafe-spawn. Sadly, bind does not work reliably +// enough to suite our needs (#1034, probably others yet to be discovered), so +// we're going to copy the bootstrap data into a unique pointer, cast it to an +// unsafe pointer then wrap up the bare function and the unsafe pointer in a +// shared closure to spawn. +// +// After the transition this should all be rewritten. + +fn spawn_inner<send T>(-data: T, f: fn(T), + notify: option<comm::chan<task_notification>>) + -> task unsafe { + + fn wrapper<send T>(data: *u8, f: fn(T)) unsafe { + let data: ~T = unsafe::reinterpret_cast(data); + f(*data); + } + + let data = ~data; + let dataptr: *u8 = unsafe::reinterpret_cast(data); + unsafe::leak(data); + let wrapped = bind wrapper(dataptr, f); + ret unsafe_spawn_inner(wrapped, notify); +} + +// FIXME: This is the old spawn function that spawns a shared closure. +// It is a hack and needs to be rewritten. +fn unsafe_spawn_inner(-thunk: fn@(), + notify: option<comm::chan<task_notification>>) -> + task unsafe { + let id = rustrt::new_task(); + + let raw_thunk: {code: uint, env: uint} = cast(thunk); + + // set up the task pointer + let task_ptr <- rust_task_ptr(rustrt::get_task_pointer(id)); + + assert (ptr::null() != (**task_ptr).stack_ptr); + + // copy the thunk from our stack to the new stack + let sp: uint = cast((**task_ptr).stack_ptr); + let ptrsize = sys::size_of::<*u8>(); + let thunkfn: *mutable uint = cast(sp - ptrsize * 2u); + let thunkenv: *mutable uint = cast(sp - ptrsize); + *thunkfn = cast(raw_thunk.code);; + *thunkenv = cast(raw_thunk.env);; + // align the stack to 16 bytes + (**task_ptr).stack_ptr = cast(sp - ptrsize * 4u); + + // set up notifications if they are enabled. + alt notify { + some(c) { + (**task_ptr).notify_enabled = 1; + (**task_ptr).notify_chan = c; + } + none { } + } + + // give the thunk environment's allocation to the new task + rustrt::migrate_alloc(cast(raw_thunk.env), id); + rustrt::start_task(id, cast(thunkfn)); + // don't cleanup the thunk in this task + unsafe::leak(thunk); + ret id; +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/u32.rs b/src/libcore/u32.rs new file mode 100644 index 00000000000..f33b8414a4d --- /dev/null +++ b/src/libcore/u32.rs @@ -0,0 +1,27 @@ +/* +Module: u32 +*/ + +/* +Const: min_value + +Return the minimal value for a u32 +*/ +const min_value: u32 = 0u32; + +/* +Const: max_value + +Return the maximal value for a u32 +*/ +const max_value: u32 = 0xffff_ffffu32; + +// +// Local Variables: +// mode: rust +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: +// diff --git a/src/libcore/u64.rs b/src/libcore/u64.rs new file mode 100644 index 00000000000..0e1a330c663 --- /dev/null +++ b/src/libcore/u64.rs @@ -0,0 +1,88 @@ +/* +Module: u64 +*/ + +/* +Const: min_value + +Return the minimal value for a u64 +*/ +const min_value: u64 = 0u64; + +/* +Const: max_value + +Return the maximal value for a u64 +*/ +const max_value: u64 = 18446744073709551615u64; + +/* +Function: to_str + +Convert to a string in a given base +*/ +fn to_str(n: u64, radix: uint) -> str { + assert (0u < radix && radix <= 16u); + + let r64 = radix as u64; + + fn digit(n: u64) -> str { + ret alt n { + 0u64 { "0" } + 1u64 { "1" } + 2u64 { "2" } + 3u64 { "3" } + 4u64 { "4" } + 5u64 { "5" } + 6u64 { "6" } + 7u64 { "7" } + 8u64 { "8" } + 9u64 { "9" } + 10u64 { "a" } + 11u64 { "b" } + 12u64 { "c" } + 13u64 { "d" } + 14u64 { "e" } + 15u64 { "f" } + _ { fail } + }; + } + + if n == 0u64 { ret "0"; } + + let s = ""; + + let n = n; + while n > 0u64 { s = digit(n % r64) + s; n /= r64; } + ret s; +} + +/* +Function: str + +Convert to a string +*/ +fn str(n: u64) -> str { ret to_str(n, 10u); } + +/* +Function: parse_buf + +Parse a string as an unsigned integer. +*/ +fn from_str(buf: str, radix: u64) -> u64 { + if str::byte_len(buf) == 0u { + log_err "parse_buf(): buf is empty"; + fail; + } + let i = str::byte_len(buf) - 1u; + let power = 1u64, n = 0u64; + while true { + let digit = char::to_digit(buf[i] as char) as u64; + if digit >= radix { fail; } + n += digit * power; + power *= radix; + if i == 0u { ret n; } + i -= 1u; + } + fail; +} diff --git a/src/libcore/u8.rs b/src/libcore/u8.rs new file mode 100644 index 00000000000..eadfcadda4a --- /dev/null +++ b/src/libcore/u8.rs @@ -0,0 +1,68 @@ +/* +Module: u8 +*/ + +/* +Const: max_value + +The maximum value of a u8. +*/ +const max_value: u8 = 255u8; + +/* +Const: min_value + +The minumum value of a u8. +*/ +const min_value: u8 = 0u8; + +/* Function: add */ +pure fn add(x: u8, y: u8) -> u8 { ret x + y; } + +/* Function: sub */ +pure fn sub(x: u8, y: u8) -> u8 { ret x - y; } + +/* Function: mul */ +pure fn mul(x: u8, y: u8) -> u8 { ret x * y; } + +/* Function: div */ +pure fn div(x: u8, y: u8) -> u8 { ret x / y; } + +/* Function: rem */ +pure fn rem(x: u8, y: u8) -> u8 { ret x % y; } + +/* Predicate: lt */ +pure fn lt(x: u8, y: u8) -> bool { ret x < y; } + +/* Predicate: le */ +pure fn le(x: u8, y: u8) -> bool { ret x <= y; } + +/* Predicate: eq */ +pure fn eq(x: u8, y: u8) -> bool { ret x == y; } + +/* Predicate: ne */ +pure fn ne(x: u8, y: u8) -> bool { ret x != y; } + +/* Predicate: ge */ +pure fn ge(x: u8, y: u8) -> bool { ret x >= y; } + +/* Predicate: gt */ +pure fn gt(x: u8, y: u8) -> bool { ret x > y; } + +/* +Function: range + +Iterate over the range [`lo`..`hi`) +*/ +fn range(lo: u8, hi: u8, it: block(u8)) { + let i = lo; + while i < hi { it(i); i += 1u8; } +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/uint.rs b/src/libcore/uint.rs new file mode 100644 index 00000000000..ae1ba628aa6 --- /dev/null +++ b/src/libcore/uint.rs @@ -0,0 +1,254 @@ +/* +Module: uint +*/ + +/* +Const: min_value + +Return the minimal value for an uint. + +This is always 0 +*/ +const min_value: uint = 0u; + +/* +Const: max_value + +Return the maximal value for an uint. + +This is 2^wordsize - 1 +*/ +const max_value: uint = 0u - 1u; + +/* Function: add */ +pure fn add(x: uint, y: uint) -> uint { ret x + y; } + +/* Function: sub */ +pure fn sub(x: uint, y: uint) -> uint { ret x - y; } + +/* Function: mul */ +pure fn mul(x: uint, y: uint) -> uint { ret x * y; } + +/* Function: div */ +pure fn div(x: uint, y: uint) -> uint { ret x / y; } + +/* Function: div_ceil + + Divide two numbers, return the result, rounded up. + + Parameters: + x - an integer + y - an integer distinct from 0u + + Return: + The smallest integer `q` such that `x/y <= q`. +*/ +pure fn div_ceil(x: uint, y: uint) -> uint { + let div = div(x, y); + if x % y == 0u { ret div;} + else { ret div + 1u; } +} + +/* Function: div_ceil + + Divide two numbers, return the result, rounded to the closest integer. + + Parameters: + x - an integer + y - an integer distinct from 0u + + Return: + The integer `q` closest to `x/y`. +*/ +pure fn div_round(x: uint, y: uint) -> uint { + let div = div(x, y); + if x % y * 2u < y { ret div;} + else { ret div + 1u; } +} + +/* Function: div_ceil + + Divide two numbers, return the result, rounded down. + + Parameters: + x - an integer + y - an integer distinct from 0u + + Note: This is the same function as `div`. + + Return: + The smallest integer `q` such that `x/y <= q`. This + is either `x/y` or `x/y + 1`. +*/ +pure fn div_floor(x: uint, y: uint) -> uint { ret x / y; } + +/* Function: rem */ +pure fn rem(x: uint, y: uint) -> uint { ret x % y; } + +/* Predicate: lt */ +pure fn lt(x: uint, y: uint) -> bool { ret x < y; } + +/* Predicate: le */ +pure fn le(x: uint, y: uint) -> bool { ret x <= y; } + +/* Predicate: eq */ +pure fn eq(x: uint, y: uint) -> bool { ret x == y; } + +/* Predicate: ne */ +pure fn ne(x: uint, y: uint) -> bool { ret x != y; } + +/* Predicate: ge */ +pure fn ge(x: uint, y: uint) -> bool { ret x >= y; } + +/* Predicate: gt */ +pure fn gt(x: uint, y: uint) -> bool { ret x > y; } + +/* +Function: range + +Iterate over the range [`lo`..`hi`) +*/ +fn range(lo: uint, hi: uint, it: block(uint)) { + let i = lo; + while i < hi { it(i); i += 1u; } +} + +/* +Function: loop + +Iterate over the range [`lo`..`hi`), or stop when requested + +Parameters: +lo - The integer at which to start the loop (included) +hi - The integer at which to stop the loop (excluded) +it - A block to execute with each consecutive integer of the range. +Return `true` to continue, `false` to stop. + +Returns: + +`true` If execution proceeded correctly, `false` if it was interrupted, +that is if `it` returned `false` at any point. +*/ +fn loop(lo: uint, hi: uint, it: block(uint) -> bool) -> bool { + let i = lo; + while i < hi { + if (!it(i)) { ret false; } + i += 1u; + } + ret true; +} + +/* +Function: next_power_of_two + +Returns the smallest power of 2 greater than or equal to `n` +*/ +fn next_power_of_two(n: uint) -> uint { + let halfbits: uint = sys::size_of::<uint>() * 4u; + let tmp: uint = n - 1u; + let shift: uint = 1u; + while shift <= halfbits { tmp |= tmp >> shift; shift <<= 1u; } + ret tmp + 1u; +} + +/* +Function: parse_buf + +Parse a buffer of bytes + +Parameters: + +buf - A byte buffer +radix - The base of the number + +Failure: + +buf must not be empty +*/ +fn parse_buf(buf: [u8], radix: uint) -> uint { + if vec::len::<u8>(buf) == 0u { + log_err "parse_buf(): buf is empty"; + fail; + } + let i = vec::len::<u8>(buf) - 1u; + let power = 1u; + let n = 0u; + while true { + let digit = char::to_digit(buf[i] as char); + if (digit as uint) >= radix { + fail; + } + n += (digit as uint) * power; + power *= radix; + if i == 0u { ret n; } + i -= 1u; + } + fail; +} + +/* +Function: from_str + +Parse a string to an int + +Failure: + +s must not be empty +*/ +fn from_str(s: str) -> uint { parse_buf(str::bytes(s), 10u) } + +/* +Function: to_str + +Convert to a string in a given base +*/ +fn to_str(num: uint, radix: uint) -> str { + let n = num; + assert (0u < radix && radix <= 16u); + fn digit(n: uint) -> char { + ret alt n { + 0u { '0' } + 1u { '1' } + 2u { '2' } + 3u { '3' } + 4u { '4' } + 5u { '5' } + 6u { '6' } + 7u { '7' } + 8u { '8' } + 9u { '9' } + 10u { 'a' } + 11u { 'b' } + 12u { 'c' } + 13u { 'd' } + 14u { 'e' } + 15u { 'f' } + _ { fail } + }; + } + if n == 0u { ret "0"; } + let s: str = ""; + while n != 0u { + s += str::unsafe_from_byte(digit(n % radix) as u8); + n /= radix; + } + let s1: str = ""; + let len: uint = str::byte_len(s); + while len != 0u { len -= 1u; s1 += str::unsafe_from_byte(s[len]); } + ret s1; +} + +/* +Function: str + +Convert to a string +*/ +fn str(i: uint) -> str { ret to_str(i, 10u); } + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: diff --git a/src/libcore/unsafe.rs b/src/libcore/unsafe.rs new file mode 100644 index 00000000000..22cce495649 --- /dev/null +++ b/src/libcore/unsafe.rs @@ -0,0 +1,41 @@ +/* +Module: unsafe + +Unsafe operations +*/ + +#[abi = "rust-intrinsic"] +native mod rusti { + fn cast<T, U>(src: T) -> U; +} + +#[abi = "cdecl"] +native mod rustrt { + fn leak<T>(-thing: T); +} + +/* +Function: reinterpret_cast + +Casts the value at `src` to U. The two types must have the same length. +*/ +unsafe fn reinterpret_cast<T, U>(src: T) -> U { + let t1 = sys::get_type_desc::<T>(); + let t2 = sys::get_type_desc::<U>(); + if (*t1).size != (*t2).size { + fail "attempt to cast values of differing sizes"; + } + ret rusti::cast(src); +} + +/* +Function: leak + +Move `thing` into the void. + +The leak function will take ownership of the provided value but neglect +to run any required cleanup or memory-management operations on it. This +can be used for various acts of magick, particularly when using +reinterpret_cast on managed pointer types. +*/ +unsafe fn leak<T>(-thing: T) { rustrt::leak(thing); } diff --git a/src/libcore/vec.rs b/src/libcore/vec.rs new file mode 100644 index 00000000000..d8f89b3b27b --- /dev/null +++ b/src/libcore/vec.rs @@ -0,0 +1,835 @@ +/* +Module: vec +*/ + +import option::{some, none}; +import uint::next_power_of_two; +import ptr::addr_of; + +#[abi = "rust-intrinsic"] +native mod rusti { + fn vec_len<T>(&&v: [const T]) -> uint; +} + +#[abi = "cdecl"] +native mod rustrt { + fn vec_reserve_shared<T>(t: *sys::type_desc, + &v: [const T], + n: uint); + fn vec_from_buf_shared<T>(t: *sys::type_desc, + ptr: *T, + count: uint) -> [T]; +} + +/* +Type: init_op + +A function used to initialize the elements of a vector. +*/ +type init_op<T> = block(uint) -> T; + + +/* +Predicate: is_empty + +Returns true if a vector contains no elements. +*/ +pure fn is_empty<T>(v: [const T]) -> bool { + // FIXME: This would be easier if we could just call len + for t: T in v { ret false; } + ret true; +} + +/* +Predicate: is_not_empty + +Returns true if a vector contains some elements. +*/ +pure fn is_not_empty<T>(v: [const T]) -> bool { ret !is_empty(v); } + +/* +Predicate: same_length + +Returns true if two vectors have the same length +*/ +pure fn same_length<T, U>(xs: [T], ys: [U]) -> bool { + vec::len(xs) == vec::len(ys) +} + +/* +Function: reserve + +Reserves capacity for `n` elements in the given vector. + +If the capacity for `v` is already equal to or greater than the requested +capacity, then no action is taken. + +Parameters: + +v - A vector +n - The number of elements to reserve space for +*/ +fn reserve<T>(&v: [const T], n: uint) { + rustrt::vec_reserve_shared(sys::get_type_desc::<T>(), v, n); +} + +/* +Function: len + +Returns the length of a vector +*/ +pure fn len<T>(v: [const T]) -> uint { unchecked { rusti::vec_len(v) } } + +/* +Function: init_fn + +Creates and initializes an immutable vector. + +Creates an immutable vector of size `n_elts` and initializes the elements +to the value returned by the function `op`. +*/ +fn init_fn<T>(op: init_op<T>, n_elts: uint) -> [T] { + let v = []; + reserve(v, n_elts); + let i: uint = 0u; + while i < n_elts { v += [op(i)]; i += 1u; } + ret v; +} + +// TODO: Remove me once we have slots. +/* +Function: init_fn_mut + +Creates and initializes a mutable vector. + +Creates a mutable vector of size `n_elts` and initializes the elements to +the value returned by the function `op`. +*/ +fn init_fn_mut<T>(op: init_op<T>, n_elts: uint) -> [mutable T] { + let v = [mutable]; + reserve(v, n_elts); + let i: uint = 0u; + while i < n_elts { v += [mutable op(i)]; i += 1u; } + ret v; +} + +/* +Function: init_elt + +Creates and initializes an immutable vector. + +Creates an immutable vector of size `n_elts` and initializes the elements +to the value `t`. +*/ +fn init_elt<copy T>(t: T, n_elts: uint) -> [T] { + let v = []; + reserve(v, n_elts); + let i: uint = 0u; + while i < n_elts { v += [t]; i += 1u; } + ret v; +} + +// TODO: Remove me once we have slots. +/* +Function: init_elt_mut + +Creates and initializes a mutable vector. + +Creates a mutable vector of size `n_elts` and initializes the elements +to the value `t`. +*/ +fn init_elt_mut<copy T>(t: T, n_elts: uint) -> [mutable T] { + let v = [mutable]; + reserve(v, n_elts); + let i: uint = 0u; + while i < n_elts { v += [mutable t]; i += 1u; } + ret v; +} + +// FIXME: Possible typestate postcondition: +// len(result) == len(v) (needs issue #586) +/* +Function: to_mut + +Produces a mutable vector from an immutable vector. +*/ +fn to_mut<copy T>(v: [T]) -> [mutable T] { + let vres = [mutable]; + for t: T in v { vres += [mutable t]; } + ret vres; +} + +// Same comment as from_mut +/* +Function: from_mut + +Produces an immutable vector from a mutable vector. +*/ +fn from_mut<copy T>(v: [mutable T]) -> [T] { + let vres = []; + for t: T in v { vres += [t]; } + ret vres; +} + +// Accessors + +/* +Function: head + +Returns the first element of a vector + +Predicates: +<is_not_empty> (v) +*/ +fn head<copy T>(v: [const T]) : is_not_empty(v) -> T { ret v[0]; } + +/* +Function: tail + +Returns all but the first element of a vector + +Predicates: +<is_not_empty> (v) +*/ +fn tail<copy T>(v: [const T]) : is_not_empty(v) -> [T] { + ret slice(v, 1u, len(v)); +} + +// FIXME: This name is sort of confusing next to init_fn, etc +// but this is the name haskell uses for this function, +// along with head/tail/last. +/* +Function: init + +Returns all but the last elemnt of a vector + +Preconditions: +`v` is not empty +*/ +fn init<copy T>(v: [const T]) -> [T] { + assert len(v) != 0u; + slice(v, 0u, len(v) - 1u) +} + +/* +Function: last + +Returns the last element of a vector + +Returns: + +An option containing the last element of `v` if `v` is not empty, or +none if `v` is empty. +*/ +fn last<copy T>(v: [const T]) -> option::t<T> { + if len(v) == 0u { ret none; } + ret some(v[len(v) - 1u]); +} + +/* +Function: last_total + +Returns the last element of a non-empty vector `v` + +Predicates: +<is_not_empty> (v) +*/ +fn last_total<copy T>(v: [const T]) : is_not_empty(v) -> T { + ret v[len(v) - 1u]; +} + +/* +Function: slice + +Returns a copy of the elements from [`start`..`end`) from `v`. +*/ +fn slice<copy T>(v: [const T], start: uint, end: uint) -> [T] { + assert (start <= end); + assert (end <= len(v)); + let result = []; + reserve(result, end - start); + let i = start; + while i < end { result += [v[i]]; i += 1u; } + ret result; +} + +// TODO: Remove me once we have slots. +/* +Function: slice_mut + +Returns a copy of the elements from [`start`..`end`) from `v`. +*/ +fn slice_mut<copy T>(v: [const T], start: uint, end: uint) -> [mutable T] { + assert (start <= end); + assert (end <= len(v)); + let result = [mutable]; + reserve(result, end - start); + let i = start; + while i < end { result += [mutable v[i]]; i += 1u; } + ret result; +} + + +// Mutators + +/* +Function: shift + +Removes the first element from a vector and return it +*/ +fn shift<copy T>(&v: [const T]) -> T { + let ln = len::<T>(v); + assert (ln > 0u); + let e = v[0]; + v = slice::<T>(v, 1u, ln); + ret e; +} + +// TODO: Write this, unsafely, in a way that's not O(n). +/* +Function: pop + +Remove the last element from a vector and return it +*/ +fn pop<copy T>(&v: [const T]) -> T { + let ln = len(v); + assert (ln > 0u); + ln -= 1u; + let e = v[ln]; + v = slice(v, 0u, ln); + ret e; +} + +// TODO: More. + + +// Appending + +/* +Function: grow + +Expands a vector in place, initializing the new elements to a given value + +Parameters: + +v - The vector to grow +n - The number of elements to add +initval - The value for the new elements +*/ +fn grow<copy T>(&v: [T], n: uint, initval: T) { + reserve(v, next_power_of_two(len(v) + n)); + let i: uint = 0u; + while i < n { v += [initval]; i += 1u; } +} + +// TODO: Remove me once we have slots. +// FIXME: Can't grow take a [const T] +/* +Function: grow_mut + +Expands a vector in place, initializing the new elements to a given value + +Parameters: + +v - The vector to grow +n - The number of elements to add +initval - The value for the new elements +*/ +fn grow_mut<copy T>(&v: [mutable T], n: uint, initval: T) { + reserve(v, next_power_of_two(len(v) + n)); + let i: uint = 0u; + while i < n { v += [mutable initval]; i += 1u; } +} + +/* +Function: grow_fn + +Expands a vector in place, initializing the new elements to the result of a +function + +Function `init_fn` is called `n` times with the values [0..`n`) + +Parameters: + +v - The vector to grow +n - The number of elements to add +init_fn - A function to call to retreive each appended element's value +*/ +fn grow_fn<T>(&v: [T], n: uint, op: init_op<T>) { + reserve(v, next_power_of_two(len(v) + n)); + let i: uint = 0u; + while i < n { v += [op(i)]; i += 1u; } +} + +/* +Function: grow_set + +Sets the value of a vector element at a given index, growing the vector as +needed + +Sets the element at position `index` to `val`. If `index` is past the end +of the vector, expands the vector by replicating `initval` to fill the +intervening space. +*/ +fn grow_set<copy T>(&v: [mutable T], index: uint, initval: T, val: T) { + if index >= len(v) { grow_mut(v, index - len(v) + 1u, initval); } + v[index] = val; +} + + +// Functional utilities + +/* +Function: map + +Apply a function to each element of a vector and return the results +*/ +fn map<T, U>(f: block(T) -> U, v: [T]) -> [U] { + let result = []; + reserve(result, len(v)); + for elem: T in v { result += [f(elem)]; } + ret result; +} + +/* +Function: map_mut + +Apply a function to each element of a mutable vector and return the results +*/ +fn map_mut<copy T, U>(f: block(T) -> U, v: [const T]) -> [U] { + let result = []; + reserve(result, len(v)); + for elem: T in v { + // copy satisfies alias checker + result += [f(copy elem)]; + } + ret result; +} + +/* +Function: map2 + +Apply a function to each pair of elements and return the results +*/ +fn map2<copy T, copy U, V>(f: block(T, U) -> V, v0: [T], v1: [U]) -> [V] { + let v0_len = len(v0); + if v0_len != len(v1) { fail; } + let u: [V] = []; + let i = 0u; + while i < v0_len { u += [f(copy v0[i], copy v1[i])]; i += 1u; } + ret u; +} + +/* +Function: filter_map + +Apply a function to each element of a vector and return the results + +If function `f` returns `none` then that element is excluded from +the resulting vector. +*/ +fn filter_map<copy T, copy U>(f: block(T) -> option::t<U>, v: [const T]) + -> [U] { + let result = []; + for elem: T in v { + alt f(copy elem) { + none. {/* no-op */ } + some(result_elem) { result += [result_elem]; } + } + } + ret result; +} + +/* +Function: filter + +Construct a new vector from the elements of a vector for which some predicate +holds. + +Apply function `f` to each element of `v` and return a vector containing +only those elements for which `f` returned true. +*/ +fn filter<copy T>(f: block(T) -> bool, v: [T]) -> [T] { + let result = []; + for elem: T in v { + if f(elem) { result += [elem]; } + } + ret result; +} + +/* +Function: concat + +Concatenate a vector of vectors. Flattens a vector of vectors of T into +a single vector of T. +*/ +fn concat<copy T>(v: [const [const T]]) -> [T] { + let new: [T] = []; + for inner: [T] in v { new += inner; } + ret new; +} + +/* +Function: foldl + +Reduce a vector from left to right +*/ +fn foldl<copy T, U>(p: block(T, U) -> T, z: T, v: [const U]) -> T { + let accum = z; + iter(v) { |elt| + accum = p(accum, elt); + } + ret accum; +} + +/* +Function: foldr + +Reduce a vector from right to left +*/ +fn foldr<T, copy U>(p: block(T, U) -> U, z: U, v: [const T]) -> U { + let accum = z; + riter(v) { |elt| + accum = p(elt, accum); + } + ret accum; +} + +/* +Function: any + +Return true if a predicate matches any elements + +If the vector contains no elements then false is returned. +*/ +fn any<T>(f: block(T) -> bool, v: [T]) -> bool { + for elem: T in v { if f(elem) { ret true; } } + ret false; +} + +/* +Function: all + +Return true if a predicate matches all elements + +If the vector contains no elements then true is returned. +*/ +fn all<T>(f: block(T) -> bool, v: [T]) -> bool { + for elem: T in v { if !f(elem) { ret false; } } + ret true; +} + +/* +Function: member + +Return true if a vector contains an element with the given value +*/ +fn member<T>(x: T, v: [T]) -> bool { + for elt: T in v { if x == elt { ret true; } } + ret false; +} + +/* +Function: count + +Returns the number of elements that are equal to a given value +*/ +fn count<T>(x: T, v: [const T]) -> uint { + let cnt = 0u; + for elt: T in v { if x == elt { cnt += 1u; } } + ret cnt; +} + +/* +Function: find + +Search for an element that matches a given predicate + +Apply function `f` to each element of `v`, starting from the first. +When function `f` returns true then an option containing the element +is returned. If `f` matches no elements then none is returned. +*/ +fn find<copy T>(f: block(T) -> bool, v: [T]) -> option::t<T> { + for elt: T in v { if f(elt) { ret some(elt); } } + ret none; +} + +/* +Function: position + +Find the first index containing a matching value + +Returns: + +option::some(uint) - The first index containing a matching value +option::none - No elements matched +*/ +fn position<T>(x: T, v: [T]) -> option::t<uint> { + let i: uint = 0u; + while i < len(v) { if x == v[i] { ret some::<uint>(i); } i += 1u; } + ret none; +} + +/* +Function: position_pred + +Find the first index for which the value matches some predicate +*/ +fn position_pred<T>(f: block(T) -> bool, v: [T]) -> option::t<uint> { + let i: uint = 0u; + while i < len(v) { if f(v[i]) { ret some::<uint>(i); } i += 1u; } + ret none; +} + +// FIXME: if issue #586 gets implemented, could have a postcondition +// saying the two result lists have the same length -- or, could +// return a nominal record with a constraint saying that, instead of +// returning a tuple (contingent on issue #869) +/* +Function: unzip + +Convert a vector of pairs into a pair of vectors + +Returns a tuple containing two vectors where the i-th element of the first +vector contains the first element of the i-th tuple of the input vector, +and the i-th element of the second vector contains the second element +of the i-th tuple of the input vector. +*/ +fn unzip<copy T, copy U>(v: [(T, U)]) -> ([T], [U]) { + let as = [], bs = []; + for (a, b) in v { as += [a]; bs += [b]; } + ret (as, bs); +} + +/* +Function: zip + +Convert two vectors to a vector of pairs + +Returns a vector of tuples, where the i-th tuple contains contains the +i-th elements from each of the input vectors. + +Preconditions: + +<same_length> (v, u) +*/ +fn zip<copy T, copy U>(v: [T], u: [U]) : same_length(v, u) -> [(T, U)] { + let zipped = []; + let sz = len(v), i = 0u; + assert (sz == len(u)); + while i < sz { zipped += [(v[i], u[i])]; i += 1u; } + ret zipped; +} + +/* +Function: swap + +Swaps two elements in a vector + +Parameters: +v - The input vector +a - The index of the first element +b - The index of the second element +*/ +fn swap<T>(v: [mutable T], a: uint, b: uint) { + v[a] <-> v[b]; +} + +/* +Function: reverse + +Reverse the order of elements in a vector, in place +*/ +fn reverse<T>(v: [mutable T]) { + let i: uint = 0u; + let ln = len::<T>(v); + while i < ln / 2u { v[i] <-> v[ln - i - 1u]; i += 1u; } +} + + +/* +Function: reversed + +Returns a vector with the order of elements reversed +*/ +fn reversed<copy T>(v: [const T]) -> [T] { + let rs: [T] = []; + let i = len::<T>(v); + if i == 0u { ret rs; } else { i -= 1u; } + while i != 0u { rs += [v[i]]; i -= 1u; } + rs += [v[0]]; + ret rs; +} + +// FIXME: Seems like this should take char params. Maybe belongs in char +/* +Function: enum_chars + +Returns a vector containing a range of chars +*/ +fn enum_chars(start: u8, end: u8) : u8::le(start, end) -> [char] { + let i = start; + let r = []; + while i <= end { r += [i as char]; i += 1u as u8; } + ret r; +} + +// FIXME: Probably belongs in uint. Compare to uint::range +/* +Function: enum_uints + +Returns a vector containing a range of uints +*/ +fn enum_uints(start: uint, end: uint) : uint::le(start, end) -> [uint] { + let i = start; + let r = []; + while i <= end { r += [i]; i += 1u; } + ret r; +} + +/* +Function: iter + +Iterates over a vector + +Iterates over vector `v` and, for each element, calls function `f` with the +element's value. + +*/ +fn iter<T>(v: [const T], f: block(T)) { + iter2(v) { |_i, v| f(v) } +} + +/* +Function: iter2 + +Iterates over a vector's elements and indexes + +Iterates over vector `v` and, for each element, calls function `f` with the +element's value and index. +*/ +fn iter2<T>(v: [const T], f: block(uint, T)) { + let i = 0u, l = len(v); + while i < l { f(i, v[i]); i += 1u; } +} + +/* +Function: riter + +Iterates over a vector in reverse + +Iterates over vector `v` and, for each element, calls function `f` with the +element's value. + +*/ +fn riter<T>(v: [const T], f: block(T)) { + riter2(v) { |_i, v| f(v) } +} + +/* +Function: riter2 + +Iterates over a vector's elements and indexes in reverse + +Iterates over vector `v` and, for each element, calls function `f` with the +element's value and index. +*/ +fn riter2<T>(v: [const T], f: block(uint, T)) { + let i = len(v); + while 0u < i { + i -= 1u; + f(i, v[i]); + }; +} + +/* +Function: permute + +Iterate over all permutations of vector `v`. Permutations are produced in +lexicographic order with respect to the order of elements in `v` (so if `v` +is sorted then the permutations are lexicographically sorted). + +The total number of permutations produced is `len(v)!`. If `v` contains +repeated elements, then some permutations are repeated. +*/ +fn permute<copy T>(v: [const T], put: block([T])) { + let ln = len(v); + if ln == 0u { + put([]); + } else { + let i = 0u; + while i < ln { + let elt = v[i]; + let rest = slice(v, 0u, i) + slice(v, i+1u, ln); + permute(rest) {|permutation| put([elt] + permutation)} + i += 1u; + } + } +} + +/* +Function: to_ptr + +FIXME: We don't need this wrapper +*/ +unsafe fn to_ptr<T>(v: [T]) -> *T { ret unsafe::to_ptr(v); } + +/* +Module: unsafe +*/ +mod unsafe { + type vec_repr = {mutable fill: uint, mutable alloc: uint, data: u8}; + + /* + Function: from_buf + + Constructs a vector from an unsafe pointer to a buffer + + Parameters: + + ptr - An unsafe pointer to a buffer of `T` + elts - The number of elements in the buffer + */ + unsafe fn from_buf<T>(ptr: *T, elts: uint) -> [T] { + ret rustrt::vec_from_buf_shared(sys::get_type_desc::<T>(), + ptr, elts); + } + + /* + Function: set_len + + Sets the length of a vector + + This well explicitly set the size of the vector, without actually + modifing its buffers, so it is up to the caller to ensure that + the vector is actually the specified size. + */ + unsafe fn set_len<T>(&v: [const T], new_len: uint) { + let repr: **vec_repr = ::unsafe::reinterpret_cast(addr_of(v)); + (**repr).fill = new_len * sys::size_of::<T>(); + } + + /* + Function: to_ptr + + Returns an unsafe pointer to the vector's buffer + + The caller must ensure that the vector outlives the pointer this + function returns, or else it will end up pointing to garbage. + + Modifying the vector may cause its buffer to be reallocated, which + would also make any pointers to it invalid. + */ + unsafe fn to_ptr<T>(v: [const T]) -> *T { + let repr: **vec_repr = ::unsafe::reinterpret_cast(addr_of(v)); + ret ::unsafe::reinterpret_cast(addr_of((**repr).data)); + } +} + +// Local Variables: +// mode: rust; +// fill-column: 78; +// indent-tabs-mode: nil +// c-basic-offset: 4 +// buffer-file-coding-system: utf-8-unix +// End: |
