diff options
| author | Marijn Haverbeke <marijnh@gmail.com> | 2012-01-19 12:51:20 +0100 |
|---|---|---|
| committer | Marijn Haverbeke <marijnh@gmail.com> | 2012-01-19 13:34:10 +0100 |
| commit | a4b77758f0e1b6b409e874e739fe7cf4609651ee (patch) | |
| tree | b9e698e7299b8535895c15e0b104718b2b2513aa | |
| parent | d699db699a07d5bb80d5d08508540b0aba6e1026 (diff) | |
| download | rust-a4b77758f0e1b6b409e874e739fe7cf4609651ee.tar.gz rust-a4b77758f0e1b6b409e874e739fe7cf4609651ee.zip | |
Move tutorial over to a format similar to the reference doc
And adjust highlighting/testing scripts to deal with this.
28 files changed, 2719 insertions, 4015 deletions
diff --git a/doc/extract-tests.js b/doc/extract-tests.js new file mode 100755 index 00000000000..88de3f563fb --- /dev/null +++ b/doc/extract-tests.js @@ -0,0 +1,44 @@ +#!/usr/local/bin/node + +/*** + * Script for extracting compilable fragments from markdown + * documentation. See prep.js for a description of the format + * recognized by this tool. Expects a directory fragements/ to exist + * under the current directory, and writes the fragments in there as + * individual .rs files. + */ + +var fs = require("fs"); + +if (!process.argv[2]) { + console.log("Please provide an input file name."); + process.exit(1); +} + +var lines = fs.readFileSync(process.argv[2]).toString().split(/\n\r?/g); +var cur = 0, line, chapter, chapter_n; + +while ((line = lines[cur++]) != null) { + var chap = line.match(/^# (.*)/); + if (chap) { + chapter = chap[1].toLowerCase().replace(/\W/g, "_"); + chapter_n = 1; + } else if (/^~~~/.test(line)) { + var block = "", ignore = false; + while ((line = lines[cur++]) != null) { + if (/^\s*## (?:notrust|ignore)/.test(line)) ignore = true; + else if (/^~~~/.test(line)) break; + else block += line.replace(/^# /, "") + "\n"; + } + if (!ignore) { + if (!/\bfn main\b/.test(block)) { + if (/(^|\n) *(native|use|mod|import|export)\b/.test(block)) + block += "\nfn main() {}\n"; + else block = "fn main() {\n" + block + "\n}\n"; + } + if (!/\buse std\b/.test(block)) block = "use std;\n" + block; + var filename = "fragments/" + chapter + "_" + (chapter_n++) + ".rs"; + fs.writeFileSync(filename, block); + } + } +} diff --git a/doc/tutorial/lib/codemirror-node.js b/doc/lib/codemirror-node.js index fac4c076d48..fac4c076d48 100644 --- a/doc/tutorial/lib/codemirror-node.js +++ b/doc/lib/codemirror-node.js diff --git a/doc/tutorial/lib/codemirror-rust.js b/doc/lib/codemirror-rust.js index 47fd7696a1d..47fd7696a1d 100644 --- a/doc/tutorial/lib/codemirror-rust.js +++ b/doc/lib/codemirror-rust.js diff --git a/doc/prep.js b/doc/prep.js new file mode 100755 index 
00000000000..f8876b55020 --- /dev/null +++ b/doc/prep.js @@ -0,0 +1,67 @@ +#!/usr/local/bin/node + +/*** + * Pandoc-style markdown preprocessor that drops extra directives + * included for running doc code, and that optionally, when + * --highlight is provided, replaces code blocks that are Rust code + * with highlighted HTML blocks. The directives recognized are: + * + * '## ignore' tells the test extractor (extract-tests.js) to ignore + * the block completely. + * '## notrust' makes the test extractor ignore the block, makes + * this script not highlight the block. + * '# [any text]' is a line that is stripped out by this script, and + * converted to a normal line of code (without the leading #) by + * the test extractor. + */ + +var fs = require("fs"); +CodeMirror = require("./lib/codemirror-node"); +require("./lib/codemirror-rust"); + +function help() { + console.log("usage: " + process.argv[0] + " [--highlight] [-o outfile] [infile]"); + process.exit(1); +} + +var highlight = false, infile, outfile; + +for (var i = 2; i < process.argv.length; ++i) { + var arg = process.argv[i]; + if (arg == "--highlight") highlight = true; + else if (arg == "-o" && outfile == null && ++i < process.argv.length) outfile = process.argv[i]; + else if (arg[0] != "-") infile = arg; + else help(); +} + +var lines = fs.readFileSync(infile || "/dev/stdin").toString().split(/\n\r?/g), cur = 0, line; +var out = outfile ? fs.createWriteStream(outfile) : process.stdout; + +while ((line = lines[cur++]) != null) { + if (/^~~~/.test(line)) { + var block = "", bline, isRust = true; + while ((bline = lines[cur++]) != null) { + if (/^\s*## notrust/.test(bline)) isRust = false; + else if (/^~~~/.test(bline)) break; + if (!/^\s*##? 
/.test(bline)) block += bline + "\n"; + } + if (!highlight || !isRust) + out.write(line + "\n" + block + bline + "\n"); + else { + var html = '<pre class="cm-s-default">', curstr = "", curstyle = null; + function add(str, style) { + if (style != curstyle) { + if (curstyle) html += '<span class="cm-' + curstyle + '">' + curstr + + "</span>"; + else if (curstr) html += curstr; + curstr = str; curstyle = style; + } else curstr += str; + } + CodeMirror.runMode(block, "rust", add); + add("", "bogus"); // Flush pending string. + out.write(html + "</pre>\n"); + } + } else { + out.write(line + "\n"); + } +} diff --git a/doc/tutorial/test.sh b/doc/run-tests.sh index 9cdb1aa9f8e..a878570f4a9 100755 --- a/doc/tutorial/test.sh +++ b/doc/run-tests.sh @@ -1,7 +1,7 @@ #!/bin/bash rm -f fragments/*.rs mkdir -p fragments -node extract.js +node extract-tests.js $1 for F in `ls fragments/*.rs`; do $RUSTC $F > /dev/null if [[ $? != 0 ]] ; then echo $F; fi diff --git a/doc/rust.css b/doc/rust.css index 9e77ca43811..e7329802c4c 100644 --- a/doc/rust.css +++ b/doc/rust.css @@ -6,10 +6,14 @@ body { body { padding: 1em 6em; - max-width: 50em; + max-width: 60em; } -h1 { font-size: 22pt; } +h1 { + font-size: 22pt; + margin-top: 2em; + border-bottom: 2px solid silver; +} h2 { font-size: 17pt; } h3 { font-size: 14pt; } @@ -23,3 +27,27 @@ a, a:visited, a:link { text-decoration: none; color: #00438a; } + +h1 a:link, h1 a:visited, h2 a:link, h2 a:visited, +h3 a:link, h3 a:visited { color: black; } + +/* Code highlighting */ +.cm-s-default span.cm-keyword {color: #708;} +.cm-s-default span.cm-atom {color: #219;} +.cm-s-default span.cm-number {color: #164;} +.cm-s-default span.cm-def {color: #00f;} +.cm-s-default span.cm-variable {color: black;} +.cm-s-default span.cm-variable-2 {color: #05a;} +.cm-s-default span.cm-variable-3 {color: #085;} +.cm-s-default span.cm-property {color: black;} +.cm-s-default span.cm-operator {color: black;} +.cm-s-default span.cm-comment {color: #a50;} +.cm-s-default 
span.cm-string {color: #a11;} +.cm-s-default span.cm-string-2 {color: #f50;} +.cm-s-default span.cm-meta {color: #555;} +.cm-s-default span.cm-error {color: #f00;} +.cm-s-default span.cm-qualifier {color: #555;} +.cm-s-default span.cm-builtin {color: #30a;} +.cm-s-default span.cm-bracket {color: #cc7;} +.cm-s-default span.cm-tag {color: #170;} +.cm-s-default span.cm-attribute {color: #00c;} diff --git a/doc/tutorial.md b/doc/tutorial.md new file mode 100644 index 00000000000..e7aa12456e6 --- /dev/null +++ b/doc/tutorial.md @@ -0,0 +1,2556 @@ +% Rust Language Tutorial + +# Introduction + +## Scope + +This is a tutorial for the Rust programming language. It assumes the +reader is familiar with the basic concepts of programming, and has +programmed in one or more other languages before. The tutorial covers +the whole language, though not with the depth and precision of the +[language reference][1]. + +[1]: http://www.rust-lang.org/doc/rust.html + +## Disclaimer + +Rust is a language under development. The general flavor of the +language has settled, but details will continue to change as it is +further refined. Nothing in this tutorial is final, and though we try +to keep it updated, it is possible that the text occasionally does not +reflect the actual state of the language. + +## First Impressions + +Though syntax is something you get used to, an initial encounter with +a language can be made easier if the notation looks familiar. Rust is +a curly-brace language in the tradition of C, C++, and JavaScript. + +~~~~ +fn fac(n: int) -> int { + let result = 1, i = 1; + while i <= n { + result *= i; + i += 1; + } + ret result; +} +~~~~ + +Several differences from C stand out. Types do not come before, but +after variable names (preceded by a colon). In local variables +(introduced with `let`), they are optional, and will be inferred when +left off. Constructs like `while` and `if` do not require parentheses +around the condition (though they allow them).
Also, there's a +tendency towards aggressive abbreviation in the keywords—`fn` for +function, `ret` for return. + +You should, however, not conclude that Rust is simply an evolution of +C. As will become clear in the rest of this tutorial, it goes in +quite a different direction. + +## Conventions + +Throughout the tutorial, words that indicate language keywords or +identifiers defined in the example code are displayed in `code font`. + +Code snippets are indented, and also shown in a monospace font. Not +all snippets constitute whole programs. For brevity, we'll often show +fragments of programs that don't compile on their own. To try them +out, you might have to wrap them in `fn main() { ... }`, and make sure +they don't contain references to things that aren't actually defined. + +# Getting started + +## Installation + +FIXME Fill this in when the installation package is finished. + +## Compiling your first program + +Rust program files are, by convention, given the extension `.rs`. Say +we have a file `hello.rs` containing this program: + +~~~~ +use std; +fn main(args: [str]) { + std::io::println("hello world from '" + args[0] + "'!"); +} +~~~~ + +If the Rust compiler was installed successfully, running `rustc +hello.rs` will produce a binary called `hello` (or `hello.exe`). + +If you modify the program to make it invalid (for example, remove the +`use std` line), and then compile it, you'll see an error message like +this: + +~~~~ +## notrust +hello.rs:2:4: 2:20 error: unresolved modulename: std +hello.rs:2 std::io::println("hello world!"); + ^~~~~~~~~~~~~~~~ +~~~~ + +The Rust compiler tries to provide useful information when it runs +into an error. + +## Anatomy of a Rust program + +In its simplest form, a Rust program is simply a `.rs` file with some +types and functions defined in it. If it has a `main` function, it can +be compiled to an executable. 
Rust does not allow code that's not a +declaration to appear at the top level of the file—all statements must +live inside a function. + +Rust programs can also be compiled as libraries, and included in other +programs. The `use std` directive that appears at the top of a lot of +examples imports the [standard library][std]. This is described in more +detail [later on](mod.html). + +[std]: http://doc.rust-lang.org/doc/std/index/General.html + +## Editing Rust code + +There are Vim highlighting and indentation scripts in the Rust source +distribution under `src/etc/vim/`, and an emacs mode under +`src/etc/emacs/`. + +[rust-mode]: https://github.com/marijnh/rust-mode + +Other editors are not provided for yet. If you end up writing a Rust +mode for your favorite editor, let us know so that we can link to it. + +# Syntax Basics + +## Braces + +Assuming you've programmed in any C-family language (C++, Java, +JavaScript, C#, or PHP), Rust will feel familiar. The main surface +difference to be aware of is that the bodies of `if` statements and of +loops *have* to be wrapped in braces. Single-statement, brace-less +bodies are not allowed. + +If the verbosity of that bothers you, consider the fact that this +allows you to omit the parentheses around the condition in `if`, +`while`, and similar constructs. This will save you two characters +every time. As a bonus, you no longer have to spend any mental energy +on deciding whether you need to add braces or not, or on adding them +after the fact when adding a statement to an `if` branch. + +Accounting for these differences, the surface syntax of Rust +statements and expressions is C-like.
Function calls are written +`myfunc(arg1, arg2)`, operators have mostly the same name and +precedence that they have in C, comments look the same, and constructs +like `if` and `while` are available: + +~~~~ +# fn call_a_function(_a: int) {} +fn main() { + if 1 < 2 { + while false { call_a_function(10 * 4); } + } else if 4 < 3 || 3 < 4 { + // Comments are C++-style too + } else { + /* Multi-line comment syntax */ + } +} +~~~~ + +## Expression syntax + +Though it isn't apparent in all code, there is a fundamental +difference between Rust's syntax and the predecessors in this family +of languages. A lot of things that are statements in C are expressions +in Rust. This allows for useless things like this (which passes +nil—the void type—to a function): + +~~~~ +# fn a_function(_a: ()) {} +a_function(while false {}); +~~~~ + +But also useful things like this: + +~~~~ +# fn the_stars_align() -> bool { false } +# fn something_else() -> bool { true } +let x = if the_stars_align() { 4 } + else if something_else() { 3 } + else { 0 }; +~~~~ + +This piece of code will bind the variable `x` to a value depending on +the conditions. Note the condition bodies, which look like `{ +expression }`. The lack of a semicolon after the last statement in a +braced block gives the whole block the value of that last expression. +If the branches of the `if` had looked like `{ 4; }`, the above +example would simply assign nil (void) to `x`. But without the +semicolon, each branch has a different value, and `x` gets the value +of the branch that was taken. + +This also works for function bodies. This function returns a boolean: + +~~~~ +fn is_four(x: int) -> bool { x == 4 } +~~~~ + +In short, everything that's not a declaration (`let` for variables, +`fn` for functions, etcetera) is an expression.
+ +If all those things are expressions, you might conclude that you have +to add a terminating semicolon after *every* statement, even ones that +are not traditionally terminated with a semicolon in C (like `while`). +That is not the case, though. Expressions that end in a block only +need a semicolon if that block contains a trailing expression. `while` +loops do not allow trailing expressions, and `if` statements tend to +only have a trailing expression when you want to use their value for +something—in which case you'll have embedded it in a bigger statement, +like the `let x = ...` example above. + +## Identifiers + +Rust identifiers must start with an alphabetic character or an +underscore, and after that may contain any alphanumeric character, and +more underscores. + +NOTE: The parser doesn't currently recognize non-ascii alphabetic +characters. This is a bug that will eventually be fixed. + +The double-colon (`::`) is used as a module separator, so +`std::io::println` means 'the thing named `println` in the module +named `io` in the module named `std`'. + +Rust will normally emit warnings about unused variables. These can be +suppressed by using a variable name that starts with an underscore. + +~~~~ +fn this_warns(x: int) {} +fn this_doesnt(_x: int) {} +~~~~ + +## Variable declaration + +The `let` keyword, as we've seen, introduces a local variable. Global +constants can be defined with `const`: + +~~~~ +use std; +const repeat: uint = 5u; +fn main() { + let count = 0u; + while count < repeat { + std::io::println("Hi!"); + count += 1u; + } +} +~~~~ + +## Types + +The `-> bool` in the `is_four` example is the way a function's return +type is written. For functions that do not return a meaningful value +(these conceptually return nil in Rust), you can optionally say `-> +()` (`()` is how nil is written), but usually the return annotation is +simply left off, as in the `fn main() { ... }` examples we've seen +earlier.
+ +Every argument to a function must have its type declared (for example, +`x: int`). Inside the function, type inference will be able to +automatically deduce the type of most locals (generic functions, which +we'll come back to later, will occasionally need additional +annotation). Locals can be written either with or without a type +annotation: + +~~~~ +// The type of this vector will be inferred based on its use. +let x = []; +# x = [3]; +// Explicitly say this is a vector of integers. +let y: [int] = []; +~~~~ + +The basic types are written like this: + +`()` + : Nil, the type that has only a single value. + +`bool` + : Boolean type, with values `true` and `false`. + +`int` + : A machine-pointer-sized integer. + +`uint` + : A machine-pointer-sized unsigned integer. + +`i8`, `i16`, `i32`, `i64` + : Signed integers with a specific size (in bits). + +`u8`, `u16`, `u32`, `u64` + : Unsigned integers with a specific size. + +`f32`, `f64` + : Floating-point types. + +`float` + : The largest floating-point type efficiently supported on the target machine. + +`char` + : A character is a 32-bit Unicode code point. + +`str` + : String type. A string contains a utf-8 encoded sequence of characters. + +These can be combined in composite types, which will be described in +more detail later on (the `T`s here stand for any other type): + +`[T]` + : Vector type. + +`[mutable T]` + : Mutable vector type. + +`(T1, T2)` + : Tuple type. Any arity above 1 is supported. + +`{field1: T1, field2: T2}` + : Record type. + +`fn(arg1: T1, arg2: T2) -> T3`, `lambda()`, `block()` + : Function types. + +`@T`, `~T`, `*T` + : Pointer types. + +Types can be given names with `type` declarations: + +~~~~ +type monster_size = uint; +~~~~ + +This will provide a synonym, `monster_size`, for unsigned integers. It +will not actually create a new type—`monster_size` and `uint` can be +used interchangeably, and using one where the other is expected is not +a type error. 
Read about [single-variant enums][sve] further on if you +need to create a type name that's not just a synonym. + +[sve]: data.html#single_variant_enum + +## Literals + +Integers can be written in decimal (`144`), hexadecimal (`0x90`), and +binary (`0b10010000`) base. Without suffix, an integer literal is +considered to be of type `int`. Add a `u` (`144u`) to make it a `uint` +instead. Literals of the fixed-size integer types can be created by +following the literal with the type name (`255u8`, `50i64`, etc). + +Note that, in Rust, no implicit conversion between integer types +happens. If you are adding one to a variable of type `uint`, you must +type `v += 1u`—saying `+= 1` will give you a type error. + +Floating point numbers are written `0.0`, `1e6`, or `2.1e-4`. Without +suffix, the literal is assumed to be of type `float`. Suffixes `f32` +and `f64` can be used to create literals of a specific type. The +suffix `f` can be used to write `float` literals without a dot or +exponent: `3f`. + +The nil literal is written just like the type: `()`. The keywords +`true` and `false` produce the boolean literals. + +Character literals are written between single quotes, as in `'x'`. You +may put non-ascii characters between single quotes (your source files +should be encoded as utf-8). Rust understands a number of +character escapes, using the backslash character: + +`\n` + : A newline (unicode character 10). + +`\r` + : A carriage return (13). + +`\t` + : A tab character (9). + +`\\`, `\'`, `\"` + : Simply escapes the following character. + +`\xHH`, `\uHHHH`, `\UHHHHHHHH` + : Unicode escapes, where the `H` characters are the hexadecimal digits that + form the character code. + +String literals allow the same escape sequences. They are written +between double quotes (`"hello"`). Rust strings may contain newlines. +When a newline is preceded by a backslash, it, and all white space +following it, will not appear in the resulting string literal.
So +this is equivalent to `"abc"`: + +~~~~ +let s = "a\ + b\ + c"; +~~~~ + +## Operators + +Rust's set of operators contains very few surprises. The main +difference with C is that `++` and `--` are missing, and that the +logical binary operators have higher precedence—in C, `x & 2 > 0` +comes out as `x & (2 > 0)`, in Rust, it means `(x & 2) > 0`, which is +more likely to be what you expect (unless you are a C veteran). + +Thus, binary arithmetic is done with `*`, `/`, `%`, `+`, and `-` +(multiply, divide, remainder, plus, minus). `-` is also a unary prefix +operator (there are no unary postfix operators in Rust) that does +negation. + +Binary shifting is done with `>>` (shift right), `>>>` (arithmetic +shift right), and `<<` (shift left). Logical bitwise operators are +`&`, `|`, and `^` (and, or, and exclusive or), and unary `!` for +bitwise negation (or boolean negation when applied to a boolean +value). + +The comparison operators are the traditional `==`, `!=`, `<`, `>`, +`<=`, and `>=`. Short-circuiting (lazy) boolean operators are written +`&&` (and) and `||` (or). + +Rust has a ternary conditional operator `?:`, as in: + +~~~~ +let badness = 12; +let message = badness < 10 ? "error" : "FATAL ERROR"; +~~~~ + +For type casting, Rust uses the binary `as` operator, which has a +precedence between the bitwise combination operators (`&`, `|`, `^`) +and the comparison operators. It takes an expression on the left side, +and a type on the right side, and will, if a meaningful conversion +exists, convert the result of the expression to the given type. + +~~~~ +let x: float = 4.0; +let y: uint = x as uint; +assert y == 4u; +~~~~ + +## Attributes + +<a name="conditional"></a> + +Every definition can be annotated with attributes. Attributes are meta +information that can serve a variety of purposes. One of those is +conditional compilation: + +~~~~ +#[cfg(target_os = "win32")] +fn register_win_service() { /* ... 
*/ } +~~~~ + +This will cause the function to vanish without a trace during +compilation on a non-Windows platform, much like `#ifdef` in C (it +allows `cfg(flag=value)` and `cfg(flag)` forms, where the second +simply checks whether the configuration flag is defined at all). Flags +for `target_os` and `target_arch` are set by the compiler. It is +possible to set additional flags with the `--cfg` command-line option. + +Attributes are always wrapped in hash-braces (`#[attr]`). Inside the +braces, a small minilanguage is supported, whose interpretation +depends on the attribute that's being used. The simplest form is a +plain name (as in `#[test]`, which is used by the [built-in test +framework](test.html '')). A name-value pair can be provided using an `=` +character followed by a literal (as in `#[license = "BSD"]`, which is +a valid way to annotate a Rust program as being released under a +BSD-style license). Finally, you can have a name followed by a +comma-separated list of nested attributes, as in the `cfg` example +above, or in this [crate](mod.html) metadata declaration: + +~~~~ +## ignore +#[link(name = "std", + vers = "0.1", + url = "http://rust-lang.org/src/std")]; +~~~~ + +An attribute without a semicolon following it applies to the +definition that follows it. When terminated with a semicolon, it +applies to the module or crate in which it appears. + +## Syntax extensions + +There are plans to support user-defined syntax (macros) in Rust. This +currently only exists in very limited form. + +The compiler defines a few built-in syntax extensions. The most useful +one is `#fmt`, a printf-style text formatting macro that is expanded +at compile time. + +~~~~ +std::io::println(#fmt("%s is %d", "the answer", 42)); +~~~~ + +`#fmt` supports most of the directives that [printf][pf] supports, but +will give you a compile-time error when the types of the directives +don't match the types of the arguments. 
+ +[pf]: http://en.cppreference.com/w/cpp/io/c/fprintf + +All syntax extensions look like `#word`. Another built-in one is +`#env`, which will look up its argument as an environment variable at +compile-time. + +~~~~ +std::io::println(#env("PATH")); +~~~~ +# Control structures + +## Conditionals + +We've seen `if` pass by a few times already. To recap, braces are +compulsory, an optional `else` clause can be appended, and multiple +`if`/`else` constructs can be chained together: + +~~~~ +if false { + std::io::println("that's odd"); +} else if true { + std::io::println("right"); +} else { + std::io::println("neither true nor false"); +} +~~~~ + +The condition given to an `if` construct *must* be of type boolean (no +implicit conversion happens). If the arms return a value, this value +must be of the same type for every arm in which control reaches the +end of the block: + +~~~~ +fn signum(x: int) -> int { + if x < 0 { -1 } + else if x > 0 { 1 } + else { ret 0; } +} +~~~~ + +The `ret` (return) and its semicolon could have been left out without +changing the meaning of this function, but it illustrates that you +will not get a type error in this case, although the last arm doesn't +have type `int`, because control doesn't reach the end of that arm +(`ret` is jumping out of the function). + +## Pattern matching + +Rust's `alt` construct is a generalized, cleaned-up version of C's +`switch` construct. You provide it with a value and a number of arms, +each labelled with a pattern, and it will execute the arm that matches +the value. + +~~~~ +# let my_number = 1; +alt my_number { + 0 { std::io::println("zero"); } + 1 | 2 { std::io::println("one or two"); } + 3 to 10 { std::io::println("three to ten"); } + _ { std::io::println("something else"); } +} +~~~~ + +There is no 'falling through' between arms, as in C—only one arm is +executed, and it doesn't have to explicitly `break` out of the +construct when it is finished. 
+ +The part to the left of each arm is called the pattern. Literals are +valid patterns, and will match only their own value. The pipe operator +(`|`) can be used to assign multiple patterns to a single arm. Ranges +of numeric literal patterns can be expressed with `to`. The underscore +(`_`) is a wildcard pattern that matches everything. + +If the arm with the wildcard pattern was left off in the above +example, running it on a number greater than ten (or negative) would +cause a run-time failure. When no arm matches, `alt` constructs do not +silently fall through—they blow up instead. + +A powerful application of pattern matching is *destructuring*, where +you use the matching to get at the contents of data types. Remember +that `(float, float)` is a tuple of two floats: + +~~~~ +fn angle(vec: (float, float)) -> float { + alt vec { + (0f, y) if y < 0f { 1.5 * float::consts::pi } + (0f, y) { 0.5 * float::consts::pi } + (x, y) { float::atan(y / x) } + } +} +~~~~ + +A variable name in a pattern matches everything, *and* binds that name +to the value of the matched thing inside of the arm block. Thus, `(0f, +y)` matches any tuple whose first element is zero, and binds `y` to +the second element. `(x, y)` matches any tuple, and binds both +elements to a variable. + +Any `alt` arm can have a guard clause (written `if EXPR`), which is +an expression of type `bool` that determines, after the pattern is +found to match, whether the arm is taken or not. The variables bound +by the pattern are available in this guard expression. + +## Destructuring let + +To a limited extent, it is possible to use destructuring patterns when +declaring a variable with `let`. For example, you can say this to +extract the fields from a tuple: + +~~~~ +# fn get_tuple_of_two_ints() -> (int, int) { (1, 1) } +let (a, b) = get_tuple_of_two_ints(); +~~~~ + +This will introduce two new variables, `a` and `b`, bound to the +content of the tuple. 
+ +You may only use irrefutable patterns—patterns that can never fail to +match—in let bindings, though. Things like literals, which only match +a specific value, are not allowed. + +## Loops + +`while` produces a loop that runs as long as its given condition +(which must have type `bool`) evaluates to true. Inside a loop, the +keyword `break` can be used to abort the loop, and `cont` can be used +to abort the current iteration and continue with the next. + +~~~~ +let x = 5; +while true { + x += x - 3; + if x % 5 == 0 { break; } + std::io::println(int::str(x)); +} +~~~~ + +This code prints out a weird sequence of numbers and stops as soon as +it finds one that can be divided by five. + +There's also `while`'s ugly cousin, `do`/`while`, which does not check +its condition on the first iteration, using traditional syntax: + +~~~~ +# fn eat_cake() {} +# fn any_cake_left() -> bool { false } +do { + eat_cake(); +} while any_cake_left(); +~~~~ + +When iterating over a vector, use `for` instead. + +~~~~ +for elt in ["red", "green", "blue"] { + std::io::println(elt); +} +~~~~ + +This will go over each element in the given vector (a three-element +vector of strings, in this case), and repeatedly execute the body with +`elt` bound to the current element. You may add an optional type +declaration (`elt: str`) for the iteration variable if you want. + +For more involved iteration, such as going over the elements of a hash +table, Rust uses higher-order functions. We'll come back to those in a +moment. + +## Failure + +The `fail` keyword causes the current [task][tasks] to fail. You use +it to indicate unexpected failure, much like you'd use `exit(1)` in a +C program, except that in Rust, it is possible for other tasks to +handle the failure, allowing the program to continue running. + +`fail` takes an optional argument, which must have type `str`.
Trying +to access a vector out of bounds, or running a pattern match with no +matching clauses, both result in the equivalent of a `fail`. + +[tasks]: task.html + +## Logging + +Rust has a built-in logging mechanism, using the `log` statement. +Logging is polymorphic—any type of value can be logged, and the +runtime will do its best to output a textual representation of the +value. + +~~~~ +log(warn, "hi"); +log(error, (1, [2.5, -1.8])); +~~~~ + +The first argument is the log level (levels `info`, `warn`, and +`error` are predefined), and the second is the value to log. By +default, you *will not* see the output of that first log statement, +which has `warn` level. The environment variable `RUST_LOG` controls +which log level is used. It can contain a comma-separated list of +paths for modules that should be logged. For example, running `rustc` +with `RUST_LOG=rustc::front::attr` will turn on logging in its +attribute parser. If you compile a program named `foo.rs`, its +top-level module will be called `foo`, and you can set `RUST_LOG` to +`foo` to enable `warn` and `info` logging for the module. + +Turned-off `log` statements impose minimal overhead on the code that +contains them, so except in code that needs to be really, really fast, +you should feel free to scatter around debug logging statements, and +leave them in. + +Three macros that combine text-formatting (as with `#fmt`) and logging +are available. These take a string and any number of format arguments, +and will log the formatted string: + +~~~~ +# fn get_error_string() -> str { "boo" } +#warn("only %d seconds remaining", 10); +#error("fatal: %s", get_error_string()); +~~~~ + +## Assertions + +The keyword `assert`, followed by an expression with boolean type, +will check that the given expression results in `true`, and cause a +failure otherwise. It is typically used to double-check things that +*should* hold at a certain point in a program. 
+ +~~~~ +let x = 100; +while (x > 10) { x -= 10; } +assert x == 10; +~~~~ + +# Functions + +Functions (like all other static declarations, such as `type`) can be +declared both at the top level and inside other functions (or modules, +which we'll come back to in a moment). + +The `ret` keyword immediately returns from a function. It is +optionally followed by an expression to return. In functions that +return `()`, the returned expression can be left off. A function can +also return a value by having its top level block produce an +expression (by omitting the final semicolon). + +Some functions (such as the C function `exit`) never return normally. +In Rust, these are annotated with the pseudo-return type '`!`': + +~~~~ +fn dead_end() -> ! { fail; } +~~~~ + +This helps the compiler avoid spurious error messages. For example, +the following code would be a type error if `dead_end` would be +expected to return. + +~~~~ +# fn can_go_left() -> bool { true } +# fn can_go_right() -> bool { true } +# enum dir { left; right; } +# fn dead_end() -> ! { fail; } +let dir = if can_go_left() { left } + else if can_go_right() { right } + else { dead_end(); }; +~~~~ + +## Closures + +Named functions, like those in the previous section, do not close over +their environment. Rust also includes support for closures, which are +functions that can access variables in the scope in which they are +created. + +There are several forms of closures, each with its own role. The most +common type is called a 'block', this is a closure which has full +access to its environment. + +~~~~ +fn call_block_with_ten(b: block(int)) { b(10); } + +let x = 20; +call_block_with_ten({|arg| + #info("x=%d, arg=%d", x, arg); +}); +~~~~ + +This defines a function that accepts a block, and then calls it with a +simple block that executes a log statement, accessing both its +argument and the variable `x` from its environment.
+ +Blocks can only be used in a restricted way, because they are not allowed +to survive the scope in which they were created. They are allowed to +appear in function argument position and in call position, but nowhere +else. + +### Boxed closures + +When you need to store a closure in a data structure, a block will not +do, since the compiler will refuse to let you store it. For this +purpose, Rust provides a type of closure that has an arbitrary +lifetime, written `fn@` (boxed closure, analogous to the `@` pointer +type described in the next section). + +A boxed closure does not directly access its environment, but merely +copies out the values that it closes over into a private data +structure. This means that it can not assign to these variables, and +will not 'see' updates to them. + +This code creates a closure that adds a given string to its argument, +returns it from a function, and then calls it: + +~~~~ +use std; + +fn mk_appender(suffix: str) -> fn@(str) -> str { + let f = fn@(s: str) -> str { s + suffix }; + ret f; +} + +fn main() { + let shout = mk_appender("!"); + std::io::println(shout("hey ho, let's go")); +} +~~~~ + +### Closure compatibility + +A nice property of Rust closures is that you can pass any kind of +closure (as long as the arguments and return types match) to functions +that expect a `block`. Thus, when writing a higher-order function that +wants to do nothing with its function argument beyond calling it, you +should almost always specify the type of that argument as `block`, so +that callers have the flexibility to pass whatever they want.
+ +~~~~ +fn call_twice(f: block()) { f(); f(); } +call_twice({|| "I am a block"; }); +call_twice(fn@() { "I am a boxed closure"; }); +fn bare_function() { "I am a plain function"; } +call_twice(bare_function); +~~~~ + +### Unique closures + +<a name="unique"></a> + +Unique closures, written `fn~` in analogy to the `~` pointer type (see +next section), hold on to things that can safely be sent between +processes. They copy the values they close over, much like boxed +closures, but they also 'own' them—meaning no other code can access +them. Unique closures mostly exist for spawning new +[tasks](task.html). + +### Shorthand syntax + +The compact syntax used for blocks (`{|arg1, arg2| body}`) can also +be used to express boxed and unique closures in situations where the +closure style can be unambiguously derived from the context. Most +notably, when calling a higher-order function you do not have to use +the long-hand syntax for the function you're passing, since the +compiler can look at the argument type to find out what the parameter +types are. + +As a further simplification, if the final parameter to a function is a +closure, the closure need not be placed within parentheses. You could, +for example, write... + +~~~~ +let doubled = vec::map([1, 2, 3]) {|x| x*2}; +~~~~ + +`vec::map` is a function in the core library that applies its last +argument to every element of a vector, producing a new vector. + +Even when a closure takes no parameters, you must still write the bars +for the parameter list, as in `{|| ...}`. + +## Binding + +Partial application is done using the `bind` keyword in Rust. + +~~~~ +let daynum = bind vec::position(_, ["mo", "tu", "we", "do", + "fr", "sa", "su"]); +~~~~ + +Binding a function produces a boxed closure (`fn@` type) in which some +of the arguments to the bound function have already been provided. +`daynum` will be a function taking a single string argument, and +returning the day of the week that string corresponds to (if any).
+ +## Iteration + +Functions taking blocks provide a good way to define non-trivial +iteration constructs. For example, this one iterates over a vector +of integers backwards: + +~~~~ +fn for_rev(v: [int], act: block(int)) { + let i = vec::len(v); + while (i > 0u) { + i -= 1u; + act(v[i]); + } +} +~~~~ + +To run such an iteration, you could do this: + +~~~~ +# fn for_rev(v: [int], act: block(int)) {} +for_rev([1, 2, 3], {|n| log(error, n); }); +~~~~ + +Making use of the shorthand where a final closure argument can be +moved outside of the parentheses permits the following, which +looks quite like a normal loop: + +~~~~ +# fn for_rev(v: [int], act: block(int)) {} +for_rev([1, 2, 3]) {|n| + log(error, n); +} +~~~~ + +Note that, because `for_rev()` returns unit type, no semicolon is +needed when the final closure is pulled outside of the parentheses. + +# Datatypes + +Rust datatypes are, by default, immutable. The core datatypes of Rust +are structural records and 'enums' (tagged unions, algebraic data +types). + +~~~~ +type point = {x: float, y: float}; +enum shape { + circle(point, float); + rectangle(point, point); +} +let my_shape = circle({x: 0.0, y: 0.0}, 10.0); +~~~~ + +## Records + +Rust record types are written `{field1: TYPE, field2: TYPE [, ...]}`, +and record literals are written in the same way, but with expressions +instead of types. They are quite similar to C structs, and even laid +out the same way in memory (so you can read from a Rust struct in C, +and vice-versa). + +The dot operator is used to access record fields (`mypoint.x`). + +Fields that you want to mutate must be explicitly marked as such. For +example... + +~~~~ +type stack = {content: [int], mutable head: uint}; +~~~~ + +With such a type, you can do `mystack.head += 1u`. If `mutable` were +omitted from the type, such an assignment would result in a type +error. 
+ +To 'update' an immutable record, you use functional record update +syntax, by ending a record literal with the keyword `with`: + +~~~~ +let oldpoint = {x: 10f, y: 20f}; +let newpoint = {x: 0f with oldpoint}; +assert newpoint == {x: 0f, y: 20f}; +~~~~ + +This will create a new struct, copying all the fields from `oldpoint` +into it, except for the ones that are explicitly set in the literal. + +Rust record types are *structural*. This means that `{x: float, y: +float}` is not just a way to define a new type, but is the actual name +of the type. Record types can be used without first defining them. If +module A defines `type point = {x: float, y: float}`, and module B, +without knowing anything about A, defines a function that returns an +`{x: float, y: float}`, you can use that return value as a `point` in +module A. (Remember that `type` defines an additional name for a type, +not an actual new type.) + +## Record patterns + +Records can be destructured in `alt` patterns. The basic syntax is +`{fieldname: pattern, ...}`, but the pattern for a field can be +omitted as a shorthand for simply binding the variable with the same +name as the field. + +~~~~ +# let mypoint = {x: 0f, y: 0f}; +alt mypoint { + {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ } + {x, y} { /* Simply bind the fields */ } +} +~~~~ + +The field names of a record do not have to appear in a pattern in the +same order they appear in the type. When you are not interested in all +the fields of a record, a record pattern may end with `, _` (as in +`{field1, _}`) to indicate that you're ignoring all other fields. + +## Enums + +Enums are datatypes that have several different representations.
For +example, the type shown earlier: + +~~~~ +# type point = {x: float, y: float}; +enum shape { + circle(point, float); + rectangle(point, point); +} +~~~~ + +A value of this type is either a circle, in which case it contains a +point record and a float, or a rectangle, in which case it contains +two point records. The run-time representation of such a value +includes an identifier of the actual form that it holds, much like the +'tagged union' pattern in C, but with better ergonomics. + +The above declaration will define a type `shape` that can be used to +refer to such shapes, and two functions, `circle` and `rectangle`, +which can be used to construct values of the type (taking arguments of +the specified types). So `circle({x: 0f, y: 0f}, 10f)` is the way to +create a new circle. + +Enum variants do not have to have parameters. This, for example, is +equivalent to a C enum: + +~~~~ +enum direction { + north; + east; + south; + west; +} +~~~~ + +This will define `north`, `east`, `south`, and `west` as constants, +all of which have type `direction`. + +When the enum is C-like, that is, none of the variants have parameters, +it is possible to explicitly set the discriminator values to an integer +value: + +~~~~ +enum color { + red = 0xff0000; + green = 0x00ff00; + blue = 0x0000ff; +} +~~~~ + +If an explicit discriminator is not specified for a variant, the value +defaults to the value of the previous variant plus one. If the first +variant does not have a discriminator, it defaults to 0. For example, +the value of `north` is 0, `east` is 1, etc. + +When an enum is C-like, the `as` cast operator can be used to get the +discriminator's value. + +<a name="single_variant_enum"></a> + +There is a special case for enums with a single variant. These are +used to define new types in such a way that the new name is not just a +synonym for an existing type, but its own distinct type.
If you say: + +~~~~ +enum gizmo_id = int; +~~~~ + +That is a shorthand for this: + +~~~~ +enum gizmo_id { gizmo_id(int); } +~~~~ + +Enum types like this can have their content extracted with the +dereference (`*`) unary operator: + +~~~~ +# enum gizmo_id = int; +let my_gizmo_id = gizmo_id(10); +let id_int: int = *my_gizmo_id; +~~~~ + +## Enum patterns + +For enum types with multiple variants, destructuring is the only way to +get at their contents. All variant constructors can be used as +patterns, as in this definition of `area`: + +~~~~ +# type point = {x: float, y: float}; +# enum shape { circle(point, float); rectangle(point, point); } +fn area(sh: shape) -> float { + alt sh { + circle(_, size) { float::consts::pi * size * size } + rectangle({x, y}, {x: x2, y: y2}) { (x2 - x) * (y2 - y) } + } +} +~~~~ + +Another example, matching nullary enum variants: + +~~~~ +# type point = {x: float, y: float}; +# enum direction { north; east; south; west; } +fn point_from_direction(dir: direction) -> point { + alt dir { + north { {x: 0f, y: 1f} } + east { {x: 1f, y: 0f} } + south { {x: 0f, y: -1f} } + west { {x: -1f, y: 0f} } + } +} +~~~~ + +## Tuples + +Tuples in Rust behave exactly like records, except that their fields +do not have names (and can thus not be accessed with dot notation). +Tuples can have any arity except for 0 or 1 (though you may see nil, +`()`, as the empty tuple if you like). + +~~~~ +let mytup: (int, int, float) = (10, 20, 30.0); +alt mytup { + (a, b, c) { log(info, a + b + (c as int)); } +} +~~~~ + +## Pointers + +In contrast to a lot of modern languages, record and enum types in +Rust are not represented as pointers to allocated memory. They are, +like in C and C++, represented directly. This means that if you `let x += {x: 1f, y: 1f};`, you are creating a record on the stack. If you +then copy it into a data structure, the whole record is copied, not +just a pointer. 
+ +For small records like `point`, this is usually more efficient than +allocating memory and going through a pointer. But for big records, or +records with mutable fields, it can be useful to have a single copy on +the heap, and refer to that through a pointer. + +Rust supports several types of pointers. The simplest is the unsafe +pointer, written `*TYPE`, which is a completely unchecked pointer +type only used in unsafe code (and thus, in typical Rust code, very +rarely). The safe pointer types are `@TYPE` for shared, +reference-counted boxes, and `~TYPE`, for uniquely-owned pointers. + +All pointer types can be dereferenced with the `*` unary operator. + +### Shared boxes + +<a name="shared-box"></a> + +Shared boxes are pointers to heap-allocated, reference counted memory. +A cycle collector ensures that circular references do not result in +memory leaks. + +Creating a shared box is done by simply applying the unary `@` +operator to an expression. The result of the expression will be boxed, +resulting in a box of the right type. For example: + +~~~~ +let x = @10; // New box, refcount of 1 +let y = x; // Copy the pointer, increase refcount +// When x and y go out of scope, refcount goes to 0, box is freed +~~~~ + +NOTE: We may in the future switch to garbage collection, rather than +reference counting, for shared boxes. + +Shared boxes never cross task boundaries. + +### Unique boxes + +<a name="unique-box"></a> + +In contrast to shared boxes, unique boxes are not reference counted. +Instead, it is statically guaranteed that only a single owner of the +box exists at any time. + +~~~~ +let x = ~10; +let y <- x; +~~~~ + +This is where the 'move' (`<-`) operator comes in. It is similar to +`=`, but it de-initializes its source. Thus, the unique box can move +from `x` to `y`, without violating the constraint that it only has a +single owner (if you used assignment instead of the move operator, the +box would, in principle, be copied). 
+ +Unique boxes, when they do not contain any shared boxes, can be sent +to other tasks. The sending task will give up ownership of the box, +and won't be able to access it afterwards. The receiving task will +become the sole owner of the box. + +### Mutability + +All pointer types have a mutable variant, written `@mutable TYPE` or +`~mutable TYPE`. Given such a pointer, you can write to its contents +by combining the dereference operator with a mutating action. + +~~~~ +fn increase_contents(pt: @mutable int) { + *pt += 1; +} +~~~~ + +## Vectors + +Rust vectors are always heap-allocated and unique. A value of type +`[TYPE]` is represented by a pointer to a section of heap memory +containing any number of `TYPE` values. + +NOTE: This uniqueness is turning out to be quite awkward in practice, +and might change in the future. + +Vector literals are enclosed in square brackets. Dereferencing is done +with square brackets (zero-based): + +~~~~ +let myvec = [true, false, true, false]; +if myvec[1] { std::io::println("boom"); } +~~~~ + +By default, vectors are immutable—you can not replace their elements. +The type written as `[mutable TYPE]` is a vector with mutable +elements. Mutable vector literals are written `[mutable]` (empty) or +`[mutable 1, 2, 3]` (with elements). + +The `+` operator means concatenation when applied to vector types. +Growing a vector in Rust is not as inefficient as it looks : + +~~~~ +let myvec = [], i = 0; +while i < 100 { + myvec += [i]; + i += 1; +} +~~~~ + +Because a vector is unique, replacing it with a longer one (which is +what `+= [i]` does) is indistinguishable from appending to it +in-place. Vector representations are optimized to grow +logarithmically, so the above code generates about the same amount of +copying and reallocation as `push` implementations in most other +languages. 
+ +## Strings + +The `str` type in Rust is represented exactly the same way as a vector +of bytes (`[u8]`), except that it is guaranteed to have a trailing +null byte (for interoperability with C APIs). + +This sequence of bytes is interpreted as an UTF-8 encoded sequence of +characters. This has the advantage that UTF-8 encoded I/O (which +should really be the default for modern systems) is very fast, and +that strings have, for most intents and purposes, a nicely compact +representation. It has the disadvantage that you only get +constant-time access by byte, not by character. + +A lot of algorithms don't need constant-time indexed access (they +iterate over all characters, which `str::chars` helps with), and +for those that do, many don't need actual characters, and can operate +on bytes. For algorithms that do really need to index by character, +there's the option to convert your string to a character vector (using +`str::to_chars`). + +Like vectors, strings are always unique. You can wrap them in a shared +box to share them. Unlike vectors, there is no mutable variant of +strings. They are always immutable. + +## Resources + +Resources are data types that have a destructor associated with them. + +~~~~ +# fn close_file_desc(x: int) {} +resource file_desc(fd: int) { + close_file_desc(fd); +} +~~~~ + +This defines a type `file_desc` and a constructor of the same name, +which takes an integer. Values of such a type can not be copied, and +when they are destroyed (by going out of scope, or, when boxed, when +their box is cleaned up), their body runs. In the example above, this +would cause the given file descriptor to be closed. + +NOTE: We're considering alternative approaches for data types with +destructors. Resources might go away in the future. + +# Argument passing + +Rust datatypes are not trivial to copy (the way, for example, +JavaScript values can be copied by simply taking one or two machine +words and plunking them somewhere else). 
Shared boxes require +reference count updates; big records, tags, or unique pointers require +an arbitrary amount of data to be copied (plus updating the reference +counts of shared boxes hanging off them). + +For this reason, the default calling convention for Rust functions +leaves ownership of the arguments with the caller. The caller +guarantees that the arguments will outlive the call; the callee merely +gets access to them. + +## Safe references + +There is one catch with this approach: sometimes the compiler can +*not* statically guarantee that the argument value at the caller side +will survive to the end of the call. Another argument might indirectly +refer to it and be used to overwrite it, or a closure might assign a +new value to it. + +Fortunately, Rust tasks are single-threaded worlds, which share no +data with other tasks, and most data is immutable. This allows +most argument-passing situations to be proved safe without further +difficulty. + +Take the following program: + +~~~~ +# fn get_really_big_record() -> int { 1 } +# fn myfunc(a: int) {} +fn main() { + let x = get_really_big_record(); + myfunc(x); +} +~~~~ + +Here we know for sure that no one else has access to the `x` variable +in `main`, so we're good. But the call could also look like this: + +~~~~ +# fn myfunc(a: int, b: block()) {} +# fn get_another_record() -> int { 1 } +# let x = 1; +myfunc(x, {|| x = get_another_record(); }); +~~~~ + +Now, if `myfunc` first calls its second argument and then accesses its +first argument, it will see a different value from the one that was +passed to it. + +In such a case, the compiler will insert an implicit copy of `x`, +*except* if `x` contains something mutable, in which case a copy would +result in code that behaves differently. If copying `x` might be +expensive (for example, if it holds a vector), the compiler will emit +a warning.
+ +There are even more tricky cases, in which the Rust compiler is forced +to pessimistically assume a value will get mutated, even though it is +not sure. + +~~~~ +fn for_each(v: [mutable @int], iter: block(@int)) { + for elt in v { iter(elt); } +} +~~~~ + +For all this function knows, calling `iter` (which is a closure that +might have access to the vector that's passed as `v`) could cause the +elements in the vector to be mutated, with the effect that it can not +guarantee that the boxes will live for the duration of the call. So it +has to copy them. In this case, this will happen implicitly (bumping a +reference count is considered cheap enough to not warn about it). + +## The copy operator + +If the `for_each` function given above were to take a vector of +`{mutable a: int}` instead of `@int`, it would not be able to +implicitly copy, since if the `iter` function changes a copy of a +mutable record, the changes won't be visible in the record itself. If +we *do* want to allow copies there, we have to explicitly allow it +with the `copy` operator: + +~~~~ +type mutrec = {mutable x: int}; +fn for_each(v: [mutable mutrec], iter: block(mutrec)) { + for elt in v { iter(copy elt); } +} +~~~~ + +Adding a `copy` operator is also the way to muffle warnings about +implicit copies. + +## Other uses of safe references + +Safe references are not only used for argument passing. When you +destructure on a value in an `alt` expression, or loop over a vector +with `for`, variables bound to the inside of the given data structure +will use safe references, not copies. This means such references are +very cheap, but you'll occasionally have to copy them to ensure +safety. 
+ +~~~~ +let my_rec = {a: 4, b: [1, 2, 3]}; +alt my_rec { + {a, b} { + log(info, b); // This is okay + my_rec = {a: a + 1, b: b + [a]}; + log(info, b); // Here reference b has become invalid + } +} +~~~~ + +## Argument passing styles + +The fact that arguments are conceptually passed by safe reference does +not mean all arguments are passed by pointer. Composite types like +records and tags *are* passed by pointer, but single-word values, like +integers and pointers, are simply passed by value. Most of the time, +the programmer does not have to worry about this, as the compiler will +simply pick the most efficient passing style. There is one exception, +which will be described in the section on [generics](generic.html). + +To explicitly set the passing-style for a parameter, you prefix the +argument name with a sigil. There are two special passing styles that +are often useful. The first is by-mutable-pointer, written with a +single `&`: + +~~~~ +fn vec_push(&v: [int], elt: int) { + v += [elt]; +} +~~~~ + +This allows the function to mutate the value of the argument, *in the +caller's context*. Clearly, you are only allowed to pass things that +can actually be mutated to such a function. + +Then there is the by-copy style, written `+`. This indicates that the +function wants to take ownership of the argument value. If the caller +does not use the argument after the call, it will be 'given' to the +callee. Otherwise a copy will be made. This mode is mostly used for +functions that construct data structures. The argument will end up +being owned by the data structure, so if that can be done without a +copy, that's a win. + +~~~~ +type person = {name: str, address: str}; +fn make_person(+name: str, +address: str) -> person { + ret {name: name, address: address}; +} +~~~~ + +# Generics + +## Generic functions + +Throughout this tutorial, I've been defining functions like `for_rev` +that act only on integers. 
It is 2012, and we no longer expect to be +defining such functions again and again for every type they apply to. +Thus, Rust allows functions and datatypes to have type parameters. + +~~~~ +fn for_rev<T>(v: [T], act: block(T)) { + let i = vec::len(v); + while i > 0u { + i -= 1u; + act(v[i]); + } +} + +fn map<T, U>(v: [T], f: block(T) -> U) -> [U] { + let acc = []; + for elt in v { acc += [f(elt)]; } + ret acc; +} +~~~~ + +When defined in this way, these functions can be applied to any type +of vector, as long as the type of the block's argument and the type of +the vector's content agree with each other. + +Inside a parameterized (generic) function, the names of the type +parameters (capitalized by convention) stand for opaque types. You +can't look inside them, but you can pass them around. + +## Generic datatypes + +Generic `type` and `enum` declarations follow the same pattern: + +~~~~ +type circular_buf<T> = {start: uint, + end: uint, + buf: [mutable T]}; + +enum option<T> { some(T); none; } +~~~~ + +You can then declare a function to take a `circular_buf<u8>` or return +an `option<str>`, or even an `option<T>` if the function itself is +generic. + +The `option` type given above exists in the core library as +`option::t`, and is the way Rust programs express the thing that in C +would be a nullable pointer. The nice part is that you have to +explicitly unpack an `option` type, so accidental null pointer +dereferences become impossible. + +## Type-inference and generics + +Rust's type inferrer works very well with generics, but there are +programs that just can't be typed. + +~~~~ +let n = option::none; +# n = option::some(1); +~~~~ + +If you never do anything else with `n`, the compiler will not be able +to assign a type to it. (The same goes for `[]`, the empty vector.) 
If +you really want to have such a statement, you'll have to write it like +this: + +~~~~ +let n2: option::t<int> = option::none; +// or +let n = option::none::<int>; +~~~~ + +Note that, in a value expression, `<` already has a meaning as a +comparison operator, so you'll have to write `::<T>` to explicitly +give a type to a name that denotes a generic value. Fortunately, this +is rarely necessary. + +## Polymorphic built-ins + +There are two built-in operations that, perhaps surprisingly, act on +values of any type. It was already mentioned earlier that `log` can +take any type of value and output it. + +More interesting is that Rust also defines an ordering for values of +all datatypes, and allows you to meaningfully apply comparison +operators (`<`, `>`, `<=`, `>=`, `==`, `!=`) to them. For structural +types, the comparison happens left to right, so `"abc" < "bac"` (but +note that `"bac" < "ác"`, because the ordering acts on UTF-8 sequences +without any sophistication). + +## Kinds + +<a name="kind"></a> + +Perhaps surprisingly, the 'copy' (duplicate) operation is not defined +for all Rust types. Resource types (types with destructors) can not be +copied, and neither can any type whose copying would require copying a +resource (such as records or unique boxes containing a resource). + +This complicates handling of generic functions. If you have a type +parameter `T`, can you copy values of that type? In Rust, you can't, +unless you explicitly declare that type parameter to have copyable +'kind'. A kind is a type of type. + +~~~~ +## ignore +// This does not compile +fn head_bad<T>(v: [T]) -> T { v[0] } +// This does +fn head<T: copy>(v: [T]) -> T { v[0] } +~~~~ + +When instantiating a generic function, you can only instantiate it +with types that fit its kinds. So you could not apply `head` to a +resource type. + +Rust has three kinds: 'noncopyable', 'copyable', and 'sendable'. By +default, type parameters are considered to be noncopyable. 
You can +annotate them with the `copy` keyword to declare them copyable, and +with the `send` keyword to make them sendable. + +Sendable types are a subset of copyable types. They are types that do +not contain shared (reference counted) types, which are thus uniquely +owned by the function that owns them, and can be sent over channels to +other tasks. Most of the generic functions in the core `comm` module +take sendable types. + +## Generic functions and argument-passing + +The previous section mentioned that arguments are passed by pointer or +by value based on their type. There is one situation in which this is +difficult. If you try this program: + +~~~~ +# fn map(f: block(int) -> int, v: [int]) {} +fn plus1(x: int) -> int { x + 1 } +map(plus1, [1, 2, 3]); +~~~~ + +You will get an error message about argument passing styles +disagreeing. The reason is that generic types are always passed by +pointer, so `map` expects a function that takes its argument by +pointer. The `plus1` you defined, however, uses the default, efficient +way to pass integers, which is by value. To get around this issue, you +have to explicitly mark the arguments to a function that you want to +pass to a generic higher-order function as being passed by pointer, +using the `&&` sigil: + +~~~~ +# fn map<T, U>(f: block(T) -> U, v: [T]) {} +fn plus1(&&x: int) -> int { x + 1 } +map(plus1, [1, 2, 3]); +~~~~ + +NOTE: This is inconvenient, and we are hoping to get rid of this +restriction in the future. + +# Modules and crates + +The Rust namespace is divided into modules. Each source file starts +with its own module. + +## Local modules + +The `mod` keyword can be used to open a new, local module. In the +example below, `chicken` lives in the module `farm`, so, unless you +explicitly import it, you must refer to it by its long name, +`farm::chicken`. 
+ +~~~~ +mod farm { + fn chicken() -> str { "cluck cluck" } + fn cow() -> str { "mooo" } +} +fn main() { + std::io::println(farm::chicken()); +} +~~~~ + +Modules can be nested to arbitrary depth. + +## Crates + +The unit of independent compilation in Rust is the crate. Libraries +tend to be packaged as crates, and your own programs may consist of +one or more crates. + +When compiling a single `.rs` file, the file acts as the whole crate. +You can compile it with the `--lib` compiler switch to create a shared +library, or without, provided that your file contains a `fn main` +somewhere, to create an executable. + +It is also possible to include multiple files in a crate. For this +purpose, you create a `.rc` crate file, which references any number of +`.rs` code files. A crate file could look like this: + +~~~~ +## ignore +#[link(name = "farm", vers = "2.5", author = "mjh")]; +mod cow; +mod chicken; +mod horse; +~~~~ + +Compiling this file will cause `rustc` to look for files named +`cow.rs`, `chicken.rs`, `horse.rs` in the same directory as the `.rc` +file, compile them all together, and, depending on the presence of the +`--lib` switch, output a shared library or an executable. + +The `#[link(...)]` part provides meta information about the module, +which other crates can use to load the right module. More about that +later. + +To have a nested directory structure for your source files, you can +nest mods in your `.rc` file: + +~~~~ +## ignore +mod poultry { + mod chicken; + mod turkey; +} +~~~~ + +The compiler will now look for `poultry/chicken.rs` and +`poultry/turkey.rs`, and export their content in `poultry::chicken` +and `poultry::turkey`. You can also provide a `poultry.rs` to add +content to the `poultry` module itself. + +## Using other crates + +Having compiled a crate with `--lib`, you can use it in another crate +with a `use` directive. We've already seen `use std` in several of the +examples, which loads in the [standard library][std]. 
+ +[std]: http://doc.rust-lang.org/doc/std/index/General.html + +`use` directives can appear in a crate file, or at the top level of a +single-file `.rs` crate. They will cause the compiler to search its +library search path (which you can extend with the `-L` switch) for a Rust +crate library with the right name. + +It is possible to provide more specific information when using an +external crate. + +~~~~ +## ignore +use myfarm (name = "farm", vers = "2.7"); +~~~~ + +When a comma-separated list of name/value pairs is given after `use`, +these are matched against the attributes provided in the `link` +attribute of the crate file, and a crate is only used when the two +match. A `name` value can be given to override the name used to search +for the crate. So the above would import the `farm` crate under the +local name `myfarm`. + +Our example crate declared this set of `link` attributes: + +~~~~ +## ignore +#[link(name = "farm", vers = "2.5", author = "mjh")]; +~~~~ + +The version does not match the one provided in the `use` directive, so +unless the compiler can find another crate with the right version +somewhere, it will complain that no matching crate was found. + +## The core library + +A set of basic library routines, mostly related to built-in datatypes +and the task system, are always implicitly linked and included in any +Rust program, unless the `--no-core` compiler switch is given. + +This library is documented [here][core]. + +[core]: http://doc.rust-lang.org/doc/core/index/General.html + +## A minimal example + +Now for something that you can actually compile yourself. We have +these two files: + +~~~~ +// mylib.rs +#[link(name = "mylib", vers = "1.0")]; +fn world() -> str { "world" } +~~~~ + +~~~~ +## ignore +// main.rs +use mylib; +fn main() { std::io::println("hello " + mylib::world()); } +~~~~ + +Now compile and run like this (adjust to your platform if necessary): + +~~~~ +## notrust +> rustc --lib mylib.rs +> rustc main.rs -L .
+> ./main +"hello world" +~~~~ + +## Importing + +When using identifiers from other modules, it can get tiresome to +qualify them with the full module path every time (especially when +that path is several modules deep). Rust allows you to import +identifiers at the top of a file, module, or block. + +~~~~ +use std; +import std::io::println; +fn main() { + println("that was easy"); +} +~~~~ + +It is also possible to import just the name of a module (`import +std::io;`, then use `io::println`), to import all identifiers exported +by a given module (`import std::io::*`), or to import a specific set +of identifiers (`import math::{min, max, pi}`). + +You can rename an identifier when importing using the `=` operator: + +~~~~ +import prnt = std::io::println; +~~~~ + +## Exporting + +By default, a module exports everything that it defines. This can be +restricted with `export` directives at the top of the module or file. + +~~~~ +mod enc { + export encrypt, decrypt; + const super_secret_number: int = 10; + fn encrypt(n: int) -> int { n + super_secret_number } + fn decrypt(n: int) -> int { n - super_secret_number } +} +~~~~ + +This defines a rock-solid encryption algorithm. Code outside of the +module can refer to the `enc::encrypt` and `enc::decrypt` identifiers +just fine, but it does not have access to `enc::super_secret_number`. + +## Namespaces + +Rust uses three different namespaces. One for modules, one for types, +and one for values. This means that this code is valid: + +~~~~ +mod buffalo { + type buffalo = int; + fn buffalo(buffalo: buffalo) -> buffalo { buffalo } +} +fn main() { + let buffalo: buffalo::buffalo = 1; + buffalo::buffalo(buffalo::buffalo(buffalo)); +} +~~~~ + +You don't want to write things like that, but it *is* very practical +to not have to worry about name clashes between types, values, and +modules. This allows us to have a module `core::str`, for example, even +though `str` is a built-in type name. 
+ +## Resolution + +The resolution process in Rust simply goes up the chain of contexts, +looking for the name in each context. Nested functions and modules +create new contexts inside their parent function or module. A file +that's part of a bigger crate will have that crate's context as parent +context. + +Identifiers can shadow each other. In this program, `x` is of type +`int`: + +~~~~ +type t = str; +fn main() { + type t = int; + let x: t; +} +~~~~ + +An `import` directive will only import into the namespaces for which +identifiers are actually found. Consider this example: + +~~~~ +type bar = uint; +mod foo { fn bar() {} } +mod baz { + import foo::bar; + const x: bar = 20u; +} +~~~~ + +When resolving the type name `bar` in the `const` definition, the +resolver will first look at the module context for `baz`. This has an +import named `bar`, but that's a function, not a type, so it continues +to the top level and finds a type named `bar` defined there. + +Normally, multiple definitions of the same identifier in a scope are +disallowed. Local variables defined with `let` are an exception to +this—multiple `let` directives can redefine the same variable in a +single scope. When resolving the name of such a variable, the most +recent definition is used. + +~~~~ +fn main() { + let x = 10; + let x = x + 10; + assert x == 20; +} +~~~~ + +This makes it possible to rebind a variable without actually mutating +it, which is mostly useful for destructuring (which can rebind, but +not assign). + +# Interfaces + +Interfaces are Rust's take on value polymorphism—the thing that +object-oriented languages tend to solve with methods and inheritance. +For example, writing a function that can operate on multiple types of +collections. + +NOTE: This feature is very new, and will need a few extensions to be +applicable to more advanced use cases. + +## Declaration + +An interface consists of a set of methods.
A method is a function that +can be applied to a `self` value and a number of arguments, using the +dot notation: `self.foo(arg1, arg2)`. + +For example, we could declare the interface `to_str` for things that +can be converted to a string, with a single method of the same name: + +~~~~ +iface to_str { + fn to_str() -> str; +} +~~~~ + +## Implementation + +To actually implement an interface for a given type, the `impl` form +is used. This defines implementations of `to_str` for the `int` and +`str` types. + +~~~~ +# iface to_str { fn to_str() -> str; } +impl of to_str for int { + fn to_str() -> str { int::to_str(self, 10u) } +} +impl of to_str for str { + fn to_str() -> str { self } +} +~~~~ + +Given these, we may call `1.to_str()` to get `"1"`, or +`"foo".to_str()` to get `"foo"` again. This is basically a form of +static overloading—when the Rust compiler sees the `to_str` method +call, it looks for an implementation that matches the type with a +method that matches the name, and simply calls that. + +## Scoping + +Implementations are not globally visible. Resolving a method to an +implementation requires that implementation to be in scope. You can +import and export implementations using the name of the interface they +implement (multiple implementations with the same name can be in scope +without problems). Or you can give them an explicit name if you +prefer, using this syntax: + +~~~~ +# iface to_str { fn to_str() -> str; } +impl nil_to_str of to_str for () { + fn to_str() -> str { "()" } +} +~~~~ + +## Bounded type parameters + +The useful thing about value polymorphism is that it does not have to +be static. If object-oriented languages only let you call a method on +an object when they knew exactly which sub-type it had, that would not +get you very far. To be able to call methods on types that aren't +known at compile time, it is possible to specify 'bounds' for type +parameters. 
+ +~~~~ +# iface to_str { fn to_str() -> str; } +fn comma_sep<T: to_str>(elts: [T]) -> str { + let result = "", first = true; + for elt in elts { + if first { first = false; } + else { result += ", "; } + result += elt.to_str(); + } + ret result; +} +~~~~ + +The syntax for this is similar to the syntax for specifying that a +parameter type has to be copyable (which is, in principle, another +kind of bound). By declaring `T` as conforming to the `to_str` +interface, it becomes possible to call methods from that interface on +values of that type inside the function. It will also cause a +compile-time error when anyone tries to call `comma_sep` on an array +whose element type does not have a `to_str` implementation in scope. + +## Polymorphic interfaces + +Interfaces may contain type parameters. This defines an interface for +generalized sequence types: + +~~~~ +iface seq<T> { + fn len() -> uint; + fn iter(block(T)); +} +impl <T> of seq<T> for [T] { + fn len() -> uint { vec::len(self) } + fn iter(b: block(T)) { + for elt in self { b(elt); } + } +} +~~~~ + +Note that the implementation has to explicitly declare its +parameter `T` before using it to specify its interface type. This is +needed because it could also, for example, specify an implementation +of `seq<int>`—the `of` clause *refers* to a type, rather than defining +one. + +## Casting to an interface type + +The above allows us to define functions that polymorphically act on +values of *an* unknown type that conforms to a given interface. +However, consider this function: + +~~~~ +# iface drawable { fn draw(); } +fn draw_all<T: drawable>(shapes: [T]) { + for shape in shapes { shape.draw(); } +} +~~~~ + +You can call that on an array of circles, or an array of squares +(assuming those have suitable `drawable` interfaces defined), but not +on an array containing both circles and squares.
+ +When this is needed, an interface name can be used as a type, causing +the function to be written simply like this: + +~~~~ +# iface drawable { fn draw(); } +fn draw_all(shapes: [drawable]) { + for shape in shapes { shape.draw(); } +} +~~~~ + +There is no type parameter anymore (since there isn't a single type +that we're calling the function on). Instead, the `drawable` type is +used to refer to a type that is a reference-counted box containing a +value for which a `drawable` implementation exists, combined with +information on where to find the methods for this implementation. This +is very similar to the 'vtables' used in most object-oriented +languages. + +To construct such a value, you use the `as` operator to cast a value +to an interface type: + +~~~~ +# type circle = int; type rectangle = int; +# iface drawable { fn draw(); } +# impl of drawable for int { fn draw() {} } +# fn new_circle() -> int { 1 } +# fn new_rectangle() -> int { 2 } +# fn draw_all(shapes: [drawable]) {} +let c: circle = new_circle(); +let r: rectangle = new_rectangle(); +draw_all([c as drawable, r as drawable]); +~~~~ + +This will store the value into a box, along with information about the +implementation (which is looked up in the scope of the cast). The +`drawable` type simply refers to such boxes, and calling methods on it +always works, no matter what implementations are in scope. + +Note that the allocation of a box is somewhat more expensive than +simply using a type parameter and passing in the value as-is, and much +more expensive than statically resolved method calls. + +## Interface-less implementations + +If you only intend to use an implementation for static overloading, +and there is no interface available that it conforms to, you are free +to leave off the `of` clause. 
+ +~~~~ +# type currency = (); +# fn mk_currency(x: int, s: str) {} +impl int_util for int { + fn times(b: block(int)) { + let i = 0; + while i < self { b(i); i += 1; } + } + fn dollars() -> currency { + mk_currency(self, "USD") + } +} +~~~~ + +This allows cutesy things like `send_payment(10.dollars())`. And the +nice thing is that it's fully scoped, so the uneasy feeling that +anybody with experience in object-oriented languages (with the +possible exception of Rubyists) gets at the sight of such things is +not justified. It's harmless! + +# Interacting with foreign code + +One of Rust's aims, as a system programming language, is to +interoperate well with C code. + +We'll start with an example. It's a bit bigger than usual, and +contains a number of new concepts. We'll go over it one piece at a +time. + +This is a program that uses OpenSSL's `SHA1` function to compute the +hash of its first command-line argument, which it then converts to a +hexadecimal string and prints to standard output. If you have the +OpenSSL libraries installed, it should 'just work'. + +~~~~ +use std; + +native mod crypto { + fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; +} + +fn as_hex(data: [u8]) -> str { + let acc = ""; + for byte in data { acc += #fmt("%02x", byte as uint); } + ret acc; +} + +fn sha1(data: str) -> str unsafe { + let bytes = str::bytes(data); + let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), + vec::len(bytes), ptr::null()); + ret as_hex(vec::unsafe::from_buf(hash, 20u)); +} + +fn main(args: [str]) { + std::io::println(sha1(args[1])); +} +~~~~ + +## Native modules + +Before we can call `SHA1`, we have to declare it. That is what this +part of the program is responsible for: + +~~~~ +native mod crypto { + fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; +} +~~~~ + +A `native` module declaration tells the compiler that the program +should be linked with a library by that name, and that the given list +of functions are available in that library. 
+ +In this case, it'll change the name `crypto` to a shared library name +in a platform-specific way (`libcrypto.so` on Linux, for example), and +link that in. If you want the module to have a different name from the +actual library, you can use the `"link_name"` attribute, like: + +~~~~ +#[link_name = "crypto"] +native mod something { + fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; +} +~~~~ + +## Native calling conventions + +Most native C code uses the cdecl calling convention, so that is what +Rust uses by default when calling native functions. Some native functions, +most notably the Windows API, use other calling conventions, so Rust +provides a way to hint to the compiler which is expected by using +the `"abi"` attribute: + +~~~~ +#[cfg(target_os = "win32")] +#[abi = "stdcall"] +native mod kernel32 { + fn SetEnvironmentVariableA(n: *u8, v: *u8) -> int; +} +~~~~ + +The `"abi"` attribute applies to a native mod (it can not be applied +to a single function within a module), and must be either `"cdecl"` +or `"stdcall"`. Other conventions may be defined in the future. + +## Unsafe pointers + +The native `SHA1` function is declared to take three arguments, and +return a pointer. + +~~~~ +# native mod crypto { +fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; +# } +~~~~ + +When declaring the argument types to a foreign function, the Rust +compiler has no way to check whether your declaration is correct, so +you have to be careful. If you get the number or types of the +arguments wrong, you're likely to get a segmentation fault. Or, +probably even worse, your code will work on one platform, but break on +another. + +In this case, `SHA1` is defined as taking two `unsigned char*` +arguments and one `unsigned long`. The Rust equivalents are `*u8` +unsafe pointers and a `uint` (which, like `unsigned long`, is a +machine-word-sized type). + +Unsafe pointers can be created through various functions in the +standard lib, usually with `unsafe` somewhere in their name.
You can +dereference an unsafe pointer with the `*` operator, but use +caution—unlike Rust's other pointer types, unsafe pointers are +completely unmanaged, so they might point at invalid memory, or be +null pointers. + +## Unsafe blocks + +The `sha1` function is the most obscure part of the program. + +~~~~ +# mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } +# fn as_hex(data: [u8]) -> str { "hi" } +fn sha1(data: str) -> str unsafe { + let bytes = str::bytes(data); + let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), + vec::len(bytes), ptr::null()); + ret as_hex(vec::unsafe::from_buf(hash, 20u)); +} +~~~~ + +Firstly, what does the `unsafe` keyword at the top of the function +mean? `unsafe` is a block modifier—it declares the block following it +to be known to be unsafe. + +Some operations, like dereferencing unsafe pointers or calling +functions that have been marked unsafe, are only allowed inside unsafe +blocks. With the `unsafe` keyword, you're telling the compiler 'I know +what I'm doing'. The main motivation for such an annotation is that +when you have a memory error (and you will, if you're using unsafe +constructs), you have some idea where to look—it will most likely be +caused by some unsafe code. + +Unsafe blocks isolate unsafety. Unsafe functions, on the other hand, +advertise it to the world. An unsafe function is written like this: + +~~~~ +unsafe fn kaboom() { "I'm harmless!"; } +~~~~ + +This function can only be called from an unsafe block or another +unsafe function. + +## Pointer fiddling + +The standard library defines a number of helper functions for dealing +with unsafe data, casting between types, and generally subverting +Rust's safety mechanisms. + +Let's look at our `sha1` function again.
+ +~~~~ +# mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } +# fn as_hex(data: [u8]) -> str { "hi" } +# fn x(data: str) -> str unsafe { +let bytes = str::bytes(data); +let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), + vec::len(bytes), ptr::null()); +ret as_hex(vec::unsafe::from_buf(hash, 20u)); +# } +~~~~ + +The `str::bytes` function is perfectly safe, it converts a string to +an `[u8]`. This byte array is then fed to `vec::unsafe::to_ptr`, which +returns an unsafe pointer to its contents. + +This pointer will become invalid as soon as the vector it points into +is cleaned up, so you should be very careful how you use it. In this +case, the local variable `bytes` outlives the pointer, so we're good. + +Passing a null pointer as third argument to `SHA1` causes it to use a +static buffer, and thus save us the effort of allocating memory +ourselves. `ptr::null` is a generic function that will return an +unsafe null pointer of the correct type (Rust generics are awesome +like that—they can take the right form depending on the type that they +are expected to return). + +Finally, `vec::unsafe::from_buf` builds up a new `[u8]` from the +unsafe pointer that was returned by `SHA1`. SHA1 digests are always +twenty bytes long, so we can pass `20u` for the length of the new +vector. + +## Passing structures + +C functions often take pointers to structs as arguments. Since Rust +records are binary-compatible with C structs, Rust programs can call +such functions directly. + +This program uses the Posix function `gettimeofday` to get a +microsecond-resolution timer. 
+ +~~~~ +use std; +type timeval = {mutable tv_sec: u32, + mutable tv_usec: u32}; +#[nolink] +native mod libc { + fn gettimeofday(tv: *timeval, tz: *()) -> i32; +} +fn unix_time_in_microseconds() -> u64 unsafe { + let x = {mutable tv_sec: 0u32, mutable tv_usec: 0u32}; + libc::gettimeofday(ptr::addr_of(x), ptr::null()); + ret (x.tv_sec as u64) * 1000_000_u64 + (x.tv_usec as u64); +} +~~~~ + +The `#[nolink]` attribute indicates that there's no native library to link +in. The standard C library is already linked with Rust programs. + +A `timeval`, in C, is a struct with two 32-bit integers. Thus, we +define a record type with the same contents, and declare +`gettimeofday` to take a pointer to such a record. + +The second argument to `gettimeofday` (the time zone) is not used by +this program, so it simply declares it to be a pointer to the nil +type. Since null pointers look the same, no matter which type they are +supposed to point at, this is safe. + +# Tasks + +Rust supports a system of lightweight tasks, similar to what is found +in Erlang or other actor systems. Rust tasks communicate via messages +and do not share data. However, it is possible to send data without +copying it by making use of [unique boxes][uniques], which allow the +sending task to release ownership of a value, so that the receiving +task can keep on using it. + +[uniques]: data.html#unique-box + +NOTE: As Rust evolves, we expect the Task API to grow and change +somewhat. The tutorial documents the API as it exists today. + +## Spawning a task + +Spawning a task is done using the various spawn functions in the +module `task`.
Let's begin with the simplest one, `task::spawn()`: + +~~~~ +let some_value = 22; +let child_task = task::spawn {|| + std::io::println("This executes in the child task."); + std::io::println(#fmt("%d", some_value)); +}; +~~~~ + +The argument to `task::spawn()` is a [unique +closure](func.html#unique) of type `fn~()`, meaning that it takes no +arguments and generates no return value. The effect of `task::spawn()` +is to fire up a child task that will execute the closure in parallel +with the creator. The result is a task id, here stored into the +variable `child_task`. + +## Ports and channels + +Now that we have spawned a child task, it would be nice if we could +communicate with it. This is done by creating a *port* with an +associated *channel*. A port is simply a location to receive messages +of a particular type. A channel is used to send messages to a port. +For example, imagine we wish to perform two expensive computations +in parallel. We might write something like: + +~~~~ +# fn some_expensive_computation() -> int { 42 } +# fn some_other_expensive_computation() {} +let port = comm::port::<int>(); +let chan = comm::chan::<int>(port); +let child_task = task::spawn {|| + let result = some_expensive_computation(); + comm::send(chan, result); +}; +some_other_expensive_computation(); +let result = comm::recv(port); +~~~~ + +Let's walk through this code line-by-line. The first line creates a +port for receiving integers: + +~~~~ +let port = comm::port::<int>(); + +~~~~ +This port is where we will receive the message from the child task +once it is complete. The second line creates a channel for sending +integers to the port `port`: + +~~~~ +# let port = comm::port::<int>(); +let chan = comm::chan::<int>(port); +~~~~ + +The channel will be used by the child to send a message to the port. 
+The next statement actually spawns the child: + +~~~~ +# fn some_expensive_computation() -> int { 42 } +# let port = comm::port::<int>(); +# let chan = comm::chan::<int>(port); +let child_task = task::spawn {|| + let result = some_expensive_computation(); + comm::send(chan, result); +}; +~~~~ + +This child will perform the expensive computation and send the result +over the channel. Finally, the parent continues by performing +some other expensive computation and then waiting for the child's result +to arrive on the port: + +~~~~ +# fn some_other_expensive_computation() {} +# let port = comm::port::<int>(); +some_other_expensive_computation(); +let result = comm::recv(port); +~~~~ + +## Creating a task with a bi-directional communication path + +A very common thing to do is to spawn a child task where the parent +and child both need to exchange messages with each other. The function +`task::spawn_connected()` supports this pattern. We'll look briefly at +how it is used. + +To see how `spawn_connected()` works, we will create a child task +which receives `uint` messages, converts them to a string, and sends +the string in response. The child terminates when `0` is received. +Here is the function which implements the child task: + +~~~~ +fn stringifier(from_par: comm::port<uint>, + to_par: comm::chan<str>) { + let value: uint; + do { + value = comm::recv(from_par); + comm::send(to_par, uint::to_str(value, 10u)); + } while value != 0u; +} +~~~~ + +You can see that the function takes two parameters. The first is a +port used to receive messages from the parent, and the second is a +channel used to send messages to the parent. The body itself simply +loops, reading from the `from_par` port and then sending its response +to the `to_par` channel. The actual response itself is simply the +stringified version of the received value, `uint::to_str(value, 10u)`.
+ +Here is the code for the parent task: + +~~~~ +# fn stringifier(from_par: comm::port<uint>, +# to_par: comm::chan<str>) {} +fn main() { + let t = task::spawn_connected(stringifier); + comm::send(t.to_child, 22u); + assert comm::recv(t.from_child) == "22"; + comm::send(t.to_child, 23u); + assert comm::recv(t.from_child) == "23"; + comm::send(t.to_child, 0u); + assert comm::recv(t.from_child) == "0"; +} +~~~~ + +The call to `spawn_connected()` on the first line will instantiate the +various ports and channels and start up the child task. The returned +value, `t`, is a record of type `task::connected_task<uint,str>`. In +addition to the task id of the child, this record defines two fields, +`from_child` and `to_child`, which contain the port and channel +respectively for communicating with the child. Those fields are used +here to send and receive three messages from the child task. + +## Joining a task + +The function `spawn_joinable()` is used to spawn a task that can later +be joined. This is implemented by having the child task send a message +when it has completed (either successfully or by failing). Therefore, +`spawn_joinable()` returns a structure containing both the task ID and +the port where this message will be sent---this structure type is +called `task::joinable_task`. The structure can be passed to +`task::join()`, which simply blocks on the port, waiting to receive +the message from the child task. + +## The supervisor relationship + +By default, failures in Rust propagate upward through the task tree. +We say that each task is supervised by its parent, meaning that if the +task fails, that failure is propagated to the parent task, which will +fail sometime later. This propagation can be disabled by using the +function `task::unsupervise()`, which disables error propagation from +the current task to its parent. + +# Testing + +The Rust language has a facility for testing built into the language.
+Tests can be interspersed with other code, and annotated with the +`#[test]` attribute. + +~~~~ +use std; + +fn twice(x: int) -> int { x + x } + +#[test] +fn test_twice() { + let i = -100; + while i < 100 { + assert twice(i) == 2 * i; + i += 1; + } +} +~~~~ + +When you compile the program normally, the `test_twice` function will +not be included. To compile and run such tests, compile with the +`--test` flag, and then run the result: + +~~~~ +## notrust +> rustc --test twice.rs +> ./twice +running 1 tests +test test_twice ... ok +result: ok. 1 passed; 0 failed; 0 ignored +~~~~ + +Or, if we change the file to fail, for example by replacing `x + x` +with `x + 1`: + +~~~~ +## notrust +running 1 tests +test test_twice ... FAILED +failures: + test_twice +result: FAILED. 0 passed; 1 failed; 0 ignored +~~~~ + +You can pass a command-line argument to a program compiled with +`--test` to run only the tests whose name matches the given string. If +we had, for example, test functions `test_twice`, `test_once_1`, and +`test_once_2`, running our program with `./twice test_once` would run +the latter two, and running it with `./twice test_once_2` would run +only the last. + +To indicate that a test is supposed to fail instead of pass, you can +give it a `#[should_fail]` attribute. + +~~~~ +use std; + +fn divide(a: float, b: float) -> float { + if b == 0f { fail; } + a / b +} + +#[test] +#[should_fail] +fn divide_by_zero() { divide(1f, 0f); } +~~~~ + +To disable a test completely, add an `#[ignore]` attribute. Running a +test runner (the program compiled with `--test`) with an `--ignored` +command-line flag will cause it to also run the tests labelled as +ignored. + +A program compiled as a test runner will have the configuration flag +`test` defined, so that you can add code that won't be included in a +normal compile with the `#[cfg(test)]` attribute (see [conditional +compilation](syntax.md#conditional)). 
diff --git a/doc/tutorial/args.md b/doc/tutorial/args.md deleted file mode 100644 index d02e9236c57..00000000000 --- a/doc/tutorial/args.md +++ /dev/null @@ -1,139 +0,0 @@ -# Argument passing - -Rust datatypes are not trivial to copy (the way, for example, -JavaScript values can be copied by simply taking one or two machine -words and plunking them somewhere else). Shared boxes require -reference count updates, big records, tags, or unique pointers require -an arbitrary amount of data to be copied (plus updating the reference -counts of shared boxes hanging off them). - -For this reason, the default calling convention for Rust functions -leaves ownership of the arguments with the caller. The caller -guarantees that the arguments will outlive the call, the callee merely -gets access to them. - -## Safe references - -There is one catch with this approach: sometimes the compiler can -*not* statically guarantee that the argument value at the caller side -will survive to the end of the call. Another argument might indirectly -refer to it and be used to overwrite it, or a closure might assign a -new value to it. - -Fortunately, Rust tasks are single-threaded worlds, which share no -data with other tasks, and that most data is immutable. This allows -most argument-passing situations to be proved safe without further -difficulty. - -Take the following program: - - # fn get_really_big_record() -> int { 1 } - # fn myfunc(a: int) {} - fn main() { - let x = get_really_big_record(); - myfunc(x); - } - -Here we know for sure that no one else has access to the `x` variable -in `main`, so we're good. But the call could also look like this: - - # fn myfunc(a: int, b: block()) {} - # fn get_another_record() -> int { 1 } - # let x = 1; - myfunc(x, {|| x = get_another_record(); }); - -Now, if `myfunc` first calls its second argument and then accesses its -first argument, it will see a different value from the one that was -passed to it. 
- -In such a case, the compiler will insert an implicit copy of `x`, -*except* if `x` contains something mutable, in which case a copy would -result in code that behaves differently. If copying `x` might be -expensive (for example, if it holds a vector), the compiler will emit -a warning. - -There are even more tricky cases, in which the Rust compiler is forced -to pessimistically assume a value will get mutated, even though it is -not sure. - - fn for_each(v: [mutable @int], iter: block(@int)) { - for elt in v { iter(elt); } - } - -For all this function knows, calling `iter` (which is a closure that -might have access to the vector that's passed as `v`) could cause the -elements in the vector to be mutated, with the effect that it can not -guarantee that the boxes will live for the duration of the call. So it -has to copy them. In this case, this will happen implicitly (bumping a -reference count is considered cheap enough to not warn about it). - -## The copy operator - -If the `for_each` function given above were to take a vector of -`{mutable a: int}` instead of `@int`, it would not be able to -implicitly copy, since if the `iter` function changes a copy of a -mutable record, the changes won't be visible in the record itself. If -we *do* want to allow copies there, we have to explicitly allow it -with the `copy` operator: - - type mutrec = {mutable x: int}; - fn for_each(v: [mutable mutrec], iter: block(mutrec)) { - for elt in v { iter(copy elt); } - } - -Adding a `copy` operator is also the way to muffle warnings about -implicit copies. - -## Other uses of safe references - -Safe references are not only used for argument passing. When you -destructure on a value in an `alt` expression, or loop over a vector -with `for`, variables bound to the inside of the given data structure -will use safe references, not copies. This means such references are -very cheap, but you'll occasionally have to copy them to ensure -safety. 
- - let my_rec = {a: 4, b: [1, 2, 3]}; - alt my_rec { - {a, b} { - log(info, b); // This is okay - my_rec = {a: a + 1, b: b + [a]}; - log(info, b); // Here reference b has become invalid - } - } - -## Argument passing styles - -The fact that arguments are conceptually passed by safe reference does -not mean all arguments are passed by pointer. Composite types like -records and tags *are* passed by pointer, but single-word values, like -integers and pointers, are simply passed by value. Most of the time, -the programmer does not have to worry about this, as the compiler will -simply pick the most efficient passing style. There is one exception, -which will be described in the section on [generics](generic.html). - -To explicitly set the passing-style for a parameter, you prefix the -argument name with a sigil. There are two special passing styles that -are often useful. The first is by-mutable-pointer, written with a -single `&`: - - fn vec_push(&v: [int], elt: int) { - v += [elt]; - } - -This allows the function to mutate the value of the argument, *in the -caller's context*. Clearly, you are only allowed to pass things that -can actually be mutated to such a function. - -Then there is the by-copy style, written `+`. This indicates that the -function wants to take ownership of the argument value. If the caller -does not use the argument after the call, it will be 'given' to the -callee. Otherwise a copy will be made. This mode is mostly used for -functions that construct data structures. The argument will end up -being owned by the data structure, so if that can be done without a -copy, that's a win. 
- - type person = {name: str, address: str}; - fn make_person(+name: str, +address: str) -> person { - ret {name: name, address: address}; - } diff --git a/doc/tutorial/build.js b/doc/tutorial/build.js deleted file mode 100644 index 49e3e0a7b86..00000000000 --- a/doc/tutorial/build.js +++ /dev/null @@ -1,109 +0,0 @@ -var fs = require("fs"), md = require("./lib/markdown"); -CodeMirror = require("./lib/codemirror-node"); -require("./lib/codemirror-rust"); - -md.Markdown.dialects.Maruku.block.code = function code(block, next) { - if (block.match(/^ /)) { - var text = String(block); - while (next.length && next[0].match(/^ /)) text += "\n" + String(next.shift()); - var leaveAlone, accum = [], curstr = "", curstyle = null; - text = text.split("\n").map(function(line) { - line = line.slice(4); - if (line == "## notrust") leaveAlone = true; - return line; - }).filter(function(x) { return !/^##? /.test(x); }).join("\n"); - if (leaveAlone) return [["pre", {}, text]]; - function add(str, style) { - if (style != curstyle) { - if (curstyle) accum.push(["span", {"class": "cm-" + curstyle}, curstr]); - else if (curstr) accum.push(curstr); - curstr = str; curstyle = style; - } else curstr += str; - } - CodeMirror.runMode(text, "rust", add); - add("", "bogus"); // Flush pending string. - return [["pre", {"class": "cm-s-default"}].concat(accum)]; - } -}; - -function markdown(str) { return md.toHTML(str, "Maruku"); } - -function fileDates(file, c) { - function takeTime(str) { - return Number(str.match(/^(\S+)\s/)[1]) * 1000; - } - require("child_process").exec("git rev-list --timestamp HEAD -- " + file + ".md", function(err, stdout) { - if (err != null) { console.log("Failed to run git rev-list"); return; } - var history = stdout.split("\n"); - if (history.length && history[history.length-1] == "") history.pop(); - var created = history.length ? 
takeTime(history[history.length-1]) : Date.now(); - var modified = created; - if (history.length > 1) modified = takeTime(history[0]); - c(created, modified); - }); -} - -function head(title) { - return "<html><head><link rel='stylesheet' href='style.css' type='text/css'>" + - "<link rel='stylesheet' href='default.css' type='text/css'>" + - "<meta http-equiv='Content-Type' content='text/html; charset=utf-8'><title>" + - title + "</title></head><body>\n"; -} - -function foot(created, modified) { - var r = "<p class='head'>" - var crStr = formatTime(created), modStr = formatTime(modified); - if (created) r += "Created " + crStr; - if (crStr != modStr) - r += (created ? ", l" : "L") + "ast modified on " + modStr; - return r + "</p>"; -} - -function formatTime(tm) { - var d = new Date(tm); - var months = ["January", "February", "March", "April", "May", "June", "July", "August", - "September", "October", "November", "December"]; - return months[d.getMonth()] + " " + d.getDate() + ", " + d.getFullYear(); -} - -var files = fs.readFileSync("order", "utf8").split("\n").filter(function(x) { return x; }); -var max_modified = 0; -var sections = []; - -// Querying git for modified dates has to be done async in node it seems... 
-var queried = 0; -for (var i = 0; i < files.length; ++i) - (function(i) { // Make lexical i stable - fileDates(files[i], function(ctime, mtime) { - sections[i] = { - text: fs.readFileSync(files[i] + ".md", "utf8"), - ctime: ctime, mtime: mtime, - name: files[i], - }; - max_modified = Math.max(mtime, max_modified); - if (++queried == files.length) buildTutorial(); - }); - })(i); - -function htmlName(i) { return sections[i].name + ".html"; } - -function buildTutorial() { - var index = head("Rust language tutorial") + "<div id='content'>" + - markdown(fs.readFileSync("index.md", "utf8")) + "<ol>"; - for (var i = 0; i < sections.length; ++i) { - var s = sections[i]; - var html = htmlName(i); - var title = s.text.match(/^# (.*)\n/)[1]; - index += '<li><a href="' + html + '">' + title + "</a></li>"; - - var nav = '<p class="head">Section ' + (i + 1) + ' of the Rust language tutorial.<br>'; - if (i > 0) nav += '<a href="' + htmlName(i-1) + '">« Section ' + i + "</a> | "; - nav += '<a href="index.html">Index</a>'; - if (i + 1 < sections.length) nav += ' | <a href="' + htmlName(i+1) + '">Section ' + (i + 2) + " »</a>"; - nav += "</p>"; - fs.writeFileSync("web/" + html, head(title) + nav + '<div id="content">' + markdown(s.text) + "</div>" + - nav + foot(s.ctime, s.mtime) + "</body></html>"); - } - index += "</ol></div>" + foot(null, max_modified) + "</body></html>"; - fs.writeFileSync("web/index.html", index); -} diff --git a/doc/tutorial/control.md b/doc/tutorial/control.md deleted file mode 100644 index e13d976aa79..00000000000 --- a/doc/tutorial/control.md +++ /dev/null @@ -1,200 +0,0 @@ -# Control structures - -## Conditionals - -We've seen `if` pass by a few times already. 
To recap, braces are -compulsory, an optional `else` clause can be appended, and multiple -`if`/`else` constructs can be chained together: - - if false { - std::io::println("that's odd"); - } else if true { - std::io::println("right"); - } else { - std::io::println("neither true nor false"); - } - -The condition given to an `if` construct *must* be of type boolean (no -implicit conversion happens). If the arms return a value, this value -must be of the same type for every arm in which control reaches the -end of the block: - - fn signum(x: int) -> int { - if x < 0 { -1 } - else if x > 0 { 1 } - else { ret 0; } - } - -The `ret` (return) and its semicolon could have been left out without -changing the meaning of this function, but it illustrates that you -will not get a type error in this case, although the last arm doesn't -have type `int`, because control doesn't reach the end of that arm -(`ret` is jumping out of the function). - -## Pattern matching - -Rust's `alt` construct is a generalized, cleaned-up version of C's -`switch` construct. You provide it with a value and a number of arms, -each labelled with a pattern, and it will execute the arm that matches -the value. - - # let my_number = 1; - alt my_number { - 0 { std::io::println("zero"); } - 1 | 2 { std::io::println("one or two"); } - 3 to 10 { std::io::println("three to ten"); } - _ { std::io::println("something else"); } - } - -There is no 'falling through' between arms, as in C—only one arm is -executed, and it doesn't have to explicitly `break` out of the -construct when it is finished. - -The part to the left of each arm is called the pattern. Literals are -valid patterns, and will match only their own value. The pipe operator -(`|`) can be used to assign multiple patterns to a single arm. Ranges -of numeric literal patterns can be expressed with `to`. The underscore -(`_`) is a wildcard pattern that matches everything. 
- -If the arm with the wildcard pattern was left off in the above -example, running it on a number greater than ten (or negative) would -cause a run-time failure. When no arm matches, `alt` constructs do not -silently fall through—they blow up instead. - -A powerful application of pattern matching is *destructuring*, where -you use the matching to get at the contents of data types. Remember -that `(float, float)` is a tuple of two floats: - - fn angle(vec: (float, float)) -> float { - alt vec { - (0f, y) if y < 0f { 1.5 * float::consts::pi } - (0f, y) { 0.5 * float::consts::pi } - (x, y) { float::atan(y / x) } - } - } - -A variable name in a pattern matches everything, *and* binds that name -to the value of the matched thing inside of the arm block. Thus, `(0f, -y)` matches any tuple whose first element is zero, and binds `y` to -the second element. `(x, y)` matches any tuple, and binds both -elements to a variable. - -Any `alt` arm can have a guard clause (written `if EXPR`), which is -an expression of type `bool` that determines, after the pattern is -found to match, whether the arm is taken or not. The variables bound -by the pattern are available in this guard expression. - -## Destructuring let - -To a limited extent, it is possible to use destructuring patterns when -declaring a variable with `let`. For example, you can say this to -extract the fields from a tuple: - - # fn get_tuple_of_two_ints() -> (int, int) { (1, 1) } - let (a, b) = get_tuple_of_two_ints(); - -This will introduce two new variables, `a` and `b`, bound to the -content of the tuple. - -You may only use irrefutable patterns—patterns that can never fail to -match—in let bindings, though. Things like literals, which only match -a specific value, are not allowed. - -## Loops - -`while` produces a loop that runs as long as its given condition -(which must have type `bool`) evaluates to true. 
Inside a loop, the -keyword `break` can be used to abort the loop, and `cont` can be used -to abort the current iteration and continue with the next. - - let x = 5; - while true { - x += x - 3; - if x % 5 == 0 { break; } - std::io::println(int::str(x)); - } - -This code prints out a weird sequence of numbers and stops as soon as -it finds one that can be divided by five. - -There's also `while`'s ugly cousin, `do`/`while`, which does not check -its condition on the first iteration, using traditional syntax: - - # fn eat_cake() {} - # fn any_cake_left() -> bool { false } - do { - eat_cake(); - } while any_cake_left(); - -When iterating over a vector, use `for` instead. - - for elt in ["red", "green", "blue"] { - std::io::println(elt); - } - -This will go over each element in the given vector (a three-element -vector of strings, in this case), and repeatedly execute the body with -`elt` bound to the current element. You may add an optional type -declaration (`elt: str`) for the iteration variable if you want. - -For more involved iteration, such as going over the elements of a hash -table, Rust uses higher-order functions. We'll come back to those in a -moment. - -## Failure - -The `fail` keyword causes the current [task][tasks] to fail. You use -it to indicate unexpected failure, much like you'd use `exit(1)` in a -C program, except that in Rust, it is possible for other tasks to -handle the failure, allowing the program to continue running. - -`fail` takes an optional argument, which must have type `str`. Trying -to access a vector out of bounds, or running a pattern match with no -matching clauses, both result in the equivalent of a `fail`. - -[tasks]: task.html - -## Logging - -Rust has a built-in logging mechanism, using the `log` statement. -Logging is polymorphic—any type of value can be logged, and the -runtime will do its best to output a textual representation of the -value. 
- - log(warn, "hi"); - log(error, (1, [2.5, -1.8])); - -The first argument is the log level (levels `info`, `warn`, and -`error` are predefined), and the second is the value to log. By -default, you *will not* see the output of that first log statement, -which has `warn` level. The environment variable `RUST_LOG` controls -which log level is used. It can contain a comma-separated list of -paths for modules that should be logged. For example, running `rustc` -with `RUST_LOG=rustc::front::attr` will turn on logging in its -attribute parser. If you compile a program named `foo.rs`, its -top-level module will be called `foo`, and you can set `RUST_LOG` to -`foo` to enable `warn` and `info` logging for the module. - -Turned-off `log` statements impose minimal overhead on the code that -contains them, so except in code that needs to be really, really fast, -you should feel free to scatter around debug logging statements, and -leave them in. - -Three macros that combine text-formatting (as with `#fmt`) and logging -are available. These take a string and any number of format arguments, -and will log the formatted string: - - # fn get_error_string() -> str { "boo" } - #warn("only %d seconds remaining", 10); - #error("fatal: %s", get_error_string()); - -## Assertions - -The keyword `assert`, followed by an expression with boolean type, -will check that the given expression results in `true`, and cause a -failure otherwise. It is typically used to double-check things that -*should* hold at a certain point in a program. - - let x = 100; - while (x > 10) { x -= 10; } - assert x == 10; diff --git a/doc/tutorial/data.md b/doc/tutorial/data.md deleted file mode 100644 index 79a0cb85ac5..00000000000 --- a/doc/tutorial/data.md +++ /dev/null @@ -1,334 +0,0 @@ -# Datatypes - -Rust datatypes are, by default, immutable. The core datatypes of Rust -are structural records and 'enums' (tagged unions, algebraic data -types). 
- - type point = {x: float, y: float}; - enum shape { - circle(point, float); - rectangle(point, point); - } - let my_shape = circle({x: 0.0, y: 0.0}, 10.0); - -## Records - -Rust record types are written `{field1: TYPE, field2: TYPE [, ...]}`, -and record literals are written in the same way, but with expressions -instead of types. They are quite similar to C structs, and even laid -out the same way in memory (so you can read from a Rust struct in C, -and vice-versa). - -The dot operator is used to access record fields (`mypoint.x`). - -Fields that you want to mutate must be explicitly marked as such. For -example... - - type stack = {content: [int], mutable head: uint}; - -With such a type, you can do `mystack.head += 1u`. If `mutable` were -omitted from the type, such an assignment would result in a type -error. - -To 'update' an immutable record, you use functional record update -syntax, by ending a record literal with the keyword `with`: - - let oldpoint = {x: 10f, y: 20f}; - let newpoint = {x: 0f with oldpoint}; - assert newpoint == {x: 0f, y: 20f}; - -This will create a new struct, copying all the fields from `oldpoint` -into it, except for the ones that are explicitly set in the literal. - -Rust record types are *structural*. This means that `{x: float, y: -float}` is not just a way to define a new type, but is the actual name -of the type. Record types can be used without first defining them. If -module A defines `type point = {x: float, y: float}`, and module B, -without knowing anything about A, defines a function that returns an -`{x: float, y: float}`, you can use that return value as a `point` in -module A. (Remember that `type` defines an additional name for a type, -not an actual new type.) - -## Record patterns - -Records can be destructured on in `alt` patterns. The basic syntax is -`{fieldname: pattern, ...}`, but the pattern for a field can be -omitted as a shorthand for simply binding the variable with the same -name as the field. 
- - # let mypoint = {x: 0f, y: 0f}; - alt mypoint { - {x: 0f, y: y_name} { /* Provide sub-patterns for fields */ } - {x, y} { /* Simply bind the fields */ } - } - -The field names of a record do not have to appear in a pattern in the -same order they appear in the type. When you are not interested in all -the fields of a record, a record pattern may end with `, _` (as in -`{field1, _}`) to indicate that you're ignoring all other fields. - -## Enums - -Enums are datatypes that have several different representations. For -example, the type shown earlier: - - # type point = {x: float, y: float}; - enum shape { - circle(point, float); - rectangle(point, point); - } - -A value of this type is either a circle, in which case it contains a -point record and a float, or a rectangle, in which case it contains -two point records. The run-time representation of such a value -includes an identifier of the actual form that it holds, much like the -'tagged union' pattern in C, but with better ergonomics. - -The above declaration will define a type `shape` that can be used to -refer to such shapes, and two functions, `circle` and `rectangle`, -which can be used to construct values of the type (taking arguments of -the specified types). So `circle({x: 0f, y: 0f}, 10f)` is the way to -create a new circle. - -Enum variants do not have to have parameters. This, for example, is -equivalent to a C enum: - - enum direction { - north; - east; - south; - west; - } - -This will define `north`, `east`, `south`, and `west` as constants, -all of which have type `direction`. - -When the enum is C-like, that is, none of the variants have parameters, -it is possible to explicitly set the discriminator values to an integer -value: - - enum color { - red = 0xff0000; - green = 0x00ff00; - blue = 0x0000ff; - } - -If an explicit discriminator is not specified for a variant, the value -defaults to the value of the previous variant plus one. 
If the first -variant does not have a discriminator, it defaults to 0. For example, -the value of `north` is 0, `east` is 1, etc. - -When an enum is C-like the `as` cast operator can be used to get the -discriminator's value. - -<a name="single_variant_enum"></a> - -There is a special case for enums with a single variant. These are -used to define new types in such a way that the new name is not just a -synonym for an existing type, but its own distinct type. If you say: - - enum gizmo_id = int; - -That is a shorthand for this: - - enum gizmo_id { gizmo_id(int); } - -Enum types like this can have their content extracted with the -dereference (`*`) unary operator: - - # enum gizmo_id = int; - let my_gizmo_id = gizmo_id(10); - let id_int: int = *my_gizmo_id; - -## Enum patterns - -For enum types with multiple variants, destructuring is the only way to -get at their contents. All variant constructors can be used as -patterns, as in this definition of `area`: - - # type point = {x: float, y: float}; - # enum shape { circle(point, float); rectangle(point, point); } - fn area(sh: shape) -> float { - alt sh { - circle(_, size) { float::consts::pi * size * size } - rectangle({x, y}, {x: x2, y: y2}) { (x2 - x) * (y2 - y) } - } - } - -Another example: - - # type point = {x: float, y: float}; - # enum direction { north; east; south; west; } - fn point_from_direction(dir: direction) -> point { - alt dir { - north { {x: 0f, y: 1f} } - east { {x: 1f, y: 0f} } - south { {x: 0f, y: -1f} } - west { {x: -1f, y: 0f} } - } - } - -## Tuples - -Tuples in Rust behave exactly like records, except that their fields -do not have names (and can thus not be accessed with dot notation). -Tuples can have any arity except for 0 or 1 (though you may see nil, -`()`, as the empty tuple if you like). 
- - let mytup: (int, int, float) = (10, 20, 30.0); - alt mytup { - (a, b, c) { log(info, a + b + (c as int)); } - } - -## Pointers - -In contrast to a lot of modern languages, record and enum types in -Rust are not represented as pointers to allocated memory. They are, -like in C and C++, represented directly. This means that if you `let x -= {x: 1f, y: 1f};`, you are creating a record on the stack. If you -then copy it into a data structure, the whole record is copied, not -just a pointer. - -For small records like `point`, this is usually more efficient than -allocating memory and going through a pointer. But for big records, or -records with mutable fields, it can be useful to have a single copy on -the heap, and refer to that through a pointer. - -Rust supports several types of pointers. The simplest is the unsafe -pointer, written `*TYPE`, which is a completely unchecked pointer -type only used in unsafe code (and thus, in typical Rust code, very -rarely). The safe pointer types are `@TYPE` for shared, -reference-counted boxes, and `~TYPE`, for uniquely-owned pointers. - -All pointer types can be dereferenced with the `*` unary operator. - -### Shared boxes - -<a name="shared-box"></a> - -Shared boxes are pointers to heap-allocated, reference counted memory. -A cycle collector ensures that circular references do not result in -memory leaks. - -Creating a shared box is done by simply applying the unary `@` -operator to an expression. The result of the expression will be boxed, -resulting in a box of the right type. For example: - - let x = @10; // New box, refcount of 1 - let y = x; // Copy the pointer, increase refcount - // When x and y go out of scope, refcount goes to 0, box is freed - -NOTE: We may in the future switch to garbage collection, rather than -reference counting, for shared boxes. - -Shared boxes never cross task boundaries. - -### Unique boxes - -<a name="unique-box"></a> - -In contrast to shared boxes, unique boxes are not reference counted. 
-Instead, it is statically guaranteed that only a single owner of the -box exists at any time. - - let x = ~10; - let y <- x; - -This is where the 'move' (`<-`) operator comes in. It is similar to -`=`, but it de-initializes its source. Thus, the unique box can move -from `x` to `y`, without violating the constraint that it only has a -single owner (if you used assignment instead of the move operator, the -box would, in principle, be copied). - -Unique boxes, when they do not contain any shared boxes, can be sent -to other tasks. The sending task will give up ownership of the box, -and won't be able to access it afterwards. The receiving task will -become the sole owner of the box. - -### Mutability - -All pointer types have a mutable variant, written `@mutable TYPE` or -`~mutable TYPE`. Given such a pointer, you can write to its contents -by combining the dereference operator with a mutating action. - - fn increase_contents(pt: @mutable int) { - *pt += 1; - } - -## Vectors - -Rust vectors are always heap-allocated and unique. A value of type -`[TYPE]` is represented by a pointer to a section of heap memory -containing any number of `TYPE` values. - -NOTE: This uniqueness is turning out to be quite awkward in practice, -and might change in the future. - -Vector literals are enclosed in square brackets. Dereferencing is done -with square brackets (zero-based): - - let myvec = [true, false, true, false]; - if myvec[1] { std::io::println("boom"); } - -By default, vectors are immutable—you can not replace their elements. -The type written as `[mutable TYPE]` is a vector with mutable -elements. Mutable vector literals are written `[mutable]` (empty) or -`[mutable 1, 2, 3]` (with elements). - -The `+` operator means concatenation when applied to vector types. 
-Growing a vector in Rust is not as inefficient as it looks : - - let myvec = [], i = 0; - while i < 100 { - myvec += [i]; - i += 1; - } - -Because a vector is unique, replacing it with a longer one (which is -what `+= [i]` does) is indistinguishable from appending to it -in-place. Vector representations are optimized to grow -logarithmically, so the above code generates about the same amount of -copying and reallocation as `push` implementations in most other -languages. - -## Strings - -The `str` type in Rust is represented exactly the same way as a vector -of bytes (`[u8]`), except that it is guaranteed to have a trailing -null byte (for interoperability with C APIs). - -This sequence of bytes is interpreted as an UTF-8 encoded sequence of -characters. This has the advantage that UTF-8 encoded I/O (which -should really be the default for modern systems) is very fast, and -that strings have, for most intents and purposes, a nicely compact -representation. It has the disadvantage that you only get -constant-time access by byte, not by character. - -A lot of algorithms don't need constant-time indexed access (they -iterate over all characters, which `str::chars` helps with), and -for those that do, many don't need actual characters, and can operate -on bytes. For algorithms that do really need to index by character, -there's the option to convert your string to a character vector (using -`str::to_chars`). - -Like vectors, strings are always unique. You can wrap them in a shared -box to share them. Unlike vectors, there is no mutable variant of -strings. They are always immutable. - -## Resources - -Resources are data types that have a destructor associated with them. - - # fn close_file_desc(x: int) {} - resource file_desc(fd: int) { - close_file_desc(fd); - } - -This defines a type `file_desc` and a constructor of the same name, -which takes an integer. 
Values of such a type can not be copied, and -when they are destroyed (by going out of scope, or, when boxed, when -their box is cleaned up), their body runs. In the example above, this -would cause the given file descriptor to be closed. - -NOTE: We're considering alternative approaches for data types with -destructors. Resources might go away in the future. diff --git a/doc/tutorial/extract.js b/doc/tutorial/extract.js deleted file mode 100644 index e8461967545..00000000000 --- a/doc/tutorial/extract.js +++ /dev/null @@ -1,42 +0,0 @@ -var fs = require("fs"), md = require("./lib/markdown"); - -// Runs markdown.js over the tutorial, to find the code blocks in it. -// Uses the #-markers in those code blocks, along with some vague -// heuristics, to turn them into compilable files. Outputs these files -// to fragments/. -// -// '##ignore' means don't test this block -// '##notrust' means the block isn't rust code -// (used by build.js to not highlight it) -// '# code' means insert the given code to complete the fragment -// (build.js strips out such lines) - -var curFile, curFrag; -md.Markdown.dialects.Maruku.block.code = function code(block, next) { - if (block.match(/^ /)) { - var ignore, text = String(block); - while (next.length && next[0].match(/^ /)) text += "\n" + String(next.shift()); - text = text.split("\n").map(function(line) { - line = line.slice(4); - if (line == "## ignore" || line == "## notrust") { ignore = true; line = ""; } - if (/^# /.test(line)) line = line.slice(2); - return line; - }).join("\n"); - if (ignore) return; - if (!/\bfn main\b/.test(text)) { - if (/(^|\n) *(native|use|mod|import|export)\b/.test(text)) - text += "\nfn main() {}\n"; - else text = "fn main() {\n" + text + "\n}\n"; - } - if (!/\buse std\b/.test(text)) text = "use std;\n" + text; - fs.writeFileSync("fragments/" + curFile + "_" + (++curFrag) + ".rs", text); - } -}; - -fs.readFileSync("order", "utf8").split("\n").filter(id).forEach(handle); - -function id(x) { return x; } 
-function handle(file) { - curFile = file; curFrag = 0; - md.parse(fs.readFileSync(file + ".md", "utf8"), "Maruku"); -} diff --git a/doc/tutorial/ffi.md b/doc/tutorial/ffi.md deleted file mode 100644 index 5b6d72933c9..00000000000 --- a/doc/tutorial/ffi.md +++ /dev/null @@ -1,209 +0,0 @@ -# Interacting with foreign code - -One of Rust's aims, as a system programming language, is to -interoperate well with C code. - -We'll start with an example. It's a bit bigger than usual, and -contains a number of new concepts. We'll go over it one piece at a -time. - -This is a program that uses OpenSSL's `SHA1` function to compute the -hash of its first command-line argument, which it then converts to a -hexadecimal string and prints to standard output. If you have the -OpenSSL libraries installed, it should 'just work'. - - use std; - - native mod crypto { - fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; - } - - fn as_hex(data: [u8]) -> str { - let acc = ""; - for byte in data { acc += #fmt("%02x", byte as uint); } - ret acc; - } - - fn sha1(data: str) -> str unsafe { - let bytes = str::bytes(data); - let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), - vec::len(bytes), ptr::null()); - ret as_hex(vec::unsafe::from_buf(hash, 20u)); - } - - fn main(args: [str]) { - std::io::println(sha1(args[1])); - } - -## Native modules - -Before we can call `SHA1`, we have to declare it. That is what this -part of the program is responsible for: - - native mod crypto { - fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; - } - -A `native` module declaration tells the compiler that the program -should be linked with a library by that name, and that the given list -of functions are available in that library. - -In this case, it'll change the name `crypto` to a shared library name -in a platform-specific way (`libcrypto.so` on Linux, for example), and -link that in. 
If you want the module to have a different name from the -actual library, you can use the `"link_name"` attribute, like: - - #[link_name = "crypto"] - native mod something { - fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; - } - -## Native calling conventions - -Most native C code uses the cdecl calling convention, so that is what -Rust uses by default when calling native functions. Some native functions, -most notably the Windows API, use other calling conventions, so Rust -provides a way to hint to the compiler which is expected by using -the `"abi"` attribute: - - #[cfg(target_os = "win32")] - #[abi = "stdcall"] - native mod kernel32 { - fn SetEnvironmentVariableA(n: *u8, v: *u8) -> int; - } - -The `"abi"` attribute applies to a native mod (it can not be applied -to a single function within a module), and must be either `"cdecl"` -or `"stdcall"`. Other conventions may be defined in the future. - -## Unsafe pointers - -The native `SHA1` function is declared to take three arguments, and -return a pointer. - - # native mod crypto { - fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8; - # } - -When declaring the argument types to a foreign function, the Rust -compiler has no way to check whether your declaration is correct, so -you have to be careful. If you get the number or types of the -arguments wrong, you're likely to get a segmentation fault. Or, -probably even worse, your code will work on one platform, but break on -another. - -In this case, `SHA1` is defined as taking two `unsigned char*` -arguments and one `unsigned long`. The rust equivalents are `*u8` -unsafe pointers and an `uint` (which, like `unsigned long`, is a -machine-word-sized type). - -Unsafe pointers can be created through various functions in the -standard lib, usually with `unsafe` somewhere in their name. 
You can -dereference an unsafe pointer with the `*` operator, but use -caution—unlike Rust's other pointer types, unsafe pointers are -completely unmanaged, so they might point at invalid memory, or be -null pointers. - -## Unsafe blocks - -The `sha1` function is the most obscure part of the program. - - # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } - # fn as_hex(data: [u8]) -> str { "hi" } - fn sha1(data: str) -> str unsafe { - let bytes = str::bytes(data); - let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), - vec::len(bytes), ptr::null()); - ret as_hex(vec::unsafe::from_buf(hash, 20u)); - } - -Firstly, what does the `unsafe` keyword at the top of the function -mean? `unsafe` is a block modifier—it declares the block following it -to be known to be unsafe. - -Some operations, like dereferencing unsafe pointers or calling -functions that have been marked unsafe, are only allowed inside unsafe -blocks. With the `unsafe` keyword, you're telling the compiler 'I know -what I'm doing'. The main motivation for such an annotation is that -when you have a memory error (and you will, if you're using unsafe -constructs), you have some idea where to look—it will most likely be -caused by some unsafe code. - -Unsafe blocks isolate unsafety. Unsafe functions, on the other hand, -advertise it to the world. An unsafe function is written like this: - - unsafe fn kaboom() { "I'm harmless!"; } - -This function can only be called from an unsafe block or another -unsafe function. - -## Pointer fiddling - -The standard library defines a number of helper functions for dealing -with unsafe data, casting between types, and generally subverting -Rust's safety mechanisms. - -Let's look at our `sha1` function again. 
- - # mod crypto { fn SHA1(src: *u8, sz: uint, out: *u8) -> *u8 { out } } - # fn as_hex(data: [u8]) -> str { "hi" } - # fn x(data: str) -> str unsafe { - let bytes = str::bytes(data); - let hash = crypto::SHA1(vec::unsafe::to_ptr(bytes), - vec::len(bytes), ptr::null()); - ret as_hex(vec::unsafe::from_buf(hash, 20u)); - # } - -The `str::bytes` function is perfectly safe, it converts a string to -an `[u8]`. This byte array is then fed to `vec::unsafe::to_ptr`, which -returns an unsafe pointer to its contents. - -This pointer will become invalid as soon as the vector it points into -is cleaned up, so you should be very careful how you use it. In this -case, the local variable `bytes` outlives the pointer, so we're good. - -Passing a null pointer as third argument to `SHA1` causes it to use a -static buffer, and thus save us the effort of allocating memory -ourselves. `ptr::null` is a generic function that will return an -unsafe null pointer of the correct type (Rust generics are awesome -like that—they can take the right form depending on the type that they -are expected to return). - -Finally, `vec::unsafe::from_buf` builds up a new `[u8]` from the -unsafe pointer that was returned by `SHA1`. SHA1 digests are always -twenty bytes long, so we can pass `20u` for the length of the new -vector. - -## Passing structures - -C functions often take pointers to structs as arguments. Since Rust -records are binary-compatible with C structs, Rust programs can call -such functions directly. - -This program uses the Posix function `gettimeofday` to get a -microsecond-resolution timer. 
- - use std; - type timeval = {mutable tv_sec: u32, - mutable tv_usec: u32}; - #[nolink] - native mod libc { - fn gettimeofday(tv: *timeval, tz: *()) -> i32; - } - fn unix_time_in_microseconds() -> u64 unsafe { - let x = {mutable tv_sec: 0u32, mutable tv_usec: 0u32}; - libc::gettimeofday(ptr::addr_of(x), ptr::null()); - ret (x.tv_sec as u64) * 1000_000_u64 + (x.tv_usec as u64); - } - -The `#[nolink]` attribute indicates that there's no native library to link -in. The standard C library is already linked with Rust programs. - -A `timeval`, in C, is a struct with two 32-bit integers. Thus, we -define a record type with the same contents, and declare -`gettimeofday` to take a pointer to such a record. - -The second argument to `gettimeofday` (the time zone) is not used by -this program, so it simply declares it to be a pointer to the nil -type. Since null pointers look the same, no matter which type they are -supposed to point at, this is safe. diff --git a/doc/tutorial/func.md b/doc/tutorial/func.md deleted file mode 100644 index 08744a68dd8..00000000000 --- a/doc/tutorial/func.md +++ /dev/null @@ -1,174 +0,0 @@ -# Functions - -Functions (like all other static declarations, such as `type`) can be -declared both at the top level and inside other functions (or modules, -which we'll come back to in a moment). - -The `ret` keyword immediately returns from a function. It is -optionally followed by an expression to return. In functions that -return `()`, the returned expression can be left off. A function can -also return a value by having its top level block produce an -expression (by omitting the final semicolon). - -Some functions (such as the C function `exit`) never return normally. -In Rust, these are annotated with the pseudo-return type '`!`': - - fn dead_end() -> ! { fail; } - -This helps the compiler avoid spurious error messages. For example, -the following code would be a type error if `dead_end` would be -expected to return. 
- - # fn can_go_left() -> bool { true } - # fn can_go_right() -> bool { true } - # enum dir { left; right; } - # fn dead_end() -> ! { fail; } - let dir = if can_go_left() { left } - else if can_go_right() { right } - else { dead_end(); }; - -## Closures - -Named functions, like those in the previous section, do not close over -their environment. Rust also includes support for closures, which are -functions that can access variables in the scope in which they are -created. - -There are several forms of closures, each with its own role. The most -common type is called a 'block', this is a closure which has full -access to its environment. - - fn call_block_with_ten(b: block(int)) { b(10); } - - let x = 20; - call_block_with_ten({|arg| - #info("x=%d, arg=%d", x, arg); - }); - -This defines a function that accepts a block, and then calls it with a -simple block that executes a log statement, accessing both its -argument and the variable `x` from its environment. - -Blocks can only be used in a restricted way, because it is not allowed -to survive the scope in which it was created. They are allowed to -appear in function argument position and in call position, but nowhere -else. - -### Boxed closures - -When you need to store a closure in a data structure, a block will not -do, since the compiler will refuse to let you store it. For this -purpose, Rust provides a type of closure that has an arbitrary -lifetime, written `fn@` (boxed closure, analogous to the `@` pointer -type described in the next section). - -A boxed closure does not directly access its environment, but merely -copies out the values that it closes over into a private data -structure. This means that it can not assign to these variables, and -will not 'see' updates to them. 
- -This code creates a closure that adds a given string to its argument, -returns it from a function, and then calls it: - - use std; - - fn mk_appender(suffix: str) -> fn@(str) -> str { - let f = fn@(s: str) -> str { s + suffix }; - ret f; - } - - fn main() { - let shout = mk_appender("!"); - std::io::println(shout("hey ho, let's go")); - } - -### Closure compatibility - -A nice property of Rust closures is that you can pass any kind of -closure (as long as the arguments and return types match) to functions -that expect a `block`. Thus, when writing a higher-order function that -wants to do nothing with its function argument beyond calling it, you -should almost always specify the type of that argument as `block`, so -that callers have the flexibility to pass whatever they want. - - fn call_twice(f: block()) { f(); f(); } - call_twice({|| "I am a block"; }); - call_twice(fn@() { "I am a boxed closure"; }); - fn bare_function() { "I am a plain function"; } - call_twice(bare_function); - -### Unique closures - -<a name="unique"></a> - -Unique closures, written `fn~` in analogy to the `~` pointer type (see -next section), hold on to things that can safely be sent between -processes. They copy the values they close over, much like boxed -closures, but they also 'own' them—meaning no other code can access -them. Unique closures mostly exist for spawning new -[tasks](task.html). - -### Shorthand syntax - -The compact syntax used for blocks (`{|arg1, arg2| body}`) can also -be used to express boxed and unique closures in situations where the -closure style can be unambiguously derived from the context. Most -notably, when calling a higher-order function you do not have to use -the long-hand syntax for the function you're passing, since the -compiler can look at the argument type to find out what the parameter -types are. - -As a further simplification, if the final parameter to a function is a -closure, the closure need not be placed within parentheses. 
You could, -for example, write... - - let doubled = vec::map([1, 2, 3]) {|x| x*2}; - -`vec::map` is a function in the core library that applies its last -argument to every element of a vector, producing a new vector. - -Even when a closure takes no parameters, you must still write the bars -for the parameter list, as in `{|| ...}`. - -## Binding - -Partial application is done using the `bind` keyword in Rust. - - let daynum = bind vec::position(_, ["mo", "tu", "we", "do", - "fr", "sa", "su"]); - -Binding a function produces a boxed closure (`fn@` type) in which some -of the arguments to the bound function have already been provided. -`daynum` will be a function taking a single string argument, and -returning the day of the week that string corresponds to (if any). - -## Iteration - -Functions taking blocks provide a good way to define non-trivial -iteration constructs. For example, this one iterates over a vector -of integers backwards: - - fn for_rev(v: [int], act: block(int)) { - let i = vec::len(v); - while (i > 0u) { - i -= 1u; - act(v[i]); - } - } - -To run such an iteration, you could do this: - - # fn for_rev(v: [int], act: block(int)) {} - for_rev([1, 2, 3], {|n| log(error, n); }); - -Making use of the shorthand where a final closure argument can be -moved outside of the parentheses permits the following, which -looks quite like a normal loop: - - # fn for_rev(v: [int], act: block(int)) {} - for_rev([1, 2, 3]) {|n| - log(error, n); - } - -Note that, because `for_rev()` returns unit type, no semicolon is -needed when the final closure is pulled outside of the parentheses. diff --git a/doc/tutorial/generic.md b/doc/tutorial/generic.md deleted file mode 100644 index 2dc8d5684f3..00000000000 --- a/doc/tutorial/generic.md +++ /dev/null @@ -1,146 +0,0 @@ -# Generics - -## Generic functions - -Throughout this tutorial, I've been defining functions like `for_rev` -that act only on integers. 
It is 2012, and we no longer expect to be -defining such functions again and again for every type they apply to. -Thus, Rust allows functions and datatypes to have type parameters. - - fn for_rev<T>(v: [T], act: block(T)) { - let i = vec::len(v); - while i > 0u { - i -= 1u; - act(v[i]); - } - } - - fn map<T, U>(v: [T], f: block(T) -> U) -> [U] { - let acc = []; - for elt in v { acc += [f(elt)]; } - ret acc; - } - -When defined in this way, these functions can be applied to any type -of vector, as long as the type of the block's argument and the type of -the vector's content agree with each other. - -Inside a parameterized (generic) function, the names of the type -parameters (capitalized by convention) stand for opaque types. You -can't look inside them, but you can pass them around. - -## Generic datatypes - -Generic `type` and `enum` declarations follow the same pattern: - - type circular_buf<T> = {start: uint, - end: uint, - buf: [mutable T]}; - - enum option<T> { some(T); none; } - -You can then declare a function to take a `circular_buf<u8>` or return -an `option<str>`, or even an `option<T>` if the function itself is -generic. - -The `option` type given above exists in the core library as -`option::t`, and is the way Rust programs express the thing that in C -would be a nullable pointer. The nice part is that you have to -explicitly unpack an `option` type, so accidental null pointer -dereferences become impossible. - -## Type-inference and generics - -Rust's type inferrer works very well with generics, but there are -programs that just can't be typed. - - let n = option::none; - # n = option::some(1); - -If you never do anything else with `n`, the compiler will not be able -to assign a type to it. (The same goes for `[]`, the empty vector.) 
If -you really want to have such a statement, you'll have to write it like -this: - - let n2: option::t<int> = option::none; - // or - let n = option::none::<int>; - -Note that, in a value expression, `<` already has a meaning as a -comparison operator, so you'll have to write `::<T>` to explicitly -give a type to a name that denotes a generic value. Fortunately, this -is rarely necessary. - -## Polymorphic built-ins - -There are two built-in operations that, perhaps surprisingly, act on -values of any type. It was already mentioned earlier that `log` can -take any type of value and output it. - -More interesting is that Rust also defines an ordering for values of -all datatypes, and allows you to meaningfully apply comparison -operators (`<`, `>`, `<=`, `>=`, `==`, `!=`) to them. For structural -types, the comparison happens left to right, so `"abc" < "bac"` (but -note that `"bac" < "ác"`, because the ordering acts on UTF-8 sequences -without any sophistication). - -## Kinds - -<a name="kind"></a> - -Perhaps surprisingly, the 'copy' (duplicate) operation is not defined -for all Rust types. Resource types (types with destructors) can not be -copied, and neither can any type whose copying would require copying a -resource (such as records or unique boxes containing a resource). - -This complicates handling of generic functions. If you have a type -parameter `T`, can you copy values of that type? In Rust, you can't, -unless you explicitly declare that type parameter to have copyable -'kind'. A kind is a type of type. - - ## ignore - // This does not compile - fn head_bad<T>(v: [T]) -> T { v[0] } - // This does - fn head<T: copy>(v: [T]) -> T { v[0] } - -When instantiating a generic function, you can only instantiate it -with types that fit its kinds. So you could not apply `head` to a -resource type. - -Rust has three kinds: 'noncopyable', 'copyable', and 'sendable'. By -default, type parameters are considered to be noncopyable. 
You can -annotate them with the `copy` keyword to declare them copyable, and -with the `send` keyword to make them sendable. - -Sendable types are a subset of copyable types. They are types that do -not contain shared (reference counted) types, which are thus uniquely -owned by the function that owns them, and can be sent over channels to -other tasks. Most of the generic functions in the core `comm` module -take sendable types. - -## Generic functions and argument-passing - -The previous section mentioned that arguments are passed by pointer or -by value based on their type. There is one situation in which this is -difficult. If you try this program: - - # fn map(f: block(int) -> int, v: [int]) {} - fn plus1(x: int) -> int { x + 1 } - map(plus1, [1, 2, 3]); - -You will get an error message about argument passing styles -disagreeing. The reason is that generic types are always passed by -pointer, so `map` expects a function that takes its argument by -pointer. The `plus1` you defined, however, uses the default, efficient -way to pass integers, which is by value. To get around this issue, you -have to explicitly mark the arguments to a function that you want to -pass to a generic higher-order function as being passed by pointer, -using the `&&` sigil: - - # fn map<T, U>(f: block(T) -> U, v: [T]) {} - fn plus1(&&x: int) -> int { x + 1 } - map(plus1, [1, 2, 3]); - -NOTE: This is inconvenient, and we are hoping to get rid of this -restriction in the future. diff --git a/doc/tutorial/iface.md b/doc/tutorial/iface.md deleted file mode 100644 index c047cf06da5..00000000000 --- a/doc/tutorial/iface.md +++ /dev/null @@ -1,183 +0,0 @@ -# Interfaces - -Interfaces are Rust's take on value polymorphism—the thing that -object-oriented languages tend to solve with methods and inheritance. -For example, writing a function that can operate on multiple types of -collections. 
- -NOTE: This feature is very new, and will need a few extensions to be -applicable to more advanced use cases. - -## Declaration - -An interface consists of a set of methods. A method is a function that -can be applied to a `self` value and a number of arguments, using the -dot notation: `self.foo(arg1, arg2)`. - -For example, we could declare the interface `to_str` for things that -can be converted to a string, with a single method of the same name: - - iface to_str { - fn to_str() -> str; - } - -## Implementation - -To actually implement an interface for a given type, the `impl` form -is used. This defines implementations of `to_str` for the `int` and -`str` types. - - # iface to_str { fn to_str() -> str; } - impl of to_str for int { - fn to_str() -> str { int::to_str(self, 10u) } - } - impl of to_str for str { - fn to_str() -> str { self } - } - -Given these, we may call `1.to_str()` to get `"1"`, or -`"foo".to_str()` to get `"foo"` again. This is basically a form of -static overloading—when the Rust compiler sees the `to_str` method -call, it looks for an implementation that matches the type with a -method that matches the name, and simply calls that. - -## Scoping - -Implementations are not globally visible. Resolving a method to an -implementation requires that implementation to be in scope. You can -import and export implementations using the name of the interface they -implement (multiple implementations with the same name can be in scope -without problems). Or you can give them an explicit name if you -prefer, using this syntax: - - # iface to_str { fn to_str() -> str; } - impl nil_to_str of to_str for () { - fn to_str() -> str { "()" } - } - -## Bounded type parameters - -The useful thing about value polymorphism is that it does not have to -be static. If object-oriented languages only let you call a method on -an object when they knew exactly which sub-type it had, that would not -get you very far. 
To be able to call methods on types that aren't -known at compile time, it is possible to specify 'bounds' for type -parameters. - - # iface to_str { fn to_str() -> str; } - fn comma_sep<T: to_str>(elts: [T]) -> str { - let result = "", first = true; - for elt in elts { - if first { first = false; } - else { result += ", "; } - result += elt.to_str(); - } - ret result; - } - -The syntax for this is similar to the syntax for specifying that a -parameter type has to be copyable (which is, in principle, another -kind of bound). By declaring `T` as conforming to the `to_str` -interface, it becomes possible to call methods from that interface on -values of that type inside the function. It will also cause a -compile-time error when anyone tries to call `comma_sep` on an array -whose element type does not have a `to_str` implementation in scope. - -## Polymorphic interfaces - -Interfaces may contain type parameters. This defines an interface for -generalized sequence types: - - iface seq<T> { - fn len() -> uint; - fn iter(block(T)); - } - impl <T> of seq<T> for [T] { - fn len() -> uint { vec::len(self) } - fn iter(b: block(T)) { - for elt in self { b(elt); } - } - } - -Note that the implementation has to explicitly declare its -parameter `T` before using it to specify its interface type. This is -needed because it could also, for example, specify an implementation -of `seq<int>`—the `of` clause *refers* to a type, rather than defining -one. - -## Casting to an interface type - -The above allows us to define functions that polymorphically act on -values of *an* unknown type that conforms to a given interface. -However, consider this function: - - # iface drawable { fn draw(); } - fn draw_all<T: drawable>(shapes: [T]) { - for shape in shapes { shape.draw(); } - } - -You can call that on an array of circles, or an array of squares -(assuming those have suitable `drawable` interfaces defined), but not -on an array containing both circles and squares. 
- -When this is needed, an interface name can be used as a type, causing -the function to be written simply like this: - - # iface drawable { fn draw(); } - fn draw_all(shapes: [drawable]) { - for shape in shapes { shape.draw(); } - } - -There is no type parameter anymore (since there isn't a single type -that we're calling the function on). Instead, the `drawable` type is -used to refer to a type that is a reference-counted box containing a -value for which a `drawable` implementation exists, combined with -information on where to find the methods for this implementation. This -is very similar to the 'vtables' used in most object-oriented -languages. - -To construct such a value, you use the `as` operator to cast a value -to an interface type: - - # type circle = int; type rectangle = int; - # iface drawable { fn draw(); } - # impl of drawable for int { fn draw() {} } - # fn new_circle() -> int { 1 } - # fn new_rectangle() -> int { 2 } - # fn draw_all(shapes: [drawable]) {} - let c: circle = new_circle(); - let r: rectangle = new_rectangle(); - draw_all([c as drawable, r as drawable]); - -This will store the value into a box, along with information about the -implementation (which is looked up in the scope of the cast). The -`drawable` type simply refers to such boxes, and calling methods on it -always works, no matter what implementations are in scope. - -Note that the allocation of a box is somewhat more expensive than -simply using a type parameter and passing in the value as-is, and much -more expensive than statically resolved method calls. - -## Interface-less implementations - -If you only intend to use an implementation for static overloading, -and there is no interface available that it conforms to, you are free -to leave off the `of` clause. 
- - # type currency = (); - # fn mk_currency(x: int, s: str) {} - impl int_util for int { - fn times(b: block(int)) { - let i = 0; - while i < self { b(i); i += 1; } - } - fn dollars() -> currency { - mk_currency(self, "USD") - } - } - -This allows cutesy things like `send_payment(10.dollars())`. And the -nice thing is that it's fully scoped, so the uneasy feeling that -anybody with experience in object-oriented languages (with the -possible exception of Rubyists) gets at the sight of such things is -not justified. It's harmless! diff --git a/doc/tutorial/index.md b/doc/tutorial/index.md deleted file mode 100644 index d8c9be56568..00000000000 --- a/doc/tutorial/index.md +++ /dev/null @@ -1 +0,0 @@ -# Rust language tutorial diff --git a/doc/tutorial/intro.md b/doc/tutorial/intro.md deleted file mode 100644 index 61510b87675..00000000000 --- a/doc/tutorial/intro.md +++ /dev/null @@ -1,57 +0,0 @@ -# Introduction - -## Scope - -This is a tutorial for the Rust programming language. It assumes the -reader is familiar with the basic concepts of programming, and has -programmed in one or more other languages before. The tutorial covers -the whole language, though not with the depth and precision of the -[language reference][1]. - -[1]: http://www.rust-lang.org/doc/rust.html - -## Disclaimer - -Rust is a language under development. The general flavor of the -language has settled, but details will continue to change as it is -further refined. Nothing in this tutorial is final, and though we try -to keep it updated, it is possible that the text occasionally does not -reflect the actual state of the language. - -## First Impressions - -Though syntax is something you get used to, an initial encounter with -a language can be made easier if the notation looks familiar. Rust is -a curly-brace language in the tradition of C, C++, and JavaScript. 
- - fn fac(n: int) -> int { - let result = 1, i = 1; - while i <= n { - result *= i; - i += 1; - } - ret result; - } - -Several differences from C stand out. Types do not come before, but -after variable names (preceded by a colon). In local variables -(introduced with `let`), they are optional, and will be inferred when -left off. Constructs like `while` and `if` do not require parentheses -around the condition (though they allow them). Also, there's a -tendency towards aggressive abbreviation in the keywords—`fn` for -function, `ret` for return. - -You should, however, not conclude that Rust is simply an evolution of -C. As will become clear in the rest of this tutorial, it goes in -quite a different direction. - -## Conventions - -Throughout the tutorial, words that indicate language keywords or -identifiers defined in the example code are displayed in `code font`. - -Code snippets are indented, and also shown in a monospace font. Not -all snippets constitute whole programs. For brevity, we'll often show -fragments of programs that don't compile on their own. To try them -out, you might have to wrap them in `fn main() { ... }`, and make sure -they don't contain references to things that aren't actually defined. diff --git a/doc/tutorial/lib/markdown.js b/doc/tutorial/lib/markdown.js deleted file mode 100644 index a5b1240fc70..00000000000 --- a/doc/tutorial/lib/markdown.js +++ /dev/null @@ -1,1469 +0,0 @@ -// Released under MIT license -// Copyright (c) 2009-2010 Dominic Baggott -// Copyright (c) 2009-2010 Ash Berlin -// Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com) - -(function( expose ) { - -/** - * class Markdown - * - * Markdown processing in Javascript done right. 
We have very particular views - * on what constitutes 'right' which include: - * - * - produces well-formed HTML (this means that em and strong nesting is - * important) - * - * - has an intermediate representation to allow processing of parsed data (We - * in fact have two, both as [JsonML]: a markdown tree and an HTML tree). - * - * - is easily extensible to add new dialects without having to rewrite the - * entire parsing mechanics - * - * - has a good test suite - * - * This implementation fulfills all of these (except that the test suite could - * do with expanding to automatically run all the fixtures from other Markdown - * implementations.) - * - * ##### Intermediate Representation - * - * *TODO* Talk about this :) Its JsonML, but document the node names we use. - * - * [JsonML]: http://jsonml.org/ "JSON Markup Language" - **/ -var Markdown = expose.Markdown = function Markdown(dialect) { - switch (typeof dialect) { - case "undefined": - this.dialect = Markdown.dialects.Gruber; - break; - case "object": - this.dialect = dialect; - break; - default: - if (dialect in Markdown.dialects) { - this.dialect = Markdown.dialects[dialect]; - } - else { - throw new Error("Unknown Markdown dialect '" + String(dialect) + "'"); - } - break; - } - this.em_state = []; - this.strong_state = []; - this.debug_indent = ""; -} - -/** - * parse( markdown, [dialect] ) -> JsonML - * - markdown (String): markdown string to parse - * - dialect (String | Dialect): the dialect to use, defaults to gruber - * - * Parse `markdown` and return a markdown document as a Markdown.JsonML tree. 
- **/ -expose.parse = function( source, dialect ) { - // dialect will default if undefined - var md = new Markdown( dialect ); - return md.toTree( source ); -} - -/** - * toHTML( markdown, [dialect] ) -> String - * toHTML( md_tree ) -> String - * - markdown (String): markdown string to parse - * - md_tree (Markdown.JsonML): parsed markdown tree - * - * Take markdown (either as a string or as a JsonML tree) and run it through - * [[toHTMLTree]] then turn it into a well-formated HTML fragment. - **/ -expose.toHTML = function toHTML( source , dialect , options ) { - var input = expose.toHTMLTree( source , dialect , options ); - - return expose.renderJsonML( input ); -} - -/** - * toHTMLTree( markdown, [dialect] ) -> JsonML - * toHTMLTree( md_tree ) -> JsonML - * - markdown (String): markdown string to parse - * - dialect (String | Dialect): the dialect to use, defaults to gruber - * - md_tree (Markdown.JsonML): parsed markdown tree - * - * Turn markdown into HTML, represented as a JsonML tree. If a string is given - * to this function, it is first parsed into a markdown tree by calling - * [[parse]]. - **/ -expose.toHTMLTree = function toHTMLTree( input, dialect , options ) { - // convert string input to an MD tree - if ( typeof input ==="string" ) input = this.parse( input, dialect ); - - // Now convert the MD tree to an HTML tree - - // remove references from the tree - var attrs = extract_attr( input ), - refs = {}; - - if ( attrs && attrs.references ) { - refs = attrs.references; - } - - var html = convert_tree_to_html( input, refs , options ); - merge_text_nodes( html ); - return html; -} - -var mk_block = Markdown.mk_block = function(block, trail, line) { - // Be helpful for default case in tests. 
- if ( arguments.length == 1 ) trail = "\n\n"; - - var s = new String(block); - s.trailing = trail; - // To make it clear its not just a string - s.toSource = function() { - return "Markdown.mk_block( " + - uneval(block) + - ", " + - uneval(trail) + - ", " + - uneval(line) + - " )" - } - - if (line != undefined) - s.lineNumber = line; - - return s; -} - -function count_lines( str ) { - var n = 0, i = -1;; - while ( ( i = str.indexOf('\n', i+1) ) != -1) n++; - return n; -} - -// Internal - split source into rough blocks -Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) { - // [\s\S] matches _anything_ (newline or space) - var re = /([\s\S]+?)($|\n(?:\s*\n|$)+)/g, - blocks = [], - m; - - var line_no = 1; - - if ( ( m = /^(\s*\n)/.exec(input) ) != null ) { - // skip (but count) leading blank lines - line_no += count_lines( m[0] ); - re.lastIndex = m[0].length; - } - - while ( ( m = re.exec(input) ) != null ) { - blocks.push( mk_block( m[1], m[2], line_no ) ); - line_no += count_lines( m[0] ); - } - - return blocks; -} - -/** - * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ] - * - block (String): the block to process - * - next (Array): the following blocks - * - * Process `block` and return an array of JsonML nodes representing `block`. - * - * It does this by asking each block level function in the dialect to process - * the block until one can. Succesful handling is indicated by returning an - * array (with zero or more JsonML nodes), failure by a false value. - * - * Blocks handlers are responsible for calling [[Markdown#processInline]] - * themselves as appropriate. - * - * If the blocks were split incorrectly or adjacent blocks need collapsing you - * can adjust `next` in place using shift/splice etc. - * - * If any of this default behaviour is not right for the dialect, you can - * define a `__call__` method on the dialect that will get invoked to handle - * the block processing. 
- */ -Markdown.prototype.processBlock = function processBlock( block, next ) { - var cbs = this.dialect.block, - ord = cbs.__order__; - - if ( "__call__" in cbs ) { - return cbs.__call__.call(this, block, next); - } - - for ( var i = 0; i < ord.length; i++ ) { - //D:this.debug( "Testing", ord[i] ); - var res = cbs[ ord[i] ].call( this, block, next ); - if ( res ) { - //D:this.debug(" matched"); - if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) ) - this.debug(ord[i], "didn't return a proper array"); - //D:this.debug( "" ); - return res; - } - } - - // Uhoh! no match! Should we throw an error? - return []; -} - -Markdown.prototype.processInline = function processInline( block ) { - return this.dialect.inline.__call__.call( this, String( block ) ); -} - -/** - * Markdown#toTree( source ) -> JsonML - * - source (String): markdown source to parse - * - * Parse `source` into a JsonML tree representing the markdown document. - **/ -// custom_tree means set this.tree to `custom_tree` and restore old value on return -Markdown.prototype.toTree = function toTree( source, custom_root ) { - var blocks = source instanceof Array - ? 
source - : this.split_blocks( source ); - - // Make tree a member variable so its easier to mess with in extensions - var old_tree = this.tree; - try { - this.tree = custom_root || this.tree || [ "markdown" ]; - - blocks: - while ( blocks.length ) { - var b = this.processBlock( blocks.shift(), blocks ); - - // Reference blocks and the like won't return any content - if ( !b.length ) continue blocks; - - this.tree.push.apply( this.tree, b ); - } - return this.tree; - } - finally { - if ( custom_root ) - this.tree = old_tree; - } - -} - -// Noop by default -Markdown.prototype.debug = function () { - var args = Array.prototype.slice.call( arguments); - args.unshift(this.debug_indent); - if (typeof console !== "undefined" && typeof console.log !== "undefined") - console.log.apply( null, args ); -} - -Markdown.prototype.loop_re_over_block = function( re, block, cb ) { - // Dont use /g regexps with this - var m, - b = block.valueOf(); - - while ( b.length && (m = re.exec(b) ) != null) { - b = b.substr( m[0].length ); - cb.call(this, m); - } - return b; -} - -/** - * Markdown.dialects - * - * Namespace of built-in dialects. - **/ -Markdown.dialects = {}; - -/** - * Markdown.dialects.Gruber - * - * The default dialect that follows the rules set out by John Gruber's - * markdown.pl as closely as possible. Well actually we follow the behaviour of - * that script which in some places is not exactly what the syntax web page - * says. 
- **/ -Markdown.dialects.Gruber = { - block: { - atxHeader: function atxHeader( block, next ) { - var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ ); - - if ( !m ) return undefined; - - var header = [ "header", { level: m[ 1 ].length } ]; - Array.prototype.push.apply(header, this.processInline(m[ 2 ])); - - if ( m[0].length < block.length ) - next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); - - return [ header ]; - }, - - setextHeader: function setextHeader( block, next ) { - var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ ); - - if ( !m ) return undefined; - - var level = ( m[ 2 ] === "=" ) ? 1 : 2; - var header = [ "header", { level : level }, m[ 1 ] ]; - - if ( m[0].length < block.length ) - next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); - - return [ header ]; - }, - - code: function code( block, next ) { - // | Foo - // |bar - // should be a code block followed by a paragraph. Fun - // - // There might also be adjacent code block to merge. - - var ret = [], - re = /^(?: {0,3}\t| {4})(.*)\n?/, - lines; - - // 4 spaces + content - var m = block.match( re ); - - if ( !m ) return undefined; - - block_search: - do { - // Now pull out the rest of the lines - var b = this.loop_re_over_block( - re, block.valueOf(), function( m ) { ret.push( m[1] ) } ); - - if (b.length) { - // Case alluded to in first comment. 
push it back on as a new block - next.unshift( mk_block(b, block.trailing) ); - break block_search; - } - else if (next.length) { - // Check the next block - it might be code too - var m = next[0].match( re ); - - if ( !m ) break block_search; - - // Pull how how many blanks lines follow - minus two to account for .join - ret.push ( block.trailing.replace(/[^\n]/g, '').substring(2) ); - - block = next.shift(); - } - else - break block_search; - } while (true); - - return [ [ "code_block", ret.join("\n") ] ]; - }, - - horizRule: function horizRule( block, next ) { - // this needs to find any hr in the block to handle abutting blocks - var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ ); - - if ( !m ) { - return undefined; - } - - var jsonml = [ [ "hr" ] ]; - - // if there's a leading abutting block, process it - if ( m[ 1 ] ) { - jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) ); - } - - // if there's a trailing abutting block, stick it into next - if ( m[ 3 ] ) { - next.unshift( mk_block( m[ 3 ] ) ); - } - - return jsonml; - }, - - // There are two types of lists. Tight and loose. Tight lists have no whitespace - // between the items (and result in text just in the <li>) and loose lists, - // which have an empty line between list items, resulting in (one or more) - // paragraphs inside the <li>. - // - // There are all sorts weird edge cases about the original markdown.pl's - // handling of lists: - // - // * Nested lists are supposed to be indented by four chars per level. But - // if they aren't, you can get a nested list by indenting by less than - // four so long as the indent doesn't match an indent of an existing list - // item in the 'nest stack'. - // - // * The type of the list (bullet or number) is controlled just by the - // first item at the indent. Subsequent changes are ignored unless they - // are for nested lists - // - lists: (function( ) { - // Use a closure to hide a few variables. 
- var any_list = "[*+-]|\\d\\.", - bullet_list = /[*+-]/, - number_list = /\d+\./, - // Capture leading indent as it matters for determining nested lists. - is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ), - indent_re = "(?: {0,3}\\t| {4})"; - - // TODO: Cache this regexp for certain depths. - // Create a regexp suitable for matching an li for a given stack depth - function regex_for_depth( depth ) { - - return new RegExp( - // m[1] = indent, m[2] = list_type - "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" + - // m[3] = cont - "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})" - ); - } - function expand_tab( input ) { - return input.replace( / {0,3}\t/g, " " ); - } - - // Add inline content `inline` to `li`. inline comes from processInline - // so is an array of content - function add(li, loose, inline, nl) { - if (loose) { - li.push( [ "para" ].concat(inline) ); - return; - } - // Hmmm, should this be any block level element or just paras? - var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para" - ? li[li.length -1] - : li; - - // If there is already some content in this list, add the new line in - if (nl && li.length > 1) inline.unshift(nl); - - for (var i=0; i < inline.length; i++) { - var what = inline[i], - is_str = typeof what == "string"; - if (is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" ) - { - add_to[ add_to.length-1 ] += what; - } - else { - add_to.push( what ); - } - } - } - - // contained means have an indent greater than the current one. 
On - // *every* line in the block - function get_contained_blocks( depth, blocks ) { - - var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ), - replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"), - ret = []; - - while ( blocks.length > 0 ) { - if ( re.exec( blocks[0] ) ) { - var b = blocks.shift(), - // Now remove that indent - x = b.replace( replace, ""); - - ret.push( mk_block( x, b.trailing, b.lineNumber ) ); - } - break; - } - return ret; - } - - // passed to stack.forEach to turn list items up the stack into paras - function paragraphify(s, i, stack) { - var list = s.list; - var last_li = list[list.length-1]; - - if (last_li[1] instanceof Array && last_li[1][0] == "para") { - return; - } - if (i+1 == stack.length) { - // Last stack frame - // Keep the same array, but replace the contents - last_li.push( ["para"].concat( last_li.splice(1) ) ); - } - else { - var sublist = last_li.pop(); - last_li.push( ["para"].concat( last_li.splice(1) ), sublist ); - } - } - - // The matcher function - return function( block, next ) { - var m = block.match( is_list_re ); - if ( !m ) return undefined; - - function make_list( m ) { - var list = bullet_list.exec( m[2] ) - ? ["bulletlist"] - : ["numberlist"]; - - stack.push( { list: list, indent: m[1] } ); - return list; - } - - - var stack = [], // Stack of lists for nesting. - list = make_list( m ), - last_li, - loose = false, - ret = [ stack[0].list ]; - - // Loop to search over block looking for inner block elements and loose lists - loose_search: - while( true ) { - // Split into lines preserving new lines at end of line - var lines = block.split( /(?=\n)/ ); - - // We have to grab all lines for a li and call processInline on them - // once as there are some inline things that can span lines. - var li_accumulate = ""; - - // Loop over the lines in this block looking for tight lists. 
- tight_search: - for (var line_no=0; line_no < lines.length; line_no++) { - var nl = "", - l = lines[line_no].replace(/^\n/, function(n) { nl = n; return "" }); - - // TODO: really should cache this - var line_re = regex_for_depth( stack.length ); - - m = l.match( line_re ); - //print( "line:", uneval(l), "\nline match:", uneval(m) ); - - // We have a list item - if ( m[1] !== undefined ) { - // Process the previous list item, if any - if ( li_accumulate.length ) { - add( last_li, loose, this.processInline( li_accumulate ), nl ); - // Loose mode will have been dealt with. Reset it - loose = false; - li_accumulate = ""; - } - - m[1] = expand_tab( m[1] ); - var wanted_depth = Math.floor(m[1].length/4)+1; - //print( "want:", wanted_depth, "stack:", stack.length); - if ( wanted_depth > stack.length ) { - // Deep enough for a nested list outright - //print ( "new nested list" ); - list = make_list( m ); - last_li.push( list ); - last_li = list[1] = [ "listitem" ]; - } - else { - // We aren't deep enough to be strictly a new level. This is - // where Md.pl goes nuts. If the indent matches a level in the - // stack, put it there, else put it one deeper then the - // wanted_depth deserves. - var found = stack.some(function(s, i) { - if ( s.indent != m[1] ) return false; - list = s.list; // Found the level we want - stack.splice(i+1); // Remove the others - //print("found"); - return true; // And stop looping - }); - - if (!found) { - //print("not found. 
l:", uneval(l)); - wanted_depth++; - if (wanted_depth <= stack.length) { - stack.splice(wanted_depth); - //print("Desired depth now", wanted_depth, "stack:", stack.length); - list = stack[wanted_depth-1].list; - //print("list:", uneval(list) ); - } - else { - //print ("made new stack for messy indent"); - list = make_list(m); - last_li.push(list); - } - } - - //print( uneval(list), "last", list === stack[stack.length-1].list ); - last_li = [ "listitem" ]; - list.push(last_li); - } // end depth of shenegains - nl = ""; - } - - // Add content - if (l.length > m[0].length) { - li_accumulate += nl + l.substr( m[0].length ); - } - } // tight_search - - if ( li_accumulate.length ) { - add( last_li, loose, this.processInline( li_accumulate ), nl ); - // Loose mode will have been dealt with. Reset it - loose = false; - li_accumulate = ""; - } - - // Look at the next block - we might have a loose list. Or an extra - // paragraph for the current li - var contained = get_contained_blocks( stack.length, next ); - - // Deal with code blocks or properly nested lists - if (contained.length > 0) { - // Make sure all listitems up the stack are paragraphs - stack.forEach( paragraphify, this ); - - last_li.push.apply( last_li, this.toTree( contained, [] ) ); - } - - var next_block = next[0] && next[0].valueOf() || ""; - - if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) { - block = next.shift(); - - // Check for an HR following a list: features/lists/hr_abutting - var hr = this.dialect.block.horizRule( block, next ); - - if (hr) { - ret.push.apply(ret, hr); - break; - } - - // Make sure all listitems up the stack are paragraphs - stack.forEach( paragraphify , this ); - - loose = true; - continue loose_search; - } - break; - } // loose_search - - return ret; - } - })(), - - blockquote: function blockquote( block, next ) { - if ( !block.match( /^>/m ) ) - return undefined; - - var jsonml = []; - - // separate out the leading abutting block, if any - if ( block[ 0 ] != 
">" ) { - var lines = block.split( /\n/ ), - prev = []; - - // keep shifting lines until you find a crotchet - while ( lines.length && lines[ 0 ][ 0 ] != ">" ) { - prev.push( lines.shift() ); - } - - // reassemble! - block = lines.join( "\n" ); - jsonml.push.apply( jsonml, this.processBlock( prev.join( "\n" ), [] ) ); - } - - // if the next block is also a blockquote merge it in - while ( next.length && next[ 0 ][ 0 ] == ">" ) { - var b = next.shift(); - block += block.trailing + b; - block.trailing = b.trailing; - } - - // Strip off the leading "> " and re-process as a block. - var input = block.replace( /^> ?/gm, '' ), - old_tree = this.tree; - jsonml.push( this.toTree( input, [ "blockquote" ] ) ); - - return jsonml; - }, - - referenceDefn: function referenceDefn( block, next) { - var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/; - // interesting matches are [ , ref_id, url, , title, title ] - - if ( !block.match(re) ) - return undefined; - - // make an attribute node if it doesn't exist - if ( !extract_attr( this.tree ) ) { - this.tree.splice( 1, 0, {} ); - } - - var attrs = extract_attr( this.tree ); - - // make a references hash if it doesn't exist - if ( attrs.references === undefined ) { - attrs.references = {}; - } - - var b = this.loop_re_over_block(re, block, function( m ) { - - if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' ) - m[2] = m[2].substring( 1, m[2].length - 1 ); - - var ref = attrs.references[ m[1].toLowerCase() ] = { - href: m[2] - }; - - if (m[4] !== undefined) - ref.title = m[4]; - else if (m[5] !== undefined) - ref.title = m[5]; - - } ); - - if (b.length) - next.unshift( mk_block( b, block.trailing ) ); - - return []; - }, - - para: function para( block, next ) { - // everything's a para! 
- return [ ["para"].concat( this.processInline( block ) ) ]; - } - } -} - -Markdown.dialects.Gruber.inline = { - __call__: function inline( text, patterns ) { - // Hmmm - should this function be directly in Md#processInline, or - // conversely, should Md#processBlock be moved into block.__call__ too - var out = [ ], - m, - // Look for the next occurange of a special character/pattern - re = new RegExp( "([\\s\\S]*?)(" + (patterns.source || patterns) + ")", "g" ), - lastIndex = 0; - - //D:var self = this; - //D:self.debug("processInline:", uneval(text) ); - function add(x) { - //D:self.debug(" adding output", uneval(x)); - if (typeof x == "string" && typeof out[out.length-1] == "string") - out[ out.length-1 ] += x; - else - out.push(x); - } - - while ( ( m = re.exec(text) ) != null) { - if ( m[1] ) add( m[1] ); // Some un-interesting text matched - else m[1] = { length: 0 }; // Or there was none, but make m[1].length == 0 - - var res; - if ( m[2] in this.dialect.inline ) { - res = this.dialect.inline[ m[2] ].call( - this, - text.substr( m.index + m[1].length ), m, out ); - } - // Default for now to make dev easier. just slurp special and output it. - res = res || [ m[2].length, m[2] ]; - - var len = res.shift(); - // Update how much input was consumed - re.lastIndex += ( len - m[2].length ); - - // Add children - res.forEach(add); - - lastIndex = re.lastIndex; - } - - // Add last 'boring' chunk - if ( text.length > lastIndex ) - add( text.substr( lastIndex ) ); - - return out; - }, - - "\\": function escaped( text ) { - // [ length of input processed, node/children to add... ] - // Only esacape: \ ` * _ { } [ ] ( ) # * + - . ! 
- if ( text.match( /^\\[\\`\*_{}\[\]()#\+.!\-]/ ) ) - return [ 2, text[1] ]; - else - // Not an esacpe - return [ 1, "\\" ]; - }, - - " - // 1 2 3 4 <--- captures - var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*(\S*)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ ); - - if ( m ) { - if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' ) - m[2] = m[2].substring( 1, m[2].length - 1 ); - - m[2] == this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; - - var attrs = { alt: m[1], href: m[2] || "" }; - if ( m[4] !== undefined) - attrs.title = m[4]; - - return [ m[0].length, [ "img", attrs ] ]; - } - - // ![Alt text][id] - m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ ); - - if ( m ) { - // We can't check if the reference is known here as it likely wont be - // found till after. Check it in md tree->hmtl tree conversion - return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), text: m[0] } ] ]; - } - - // Just consume the '![' - return [ 2, "![" ]; - }, - - "[": function link( text ) { - // [link text](/path/to/img.jpg "Optional title") - // 1 2 3 4 <--- captures - var m = text.match( /^\[([\s\S]*?)\][ \t]*\([ \t]*(\S+)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ ); - - if ( m && m[1].indexOf("]") == -1 ) { - if ( m[2] && m[2][0] == '<' && m[2][m[2].length-1] == '>' ) - m[2] = m[2].substring( 1, m[2].length - 1 ); - - // Process escapes only - m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; - - var attrs = { href: m[2] || "" }; - if ( m[4] !== undefined) - attrs.title = m[4]; - - var link = [ "link", attrs ]; - Array.prototype.push.apply( link, this.processInline( m[1] ) ); - return [ m[0].length, link ]; - } - - // [Alt text][id] - // [Alt text] [id] - // [id] - m = text.match( /^\[([\s\S]*?)\](?: ?\[(.*?)\])?/ ); - - if ( m ) { - // [id] case, text == id - if ( m[2] === undefined || m[2] === "" ) m[2] = m[1]; - - attrs = { ref: m[ 2 ].toLowerCase(), original: m[ 0 ] }; - link = [ "link_ref", attrs ]; - Array.prototype.push.apply( link, 
this.processInline( m[1] ) ); - - // We can't check if the reference is known here as it likely wont be - // found till after. Check it in md tree->hmtl tree conversion. - // Store the original so that conversion can revert if the ref isn't found. - return [ - m[ 0 ].length, - link - ]; - } - - // Just consume the '[' - return [ 1, "[" ]; - }, - - - "<": function autoLink( text ) { - var m; - - if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) { - if ( m[3] ) { - return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ]; - - } - else if ( m[2] == "mailto" ) { - return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ]; - } - else - return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ]; - } - - return [ 1, "<" ]; - }, - - "`": function inlineCode( text ) { - // Inline code block. as many backticks as you like to start it - // Always skip over the opening ticks. - var m = text.match( /(`+)(([\s\S]*?)\1)/ ); - - if ( m && m[2] ) - return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ]; - else { - // TODO: No matching end code found - warn! - return [ 1, "`" ]; - } - }, - - " \n": function lineBreak( text ) { - return [ 3, [ "linebreak" ] ]; - } - -} - -// Meta Helper/generator method for em and strong handling -function strong_em( tag, md ) { - - var state_slot = tag + "_state", - other_slot = tag == "strong" ? 
"em_state" : "strong_state"; - - function CloseTag(len) { - this.len_after = len; - this.name = "close_" + md; - } - - return function ( text, orig_match ) { - - if (this[state_slot][0] == md) { - // Most recent em is of this type - //D:this.debug("closing", md); - this[state_slot].shift(); - - // "Consume" everything to go back to the recrusion in the else-block below - return[ text.length, new CloseTag(text.length-md.length) ]; - } - else { - // Store a clone of the em/strong states - var other = this[other_slot].slice(), - state = this[state_slot].slice(); - - this[state_slot].unshift(md); - - //D:this.debug_indent += " "; - - // Recurse - var res = this.processInline( text.substr( md.length ) ); - //D:this.debug_indent = this.debug_indent.substr(2); - - var last = res[res.length - 1]; - - //D:this.debug("processInline from", tag + ": ", uneval( res ) ); - - var check = this[state_slot].shift(); - if (last instanceof CloseTag) { - res.pop(); - // We matched! Huzzah. - var consumed = text.length - last.len_after; - return [ consumed, [ tag ].concat(res) ]; - } - else { - // Restore the state of the other kind. We might have mistakenly closed it. - this[other_slot] = other; - this[state_slot] = state; - - // We can't reuse the processed result as it could have wrong parsing contexts in it. - return [ md.length, md ]; - } - } - } // End returned function -} - -Markdown.dialects.Gruber.inline["**"] = strong_em("strong", "**"); -Markdown.dialects.Gruber.inline["__"] = strong_em("strong", "__"); -Markdown.dialects.Gruber.inline["*"] = strong_em("em", "*"); -Markdown.dialects.Gruber.inline["_"] = strong_em("em", "_"); - - -// Build default order from insertion order. 
-Markdown.buildBlockOrder = function(d) { - var ord = []; - for ( var i in d ) { - if ( i == "__order__" || i == "__call__" ) continue; - ord.push( i ); - } - d.__order__ = ord; -} - -// Build patterns for inline matcher -Markdown.buildInlinePatterns = function(d) { - var patterns = []; - - for ( var i in d ) { - if (i == "__call__") continue; - var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" ) - .replace( /\n/, "\\n" ); - patterns.push( i.length == 1 ? l : "(?:" + l + ")" ); - } - - patterns = patterns.join("|"); - //print("patterns:", uneval( patterns ) ); - - var fn = d.__call__; - d.__call__ = function(text, pattern) { - if (pattern != undefined) - return fn.call(this, text, pattern); - else - return fn.call(this, text, patterns); - } -} - -// Helper function to make sub-classing a dialect easier -Markdown.subclassDialect = function( d ) { - function Block() {}; - Block.prototype = d.block; - function Inline() {}; - Inline.prototype = d.inline; - - return { block: new Block(), inline: new Inline() }; -} - -Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block ); -Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline ); - -Markdown.dialects.Maruku = Markdown.subclassDialect( Markdown.dialects.Gruber ); - -Markdown.dialects.Maruku.block.document_meta = function document_meta( block, next ) { - // we're only interested in the first block - if ( block.lineNumber > 1 ) return undefined; - - // document_meta blocks consist of one or more lines of `Key: Value\n` - if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) return undefined; - - // make an attribute node if it doesn't exist - if ( !extract_attr( this.tree ) ) { - this.tree.splice( 1, 0, {} ); - } - - var pairs = block.split( /\n/ ); - for ( p in pairs ) { - var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ), - key = m[ 1 ].toLowerCase(), - value = m[ 2 ]; - - this.tree[ 1 ][ key ] = value; - } - - // document_meta produces no content! 
- return []; -} - -Markdown.dialects.Maruku.block.block_meta = function block_meta( block, next ) { - // check if the last line of the block is an meta hash - var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ ); - if ( !m ) return undefined; - - // process the meta hash - var attr = process_meta_hash( m[ 2 ] ); - - // if we matched ^ then we need to apply meta to the previous block - if ( m[ 1 ] === "" ) { - var node = this.tree[ this.tree.length - 1 ], - hash = extract_attr( node ); - - // if the node is a string (rather than JsonML), bail - if ( typeof node === "string" ) return undefined; - - // create the attribute hash if it doesn't exist - if ( !hash ) { - hash = {}; - node.splice( 1, 0, hash ); - } - - // add the attributes in - for ( a in attr ) { - hash[ a ] = attr[ a ]; - } - - // return nothing so the meta hash is removed - return []; - } - - // pull the meta hash off the block and process what's left - var b = block.replace( /\n.*$/, "" ), - result = this.processBlock( b, [] ); - - // get or make the attributes hash - var hash = extract_attr( result[ 0 ] ); - if ( !hash ) { - hash = {}; - result[ 0 ].splice( 1, 0, hash ); - } - - // attach the attributes to the block - for ( a in attr ) { - hash[ a ] = attr[ a ]; - } - - return result; -} - -Markdown.dialects.Maruku.block.definition_list = function definition_list( block, next ) { - // one or more terms followed by one or more definitions, in a single block - var tight = /^((?:[^\s:].*\n)+):\s+([^]+)$/, - list = [ "dl" ]; - - // see if we're dealing with a tight or loose block - if ( ( m = block.match( tight ) ) ) { - // pull subsequent tight DL blocks out of `next` - var blocks = [ block ]; - while ( next.length && tight.exec( next[ 0 ] ) ) { - blocks.push( next.shift() ); - } - - for ( var b = 0; b < blocks.length; ++b ) { - var m = blocks[ b ].match( tight ), - terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ), - defns = m[ 2 ].split( /\n:\s+/ ); - - // print( uneval( m ) ); - - for ( 
var i = 0; i < terms.length; ++i ) { - list.push( [ "dt" ].concat(this.processInline(terms[i]))); - } - - for ( var i = 0; i < defns.length; ++i ) { - // run inline processing over the definition - list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) ); - } - } - } - else { - return undefined; - } - - return [ list ]; -} - -Markdown.dialects.Maruku.block.html_paragraph = function html_paragraph( block, next ) { - if (block.match(/^<\w/)) return [["RAW", block.toString()]]; -} - -Markdown.dialects.Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) { - if ( !out.length ) { - return [ 2, "{:" ]; - } - - // get the preceeding element - var before = out[ out.length - 1 ]; - - if ( typeof before === "string" ) { - return [ 2, "{:" ]; - } - - // match a meta hash - var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ ); - - // no match, false alarm - if ( !m ) { - return [ 2, "{:" ]; - } - - // attach the attributes to the preceeding element - var meta = process_meta_hash( m[ 1 ] ), - attr = extract_attr( before ); - - if ( !attr ) { - attr = {}; - before.splice( 1, 0, attr ); - } - - for ( var k in meta ) { - attr[ k ] = meta[ k ]; - } - - // cut out the string and replace it with nothing - return [ m[ 0 ].length, "" ]; -} - -Markdown.buildBlockOrder ( Markdown.dialects.Maruku.block ); -Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline ); - -var isArray = expose.isArray = function(obj) { - return (obj instanceof Array || typeof obj === "array" || Array.isArray(obj)); -} - -function extract_attr( jsonml ) { - return isArray(jsonml) - && jsonml.length > 1 - && typeof jsonml[ 1 ] === "object" - && !( isArray(jsonml[ 1 ]) ) - ? 
jsonml[ 1 ] - : undefined; -} - -function process_meta_hash( meta_string ) { - var meta = split_meta_hash( meta_string ), - attr = {}; - - for ( var i = 0; i < meta.length; ++i ) { - // id: #foo - if ( /^#/.test( meta[ i ] ) ) { - attr.id = meta[ i ].substring( 1 ); - } - // class: .foo - else if ( /^\./.test( meta[ i ] ) ) { - // if class already exists, append the new one - if ( attr['class'] ) { - attr['class'] = attr['class'] + meta[ i ].replace( /./, " " ); - } - else { - attr['class'] = meta[ i ].substring( 1 ); - } - } - // attribute: foo=bar - else if ( /=/.test( meta[ i ] ) ) { - var s = meta[ i ].split( /=/ ); - attr[ s[ 0 ] ] = s[ 1 ]; - } - } - - return attr; -} - -function split_meta_hash( meta_string ) { - var meta = meta_string.split( "" ), - parts = [ "" ], - in_quotes = false; - - while ( meta.length ) { - var letter = meta.shift(); - switch ( letter ) { - case " " : - // if we're in a quoted section, keep it - if ( in_quotes ) { - parts[ parts.length - 1 ] += letter; - } - // otherwise make a new part - else { - parts.push( "" ); - } - break; - case "'" : - case '"' : - // reverse the quotes and move straight on - in_quotes = !in_quotes; - break; - case "\\" : - // shift off the next letter to be used straight away. - // it was escaped so we'll keep it whatever it is - letter = meta.shift(); - default : - parts[ parts.length - 1 ] += letter; - break; - } - } - - return parts; -} - -/** - * renderJsonML( jsonml[, options] ) -> String - * - jsonml (Array): JsonML array to render to XML - * - options (Object): options - * - * Converts the given JsonML into well-formed XML. - * - * The options currently understood are: - * - * - root (Boolean): wether or not the root node should be included in the - * output, or just its children. The default `false` is to not include the - * root itself. - */ -expose.renderJsonML = function( jsonml, options ) { - options = options || {}; - // include the root element in the rendered output? 
- options.root = options.root || false; - - var content = []; - - if ( options.root ) { - content.push( render_tree( jsonml ) ); - } - else { - jsonml.shift(); // get rid of the tag - if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { - jsonml.shift(); // get rid of the attributes - } - - while ( jsonml.length ) { - content.push( render_tree( jsonml.shift() ) ); - } - } - - return content.join( "\n\n" ); -} - -function escapeHTML( text ) { - return text.replace( /&/g, "&" ) - .replace( /</g, "<" ) - .replace( />/g, ">" ) - .replace( /"/g, """ ) - .replace( /'/g, "'" ); -} - -function render_tree( jsonml ) { - // basic case - if ( typeof jsonml === "string" ) { - return escapeHTML( jsonml ); - } - - var tag = jsonml.shift(), - attributes = {}, - content = []; - if (tag == "RAW") return jsonml[0]; - - if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { - attributes = jsonml.shift(); - } - - while ( jsonml.length ) { - content.push( arguments.callee( jsonml.shift() ) ); - } - - var tag_attrs = ""; - for ( var a in attributes ) { - tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"'; - } - - // be careful about adding whitespace here for inline elements - return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">"; -} - -function convert_tree_to_html( tree, references, options ) { - options = options || {}; - - // shallow clone - var jsonml = tree.slice( 0 ); - - if (typeof options.preprocessTreeNode === "function") { - jsonml = options.preprocessTreeNode(jsonml, references); - } - - // Clone attributes if they exist - var attrs = extract_attr( jsonml ); - if ( attrs ) { - jsonml[ 1 ] = {}; - for ( var i in attrs ) { - jsonml[ 1 ][ i ] = attrs[ i ]; - } - attrs = jsonml[ 1 ]; - } - - // basic case - if ( typeof jsonml === "string" ) { - return jsonml; - } - - // convert this node - switch ( jsonml[ 0 ] ) { - case "header": - jsonml[ 0 ] = "h" + jsonml[ 1 
].level; - delete jsonml[ 1 ].level; - break; - case "bulletlist": - jsonml[ 0 ] = "ul"; - break; - case "numberlist": - jsonml[ 0 ] = "ol"; - break; - case "listitem": - jsonml[ 0 ] = "li"; - break; - case "para": - jsonml[ 0 ] = "p"; - break; - case "markdown": - jsonml[ 0 ] = "html"; - if ( attrs ) delete attrs.references; - break; - case "code_block": - jsonml[ 0 ] = "pre"; - var i = attrs ? 2 : 1; - var code = [ "code" ]; - code.push.apply( code, jsonml.splice( i ) ); - jsonml[ i ] = code; - break; - case "inlinecode": - jsonml[ 0 ] = "code"; - break; - case "img": - jsonml[ 1 ].src = jsonml[ 1 ].href; - delete jsonml[ 1 ].href; - break; - case "linebreak": - jsonml[0] = "br"; - break; - case "link": - jsonml[ 0 ] = "a"; - break; - case "link_ref": - jsonml[ 0 ] = "a"; - - // grab this ref and clean up the attribute node - var ref = references[ attrs.ref ]; - - // if the reference exists, make the link - if ( ref ) { - delete attrs.ref; - - // add in the href and title, if present - attrs.href = ref.href; - if ( ref.title ) { - attrs.title = ref.title; - } - - // get rid of the unneeded original text - delete attrs.original; - } - // the reference doesn't exist, so revert to plain text - else { - return attrs.original; - } - break; - } - - // convert all the children - var i = 1; - - // deal with the attribute node, if it exists - if ( attrs ) { - // if there are keys, skip over it - for ( var key in jsonml[ 1 ] ) { - i = 2; - } - // if there aren't, remove it - if ( i === 1 ) { - jsonml.splice( i, 1 ); - } - } - - for ( ; i < jsonml.length; ++i ) { - jsonml[ i ] = arguments.callee( jsonml[ i ], references, options ); - } - - return jsonml; -} - - -// merges adjacent text nodes into a single node -function merge_text_nodes( jsonml ) { - // skip the tag name and attribute hash - var i = extract_attr( jsonml ) ? 
2 : 1; - - while ( i < jsonml.length ) { - // if it's a string check the next item too - if ( typeof jsonml[ i ] === "string" ) { - if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { - // merge the second string into the first and remove it - jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; - } - else { - ++i; - } - } - // if it's not a string recurse - else { - arguments.callee( jsonml[ i ] ); - ++i; - } - } -} - -} )( (function() { - if ( typeof exports === "undefined" ) { - window.markdown = {}; - return window.markdown; - } - else { - return exports; - } -} )() ); diff --git a/doc/tutorial/mod.md b/doc/tutorial/mod.md deleted file mode 100644 index 9251e8fee4b..00000000000 --- a/doc/tutorial/mod.md +++ /dev/null @@ -1,237 +0,0 @@ -# Modules and crates - -The Rust namespace is divided into modules. Each source file starts -with its own module. - -## Local modules - -The `mod` keyword can be used to open a new, local module. In the -example below, `chicken` lives in the module `farm`, so, unless you -explicitly import it, you must refer to it by its long name, -`farm::chicken`. - - mod farm { - fn chicken() -> str { "cluck cluck" } - fn cow() -> str { "mooo" } - } - fn main() { - std::io::println(farm::chicken()); - } - -Modules can be nested to arbitrary depth. - -## Crates - -The unit of independent compilation in Rust is the crate. Libraries -tend to be packaged as crates, and your own programs may consist of -one or more crates. - -When compiling a single `.rs` file, the file acts as the whole crate. -You can compile it with the `--lib` compiler switch to create a shared -library, or without, provided that your file contains a `fn main` -somewhere, to create an executable. - -It is also possible to include multiple files in a crate. For this -purpose, you create a `.rc` crate file, which references any number of -`.rs` code files. 
A crate file could look like this: - - ## ignore - #[link(name = "farm", vers = "2.5", author = "mjh")]; - mod cow; - mod chicken; - mod horse; - -Compiling this file will cause `rustc` to look for files named -`cow.rs`, `chicken.rs`, `horse.rs` in the same directory as the `.rc` -file, compile them all together, and, depending on the presence of the -`--lib` switch, output a shared library or an executable. - -The `#[link(...)]` part provides meta information about the module, -which other crates can use to load the right module. More about that -later. - -To have a nested directory structure for your source files, you can -nest mods in your `.rc` file: - - ## ignore - mod poultry { - mod chicken; - mod turkey; - } - -The compiler will now look for `poultry/chicken.rs` and -`poultry/turkey.rs`, and export their content in `poultry::chicken` -and `poultry::turkey`. You can also provide a `poultry.rs` to add -content to the `poultry` module itself. - -## Using other crates - -Having compiled a crate with `--lib`, you can use it in another crate -with a `use` directive. We've already seen `use std` in several of the -examples, which loads in the [standard library][std]. - -[std]: http://doc.rust-lang.org/doc/std/index/General.html - -`use` directives can appear in a crate file, or at the top level of a -single-file `.rs` crate. They will cause the compiler to search its -library search path (which you can extend with `-L` switch) for a Rust -crate library with the right name. - -It is possible to provide more specific information when using an -external crate. - - ## ignore - use myfarm (name = "farm", vers = "2.7"); - -When a comma-separated list of name/value pairs is given after `use`, -these are matched against the attributes provided in the `link` -attribute of the crate file, and a crate is only used when the two -match. A `name` value can be given to override the name used to search -for the crate. 
So the above would import the `farm` crate under the -local name `myfarm`. - -Our example crate declared this set of `link` attributes: - - ## ignore - #[link(name = "farm", vers = "2.5", author = "mjh")]; - -The version does not match the one provided in the `use` directive, so -unless the compiler can find another crate with the right version -somewhere, it will complain that no matching crate was found. - -## The core library - -A set of basic library routines, mostly related to built-in datatypes -and the task system, are always implicitly linked and included in any -Rust program, unless the `--no-core` compiler switch is given. - -This library is document [here][core]. - -[core]: http://doc.rust-lang.org/doc/core/index/General.html - -## A minimal example - -Now for something that you can actually compile yourself. We have -these two files: - - // mylib.rs - #[link(name = "mylib", vers = "1.0")]; - fn world() -> str { "world" } - - ## ignore - // main.rs - use mylib; - fn main() { std::io::println("hello " + mylib::world()); } - -Now compile and run like this (adjust to your platform if necessary): - - ## notrust - > rustc --lib mylib.rs - > rustc main.rs -L . - > ./main - "hello world" - -## Importing - -When using identifiers from other modules, it can get tiresome to -qualify them with the full module path every time (especially when -that path is several modules deep). Rust allows you to import -identifiers at the top of a file, module, or block. - - use std; - import std::io::println; - fn main() { - println("that was easy"); - } - -It is also possible to import just the name of a module (`import -std::io;`, then use `io::println`), to import all identifiers exported -by a given module (`import std::io::*`), or to import a specific set -of identifiers (`import math::{min, max, pi}`). 
- -You can rename an identifier when importing using the `=` operator: - - import prnt = std::io::println; - -## Exporting - -By default, a module exports everything that it defines. This can be -restricted with `export` directives at the top of the module or file. - - mod enc { - export encrypt, decrypt; - const super_secret_number: int = 10; - fn encrypt(n: int) -> int { n + super_secret_number } - fn decrypt(n: int) -> int { n - super_secret_number } - } - -This defines a rock-solid encryption algorithm. Code outside of the -module can refer to the `enc::encrypt` and `enc::decrypt` identifiers -just fine, but it does not have access to `enc::super_secret_number`. - -## Namespaces - -Rust uses three different namespaces. One for modules, one for types, -and one for values. This means that this code is valid: - - mod buffalo { - type buffalo = int; - fn buffalo(buffalo: buffalo) -> buffalo { buffalo } - } - fn main() { - let buffalo: buffalo::buffalo = 1; - buffalo::buffalo(buffalo::buffalo(buffalo)); - } - -You don't want to write things like that, but it *is* very practical -to not have to worry about name clashes between types, values, and -modules. This allows us to have a module `core::str`, for example, even -though `str` is a built-in type name. - -## Resolution - -The resolution process in Rust simply goes up the chain of contexts, -looking for the name in each context. Nested functions and modules -create new contexts inside their parent function or module. A file -that's part of a bigger crate will have that crate's context as parent -context. - -Identifiers can shadow each others. In this program, `x` is of type -`int`: - - type t = str; - fn main() { - type t = int; - let x: t; - } - -An `import` directive will only import into the namespaces for which -identifiers are actually found. 
Consider this example: - - type bar = uint; - mod foo { fn bar() {} } - mod baz { - import foo::bar; - const x: bar = 20u; - } - -When resolving the type name `bar` in the `const` definition, the -resolver will first look at the module context for `baz`. This has an -import named `bar`, but that's a function, not a type, So it continues -to the top level and finds a type named `bar` defined there. - -Normally, multiple definitions of the same identifier in a scope are -disallowed. Local variables defined with `let` are an exception to -this—multiple `let` directives can redefine the same variable in a -single scope. When resolving the name of such a variable, the most -recent definition is used. - - fn main() { - let x = 10; - let x = x + 10; - assert x == 20; - } - -This makes it possible to rebind a variable without actually mutating -it, which is mostly useful for destructuring (which can rebind, but -not assign). diff --git a/doc/tutorial/order b/doc/tutorial/order deleted file mode 100644 index 9f751663901..00000000000 --- a/doc/tutorial/order +++ /dev/null @@ -1,13 +0,0 @@ -intro -setup -syntax -control -func -data -args -generic -mod -iface -ffi -task -test diff --git a/doc/tutorial/setup.md b/doc/tutorial/setup.md deleted file mode 100644 index f85052961a0..00000000000 --- a/doc/tutorial/setup.md +++ /dev/null @@ -1,56 +0,0 @@ -# Getting started - -## Installation - -FIXME Fill this in when the installation package is finished. - -## Compiling your first program - -Rust program files are, by convention, given the extension `.rs`. Say -we have a file `hello.rs` containing this program: - - use std; - fn main(args: [str]) { - std::io::println("hello world from '" + args[0] + "'!"); - } - -If the Rust compiler was installed successfully, running `rustc -hello.rs` will produce a binary called `hello` (or `hello.exe`). 
- -If you modify the program to make it invalid (for example, remove the -`use std` line), and then compile it, you'll see an error message like -this: - - ## notrust - hello.rs:2:4: 2:20 error: unresolved modulename: std - hello.rs:2 std::io::println("hello world!"); - ^~~~~~~~~~~~~~~~ - -The Rust compiler tries to provide useful information when it runs -into an error. - -## Anatomy of a Rust program - -In its simplest form, a Rust program is simply a `.rs` file with some -types and functions defined in it. If it has a `main` function, it can -be compiled to an executable. Rust does not allow code that's not a -declaration to appear at the top level of the file—all statements must -live inside a function. - -Rust programs can also be compiled as libraries, and included in other -programs. The `use std` directive that appears at the top of a lot of -examples imports the [standard library][std]. This is described in more -detail [later on](mod.html). - -[std]: http://doc.rust-lang.org/doc/std/index/General.html - -## Editing Rust code - -There are Vim highlighting and indentation scripts in the Rust source -distribution under `src/etc/vim/`, and an emacs mode under -`src/etc/emacs/`. - -[rust-mode]: https://github.com/marijnh/rust-mode - -Other editors are not provided for yet. If you end up writing a Rust -mode for your favorite editor, let us know so that we can link to it. diff --git a/doc/tutorial/syntax.md b/doc/tutorial/syntax.md deleted file mode 100644 index b0e3f002191..00000000000 --- a/doc/tutorial/syntax.md +++ /dev/null @@ -1,349 +0,0 @@ -# Syntax Basics - -## Braces - -Assuming you've programmed in any C-family language (C++, Java, -JavaScript, C#, or PHP), Rust will feel familiar. The main surface -difference to be aware of is that the bodies of `if` statements and of -loops *have* to be wrapped in brackets. Single-statement, bracket-less -bodies are not allowed.
- -If the verbosity of that bothers you, consider the fact that this -allows you to omit the parentheses around the condition in `if`, -`while`, and similar constructs. This will save you two characters -every time. As a bonus, you no longer have to spend any mental energy -on deciding whether you need to add braces or not, or on adding them -after the fact when adding a statement to an `if` branch. - -Accounting for these differences, the surface syntax of Rust -statements and expressions is C-like. Function calls are written -`myfunc(arg1, arg2)`, operators have mostly the same name and -precedence that they have in C, comments look the same, and constructs -like `if` and `while` are available: - - # fn call_a_function(_a: int) {} - fn main() { - if 1 < 2 { - while false { call_a_function(10 * 4); } - } else if 4 < 3 || 3 < 4 { - // Comments are C++-style too - } else { - /* Multi-line comment syntax */ - } - } - -## Expression syntax - -Though it isn't apparent in all code, there is a fundamental -difference between Rust's syntax and the predecessors in this family -of languages. A lot of things that are statements in C are expressions -in Rust. This allows for useless things like this (which passes -nil—the void type—to a function): - - # fn a_function(_a: ()) {} - a_function(while false {}); - -But also useful things like this: - - # fn the_stars_align() -> bool { false } - # fn something_else() -> bool { true } - let x = if the_stars_align() { 4 } - else if something_else() { 3 } - else { 0 }; - -This piece of code will bind the variable `x` to a value depending on -the conditions. Note the condition bodies, which look like `{ -expression }`. The lack of a semicolon after the last statement in a -braced block gives the whole block the value of that last expression. -If the branches of the `if` had looked like `{ 4; }`, the above -example would simply assign nil (void) to `x`.
But without the -semicolon, each branch has a different value, and `x` gets the value -of the branch that was taken. - -This also works for function bodies. This function returns a boolean: - - fn is_four(x: int) -> bool { x == 4 } - -In short, everything that's not a declaration (`let` for variables, -`fn` for functions, etcetera) is an expression. - -If all those things are expressions, you might conclude that you have -to add a terminating semicolon after *every* statement, even ones that -are not traditionally terminated with a semicolon in C (like `while`). -That is not the case, though. Expressions that end in a block only -need a semicolon if that block contains a trailing expression. `while` -loops do not allow trailing expressions, and `if` statements tend to -only have a trailing expression when you want to use their value for -something—in which case you'll have embedded it in a bigger statement, -like the `let x = ...` example above. - -## Identifiers - -Rust identifiers must start with an alphabetic character or an -underscore, and after that may contain any alphanumeric character, and -more underscores. - -NOTE: The parser doesn't currently recognize non-ascii alphabetic -characters. This is a bug that will eventually be fixed. - -The double-colon (`::`) is used as a module separator, so -`std::io::println` means 'the thing named `println` in the module -named `io` in the module named `std`'. - -Rust will normally emit warnings about unused variables. These can be -suppressed by using a variable name that starts with an underscore. - - fn this_warns(x: int) {} - fn this_doesnt(_x: int) {} - -## Variable declaration - -The `let` keyword, as we've seen, introduces a local variable.
Global -constants can be defined with `const`: - - use std; - const repeat: uint = 5u; - fn main() { - let count = 0u; - while count < repeat { - std::io::println("Hi!"); - count += 1u; - } - } - -## Types - -The `-> bool` in the `is_four` example is the way a function's return -type is written. For functions that do not return a meaningful value -(these conceptually return nil in Rust), you can optionally say `-> -()` (`()` is how nil is written), but usually the return annotation is -simply left off, as in the `fn main() { ... }` examples we've seen -earlier. - -Every argument to a function must have its type declared (for example, -`x: int`). Inside the function, type inference will be able to -automatically deduce the type of most locals (generic functions, which -we'll come back to later, will occasionally need additional -annotation). Locals can be written either with or without a type -annotation: - - // The type of this vector will be inferred based on its use. - let x = []; - # x = [3]; - // Explicitly say this is a vector of integers. - let y: [int] = []; - -The basic types are written like this: - -`()` -: Nil, the type that has only a single value. - -`bool` -: Boolean type, with values `true` and `false`. - -`int` -: A machine-pointer-sized integer. - -`uint` -: A machine-pointer-sized unsigned integer. - -`i8`, `i16`, `i32`, `i64` -: Signed integers with a specific size (in bits). - -`u8`, `u16`, `u32`, `u64` -: Unsigned integers with a specific size. - -`f32`, `f64` -: Floating-point types. - -`float` -: The largest floating-point type efficiently supported on the target machine. - -`char` -: A character is a 32-bit Unicode code point. - -`str` -: String type. A string contains a utf-8 encoded sequence of characters. - -These can be combined in composite types, which will be described in -more detail later on (the `T`s here stand for any other type): - -`[T]` -: Vector type. - -`[mutable T]` -: Mutable vector type. - -`(T1, T2)` -: Tuple type. 
Any arity above 1 is supported. - -`{field1: T1, field2: T2}` -: Record type. - -`fn(arg1: T1, arg2: T2) -> T3`, `lambda()`, `block()` -: Function types. - -`@T`, `~T`, `*T` -: Pointer types. - -Types can be given names with `type` declarations: - - type monster_size = uint; - -This will provide a synonym, `monster_size`, for unsigned integers. It -will not actually create a new type—`monster_size` and `uint` can be -used interchangeably, and using one where the other is expected is not -a type error. Read about [single-variant enums][sve] further on if you -need to create a type name that's not just a synonym. - -[sve]: data.html#single_variant_enum - -## Literals - -Integers can be written in decimal (`144`), hexadecimal (`0x90`), and -binary (`0b10010000`) base. Without suffix, an integer literal is -considered to be of type `int`. Add a `u` (`144u`) to make it a `uint` -instead. Literals of the fixed-size integer types can be created by -suffixing the literal with the type name (`255u8`, `50i64`, etc). - -Note that, in Rust, no implicit conversion between integer types -happens. If you are adding one to a variable of type `uint`, you must -type `v += 1u`—saying `+= 1` will give you a type error. - -Floating point numbers are written `0.0`, `1e6`, or `2.1e-4`. Without -suffix, the literal is assumed to be of type `float`. Suffixes `f32` -and `f64` can be used to create literals of a specific type. The -suffix `f` can be used to write `float` literals without a dot or -exponent: `3f`. - -The nil literal is written just like the type: `()`. The keywords -`true` and `false` produce the boolean literals. - -Character literals are written between single quotes, as in `'x'`. You -may put non-ascii characters between single quotes (your source files -should be encoded as utf-8). Rust understands a number of -character escapes, using the backslash character: - -`\n` -: A newline (unicode character 10). - -`\r` -: A carriage return (13). - -`\t` -: A tab character (9).
- -`\\`, `\'`, `\"` -: Simply escapes the following character. - -`\xHH`, `\uHHHH`, `\UHHHHHHHH` -: Unicode escapes, where the `H` characters are the hexadecimal digits that form the character code. - -String literals allow the same escape sequences. They are written -between double quotes (`"hello"`). Rust strings may contain newlines. -When a newline is preceded by a backslash, it, and all white space -following it, will not appear in the resulting string literal. So -this is equivalent to `"abc"`: - - let s = "a\ - b\ - c"; - -## Operators - -Rust's set of operators contains very few surprises. The main -difference with C is that `++` and `--` are missing, and that the -logical binary operators have higher precedence—in C, `x & 2 > 0` -comes out as `x & (2 > 0)`, in Rust, it means `(x & 2) > 0`, which is -more likely to be what you expect (unless you are a C veteran). - -Thus, binary arithmetic is done with `*`, `/`, `%`, `+`, and `-` -(multiply, divide, remainder, plus, minus). `-` is also a unary prefix -operator (there are no unary postfix operators in Rust) that does -negation. - -Binary shifting is done with `>>` (shift right), `>>>` (arithmetic -shift right), and `<<` (shift left). Logical bitwise operators are -`&`, `|`, and `^` (and, or, and exclusive or), and unary `!` for -bitwise negation (or boolean negation when applied to a boolean -value). - -The comparison operators are the traditional `==`, `!=`, `<`, `>`, -`<=`, and `>=`. Short-circuiting (lazy) boolean operators are written -`&&` (and) and `||` (or). - -Rust has a ternary conditional operator `?:`, as in: - - let badness = 12; - let message = badness < 10 ? "error" : "FATAL ERROR"; - -For type casting, Rust uses the binary `as` operator, which has a -precedence between the bitwise combination operators (`&`, `|`, `^`) -and the comparison operators. 
It takes an expression on the left side, -and a type on the right side, and will, if a meaningful conversion -exists, convert the result of the expression to the given type. - - let x: float = 4.0; - let y: uint = x as uint; - assert y == 4u; - -## Attributes - -<a name="conditional"></a> - -Every definition can be annotated with attributes. Attributes are meta -information that can serve a variety of purposes. One of those is -conditional compilation: - - #[cfg(target_os = "win32")] - fn register_win_service() { /* ... */ } - -This will cause the function to vanish without a trace during -compilation on a non-Windows platform, much like `#ifdef` in C (it -allows `cfg(flag=value)` and `cfg(flag)` forms, where the second -simply checks whether the configuration flag is defined at all). Flags -for `target_os` and `target_arch` are set by the compiler. It is -possible to set additional flags with the `--cfg` command-line option. - -Attributes are always wrapped in hash-braces (`#[attr]`). Inside the -braces, a small minilanguage is supported, whose interpretation -depends on the attribute that's being used. The simplest form is a -plain name (as in `#[test]`, which is used by the [built-in test -framework](test.html '')). A name-value pair can be provided using an `=` -character followed by a literal (as in `#[license = "BSD"]`, which is -a valid way to annotate a Rust program as being released under a -BSD-style license). Finally, you can have a name followed by a -comma-separated list of nested attributes, as in the `cfg` example -above, or in this [crate](mod.html) metadata declaration: - - ## ignore - #[link(name = "std", - vers = "0.1", - url = "http://rust-lang.org/src/std")]; - -An attribute without a semicolon following it applies to the -definition that follows it. When terminated with a semicolon, it -applies to the module or crate in which it appears. - -## Syntax extensions - -There are plans to support user-defined syntax (macros) in Rust. 
This -currently only exists in very limited form. - -The compiler defines a few built-in syntax extensions. The most useful -one is `#fmt`, a printf-style text formatting macro that is expanded -at compile time. - - std::io::println(#fmt("%s is %d", "the answer", 42)); - -`#fmt` supports most of the directives that [printf][pf] supports, but -will give you a compile-time error when the types of the directives -don't match the types of the arguments. - -[pf]: http://en.cppreference.com/w/cpp/io/c/fprintf - -All syntax extensions look like `#word`. Another built-in one is -`#env`, which will look up its argument as an environment variable at -compile-time. - - std::io::println(#env("PATH")); diff --git a/doc/tutorial/task.md b/doc/tutorial/task.md deleted file mode 100644 index 21cafe91f95..00000000000 --- a/doc/tutorial/task.md +++ /dev/null @@ -1,154 +0,0 @@ -# Tasks - -Rust supports a system of lightweight tasks, similar to what is found -in Erlang or other actor systems. Rust tasks communicate via messages -and do not share data. However, it is possible to send data without -copying it by making use of [unique boxes][uniques], which allow the -sending task to release ownership of a value, so that the receiving -task can keep on using it. - -[uniques]: data.html#unique-box - -NOTE: As Rust evolves, we expect the Task API to grow and change -somewhat. The tutorial documents the API as it exists today. - -## Spawning a task - -Spawning a task is done using the various spawn functions in the -module `task`. Let's begin with the simplest one, `task::spawn()`: - - let some_value = 22; - let child_task = task::spawn {|| - std::io::println("This executes in the child task."); - std::io::println(#fmt("%d", some_value)); - }; - -The argument to `task::spawn()` is a [unique -closure](func.html#unique) of type `fn~()`, meaning that it takes no -arguments and generates no return value. 
The effect of `task::spawn()` -is to fire up a child task that will execute the closure in parallel -with the creator. The result is a task id, here stored into the -variable `child_task`. - -## Ports and channels - -Now that we have spawned a child task, it would be nice if we could -communicate with it. This is done by creating a *port* with an -associated *channel*. A port is simply a location to receive messages -of a particular type. A channel is used to send messages to a port. -For example, imagine we wish to perform two expensive computations -in parallel. We might write something like: - - # fn some_expensive_computation() -> int { 42 } - # fn some_other_expensive_computation() {} - let port = comm::port::<int>(); - let chan = comm::chan::<int>(port); - let child_task = task::spawn {|| - let result = some_expensive_computation(); - comm::send(chan, result); - }; - some_other_expensive_computation(); - let result = comm::recv(port); - -Let's walk through this code line-by-line. The first line creates a -port for receiving integers: - - let port = comm::port::<int>(); - -This port is where we will receive the message from the child task -once it is complete. The second line creates a channel for sending -integers to the port `port`: - - # let port = comm::port::<int>(); - let chan = comm::chan::<int>(port); - -The channel will be used by the child to send a message to the port. -The next statement actually spawns the child: - - # fn some_expensive_computation() -> int { 42 } - # let port = comm::port::<int>(); - # let chan = comm::chan::<int>(port); - let child_task = task::spawn {|| - let result = some_expensive_computation(); - comm::send(chan, result); - }; - -This child will perform the expensive computation and send the result -over the channel.
Finally, the parent continues by performing -some other expensive computation and then waiting for the child's result -to arrive on the port: - - # fn some_other_expensive_computation() {} - # let port = comm::port::<int>(); - some_other_expensive_computation(); - let result = comm::recv(port); - -## Creating a task with a bi-directional communication path - -A very common thing to do is to spawn a child task where the parent -and child both need to exchange messages with each other. The function -`task::spawn_connected()` supports this pattern. We'll look briefly at -how it is used. - -To see how `spawn_connected()` works, we will create a child task -which receives `uint` messages, converts them to a string, and sends -the string in response. The child terminates when `0` is received. -Here is the function which implements the child task: - - fn stringifier(from_par: comm::port<uint>, - to_par: comm::chan<str>) { - let value: uint; - do { - value = comm::recv(from_par); - comm::send(to_par, uint::to_str(value, 10u)); - } while value != 0u; - } - -You can see that the function takes two parameters. The first is a -port used to receive messages from the parent, and the second is a -channel used to send messages to the parent. The body itself simply -loops, reading from the `from_par` port and then sending its response -to the `to_par` channel. The actual response itself is simply the -stringified version of the received value, `uint::to_str(value, 10u)`.
- -Here is the code for the parent task: - - # fn stringifier(from_par: comm::port<uint>, - # to_par: comm::chan<str>) {} - fn main() { - let t = task::spawn_connected(stringifier); - comm::send(t.to_child, 22u); - assert comm::recv(t.from_child) == "22"; - comm::send(t.to_child, 23u); - assert comm::recv(t.from_child) == "23"; - comm::send(t.to_child, 0u); - assert comm::recv(t.from_child) == "0"; - } - -The call to `spawn_connected()` on the first line will instantiate the -various ports and channels and start up the child task. The returned -value, `t`, is a record of type `task::connected_task<uint,str>`. In -addition to the task id of the child, this record defines two fields, -`from_child` and `to_child`, which contain the port and channel -respectively for communicating with the child. Those fields are used -here to send and receive three messages from the child task. - -## Joining a task - -The function `spawn_joinable()` is used to spawn a task that can later -be joined. This is implemented by having the child task send a message -when it has completed (either successfully or by failing). Therefore, -`spawn_joinable()` returns a structure containing both the task ID and -the port where this message will be sent---this structure type is -called `task::joinable_task`. The structure can be passed to -`task::join()`, which simply blocks on the port, waiting to receive -the message from the child task. - -## The supervisor relationship - -By default, failures in Rust propagate upward through the task tree. -We say that each task is supervised by its parent, meaning that if the -task fails, that failure is propagated to the parent task, which will -fail sometime later. This propagation can be disabled by using the -function `task::unsupervise()`, which disables error propagation from -the current task to its parent.
diff --git a/doc/tutorial/test.md b/doc/tutorial/test.md deleted file mode 100644 index 6adfbdcca96..00000000000 --- a/doc/tutorial/test.md +++ /dev/null @@ -1,70 +0,0 @@ -# Testing - -The Rust language has a facility for testing built into the language. -Tests can be interspersed with other code, and annotated with the -`#[test]` attribute. - - use std; - - fn twice(x: int) -> int { x + x } - - #[test] - fn test_twice() { - let i = -100; - while i < 100 { - assert twice(i) == 2 * i; - i += 1; - } - } - -When you compile the program normally, the `test_twice` function will -not be included. To compile and run such tests, compile with the -`--test` flag, and then run the result: - - ## notrust - > rustc --test twice.rs - > ./twice - running 1 tests - test test_twice ... ok - result: ok. 1 passed; 0 failed; 0 ignored - -Or, if we change the file to fail, for example by replacing `x + x` -with `x + 1`: - - ## notrust - running 1 tests - test test_twice ... FAILED - failures: - test_twice - result: FAILED. 0 passed; 1 failed; 0 ignored - -You can pass a command-line argument to a program compiled with -`--test` to run only the tests whose name matches the given string. If -we had, for example, test functions `test_twice`, `test_once_1`, and -`test_once_2`, running our program with `./twice test_once` would run -the latter two, and running it with `./twice test_once_2` would run -only the last. - -To indicate that a test is supposed to fail instead of pass, you can -give it a `#[should_fail]` attribute. - - use std; - - fn divide(a: float, b: float) -> float { - if b == 0f { fail; } - a / b - } - - #[test] - #[should_fail] - fn divide_by_zero() { divide(1f, 0f); } - -To disable a test completely, add an `#[ignore]` attribute. Running a -test runner (the program compiled with `--test`) with an `--ignored` -command-line flag will cause it to also run the tests labelled as -ignored. 
- -A program compiled as a test runner will have the configuration flag -`test` defined, so that you can add code that won't be included in a -normal compile with the `#[cfg(test)]` attribute (see [conditional -compilation](syntax.md#conditional)). diff --git a/doc/tutorial/web/default.css b/doc/tutorial/web/default.css deleted file mode 100644 index aff4f66f386..00000000000 --- a/doc/tutorial/web/default.css +++ /dev/null @@ -1,19 +0,0 @@ -.cm-s-default span.cm-keyword {color: #708;} -.cm-s-default span.cm-atom {color: #219;} -.cm-s-default span.cm-number {color: #164;} -.cm-s-default span.cm-def {color: #00f;} -.cm-s-default span.cm-variable {color: black;} -.cm-s-default span.cm-variable-2 {color: #05a;} -.cm-s-default span.cm-variable-3 {color: #085;} -.cm-s-default span.cm-property {color: black;} -.cm-s-default span.cm-operator {color: black;} -.cm-s-default span.cm-comment {color: #a50;} -.cm-s-default span.cm-string {color: #a11;} -.cm-s-default span.cm-string-2 {color: #f50;} -.cm-s-default span.cm-meta {color: #555;} -.cm-s-default span.cm-error {color: #f00;} -.cm-s-default span.cm-qualifier {color: #555;} -.cm-s-default span.cm-builtin {color: #30a;} -.cm-s-default span.cm-bracket {color: #cc7;} -.cm-s-default span.cm-tag {color: #170;} -.cm-s-default span.cm-attribute {color: #00c;} diff --git a/doc/tutorial/web/style.css b/doc/tutorial/web/style.css deleted file mode 100644 index 01f8523819d..00000000000 --- a/doc/tutorial/web/style.css +++ /dev/null @@ -1,31 +0,0 @@ -body { - padding: 1em; - margin: 0; - font-family: "Helvetica Neue", Helvetica, sans-serif; -} - -#content { - padding: 1em 6em; - max-width: 50em; -} - -h1 { font-size: 22pt; } -h2 { font-size: 17pt; } -h3 { font-size: 14pt; } - -pre { - margin: 1.1em 0; - padding: .4em .4em .4em 1em; - font-size: 120%; -} - -p.head { - font-size: 80%; - font-style: italic; - text-align: right; -} - -a, a:visited, a:link { - text-decoration: none; - color: #00438a; -} diff --git a/mk/docs.mk 
b/mk/docs.mk index e22500f1a56..e565863f107 100644 --- a/mk/docs.mk +++ b/mk/docs.mk @@ -12,7 +12,7 @@ ifeq ($(CFG_PANDOC),) $(info cfg: no pandoc found, omitting doc/rust.pdf) else -DOCS += doc/rust.html +DOCS += doc/rust.html doc/rust.css doc/rust.html: rust.md doc/version.md doc/keywords.md @$(call E, pandoc: $@) $(Q)$(CFG_PANDOC) \ @@ -57,6 +57,26 @@ doc/rust.pdf: doc/rust.tex endif endif endif + +###################################################################### +# Node (tutorial related) +###################################################################### + ifeq ($(CFG_NODE),) + $(info cfg: no node found, omitting doc/tutorial.html) + else + +DOCS += doc/tutorial.html +doc/tutorial.html: $(S)doc/tutorial.md + @$(call E, cp: $(S)doc/rust.css) + -$(Q)cp -a $(S)doc/rust.css doc/ 2> /dev/null + @$(call E, pandoc: $@) + $(Q)$(CFG_NODE) $(S)doc/prep.js --highlight $< | \ + $(CFG_PANDOC) --standalone --toc \ + --section-divs --number-sections \ + --from=markdown --to=html --css=rust.css \ + --output=$@ + + endif endif @@ -80,25 +100,6 @@ endif ###################################################################### -# Node (tutorial related) -###################################################################### -ifeq ($(CFG_NODE),) - $(info cfg: no node found, omitting doc/tutorial/web) -else - -DOCS += doc/tutorial/web/index.html -doc/tutorial/web/index.html: \ - $(wildcard $(S)doc/tutorial/*.md) - @$(call E, cp: $(S)doc/tutorial) - -$(Q)cp -a $(S)doc/tutorial doc/ 2> /dev/null - @$(call E, node: build.js) - $(Q)cd doc/tutorial && $(CFG_NODE) build.js - -endif - - - -###################################################################### # Naturaldocs (library reference related) ###################################################################### |
