diff --git a/configure b/configure index 70b2bb75a38fc..b2929d632b637 100755 --- a/configure +++ b/configure @@ -735,7 +735,9 @@ do make_dir $h/test/doc-tutorial make_dir $h/test/doc-tutorial-ffi make_dir $h/test/doc-tutorial-macros - make_dir $h/test/doc-tutorial-borrowed-ptr + make_dir $h/test/doc-tutorial-lifetimes + make_dir $h/test/doc-tutorial-rustpkg + make_dir $h/test/doc-tutorial-pointers make_dir $h/test/doc-tutorial-container make_dir $h/test/doc-tutorial-tasks make_dir $h/test/doc-tutorial-conditions diff --git a/doc/tutorial-borrowed-ptr.md b/doc/tutorial-lifetimes.md similarity index 85% rename from doc/tutorial-borrowed-ptr.md rename to doc/tutorial-lifetimes.md index 1da1d046878a7..d0a1f653a2f74 100644 --- a/doc/tutorial-borrowed-ptr.md +++ b/doc/tutorial-lifetimes.md @@ -1,26 +1,21 @@ -% Rust Borrowed Pointers Tutorial +% Lifetimes Tutorial # Introduction -Borrowed pointers are one of the more flexible and powerful tools available in -Rust. A borrowed pointer can point anywhere: into the managed or exchange -heap, into the stack, and even into the interior of another data structure. A -borrowed pointer is as flexible as a C pointer or C++ reference. However, -unlike C and C++ compilers, the Rust compiler includes special static checks -that ensure that programs use borrowed pointers safely. Another advantage of -borrowed pointers is that they are invisible to the garbage collector, so -working with borrowed pointers helps reduce the overhead of automatic memory -management. - -Despite their complete safety, a borrowed pointer's representation at runtime -is the same as that of an ordinary pointer in a C program. They introduce zero -overhead. The compiler does all safety checks at compile time. - -Although borrowed pointers have rather elaborate theoretical -underpinnings (region pointers), the core concepts will be familiar to -anyone who has worked with C or C++. 
Therefore, the best way to explain -how they are used—and their limitations—is probably just to work -through several examples. +"Lifetimes" are a concept that comes into play with Rust's borrowed pointers, +represented by `&`. Borrowed pointers are one of the more flexible and powerful +tools available in Rust. A borrowed pointer can point anywhere: into a managed +box or owned box, into the stack, and even into the interior of another data +structure. A borrowed pointer is as efficient as a C pointer or C++ reference. +However, unlike C and C++ compilers, the Rust compiler includes special static +checks that ensure that programs use borrowed pointers safely. Another +advantage of borrowed pointers is that they do not require the garbage +collector, unless you borrow a pointer to a managed box. + +Although borrowed pointers have rather elaborate theoretical underpinnings +(region pointers), the core concepts will be familiar to anyone who has worked +with C or C++. Therefore, the best way to explain how they are used—and their +limitations—is probably just to work through several examples. # By example @@ -103,79 +98,6 @@ should make intuitive sense: you must wait for a borrower to return the value that you lent it (that is, wait for the borrowed pointer to go out of scope) before you can make full use of it again. -# Other uses for the & operator - -In the previous example, the value `on_the_stack` was defined like so: - -~~~ -# struct Point {x: float, y: float} -let on_the_stack: Point = Point {x: 3.0, y: 4.0}; -~~~ - -This declaration means that code can only pass `Point` by value to other -functions. As a consequence, we had to explicitly take the address of -`on_the_stack` to get a borrowed pointer. 
Sometimes however it is more -convenient to move the & operator into the definition of `on_the_stack`: - -~~~ -# struct Point {x: float, y: float} -let on_the_stack2: &Point = &Point {x: 3.0, y: 4.0}; -~~~ - -Applying `&` to an rvalue (non-assignable location) is just a convenient -shorthand for creating a temporary and taking its address. A more verbose -way to write the same code is: - -~~~ -# struct Point {x: float, y: float} -let tmp = Point {x: 3.0, y: 4.0}; -let on_the_stack2 : &Point = &tmp; -~~~ - -# Taking the address of fields - -As in C, the `&` operator is not limited to taking the address of -local variables. It can also take the address of fields or -individual array elements. For example, consider this type definition -for `rectangle`: - -~~~ -struct Point {x: float, y: float} // as before -struct Size {w: float, h: float} // as before -struct Rectangle {origin: Point, size: Size} -~~~ - -Now, as before, we can define rectangles in a few different ways: - -~~~ -# struct Point {x: float, y: float} -# struct Size {w: float, h: float} // as before -# struct Rectangle {origin: Point, size: Size} -let rect_stack = &Rectangle {origin: Point {x: 1f, y: 2f}, - size: Size {w: 3f, h: 4f}}; -let rect_managed = @Rectangle {origin: Point {x: 3f, y: 4f}, - size: Size {w: 3f, h: 4f}}; -let rect_owned = ~Rectangle {origin: Point {x: 5f, y: 6f}, - size: Size {w: 3f, h: 4f}}; -~~~ - -In each case, we can extract out individual subcomponents with the `&` -operator. 
For example, I could write: - -~~~ -# struct Point {x: float, y: float} // as before -# struct Size {w: float, h: float} // as before -# struct Rectangle {origin: Point, size: Size} -# let rect_stack = &Rectangle {origin: Point {x: 1f, y: 2f}, size: Size {w: 3f, h: 4f}}; -# let rect_managed = @Rectangle {origin: Point {x: 3f, y: 4f}, size: Size {w: 3f, h: 4f}}; -# let rect_owned = ~Rectangle {origin: Point {x: 5f, y: 6f}, size: Size {w: 3f, h: 4f}}; -# fn compute_distance(p1: &Point, p2: &Point) -> float { 0f } -compute_distance(&rect_stack.origin, &rect_managed.origin); -~~~ - -which would borrow the field `origin` from the rectangle on the stack -as well as from the managed box, and then compute the distance between them. - # Borrowing managed boxes and rooting We’ve seen a few examples so far of borrowing heap boxes, both managed diff --git a/doc/tutorial-pointers.md b/doc/tutorial-pointers.md new file mode 100644 index 0000000000000..7be912f1242e3 --- /dev/null +++ b/doc/tutorial-pointers.md @@ -0,0 +1,475 @@ +% Rust Pointers Tutorial + +Rust's pointers are one of its more unique and compelling features. Pointers +are also one of the more confusing topics for newcomers to Rust. They can also +be confusing for people coming from other languages that support pointers, such +as C++. This tutorial will help you understand this important topic. + +# You don't actually need pointers + +I have good news for you: you probably don't need to care about pointers, +especially as you're getting started. Think of it this way: Rust is a language +that emphasizes safety. Pointers, as the joke goes, are very pointy: it's easy +to accidentally stab yourself. Therefore, Rust is made in a way such that you +don't need them very often. + +"But tutorial!" you may cry. 
"My co-worker wrote a function that looks like +this: + +```rust +fn succ(x: &int) -> int { *x + 1 } +``` + +So I wrote this code to try it out: + +```rust +fn main() { + let number = 5; + let succ_number = succ(number); + println!("{}", succ_number); +} +``` + +And now I get an error: + +``` +error: mismatched types: expected `&int` but found `` (expected &-ptr but found integral variable) +``` + +What gives? It needs a pointer! Therefore I have to use pointers!" + +Turns out, you don't. All you need is a reference. Try this on for size: + +```rust +fn main() { + let number = 5; + let succ_number = succ(&number); + println!("{}", succ_number); +} +``` + +It's that easy! One extra little `&` there. This code will run, and print `6`. + +That's all you need to know. Your co-worker could have written the function +like this: + +```rust +fn succ(x: int) -> int { x + 1 } + +fn main() { + let number = 5; + let succ_number = succ(number); + println!("{}", succ_number); +} +``` + +No pointers even needed. Then again, this is a simple example. I assume that +your real-world `succ` function is more complicated, and maybe your co-worker +had a good reason for `x` to be a pointer of some kind. In that case, references +are your best friend. Don't worry about it, life is too short. + +However. + +Here are the use-cases for pointers. I've prefixed them with the name of the +pointer that satisfies that use-case: + +1. Owned: ~Trait must be a pointer, becuase you don't know the size of the +object, so indirection is mandatory. +2. Owned: You need a recursive data structure. These can be infinite sized, so +indirection is mandatory. +3. Owned: A very, very, very rare situation in which you have a *huge* chunk of +data that you wish to pass to many methods. Passing a pointer will make this +more efficient. If you're coming from another language where this technique is +common, such as C++, please read "A note..." below. +4. 
Managed: Having only a single owner to a piece of data would be inconvenient +or impossible. This is usually only useful when a program is very large or very +complicated. Using a managed pointer will activate Rust's garbage collection +mechanism. +5. Borrowed: You're writing a function, and you need a pointer, but you don't +care about its ownership. If you make the argument a borrowed pointer, callers +can send in whatever kind they want. + +Five exceptions. That's it. Otherwise, you shouldn't need them. Be skeptical +of pointers in Rust: use them for a deliberate purpose, not just to make the +compiler happy. + +## A note for those proficient in pointers + +If you're coming to Rust from a language like C or C++, you may be used to +passing things by reference, or passing things by pointer. In some languages, +like Java, you can't even have objects without a pointer to them. Therefore, if +you were writing this Rust code: + +```rust +struct Point { + x: int, + y: int, +} + +fn main() { + let p0 = Point { x: 5, y: 10}; + let p1 = transform(p0); + println!("{:?}", p1); +} + +``` + +I think you'd implement `transform` like this: + +```rust +fn transform(p: &Point) -> Point { + Point { x: p.x + 1, y: p.y + 1} +} + +// and change this: +let p1 = transform(&p0); +``` + +This does work, but you don't need to create those references! The better way to write this is simply: + +```rust +struct Point { + x: int, + y: int, +} + +fn transform(p: Point) -> Point { + Point { x: p.x + 1, y: p.y + 1} +} + +fn main() { + let p0 = Point { x: 5, y: 10}; + let p1 = transform(p0); + println!("{:?}", p1); +} +``` + +But won't this be inefficient? Well, that's a complicated question, but it's +important to know that Rust, like C and C++, stores aggregate data types +'unboxed,' whereas languages like Java and Ruby store these types as 'boxed.' +For smaller structs, this way will be more efficient. For larger ones, it may +be less so. But don't reach for that pointer until you must! 
Make sure that the +struct is large enough by performing some tests before you add in the +complexity of pointers. + +# Owned Pointers + +Owned pointers are the conceptually simplest kind of pointer in Rust. A rough +approximation of owned pointers follows: + +1. Only one owned pointer may exist to a particular place in memory. It may be +borrowed from that owner, however. +2. The Rust compiler uses static analysis to determine where the pointer is in +scope, and handles allocating and de-allocating that memory. Owned pointers are +not garbage collected. + +These two properties make for three use cases. + +## References to Traits + +Traits must be referenced through a pointer, because the struct that implements +the trait may be a different size than a different struct that implements the +trait. Therefore, unboxed traits don't make any sense, and aren't allowed. + +## Recursive Data Structures + +Sometimes, you need a recursive data structure. The simplest is known as a 'cons list': + +```rust +enum List<T> { + Nil, + Cons(T, ~List<T>), +} + +fn main() { + let list: List<int> = Cons(1, ~Cons(2, ~Cons(3, ~Nil))); + println!("{:?}", list); +} +``` + +This prints: + +``` +Cons(1, ~Cons(2, ~Cons(3, ~Nil))) +``` + +The inner lists _must_ be an owned pointer, because we can't know how many +elements are in the list. Without knowing the length, we don't know the size, +and therefore require the indirection that pointers offer. + +## Efficiency + +This should almost never be a concern, but because creating an owned pointer +boxes its value, it therefore makes referring to the value the size of the box. +This may make passing an owned pointer to a function less expensive than +passing the value itself. Don't worry yourself with this case until you've +proved that it's an issue through benchmarks. 
+ +For example, this will work: + +```rust +struct Point { + x: int, + y: int, +} + +fn main() { + let a = Point { x: 10, y: 20 }; + do spawn { + println(a.x.to_str()); + } +} +``` + +This struct is tiny, so it's fine. If `Point` were large, this would be more +efficient: + +```rust +struct Point { + x: int, + y: int, +} + +fn main() { + let a = ~Point { x: 10, y: 20 }; + do spawn { + println(a.x.to_str()); + } +} +``` + +Now it'll be copying a pointer-sized chunk of memory rather than the whole +struct. + +# Managed Pointers + +Managed pointers, notated by an `@`, are used when having a single owner for +some data isn't convenient or possible. This generally happens when your +program is very large and complicated. + +For example, let's say you're using an owned pointer, and you want to do this: + +```rust +struct Point { + x: int, + y: int, +} + +fn main() { + let a = ~Point { x: 10, y: 20 }; + let b = a; + println(b.x.to_str()); + println(a.x.to_str()); +} +``` + +You'll get this error: + +``` +test.rs:10:12: 10:13 error: use of moved value: `a` +test.rs:10 println(a.x.to_str()); + ^ +test.rs:8:8: 8:9 note: `a` moved here because it has type `~Point`, which is moved by default (use `ref` to override) +test.rs:8 let b = a; + ^ +``` + +As the message says, owned pointers only allow for one owner at a time. When you assign `a` to `b`, `a` becomes invalid. Change your code to this, however: + +```rust +struct Point { + x: int, + y: int, +} + +fn main() { + let a = @Point { x: 10, y: 20 }; + let b = a; + println(b.x.to_str()); + println(a.x.to_str()); +} +``` + +And it works: + +``` +10 +10 +``` + +So why not just use managed pointers everywhere? There are two big drawbacks to +managed pointers: + +1. They activate Rust's garbage collector. Other pointer types don't share this +drawback. +2. You cannot pass this data to another task. Shared ownership across +concurrency boundaries is the source of endless pain in other languages, so +Rust does not let you do this. 
+ +# Borrowed Pointers + +Borrowed pointers are the third major kind of pointer Rust supports. They are +simultaneously the simplest and the most complicated kind. Let me explain: +they're called 'borrowed' pointers because they claim no ownership over the +data they're pointing to. They're just borrowing it for a while. So in that +sense, they're simple: just keep whatever ownership the data already has. For +example: + +```rust +use std::num::sqrt; + +struct Point { + x: float, + y: float, +} + +fn compute_distance(p1: &Point, p2: &Point) -> float { + let x_d = p1.x - p2.x; + let y_d = p1.y - p2.y; + + sqrt(x_d * x_d + y_d * y_d) +} + +fn main() { + let origin = @Point { x: 0.0, y: 0.0 }; + let p1 = ~Point { x: 5.0, y: 3.0 }; + + println!("{:?}", compute_distance(origin, p1)); +} +``` + +This prints `5.83095189`. You can see that the `compute_distance` function +takes in two borrowed pointers, but we give it a managed and unique pointer. Of +course, if this were a real program, we wouldn't have any of these pointers, +they're just there to demonstrate the concepts. + +So how is this hard? Well, because we're ignoring ownership, the compiler needs +to take great care to make sure that everything is safe. Despite their complete +safety, a borrowed pointer's representation at runtime is the same as that of +an ordinary pointer in a C program. They introduce zero overhead. The compiler +does all safety checks at compile time. + +This theory is called 'region pointers,' and involves a concept called +'lifetimes'. Here's the simple explanation: would you expect this code to +compile? + +```rust +fn main() { + println(x.to_str()); + let x = 5; +} +``` + +Probably not. That's because you know that the name `x` is valid from where +it's declared to when it goes out of scope. In this case, that's the end of +the `main` function. So you know this code will cause an error. We call this +duration a 'lifetime'. 
Let's try a more complex example: + +```rust +fn main() { + let mut x = ~5; + if(*x < 10) { + let y = &x; + println!("Oh no: {:?}", y); + return; + } + *x = *x - 1; + println!("Oh no: {:?}", x); +} +``` + +Here, we're borrowing a pointer to `x` inside of the `if`. The compiler, however, +is able to determine that that pointer will go out of scope without `x` being +mutated, and therefore, lets us pass. This wouldn't work: + +```rust +fn main() { + let mut x = ~5; + if(*x < 10) { + let y = &x; + *x = *x - 1; + + println!("Oh no: {:?}", y); + return; + } + *x = *x - 1; + println!("Oh no: {:?}", x); +} +``` + +It gives this error: + +``` +test.rs:5:8: 5:10 error: cannot assign to `*x` because it is borrowed +test.rs:5 *x = *x - 1; + ^~ +test.rs:4:16: 4:18 note: borrow of `*x` occurs here +test.rs:4 let y = &x; + ^~ +``` + +As you might guess, this kind of analysis is complex for a human, and therefore +hard for a computer, too! There is an entire [tutorial devoted to borrowed +pointers and lifetimes](tutorial-lifetimes.html) that goes into lifetimes in +great detail, so if you want the full details, check that out. + +# Returning Pointers + +We've talked a lot about functions that accept various kinds of pointers, but +what about returning them? Here's the rule of thumb: only return a unique or +managed pointer if you were given one in the first place. + +What does that mean? Don't do this: + +```rust +fn foo(x: ~int) -> ~int { + return ~*x; +} + +fn main() { + let x = ~5; + let y = foo(x); +} +``` + +Do this: + +```rust +fn foo(x: ~int) -> int { + return *x; +} + +fn main() { + let x = ~5; + let y = ~foo(x); +} +``` + +This gives you flexibility, without sacrificing performance. For example, this will +also work: + +```rust +fn foo(x: ~int) -> int { + return *x; +} + +fn main() { + let x = ~5; + let y = @foo(x); +} +``` + +You may think that this gives us terrible performance: return a value and then +immediately box it up?!?! Isn't that the worst of both worlds? 
Rust is smarter +than that. There is no copy in this code. `main` allocates enough room for the +`@int`, passes it into `foo` as `x`, and then `foo` writes the value into the +new box. This writes the return value directly into the allocated box. + +This is important enough that it bears repeating: pointers are not for optimizing +returning values from your code. Allow the caller to choose how they want to +use your output. + + +# Related Resources + +* [Lifetimes tutorial](tutorial-lifetimes.html) diff --git a/doc/tutorial.md b/doc/tutorial.md index 2f9a84d984f87..ead14991b1011 100644 --- a/doc/tutorial.md +++ b/doc/tutorial.md @@ -1129,10 +1129,8 @@ intuitive sense: you must wait for a borrowed value to be returned (that is, for the borrowed pointer to go out of scope) before you can make full use of it again. -For a more in-depth explanation of borrowed pointers, read the -[borrowed pointer tutorial][borrowtut]. - -[borrowtut]: tutorial-borrowed-ptr.html +For a more in-depth explanation of borrowed pointers and lifetimes, read the +[lifetimes and borrowed pointer tutorial][lifetimes]. ## Freezing @@ -2774,8 +2772,6 @@ but for this tutorial it's only important to know that you can optionally annota extern mod rust = "github.com/mozilla/rust"; // pretend Rust is an simple library ~~~ -[rustpkg]: rustpkg.html - ## Crate metadata and settings For every crate you can define a number of metadata items, such as link name, version or author. @@ -2981,7 +2977,8 @@ re-export a bunch of 'officially blessed' crates that get managed with `rustpkg` Now that you know the essentials, check out any of the additional tutorials on individual topics. -* [Borrowed pointers][borrow] +* [Pointers][pointers-tutorial] +* [Lifetimes][lifetimes] * [Tasks and communication][tasks] * [Macros][macros] * [The foreign function interface][ffi] @@ -2991,11 +2988,12 @@ tutorials on individual topics. 
There is further documentation on the [wiki], however those tend to be even more out of date as this document. -[borrow]: tutorial-borrowed-ptr.html +[lifetimes]: tutorial-lifetimes.html [tasks]: tutorial-tasks.html [macros]: tutorial-macros.html [ffi]: tutorial-ffi.html [rustpkg]: tutorial-rustpkg.html +[pointers-tutorial]: tutorial-pointers.html [wiki]: https://github.com/mozilla/rust/wiki/Docs diff --git a/mk/docs.mk b/mk/docs.mk index e38590188b3e4..955bf704225d6 100644 --- a/mk/docs.mk +++ b/mk/docs.mk @@ -130,8 +130,8 @@ doc/tutorial-ffi.html: tutorial-ffi.md doc/version_info.html doc/rust.css --include-before-body=doc/version_info.html \ --output=$@ -DOCS += doc/tutorial-borrowed-ptr.html -doc/tutorial-borrowed-ptr.html: tutorial-borrowed-ptr.md doc/version_info.html doc/rust.css +DOCS += doc/tutorial-lifetimes.html +doc/tutorial-lifetimes.html: tutorial-lifetimes.md doc/version_info.html doc/rust.css @$(call E, pandoc: $@) $(Q)$(CFG_NODE) $(S)doc/prep.js --highlight $< | \ $(CFG_PANDOC) --standalone --toc \ @@ -140,6 +140,17 @@ doc/tutorial-borrowed-ptr.html: tutorial-borrowed-ptr.md doc/version_info.html d --include-before-body=doc/version_info.html \ --output=$@ +DOCS += doc/tutorial-pointers.html +doc/tutorial-pointers.html: tutorial-pointers.md doc/version_info.html doc/rust.css + @$(call E, pandoc: $@) + $(Q)$(CFG_NODE) $(S)doc/prep.js --highlight $< | \ + $(CFG_PANDOC) --standalone --toc \ + --section-divs --number-sections \ + --from=markdown --to=html --css=rust.css \ + --include-before-body=doc/version_info.html \ + --output=$@ + + DOCS += doc/tutorial-tasks.html doc/tutorial-tasks.html: tutorial-tasks.md doc/version_info.html doc/rust.css @$(call E, pandoc: $@)