From b54abb6c09834dfefd3e5f97c781002a7ddc2854 Mon Sep 17 00:00:00 2001
From: Bradlee Speice
Date: Sat, 9 Feb 2019 23:20:41 -0500
Subject: [PATCH] First draft of everything done. Going to get it all arranged, proof-read, and then release.

---
 _drafts/a-heaping-helping.md                 |   6 +-
 _drafts/compiler-optimizations.md            | 135 ++++++++++++-------
 _drafts/stacking-up.md                       |  15 ++-
 _drafts/understanding-allocations-in-rust.md |  13 +-
 4 files changed, 100 insertions(+), 69 deletions(-)

diff --git a/_drafts/a-heaping-helping.md b/_drafts/a-heaping-helping.md
index ddc7e6b..4d5d69a 100644
--- a/_drafts/a-heaping-helping.md
+++ b/_drafts/a-heaping-helping.md
@@ -242,8 +242,10 @@ that needs heap allocation.
 # Tracing Allocators
 
 When writing performance-sensitive code, there's no alternative to measuring your code.
-[Measure first](https://youtu.be/nXaxk27zwlk?t=583), because you should never rely on
-your instincts when [a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
+If you didn't write a benchmark,
+[you don't care about its performance](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263).
+You should never rely on your instincts when
+[a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
 
 Similarly, there's great work going on in Rust with allocators that keep track of what
 they're doing. [`alloc_counter`](https://crates.io/crates/alloc_counter) was designed
diff --git a/_drafts/compiler-optimizations.md b/_drafts/compiler-optimizations.md
index d74c140..2e96008 100644
--- a/_drafts/compiler-optimizations.md
+++ b/_drafts/compiler-optimizations.md
@@ -20,10 +20,12 @@ both the Rust compiler and the LLVM optimizers are incredibly sophisticated,
 and we'll step back and let them do their job.
 
 Similar to ["What Has My Compiler Done For Me Lately?"](https://www.youtube.com/watch?v=bSkpMdDe4g4),
-we're focusing on interesting things the Rust language (and LLVM!) can do.
-We'll still be looking at assembly code to understand what's going on,
-but it's important to mention again: **please use automated tools like
-[alloc-counter](https://crates.io/crates/alloc_counter) to double-check memory behavior**.
+we're focusing on interesting things the Rust language (and LLVM!) can do
+as regards memory management. We'll still be looking at assembly code to
+understand what's going on, but it's important to mention again:
+**please use automated tools like
+[alloc-counter](https://crates.io/crates/alloc_counter) to double-check
+memory behavior if it's something you care about**.
 It's far too easy to mis-read assembly in large code sections, you should always
 have an automated tool verify behavior if you care about memory usage.
 
@@ -34,63 +36,23 @@ There will, however, be an opera of optimization.
 
 # The Case of the Disappearing Box
 
+Our first optimization comes when LLVM can reason that the lifetime of an object
+is sufficiently short that heap allocations aren't necessary. In these cases,
+LLVM will move the allocation to the stack instead! The way this interacts
+with `#[inline]` attributes is a bit opaque, but the important part is that LLVM
+can sometimes do better than the baseline Rust language.
+
 ```rust
 use std::alloc::{GlobalAlloc, Layout, System};
 use std::sync::atomic::{AtomicBool, Ordering};
 
-fn allocate_box() {
-    let _x = Box::new(0);
-}
-
 pub fn main() {
     // Turn on panicking if we allocate on the heap
     DO_PANIC.store(true, Ordering::SeqCst);
 
     // This code will only run with the mode set to "Release".
     // If you try running in "Debug", you'll get a panic.
-    allocate_box();
-
-    // Turn off panicking, as there are some deallocations
-    // when we exit main.
-    DO_PANIC.store(false, Ordering::SeqCst);
-}
-
-#[global_allocator]
-static A: PanicAllocator = PanicAllocator;
-static DO_PANIC: AtomicBool = AtomicBool::new(false);
-struct PanicAllocator;
-
-unsafe impl GlobalAlloc for PanicAllocator {
-    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
-        if DO_PANIC.load(Ordering::SeqCst) {
-            panic!("Unexpected allocation.");
-        }
-        System.alloc(layout)
-    }
-
-    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
-        if DO_PANIC.load(Ordering::SeqCst) {
-            panic!("Unexpected deallocation.");
-        }
-        System.dealloc(ptr, layout);
-    }
-}
-```
--- [Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=3fe2846dac6755dbb7bb90342d0bf135)
-
-# Vectors of Usual Size
-
-```rust
-use std::alloc::{GlobalAlloc, Layout, System};
-use std::sync::atomic::{AtomicBool, Ordering};
-
-fn main() {
-    // Turn on panicking if we allocate on the heap
-    DO_PANIC.store(true, Ordering::SeqCst);
-
-    // If the compiler can predict how large a vector will be,
-    // it can optimize out the heap storage needed.
-    let x: Vec<u64> = Vec::with_capacity(5);
+    let x = Box::new(0);
     drop(x);
 
     // Turn off panicking, as there are some deallocations
@@ -119,10 +81,79 @@ unsafe impl GlobalAlloc for PanicAllocator {
     }
 }
 ```
--- [Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=5e9761b63243018d094829d901dd85c4)
+-- [Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=614994a20e362bf04de868b19daf5ca4)
+
+# Vectors of Usual Size
+
+With some collections, LLVM can predict how large they will become
+and allocate the entire size on the stack instead of the heap.
+This works with both the pre-allocation (`Vec::with_capacity`)
+*and re-allocation* (`Vec::push`) methods for collection types.
+Not only can LLVM predict sizing if you reserve the full size up front,
+it can also see through the resizing operations and find the total size.
+While this specific optimization is unlikely to come up in production
+usage, it's cool to note that LLVM does a considerable amount of work
+to understand what code actually does.
+
+```rust
+use std::alloc::{GlobalAlloc, Layout, System};
+use std::sync::atomic::{AtomicBool, Ordering};
+
+fn main() {
+    // Turn on panicking if we allocate on the heap
+    DO_PANIC.store(true, Ordering::SeqCst);
+
+    // If the compiler can predict how large a vector will be,
+    // it can optimize out the heap storage needed. This also
+    // works with `Vec::with_capacity()`, but the push case
+    // is a bit more interesting.
+    let mut x: Vec<u64> = Vec::new();
+    x.push(12);
+    assert_eq!(x[0], 12);
+    drop(x);
+
+    // Turn off panicking, as there are some deallocations
+    // when we exit main.
+    DO_PANIC.store(false, Ordering::SeqCst);
+}
+
+#[global_allocator]
+static A: PanicAllocator = PanicAllocator;
+static DO_PANIC: AtomicBool = AtomicBool::new(false);
+struct PanicAllocator;
+
+unsafe impl GlobalAlloc for PanicAllocator {
+    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+        if DO_PANIC.load(Ordering::SeqCst) {
+            panic!("Unexpected allocation.");
+        }
+        System.alloc(layout)
+    }
+
+    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
+        if DO_PANIC.load(Ordering::SeqCst) {
+            panic!("Unexpected deallocation.");
+        }
+        System.dealloc(ptr, layout);
+    }
+}
+```
+-- [Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=1dfccfcf63d8800e644a3b948f1eeb7b)
 
 # Dr. Array or: How I Learned to Love the Optimizer
 
+Finally, this isn't so much about LLVM figuring out different memory behavior as
+it is about LLVM totally stripping out code that has no side effects. Optimizations of
+this type have a lot of nuance to them; if you're not careful, they can
+make your benchmarks look
+[impossibly good](https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=1199).
+In Rust, the `black_box` function (in both
+[`libtest`](https://doc.rust-lang.org/1.1.0/test/fn.black_box.html) and
+[`criterion`](https://docs.rs/criterion/0.2.10/criterion/fn.black_box.html))
+will tell the compiler to disable this kind of optimization. But if you let
+LLVM remove unnecessary code, you can end up with programs that
+would have previously caused errors running just fine:
+
 ```rust
 #[derive(Default)]
 struct TwoFiftySix {
diff --git a/_drafts/stacking-up.md b/_drafts/stacking-up.md
index ebfe353..27dfff9 100644
--- a/_drafts/stacking-up.md
+++ b/_drafts/stacking-up.md
@@ -39,8 +39,8 @@ fastest allocator is the one you never use. As such, we're not going to discuss
 but we'll focus instead on the conditions that enable the Rust compiler to use
 the faster stack-based allocation for variables.
 
-With that in mind, let's get into the details. How do we know when Rust will or will not use
-stack allocation for objects we create? Looking at other languages, it's often easy to delineate
+So, **how do we know when Rust will or will not use stack allocation for objects we create?**
+Looking at other languages, it's often easy to delineate
 between stack and heap. Managed memory languages (Python, Java,
 [C#](https://blogs.msdn.microsoft.com/ericlippert/2010/09/30/the-truth-about-value-types/)) place everything on the heap.
 JIT compilers ([PyPy](https://www.pypy.org/),
@@ -51,8 +51,9 @@ is one) being the way to use heap memory. Old C++ has the [`new`](https://stacko
 keyword, though modern C++/C++11 is more complicated with [RAII](https://en.cppreference.com/w/cpp/language/raii).
 
 For Rust specifically, the principle is this: **stack allocation will be used for everything
-that doesn't involve "smart pointers" and collections.** If we're interested in dissecting it though,
-there are three things we pay attention to:
+that doesn't involve "smart pointers" and collections.** We'll skip over a precise definition
+of the term "smart pointer" for now, and instead discuss what we should watch for when talking
+about the memory region used for allocation:
 
 1. Stack manipulation instructions (`push`, `pop`, and `add`/`sub` of the `rsp` register)
    indicate allocation of stack memory:
@@ -85,8 +86,8 @@ there are three things we pay attention to:
    the Rust standard library only defines `Drop` implementations for types that involve
    heap allocation.
 3. If you don't want to inspect the assembly, use a custom allocator that's able to track
-   and alert when heap allocations occur. As an unashamed plug, [qadapt](https://crates.io/crates/qadapt)
-   was designed for exactly this purpose.
+   and alert when heap allocations occur. Crates like [`alloc_counter`](https://crates.io/crates/alloc_counter)
+   are designed for exactly this purpose.
 
 With all that in mind, let's talk about situations in which we're guaranteed to use stack memory:
 
@@ -96,7 +97,7 @@ With all that in mind, let's talk about situations in which we're guaranteed to
   will not change the memory region used.
 - Enums and unions are stack-allocated.
 - [Arrays](https://doc.rust-lang.org/std/primitive.array.html) are always stack-allocated.
-- Closures capture their arguments on the stack
+- Closures capture their arguments on the stack.
 - Generics will use stack allocation, even with dynamic dispatch.
 - [`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html) types are guaranteed to be
   stack-allocated, and copying them will be done in stack memory.
diff --git a/_drafts/understanding-allocations-in-rust.md b/_drafts/understanding-allocations-in-rust.md
index fe5dfc8..f363079 100644
--- a/_drafts/understanding-allocations-in-rust.md
+++ b/_drafts/understanding-allocations-in-rust.md
@@ -32,8 +32,8 @@ section at the end for easy future citation. To that end, a table of contents is
 - [The Whole World: Global Memory Usage](/2019/02/the-whole-world)
 - [Stacking Up: Fixed Memory](/2019/02/stacking-up)
 - [A Heaping Helping: Dynamic Memory](/2019/02/a-heaping-helping)
-- [Compiler Optimizations: What It's Done For You Lately](#compiler-optimizations-what-its-done-for-you-lately)
-- Summary: When Does Rust Allocate?
+- [Compiler Optimizations: What It's Done For You Lately](/2019/02/compiler-optimizations)
+- [Summary: What Are the Rules?](/2019/02/summary)
 
 # Foreword
 
@@ -96,6 +96,9 @@ Now let's address some conditions and caveats before going much further:
   a [refresher](https://stackoverflow.com/a/26026278/1454178) on the `push` and `pop`
   [instructions](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html) was helpful while writing this post.
+- I've tried to be precise in saying only what I can prove using the tools (ASM, docs)
+  that are available. That said, if I've gotten something wrong, please reach out
+  and let me know - [bradlee@speice.io](mailto:bradlee@speice.io).
 
 Finally, I'll do what I can to flag potential future changes but the Rust docs
 have a notice worth repeating:
@@ -103,9 +106,3 @@ have a notice worth repeating:
 > Rust does not currently have a rigorously and formally defined memory model.
 >
 > -- [the docs](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html)
-
-# Compiler Optimizations: What It's Done For You Lately
-
-1. Box<> getting inlined into stack allocations
-2. Vec::push() === Vec::with_capacity() for fixed/predictable capacities
-3. Inlining statics that don't change value