speice.io/assets/js/761aff6b.81e1ad08.js

1 line
17 KiB
JavaScript

"use strict";(self.webpackChunkspeice_io=self.webpackChunkspeice_io||[]).push([["7366"],{35623:function(e,n,t){t.r(n),t.d(n,{assets:function(){return i},contentTitle:function(){return l},default:function(){return d},frontMatter:function(){return r},metadata:function(){return s},toc:function(){return c}});var s=t(65238),o=t(85893),a=t(50065);let r={slug:"2019/02/a-heaping-helping",title:"Allocations in Rust: Dynamic memory",date:new Date("2019-02-07T12:00:00.000Z"),authors:["bspeice"],tags:[]},l=void 0,i={authorsImageUrls:[void 0]},c=[{value:"Smart pointers",id:"smart-pointers",level:2},{value:"Collections",id:"collections",level:2},{value:"Heap Alternatives",id:"heap-alternatives",level:2},{value:"Tracing Allocators",id:"tracing-allocators",level:2}];function h(e){let n={a:"a",code:"code",em:"em",h2:"h2",li:"li",p:"p",pre:"pre",strong:"strong",ul:"ul",...(0,a.a)(),...e.components};return(0,o.jsxs)(o.Fragment,{children:[(0,o.jsxs)(n.p,{children:["Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and\nsome languages go to extreme lengths to protect users from themselves (Java, Python). In Rust, how\nthe language uses dynamic memory (also referred to as the ",(0,o.jsx)(n.strong,{children:"heap"}),") is a system called ",(0,o.jsx)(n.em,{children:"ownership"}),".\nAnd as the docs mention, ownership\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html",children:"is Rust's most unique feature"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["The heap is used in two situations; when the compiler is unable to predict either the ",(0,o.jsx)(n.em,{children:"total size of\nmemory needed"}),", or ",(0,o.jsx)(n.em,{children:"how long the memory is needed for"}),", it allocates space in the heap."]}),"\n",(0,o.jsxs)(n.p,{children:["This happens\npretty frequently; if you want to download the Google home page, you won't know how large it is\nuntil your program runs. And when you're finished with Google, we deallocate the memory so it can be\nused to store other webpages. If you're interested in a slightly longer explanation of the heap,\ncheck out\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html#the-stack-and-the-heap",children:"The Stack and the Heap"}),"\nin Rust's documentation."]}),"\n",(0,o.jsxs)(n.p,{children:["We won't go into detail on how the heap is managed; the\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html",children:"ownership documentation"}),' does a\nphenomenal job explaining both the "why" and "how" of memory management. Instead, we\'re going to\nfocus on understanding "when" heap allocations occur in Rust.']}),"\n",(0,o.jsx)(n.p,{children:"To start off, take a guess for how many allocations happen in the program below:"}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-rust",children:"fn main() {}\n"})}),"\n",(0,o.jsxs)(n.p,{children:["It's obviously a trick question; while no heap allocations occur as a result of that code, the setup\nneeded to call ",(0,o.jsx)(n.code,{children:"main"})," does allocate on the heap. Here's a way to show it:"]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-rust",children:'#![feature(integer_atomics)]\nuse std::alloc::{GlobalAlloc, Layout, System};\nuse std::sync::atomic::{AtomicU64, Ordering};\n\nstatic ALLOCATION_COUNT: AtomicU64 = AtomicU64::new(0);\n\nstruct CountingAllocator;\n\nunsafe impl GlobalAlloc for CountingAllocator {\n unsafe fn alloc(&self, layout: Layout) -> *mut u8 {\n ALLOCATION_COUNT.fetch_add(1, Ordering::SeqCst);\n System.alloc(layout)\n }\n\n unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {\n System.dealloc(ptr, layout);\n }\n}\n\n#[global_allocator]\nstatic A: CountingAllocator = CountingAllocator;\n\nfn main() {\n let x = ALLOCATION_COUNT.fetch_add(0, Ordering::SeqCst);\n println!("There were {} allocations before calling main!", x);\n}\n'})}),"\n",(0,o.jsxs)(n.p,{children:["--\n",(0,o.jsx)(n.a,{href:"https://play.rust-lang.org/?version=nightly&mode=debug&edition=2018&gist=fb5060025ba79fc0f906b65a4ef8eb8e",children:"Rust Playground"})]}),"\n",(0,o.jsxs)(n.p,{children:["As of the time of writing, there are five allocations that happen before ",(0,o.jsx)(n.code,{children:"main"})," is ever called."]}),"\n",(0,o.jsx)(n.p,{children:"But when we want to understand more practically where heap allocation happens, we'll follow this\nguide:"}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Smart pointers hold their contents in the heap"}),"\n",(0,o.jsx)(n.li,{children:"Collections are smart pointers for many objects at a time, and reallocate when they need to grow"}),"\n"]}),"\n",(0,o.jsx)(n.p,{children:'Finally, there are two "addendum" issues that are important to address when discussing Rust and the\nheap:'}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:"Non-heap alternatives to many standard library types are available."}),"\n",(0,o.jsx)(n.li,{children:"Special allocators to track memory behavior should be used to benchmark code."}),"\n"]}),"\n",(0,o.jsx)(n.h2,{id:"smart-pointers",children:"Smart pointers"}),"\n",(0,o.jsx)(n.p,{children:'The first thing to note are the "smart pointer" types. When you have data that must outlive the\nscope in which it is declared, or your data is of unknown or dynamic size, you\'ll make use of these\ntypes.'}),"\n",(0,o.jsxs)(n.p,{children:["The term ",(0,o.jsx)(n.a,{href:"https://en.wikipedia.org/wiki/Smart_pointer",children:"smart pointer"})," comes from C++, and while it's\nclosely linked to a general design pattern of\n",(0,o.jsx)(n.a,{href:"https://en.cppreference.com/w/cpp/language/raii",children:'"Resource Acquisition Is Initialization"'}),", we'll\nuse it here specifically to describe objects that are responsible for managing ownership of data\nallocated on the heap. The smart pointers available in the ",(0,o.jsx)(n.code,{children:"alloc"})," crate should look mostly\nfamiliar:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/alloc/boxed/struct.Box.html",children:(0,o.jsx)(n.code,{children:"Box"})})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/alloc/rc/struct.Rc.html",children:(0,o.jsx)(n.code,{children:"Rc"})})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/alloc/sync/struct.Arc.html",children:(0,o.jsx)(n.code,{children:"Arc"})})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/alloc/borrow/enum.Cow.html",children:(0,o.jsx)(n.code,{children:"Cow"})})}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["The ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/",children:"standard library"})," also defines some smart pointers to manage\nheap objects, though more than can be covered here. Some examples are:"]}),"\n",(0,o.jsxs)(n.ul,{children:["\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/sync/struct.RwLock.html",children:(0,o.jsx)(n.code,{children:"RwLock"})})}),"\n",(0,o.jsx)(n.li,{children:(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/sync/struct.Mutex.html",children:(0,o.jsx)(n.code,{children:"Mutex"})})}),"\n"]}),"\n",(0,o.jsxs)(n.p,{children:["Finally, there is one ",(0,o.jsx)(n.a,{href:"https://www.merriam-webster.com/dictionary/gotcha",children:'"gotcha"'}),": ",(0,o.jsx)(n.strong,{children:"cell types"}),"\n(like ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/stable/core/cell/struct.RefCell.html",children:(0,o.jsx)(n.code,{children:"RefCell"})}),") look and behave\nsimilarly, but ",(0,o.jsx)(n.strong,{children:"don't involve heap allocation"}),". The\n",(0,o.jsxs)(n.a,{href:"https://doc.rust-lang.org/stable/core/cell/index.html",children:[(0,o.jsx)(n.code,{children:"core::cell"})," docs"]})," have more information."]}),"\n",(0,o.jsxs)(n.p,{children:["When a smart pointer is created, the data it is given is placed in heap memory and the location of\nthat data is recorded in the smart pointer. Once the smart pointer has determined it's safe to\ndeallocate that memory (when a ",(0,o.jsx)(n.code,{children:"Box"})," has\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/stable/std/boxed/index.html",children:"gone out of scope"})," or a reference count\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/alloc/rc/index.html",children:"goes to zero"}),"), the heap space is reclaimed. We can\nprove these types use heap memory by looking at code:"]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-rust",children:'use std::rc::Rc;\nuse std::sync::Arc;\nuse std::borrow::Cow;\n\npub fn my_box() {\n // Drop at assembly line 1640\n Box::new(0);\n}\n\npub fn my_rc() {\n // Drop at assembly line 1650\n Rc::new(0);\n}\n\npub fn my_arc() {\n // Drop at assembly line 1660\n Arc::new(0);\n}\n\npub fn my_cow() {\n // Drop at assembly line 1672\n Cow::from("drop");\n}\n'})}),"\n",(0,o.jsxs)(n.p,{children:["-- ",(0,o.jsx)(n.a,{href:"https://godbolt.org/z/4AMQug",children:"Compiler Explorer"})]}),"\n",(0,o.jsx)(n.h2,{id:"collections",children:"Collections"}),"\n",(0,o.jsxs)(n.p,{children:["Collection types use heap memory because their contents have dynamic size; they will request more\nmemory ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve",children:"when needed"}),", and can\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit",children:"release memory"})," when it's\nno longer necessary. This dynamic property forces Rust to heap allocate everything they contain. In\na way, ",(0,o.jsx)(n.strong,{children:"collections are smart pointers for many objects at a time"}),". Common types that fall under\nthis umbrella are ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html",children:(0,o.jsx)(n.code,{children:"Vec"})}),",\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html",children:(0,o.jsx)(n.code,{children:"HashMap"})}),", and\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/stable/alloc/string/struct.String.html",children:(0,o.jsx)(n.code,{children:"String"})})," (not\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/primitive.str.html",children:(0,o.jsx)(n.code,{children:"str"})}),")."]}),"\n",(0,o.jsxs)(n.p,{children:["While collections store the objects they own in heap memory, ",(0,o.jsx)(n.em,{children:"creating new collections will not\nallocate on the heap"}),". This is a bit weird; if we call ",(0,o.jsx)(n.code,{children:"Vec::new()"}),", the assembly shows a\ncorresponding call to ",(0,o.jsx)(n.code,{children:"real_drop_in_place"}),":"]}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-rust",children:"pub fn my_vec() {\n // Drop in place at line 481\n Vec::<u8>::new();\n}\n"})}),"\n",(0,o.jsxs)(n.p,{children:["-- ",(0,o.jsx)(n.a,{href:"https://godbolt.org/z/1WkNtC",children:"Compiler Explorer"})]}),"\n",(0,o.jsx)(n.p,{children:"But because the vector has no elements to manage, no calls to the allocator will ever be dispatched:"}),"\n",(0,o.jsx)(n.pre,{children:(0,o.jsx)(n.code,{className:"language-rust",children:'use std::alloc::{GlobalAlloc, Layout, System};\nuse std::sync::atomic::{AtomicBool, Ordering};\n\nfn main() {\n // Turn on panicking if we allocate on the heap\n DO_PANIC.store(true, Ordering::SeqCst);\n\n // Interesting bit happens here\n let x: Vec<u8> = Vec::new();\n drop(x);\n\n // Turn panicking back off, some deallocations occur\n // after main as well.\n DO_PANIC.store(false, Ordering::SeqCst);\n}\n\n#[global_allocator]\nstatic A: PanicAllocator = PanicAllocator;\nstatic DO_PANIC: AtomicBool = AtomicBool::new(false);\nstruct PanicAllocator;\n\nunsafe impl GlobalAlloc for PanicAllocator {\n unsafe fn alloc(&self, layout: Layout) -> *mut u8 {\n if DO_PANIC.load(Ordering::SeqCst) {\n panic!("Unexpected allocation.");\n }\n System.alloc(layout)\n }\n\n unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {\n if DO_PANIC.load(Ordering::SeqCst) {\n panic!("Unexpected deallocation.");\n }\n System.dealloc(ptr, layout);\n }\n}\n'})}),"\n",(0,o.jsxs)(n.p,{children:["--\n",(0,o.jsx)(n.a,{href:"https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=831a297d176d015b1f9ace01ae416cc6",children:"Rust Playground"})]}),"\n",(0,o.jsxs)(n.p,{children:["Other standard library types follow the same behavior; make sure to check out\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html#method.new",children:(0,o.jsx)(n.code,{children:"HashMap::new()"})}),",\nand ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/string/struct.String.html#method.new",children:(0,o.jsx)(n.code,{children:"String::new()"})}),"."]}),"\n",(0,o.jsx)(n.h2,{id:"heap-alternatives",children:"Heap Alternatives"}),"\n",(0,o.jsx)(n.p,{children:"While it is a bit strange to speak of the stack after spending time with the heap, it's worth\npointing out that some heap-allocated objects in Rust have stack-based counterparts provided by\nother crates. If you have need of the functionality, but want to avoid allocating, there are\ntypically alternatives available."}),"\n",(0,o.jsxs)(n.p,{children:["When it comes to some standard library smart pointers\n(",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/sync/struct.RwLock.html",children:(0,o.jsx)(n.code,{children:"RwLock"})})," and\n",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/sync/struct.Mutex.html",children:(0,o.jsx)(n.code,{children:"Mutex"})}),"), stack-based alternatives are\nprovided in crates like ",(0,o.jsx)(n.a,{href:"https://crates.io/crates/parking_lot",children:"parking_lot"})," and\n",(0,o.jsx)(n.a,{href:"https://crates.io/crates/spin",children:"spin"}),". You can check out\n",(0,o.jsx)(n.a,{href:"https://docs.rs/lock_api/0.1.5/lock_api/struct.RwLock.html",children:(0,o.jsx)(n.code,{children:"lock_api::RwLock"})}),",\n",(0,o.jsx)(n.a,{href:"https://docs.rs/lock_api/0.1.5/lock_api/struct.Mutex.html",children:(0,o.jsx)(n.code,{children:"lock_api::Mutex"})}),", and\n",(0,o.jsx)(n.a,{href:"https://mvdnes.github.io/rust-docs/spin-rs/spin/struct.Once.html",children:(0,o.jsx)(n.code,{children:"spin::Once"})})," if you're in need\nof synchronization primitives."]}),"\n",(0,o.jsxs)(n.p,{children:[(0,o.jsx)(n.a,{href:"https://crates.io/crates/thread-id",children:"thread_id"})," may be necessary if you're implementing an allocator\nbecause ",(0,o.jsx)(n.a,{href:"https://doc.rust-lang.org/std/thread/struct.ThreadId.html",children:(0,o.jsx)(n.code,{children:"thread::current().id()"})})," uses a\n",(0,o.jsxs)(n.a,{href:"https://doc.rust-lang.org/stable/src/std/sys_common/thread_info.rs.html#17-36",children:[(0,o.jsx)(n.code,{children:"thread_local!"})," structure"]}),"\nthat needs heap allocation."]}),"\n",(0,o.jsx)(n.h2,{id:"tracing-allocators",children:"Tracing Allocators"}),"\n",(0,o.jsxs)(n.p,{children:["When writing performance-sensitive code, there's no alternative to measuring your code. If you\ndidn't write a benchmark,\n",(0,o.jsx)(n.a,{href:"https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263",children:"you don't care about it's performance"}),"\nYou should never rely on your instincts when\n",(0,o.jsx)(n.a,{href:"https://www.youtube.com/watch?v=NH1Tta7purM",children:"a microsecond is an eternity"}),"."]}),"\n",(0,o.jsxs)(n.p,{children:["Similarly, there's great work going on in Rust with allocators that keep track of what they're doing\n(like ",(0,o.jsx)(n.a,{href:"https://crates.io/crates/alloc_counter",children:(0,o.jsx)(n.code,{children:"alloc_counter"})}),"). When it comes to tracking heap\nbehavior, it's easy to make mistakes; please write tests and make sure you have tools to guard\nagainst future issues."]})]})}function d(e={}){let{wrapper:n}={...(0,a.a)(),...e.components};return n?(0,o.jsx)(n,{...e,children:(0,o.jsx)(h,{...e})}):h(e)}},50065:function(e,n,t){t.d(n,{Z:function(){return l},a:function(){return r}});var s=t(67294);let o={},a=s.createContext(o);function r(e){let n=s.useContext(a);return s.useMemo(function(){return"function"==typeof e?e(n):{...n,...e}},[n,e])}function l(e){let n;return n=e.disableParentContext?"function"==typeof e.components?e.components(o):e.components||o:r(e.components),s.createElement(a.Provider,{value:n},e.children)}},65238:function(e){e.exports=JSON.parse('{"permalink":"/2019/02/a-heaping-helping","source":"@site/blog/2019-02-07-a-heaping-helping/index.mdx","title":"Allocations in Rust: Dynamic memory","description":"Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and","date":"2019-02-07T12:00:00.000Z","tags":[],"readingTime":5.86,"hasTruncateMarker":true,"authors":[{"name":"Bradlee Speice","socials":{"github":"https://github.com/bspeice"},"key":"bspeice","page":null}],"frontMatter":{"slug":"2019/02/a-heaping-helping","title":"Allocations in Rust: Dynamic memory","date":"2019-02-07T12:00:00.000Z","authors":["bspeice"],"tags":[]},"unlisted":false,"lastUpdatedAt":1731204300000,"prevItem":{"title":"Allocations in Rust: Compiler optimizations","permalink":"/2019/02/08/compiler-optimizations"},"nextItem":{"title":"Allocations in Rust: Fixed memory","permalink":"/2019/02/stacking-up"}}')}}]);