mirror of
https://github.com/bspeice/speice.io
synced 2025-01-13 03:00:03 -05:00
1 line
14 KiB
JavaScript
1 line
14 KiB
JavaScript
"use strict";(self.webpackChunkspeice_io=self.webpackChunkspeice_io||[]).push([["4760"],{60072:function(e,t,n){n.r(t),n.d(t,{assets:function(){return c},contentTitle:function(){return o},default:function(){return d},frontMatter:function(){return r},metadata:function(){return a},toc:function(){return l}});var a=n(94111),s=n(85893),i=n(50065);let r={slug:"2018/10/case-study-optimization",title:"A case study in heaptrack",date:new Date("2018-10-08T12:00:00.000Z"),authors:["bspeice"],tags:[]},o=void 0,c={authorsImageUrls:[void 0]},l=[{value:"Curiosity",id:"curiosity",level:2},{value:"Turning on the System Allocator",id:"turning-on-the-system-allocator",level:2},{value:"Running heaptrack",id:"running-heaptrack",level:2},{value:"Reading Flamegraphs",id:"reading-flamegraphs",level:2},{value:"Optimizing dtparse",id:"optimizing-dtparse",level:2},{value:"Conclusion",id:"conclusion",level:2}];function h(e){let t={a:"a",blockquote:"blockquote",code:"code",em:"em",h2:"h2",hr:"hr",img:"img",p:"p",pre:"pre",strong:"strong",...(0,i.a)(),...e.components};return(0,s.jsxs)(s.Fragment,{children:[(0,s.jsx)(t.p,{children:"I remember early in my career someone joking that:"}),"\n",(0,s.jsxs)(t.blockquote,{children:["\n",(0,s.jsx)(t.p,{children:"Programmers have it too easy these days. They should learn to develop in low memory environments\nand be more efficient."}),"\n"]}),"\n",(0,s.jsxs)(t.p,{children:["...though it's not like the first code I wrote was for a\n",(0,s.jsx)(t.a,{href:"https://web.archive.org/web/20180924060530/https://education.ti.com/en/products/calculators/graphing-calculators/ti-84-plus-se",children:"graphing calculator"}),"\npacking a whole 24KB of RAM."]}),"\n",(0,s.jsxs)(t.p,{children:["But the principle remains: be efficient with the resources you have, because\n",(0,s.jsx)(t.a,{href:"http://exo-blog.blogspot.com/2007/09/what-intel-giveth-microsoft-taketh-away.html",children:"what Intel giveth, Microsoft taketh away"}),"."]}),"\n",(0,s.jsxs)(t.p,{children:["My professional work is focused on this kind of efficiency; low-latency financial markets demand\nthat you understand at a deep level ",(0,s.jsx)(t.em,{children:"exactly"})," what your code is doing. As I continue experimenting\nwith Rust for personal projects, it's exciting to bring a utilitarian mindset with me: there's\nflexibility for the times I pretend to have a garbage collector, and flexibility for the times that\nI really care about how memory is used."]}),"\n",(0,s.jsx)(t.p,{children:"This post is a (small) case study in how I went from the former to the latter. And ultimately, it's\nintended to be a starting toolkit to empower analysis of your own code."}),"\n",(0,s.jsx)(t.h2,{id:"curiosity",children:"Curiosity"}),"\n",(0,s.jsxs)(t.p,{children:["When I first started building the ",(0,s.jsx)(t.a,{href:"https://crates.io/crates/dtparse",children:"dtparse"})," crate, my intention was to mirror as closely as possible\nthe equivalent ",(0,s.jsx)(t.a,{href:"https://github.com/dateutil/dateutil",children:"Python library"}),". Python, as you may know, is garbage collected. Very\nrarely is memory usage considered in Python, and I likewise wasn't paying too much attention when\n",(0,s.jsx)(t.code,{children:"dtparse"})," was first being built."]}),"\n",(0,s.jsxs)(t.p,{children:["This lackadaisical approach to memory works well enough, and I'm not planning on making ",(0,s.jsx)(t.code,{children:"dtparse"}),'\nhyper-efficient. But every so often, I\'ve wondered: "what exactly is going on in memory?" With the\nadvent of Rust 1.28 and the\n',(0,s.jsx)(t.a,{href:"https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html",children:"Global Allocator trait"}),", I had a really\ngreat idea: ",(0,s.jsx)(t.em,{children:"build a custom allocator that allows you to track your own allocations."})," That way, you\ncan do things like writing tests for both correct results and correct memory usage. I gave it a\n",(0,s.jsx)(t.a,{href:"https://crates.io/crates/qadapt",children:"shot"}),", but learned very quickly: ",(0,s.jsx)(t.strong,{children:"never write your own allocator"}),'. It went from "fun\nweekend project" to "I have literally no idea what my computer is doing" at breakneck speed.']}),"\n",(0,s.jsxs)(t.p,{children:["Instead, I'll highlight a separate path I took to make sense of my memory usage: ",(0,s.jsx)(t.a,{href:"https://github.com/KDE/heaptrack",children:"heaptrack"}),"."]}),"\n",(0,s.jsx)(t.h2,{id:"turning-on-the-system-allocator",children:"Turning on the System Allocator"}),"\n",(0,s.jsxs)(t.p,{children:["This is the hardest part of the post. Because Rust uses\n",(0,s.jsx)(t.a,{href:"https://github.com/rust-lang/rust/pull/27400#issue-41256384",children:"its own allocator"})," by default,\n",(0,s.jsx)(t.code,{children:"heaptrack"})," is unable to properly record unmodified Rust code. To remedy this, we'll make use of the\n",(0,s.jsx)(t.code,{children:"#[global_allocator]"})," attribute."]}),"\n",(0,s.jsxs)(t.p,{children:["Specifically, in ",(0,s.jsx)(t.code,{children:"lib.rs"})," or ",(0,s.jsx)(t.code,{children:"main.rs"}),", add this:"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-rust",children:"use std::alloc::System;\n\n#[global_allocator]\nstatic GLOBAL: System = System;\n"})}),"\n",(0,s.jsx)(t.p,{children:"...and that's it. Everything else comes essentially for free."}),"\n",(0,s.jsx)(t.h2,{id:"running-heaptrack",children:"Running heaptrack"}),"\n",(0,s.jsxs)(t.p,{children:["Assuming you've installed heaptrack ",(0,s.jsx)("small",{children:"(Homebrew in Mac, package manager\nin Linux, ??? in Windows)"}),", all that's left is to fire up your application:"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{children:"heaptrack my_application\n"})}),"\n",(0,s.jsxs)(t.p,{children:["It's that easy. After the program finishes, you'll see a file in your local directory with a name\nlike ",(0,s.jsx)(t.code,{children:"heaptrack.my_appplication.XXXX.gz"}),". If you load that up in ",(0,s.jsx)(t.code,{children:"heaptrack_gui"}),", you'll see\nsomething like this:"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"heaptrack",src:n(67029).Z+"",width:"1312",height:"320"})}),"\n",(0,s.jsx)(t.hr,{}),"\n",(0,s.jsx)(t.p,{children:"And even these pretty colors:"}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"pretty colors",src:n(74078).Z+"",width:"1284",height:"715"})}),"\n",(0,s.jsx)(t.h2,{id:"reading-flamegraphs",children:"Reading Flamegraphs"}),"\n",(0,s.jsxs)(t.p,{children:["To make sense of our memory usage, we're going to focus on that last picture - it's called a\n",(0,s.jsx)(t.a,{href:"http://www.brendangregg.com/flamegraphs.html",children:'"flamegraph"'}),". These charts are typically used to\nshow how much time your program spends executing each function, but they're used here to show how\nmuch memory was allocated during those functions instead."]}),"\n",(0,s.jsxs)(t.p,{children:["For example, we can see that all executions happened during the ",(0,s.jsx)(t.code,{children:"main"})," function:"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"allocations in main",src:n(49206).Z+"",width:"654",height:"343"})}),"\n",(0,s.jsxs)(t.p,{children:["...and within that, all allocations happened during ",(0,s.jsx)(t.code,{children:"dtparse::parse"}),":"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"allocations in dtparse",src:n(18644).Z+"",width:"654",height:"315"})}),"\n",(0,s.jsxs)(t.p,{children:["...and within ",(0,s.jsx)(t.em,{children:"that"}),", allocations happened in two different places:"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"allocations in parseinfo",src:n(66746).Z+"",width:"654",height:"372"})}),"\n",(0,s.jsxs)(t.p,{children:["Now I apologize that it's hard to see, but there's one area specifically that stuck out as an issue:\n",(0,s.jsxs)(t.strong,{children:["what the heck is the ",(0,s.jsx)(t.code,{children:"Default"})," thing doing?"]})]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"pretty colors",src:n(78308).Z+"",width:"1284",height:"715"})}),"\n",(0,s.jsx)(t.h2,{id:"optimizing-dtparse",children:"Optimizing dtparse"}),"\n",(0,s.jsxs)(t.p,{children:["See, I knew that there were some allocations during calls to ",(0,s.jsx)(t.code,{children:"dtparse::parse"}),", but I was totally\nwrong about where the bulk of allocations occurred in my program. Let me post the code and see if\nyou can spot the mistake:"]}),"\n",(0,s.jsx)(t.pre,{children:(0,s.jsx)(t.code,{className:"language-rust",children:"/// Main entry point for using `dtparse`.\npub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {\n let res = Parser::default().parse(\n timestr, None, None, false, false,\n None, false,\n &HashMap::new(),\n )?;\n\n Ok((res.0, res.1))\n}\n"})}),"\n",(0,s.jsxs)(t.blockquote,{children:["\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/4d7c5dd99572823fa4a390b483c38ab020a2172f/src/lib.rs#L1286",children:"dtparse"})}),"\n"]}),"\n",(0,s.jsx)(t.hr,{}),"\n",(0,s.jsxs)(t.p,{children:["Because ",(0,s.jsx)(t.code,{children:"Parser::parse"})," requires a mutable reference to itself, I have to create a new\n",(0,s.jsx)(t.code,{children:"Parser::default"})," every time it receives a string. This is excessive! We'd rather have an immutable\nparser that can be re-used, and avoid allocating memory in the first place."]}),"\n",(0,s.jsxs)(t.p,{children:["Armed with that information, I put some time in to\n",(0,s.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/commit/741afa34517d6bc1155713bbc5d66905fea13fad#diff-b4aea3e418ccdb71239b96952d9cddb6",children:"make the parser immutable"}),".\nNow that I can re-use the same parser over and over, the allocations disappear:"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"allocations cleaned up",src:n(20809).Z+"",width:"1272",height:"712"})}),"\n",(0,s.jsxs)(t.p,{children:["In total, we went from requiring 2 MB of memory in\n",(0,s.jsx)(t.a,{href:"https://crates.io/crates/dtparse/1.0.2",children:"version 1.0.2"}),":"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"memory before",src:n(83346).Z+"",width:"717",height:"116"})}),"\n",(0,s.jsxs)(t.p,{children:["All the way down to 300KB in ",(0,s.jsx)(t.a,{href:"https://crates.io/crates/dtparse/1.0.3",children:"version 1.0.3"}),":"]}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.img,{alt:"memory after",src:n(39838).Z+"",width:"739",height:"123"})}),"\n",(0,s.jsx)(t.h2,{id:"conclusion",children:"Conclusion"}),"\n",(0,s.jsx)(t.p,{children:"In the end, you don't need to write a custom allocator to be efficient with memory, great tools\nalready exist to help you understand what your program is doing."}),"\n",(0,s.jsx)(t.p,{children:(0,s.jsx)(t.strong,{children:"Use them."})}),"\n",(0,s.jsxs)(t.p,{children:["Given that ",(0,s.jsx)(t.a,{href:"https://en.wikipedia.org/wiki/Moore%27s_law",children:"Moore's Law"})," is\n",(0,s.jsx)(t.a,{href:"https://www.technologyreview.com/s/601441/moores-law-is-dead-now-what/",children:"dead"}),", we've all got to do\nour part to take back what Microsoft stole."]})]})}function d(e={}){let{wrapper:t}={...(0,i.a)(),...e.components};return t?(0,s.jsx)(t,{...e,children:(0,s.jsx)(h,{...e})}):h(e)}},67029:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-before-11fba190f97831448cc539ebb32fa579.png"},39838:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-closeup-after-967bc4596c480bcc9e8410b0a7a64a00.png"},83346:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-closeup-12ae3897c033ccb3684a88dd45592e14.png"},18644:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-dtparse-colorized-e6caf224f50df2dd56981f5b02970325.png"},20809:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-flamegraph-after-cedc4c3519313f5af538364165e92c34.png"},78308:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-flamegraph-default-26cc411d387f58f50cb548f8e81df1a1.png"},74078:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-flamegraph-5094664fa79faaf2664b38505c15ac1f.png"},49206:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-main-colorized-cfe5d7d345d32cfc1a0f297580619718.png"},66746:function(e,t,n){n.d(t,{Z:function(){return a}});let a=n.p+"assets/images/heaptrack-parseinfo-colorized-a1898beaf28a3997ac86810f872539b7.png"},50065:function(e,t,n){n.d(t,{Z:function(){return o},a:function(){return r}});var a=n(67294);let s={},i=a.createContext(s);function r(e){let t=a.useContext(i);return a.useMemo(function(){return"function"==typeof e?e(t):{...t,...e}},[t,e])}function o(e){let t;return t=e.disableParentContext?"function"==typeof e.components?e.components(s):e.components||s:r(e.components),a.createElement(i.Provider,{value:t},e.children)}},94111:function(e){e.exports=JSON.parse('{"permalink":"/2018/10/case-study-optimization","source":"@site/blog/2018-10-08-case-study-optimization/index.mdx","title":"A case study in heaptrack","description":"I remember early in my career someone joking that:","date":"2018-10-08T12:00:00.000Z","tags":[],"readingTime":4.26,"hasTruncateMarker":true,"authors":[{"name":"Bradlee Speice","socials":{"github":"https://github.com/bspeice"},"key":"bspeice","page":null}],"frontMatter":{"slug":"2018/10/case-study-optimization","title":"A case study in heaptrack","date":"2018-10-08T12:00:00.000Z","authors":["bspeice"],"tags":[]},"unlisted":false,"lastUpdatedAt":1731189722000,"prevItem":{"title":"More \\"what companies really mean\\"","permalink":"/2018/12/what-small-business-really-means"},"nextItem":{"title":"Isomorphic desktop apps with Rust","permalink":"/2018/09/isomorphic-apps"}}')}}]); |