mirror of
https://github.com/bspeice/speice.io
synced 2024-12-22 08:38:09 -05:00
151 lines
51 KiB
HTML
151 lines
51 KiB
HTML
<!doctype html><html lang=en dir=ltr class="blog-wrapper blog-post-page plugin-blog plugin-id-default" data-has-hydrated=false><meta charset=UTF-8><meta name=generator content="Docusaurus v3.6.1"><title data-rh=true>Binary format shootout | The Old Speice Guy</title><meta data-rh=true name=viewport content="width=device-width,initial-scale=1.0"><meta data-rh=true name=twitter:card content=summary_large_image><meta data-rh=true property=og:url content=https://speice.io/2019/09/binary-format-shootout><meta data-rh=true property=og:locale content=en><meta data-rh=true name=docusaurus_locale content=en><meta data-rh=true name=docusaurus_tag content=default><meta data-rh=true name=docsearch:language content=en><meta data-rh=true name=docsearch:docusaurus_tag content=default><meta data-rh=true property=og:title content="Binary format shootout | The Old Speice Guy"><meta data-rh=true name=description content="I've found that in many personal projects,"><meta data-rh=true property=og:description content="I've found that in many personal projects,"><meta data-rh=true property=og:type content=article><meta data-rh=true property=article:published_time content=2019-09-28T12:00:00.000Z><link data-rh=true rel=icon href=/img/favicon.ico><link data-rh=true rel=canonical href=https://speice.io/2019/09/binary-format-shootout><link data-rh=true rel=alternate href=https://speice.io/2019/09/binary-format-shootout hreflang=en><link data-rh=true rel=alternate href=https://speice.io/2019/09/binary-format-shootout hreflang=x-default><script data-rh=true type=application/ld+json>{"@context":"https://schema.org","@id":"https://speice.io/2019/09/binary-format-shootout","@type":"BlogPosting","author":{"@type":"Person","name":"Bradlee Speice"},"dateModified":"2024-11-10T03:06:23.000Z","datePublished":"2019-09-28T12:00:00.000Z","description":"I've found that in many personal projects,","headline":"Binary format 
shootout","isPartOf":{"@id":"https://speice.io/","@type":"Blog","name":"Blog"},"keywords":[],"mainEntityOfPage":"https://speice.io/2019/09/binary-format-shootout","name":"Binary format shootout","url":"https://speice.io/2019/09/binary-format-shootout"}</script><link rel=alternate type=application/rss+xml href=/rss.xml title="The Old Speice Guy RSS Feed"><link rel=alternate type=application/atom+xml href=/atom.xml title="The Old Speice Guy Atom Feed"><link rel=stylesheet href=/katex/katex.min.css><link rel=stylesheet href=/assets/css/styles.16c3428d.css><script src=/assets/js/runtime~main.29a27dcf.js defer></script><script src=/assets/js/main.d461af80.js defer></script><body class=navigation-with-keyboard><script>!function(){var t,e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();t=null!==e?e:"light",document.documentElement.setAttribute("data-theme",t)}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id=__docusaurus><div role=region aria-label="Skip to main content"><a class=skipToContent_fXgn href=#__docusaurus_skipToContent_fallback>Skip to main content</a></div><nav aria-label=Main class="navbar navbar--fixed-top"><div class=navbar__inner><div class=navbar__items><button aria-label="Toggle navigation bar" aria-expanded=false class="navbar__toggle clean-btn" type=button><svg width=30 height=30 viewBox="0 0 30 30" aria-hidden=true><path stroke=currentColor stroke-linecap=round stroke-miterlimit=10 stroke-width=2 d="M4 7h22M4 15h22M4 23h22"/></svg></button><a class=navbar__brand href=/><div class=navbar__logo><img src=/img/logo.svg alt="Sierpinski Gasket" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src=/img/logo-dark.svg 
alt="Sierpinski Gasket" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div><b class="navbar__title text--truncate">The Old Speice Guy</b></a></div><div class="navbar__items navbar__items--right"><a href=https://github.com/bspeice target=_blank rel="noopener noreferrer" class="navbar__item navbar__link header-github-link"></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP toggleButtonDisabled_aARS" type=button disabled title="Switch between dark and light mode (currently light mode)" aria-label="Switch between dark and light mode (currently light mode)" aria-live=polite aria-pressed=false><svg viewBox="0 0 24 24" width=24 height=24 class=lightToggleIcon_pyhR><path fill=currentColor d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"/></svg><svg viewBox="0 0 24 24" width=24 height=24 class=darkToggleIcon_wfgR><path fill=currentColor d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 
c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"/></svg></button></div><div class=navbarSearchContainer_Bca1><div class=navbar__search><span aria-label="expand searchbar" role=button class=search-icon tabindex=0></span><input id=search_input_react type=search placeholder=Loading... aria-label=Search class="navbar__search-input search-bar" disabled></div></div></div></div><div role=presentation class=navbar-sidebar__backdrop></div></nav><div id=__docusaurus_skipToContent_fallback class="main-wrapper mainWrapper_z2l0"><div class="container margin-vert--lg"><div class=row><aside class="col col--3"><nav class="sidebar_re4s thin-scrollbar" aria-label="Blog recent posts navigation"><div class="sidebarItemTitle_pO2u margin-bottom--md">All posts</div><div role=group><h3>2024</h3><div role=group><h4>Playing with fire</h4><ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2024/11/playing-with-fire>The fractal flame algorithm</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2024/11/playing-with-fire-transforms>Transforms and variations</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2024/11/playing-with-fire-log-density>Tone mapping and color</a></ul></ul></div></div><div role=group><h3>2022</h3><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2011/11/webpack-industrial-complex>The webpack industrial complex</a></ul></div><div role=group><h3>2019</h3><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/12/release-the-gil>Release the GIL</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a aria-current=page class="sidebarItemLink_mo7H 
sidebarItemLinkActive_I1ZP" href=/2019/09/binary-format-shootout>Binary format shootout</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/06/high-performance-systems>On building high performance systems</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/05/making-bread>Making bread</a></ul><div role=group><h4>Allocations in Rust</h4><ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/understanding-allocations-in-rust>Foreword</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/the-whole-world>Global memory</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/stacking-up>Fixed memory</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/a-heaping-helping>Dynamic memory</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/08/compiler-optimizations>Compiler optimizations</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2019/02/summary>Summary</a></ul></ul></div></div><div role=group><h3>2018</h3><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/12/allocation-safety>QADAPT - debug_assert! 
for allocations</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/12/what-small-business-really-means>More "what companies really mean"</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/10/case-study-optimization>A case study in heaptrack</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/09/isomorphic-apps>Isomorphic desktop apps with Rust</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/09/primitives-in-rust-are-weird>Primitives in Rust are weird (and cool)</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/06/dateutil-parser-to-rust>What I learned porting dateutil to Rust</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/05/hello>Hello!</a></ul><div role=group><h4>Captain's Cookbook</h4><ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/01/captains-cookbook-part-1>Project setup</a><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2018/01/captains-cookbook-part-2>Practical usage</a></ul></ul></div></div><div role=group><h3>2016</h3><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/11/pca-audio-compression>PCA audio compression</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/10/rustic-repodcasting>A Rustic re-podcasting server</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/06/event-studies-and-earnings-releases>Event studies and earnings releases</a></ul><ul 
class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/05/the-unfair-casino>The unfair casino</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/04/tick-tock>Tick tock...</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/03/tweet-like-me>Tweet like me</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/03/predicting-santander-customer-happiness>Predicting Santander customer happiness</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/02/profitability-using-the-investment-formula>Profitability using the investment formula</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/02/guaranteed-money-maker>Guaranteed money maker</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/01/cloudy-in-seattle>Cloudy in Seattle</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2016/01/complaining-about-the-weather>Complaining about the weather</a></ul></div><div role=group><h3>2015</h3><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2015/12/testing-cramer>Testing Cramer</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2015/11/autocallable>Autocallable Bonds</a></ul><ul class="sidebarItemList_Yudw clean-list"><li class=sidebarItem__DBe><a class=sidebarItemLink_mo7H href=/2015/11/welcome>Welcome, and an algorithm</a></ul></div></nav></aside><main class="col col--7"><article><header><h1 class=title_f1Hy>Binary format shootout</h1><div 
class="container_mt6G margin-vert--md"><time datetime=2019-09-28T12:00:00.000Z>September 28, 2019</time> · <!-- -->9 min read</div><div class="margin-top--md margin-bottom--sm row"><div class="col col--12 authorCol_Hf19"><div class="avatar margin-bottom--sm"><div class="avatar__intro authorDetails_lV9A"><div class=avatar__name><span class=authorName_yefp>Bradlee Speice</span></div><div class=authorSocials_rSDt><a href=https://github.com/bspeice target=_blank rel="noopener noreferrer" class=authorSocialLink_owbf title=GitHub><svg viewBox="0 0 256 250" width=1em height=1em class="authorSocialLink_owbf githubSvg_Uu4N" style=--dark:#000;--light:#fff preserveAspectRatio=xMidYMid><path d="M128.001 0C57.317 0 0 57.307 0 128.001c0 56.554 36.676 104.535 87.535 121.46 6.397 1.185 8.746-2.777 8.746-6.158 0-3.052-.12-13.135-.174-23.83-35.61 7.742-43.124-15.103-43.124-15.103-5.823-14.795-14.213-18.73-14.213-18.73-11.613-7.944.876-7.78.876-7.78 12.853.902 19.621 13.19 19.621 13.19 11.417 19.568 29.945 13.911 37.249 10.64 1.149-8.272 4.466-13.92 8.127-17.116-28.431-3.236-58.318-14.212-58.318-63.258 0-13.975 5-25.394 13.188-34.358-1.329-3.224-5.71-16.242 1.24-33.874 0 0 10.749-3.44 35.21 13.121 10.21-2.836 21.16-4.258 32.038-4.307 10.878.049 21.837 1.47 32.066 4.307 24.431-16.56 35.165-13.12 35.165-13.12 6.967 17.63 2.584 30.65 1.255 33.873 8.207 8.964 13.173 20.383 13.173 34.358 0 49.163-29.944 59.988-58.447 63.157 4.591 3.972 8.682 11.762 8.682 23.704 0 17.126-.148 30.91-.148 35.126 0 3.407 2.304 7.398 8.792 6.14C219.37 232.5 256 184.537 256 128.002 256 57.307 198.691 0 128.001 0Zm-80.06 182.34c-.282.636-1.283.827-2.194.39-.929-.417-1.45-1.284-1.15-1.922.276-.655 1.279-.838 2.205-.399.93.418 1.46 1.293 1.139 1.931Zm6.296 5.618c-.61.566-1.804.303-2.614-.591-.837-.892-.994-2.086-.375-2.66.63-.566 1.787-.301 2.626.591.838.903 1 2.088.363 2.66Zm4.32 7.188c-.785.545-2.067.034-2.86-1.104-.784-1.138-.784-2.503.017-3.05.795-.547 2.058-.055 2.861 1.075.782 1.157.782 2.522-.019 
3.08Zm7.304 8.325c-.701.774-2.196.566-3.29-.49-1.119-1.032-1.43-2.496-.726-3.27.71-.776 2.213-.558 3.315.49 1.11 1.03 1.45 2.505.701 3.27Zm9.442 2.81c-.31 1.003-1.75 1.459-3.199 1.033-1.448-.439-2.395-1.613-2.103-2.626.301-1.01 1.747-1.484 3.207-1.028 1.446.436 2.396 1.602 2.095 2.622Zm10.744 1.193c.036 1.055-1.193 1.93-2.715 1.95-1.53.034-2.769-.82-2.786-1.86 0-1.065 1.202-1.932 2.733-1.958 1.522-.03 2.768.818 2.768 1.868Zm10.555-.405c.182 1.03-.875 2.088-2.387 2.37-1.485.271-2.861-.365-3.05-1.386-.184-1.056.893-2.114 2.376-2.387 1.514-.263 2.868.356 3.061 1.403Z"/></svg></a></div></div></div></div></div></header><div id=__blog-post-container class=markdown><p>I've found that in many personal projects,
|
||
<a href=https://en.wikipedia.org/wiki/Analysis_paralysis target=_blank rel="noopener noreferrer">analysis paralysis</a> is particularly deadly.
|
||
Making good decisions in the beginning avoids pain and suffering later; if extra research prevents
|
||
future problems, I'm happy to continue <del>procrastinating</del> researching indefinitely.</p>
|
||
<p>So let's say you're in need of a binary serialization format. Data will be going over the network,
|
||
not just in memory, so having a schema document and code generation is a must. Performance is
|
||
crucial, so formats that support zero-copy de/serialization are given priority. And the more
|
||
languages supported, the better; I use Rust, but can't predict what other languages this could
|
||
interact with.</p>
|
||
<p>Given these requirements, the candidates I could find were:</p>
|
||
<ol>
|
||
<li><a href=https://capnproto.org/ target=_blank rel="noopener noreferrer">Cap'n Proto</a> has been around the longest, and is the most established</li>
|
||
<li><a href=https://google.github.io/flatbuffers/ target=_blank rel="noopener noreferrer">Flatbuffers</a> is the newest, and claims to have a simpler
|
||
encoding</li>
|
||
<li><a href=https://github.com/real-logic/simple-binary-encoding target=_blank rel="noopener noreferrer">Simple Binary Encoding</a> has the simplest
|
||
encoding, but the Rust implementation is unmaintained</li>
|
||
</ol>
|
||
<p>Any one of these will satisfy the project requirements: easy to transmit over a network, reasonably
|
||
fast, and polyglot support. But how do you actually pick one? It's impossible to know what issues
|
||
will follow that choice, so I tend to avoid commitment until the last possible moment.</p>
|
||
<p>Still, a choice must be made. Instead of worrying about which is "the best," I decided to build a
|
||
small proof-of-concept system in each format and pit them against each other. All code can be found
|
||
in the <a href=https://github.com/speice-io/marketdata-shootout target=_blank rel="noopener noreferrer">repository</a> for this post.</p>
|
||
<p>We'll discuss the details later, but here's a quick preview of the results:</p>
|
||
<ul>
|
||
<li>Cap'n Proto: Theoretically performs incredibly well, but the implementation had issues</li>
|
||
<li>Flatbuffers: Has some quirks, but largely lived up to its "zero-copy" promises</li>
|
||
<li>SBE: Best median and worst-case performance, but the message structure has a limited feature set</li>
|
||
</ul>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=prologue-binary-parsing-with-nom>Prologue: Binary Parsing with Nom<a href=#prologue-binary-parsing-with-nom class=hash-link aria-label="Direct link to Prologue: Binary Parsing with Nom" title="Direct link to Prologue: Binary Parsing with Nom"></a></h2>
|
||
<p>Our benchmark system will be a simple data processor; given depth-of-book market data from
|
||
<a href=https://iextrading.com/trading/market-data/#deep target=_blank rel="noopener noreferrer">IEX</a>, serialize each message into the schema
|
||
format, read it back, and calculate total size of stock traded and the lowest/highest quoted prices.
|
||
This test isn't complex, but is representative of the project I need a binary format for.</p>
|
||
<p>But before we make it to that point, we have to actually read in the market data. To do so, I'm
|
||
using a library called <a href=https://github.com/Geal/nom target=_blank rel="noopener noreferrer"><code>nom</code></a>. Version 5.0 was recently released and
|
||
brought some big changes, so this was an opportunity to build a non-trivial program and get
|
||
familiar.</p>
|
||
<p>If you don't already know about <code>nom</code>, it's a "parser combinator" library. By combining different smaller
|
||
parsers, you can assemble a parser to handle complex structures without writing tedious code by
|
||
hand. For example, when parsing
|
||
<a href=https://www.winpcap.org/ntar/draft/PCAP-DumpFileFormat.html#rfc.section.3.3 target=_blank rel="noopener noreferrer">PCAP files</a>:</p>
|
||
<div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-background-color:hsl(230, 1%, 98%);--prism-color:hsl(230, 8%, 24%)"><div class=codeBlockContent_biex><pre tabindex=0 class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="background-color:hsl(230, 1%, 98%);color:hsl(230, 8%, 24%)"><code class=codeBlockLines_e6Vv><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> 0 1 2 3</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +---------------------------------------------------------------+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> 0 | Block Type = 0x00000006 |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +---------------------------------------------------------------+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> 4 | Block Total Length |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> 8 | Interface ID |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">12 | Timestamp (High) |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span 
class="token plain">16 | Timestamp (Low) |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">20 | Captured Len |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">24 | Packet Len |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> | Packet Data |</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> | ... |</span><br></span></code></pre><div class=buttonGroup__atx><button type=button aria-label="Copy code to clipboard" title=Copy class=clean-btn><span class=copyButtonIcons_eSgA aria-hidden=true><svg viewBox="0 0 24 24" class=copyButtonIcon_y97N><path fill=currentColor d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"/></svg><svg viewBox="0 0 24 24" class=copyButtonSuccessIcon_LjdS><path fill=currentColor d=M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z /></svg></span></button></div></div></div>
|
||
<p>...you can build a parser in <code>nom</code> that looks like
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/parsers.rs#L59-L93 target=_blank rel="noopener noreferrer">this</a>:</p>
|
||
<div class="language-rust codeBlockContainer_Ckt0 theme-code-block" style="--prism-background-color:hsl(230, 1%, 98%);--prism-color:hsl(230, 8%, 24%)"><div class=codeBlockContent_biex><pre tabindex=0 class="prism-code language-rust codeBlock_bY9V thin-scrollbar" style="background-color:hsl(230, 1%, 98%);color:hsl(230, 8%, 24%)"><code class=codeBlockLines_e6Vv><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token keyword" style="color:hsl(301, 63%, 40%)">const</span><span class="token plain"> </span><span class="token constant" style="color:hsl(35, 99%, 36%)">ENHANCED_PACKET</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">:</span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">[</span><span class="token keyword" style="color:hsl(301, 63%, 40%)">u8</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">;</span><span class="token plain"> </span><span class="token number" style="color:hsl(35, 99%, 36%)">4</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">]</span><span class="token plain"> </span><span class="token operator" style="color:hsl(221, 87%, 60%)">=</span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">[</span><span class="token number" style="color:hsl(35, 99%, 36%)">0x06</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> </span><span class="token number" style="color:hsl(35, 99%, 36%)">0x00</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> </span><span class="token number" style="color:hsl(35, 99%, 36%)">0x00</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> </span><span class="token number" style="color:hsl(35, 99%, 36%)">0x00</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">]</span><span 
class="token punctuation" style="color:hsl(119, 34%, 47%)">;</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"></span><span class="token keyword" style="color:hsl(301, 63%, 40%)">pub</span><span class="token plain"> </span><span class="token keyword" style="color:hsl(301, 63%, 40%)">fn</span><span class="token plain"> </span><span class="token function-definition function" style="color:hsl(221, 87%, 60%)">enhanced_packet_block</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">input</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">:</span><span class="token plain"> </span><span class="token operator" style="color:hsl(221, 87%, 60%)">&</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">[</span><span class="token keyword" style="color:hsl(301, 63%, 40%)">u8</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">]</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">-></span><span class="token plain"> </span><span class="token class-name" style="color:hsl(35, 99%, 36%)">IResult</span><span class="token operator" style="color:hsl(221, 87%, 60%)"><</span><span class="token operator" style="color:hsl(221, 87%, 60%)">&</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">[</span><span class="token keyword" style="color:hsl(301, 63%, 40%)">u8</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">]</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> </span><span class="token operator" style="color:hsl(221, 87%, 60%)">&</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">[</span><span class="token keyword" style="color:hsl(301, 63%, 40%)">u8</span><span 
class="token punctuation" style="color:hsl(119, 34%, 47%)">]</span><span class="token operator" style="color:hsl(221, 87%, 60%)">></span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">{</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token keyword" style="color:hsl(301, 63%, 40%)">let</span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> remaining</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> block_type</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> block_len</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> interface_id</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> timestamp_high</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> timestamp_low</span><span 
class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> captured_len</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> packet_len</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token plain"> </span><span class="token operator" style="color:hsl(221, 87%, 60%)">=</span><span class="token plain"> </span><span class="token function" style="color:hsl(221, 87%, 60%)">tuple</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token function" style="color:hsl(221, 87%, 60%)">tag</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token constant" style="color:hsl(35, 99%, 36%)">ENHANCED_PACKET</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span 
class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> le_u32</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">input</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token operator" style="color:hsl(221, 87%, 60%)">?</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">;</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain" style=display:inline-block></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span 
class="token keyword" style="color:hsl(301, 63%, 40%)">let</span><span class="token plain"> </span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">remaining</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> packet_data</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token plain"> </span><span class="token operator" style="color:hsl(221, 87%, 60%)">=</span><span class="token plain"> </span><span class="token function" style="color:hsl(221, 87%, 60%)">take</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">captured_len</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">remaining</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token operator" style="color:hsl(221, 87%, 60%)">?</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">;</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> </span><span class="token class-name" style="color:hsl(35, 99%, 36%)">Ok</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">(</span><span class="token plain">remaining</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">,</span><span class="token plain"> packet_data</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token punctuation" style="color:hsl(119, 34%, 47%)">)</span><span class="token plain"></span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"></span><span class="token punctuation" style="color:hsl(119, 34%, 
47%)">}</span><br></span></code></pre><div class=buttonGroup__atx><button type=button aria-label="Copy code to clipboard" title=Copy class=clean-btn><span class=copyButtonIcons_eSgA aria-hidden=true><svg viewBox="0 0 24 24" class=copyButtonIcon_y97N><path fill=currentColor d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"/></svg><svg viewBox="0 0 24 24" class=copyButtonSuccessIcon_LjdS><path fill=currentColor d=M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z /></svg></span></button></div></div></div>
|
||
<p>While this example isn't too interesting, more complex formats (like IEX market data) are where
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/iex.rs target=_blank rel="noopener noreferrer"><code>nom</code> really shines</a>.</p>
|
||
<p>Ultimately, because the <code>nom</code> code in this shootout was the same for all formats, we're not too
|
||
interested in its performance. Still, it's worth mentioning that building the market data parser was
|
||
actually fun; I didn't have to write tons of boring code by hand.</p>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=capn-proto>Cap'n Proto<a href=#capn-proto class=hash-link aria-label="Direct link to Cap'n Proto" title="Direct link to Cap'n Proto"></a></h2>
|
||
<p>Now it's time to get into the meaty part of the story. Cap'n Proto was the first format I tried
|
||
because of how long it has supported Rust (thanks to <a href=https://github.com/dwrensha target=_blank rel="noopener noreferrer">dwrensha</a> for
|
||
maintaining the Rust port since
|
||
<a href=https://github.com/capnproto/capnproto-rust/releases/tag/rustc-0.10 target=_blank rel="noopener noreferrer">2014!</a>). However, I had a ton
|
||
of performance concerns once I started using it.</p>
|
||
<p>To serialize new messages, Cap'n Proto uses a "builder" object. This builder allocates memory on the
|
||
heap to hold the message content, but because builders
|
||
<a href=https://github.com/capnproto/capnproto-rust/issues/111 target=_blank rel="noopener noreferrer">can't be re-used</a>, we have to allocate a
|
||
new buffer for every single message. I was able to work around this with a
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/capnp_runner.rs#L17-L51 target=_blank rel="noopener noreferrer">special builder</a>
|
||
that could re-use the buffer, but it required reading through Cap'n Proto's
|
||
<a href=https://github.com/capnproto/capnproto-rust/blob/master/benchmark/benchmark.rs#L124-L156 target=_blank rel="noopener noreferrer">benchmarks</a>
|
||
to find an example, and used
|
||
<a href=https://doc.rust-lang.org/std/mem/fn.transmute.html target=_blank rel="noopener noreferrer"><code>std::mem::transmute</code></a> to bypass Rust's borrow
|
||
checker.</p>
|
||
<p>The process of reading messages was better, but still had issues. Cap'n Proto has two message
|
||
encodings: a <a href=https://capnproto.org/encoding.html#packing target=_blank rel="noopener noreferrer">"packed"</a> representation, and an
|
||
"unpacked" version. When reading "packed" messages, we need a buffer to unpack the message into
|
||
before we can use it; Cap'n Proto allocates a new buffer for each message we unpack, and I wasn't
|
||
able to figure out a way around that. In contrast, the unpacked message format should be where Cap'n
|
||
Proto shines; its main selling point is that there's <a href=https://capnproto.org/ target=_blank rel="noopener noreferrer">no decoding step</a>.
|
||
However, accomplishing zero-copy deserialization required code in the private API
|
||
(<a href=https://github.com/capnproto/capnproto-rust/issues/148 target=_blank rel="noopener noreferrer">since fixed</a>), and we allocate a vector on
|
||
every read for the segment table.</p>
|
||
<p>In the end, I put in significant work to make Cap'n Proto as fast as possible, but there were too
|
||
many issues for me to feel comfortable using it long-term.</p>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=flatbuffers>Flatbuffers<a href=#flatbuffers class=hash-link aria-label="Direct link to Flatbuffers" title="Direct link to Flatbuffers"></a></h2>
|
||
<p>This is the new kid on the block. After a
|
||
<a href=https://github.com/google/flatbuffers/pull/3894 target=_blank rel="noopener noreferrer">first attempt</a> didn't pan out, official support
|
||
was <a href=https://github.com/google/flatbuffers/pull/4898 target=_blank rel="noopener noreferrer">recently launched</a>. Flatbuffers intends to
|
||
address the same problems as Cap'n Proto: high-performance, polyglot, binary messaging. The
|
||
difference is that Flatbuffers claims to have a simpler wire format and
|
||
<a href=https://google.github.io/flatbuffers/flatbuffers_benchmarks.html target=_blank rel="noopener noreferrer">more flexibility</a>.</p>
|
||
<p>On the whole, I enjoyed using Flatbuffers; the <a href=https://crates.io/crates/flatc-rust target=_blank rel="noopener noreferrer">tooling</a> is
|
||
nice, and unlike Cap'n Proto, parsing messages was actually zero-copy and zero-allocation. However,
|
||
there were still some issues.</p>
|
||
<p>First, Flatbuffers (at least in Rust) can't handle nested vectors. This is a problem for formats
|
||
like the following:</p>
|
||
<div class="codeBlockContainer_Ckt0 theme-code-block" style="--prism-background-color:hsl(230, 1%, 98%);--prism-color:hsl(230, 8%, 24%)"><div class=codeBlockContent_biex><pre tabindex=0 class="prism-code language-text codeBlock_bY9V thin-scrollbar" style="background-color:hsl(230, 1%, 98%);color:hsl(230, 8%, 24%)"><code class=codeBlockLines_e6Vv><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">table Message {</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> symbol: string;</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">}</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">table MultiMessage {</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain"> messages:[Message];</span><br></span><span class=token-line style="color:hsl(230, 8%, 24%)"><span class="token plain">}</span><br></span></code></pre><div class=buttonGroup__atx><button type=button aria-label="Copy code to clipboard" title=Copy class=clean-btn><span class=copyButtonIcons_eSgA aria-hidden=true><svg viewBox="0 0 24 24" class=copyButtonIcon_y97N><path fill=currentColor d="M19,21H8V7H19M19,5H8A2,2 0 0,0 6,7V21A2,2 0 0,0 8,23H19A2,2 0 0,0 21,21V7A2,2 0 0,0 19,5M16,1H4A2,2 0 0,0 2,3V17H4V3H16V1Z"/></svg><svg viewBox="0 0 24 24" class=copyButtonSuccessIcon_LjdS><path fill=currentColor d=M21,7L9,19L3.5,13.5L4.91,12.09L9,16.17L19.59,5.59L21,7Z /></svg></span></button></div></div></div>
|
||
<p>We want to create a <code>MultiMessage</code> which contains a vector of <code>Message</code>, and each <code>Message</code> itself
|
||
contains a vector (the <code>string</code> type). I was able to work around this by
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/e9d07d148bf36a211a6f86802b313c4918377d1b/src/flatbuffers_runner.rs#L83 target=_blank rel="noopener noreferrer">caching <code>Message</code> elements</a>
|
||
in a <code>SmallVec</code> before building the final <code>MultiMessage</code>, but it was a painful process that I
|
||
believe contributed to poor serialization performance.</p>
|
||
<p>Second, streaming support in Flatbuffers seems to be something of an
|
||
<a href=https://github.com/google/flatbuffers/issues/3898 target=_blank rel="noopener noreferrer">afterthought</a>. Where Cap'n Proto in Rust handles
|
||
reading messages from a stream as part of the API, Flatbuffers just sticks a <code>u32</code> at the front of
|
||
each message to indicate the size. Not specifically a problem, but calculating message size without
|
||
that tag is nigh on impossible.</p>
|
||
<p>Ultimately, I enjoyed using Flatbuffers, and had to do significantly less work to make it perform
|
||
well.</p>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=simple-binary-encoding>Simple Binary Encoding<a href=#simple-binary-encoding class=hash-link aria-label="Direct link to Simple Binary Encoding" title="Direct link to Simple Binary Encoding"></a></h2>
|
||
<p>Support for SBE was added by the author of one of my favorite
|
||
<a href=https://web.archive.org/web/20190427124806/https://polysync.io/blog/session-types-for-hearty-codecs/ target=_blank rel="noopener noreferrer">Rust blog posts</a>.
|
||
I've <a href=/2019/06/high-performance-systems>talked previously</a> about how important
|
||
variance is in high-performance systems, so it was encouraging to read about a format that
|
||
<a href=https://github.com/real-logic/simple-binary-encoding/wiki/Why-Low-Latency target=_blank rel="noopener noreferrer">directly addressed</a> my
|
||
concerns. SBE has by far the simplest binary format, but it does make some tradeoffs.</p>
|
||
<p>Both Cap'n Proto and Flatbuffers use <a href=https://capnproto.org/encoding.html#structs target=_blank rel="noopener noreferrer">message offsets</a>
|
||
to handle variable-length data, <a href=https://capnproto.org/language.html#unions target=_blank rel="noopener noreferrer">unions</a>, and various
|
||
other features. In contrast, messages in SBE are essentially
|
||
<a href=https://github.com/real-logic/simple-binary-encoding/blob/master/sbe-samples/src/main/resources/example-schema.xml target=_blank rel="noopener noreferrer">just structs</a>;
|
||
variable-length data is supported, but there's no union type.</p>
|
||
<p>As mentioned in the beginning, the Rust port of SBE works well, but is
|
||
<a href=https://users.rust-lang.org/t/zero-cost-abstraction-frontier-no-copy-low-allocation-ordered-decoding/11515/9 target=_blank rel="noopener noreferrer">essentially unmaintained</a>.
|
||
However, if you don't need union types, and can accept that schemas are XML documents, it's still
|
||
worth using. SBE's implementation had the best streaming support of all formats I tested, and
|
||
doesn't trigger allocation during de/serialization.</p>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=results>Results<a href=#results class=hash-link aria-label="Direct link to Results" title="Direct link to Results"></a></h2>
|
||
<p>After building a test harness
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/src/capnp_runner.rs target=_blank rel="noopener noreferrer">for</a>
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/src/flatbuffers_runner.rs target=_blank rel="noopener noreferrer">each</a>
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/src/sbe_runner.rs target=_blank rel="noopener noreferrer">format</a>, it was
|
||
time to actually take them for a spin. I used
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/run_shootout.sh target=_blank rel="noopener noreferrer">this script</a> to run
|
||
the benchmarks, and the raw results are
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/shootout.csv target=_blank rel="noopener noreferrer">here</a>. All data reported
|
||
below is the average of 10 runs on a single day of IEX data. Results were validated to make sure
|
||
that each format parsed the data correctly.</p>
|
||
<h3 class="anchor anchorWithStickyNavbar_LWe7" id=serialization>Serialization<a href=#serialization class=hash-link aria-label="Direct link to Serialization" title="Direct link to Serialization"></a></h3>
|
||
<p>This test measures, on a
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L268-L272 target=_blank rel="noopener noreferrer">per-message basis</a>,
|
||
how long it takes to serialize the IEX message into the desired format and write to a pre-allocated
|
||
buffer.</p>
|
||
<table><thead><tr><th style=text-align:left>Schema<th style=text-align:left>Median<th style=text-align:left>99th Pctl<th style=text-align:left>99.9th Pctl<th style=text-align:left>Total<tbody><tr><td style=text-align:left>Cap'n Proto Packed<td style=text-align:left>413ns<td style=text-align:left>1751ns<td style=text-align:left>2943ns<td style=text-align:left>14.80s<tr><td style=text-align:left>Cap'n Proto Unpacked<td style=text-align:left>273ns<td style=text-align:left>1828ns<td style=text-align:left>2836ns<td style=text-align:left>10.65s<tr><td style=text-align:left>Flatbuffers<td style=text-align:left>355ns<td style=text-align:left>2185ns<td style=text-align:left>3497ns<td style=text-align:left>14.31s<tr><td style=text-align:left>SBE<td style=text-align:left>91ns<td style=text-align:left>1535ns<td style=text-align:left>2423ns<td style=text-align:left>3.91s</table>
|
||
<h3 class="anchor anchorWithStickyNavbar_LWe7" id=deserialization>Deserialization<a href=#deserialization class=hash-link aria-label="Direct link to Deserialization" title="Direct link to Deserialization"></a></h3>
|
||
<p>This test measures, on a
|
||
<a href=https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L294-L298 target=_blank rel="noopener noreferrer">per-message basis</a>,
|
||
how long it takes to read the previously-serialized message and perform some basic aggregation. The
|
||
aggregation code is the same for each format, so any performance differences are due solely to the
|
||
format implementation.</p>
|
||
<table><thead><tr><th style=text-align:left>Schema<th style=text-align:left>Median<th style=text-align:left>99th Pctl<th style=text-align:left>99.9th Pctl<th style=text-align:left>Total<tbody><tr><td style=text-align:left>Cap'n Proto Packed<td style=text-align:left>539ns<td style=text-align:left>1216ns<td style=text-align:left>2599ns<td style=text-align:left>18.92s<tr><td style=text-align:left>Cap'n Proto Unpacked<td style=text-align:left>366ns<td style=text-align:left>737ns<td style=text-align:left>1583ns<td style=text-align:left>12.32s<tr><td style=text-align:left>Flatbuffers<td style=text-align:left>173ns<td style=text-align:left>421ns<td style=text-align:left>1007ns<td style=text-align:left>6.00s<tr><td style=text-align:left>SBE<td style=text-align:left>116ns<td style=text-align:left>286ns<td style=text-align:left>659ns<td style=text-align:left>4.05s</table>
|
||
<h2 class="anchor anchorWithStickyNavbar_LWe7" id=conclusion>Conclusion<a href=#conclusion class=hash-link aria-label="Direct link to Conclusion" title="Direct link to Conclusion"></a></h2>
|
||
<p>Building a benchmark turned out to be incredibly helpful in making a decision; because a "union"
|
||
type isn't important to me, I can be confident that SBE best addresses my needs.</p>
|
||
<p>While SBE was the fastest in terms of both median and worst-case performance, its worst case
|
||
performance was proportionately far higher than that of any other format. It seems that
|
||
de/serialization time scales with message size, but I'll need to do some more research to understand
|
||
what exactly is going on.</div></article><nav class="pagination-nav docusaurus-mt-lg" aria-label="Blog post page navigation"><a class="pagination-nav__link pagination-nav__link--prev" href=/2019/06/high-performance-systems><div class=pagination-nav__sublabel>Older post</div><div class=pagination-nav__label>On building high performance systems</div></a><a class="pagination-nav__link pagination-nav__link--next" href=/2019/12/release-the-gil><div class=pagination-nav__sublabel>Newer post</div><div class=pagination-nav__label>Release the GIL</div></a></nav></main><div class="col col--2"><div class="tableOfContents_bqdL thin-scrollbar"><ul class="table-of-contents table-of-contents__left-border"><li><a href=#prologue-binary-parsing-with-nom class="table-of-contents__link toc-highlight">Prologue: Binary Parsing with Nom</a><li><a href=#capn-proto class="table-of-contents__link toc-highlight">Cap'n Proto</a><li><a href=#flatbuffers class="table-of-contents__link toc-highlight">Flatbuffers</a><li><a href=#simple-binary-encoding class="table-of-contents__link toc-highlight">Simple Binary Encoding</a><li><a href=#results class="table-of-contents__link toc-highlight">Results</a><ul><li><a href=#serialization class="table-of-contents__link toc-highlight">Serialization</a><li><a href=#deserialization class="table-of-contents__link toc-highlight">Deserialization</a></ul><li><a href=#conclusion class="table-of-contents__link toc-highlight">Conclusion</a></ul></div></div></div></div></div><footer class=footer><div class="container container-fluid"><div class="footer__bottom text--center"><div class=footer__copyright>Copyright © 2024 Bradlee Speice</div></div></div></footer></div> |