Initial commit for new blog generator
@ -1,6 +1,2 @@
 | 
			
		||||
FROM mcr.microsoft.com/vscode/devcontainers/ruby:0-2.7-bullseye
 | 
			
		||||
 | 
			
		||||
RUN wget https://github.com/errata-ai/vale/releases/download/v2.21.0/vale_2.21.0_Linux_64-bit.tar.gz -O /tmp/vale.tar.gz \
 | 
			
		||||
 && cd /usr/local/bin \
 | 
			
		||||
 && tar xf /tmp/vale.tar.gz \
 | 
			
		||||
 && rm /tmp/vale.tar.gz
 | 
			
		||||
ARG VARIANT=16-bullseye
 | 
			
		||||
FROM mcr.microsoft.com/vscode/devcontainers/typescript-node:0-${VARIANT}
 | 
			
		||||
 | 
			
		||||
@ -1,13 +1,33 @@
 | 
			
		||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
 | 
			
		||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/ruby
 | 
			
		||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/typescript-node
 | 
			
		||||
{
 | 
			
		||||
	"name": "Ruby",
 | 
			
		||||
	"build": {
 | 
			
		||||
		"dockerfile": "Dockerfile"
 | 
			
		||||
	},
 | 
			
		||||
	"runArgs": ["--userns=keep-id"],
 | 
			
		||||
  "name": "Node.js & TypeScript",
 | 
			
		||||
  "build": {
 | 
			
		||||
    "dockerfile": "Dockerfile",
 | 
			
		||||
    // Update 'VARIANT' to pick a Node version: 18, 16, 14.
 | 
			
		||||
    // Append -bullseye or -buster to pin to an OS version.
 | 
			
		||||
    // Use -bullseye variants on local on arm64/Apple Silicon.
 | 
			
		||||
    "args": {
 | 
			
		||||
      "VARIANT": "18-bullseye"
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
  "runArgs": ["--userns=keep-id"],
 | 
			
		||||
 | 
			
		||||
	"remoteUser": "vscode",
 | 
			
		||||
	"containerUser": "vscode",
 | 
			
		||||
	"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
 | 
			
		||||
  // Configure tool-specific properties.
 | 
			
		||||
  "customizations": {
 | 
			
		||||
    // Configure properties specific to VS Code.
 | 
			
		||||
    "vscode": {
 | 
			
		||||
      // Add the IDs of extensions you want installed when the container is created.
 | 
			
		||||
      "extensions": ["dbaeumer.vscode-eslint"]
 | 
			
		||||
    }
 | 
			
		||||
  },
 | 
			
		||||
 | 
			
		||||
  // Use 'forwardPorts' to make a list of ports inside the container available locally.
 | 
			
		||||
  // "forwardPorts": [],
 | 
			
		||||
 | 
			
		||||
  // Use 'postCreateCommand' to run commands after the container is created.
 | 
			
		||||
  // "postCreateCommand": "yarn install",
 | 
			
		||||
 | 
			
		||||
  // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
 | 
			
		||||
  "workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										32
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						@ -1,8 +1,24 @@
 | 
			
		||||
_site/
 | 
			
		||||
.swp
 | 
			
		||||
.sass-cache/
 | 
			
		||||
.jekyll-metadata
 | 
			
		||||
.bundle/
 | 
			
		||||
vendor/
 | 
			
		||||
.styles/
 | 
			
		||||
.vscode/
 | 
			
		||||
# Logs
 | 
			
		||||
logs
 | 
			
		||||
*.log
 | 
			
		||||
npm-debug.log*
 | 
			
		||||
yarn-debug.log*
 | 
			
		||||
yarn-error.log*
 | 
			
		||||
pnpm-debug.log*
 | 
			
		||||
lerna-debug.log*
 | 
			
		||||
 | 
			
		||||
node_modules
 | 
			
		||||
dist
 | 
			
		||||
dist-ssr
 | 
			
		||||
*.local
 | 
			
		||||
 | 
			
		||||
# Editor directories and files
 | 
			
		||||
.vscode/*
 | 
			
		||||
!.vscode/extensions.json
 | 
			
		||||
.idea
 | 
			
		||||
.DS_Store
 | 
			
		||||
*.suo
 | 
			
		||||
*.ntvs*
 | 
			
		||||
*.njsproj
 | 
			
		||||
*.sln
 | 
			
		||||
*.sw?
 | 
			
		||||
 | 
			
		||||
@ -1,7 +0,0 @@
 | 
			
		||||
StylesPath = .styles
 | 
			
		||||
MinAlertLevel = suggestion
 | 
			
		||||
Packages = Microsoft, write-good
 | 
			
		||||
 | 
			
		||||
[*]
 | 
			
		||||
BasedOnStyles = Vale, Microsoft, write-good
 | 
			
		||||
write-good.E-Prime = NO
 | 
			
		||||
							
								
								
									
										24
									
								
								404.html
									
									
									
									
									
								
							
							
						
						@ -1,24 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: page
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
<style type="text/css" media="screen">
 | 
			
		||||
  .container {
 | 
			
		||||
    margin: 10px auto;
 | 
			
		||||
    max-width: 600px;
 | 
			
		||||
    text-align: center;
 | 
			
		||||
  }
 | 
			
		||||
  h1 {
 | 
			
		||||
    margin: 30px 0;
 | 
			
		||||
    font-size: 4em;
 | 
			
		||||
    line-height: 1;
 | 
			
		||||
    letter-spacing: -1px;
 | 
			
		||||
  }
 | 
			
		||||
</style>
 | 
			
		||||
 | 
			
		||||
<div class="container">
 | 
			
		||||
  <h1>404</h1>
 | 
			
		||||
 | 
			
		||||
  <p><strong>Page not found :(</strong></p>
 | 
			
		||||
  <p>The requested page could not be found.</p>
 | 
			
		||||
</div>
 | 
			
		||||
							
								
								
									
										29
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						@ -1,29 +0,0 @@
 | 
			
		||||
source "https://rubygems.org"
 | 
			
		||||
 | 
			
		||||
# Hello! This is where you manage which Jekyll version is used to run.
 | 
			
		||||
# When you want to use a different version, change it below, save the
 | 
			
		||||
# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
 | 
			
		||||
#
 | 
			
		||||
#     bundle exec jekyll serve
 | 
			
		||||
#
 | 
			
		||||
# This will help ensure the proper Jekyll version is running.
 | 
			
		||||
# Happy Jekylling!
 | 
			
		||||
gem "jekyll", "~> 3.8.3"
 | 
			
		||||
 | 
			
		||||
gem "texture"
 | 
			
		||||
 | 
			
		||||
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
 | 
			
		||||
# uncomment the line below. To upgrade, run `bundle update github-pages`.
 | 
			
		||||
# gem "github-pages", group: :jekyll_plugins
 | 
			
		||||
 | 
			
		||||
# If you have any plugins, put them here!
 | 
			
		||||
group :jekyll_plugins do
 | 
			
		||||
  gem "jekyll-feed", "~> 0.6"
 | 
			
		||||
  gem "jekyll-remote-theme"
 | 
			
		||||
end
 | 
			
		||||
 | 
			
		||||
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
 | 
			
		||||
gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 | 
			
		||||
 | 
			
		||||
# Performance-booster for watching directories on Windows
 | 
			
		||||
gem "wdm", "~> 0.1.0" if Gem.win_platform?
 | 
			
		||||
							
								
								
									
										78
									
								
								Gemfile.lock
									
									
									
									
									
								
							
							
						
						@ -1,78 +0,0 @@
 | 
			
		||||
GEM
 | 
			
		||||
  remote: https://rubygems.org/
 | 
			
		||||
  specs:
 | 
			
		||||
    addressable (2.7.0)
 | 
			
		||||
      public_suffix (>= 2.0.2, < 5.0)
 | 
			
		||||
    colorator (1.1.0)
 | 
			
		||||
    concurrent-ruby (1.1.6)
 | 
			
		||||
    em-websocket (0.5.1)
 | 
			
		||||
      eventmachine (>= 0.12.9)
 | 
			
		||||
      http_parser.rb (~> 0.6.0)
 | 
			
		||||
    eventmachine (1.2.7)
 | 
			
		||||
    ffi (1.12.2)
 | 
			
		||||
    forwardable-extended (2.6.0)
 | 
			
		||||
    http_parser.rb (0.6.0)
 | 
			
		||||
    i18n (0.9.5)
 | 
			
		||||
      concurrent-ruby (~> 1.0)
 | 
			
		||||
    jekyll (3.8.6)
 | 
			
		||||
      addressable (~> 2.4)
 | 
			
		||||
      colorator (~> 1.0)
 | 
			
		||||
      em-websocket (~> 0.5)
 | 
			
		||||
      i18n (~> 0.7)
 | 
			
		||||
      jekyll-sass-converter (~> 1.0)
 | 
			
		||||
      jekyll-watch (~> 2.0)
 | 
			
		||||
      kramdown (~> 1.14)
 | 
			
		||||
      liquid (~> 4.0)
 | 
			
		||||
      mercenary (~> 0.3.3)
 | 
			
		||||
      pathutil (~> 0.9)
 | 
			
		||||
      rouge (>= 1.7, < 4)
 | 
			
		||||
      safe_yaml (~> 1.0)
 | 
			
		||||
    jekyll-feed (0.13.0)
 | 
			
		||||
      jekyll (>= 3.7, < 5.0)
 | 
			
		||||
    jekyll-remote-theme (0.4.2)
 | 
			
		||||
      addressable (~> 2.0)
 | 
			
		||||
      jekyll (>= 3.5, < 5.0)
 | 
			
		||||
      jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
 | 
			
		||||
      rubyzip (>= 1.3.0, < 3.0)
 | 
			
		||||
    jekyll-sass-converter (1.5.2)
 | 
			
		||||
      sass (~> 3.4)
 | 
			
		||||
    jekyll-seo-tag (2.6.1)
 | 
			
		||||
      jekyll (>= 3.3, < 5.0)
 | 
			
		||||
    jekyll-watch (2.2.1)
 | 
			
		||||
      listen (~> 3.0)
 | 
			
		||||
    kramdown (1.17.0)
 | 
			
		||||
    liquid (4.0.3)
 | 
			
		||||
    listen (3.2.1)
 | 
			
		||||
      rb-fsevent (~> 0.10, >= 0.10.3)
 | 
			
		||||
      rb-inotify (~> 0.9, >= 0.9.10)
 | 
			
		||||
    mercenary (0.3.6)
 | 
			
		||||
    pathutil (0.16.2)
 | 
			
		||||
      forwardable-extended (~> 2.6)
 | 
			
		||||
    public_suffix (4.0.4)
 | 
			
		||||
    rb-fsevent (0.10.3)
 | 
			
		||||
    rb-inotify (0.10.1)
 | 
			
		||||
      ffi (~> 1.0)
 | 
			
		||||
    rouge (3.17.0)
 | 
			
		||||
    rubyzip (2.3.0)
 | 
			
		||||
    safe_yaml (1.0.5)
 | 
			
		||||
    sass (3.7.4)
 | 
			
		||||
      sass-listen (~> 4.0.0)
 | 
			
		||||
    sass-listen (4.0.0)
 | 
			
		||||
      rb-fsevent (~> 0.9, >= 0.9.4)
 | 
			
		||||
      rb-inotify (~> 0.9, >= 0.9.7)
 | 
			
		||||
    texture (0.3)
 | 
			
		||||
      jekyll (~> 3.7)
 | 
			
		||||
      jekyll-seo-tag (~> 2.1)
 | 
			
		||||
 | 
			
		||||
PLATFORMS
 | 
			
		||||
  ruby
 | 
			
		||||
 | 
			
		||||
DEPENDENCIES
 | 
			
		||||
  jekyll (~> 3.8.3)
 | 
			
		||||
  jekyll-feed (~> 0.6)
 | 
			
		||||
  jekyll-remote-theme
 | 
			
		||||
  texture
 | 
			
		||||
  tzinfo-data
 | 
			
		||||
 | 
			
		||||
BUNDLED WITH
 | 
			
		||||
   2.1.4
 | 
			
		||||
							
								
								
									
										44
									
								
								_config.yml
									
									
									
									
									
								
							
							
						
						@ -1,44 +0,0 @@
 | 
			
		||||
# Welcome to Jekyll!
 | 
			
		||||
#
 | 
			
		||||
# This config file is meant for settings that affect your whole blog, values
 | 
			
		||||
# which you are expected to set up once and rarely edit after that. If you find
 | 
			
		||||
# yourself editing this file very often, consider using Jekyll's data files
 | 
			
		||||
# feature for the data you need to update frequently.
 | 
			
		||||
#
 | 
			
		||||
# For technical reasons, this file is *NOT* reloaded automatically when you use
 | 
			
		||||
# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
 | 
			
		||||
 | 
			
		||||
# Site settings
 | 
			
		||||
# These are used to personalize your new site. If you look in the HTML files,
 | 
			
		||||
# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
 | 
			
		||||
# You can create any custom variable you would like, and they will be accessible
 | 
			
		||||
# in the templates via {{ site.myvariable }}.
 | 
			
		||||
title: speice.io
 | 
			
		||||
description: The Old Speice Guy
 | 
			
		||||
email: bradlee@speice.io
 | 
			
		||||
baseurl: "" # the subpath of your site, e.g. /blog
 | 
			
		||||
url: "https://speice.io/" # the base hostname & protocol for your site, e.g. http://example.com
 | 
			
		||||
github_username:  bspeice
 | 
			
		||||
 | 
			
		||||
# Build settings
 | 
			
		||||
markdown: kramdown
 | 
			
		||||
# theme: texture
 | 
			
		||||
remote_theme: thelehhman/texture
 | 
			
		||||
plugins:
 | 
			
		||||
  - jekyll-feed
 | 
			
		||||
  - jekyll-remote-theme
 | 
			
		||||
 | 
			
		||||
include: [_pages]
 | 
			
		||||
permalink: /:year/:month/:title.html
 | 
			
		||||
 | 
			
		||||
# Exclude from processing.
 | 
			
		||||
# The following items will not be processed, by default. Create a custom list
 | 
			
		||||
# to override the default setting.
 | 
			
		||||
# exclude:
 | 
			
		||||
#   - Gemfile
 | 
			
		||||
#   - Gemfile.lock
 | 
			
		||||
#   - node_modules
 | 
			
		||||
#   - vendor/bundle/
 | 
			
		||||
#   - vendor/cache/
 | 
			
		||||
#   - vendor/gems/
 | 
			
		||||
#   - vendor/ruby/
 | 
			
		||||
@ -1,23 +0,0 @@
 | 
			
		||||
{% if page.layout == 'post' %}
 | 
			
		||||
{% comment %}Thanks to https://www.bytedude.com/jekyll-previous-and-next-posts/{% endcomment %}
 | 
			
		||||
<div class="container">
 | 
			
		||||
    <hr>
 | 
			
		||||
    <div class="post-nav">
 | 
			
		||||
        <div>
 | 
			
		||||
            {% if page.previous.url %}
 | 
			
		||||
            <a href="{{page.previous.url}}">« {{page.previous.title}}</a>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
        </div>
 | 
			
		||||
        <div class="post-nav-next">
 | 
			
		||||
            {% if page.next.url %}
 | 
			
		||||
            <a href="{{page.next.url}}">{{page.next.title}} »</a>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
        </div>
 | 
			
		||||
    </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
<script type="text/javascript"
 | 
			
		||||
    src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
 | 
			
		||||
    </script>
 | 
			
		||||
{% endif %}
 | 
			
		||||
@ -1,7 +0,0 @@
 | 
			
		||||
<meta charset="UTF-8">
 | 
			
		||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
			
		||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
 | 
			
		||||
<link rel="stylesheet" href="{{ "/assets/css/style.css" | relative_url }}">
 | 
			
		||||
<link rel="stylesheet" href="{{ "/assets/css/fonts.css" | prepend: site.baseurl }}">
 | 
			
		||||
<title>{{ page.title | default: site.title }}</title>
 | 
			
		||||
{% seo %}
 | 
			
		||||
@ -1,7 +0,0 @@
 | 
			
		||||
<div class="navbar">
 | 
			
		||||
    <a href="{{ "/" | prepend: site.baseurl }}">Home</a>
 | 
			
		||||
    <span class="separator"></span>
 | 
			
		||||
    <a href="{{ "/about/" | prepend: site.baseurl }}">About</a>
 | 
			
		||||
    <span class="separator"></span>
 | 
			
		||||
    <a href="{{ "/feed.xml" | prepend: site.baseurl }}">RSS</a>
 | 
			
		||||
</div>
 | 
			
		||||
@ -1,15 +0,0 @@
 | 
			
		||||
<div class="container">
 | 
			
		||||
    <h2>{{ site.title }}</h2>
 | 
			
		||||
    <h1>{{ site.description }}</h1>
 | 
			
		||||
    <ul class="social">
 | 
			
		||||
        {%- if site.texture.social_links.github -%}
 | 
			
		||||
            <a href="https://github.com/{{ site.texture.social_links.github }}"><li><i class="icon-github-circled"></i></li></a>
 | 
			
		||||
        {%- endif -%}
 | 
			
		||||
        {%- if site.texture.social_links.linkedIn -%}
 | 
			
		||||
            <a href="https://linkedin.com/{{ site.texture.social_links.linkedIn }}"><li><i class="icon-linkedin-squared"></i></li></a>
 | 
			
		||||
        {%- endif -%}
 | 
			
		||||
        {%- if site.texture.social_links.twitter -%}
 | 
			
		||||
            <a href="https://twitter.com/{{ site.texture.social_links.twitter }}"><li><i class="icon-twitter-squared"></i></li></a>
 | 
			
		||||
        {%- endif -%}
 | 
			
		||||
    </ul>
 | 
			
		||||
</div>
 | 
			
		||||
@ -1,13 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: page
 | 
			
		||||
title: About
 | 
			
		||||
permalink: /about/
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Developer currently living in New York City.
 | 
			
		||||
 | 
			
		||||
Best ways to get in contact:
 | 
			
		||||
 | 
			
		||||
- Email: [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
			
		||||
- GitHub: [bspeice](https://github.com/bspeice)
 | 
			
		||||
- LinkedIn: [bradleespeice](https://www.linkedin.com/in/bradleespeice/)
 | 
			
		||||
@ -1,38 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Hello!"
 | 
			
		||||
description: ""
 | 
			
		||||
category:
 | 
			
		||||
tags: []
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
I'll do what I can to keep this short, there's plenty of other things we both should be doing right
 | 
			
		||||
now.
 | 
			
		||||
 | 
			
		||||
If you're here for the bread pics, and to marvel in some other culinary side projects, I've got you
 | 
			
		||||
covered:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
And no, I'm not posting pictures of earlier attempts that ended up turning into rocks in the oven.
 | 
			
		||||
 | 
			
		||||
Okay, just one:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
If you're here for keeping up with the man Bradlee Speice, got plenty of that too. Plus some
 | 
			
		||||
up-coming super-nerdy posts about how I'm changing the world.
 | 
			
		||||
 | 
			
		||||
And if you're not here for those things: don't have a lot for you, sorry. But you're welcome to let
 | 
			
		||||
me know what needs to change.
 | 
			
		||||
 | 
			
		||||
I'm looking forward to making this a place to talk about what's going on in life, I hope you'll
 | 
			
		||||
stick it out with me. The best way to follow what's going on is on my [About](/about/) page, but if
 | 
			
		||||
you want the joy of clicking links, here's a few good ones:
 | 
			
		||||
 | 
			
		||||
- Email (people still use this?): [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
			
		||||
- Mastodon (nerd Twitter): [@bradlee](https://mastodon.social/@bradlee)
 | 
			
		||||
- Chat (RiotIM): [@bspeice:matrix.com](https://matrix.to/#/@bspeice:matrix.com)
 | 
			
		||||
- The comments section (not for people with sanity intact): ↓↓↓
 | 
			
		||||
 | 
			
		||||
Thanks, and keep it amazing.
 | 
			
		||||
@ -1,177 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "What I Learned: Porting Dateutil Parser to Rust"
 | 
			
		||||
description: ""
 | 
			
		||||
category:
 | 
			
		||||
tags: [dtparse, rust]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Hi. I'm Bradlee.
 | 
			
		||||
 | 
			
		||||
I've mostly been a lurker in Rust for a while, making a couple small contributions here and there.
 | 
			
		||||
So launching [dtparse](https://github.com/bspeice/dtparse) feels like a nice step towards becoming a
 | 
			
		||||
functioning member of society. But not too much, because then you know people start asking you to
 | 
			
		||||
pay bills, and ain't nobody got time for that.
 | 
			
		||||
 | 
			
		||||
But I built dtparse, and you can read about my thoughts on the process. Or don't. I won't tell you
 | 
			
		||||
what to do with your life (but you should totally keep reading).
 | 
			
		||||
 | 
			
		||||
# Slow down, what?
 | 
			
		||||
 | 
			
		||||
OK, fine, I guess I should start with _why_ someone would do this.
 | 
			
		||||
 | 
			
		||||
[Dateutil](https://github.com/dateutil/dateutil) is a Python library for handling dates. The
 | 
			
		||||
standard library support for time in Python is kinda dope, but there are a lot of extras that go
 | 
			
		||||
into making it useful beyond just the [datetime](https://docs.python.org/3.6/library/datetime.html)
 | 
			
		||||
module. `dateutil.parser` specifically is code to take all the super-weird time formats people come
 | 
			
		||||
up with and turn them into something actually useful.
 | 
			
		||||
 | 
			
		||||
Date/time parsing, it turns out, is just like everything else involving
 | 
			
		||||
[computers](https://infiniteundo.com/post/25326999628/falsehoods-programmers-believe-about-time) and
 | 
			
		||||
[time](https://infiniteundo.com/post/25509354022/more-falsehoods-programmers-believe-about-time): it
 | 
			
		||||
feels like it shouldn't be that difficult to do, until you try to do it, and you realize that people
 | 
			
		||||
suck and this is why
 | 
			
		||||
[we can't have nice things](https://zachholman.com/talk/utc-is-enough-for-everyone-right). But
 | 
			
		||||
alas, we'll try and make contemporary art out of the rubble and give it a pretentious name like
 | 
			
		||||
_Time_.
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
> [Time](https://www.goodfreephotos.com/united-states/montana/elkhorn/remains-of-the-mining-operation-elkhorn.jpg.php)
 | 
			
		||||
 | 
			
		||||
What makes `dateutil.parser` great is that there's a single function with a single argument that
 | 
			
		||||
drives what programmers interact with:
 | 
			
		||||
[`parse(timestr)`](https://github.com/dateutil/dateutil/blob/6dde5d6298cfb81a4c594a38439462799ed2aef2/dateutil/parser/_parser.py#L1258).
 | 
			
		||||
It takes in the time as a string, and gives you back a reasonable "look, this is the best anyone can
 | 
			
		||||
possibly do to make sense of your input" value. It doesn't expect much of you.
 | 
			
		||||
 | 
			
		||||
[And now it's in Rust.](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L1332)
 | 
			
		||||
 | 
			
		||||
# Lost in Translation
 | 
			
		||||
 | 
			
		||||
Having worked at a bulge-bracket bank watching Java programmers try to be Python programmers, I'm
 | 
			
		||||
admittedly hesitant to publish Python code that's trying to be Rust. Interestingly, Rust code can
 | 
			
		||||
actually do a great job of mimicking Python. It's certainly not idiomatic Rust, but I've had better
 | 
			
		||||
experiences than
 | 
			
		||||
[this guy](https://webcache.googleusercontent.com/search?q=cache:wkYMpktJtnUJ:https://jackstouffer.com/blog/porting_dateutil.html+&cd=3&hl=en&ct=clnk&gl=us)
 | 
			
		||||
who attempted the same thing for D. These are the actual take-aways:
 | 
			
		||||
 | 
			
		||||
When transcribing code, **stay as close to the original library as possible**. I'm talking about
 | 
			
		||||
using the same variable names, same access patterns, the whole shebang. It's way too easy to make a
 | 
			
		||||
couple of typos, and all of a sudden your code blows up in new and exciting ways. Having a reference
 | 
			
		||||
manual for verbatim what your code should be means that you don't spend that long debugging
 | 
			
		||||
complicated logic, you're more looking for typos.
 | 
			
		||||
 | 
			
		||||
Also, **don't use nice Rust things like enums**. While
 | 
			
		||||
[one time it worked out OK for me](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L88-L94),
 | 
			
		||||
I also managed to shoot myself in the foot a couple times because `dateutil` stores AM/PM as a
 | 
			
		||||
boolean and I mixed up which was true, and which was false (side note: AM is false, PM is true). In
 | 
			
		||||
general, writing nice code _should not be a first-pass priority_ when you're just trying to recreate
 | 
			
		||||
the same functionality.
 | 
			
		||||
 | 
			
		||||
**Exceptions are a pain.** Make peace with it. Python code is just allowed to skip stack frames. So
 | 
			
		||||
when a co-worker told me "Rust is getting try-catch syntax" I properly freaked out. Turns out
 | 
			
		||||
[he's not quite right](https://github.com/rust-lang/rfcs/pull/243), and I'm OK with that. And while
 | 
			
		||||
`dateutil` is pretty well-behaved about not skipping multiple stack frames,
 | 
			
		||||
[130-line try-catch blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L730-L865)
 | 
			
		||||
take a while to verify.
 | 
			
		||||
 | 
			
		||||
As another Python quirk, **be very careful about
 | 
			
		||||
[long nested if-elif-else blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L494-L568)**.
 | 
			
		||||
I used to think that Python's whitespace was just there to get you to format your code correctly. I
 | 
			
		||||
think that no longer. It's way too easy to close a block too early and have incredibly weird issues
 | 
			
		||||
in the logic. Make sure you use an editor that displays indentation levels so you can keep things
 | 
			
		||||
straight.
 | 
			
		||||
 | 
			
		||||
**Rust macros are not free.** I originally had the
 | 
			
		||||
[main test body](https://github.com/bspeice/dtparse/blob/b0e737f088eca8e83ab4244c6621a2797d247697/tests/compat.rs#L63-L217)
 | 
			
		||||
wrapped up in a macro using [pyo3](https://github.com/PyO3/PyO3). It took two minutes to compile.
 | 
			
		||||
After
 | 
			
		||||
[moving things to a function](https://github.com/bspeice/dtparse/blob/e017018295c670e4b6c6ee1cfff00dbb233db47d/tests/compat.rs#L76-L205)
 | 
			
		||||
compile times dropped down to ~5 seconds. Turns out 150 lines \* 100 tests = a lot of redundant code
 | 
			
		||||
to be compiled. My new rule of thumb is that any macros longer than 10-15 lines are actually
 | 
			
		||||
functions that need to be liberated, man.
 | 
			
		||||
 | 
			
		||||
Finally, **I really miss list comprehensions and dictionary comprehensions.** As a quick comparison,
 | 
			
		||||
see
 | 
			
		||||
[this dateutil code](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L476)
 | 
			
		||||
and
 | 
			
		||||
[the implementation in Rust](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L619-L629).
 | 
			
		||||
I probably wrote it wrong, and I'm sorry. Ultimately though, I hope that these comprehensions can be
 | 
			
		||||
added through macros or syntax extensions. Either way, they're expressive, save typing, and are
 | 
			
		||||
super-readable. Let's get more of that.
 | 
			
		||||
 | 
			
		||||
# Using a young language
 | 
			
		||||
 | 
			
		||||
Now, Rust is exciting and new, which means that there's opportunity to make a substantive impact. On
 | 
			
		||||
more than one occasion though, I've had issues navigating the Rust ecosystem.
 | 
			
		||||
 | 
			
		||||
What I'll call the "canonical library" is still being built. In Python, if you need datetime
 | 
			
		||||
parsing, you use `dateutil`. If you want `decimal` types, it's already in the
 | 
			
		||||
[standard library](https://docs.python.org/3.6/library/decimal.html). While I might've gotten away
 | 
			
		||||
with `f64`, `dateutil` uses decimals, and I wanted to follow the principle of **staying as close to
 | 
			
		||||
the original library as possible**. Thus began my quest to find a decimal library in Rust. What I
 | 
			
		||||
quickly found was summarized in a comment:
 | 
			
		||||
 | 
			
		||||
> Writing a BigDecimal is easy. Writing a _good_ BigDecimal is hard.
 | 
			
		||||
>
 | 
			
		||||
> [-cmr](https://github.com/rust-lang/rust/issues/8937#issuecomment-34582794)
 | 
			
		||||
 | 
			
		||||
In practice, this means that there are at least [4](https://crates.io/crates/bigdecimal)
 | 
			
		||||
[different](https://crates.io/crates/rust_decimal)
 | 
			
		||||
[implementations](https://crates.io/crates/decimal) [available](https://crates.io/crates/decimate).
 | 
			
		||||
And that's a lot of decisions to worry about when all I'm thinking is "why can't
 | 
			
		||||
[calendar reform](https://en.wikipedia.org/wiki/Calendar_reform) be a thing" and I'm forced to dig
 | 
			
		||||
through a [couple](https://github.com/rust-lang/rust/issues/8937#issuecomment-31661916)
 | 
			
		||||
[different](https://github.com/rust-lang/rfcs/issues/334)
 | 
			
		||||
[threads](https://github.com/rust-num/num/issues/8) to figure out if the library I'm looking at is dead
 | 
			
		||||
or just stable.
 | 
			
		||||
 | 
			
		||||
And even when the "canonical library" exists, there's no guarantee that it will be well-maintained.
 | 
			
		||||
[Chrono](https://github.com/chronotope/chrono) is the _de facto_ date/time library in Rust, and just
 | 
			
		||||
released version 0.4.4 like two days ago. Meanwhile,
 | 
			
		||||
[chrono-tz](https://github.com/chronotope/chrono-tz) appears to be dead in the water even though
 | 
			
		||||
[there are people happy to help maintain it](https://github.com/chronotope/chrono-tz/issues/19). I
 | 
			
		||||
know relatively little about it, but it appears that most of the release process is automated;
 | 
			
		||||
keeping that up to date should be a no-brainer.
 | 
			
		||||
 | 
			
		||||
## Trial Maintenance Policy
 | 
			
		||||
 | 
			
		||||
Specifically given "maintenance" being an
 | 
			
		||||
[oft-discussed](https://www.reddit.com/r/rust/comments/48540g/thoughts_on_initiators_vs_maintainers/)
 | 
			
		||||
issue, I'm going to try out the following policy to keep things moving on `dtparse`:
 | 
			
		||||
 | 
			
		||||
1. Issues/PRs needing _maintainer_ feedback will be updated at least weekly. I want to make sure
 | 
			
		||||
   nobody's blocking on me.
 | 
			
		||||
 | 
			
		||||
2. To keep issues/PRs needing _contributor_ feedback moving, I'm going to (kindly) ask the
 | 
			
		||||
   contributor to check in after two weeks, and close the issue without resolution if I hear nothing
 | 
			
		||||
   back after a month.
 | 
			
		||||
 | 
			
		||||
The second point I think has the potential to be a bit controversial, so I'm happy to receive
 | 
			
		||||
feedback on that. And if a contributor responds with "hey, still working on it, had a kid and I'm
 | 
			
		||||
running on 30 seconds of sleep a night," then first: congratulations on sustaining human life. And
 | 
			
		||||
second: I don't mind keeping those requests going indefinitely. I just want to try and balance
 | 
			
		||||
keeping things moving with giving people the necessary time they need.
 | 
			
		||||
 | 
			
		||||
I should also note that I'm still getting some best practices in place - CONTRIBUTING and
 | 
			
		||||
CONTRIBUTORS files need to be added, as well as issue/PR templates. In progress. None of us are
 | 
			
		||||
perfect.
 | 
			
		||||
 | 
			
		||||
# Roadmap and Conclusion
 | 
			
		||||
 | 
			
		||||
So if I've now built a `dateutil`-compatible parser, we're done, right? Of course not! That's not
 | 
			
		||||
nearly ambitious enough.
 | 
			
		||||
 | 
			
		||||
Ultimately, I'd love to have a library that's capable of parsing everything the Linux `date` command
 | 
			
		||||
can do (and not `date` on OSX, because seriously, BSD coreutils are the worst). I know Rust has a
 | 
			
		||||
coreutils rewrite going on, and `dtparse` would potentially be an interesting candidate since it
 | 
			
		||||
doesn't bring in a lot of extra dependencies. [`humantime`](https://crates.io/crates/humantime)
 | 
			
		||||
could help pick up some of the (current) slack in dtparse, so maybe we can share and care with each
 | 
			
		||||
other?
 | 
			
		||||
 | 
			
		||||
All in all, I'm mostly hoping that nobody's already done this and I haven't spent a bit over a month
 | 
			
		||||
on redundant code. So if it exists, tell me. I need to know, but be nice about it, because I'm going
 | 
			
		||||
to take it hard.
 | 
			
		||||
 | 
			
		||||
And in the meantime, I'm looking forward to building more. Onwards.
 | 
			
		||||
@ -1,323 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Primitives in Rust are Weird (and Cool)"
 | 
			
		||||
description: "but mostly weird."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, c, java, python, x86]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
I wrote a really small Rust program a while back because I was curious. I was 100% convinced it
 | 
			
		||||
couldn't possibly run:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn main() {
 | 
			
		||||
    println!("{}", 8.to_string())
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And to my complete befuddlement, it compiled, ran, and produced a completely sensible output. The
 | 
			
		||||
reason I was so surprised has to do with how Rust treats a special category of things I'm going to
 | 
			
		||||
call _primitives_. In the current version of the Rust book, you'll see them referred to as
 | 
			
		||||
[scalars][rust_scalar], and in older versions they'll be called [primitives][rust_primitive], but
 | 
			
		||||
we're going to stick with the name _primitive_ for the time being. Explaining why this program is so
 | 
			
		||||
cool requires talking about a number of other programming languages, and keeping a consistent
 | 
			
		||||
terminology makes things easier.
 | 
			
		||||
 | 
			
		||||
**You've been warned:** this is going to be a tedious post about a relatively minor issue that
 | 
			
		||||
involves Java, Python, C, and x86 Assembly. And also me pretending like I know what I'm talking
 | 
			
		||||
about with assembly.
 | 
			
		||||
 | 
			
		||||
# Defining primitives (Java)
 | 
			
		||||
 | 
			
		||||
The reason I'm using the name _primitive_ comes from how much of my life is Java right now. Spoiler
 | 
			
		||||
alert: a lot of it. And for the most part I like Java, but I digress. In Java, there's a special
 | 
			
		||||
name for some specific types of values:
 | 
			
		||||
 | 
			
		||||
> ```
 | 
			
		||||
> bool    char    byte
 | 
			
		||||
> short   int     long
 | 
			
		||||
> float   double
 | 
			
		||||
> ```
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
They are referred to as [primitives][java_primitive]. And relative to the other bits of Java,
 | 
			
		||||
they have two unique features. First, they don't have to worry about the
 | 
			
		||||
[billion-dollar mistake](https://en.wikipedia.org/wiki/Tony_Hoare#Apologies_and_retractions);
 | 
			
		||||
primitives in Java can never be `null`. Second: *they can't have instance methods*.
 | 
			
		||||
Remember that Rust program from earlier? Java has no idea what to do with it:
 | 
			
		||||
 | 
			
		||||
```java
 | 
			
		||||
class Main {
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        int x = 8;
 | 
			
		||||
        System.out.println(x.toString()); // Triggers a compiler error
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
````
 | 
			
		||||
 | 
			
		||||
The error is:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
Main.java:5: error: int cannot be dereferenced
 | 
			
		||||
        System.out.println(x.toString());
 | 
			
		||||
                            ^
 | 
			
		||||
1 error
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Specifically, Java's [`Object`](https://docs.oracle.com/javase/10/docs/api/java/lang/Object.html)
 | 
			
		||||
and things that inherit from it are pointers under the hood, and we have to dereference them before
 | 
			
		||||
the fields and methods they define can be used. In contrast, _primitive types are just values_ -
 | 
			
		||||
there's nothing to be dereferenced. In memory, they're just a sequence of bits.
 | 
			
		||||
 | 
			
		||||
If we really want, we can turn the `int` into an
 | 
			
		||||
[`Integer`](https://docs.oracle.com/javase/10/docs/api/java/lang/Integer.html) and then dereference
 | 
			
		||||
it, but it's a bit wasteful:
 | 
			
		||||
 | 
			
		||||
```java
 | 
			
		||||
class Main {
 | 
			
		||||
    public static void main(String[] args) {
 | 
			
		||||
        int x = 8;
 | 
			
		||||
        Integer y = Integer.valueOf(x);
 | 
			
		||||
        System.out.println(y.toString());
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
This creates the variable `y` of type `Integer` (which inherits `Object`), and at run time we
 | 
			
		||||
dereference `y` to locate the `toString()` function and call it. Rust obviously handles things a bit
 | 
			
		||||
differently, but we have to dig into the low-level details to see it in action.
 | 
			
		||||
 | 
			
		||||
# Low Level Handling of Primitives (C)
 | 
			
		||||
 | 
			
		||||
We first need to build a foundation for reading and understanding the assembly code the final answer
 | 
			
		||||
requires. Let's begin with showing how the `C` language (and your computer) thinks about "primitive"
 | 
			
		||||
values in memory:
 | 
			
		||||
 | 
			
		||||
```c
 | 
			
		||||
void my_function(int num) {}
 | 
			
		||||
 | 
			
		||||
int main() {
 | 
			
		||||
    int x = 8;
 | 
			
		||||
    my_function(x);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The [compiler explorer](https://godbolt.org/z/lgNYcc) gives us an easy way of showing off the
 | 
			
		||||
assembly-level code that's generated: <span style="font-size:.6em">whose output has been lightly
 | 
			
		||||
edited</span>
 | 
			
		||||
 | 
			
		||||
```nasm
 | 
			
		||||
main:
 | 
			
		||||
        push    rbp
 | 
			
		||||
        mov     rbp, rsp
 | 
			
		||||
        sub     rsp, 16
 | 
			
		||||
 | 
			
		||||
        ; We assign the value `8` to `x` here
 | 
			
		||||
        mov     DWORD PTR [rbp-4], 8
 | 
			
		||||
 | 
			
		||||
        ; And copy the bits making up `x` to a location
 | 
			
		||||
        ; `my_function` can access (`edi`)
 | 
			
		||||
        mov     eax, DWORD PTR [rbp-4]
 | 
			
		||||
        mov     edi, eax
 | 
			
		||||
 | 
			
		||||
        ; Call `my_function` and give it control
 | 
			
		||||
        call    my_function
 | 
			
		||||
 | 
			
		||||
        mov     eax, 0
 | 
			
		||||
        leave
 | 
			
		||||
        ret
 | 
			
		||||
 | 
			
		||||
my_function:
 | 
			
		||||
        push    rbp
 | 
			
		||||
        mov     rbp, rsp
 | 
			
		||||
 | 
			
		||||
        ; Copy the bits out of the pre-determined location (`edi`)
 | 
			
		||||
        ; to somewhere we can use
 | 
			
		||||
        mov     DWORD PTR [rbp-4], edi
 | 
			
		||||
        nop
 | 
			
		||||
 | 
			
		||||
        pop     rbp
 | 
			
		||||
        ret
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
At a really low level of memory, we're copying bits around using the [`mov`][x86_guide] instruction;
 | 
			
		||||
nothing crazy. But to show how similar Rust is, let's take a look at our program translated from C
 | 
			
		||||
to Rust:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn my_function(x: i32) {}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    let x = 8;
 | 
			
		||||
    my_function(x)
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And the assembly generated when we stick it in the
 | 
			
		||||
[compiler explorer](https://godbolt.org/z/cAlmk0): <span style="font-size:.6em">again, lightly
 | 
			
		||||
edited</span>
 | 
			
		||||
 | 
			
		||||
```nasm
 | 
			
		||||
example::main:
 | 
			
		||||
  push rax
 | 
			
		||||
 | 
			
		||||
  ; Look familiar? We're copying bits to a location for `my_function`
 | 
			
		||||
  ; The compiler just optimizes out holding `x` in memory
 | 
			
		||||
  mov edi, 8
 | 
			
		||||
 | 
			
		||||
  ; Call `my_function` and give it control
 | 
			
		||||
  call example::my_function
 | 
			
		||||
 | 
			
		||||
  pop rax
 | 
			
		||||
  ret
 | 
			
		||||
 | 
			
		||||
example::my_function:
 | 
			
		||||
  sub rsp, 4
 | 
			
		||||
 | 
			
		||||
  ; And copying those bits again, just like in C
 | 
			
		||||
  mov dword ptr [rsp], edi
 | 
			
		||||
 | 
			
		||||
  add rsp, 4
 | 
			
		||||
  ret
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
The generated Rust assembly is functionally pretty close to the C assembly: _When working with
 | 
			
		||||
primitives, we're just dealing with bits in memory_.
 | 
			
		||||
 | 
			
		||||
In Java we have to dereference a pointer to call its functions; in Rust, there's no pointer to
 | 
			
		||||
dereference. So what exactly is going on with this `.to_string()` function call?
 | 
			
		||||
 | 
			
		||||
# impl primitive (and Python)
 | 
			
		||||
 | 
			
		||||
Now it's time to <strike>reveal my trap card</strike> show the revelation that tied all this
 | 
			
		||||
together: _Rust has implementations for its primitive types._ That's right, `impl` blocks aren't
 | 
			
		||||
only for `structs` and `traits`; primitives get them too. Don't believe me? Check out
 | 
			
		||||
[u32](https://doc.rust-lang.org/std/primitive.u32.html),
 | 
			
		||||
[f64](https://doc.rust-lang.org/std/primitive.f64.html) and
 | 
			
		||||
[char](https://doc.rust-lang.org/std/primitive.char.html) as examples.
 | 
			
		||||
 | 
			
		||||
But the really interesting bit is how Rust turns those `impl` blocks into assembly. Let's break out
 | 
			
		||||
the [compiler explorer](https://godbolt.org/z/6LBEwq) once again:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
pub fn main() {
 | 
			
		||||
    8.to_string()
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And the interesting bits in the assembly: <span style="font-size:.6em">heavily trimmed down</span>
 | 
			
		||||
 | 
			
		||||
```nasm
 | 
			
		||||
example::main:
 | 
			
		||||
  sub rsp, 24
 | 
			
		||||
  mov rdi, rsp
 | 
			
		||||
  lea rax, [rip + .Lbyte_str.u]
 | 
			
		||||
  mov rsi, rax
 | 
			
		||||
 | 
			
		||||
  ; Cool stuff right here
 | 
			
		||||
  call <T as alloc::string::ToString>::to_string@PLT
 | 
			
		||||
 | 
			
		||||
  mov rdi, rsp
 | 
			
		||||
  call core::ptr::drop_in_place
 | 
			
		||||
  add rsp, 24
 | 
			
		||||
  ret
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Now, this assembly is a bit more complicated, but here's the big revelation: **we're calling
 | 
			
		||||
`to_string()` as a function that exists all on its own, and giving it the instance of `8`**. Instead
 | 
			
		||||
of thinking of the value 8 as an instance of `i32` and then peeking in to find the location of the
 | 
			
		||||
function we want to call (like Java), we have a function that exists outside of the instance and
 | 
			
		||||
just give that function the value `8`.
 | 
			
		||||
 | 
			
		||||
This is an incredibly technical detail, but the interesting idea I had was this: _if `to_string()`
 | 
			
		||||
is a static function, can I refer to the unbound function and give it an instance?_
 | 
			
		||||
 | 
			
		||||
Better explained in code (and a [compiler explorer](https://godbolt.org/z/fJY-gA) link because I
 | 
			
		||||
seriously love this thing):
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
struct MyVal {
 | 
			
		||||
    x: u32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl MyVal {
 | 
			
		||||
    fn to_string(&self) -> String {
 | 
			
		||||
        self.x.to_string()
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn main() {
 | 
			
		||||
    let my_val = MyVal { x: 8 };
 | 
			
		||||
 | 
			
		||||
    // THESE ARE THE SAME
 | 
			
		||||
    my_val.to_string();
 | 
			
		||||
    MyVal::to_string(&my_val);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Rust is totally fine with both "binding" the function call to the instance and calling it as a static function.
 | 
			
		||||
 | 
			
		||||
MIND == BLOWN.
 | 
			
		||||
 | 
			
		||||
Python does the same thing where I can both call functions bound to their instances and also call as
 | 
			
		||||
an unbound function where I give it the instance:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
class MyClass():
 | 
			
		||||
    x = 24
 | 
			
		||||
 | 
			
		||||
    def my_function(self):
 | 
			
		||||
        print(self.x)
 | 
			
		||||
 | 
			
		||||
m = MyClass()
 | 
			
		||||
 | 
			
		||||
m.my_function()
 | 
			
		||||
MyClass.my_function(m)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
And Python tries to make you _think_ that primitives can have instance methods...
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
>>> dir(8)
 | 
			
		||||
['__abs__', '__add__', '__and__', '__class__', '__cmp__', '__coerce__',
 | 
			
		||||
'__delattr__', '__div__', '__divmod__', '__doc__', '__float__', '__floordiv__',
 | 
			
		||||
...
 | 
			
		||||
'__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__',
 | 
			
		||||
...]
 | 
			
		||||
 | 
			
		||||
>>> # Theoretically `8.__str__()` should exist, but:
 | 
			
		||||
 | 
			
		||||
>>> 8.__str__()
 | 
			
		||||
  File "<stdin>", line 1
 | 
			
		||||
    8.__str__()
 | 
			
		||||
             ^
 | 
			
		||||
SyntaxError: invalid syntax
 | 
			
		||||
 | 
			
		||||
>>> # It will run if we assign it first though:
 | 
			
		||||
>>> x = 8
 | 
			
		||||
>>> x.__str__()
 | 
			
		||||
'8'
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
...but in practice it's a bit complicated.
 | 
			
		||||
 | 
			
		||||
So while Python handles binding instance methods in a way similar to Rust, it's still not able to
 | 
			
		||||
run the example we started with.
 | 
			
		||||
 | 
			
		||||
# Conclusion
 | 
			
		||||
 | 
			
		||||
This was a super-roundabout way of demonstrating it, but the way Rust handles incredibly minor
 | 
			
		||||
details like primitives leads to really cool effects. Primitives are optimized like C in how they
 | 
			
		||||
have a space-efficient memory layout, yet the language still has a lot of features I enjoy in Python
 | 
			
		||||
(like both instance and late binding).
 | 
			
		||||
 | 
			
		||||
And when you put it together, there are areas where Rust does cool things nobody else can; as a
 | 
			
		||||
quirky feature of Rust's type system, `8.to_string()` is actually valid code.
 | 
			
		||||
 | 
			
		||||
Now go forth and fool your friends into thinking you know assembly. This is all I've got.
 | 
			
		||||
 | 
			
		||||
[x86_guide]: http://www.cs.virginia.edu/~evans/cs216/guides/x86.html
 | 
			
		||||
[java_primitive]: https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html
 | 
			
		||||
[rust_scalar]: https://doc.rust-lang.org/book/second-edition/ch03-02-data-types.html#scalar-types
 | 
			
		||||
[rust_primitive]: https://doc.rust-lang.org/book/first-edition/primitive-types.html
 | 
			
		||||
@ -1,294 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Isomorphic Desktop Apps with Rust"
 | 
			
		||||
description: "Electron + WASM = ☣"
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, javascript, webassembly]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Forgive me, but this is going to be a bit of a schizophrenic post. I both despise Javascript and the
 | 
			
		||||
modern ECMAScript ecosystem, and I'm stunned by its success doing some really cool things. It's
 | 
			
		||||
[this duality](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) that's
 | 
			
		||||
led me to a couple of (very) late nights over the past weeks trying to reconcile myself as I
 | 
			
		||||
bootstrap a simple desktop application.
 | 
			
		||||
 | 
			
		||||
See, as much as
 | 
			
		||||
[Webassembly isn't trying to replace Javascript](https://webassembly.org/docs/faq/#is-webassembly-trying-to-replace-javascript),
 | 
			
		||||
**I want Javascript gone**. There are plenty of people who don't share my views, and they are
 | 
			
		||||
probably nicer and more fun at parties. But I cringe every time "Webpack" is mentioned, and I think
 | 
			
		||||
it's hilarious that the
 | 
			
		||||
[language specification](https://ecma-international.org/publications/standards/Ecma-402.htm)
 | 
			
		||||
dramatically outpaces anyone's
 | 
			
		||||
[actual implementation](https://kangax.github.io/compat-table/es2016plus/). The answer to this
 | 
			
		||||
conundrum is of course to recompile code from newer versions of the language to older versions _of
 | 
			
		||||
the same language_ before running. At least [Babel] is a nice tongue-in-cheek reference.
 | 
			
		||||
 | 
			
		||||
Yet for as much hate as [Electron] receives, it does a stunningly good job at solving a really hard
 | 
			
		||||
problem: _how the hell do I put a button on the screen and react when the user clicks it_? GUI
 | 
			
		||||
programming is hard, straight up. But if browsers are already able to run everywhere, why don't we
 | 
			
		||||
take advantage of someone else solving the hard problems for us? I don't like that I have to use
 | 
			
		||||
Javascript for it, but I really don't feel inclined to whip out good ol' [wxWidgets].
 | 
			
		||||
 | 
			
		||||
Now there are other native solutions ([libui-rs], [conrod], [oh hey wxWidgets again!][wxrust]), but
 | 
			
		||||
those also have their own issues with distribution, styling, etc. With Electron, I can
 | 
			
		||||
`yarn create electron-app my-app` and just get going, knowing that packaging/upgrades/etc. are built
 | 
			
		||||
in.
 | 
			
		||||
 | 
			
		||||
My question is: given recent innovations with WASM, _are we Electron yet_?
 | 
			
		||||
 | 
			
		||||
No, not really.
 | 
			
		||||
 | 
			
		||||
Instead, **what would it take to get to a point where we can skip Javascript in Electron apps?**
 | 
			
		||||
 | 
			
		||||
# Setting the Stage
 | 
			
		||||
 | 
			
		||||
Truth is, WASM/Webassembly is a pretty new technology and I'm a total beginner in this area. There
 | 
			
		||||
may already be solutions to the issues I discuss, but I'm totally unaware of them, so I'm going to
 | 
			
		||||
try and organize what I did manage to discover.
 | 
			
		||||
 | 
			
		||||
I should also mention that the content and things I'm talking about here are not intended to be
 | 
			
		||||
prescriptive, but more "if someone else is interested, what do we already know doesn't work?" _I
 | 
			
		||||
expect everything in this post to be obsolete within two months._ Even over the course of writing
 | 
			
		||||
this, [a separate blog post](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/) had
 | 
			
		||||
to be modified because [upstream changes](https://github.com/WebAssembly/binaryen/pull/1642) broke a
 | 
			
		||||
[Rust tool](https://github.com/rustwasm/wasm-bindgen/pull/787) the post tried to use. The post
 | 
			
		||||
ultimately
 | 
			
		||||
[got updated](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/#comment-477), **but
 | 
			
		||||
all this happened within the span of a week.** Things are moving quickly.
 | 
			
		||||
 | 
			
		||||
I'll also note that we're going to skip [asm.js] and [emscripten]. Truth be told, I couldn't get
 | 
			
		||||
either of these to output anything, and so I'm just going to say
 | 
			
		||||
[here be dragons.](https://en.wikipedia.org/wiki/Here_be_dragons) Everything I'm discussing here
 | 
			
		||||
uses the `wasm32-unknown-unknown` target.
 | 
			
		||||
 | 
			
		||||
The code that I _did_ get running is available
 | 
			
		||||
[over here](https://github.com/speice-io/isomorphic-rust). Feel free to use it as a starting point,
 | 
			
		||||
but I'm mostly including the link as a reference for the things that were attempted.
 | 
			
		||||
 | 
			
		||||
# An Example Running Application
 | 
			
		||||
 | 
			
		||||
So, I did _technically_ get a running application:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
...which you can also try out if you want:
 | 
			
		||||
 | 
			
		||||
```sh
 | 
			
		||||
git clone https://github.com/speice-io/isomorphic-rust.git
 | 
			
		||||
cd isomorphic-rust/percy
 | 
			
		||||
yarn install && yarn start
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
...but I wouldn't really call it a "high quality" starting point to base future work on. It's mostly
 | 
			
		||||
there to prove this is possible in the first place. And that's something to be proud of! There's a
 | 
			
		||||
huge amount of engineering that went into showing a window with the text "It's alive!".
 | 
			
		||||
 | 
			
		||||
There are also a lot of usability issues that prevent me from recommending anyone try Electron and
 | 
			
		||||
WASM apps at the moment, and I think that's the more important thing to discuss.
 | 
			
		||||
 | 
			
		||||
# Issue the First: Complicated Toolchains
 | 
			
		||||
 | 
			
		||||
I quickly established that [wasm-bindgen] was necessary to "link" my Rust code to Javascript. At
 | 
			
		||||
that point you've got an Electron app that starts an HTML page which ultimately fetches your WASM
 | 
			
		||||
blob. To keep things simple, the goal was to package everything using [webpack] so that I could just
 | 
			
		||||
load a `bundle.js` file on the page. That decision was to be the last thing that kinda worked in
 | 
			
		||||
this process.
 | 
			
		||||
 | 
			
		||||
The first issue
 | 
			
		||||
[I ran into](https://www.reddit.com/r/rust/comments/98lpun/unable_to_load_wasm_for_electron_application/)
 | 
			
		||||
while attempting to bundle everything via `webpack` is a detail in the WASM spec:
 | 
			
		||||
 | 
			
		||||
> This function accepts a Response object, or a promise for one, and ... **[if it] does not match
 | 
			
		||||
> the `application/wasm` MIME type**, the returned promise will be rejected with a TypeError;
 | 
			
		||||
>
 | 
			
		||||
> [WebAssembly - Additional Web Embedding API](https://webassembly.org/docs/web/#additional-web-embedding-api)
 | 
			
		||||
 | 
			
		||||
Specifically, if you try and load a WASM blob without the MIME type set, you'll get an error. On the
 | 
			
		||||
web this isn't a huge issue, as the server can set MIME types when delivering the blob. With
 | 
			
		||||
Electron, you're resolving things with a `file://` URL and thus can't control the MIME type:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
There are a couple of solutions depending on how far into the deep end you care to venture:
 | 
			
		||||
 | 
			
		||||
- Embed a static file server in your Electron application
 | 
			
		||||
- Use a [custom protocol](https://electronjs.org/docs/api/protocol) and custom protocol handler
 | 
			
		||||
- Host your WASM blob on a website that you resolve at runtime
 | 
			
		||||
 | 
			
		||||
But all these are pretty bad solutions and defeat the purpose of using WASM in the first place.
 | 
			
		||||
Instead, my workaround was to
 | 
			
		||||
[open a PR with `webpack`](https://github.com/webpack/webpack/issues/7918) and use regex to remove
 | 
			
		||||
calls to `instantiateStreaming` in the
 | 
			
		||||
[build script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/build.sh#L21-L25):
 | 
			
		||||
 | 
			
		||||
```sh
 | 
			
		||||
cargo +nightly build --target=wasm32-unknown-unknown && \
 | 
			
		||||
    wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
 | 
			
		||||
    # Have to use --mode=development so we can patch out the call to instantiateStreaming
 | 
			
		||||
    "$DIR/node_modules/webpack-cli/bin/cli.js" --mode=development "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js" && \
 | 
			
		||||
    sed -i 's/.*instantiateStreaming.*//g' "$APP_DIR/bundle.js"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Once that lands, the
 | 
			
		||||
[build process](https://github.com/speice-io/isomorphic-rust/blob/master/percy_patched_webpack/build.sh#L24-L27)
 | 
			
		||||
becomes much simpler:
 | 
			
		||||
 | 
			
		||||
```sh
 | 
			
		||||
 | 
			
		||||
cargo +nightly build --target=wasm32-unknown-unknown && \
 | 
			
		||||
    wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
 | 
			
		||||
    "$DIR/node_modules/webpack-cli/bin/cli.js" --mode=production "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js"
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
But we're not done yet! After we compile Rust into WASM and link WASM to Javascript (via
 | 
			
		||||
`wasm-bindgen` and `webpack`), we still have to make an Electron app. For this purpose I used a
 | 
			
		||||
starter app from [Electron Forge], and then a
 | 
			
		||||
[`prestart` script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
 | 
			
		||||
to actually handle starting the application.
 | 
			
		||||
 | 
			
		||||
The
 | 
			
		||||
[final toolchain](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
 | 
			
		||||
looks something like this:
 | 
			
		||||
 | 
			
		||||
- `yarn start` triggers the `prestart` script
 | 
			
		||||
- `prestart` checks for missing tools (`wasm-bindgen-cli`, etc.) and then:
 | 
			
		||||
  - Uses `cargo` to compile the Rust code into WASM
 | 
			
		||||
  - Uses `wasm-bindgen` to link the WASM blob into a Javascript file with exported symbols
 | 
			
		||||
  - Uses `webpack` to bundle the page start script with the Javascript we just generated
 | 
			
		||||
    - Uses `babel` under the hood to compile the `wasm-bindgen` code down from ES6 into something
 | 
			
		||||
      browser-compatible
 | 
			
		||||
- The `start` script runs an Electron Forge handler to do some sanity checks
 | 
			
		||||
- Electron actually starts
 | 
			
		||||
 | 
			
		||||
...which is complicated. I think more work needs to be done to either build a high-quality starter
 | 
			
		||||
app that can manage these steps, or another tool that "just handles" the complexity of linking a
 | 
			
		||||
compiled WASM file into something the Electron browser can run.
 | 
			
		||||
 | 
			
		||||
# Issue the Second: WASM tools in Rust
 | 
			
		||||
 | 
			
		||||
For as much as I didn't enjoy the Javascript tooling needed to interface with Rust, the Rust-only
 | 
			
		||||
bits aren't any better at the moment. I get it, a lot of projects are just starting off, and that
 | 
			
		||||
leads to a fragmented ecosystem. Here's what I can recommend as a starting point:
 | 
			
		||||
 | 
			
		||||
Don't check in your `Cargo.lock` files to version control. If there's a disagreement between the
 | 
			
		||||
version of `wasm-bindgen-cli` you have installed and the `wasm-bindgen` you're compiling with in
 | 
			
		||||
`Cargo.lock`, you get a nasty error:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
it looks like the Rust project used to create this wasm file was linked against
 | 
			
		||||
a different version of wasm-bindgen than this binary:
 | 
			
		||||
 | 
			
		||||
rust wasm file: 0.2.21
 | 
			
		||||
    this binary: 0.2.17
 | 
			
		||||
 | 
			
		||||
Currently the bindgen format is unstable enough that these two version must
 | 
			
		||||
exactly match, so it's required that these two version are kept in sync by
 | 
			
		||||
either updating the wasm-bindgen dependency or this binary.
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Not that I ever managed to run into this myself (_coughs nervously_).
 | 
			
		||||
 | 
			
		||||
There are two projects attempting to be "application frameworks": [percy] and [yew]. Between those,
 | 
			
		||||
I managed to get [two](https://github.com/speice-io/isomorphic-rust/tree/master/percy)
 | 
			
		||||
[examples](https://github.com/speice-io/isomorphic-rust/tree/master/percy_patched_webpack) running
 | 
			
		||||
using `percy`, but was unable to get an
 | 
			
		||||
[example](https://github.com/speice-io/isomorphic-rust/tree/master/yew) running with `yew` because
 | 
			
		||||
of issues with "missing modules" during the `webpack` step:
 | 
			
		||||
 | 
			
		||||
```sh
 | 
			
		||||
ERROR in ./dist/electron_yew_wasm_bg.wasm
 | 
			
		||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/yew/dist'
 | 
			
		||||
 @ ./dist/electron_yew_wasm_bg.wasm
 | 
			
		||||
 @ ./dist/electron_yew_wasm.js
 | 
			
		||||
 @ ./dist/app.js
 | 
			
		||||
 @ ./dist/app_loader.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
If you want to work with the browser APIs directly, your choices are [percy-webapis] or [stdweb] (or
 | 
			
		||||
eventually [web-sys]). See above for my `percy` examples, but when I tried
 | 
			
		||||
[an example with `stdweb`](https://github.com/speice-io/isomorphic-rust/tree/master/stdweb), I was
 | 
			
		||||
unable to get it running:
 | 
			
		||||
 | 
			
		||||
```sh
 | 
			
		||||
ERROR in ./dist/stdweb_electron_bg.wasm
 | 
			
		||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/stdweb/dist'
 | 
			
		||||
 @ ./dist/stdweb_electron_bg.wasm
 | 
			
		||||
 @ ./dist/stdweb_electron.js
 | 
			
		||||
 @ ./dist/app_loader.js
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
At this point I'm pretty convinced that `stdweb` is causing issues for `yew` as well, but can't
 | 
			
		||||
prove it.
 | 
			
		||||
 | 
			
		||||
I did also get a [minimal example](https://github.com/speice-io/isomorphic-rust/tree/master/minimal)
 | 
			
		||||
running that doesn't depend on any tools besides `wasm-bindgen`. However, it requires manually
 | 
			
		||||
writing `extern "C"` blocks for everything you need from the browser. Es no bueno.
 | 
			
		||||
 | 
			
		||||
Finally, from a tools and platform view, there are two up-and-coming packages that should be
 | 
			
		||||
mentioned: [js-sys] and [web-sys]. Their purpose is to be fundamental building blocks that expose
 | 
			
		||||
the browser's APIs to Rust. If you're interested in building an app framework from scratch, these
 | 
			
		||||
should give you the most flexibility. I didn't touch either in my research, though I expect them to
 | 
			
		||||
be essential long-term.
 | 
			
		||||
 | 
			
		||||
So there's a lot in play from the Rust side of things, and it's just going to take some time to
 | 
			
		||||
figure out what works and what doesn't.
 | 
			
		||||
 | 
			
		||||
# Issue the Third: Known Unknowns
 | 
			
		||||
 | 
			
		||||
Alright, so after I managed to get an application started, I stopped there. It was a good deal of
 | 
			
		||||
effort to chain together even a proof of concept, and at this point I'd rather learn [Typescript]
 | 
			
		||||
than keep trying to maintain an incredibly brittle pipeline. Blasphemy, I know...
 | 
			
		||||
 | 
			
		||||
The important point I want to make is that there's a lot unknown about how any of this holds up
 | 
			
		||||
outside proofs of concept. Things I didn't attempt:
 | 
			
		||||
 | 
			
		||||
- Testing
 | 
			
		||||
- Packaging
 | 
			
		||||
- Updates
 | 
			
		||||
- Literally anything related to why I wanted to use Electron in the first place
 | 
			
		||||
 | 
			
		||||
# What it Would Take
 | 
			
		||||
 | 
			
		||||
Much as I don't like Javascript, the tools are too shaky for me to recommend mixing Electron and
 | 
			
		||||
WASM at the moment. There's a lot of innovation happening, so who knows? Someone might have an
 | 
			
		||||
application in production a couple months from now. But at the moment, I'm personally going to stay
 | 
			
		||||
away.
 | 
			
		||||
 | 
			
		||||
Let's finish with a wishlist then - here are the things that I think need to happen before
 | 
			
		||||
Electron/WASM/Rust can become a thing:
 | 
			
		||||
 | 
			
		||||
- Webpack still needs some updates. The necessary work is in progress, but hasn't landed yet
 | 
			
		||||
  ([#7983](https://github.com/webpack/webpack/pull/7983))
 | 
			
		||||
- Browser API libraries (`web-sys` and `stdweb`) need to make sure they can support running in
 | 
			
		||||
  Electron (see module error above)
 | 
			
		||||
- Projects need to stabilize. There's talk of `stdweb` being turned into a Rust API
 | 
			
		||||
  [on top of web-sys](https://github.com/rustwasm/team/issues/226#issuecomment-418475778), and percy
 | 
			
		||||
  [moving to web-sys](https://github.com/chinedufn/percy/issues/24), both of which are big changes
 | 
			
		||||
- `wasm-bindgen` is great, but still in the "move fast and break things" phase
 | 
			
		||||
- A good "boilerplate" app would dramatically simplify the start-up costs;
 | 
			
		||||
  [electron-react-boilerplate](https://github.com/chentsulin/electron-react-boilerplate) comes to
 | 
			
		||||
  mind as a good project to imitate
 | 
			
		||||
- More blog posts/contributors! I think Electron + Rust could be cool, but I have no idea what I'm
 | 
			
		||||
  doing
 | 
			
		||||
 | 
			
		||||
[wxwidgets]: https://wxwidgets.org/
 | 
			
		||||
[libui-rs]: https://github.com/LeoTindall/libui-rs/
 | 
			
		||||
[electron]: https://electronjs.org/
 | 
			
		||||
[babel]: https://babeljs.io/
 | 
			
		||||
[wxrust]: https://github.com/kenz-gelsoft/wxRust
 | 
			
		||||
[wasm-bindgen]: https://github.com/rustwasm/wasm-bindgen
 | 
			
		||||
[js-sys]: https://crates.io/crates/js-sys
 | 
			
		||||
[percy-webapis]: https://crates.io/crates/percy-webapis
 | 
			
		||||
[stdweb]: https://crates.io/crates/stdweb
 | 
			
		||||
[web-sys]: https://crates.io/crates/web-sys
 | 
			
		||||
[percy]: https://chinedufn.github.io/percy/
 | 
			
		||||
[virtual-dom-rs]: https://crates.io/crates/virtual-dom-rs
 | 
			
		||||
[yew]: https://github.com/DenisKolodin/yew
 | 
			
		||||
[react]: https://reactjs.org/
 | 
			
		||||
[elm]: http://elm-lang.org/
 | 
			
		||||
[asm.js]: http://asmjs.org/
 | 
			
		||||
[emscripten]: https://kripken.github.io/emscripten-site/
 | 
			
		||||
[typescript]: https://www.typescriptlang.org/
 | 
			
		||||
[electron forge]: https://electronforge.io/
 | 
			
		||||
[conrod]: https://github.com/PistonDevelopers/conrod
 | 
			
		||||
[webpack]: https://webpack.js.org/
 | 
			
		||||
@ -1,168 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "A Case Study in Heaptrack"
 | 
			
		||||
description: "...because you don't need no garbage collection"
 | 
			
		||||
category:
 | 
			
		||||
tags: []
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
One of my earliest conversations about programming went like this:
 | 
			
		||||
 | 
			
		||||
> Programmers have it too easy these days. They should learn to develop in low memory environments
 | 
			
		||||
> and be more efficient.
 | 
			
		||||
>
 | 
			
		||||
> -- My Father (paraphrased)
 | 
			
		||||
 | 
			
		||||
...though it's not like the first code I wrote was for a
 | 
			
		||||
[graphing calculator](https://education.ti.com/en/products/calculators/graphing-calculators/ti-84-plus-se)
 | 
			
		||||
packing a whole 24KB of RAM. By the way, _what are you doing on my lawn?_
 | 
			
		||||
 | 
			
		||||
The principle remains though: be efficient with the resources you have, because
 | 
			
		||||
[what Intel giveth, Microsoft taketh away](http://exo-blog.blogspot.com/2007/09/what-intel-giveth-microsoft-taketh-away.html).
 | 
			
		||||
My professional work is focused on this kind of efficiency; low-latency financial markets demand
 | 
			
		||||
that you understand at a deep level _exactly_ what your code is doing. As I continue experimenting
 | 
			
		||||
with Rust for personal projects, it's exciting to bring a utilitarian mindset with me: there's
 | 
			
		||||
flexibility for the times I pretend to have a garbage collector, and flexibility for the times that
 | 
			
		||||
I really care about how memory is used.
 | 
			
		||||
 | 
			
		||||
This post is a (small) case study in how I went from the former to the latter. And ultimately, it's
 | 
			
		||||
intended to be a starting toolkit to empower analysis of your own code.
 | 
			
		||||
 | 
			
		||||
# Curiosity
 | 
			
		||||
 | 
			
		||||
When I first started building the [dtparse] crate, my intention was to mirror as closely as possible
 | 
			
		||||
the equivalent [Python library][dateutil]. Python, as you may know, is garbage collected. Very
 | 
			
		||||
rarely is memory usage considered in Python, and I likewise wasn't paying too much attention when
 | 
			
		||||
`dtparse` was first being built.
 | 
			
		||||
 | 
			
		||||
This lackadaisical approach to memory works well enough, and I'm not planning on making `dtparse`
 | 
			
		||||
hyper-efficient. But every so often, I've wondered: "what exactly is going on in memory?" With the
 | 
			
		||||
advent of Rust 1.28 and the
 | 
			
		||||
[Global Allocator trait](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html), I had a really
 | 
			
		||||
great idea: _build a custom allocator that allows you to track your own allocations._ That way, you
 | 
			
		||||
can do things like writing tests for both correct results and correct memory usage. I gave it a
 | 
			
		||||
[shot][qadapt], but learned very quickly: **never write your own allocator**. It went from "fun
 | 
			
		||||
weekend project" to "I have literally no idea what my computer is doing" at breakneck speed.
 | 
			
		||||
 | 
			
		||||
Instead, I'll highlight a separate path I took to make sense of my memory usage: [heaptrack].
 | 
			
		||||
 | 
			
		||||
# Turning on the System Allocator
 | 
			
		||||
 | 
			
		||||
This is the hardest part of the post. Because Rust uses
 | 
			
		||||
[its own allocator](https://github.com/rust-lang/rust/pull/27400#issue-41256384) by default,
 | 
			
		||||
`heaptrack` is unable to properly record unmodified Rust code. To remedy this, we'll make use of the
 | 
			
		||||
`#[global_allocator]` attribute.
 | 
			
		||||
 | 
			
		||||
Specifically, in `lib.rs` or `main.rs`, add this:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::alloc::System;
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static GLOBAL: System = System;
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
...and that's it. Everything else comes essentially for free.
 | 
			
		||||
 | 
			
		||||
# Running heaptrack
 | 
			
		||||
 | 
			
		||||
Assuming you've installed heaptrack <span style="font-size: .6em;">(Homebrew in Mac, package manager
 | 
			
		||||
in Linux, ??? in Windows)</span>, all that's left is to fire up your application:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
heaptrack my_application
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
It's that easy. After the program finishes, you'll see a file in your local directory with a name
 | 
			
		||||
like `heaptrack.my_application.XXXX.gz`. If you load that up in `heaptrack_gui`, you'll see
 | 
			
		||||
something like this:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
And even these pretty colors:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
# Reading Flamegraphs
 | 
			
		||||
 | 
			
		||||
To make sense of our memory usage, we're going to focus on that last picture - it's called a
 | 
			
		||||
["flamegraph"](http://www.brendangregg.com/flamegraphs.html). These charts are typically used to
 | 
			
		||||
show how much time your program spends executing each function, but they're used here to show how
 | 
			
		||||
much memory was allocated during those functions instead.
 | 
			
		||||
 | 
			
		||||
For example, we can see that all executions happened during the `main` function:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
...and within that, all allocations happened during `dtparse::parse`:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
...and within _that_, allocations happened in two different places:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
Now I apologize that it's hard to see, but there's one area specifically that stuck out as an issue:
 | 
			
		||||
**what the heck is the `Default` thing doing?**
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
# Optimizing dtparse
 | 
			
		||||
 | 
			
		||||
See, I knew that there were some allocations during calls to `dtparse::parse`, but I was totally
 | 
			
		||||
wrong about where the bulk of allocations occurred in my program. Let me post the code and see if
 | 
			
		||||
you can spot the mistake:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
/// Main entry point for using `dtparse`.
 | 
			
		||||
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
 | 
			
		||||
    let res = Parser::default().parse(
 | 
			
		||||
        timestr, None, None, false, false,
 | 
			
		||||
        None, false,
 | 
			
		||||
        &HashMap::new(),
 | 
			
		||||
    )?;
 | 
			
		||||
 | 
			
		||||
    Ok((res.0, res.1))
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> [dtparse](https://github.com/bspeice/dtparse/blob/4d7c5dd99572823fa4a390b483c38ab020a2172f/src/lib.rs#L1286)
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Because `Parser::parse` requires a mutable reference to itself, I have to create a new
 | 
			
		||||
`Parser::default` every time it receives a string. This is excessive! We'd rather have an immutable
 | 
			
		||||
parser that can be re-used, and avoid allocating memory in the first place.
 | 
			
		||||
 | 
			
		||||
Armed with that information, I put some time in to
 | 
			
		||||
[make the parser immutable](https://github.com/bspeice/dtparse/commit/741afa34517d6bc1155713bbc5d66905fea13fad#diff-b4aea3e418ccdb71239b96952d9cddb6).
 | 
			
		||||
Now that I can re-use the same parser over and over, the allocations disappear:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
In total, we went from requiring 2 MB of memory in
 | 
			
		||||
[version 1.0.2](https://crates.io/crates/dtparse/1.0.2):
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
All the way down to 300KB in [version 1.0.3](https://crates.io/crates/dtparse/1.0.3):
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
# Conclusion
 | 
			
		||||
 | 
			
		||||
In the end, you don't need to write a custom allocator to be efficient with memory; great tools
 | 
			
		||||
already exist to help you understand what your program is doing.
 | 
			
		||||
 | 
			
		||||
**Use them.**
 | 
			
		||||
 | 
			
		||||
Given that [Moore's Law](https://en.wikipedia.org/wiki/Moore%27s_law) is
 | 
			
		||||
[dead](https://www.technologyreview.com/s/601441/moores-law-is-dead-now-what/), we've all got to do
 | 
			
		||||
our part to take back what Microsoft stole.
 | 
			
		||||
 | 
			
		||||
[dtparse]: https://crates.io/crates/dtparse
 | 
			
		||||
[dateutil]: https://github.com/dateutil/dateutil
 | 
			
		||||
[heaptrack]: https://github.com/KDE/heaptrack
 | 
			
		||||
[qadapt]: https://crates.io/crates/qadapt
 | 
			
		||||
@ -1,34 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: 'More "What Companies Really Mean"'
 | 
			
		||||
description: 'when they ask "Why should we hire you?"'
 | 
			
		||||
category:
 | 
			
		||||
tags: []
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
I recently stumbled across a phenomenal small article entitled
 | 
			
		||||
[What Startups Really Mean By "Why Should We Hire You?"](https://angel.co/blog/what-startups-really-mean-by-why-should-we-hire-you).
 | 
			
		||||
Having been interviewed by smaller companies (though not exactly startups), the questions and
 | 
			
		||||
subtexts are the same. There's often a question behind the question that you're actually trying to
 | 
			
		||||
answer, and I wish I had spotted the nuance earlier in my career.
 | 
			
		||||
 | 
			
		||||
Let me also make note of one more question/euphemism I've come across:
 | 
			
		||||
 | 
			
		||||
# How do you feel about Production Support?
 | 
			
		||||
 | 
			
		||||
**Translation**: _We're a fairly small team, and when things break on an evening/weekend/Christmas
 | 
			
		||||
Day, can we call on you to be there?_
 | 
			
		||||
 | 
			
		||||
I've met decidedly few people in my life who truly enjoy the "ops" side of "devops". They're
 | 
			
		||||
incredibly good at taking an impossible problem, pre-existing knowledge of arcane arts, and turning
 | 
			
		||||
that into a functioning system at the end. And if they all left for lunch, we probably wouldn't make
 | 
			
		||||
it out the door before the zombie apocalypse.
 | 
			
		||||
 | 
			
		||||
Larger organizations (in my experience, 500+ person organizations) have the luxury of hiring people
 | 
			
		||||
who either enjoy that, or play along nicely enough that our systems keep working.
 | 
			
		||||
 | 
			
		||||
Small teams have no such luck. If you're interviewing at a small company, especially as a "data
 | 
			
		||||
scientist" or other somesuch position, be aware that systems can and do spontaneously combust at the
 | 
			
		||||
most inopportune moments.
 | 
			
		||||
 | 
			
		||||
**Terrible-but-popular answers include**: _It's a part of the job, and I'm happy to contribute._
 | 
			
		||||
@ -1,218 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "QADAPT - debug_assert! for your memory usage"
 | 
			
		||||
description: "...and why you want an allocator that goes 💥."
 | 
			
		||||
category:
 | 
			
		||||
tags: []
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
I think it's part of the human condition to ignore perfectly good advice when it comes our way. A
 | 
			
		||||
bit over a month ago, I was dispensing sage wisdom for the ages:
 | 
			
		||||
 | 
			
		||||
> I had a really great idea: build a custom allocator that allows you to track your own allocations.
 | 
			
		||||
> I gave it a shot, but learned very quickly: **never write your own allocator.**
 | 
			
		||||
>
 | 
			
		||||
> -- [me](/2018/10/case-study-optimization.html)
 | 
			
		||||
 | 
			
		||||
I proceeded to ignore it, because we never really learn from our mistakes.
 | 
			
		||||
 | 
			
		||||
There's another part of the human condition that derives joy from seeing things explode.
 | 
			
		||||
 | 
			
		||||
<iframe src="https://giphy.com/embed/YA6dmVW0gfIw8" width="480" height="336" frameBorder="0"></iframe>
 | 
			
		||||
 | 
			
		||||
And _that's_ the part I'm going to focus on.
 | 
			
		||||
 | 
			
		||||
# Why an Allocator?
 | 
			
		||||
 | 
			
		||||
So why, after complaining about allocators, would I still want to write one? There are three reasons
 | 
			
		||||
for that:
 | 
			
		||||
 | 
			
		||||
1. Allocation/dropping is slow
 | 
			
		||||
2. It's difficult to know exactly when Rust will allocate or drop, especially when using code that
 | 
			
		||||
   you did not write
 | 
			
		||||
3. I want automated tools to verify behavior, instead of inspecting by hand
 | 
			
		||||
 | 
			
		||||
When I say "slow," it's important to define the terms. If you're writing web applications, you'll
 | 
			
		||||
spend orders of magnitude more time waiting for the database than you will the allocator. However,
 | 
			
		||||
there's still plenty of code where micro- or nano-seconds matter; think
 | 
			
		||||
[finance](https://www.youtube.com/watch?v=NH1Tta7purM),
 | 
			
		||||
[real-time audio](https://www.reddit.com/r/rust/comments/9hg7yj/synthesizer_progress_update/e6c291f),
 | 
			
		||||
[self-driving cars](https://polysync.io/blog/session-types-for-hearty-codecs/), and
 | 
			
		||||
[networking](https://carllerche.github.io/bytes/bytes/index.html). In these situations it's simply
 | 
			
		||||
unacceptable for you to spend time doing things that are not your program, and waiting on the
 | 
			
		||||
allocator is not cool.
 | 
			
		||||
 | 
			
		||||
As I continue to learn Rust, it's difficult for me to predict where exactly allocations will happen.
 | 
			
		||||
So, I propose we play a quick trivia game: **Does this code invoke the allocator?**
 | 
			
		||||
 | 
			
		||||
## Example 1
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn my_function() {
 | 
			
		||||
    let v: Vec<u8> = Vec::new();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
**No**: Rust [knows how big](https://doc.rust-lang.org/std/mem/fn.size_of.html) the `Vec` type is,
 | 
			
		||||
and reserves a fixed amount of memory on the stack for the `v` vector. However, if we wanted to
 | 
			
		||||
reserve extra space (using `Vec::with_capacity`) the allocator would get invoked.
 | 
			
		||||
 | 
			
		||||
## Example 2
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn my_function() {
 | 
			
		||||
    let v: Box<Vec<u8>> = Box::new(Vec::new());
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
**Yes**: Because Boxes allow us to work with things that are of unknown size, it has to allocate on
 | 
			
		||||
the heap. While the `Box` is unnecessary in this snippet (release builds will optimize out the
 | 
			
		||||
allocation), reserving heap space more generally is needed to pass a dynamically sized type to
 | 
			
		||||
another function.
 | 
			
		||||
 | 
			
		||||
## Example 3
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn my_function(v: Vec<u8>) {
 | 
			
		||||
    v.push(5);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
**Maybe**: Depending on whether the Vector we were given has space available, we may or may not
 | 
			
		||||
allocate. Especially when dealing with code that you did not author, it's difficult to verify that
 | 
			
		||||
things behave as you expect them to.
 | 
			
		||||
 | 
			
		||||
# Blowing Things Up
 | 
			
		||||
 | 
			
		||||
So, how exactly does QADAPT solve these problems? **Whenever an allocation or drop occurs in code
 | 
			
		||||
marked allocation-safe, QADAPT triggers a thread panic.** We don't want to let the program continue
 | 
			
		||||
as if nothing strange happened, _we want things to explode_.
 | 
			
		||||
 | 
			
		||||
However, you don't want code to panic in production because of circumstances you didn't predict.
 | 
			
		||||
Just like [`debug_assert!`](https://doc.rust-lang.org/std/macro.debug_assert.html), **QADAPT will
 | 
			
		||||
strip out its own code when building in release mode to guarantee no panics and no performance
 | 
			
		||||
impact.**
 | 
			
		||||
 | 
			
		||||
Finally, there are three ways to have QADAPT check that your code will not invoke the allocator:
 | 
			
		||||
 | 
			
		||||
## Using a procedural macro
 | 
			
		||||
 | 
			
		||||
The easiest method, watch an entire function for allocator invocation:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use qadapt::no_alloc;
 | 
			
		||||
use qadapt::QADAPT;
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static Q: QADAPT = QADAPT;
 | 
			
		||||
 | 
			
		||||
#[no_alloc]
 | 
			
		||||
fn push_vec(v: &mut Vec<u8>) {
 | 
			
		||||
    // This triggers a panic if v.len() == v.capacity()
 | 
			
		||||
    v.push(5);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    let v = Vec::with_capacity(1);
 | 
			
		||||
 | 
			
		||||
    // This will *not* trigger a panic
 | 
			
		||||
    push_vec(&v);
 | 
			
		||||
 | 
			
		||||
    // This *will* trigger a panic
 | 
			
		||||
    push_vec(&v);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Using a regular macro
 | 
			
		||||
 | 
			
		||||
For times when you need more precision:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use qadapt::assert_no_alloc;
 | 
			
		||||
use qadapt::QADAPT;
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static Q: QADAPT = QADAPT;
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    let v = Vec::with_capacity(1);
 | 
			
		||||
 | 
			
		||||
    // No allocations here, we already have space reserved
 | 
			
		||||
    assert_no_alloc!(v.push(5));
 | 
			
		||||
 | 
			
		||||
    // Even though we remove an item, it doesn't trigger a drop
 | 
			
		||||
    // because it's a scalar. If it were a `Box<_>` type,
 | 
			
		||||
    // a drop would trigger.
 | 
			
		||||
    assert_no_alloc!({
 | 
			
		||||
        v.pop().unwrap();
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Using function calls
 | 
			
		||||
 | 
			
		||||
Both the most precise and most tedious:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use qadapt::enter_protected;
 | 
			
		||||
use qadapt::exit_protected;
 | 
			
		||||
use qadapt::QADAPT;
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static Q: QADAPT = QADAPT;
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // This triggers an allocation (on non-release builds)
 | 
			
		||||
    let v = Vec::with_capacity(1);
 | 
			
		||||
 | 
			
		||||
    enter_protected();
 | 
			
		||||
    // This does not trigger an allocation because we've reserved size
 | 
			
		||||
    v.push(0);
 | 
			
		||||
    exit_protected();
 | 
			
		||||
 | 
			
		||||
    // This triggers an allocation because we ran out of size,
 | 
			
		||||
    // but doesn't panic because we're no longer protected.
 | 
			
		||||
    v.push(1);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Caveats
 | 
			
		||||
 | 
			
		||||
It's important to point out that QADAPT code is synchronous, so please be careful when mixing in
 | 
			
		||||
asynchronous functions:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use futures::future::Future;
 | 
			
		||||
use futures::future::ok;
 | 
			
		||||
 | 
			
		||||
#[no_alloc]
 | 
			
		||||
fn async_capacity() -> impl Future<Item=Vec<u8>, Error=()> {
 | 
			
		||||
    ok(12).and_then(|e| Ok(Vec::with_capacity(e)))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // This doesn't trigger a panic because the `and_then` closure
 | 
			
		||||
    // wasn't run during the function call.
 | 
			
		||||
    async_capacity();
 | 
			
		||||
 | 
			
		||||
    // Still no panic
 | 
			
		||||
    assert_no_alloc!(async_capacity());
 | 
			
		||||
 | 
			
		||||
    // This will panic because the allocation happens during `unwrap`
 | 
			
		||||
    // in the `assert_no_alloc!` macro
 | 
			
		||||
    assert_no_alloc!(async_capacity().poll().unwrap());
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
# Conclusion
 | 
			
		||||
 | 
			
		||||
While there's a lot more to writing high-performance code than managing your usage of the allocator,
 | 
			
		||||
it's critical that you do use the allocator correctly. QADAPT will verify that your code is doing
 | 
			
		||||
what you expect. It's usable even on stable Rust from version 1.31 onward, which isn't the case for
 | 
			
		||||
most allocators. Version 1.0 was released today, and you can check it out over at
 | 
			
		||||
[crates.io](https://crates.io/crates/qadapt) or on [github](https://github.com/bspeice/qadapt).
 | 
			
		||||
 | 
			
		||||
I'm hoping to write more about high-performance Rust in the future, and I expect that QADAPT will
 | 
			
		||||
help guide that. If there are topics you're interested in, let me know in the comments below!
 | 
			
		||||
 | 
			
		||||
[qadapt]: https://crates.io/crates/qadapt
 | 
			
		||||
@ -1,113 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Allocations in Rust"
 | 
			
		||||
description: "An introduction to the memory model."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
There's an alchemy of distilling complex technical topics into articles and videos that change the
 | 
			
		||||
way programmers see the tools they interact with on a regular basis. I knew what a linker was, but
 | 
			
		||||
there's a staggering amount of complexity in between
 | 
			
		||||
[the OS and `main()`](https://www.youtube.com/watch?v=dOfucXtyEsU). Rust programmers use the
 | 
			
		||||
[`Box`](https://doc.rust-lang.org/stable/std/boxed/struct.Box.html) type all the time, but there's a
 | 
			
		||||
rich history of the Rust language itself wrapped up in
 | 
			
		||||
[how special it is](https://manishearth.github.io/blog/2017/01/10/rust-tidbits-box-is-special/).
 | 
			
		||||
 | 
			
		||||
In a similar vein, this series attempts to look at code and understand how memory is used; the
 | 
			
		||||
complex choreography of operating system, compiler, and program that frees you to focus on
 | 
			
		||||
functionality far-flung from frivolous book-keeping. The Rust compiler relieves a great deal of the
 | 
			
		||||
cognitive burden associated with memory management, but we're going to step into its world for a
 | 
			
		||||
while.
 | 
			
		||||
 | 
			
		||||
Let's learn a bit about memory in Rust.
 | 
			
		||||
 | 
			
		||||
# Table of Contents
 | 
			
		||||
 | 
			
		||||
This series is intended as both learning and reference material; we'll work through the different
 | 
			
		||||
memory types Rust uses, and explain the implications of each. Ultimately, a summary will be provided
 | 
			
		||||
as a cheat sheet for easy future reference. To that end, a table of contents is in order:
 | 
			
		||||
 | 
			
		||||
- Foreword
 | 
			
		||||
- [Global Memory Usage: The Whole World](/2019/02/the-whole-world.html)
 | 
			
		||||
- [Fixed Memory: Stacking Up](/2019/02/stacking-up.html)
 | 
			
		||||
- [Dynamic Memory: A Heaping Helping](/2019/02/a-heaping-helping.html)
 | 
			
		||||
- [Compiler Optimizations: What It's Done For You Lately](/2019/02/compiler-optimizations.html)
 | 
			
		||||
- [Summary: What Are the Rules?](/2019/02/summary.html)
 | 
			
		||||
 | 
			
		||||
# Foreword
 | 
			
		||||
 | 
			
		||||
Rust's three defining features of
 | 
			
		||||
[Performance, Reliability, and Productivity](https://www.rust-lang.org/) are all driven to a great
 | 
			
		||||
degree by how the Rust compiler understands memory usage. Unlike managed memory languages (Java,
 | 
			
		||||
Python), Rust
 | 
			
		||||
[doesn't really](https://words.steveklabnik.com/borrow-checking-escape-analysis-and-the-generational-hypothesis)
 | 
			
		||||
garbage collect; instead, it uses an
 | 
			
		||||
[ownership](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) system to reason about
 | 
			
		||||
how long objects will last in your program. In some cases, if the life of an object is fairly
 | 
			
		||||
transient, Rust can make use of a very fast region called the "stack." When that's not possible,
 | 
			
		||||
Rust uses
 | 
			
		||||
[dynamic (heap) memory](https://en.wikipedia.org/wiki/Memory_management#Dynamic_memory_allocation)
 | 
			
		||||
and the ownership system to ensure you can't accidentally corrupt memory. It's not as fast, but it
 | 
			
		||||
is important to have available.
 | 
			
		||||
 | 
			
		||||
That said, there are specific situations in Rust where you'd never need to worry about the
 | 
			
		||||
stack/heap distinction! If you:
 | 
			
		||||
 | 
			
		||||
1. Never use `unsafe`
 | 
			
		||||
2. Never use `#![feature(alloc)]` or the [`alloc` crate](https://doc.rust-lang.org/alloc/index.html)
 | 
			
		||||
 | 
			
		||||
...then it's not possible for you to use dynamic memory!
 | 
			
		||||
 | 
			
		||||
For some uses of Rust, typically embedded devices, these constraints are OK. They have very limited
 | 
			
		||||
memory, and the program binary size itself may significantly affect what's available! There's no
 | 
			
		||||
operating system able to manage this
 | 
			
		||||
["virtual memory"](https://en.wikipedia.org/wiki/Virtual_memory) thing, but that's not an issue
 | 
			
		||||
because there's only one running application. The
 | 
			
		||||
[embedonomicon](https://docs.rust-embedded.org/embedonomicon/preface.html) is ever in mind, and
 | 
			
		||||
interacting with the "real world" through extra peripherals is accomplished by reading and writing
 | 
			
		||||
to [specific memory addresses](https://bob.cs.sonoma.edu/IntroCompOrg-RPi/sec-gpio-mem.html).
 | 
			
		||||
 | 
			
		||||
Most Rust programs find these requirements overly burdensome though. C++ developers would struggle
 | 
			
		||||
without access to [`std::vector`](https://en.cppreference.com/w/cpp/container/vector) (except those
 | 
			
		||||
hardcore no-STL people), and Rust developers would struggle without
 | 
			
		||||
[`std::vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html). But with the constraints above,
 | 
			
		||||
`std::vec` is actually a part of the
 | 
			
		||||
[`alloc` crate](https://doc.rust-lang.org/alloc/vec/struct.Vec.html), and thus off-limits. `Box`,
 | 
			
		||||
`Rc`, etc., are also unusable for the same reason.
 | 
			
		||||
 | 
			
		||||
Whether writing code for embedded devices or not, the important thing in both situations is how much
 | 
			
		||||
you know _before your application starts_ about what its memory usage will look like. In embedded
 | 
			
		||||
devices, there's a small, fixed amount of memory to use. In a browser, you have no idea how large
 | 
			
		||||
[google.com](https://www.google.com)'s home page is until you start trying to download it. The
 | 
			
		||||
compiler uses this knowledge (or lack thereof) to optimize how memory is used; put simply, your code
 | 
			
		||||
runs faster when the compiler can guarantee exactly how much memory your program needs while it's
 | 
			
		||||
running. This series is all about understanding how the compiler reasons about your program, with an
 | 
			
		||||
emphasis on the implications for performance.
 | 
			
		||||
 | 
			
		||||
Now let's address some conditions and caveats before going much further:
 | 
			
		||||
 | 
			
		||||
- We'll focus on "safe" Rust only; `unsafe` lets you use platform-specific allocation APIs
 | 
			
		||||
  ([`malloc`](https://www.tutorialspoint.com/c_standard_library/c_function_malloc.htm)) that we'll
 | 
			
		||||
  ignore.
 | 
			
		||||
- We'll assume a "debug" build of Rust code (what you get with `cargo run` and `cargo test`) and
 | 
			
		||||
  address (pun intended) release mode at the end (`cargo run --release` and `cargo test --release`).
 | 
			
		||||
- All content will be run using Rust 1.32, as that's the highest currently supported in the
 | 
			
		||||
  [Compiler Explorer](https://godbolt.org/). As such, we'll avoid upcoming innovations like
 | 
			
		||||
  [compile-time evaluation of `static`](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md)
 | 
			
		||||
  that are available in nightly.
 | 
			
		||||
- Because of the nature of the content, being able to read assembly is helpful. We'll keep it
 | 
			
		||||
  simple, but I [found](https://stackoverflow.com/a/4584131/1454178) a
 | 
			
		||||
  [refresher](https://stackoverflow.com/a/26026278/1454178) on the `push` and `pop`
 | 
			
		||||
  [instructions](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html) was helpful while writing
 | 
			
		||||
  this.
 | 
			
		||||
- I've tried to be precise in saying only what I can prove using the tools (ASM, docs) that are
 | 
			
		||||
  available, but if there's something said in error it will be corrected expeditiously. Please let
 | 
			
		||||
  me know at [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
			
		||||
 | 
			
		||||
Finally, I'll do what I can to flag potential future changes but the Rust docs have a notice worth
 | 
			
		||||
repeating:
 | 
			
		||||
 | 
			
		||||
> Rust does not currently have a rigorously and formally defined memory model.
 | 
			
		||||
>
 | 
			
		||||
> -- [the docs](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html)
 | 
			
		||||
@ -1,337 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Global Memory Usage: The Whole World"
 | 
			
		||||
description: "Static considered slightly less harmful."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
The first memory type we'll look at is pretty special: when Rust can prove that a _value_ is fixed
 | 
			
		||||
for the life of a program (`const`), and when a _reference_ is unique for the life of a program
 | 
			
		||||
(`static` as a declaration, not
 | 
			
		||||
[`'static`](https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#the-static-lifetime) as a
 | 
			
		||||
lifetime), we can make use of global memory. This special section of data is embedded directly in
 | 
			
		||||
the program binary so that variables are ready to go once the program loads; no additional
 | 
			
		||||
computation is necessary.
 | 
			
		||||
 | 
			
		||||
Understanding the value/reference distinction is important for reasons we'll go into below, and
 | 
			
		||||
while the
 | 
			
		||||
[full specification](https://github.com/rust-lang/rfcs/blob/master/text/0246-const-vs-static.md) for
 | 
			
		||||
these two keywords is available, we'll take a hands-on approach to the topic.
 | 
			
		||||
 | 
			
		||||
# **const**
 | 
			
		||||
 | 
			
		||||
When a _value_ is guaranteed to be unchanging in your program (where "value" may be scalars,
 | 
			
		||||
`struct`s, etc.), you can declare it `const`. This tells the compiler that it's safe to treat the
 | 
			
		||||
value as never changing, and enables some interesting optimizations; not only is there no
 | 
			
		||||
initialization cost to creating the value (it is loaded at the same time as the executable parts of
 | 
			
		||||
your program), but the compiler can also copy the value around if it speeds up the code.
 | 
			
		||||
 | 
			
		||||
The points we need to address when talking about `const` are:
 | 
			
		||||
 | 
			
		||||
- `Const` values are stored in read-only memory - they're impossible to modify.
 | 
			
		||||
- Values resulting from calling a `const fn` are materialized at compile-time.
 | 
			
		||||
- The compiler may (or may not) copy `const` values wherever it chooses.
 | 
			
		||||
 | 
			
		||||
## Read-Only
 | 
			
		||||
 | 
			
		||||
The first point is a bit strange - "read-only memory."
 | 
			
		||||
[The Rust book](https://doc.rust-lang.org/book/ch03-01-variables-and-mutability.html#differences-between-variables-and-constants)
 | 
			
		||||
mentions in a couple places that using `mut` with constants is illegal, but it's also important to
 | 
			
		||||
demonstrate just how immutable they are. _Typically_ in Rust you can use
 | 
			
		||||
[interior mutability](https://doc.rust-lang.org/book/ch15-05-interior-mutability.html) to modify
 | 
			
		||||
things that aren't declared `mut`.
 | 
			
		||||
[`RefCell`](https://doc.rust-lang.org/std/cell/struct.RefCell.html) provides an example of this
 | 
			
		||||
pattern in action:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
fn my_mutator(cell: &RefCell<u8>) {
 | 
			
		||||
    // Even though we're given an immutable reference,
 | 
			
		||||
    // the `replace` method allows us to modify the inner value.
 | 
			
		||||
    cell.replace(14);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    let cell = RefCell::new(25);
 | 
			
		||||
    // Prints out 25
 | 
			
		||||
    println!("Cell: {:?}", cell);
 | 
			
		||||
    my_mutator(&cell);
 | 
			
		||||
    // Prints out 14
 | 
			
		||||
    println!("Cell: {:?}", cell);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8e4bea1a718edaff4507944e825a54b2)
 | 
			
		||||
 | 
			
		||||
When `const` is involved though, interior mutability is impossible:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
const CELL: RefCell<u8> = RefCell::new(25);
 | 
			
		||||
 | 
			
		||||
fn my_mutator(cell: &RefCell<u8>) {
 | 
			
		||||
    cell.replace(14);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // First line prints 25 as expected
 | 
			
		||||
    println!("Cell: {:?}", &CELL);
 | 
			
		||||
    my_mutator(&CELL);
 | 
			
		||||
    // Second line *still* prints 25
 | 
			
		||||
    println!("Cell: {:?}", &CELL);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=88fe98110c33c1b3a51e341f48b8ae00)
 | 
			
		||||
 | 
			
		||||
And a second example using [`Once`](https://doc.rust-lang.org/std/sync/struct.Once.html):
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::sync::Once;
 | 
			
		||||
 | 
			
		||||
const SURPRISE: Once = Once::new();
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // This is how `Once` is supposed to be used
 | 
			
		||||
    SURPRISE.call_once(|| println!("Initializing..."));
 | 
			
		||||
    // Because `Once` is a `const` value, we never record it
 | 
			
		||||
    // having been initialized the first time, and this closure
 | 
			
		||||
    // will also execute.
 | 
			
		||||
    SURPRISE.call_once(|| println!("Initializing again???"));
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c3cc5979b5e5434eca0f9ec4a06ee0ed)
 | 
			
		||||
 | 
			
		||||
When the
 | 
			
		||||
[`const` specification](https://github.com/rust-lang/rfcs/blob/26197104b7bb9a5a35db243d639aee6e46d35d75/text/0246-const-vs-static.md)
 | 
			
		||||
refers to ["rvalues"](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3055.pdf), this
 | 
			
		||||
behavior is what it refers to. [Clippy](https://github.com/rust-lang/rust-clippy) will treat this
 | 
			
		||||
as an error, but it's still something to be aware of.
 | 
			
		||||
 | 
			
		||||
## Initialization == Compilation
 | 
			
		||||
 | 
			
		||||
The next thing to mention is that `const` values are loaded into memory _as part of your program
 | 
			
		||||
binary_. Because of this, any `const` values declared in your program will be "realized" at
 | 
			
		||||
compile-time; accessing them may trigger a main-memory lookup (with a fixed address, so your CPU may
 | 
			
		||||
be able to prefetch the value), but that's it.
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
const CELL: RefCell<u32> = RefCell::new(24);
 | 
			
		||||
 | 
			
		||||
pub fn multiply(value: u32) -> u32 {
 | 
			
		||||
    // CELL is stored at `.L__unnamed_1`
 | 
			
		||||
    value * (*CELL.get_mut())
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/Th8boO)
 | 
			
		||||
 | 
			
		||||
The compiler creates one `RefCell`, uses it everywhere, and never needs to call the `RefCell::new`
 | 
			
		||||
function.
 | 
			
		||||
 | 
			
		||||
## Copying
 | 
			
		||||
 | 
			
		||||
If it's helpful though, the compiler can choose to copy `const` values.
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
const FACTOR: u32 = 1000;
 | 
			
		||||
 | 
			
		||||
pub fn multiply(value: u32) -> u32 {
 | 
			
		||||
    // See assembly line 4 for the `mov edi, 1000` instruction
 | 
			
		||||
    value * FACTOR
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn multiply_twice(value: u32) -> u32 {
 | 
			
		||||
    // See assembly lines 22 and 29 for `mov edi, 1000` instructions
 | 
			
		||||
    value * FACTOR * FACTOR
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/ZtS54X)
 | 
			
		||||
 | 
			
		||||
In this example, the `FACTOR` value is turned into the `mov edi, 1000` instruction in both the
 | 
			
		||||
`multiply` and `multiply_twice` functions; the "1000" value is never "stored" anywhere, as it's
 | 
			
		||||
small enough to inline into the assembly instructions.
 | 
			
		||||
 | 
			
		||||
Finally, getting the address of a `const` value is possible, but not guaranteed to be unique
 | 
			
		||||
(because the compiler can choose to copy values). I was unable to get non-unique pointers in my
 | 
			
		||||
testing (even using different crates), but the specifications are clear enough: _don't rely on
 | 
			
		||||
pointers to `const` values being consistent_. To be frank, caring about locations for `const` values
 | 
			
		||||
is almost certainly a code smell.
 | 
			
		||||
 | 
			
		||||
# **static**
 | 
			
		||||
 | 
			
		||||
Static variables are related to `const` variables, but take a slightly different approach. When we
 | 
			
		||||
declare that a _reference_ is unique for the life of a program, we have a `static` variable
 | 
			
		||||
(unrelated to the `'static` lifetime). Because of the reference/value distinction with
 | 
			
		||||
`const`/`static`, static variables behave much more like typical "global" variables.
 | 
			
		||||
 | 
			
		||||
But to understand `static`, here's what we'll look at:
 | 
			
		||||
 | 
			
		||||
- `static` variables are globally unique locations in memory.
 | 
			
		||||
- Like `const`, `static` variables are loaded at the same time your program is read into
 | 
			
		||||
  memory.
 | 
			
		||||
- All `static` variables must implement the
 | 
			
		||||
  [`Sync`](https://doc.rust-lang.org/std/marker/trait.Sync.html) marker trait.
 | 
			
		||||
- Interior mutability is safe and acceptable when using `static` variables.
 | 
			
		||||
 | 
			
		||||
## Memory Uniqueness
 | 
			
		||||
 | 
			
		||||
The single biggest difference between `const` and `static` is the guarantees provided about
 | 
			
		||||
uniqueness. Where `const` variables may or may not be copied in code, `static` variables are
 | 
			
		||||
guaranteed to be unique. If we take a previous `const` example and change it to `static`, the
 | 
			
		||||
difference should be clear:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
static FACTOR: u32 = 1000;
 | 
			
		||||
 | 
			
		||||
pub fn multiply(value: u32) -> u32 {
 | 
			
		||||
    // The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
 | 
			
		||||
    value * FACTOR
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn multiply_twice(value: u32) -> u32 {
 | 
			
		||||
    // The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
 | 
			
		||||
    value * FACTOR * FACTOR
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/uxmiRQ)
 | 
			
		||||
 | 
			
		||||
Where [previously](#copying) there were plenty of references to multiplying by 1000, the new
 | 
			
		||||
assembly refers to `FACTOR` as a named memory location instead. No initialization work needs to be
 | 
			
		||||
done, but the compiler can no longer prove the value never changes during execution.
 | 
			
		||||
 | 
			
		||||
## Initialization == Compilation
 | 
			
		||||
 | 
			
		||||
Next, let's talk about initialization. The simplest case is initializing static variables with
 | 
			
		||||
either scalar or struct notation:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
struct MyStruct {
 | 
			
		||||
    x: u32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static MY_STRUCT: MyStruct = MyStruct {
 | 
			
		||||
    // You can even reference other statics
 | 
			
		||||
    // declared later
 | 
			
		||||
    x: MY_VAL
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static MY_VAL: u32 = 24;
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    println!("Static MyStruct: {:?}", MY_STRUCT);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=b538dbc46076f12db047af4f4403ee6e)
 | 
			
		||||
 | 
			
		||||
Things can get a bit weirder when using `const fn` though. In most cases, it just works:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
#[derive(Debug)]
 | 
			
		||||
struct MyStruct {
 | 
			
		||||
    x: u32
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
impl MyStruct {
 | 
			
		||||
    const fn new() -> MyStruct {
 | 
			
		||||
        MyStruct { x: 24 }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static MY_STRUCT: MyStruct = MyStruct::new();
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    println!("const fn Static MyStruct: {:?}", MY_STRUCT);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8c796a6e7fc273c12115091b707b0255)
 | 
			
		||||
 | 
			
		||||
However, there's a caveat: you're currently not allowed to use `const fn` to initialize static
 | 
			
		||||
variables of types that aren't marked `Sync`. For example,
 | 
			
		||||
[`RefCell::new()`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#method.new) is a
 | 
			
		||||
`const fn`, but because
 | 
			
		||||
[`RefCell` isn't `Sync`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#impl-Sync), you'll
 | 
			
		||||
get an error at compile time:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
 | 
			
		||||
static MY_LOCK: RefCell<u8> = RefCell::new(0);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c76ef86e473d07117a1700e21fd45560)
 | 
			
		||||
 | 
			
		||||
It's likely that this will
 | 
			
		||||
[change in the future](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md) though.
 | 
			
		||||
 | 
			
		||||
## **Sync**
 | 
			
		||||
 | 
			
		||||
Which leads well to the next point: static variable types must implement the
 | 
			
		||||
[`Sync` marker](https://doc.rust-lang.org/std/marker/trait.Sync.html). Because they're globally
 | 
			
		||||
unique, it must be safe for you to access static variables from any thread at any time. Most
 | 
			
		||||
`struct` definitions automatically implement the `Sync` trait because they contain only elements
 | 
			
		||||
which themselves implement `Sync` (read more in the
 | 
			
		||||
[Nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). This is why earlier examples could
 | 
			
		||||
get away with initializing statics, even though we never included an `impl Sync for MyStruct` in the
 | 
			
		||||
code. To demonstrate this property, Rust refuses to compile our earlier example if we add a
 | 
			
		||||
non-`Sync` element to the `struct` definition:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::cell::RefCell;
 | 
			
		||||
 | 
			
		||||
struct MyStruct {
 | 
			
		||||
    x: u32,
 | 
			
		||||
    y: RefCell<u8>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
 | 
			
		||||
static MY_STRUCT: MyStruct = MyStruct {
 | 
			
		||||
    x: 8,
 | 
			
		||||
    y: RefCell::new(8)
 | 
			
		||||
};
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=40074d0248f056c296b662dbbff97cfc)
 | 
			
		||||
 | 
			
		||||
## Interior Mutability
 | 
			
		||||
 | 
			
		||||
Finally, while `static mut` variables are allowed, mutating them is an `unsafe` operation. If we
 | 
			
		||||
want to stay in `safe` Rust, we can use interior mutability to accomplish similar goals:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::sync::Once;
 | 
			
		||||
 | 
			
		||||
// This example adapted from https://doc.rust-lang.org/std/sync/struct.Once.html#method.call_once
 | 
			
		||||
static INIT: Once = Once::new();
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // Note that while `INIT` is declared immutable, we're still allowed
 | 
			
		||||
    // to mutate its interior
 | 
			
		||||
    INIT.call_once(|| println!("Initializing..."));
 | 
			
		||||
    // This code won't panic, as the interior of INIT was modified
 | 
			
		||||
    // as part of the previous `call_once`
 | 
			
		||||
    INIT.call_once(|| panic!("INIT was called twice!"));
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=3ba003a981a7ed7400240caadd384d59)
 | 
			
		||||
@ -1,601 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Fixed Memory: Stacking Up"
 | 
			
		||||
description: "We don't need no allocator."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
`const` and `static` are perfectly fine, but it's relatively rare that we know at compile-time about
 | 
			
		||||
either values or references that will be the same for the duration of our program. Put another way,
 | 
			
		||||
it's not often the case that either you or your compiler knows how much memory your entire program
 | 
			
		||||
will ever need.
 | 
			
		||||
 | 
			
		||||
However, there are still some optimizations the compiler can do if it knows how much memory
 | 
			
		||||
individual functions will need. Specifically, the compiler can make use of "stack" memory (as
 | 
			
		||||
opposed to "heap" memory) which can be managed far faster in both the short- and long-term. When
 | 
			
		||||
requesting memory, the [`push` instruction](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html)
 | 
			
		||||
can typically complete in [1 or 2 cycles](https://agner.org/optimize/instruction_tables.ods) (<1
 | 
			
		||||
nanosecond on modern CPUs). Contrast that to heap memory which requires an allocator (specialized
 | 
			
		||||
software to track what memory is in use) to reserve space. When you're finished with stack memory,
 | 
			
		||||
the `pop` instruction runs in 1-3 cycles, as opposed to an allocator needing to worry about memory
 | 
			
		||||
fragmentation and other issues with the heap. All sorts of incredibly sophisticated techniques have
 | 
			
		||||
been used to design allocators:
 | 
			
		||||
 | 
			
		||||
- [Garbage Collection](<https://en.wikipedia.org/wiki/Garbage_collection_(computer_science)>)
 | 
			
		||||
  strategies like [Tracing](https://en.wikipedia.org/wiki/Tracing_garbage_collection) (used in
 | 
			
		||||
  [Java](https://www.oracle.com/technetwork/java/javase/tech/g1-intro-jsp-135488.html)) and
 | 
			
		||||
  [Reference counting](https://en.wikipedia.org/wiki/Reference_counting) (used in
 | 
			
		||||
  [Python](https://docs.python.org/3/extending/extending.html#reference-counts))
 | 
			
		||||
- Thread-local structures to prevent locking the allocator in
 | 
			
		||||
  [tcmalloc](https://jamesgolick.com/2013/5/19/how-tcmalloc-works.html)
 | 
			
		||||
- Arena structures used in [jemalloc](http://jemalloc.net/), which
 | 
			
		||||
  [until recently](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default)
 | 
			
		||||
  was the primary allocator for Rust programs!
 | 
			
		||||
 | 
			
		||||
But no matter how fast your allocator is, the principle remains: the fastest allocator is the one
 | 
			
		||||
you never use. As such, we're not going to discuss how exactly the
 | 
			
		||||
[`push` and `pop` instructions work](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html), but
 | 
			
		||||
we'll focus instead on the conditions that enable the Rust compiler to use faster stack-based
 | 
			
		||||
allocation for variables.
 | 
			
		||||
 | 
			
		||||
So, **how do we know when Rust will or will not use stack allocation for objects we create?**
 | 
			
		||||
Looking at other languages, it's often easy to delineate between stack and heap. Managed memory
 | 
			
		||||
languages (Python, Java,
 | 
			
		||||
[C#](https://blogs.msdn.microsoft.com/ericlippert/2010/09/30/the-truth-about-value-types/)) place
 | 
			
		||||
everything on the heap. JIT compilers ([PyPy](https://www.pypy.org/),
 | 
			
		||||
[HotSpot](https://www.oracle.com/technetwork/java/javase/tech/index-jsp-136373.html)) may optimize
 | 
			
		||||
some heap allocations away, but you should never assume it will happen. C makes things clear with
 | 
			
		||||
calls to special functions (like [malloc(3)](https://linux.die.net/man/3/malloc)) needed to access
 | 
			
		||||
heap memory. Old C++ has the [`new`](https://stackoverflow.com/a/655086/1454178) keyword, though
 | 
			
		||||
modern C++/C++11 is more complicated with [RAII](https://en.cppreference.com/w/cpp/language/raii).
 | 
			
		||||
 | 
			
		||||
For Rust, we can summarize as follows: **stack allocation will be used for everything that doesn't
 | 
			
		||||
involve "smart pointers" and collections**. We'll skip over a precise definition of the term "smart
 | 
			
		||||
pointer" for now, and instead discuss what we should watch for to understand when stack and heap
 | 
			
		||||
memory regions are used:
 | 
			
		||||
 | 
			
		||||
1. Stack manipulation instructions (`push`, `pop`, and `add`/`sub` of the `rsp` register) indicate
 | 
			
		||||
   allocation of stack memory:
 | 
			
		||||
 | 
			
		||||
   ```rust
 | 
			
		||||
   pub fn stack_alloc(x: u32) -> u32 {
 | 
			
		||||
       // Space for `y` is allocated by subtracting from `rsp`,
 | 
			
		||||
       // and then populated
 | 
			
		||||
       let y = [1u8, 2, 3, 4];
 | 
			
		||||
       // Space for `y` is deallocated by adding back to `rsp`
 | 
			
		||||
       x
 | 
			
		||||
   }
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
   -- [Compiler Explorer](https://godbolt.org/z/5WSgc9)
 | 
			
		||||
 | 
			
		||||
2. Tracking when exactly heap allocation calls occur is difficult. It's typically easier to watch
 | 
			
		||||
   for `call core::ptr::real_drop_in_place`, and infer that a heap allocation happened in the recent
 | 
			
		||||
   past:
 | 
			
		||||
 | 
			
		||||
   ```rust
 | 
			
		||||
   pub fn heap_alloc(x: usize) -> usize {
 | 
			
		||||
       // Space for elements in a vector has to be allocated
 | 
			
		||||
       // on the heap, and is then de-allocated once the
 | 
			
		||||
       // vector goes out of scope
 | 
			
		||||
       let y: Vec<u8> = Vec::with_capacity(x);
 | 
			
		||||
       x
 | 
			
		||||
   }
 | 
			
		||||
   ```
 | 
			
		||||
 | 
			
		||||
   -- [Compiler Explorer](https://godbolt.org/z/epfgoQ) (`real_drop_in_place` happens on line 1317)
 | 
			
		||||
   <span style="font-size: .8em">Note: While the
 | 
			
		||||
   [`Drop` trait](https://doc.rust-lang.org/std/ops/trait.Drop.html) is
 | 
			
		||||
   [called for stack-allocated objects](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=87edf374d8983816eb3d8cfeac657b46),
 | 
			
		||||
   the Rust standard library only defines `Drop` implementations for types that involve heap
 | 
			
		||||
   allocation.</span>
 | 
			
		||||
 | 
			
		||||
3. If you don't want to inspect the assembly, use a custom allocator that's able to track and alert
 | 
			
		||||
   when heap allocations occur. Crates like
 | 
			
		||||
   [`alloc_counter`](https://crates.io/crates/alloc_counter) are designed for exactly this purpose.
 | 
			
		||||
 | 
			
		||||
With all that in mind, let's talk about situations in which we're guaranteed to use stack memory:
 | 
			
		||||
 | 
			
		||||
- Structs are created on the stack.
 | 
			
		||||
- Function arguments are passed on the stack, meaning the
 | 
			
		||||
  [`#[inline]` attribute](https://doc.rust-lang.org/reference/attributes.html#inline-attribute) will
 | 
			
		||||
  not change the memory region used.
 | 
			
		||||
- Enums and unions are stack-allocated.
 | 
			
		||||
- [Arrays](https://doc.rust-lang.org/std/primitive.array.html) are always stack-allocated.
 | 
			
		||||
- Closures capture their arguments on the stack.
 | 
			
		||||
- Generics will use stack allocation, even with dynamic dispatch.
 | 
			
		||||
- [`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html) types are guaranteed to be
 | 
			
		||||
  stack-allocated, and copying them will be done in stack memory.
 | 
			
		||||
- [`Iterator`s](https://doc.rust-lang.org/std/iter/trait.Iterator.html) in the standard library are
 | 
			
		||||
  stack-allocated even when iterating over heap-based collections.
 | 
			
		||||
 | 
			
		||||
# Structs
 | 
			
		||||
 | 
			
		||||
The simplest case comes first. When creating vanilla `struct` objects, we use stack memory to hold
 | 
			
		||||
their contents:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
struct Point {
 | 
			
		||||
    x: u64,
 | 
			
		||||
    y: u64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct Line {
 | 
			
		||||
    a: Point,
 | 
			
		||||
    b: Point,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn make_line() {
 | 
			
		||||
    // `origin` is stored in the first 16 bytes of memory
 | 
			
		||||
    // starting at location `rsp`
 | 
			
		||||
    let origin = Point { x: 0, y: 0 };
 | 
			
		||||
    // `point` makes up the next 16 bytes of memory
 | 
			
		||||
    let point = Point { x: 1, y: 2 };
 | 
			
		||||
 | 
			
		||||
    // When creating `ray`, we just move the content out of
 | 
			
		||||
    // `origin` and `point` into the next 32 bytes of memory
 | 
			
		||||
    let ray = Line { a: origin, b: point };
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/vri9BE)
 | 
			
		||||
 | 
			
		||||
Note that while some extra-fancy instructions are used for memory manipulation in the assembly, the
 | 
			
		||||
`sub rsp, 64` instruction indicates we're still working with the stack.
 | 
			
		||||
 | 
			
		||||
# Function arguments
 | 
			
		||||
 | 
			
		||||
Have you ever wondered how functions communicate with each other? Like, once the variables are given
 | 
			
		||||
to you, everything's fine. But how do you "give" those variables to another function? How do you get
 | 
			
		||||
the results back afterward? The answer: the compiler arranges memory and assembly instructions using
 | 
			
		||||
a pre-determined [calling convention](http://llvm.org/docs/LangRef.html#calling-conventions). This
 | 
			
		||||
convention governs the rules around where arguments needed by a function will be located (either in
 | 
			
		||||
memory offsets relative to the stack pointer `rsp`, or in other registers), and where the results
 | 
			
		||||
can be found once the function has finished. And when multiple languages agree on what the calling
 | 
			
		||||
conventions are, you can do things like having [Go call Rust code](https://blog.filippo.io/rustgo/)!
 | 
			
		||||
 | 
			
		||||
Put simply: it's the compiler's job to figure out how to call other functions, and you can assume
 | 
			
		||||
that the compiler is good at its job.
 | 
			
		||||
 | 
			
		||||
We can see this in action using a simple example:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
struct Point {
 | 
			
		||||
    x: i64,
 | 
			
		||||
    y: i64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// We use integer division operations to keep
 | 
			
		||||
// the assembly clean, understanding the result
 | 
			
		||||
// isn't accurate.
 | 
			
		||||
fn distance(a: &Point, b: &Point) -> i64 {
 | 
			
		||||
    // Immediately subtract from `rsp` the bytes needed
 | 
			
		||||
    // to hold all the intermediate results - this is
 | 
			
		||||
    // the stack allocation step
 | 
			
		||||
 | 
			
		||||
    // The compiler used the `rdi` and `rsi` registers
 | 
			
		||||
    // to pass our arguments, so read them in
 | 
			
		||||
    let x1 = a.x;
 | 
			
		||||
    let x2 = b.x;
 | 
			
		||||
    let y1 = a.y;
 | 
			
		||||
    let y2 = b.y;
 | 
			
		||||
 | 
			
		||||
    // Do the actual math work
 | 
			
		||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
			
		||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
			
		||||
    let squared = x_pow + y_pow;
 | 
			
		||||
    squared / squared
 | 
			
		||||
 | 
			
		||||
    // Our final result will be stored in the `rax` register
 | 
			
		||||
    // so that our caller knows where to retrieve it.
 | 
			
		||||
    // Finally, add back to `rsp` the stack memory that is
 | 
			
		||||
    // now ready to be used by other functions.
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn total_distance() {
 | 
			
		||||
    let start = Point { x: 1, y: 2 };
 | 
			
		||||
    let middle = Point { x: 3, y: 4 };
 | 
			
		||||
    let end = Point { x: 5, y: 6 };
 | 
			
		||||
 | 
			
		||||
    let _dist_1 = distance(&start, &middle);
 | 
			
		||||
    let _dist_2 = distance(&middle, &end);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/Qmx4ST)
 | 
			
		||||
 | 
			
		||||
As a consequence of function arguments never using heap memory, we can also infer that functions
 | 
			
		||||
using the `#[inline]` attribute also do not heap allocate. But better than inferring, we can look
 | 
			
		||||
at the assembly to prove it:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
struct Point {
 | 
			
		||||
    x: i64,
 | 
			
		||||
    y: i64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Note that there is no `distance` function in the assembly output,
 | 
			
		||||
// and the total line count goes from 229 with inlining off
 | 
			
		||||
// to 306 with inline on. Even still, no heap allocations occur.
 | 
			
		||||
#[inline(always)]
 | 
			
		||||
fn distance(a: &Point, b: &Point) -> i64 {
 | 
			
		||||
    let x1 = a.x;
 | 
			
		||||
    let x2 = b.x;
 | 
			
		||||
    let y1 = a.y;
 | 
			
		||||
    let y2 = b.y;
 | 
			
		||||
 | 
			
		||||
    let x_pow = (a.x - b.x) * (a.x - b.x);
 | 
			
		||||
    let y_pow = (a.y - b.y) * (a.y - b.y);
 | 
			
		||||
    let squared = x_pow + y_pow;
 | 
			
		||||
    squared / squared
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn total_distance() {
 | 
			
		||||
    let start = Point { x: 1, y: 2 };
 | 
			
		||||
    let middle = Point { x: 3, y: 4 };
 | 
			
		||||
    let end = Point { x: 5, y: 6 };
 | 
			
		||||
 | 
			
		||||
    let _dist_1 = distance(&start, &middle);
 | 
			
		||||
    let _dist_2 = distance(&middle, &end);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/30Sh66)
 | 
			
		||||
 | 
			
		||||
Finally, passing by value (arguments with type
 | 
			
		||||
[`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html)) and passing by reference (either
 | 
			
		||||
moving ownership or passing a pointer) may have slightly different layouts in assembly, but will
 | 
			
		||||
still use either stack memory or CPU registers:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
pub struct Point {
 | 
			
		||||
    x: i64,
 | 
			
		||||
    y: i64,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Moving values
 | 
			
		||||
pub fn distance_moved(a: Point, b: Point) -> i64 {
 | 
			
		||||
    let x1 = a.x;
 | 
			
		||||
    let x2 = b.x;
 | 
			
		||||
    let y1 = a.y;
 | 
			
		||||
    let y2 = b.y;
 | 
			
		||||
 | 
			
		||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
			
		||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
			
		||||
    let squared = x_pow + y_pow;
 | 
			
		||||
    squared / squared
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// Borrowing values has two extra `mov` instructions on lines 21 and 22
 | 
			
		||||
pub fn distance_borrowed(a: &Point, b: &Point) -> i64 {
 | 
			
		||||
    let x1 = a.x;
 | 
			
		||||
    let x2 = b.x;
 | 
			
		||||
    let y1 = a.y;
 | 
			
		||||
    let y2 = b.y;
 | 
			
		||||
 | 
			
		||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
			
		||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
			
		||||
    let squared = x_pow + y_pow;
 | 
			
		||||
    squared / squared
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/06hGiv)
 | 
			
		||||
 | 
			
		||||
# Enums
 | 
			
		||||
 | 
			
		||||
If you've ever worried that wrapping your types in
 | 
			
		||||
[`Option`](https://doc.rust-lang.org/stable/core/option/enum.Option.html) or
 | 
			
		||||
[`Result`](https://doc.rust-lang.org/stable/core/result/enum.Result.html) would finally make them
 | 
			
		||||
large enough that Rust decides to use heap allocation instead, fear no longer: `enum` and union
 | 
			
		||||
types don't use heap allocation:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
enum MyEnum {
 | 
			
		||||
    Small(u8),
 | 
			
		||||
    Large(u64)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct MyStruct {
 | 
			
		||||
    x: MyEnum,
 | 
			
		||||
    y: MyEnum,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn enum_compare() {
 | 
			
		||||
    let x = MyEnum::Small(0);
 | 
			
		||||
    let y = MyEnum::Large(0);
 | 
			
		||||
 | 
			
		||||
    let z = MyStruct { x, y };
 | 
			
		||||
 | 
			
		||||
    let opt = Option::Some(z);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/HK7zBx)
 | 
			
		||||
 | 
			
		||||
Because the size of an `enum` is the size of its largest element plus a flag, the compiler can
 | 
			
		||||
predict how much memory is used no matter which variant of an enum is currently stored in a
 | 
			
		||||
variable. Thus, enums and unions have no need of heap allocation. There's unfortunately not a great
 | 
			
		||||
way to show this in assembly, so I'll instead point you to the
 | 
			
		||||
[`core::mem::size_of`](https://doc.rust-lang.org/stable/core/mem/fn.size_of.html#size-of-enums)
 | 
			
		||||
documentation.
 | 
			
		||||
 | 
			
		||||
# Arrays
 | 
			
		||||
 | 
			
		||||
The array type is guaranteed to be stack allocated, which is why the array size must be declared.
 | 
			
		||||
Interestingly enough, this can be used to cause safe Rust programs to crash:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
// 256 bytes
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct TwoFiftySix {
 | 
			
		||||
    _a: [u64; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 8 kilobytes
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct EightK {
 | 
			
		||||
    _a: [TwoFiftySix; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 256 kilobytes
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct TwoFiftySixK {
 | 
			
		||||
    _a: [EightK; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// 8 megabytes - exceeds space typically provided for the stack,
 | 
			
		||||
// though the kernel can be instructed to allocate more.
 | 
			
		||||
// On Linux, you can check stack size using `ulimit -s`
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct EightM {
 | 
			
		||||
    _a: [TwoFiftySixK; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // Because we already have things in stack memory
 | 
			
		||||
    // (like the current function call stack), allocating another
 | 
			
		||||
    // eight megabytes of stack memory crashes the program
 | 
			
		||||
    let _x = EightM::default();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=587a6380a4914bcbcef4192c90c01dc4)
 | 
			
		||||
 | 
			
		||||
There aren't any security implications of this (no memory corruption occurs), but it's good to note
 | 
			
		||||
that the Rust compiler won't move arrays into heap memory even if they can be reasonably expected to
 | 
			
		||||
overflow the stack.
 | 
			
		||||
 | 
			
		||||
# Closures
 | 
			
		||||
 | 
			
		||||
Rules for how anonymous functions capture their arguments are typically language-specific. In Java,
 | 
			
		||||
[Lambda Expressions](https://docs.oracle.com/javase/tutorial/java/javaOO/lambdaexpressions.html) are
 | 
			
		||||
actually objects created on the heap that capture local primitives by copying, and capture local
 | 
			
		||||
non-primitives as (`final`) references.
 | 
			
		||||
[Python](https://docs.python.org/3.7/reference/expressions.html#lambda) and
 | 
			
		||||
[JavaScript](https://javascriptweblog.wordpress.com/2010/10/25/understanding-javascript-closures/)
 | 
			
		||||
both bind _everything_ by reference normally, but Python can also
 | 
			
		||||
[capture values](https://stackoverflow.com/a/235764/1454178) and JavaScript has
 | 
			
		||||
[Arrow functions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions).
 | 
			
		||||
 | 
			
		||||
In Rust, arguments to closures are the same as arguments to other functions; closures are simply
 | 
			
		||||
functions that don't have a declared name. Some weird ordering of the stack may be required to
 | 
			
		||||
handle them, but it's the compiler's responsibility to figure that out.
 | 
			
		||||
 | 
			
		||||
Each example below has the same effect, but a different assembly implementation. In the simplest
 | 
			
		||||
case, we immediately run a closure returned by another function. Because we don't store a reference
 | 
			
		||||
to the closure, the stack memory needed to store the captured values is contiguous:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn my_func() -> impl FnOnce() {
 | 
			
		||||
    let x = 24;
 | 
			
		||||
    // Note that this closure in assembly looks exactly like
 | 
			
		||||
    // any other function; you even use the `call` instruction
 | 
			
		||||
    // to start running it.
 | 
			
		||||
    move || { x; }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn immediate() {
 | 
			
		||||
    my_func()();
 | 
			
		||||
    my_func()();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/mgJ2zl), 25 total assembly instructions
 | 
			
		||||
 | 
			
		||||
If we store a reference to the closure, the Rust compiler keeps values it needs in the stack memory
 | 
			
		||||
of the original function. Getting the details right is a bit harder, so the instruction count goes
 | 
			
		||||
up even though this code is functionally equivalent to our original example:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
pub fn simple_reference() {
 | 
			
		||||
    let x = my_func();
 | 
			
		||||
    let y = my_func();
 | 
			
		||||
    y();
 | 
			
		||||
    x();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/K_dj5n), 55 total assembly instructions
 | 
			
		||||
 | 
			
		||||
Even things like variable order can make a difference in instruction count:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
pub fn complex() {
 | 
			
		||||
    let x = my_func();
 | 
			
		||||
    let y = my_func();
 | 
			
		||||
    x();
 | 
			
		||||
    y();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/p37qFl), 70 total assembly instructions
 | 
			
		||||
 | 
			
		||||
In every circumstance though, the compiler ensured that no heap allocations were necessary.
 | 
			
		||||
 | 
			
		||||
# Generics
 | 
			
		||||
 | 
			
		||||
Traits in Rust come in two broad forms: static dispatch (monomorphization, `impl Trait`) and dynamic
 | 
			
		||||
dispatch (trait objects, `dyn Trait`). While dynamic dispatch is often _associated_ with trait
 | 
			
		||||
objects being stored in the heap, dynamic dispatch can be used with stack allocated objects as well:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
trait GetInt {
 | 
			
		||||
    fn get_int(&self) -> u64;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// vtable stored at section L__unnamed_1
 | 
			
		||||
struct WhyNotU8 {
 | 
			
		||||
    x: u8
 | 
			
		||||
}
 | 
			
		||||
impl GetInt for WhyNotU8 {
 | 
			
		||||
    fn get_int(&self) -> u64 {
 | 
			
		||||
        self.x as u64
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// vtable stored at section L__unnamed_2
 | 
			
		||||
struct ActualU64 {
 | 
			
		||||
    x: u64
 | 
			
		||||
}
 | 
			
		||||
impl GetInt for ActualU64 {
 | 
			
		||||
    fn get_int(&self) -> u64 {
 | 
			
		||||
        self.x
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// `&dyn` declares that we want to use dynamic dispatch
 | 
			
		||||
// rather than monomorphization, so there is only one
 | 
			
		||||
// `retrieve_int` function that shows up in the final assembly.
 | 
			
		||||
// If we used generics, there would be one implementation of
 | 
			
		||||
// `retrieve_int` for each type that implements `GetInt`.
 | 
			
		||||
pub fn retrieve_int(u: &dyn GetInt) {
 | 
			
		||||
    // In the assembly, we just call an address given to us
 | 
			
		||||
    // in the `rsi` register and hope that it was set up
 | 
			
		||||
    // correctly when this function was invoked.
 | 
			
		||||
    let x = u.get_int();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn do_call() {
 | 
			
		||||
    // Note that even though the vtable for `WhyNotU8` and
 | 
			
		||||
    // `ActualU64` includes a pointer to
 | 
			
		||||
    // `core::ptr::real_drop_in_place`, it is never invoked.
 | 
			
		||||
    let a = WhyNotU8 { x: 0 };
 | 
			
		||||
    let b = ActualU64 { x: 0 };
 | 
			
		||||
 | 
			
		||||
    retrieve_int(&a);
 | 
			
		||||
    retrieve_int(&b);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/u_yguS)
 | 
			
		||||
 | 
			
		||||
It's hard to imagine practical situations where dynamic dispatch would be used for objects that
 | 
			
		||||
aren't heap allocated, but it technically can be done.
 | 
			
		||||
 | 
			
		||||
# Copy types
 | 
			
		||||
 | 
			
		||||
Understanding move semantics and copy semantics in Rust is weird at first. The Rust docs
 | 
			
		||||
[go into detail](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html) far better than can
 | 
			
		||||
be addressed here, so I'll leave them to do the job. From a memory perspective though, their
 | 
			
		||||
guideline is reasonable:
 | 
			
		||||
[if your type can implement `Copy`, it should](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html#when-should-my-type-be-copy).
 | 
			
		||||
While there are potential speed tradeoffs to _benchmark_ when discussing `Copy` (move semantics for
 | 
			
		||||
stack objects vs. copying stack pointers vs. copying stack `struct`s), _it's impossible for `Copy`
 | 
			
		||||
to introduce a heap allocation_.
 | 
			
		||||
 | 
			
		||||
But why is this the case? Fundamentally, it's because the language controls what `Copy` means -
 | 
			
		||||
["the behavior of `Copy` is not overloadable"](https://doc.rust-lang.org/std/marker/trait.Copy.html#whats-the-difference-between-copy-and-clone)
 | 
			
		||||
because it's a marker trait. From there we'll note that a type
 | 
			
		||||
[can implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#when-can-my-type-be-copy)
 | 
			
		||||
if (and only if) its components implement `Copy`, and that
 | 
			
		||||
[no heap-allocated types implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#implementors).
 | 
			
		||||
Thus, assignments involving heap types are always move semantics, and new heap allocations won't
 | 
			
		||||
occur because of implicit operator behavior.
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
#[derive(Clone)]
 | 
			
		||||
struct Cloneable {
 | 
			
		||||
    x: Box<u64>
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// error[E0204]: the trait `Copy` may not be implemented for this type
 | 
			
		||||
#[derive(Copy, Clone)]
 | 
			
		||||
struct NotCopyable {
 | 
			
		||||
    x: Box<u64>
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/VToRuK)
 | 
			
		||||
 | 
			
		||||
# Iterators
 | 
			
		||||
 | 
			
		||||
In managed memory languages (like
 | 
			
		||||
[Java](https://www.youtube.com/watch?v=bSkpMdDe4g4&feature=youtu.be&t=357)), there's a subtle
 | 
			
		||||
difference between these two code samples:
 | 
			
		||||
 | 
			
		||||
```java
 | 
			
		||||
public static int sum_for(List<Long> vals) {
 | 
			
		||||
    long sum = 0;
 | 
			
		||||
    // Regular for loop
 | 
			
		||||
    for (int i = 0; i < vals.length; i++) {
 | 
			
		||||
        sum += vals[i];
 | 
			
		||||
    }
 | 
			
		||||
    return sum;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
public static int sum_foreach(List<Long> vals) {
 | 
			
		||||
    long sum = 0;
 | 
			
		||||
    // "Foreach" loop - uses iteration
 | 
			
		||||
    for (Long l : vals) {
 | 
			
		||||
        sum += l;
 | 
			
		||||
    }
 | 
			
		||||
    return sum;
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
In the `sum_for` function, nothing terribly interesting happens. In `sum_foreach`, an object of type
 | 
			
		||||
[`Iterator`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Iterator.html)
 | 
			
		||||
is allocated on the heap, and will eventually be garbage-collected. This isn't a great design;
 | 
			
		||||
iterators are often transient objects that you need during a function and can discard once the
 | 
			
		||||
function ends. Sounds exactly like the issue stack-allocated objects address, no?
 | 
			
		||||
 | 
			
		||||
In Rust, iterators are allocated on the stack. The objects to iterate over are almost certainly in
 | 
			
		||||
heap memory, but the iterator itself
 | 
			
		||||
([`Iter`](https://doc.rust-lang.org/std/slice/struct.Iter.html)) doesn't need to use the heap. In
 | 
			
		||||
each of the examples below we iterate over a collection, but never use heap allocation:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::collections::HashMap;
 | 
			
		||||
// There's a lot of assembly generated, but if you search in the text,
 | 
			
		||||
// there are no references to `real_drop_in_place` anywhere.
 | 
			
		||||
 | 
			
		||||
pub fn sum_vec(x: &Vec<u32>) {
 | 
			
		||||
    let mut s = 0;
 | 
			
		||||
    // Basic iteration over vectors doesn't need allocation
 | 
			
		||||
    for y in x {
 | 
			
		||||
        s += y;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn sum_enumerate(x: &Vec<u32>) {
 | 
			
		||||
    let mut s = 0;
 | 
			
		||||
    // More complex iterators are just fine too
 | 
			
		||||
    for (_i, y) in x.iter().enumerate() {
 | 
			
		||||
        s += y;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn sum_hm(x: &HashMap<u32, u32>) {
 | 
			
		||||
    let mut s = 0;
 | 
			
		||||
    // And it's not just Vec, all types will allocate the iterator
 | 
			
		||||
    // on stack memory
 | 
			
		||||
    for y in x.values() {
 | 
			
		||||
        s += y;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/FTT3CT)
 | 
			
		||||
@ -1,254 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Dynamic Memory: A Heaping Helping"
 | 
			
		||||
description: "The reason Rust exists."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and
 | 
			
		||||
some languages go to extreme lengths to protect users from themselves (Java, Python). In Rust, how
 | 
			
		||||
the language uses dynamic memory (also referred to as the **heap**) is a system called _ownership_.
 | 
			
		||||
And as the docs mention, ownership
 | 
			
		||||
[is Rust's most unique feature](https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html).
 | 
			
		||||
 | 
			
		||||
The heap is used in two situations: when the compiler is unable to predict either the _total size of
 | 
			
		||||
memory needed_, or _how long the memory is needed for_, it allocates space in the heap. This happens
 | 
			
		||||
pretty frequently; if you want to download the Google home page, you won't know how large it is
 | 
			
		||||
until your program runs. And when you're finished with Google, we deallocate the memory so it can be
 | 
			
		||||
used to store other webpages. If you're interested in a slightly longer explanation of the heap,
 | 
			
		||||
check out
 | 
			
		||||
[The Stack and the Heap](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html#the-stack-and-the-heap)
 | 
			
		||||
in Rust's documentation.
 | 
			
		||||
 | 
			
		||||
We won't go into detail on how the heap is managed; the
 | 
			
		||||
[ownership documentation](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) does a
 | 
			
		||||
phenomenal job explaining both the "why" and "how" of memory management. Instead, we're going to
 | 
			
		||||
focus on understanding "when" heap allocations occur in Rust.
 | 
			
		||||
 | 
			
		||||
To start off, take a guess for how many allocations happen in the program below:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
fn main() {}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
It's obviously a trick question; while no heap allocations occur as a result of that code, the setup
 | 
			
		||||
needed to call `main` does allocate on the heap. Here's a way to show it:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
#![feature(integer_atomics)]
 | 
			
		||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
			
		||||
use std::sync::atomic::{AtomicU64, Ordering};
 | 
			
		||||
 | 
			
		||||
static ALLOCATION_COUNT: AtomicU64 = AtomicU64::new(0);
 | 
			
		||||
 | 
			
		||||
struct CountingAllocator;
 | 
			
		||||
 | 
			
		||||
unsafe impl GlobalAlloc for CountingAllocator {
 | 
			
		||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
			
		||||
        ALLOCATION_COUNT.fetch_add(1, Ordering::SeqCst);
 | 
			
		||||
        System.alloc(layout)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
			
		||||
        System.dealloc(ptr, layout);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static A: CountingAllocator = CountingAllocator;
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    let x = ALLOCATION_COUNT.fetch_add(0, Ordering::SeqCst);
 | 
			
		||||
    println!("There were {} allocations before calling main!", x);
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2018&gist=fb5060025ba79fc0f906b65a4ef8eb8e)
 | 
			
		||||
 | 
			
		||||
As of the time of writing, there are five allocations that happen before `main` is ever called.
 | 
			
		||||
 | 
			
		||||
But when we want to understand more practically where heap allocation happens, we'll follow this
 | 
			
		||||
guide:
 | 
			
		||||
 | 
			
		||||
- Smart pointers hold their contents in the heap
 | 
			
		||||
- Collections are smart pointers for many objects at a time, and reallocate when they need to grow
 | 
			
		||||
 | 
			
		||||
Finally, there are two "addendum" issues that are important to address when discussing Rust and the
 | 
			
		||||
heap:
 | 
			
		||||
 | 
			
		||||
- Non-heap alternatives to many standard library types are available.
 | 
			
		||||
- Special allocators to track memory behavior should be used to benchmark code.
 | 
			
		||||
 | 
			
		||||
# Smart pointers
 | 
			
		||||
 | 
			
		||||
The first things to note are the "smart pointer" types. When you have data that must outlive the
 | 
			
		||||
scope in which it is declared, or your data is of unknown or dynamic size, you'll make use of these
 | 
			
		||||
types.
 | 
			
		||||
 | 
			
		||||
The term [smart pointer](https://en.wikipedia.org/wiki/Smart_pointer) comes from C++, and while it's
 | 
			
		||||
closely linked to a general design pattern of
 | 
			
		||||
["Resource Acquisition Is Initialization"](https://en.cppreference.com/w/cpp/language/raii), we'll
 | 
			
		||||
use it here specifically to describe objects that are responsible for managing ownership of data
 | 
			
		||||
allocated on the heap. The smart pointers available in the `alloc` crate should look mostly
 | 
			
		||||
familiar:
 | 
			
		||||
 | 
			
		||||
- [`Box`](https://doc.rust-lang.org/alloc/boxed/struct.Box.html)
 | 
			
		||||
- [`Rc`](https://doc.rust-lang.org/alloc/rc/struct.Rc.html)
 | 
			
		||||
- [`Arc`](https://doc.rust-lang.org/alloc/sync/struct.Arc.html)
 | 
			
		||||
- [`Cow`](https://doc.rust-lang.org/alloc/borrow/enum.Cow.html)
 | 
			
		||||
 | 
			
		||||
The [standard library](https://doc.rust-lang.org/std/) also defines some smart pointers to manage
 | 
			
		||||
heap objects, though more than can be covered here. Some examples are:
 | 
			
		||||
 | 
			
		||||
- [`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html)
 | 
			
		||||
- [`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)
 | 
			
		||||
 | 
			
		||||
Finally, there is one ["gotcha"](https://www.merriam-webster.com/dictionary/gotcha): **cell types**
 | 
			
		||||
(like [`RefCell`](https://doc.rust-lang.org/stable/core/cell/struct.RefCell.html)) look and behave
 | 
			
		||||
similarly, but **don't involve heap allocation**. The
 | 
			
		||||
[`core::cell` docs](https://doc.rust-lang.org/stable/core/cell/index.html) have more information.
 | 
			
		||||
 | 
			
		||||
When a smart pointer is created, the data it is given is placed in heap memory and the location of
 | 
			
		||||
that data is recorded in the smart pointer. Once the smart pointer has determined it's safe to
 | 
			
		||||
deallocate that memory (when a `Box` has
 | 
			
		||||
[gone out of scope](https://doc.rust-lang.org/stable/std/boxed/index.html) or a reference count
 | 
			
		||||
[goes to zero](https://doc.rust-lang.org/alloc/rc/index.html)), the heap space is reclaimed. We can
 | 
			
		||||
prove these types use heap memory by looking at code:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::rc::Rc;
 | 
			
		||||
use std::sync::Arc;
 | 
			
		||||
use std::borrow::Cow;
 | 
			
		||||
 | 
			
		||||
pub fn my_box() {
 | 
			
		||||
    // Drop at assembly line 1640
 | 
			
		||||
    Box::new(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn my_rc() {
 | 
			
		||||
    // Drop at assembly line 1650
 | 
			
		||||
    Rc::new(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn my_arc() {
 | 
			
		||||
    // Drop at assembly line 1660
 | 
			
		||||
    Arc::new(0);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn my_cow() {
 | 
			
		||||
    // Drop at assembly line 1672
 | 
			
		||||
    Cow::from("drop");
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/4AMQug)
 | 
			
		||||
 | 
			
		||||
# Collections
 | 
			
		||||
 | 
			
		||||
Collection types use heap memory because their contents have dynamic size; they will request more
 | 
			
		||||
memory [when needed](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve), and can
 | 
			
		||||
[release memory](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit) when it's
 | 
			
		||||
no longer necessary. This dynamic property forces Rust to heap allocate everything they contain. In
 | 
			
		||||
a way, **collections are smart pointers for many objects at a time**. Common types that fall under
 | 
			
		||||
this umbrella are [`Vec`](https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html),
 | 
			
		||||
[`HashMap`](https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html), and
 | 
			
		||||
[`String`](https://doc.rust-lang.org/stable/alloc/string/struct.String.html) (not
 | 
			
		||||
[`str`](https://doc.rust-lang.org/std/primitive.str.html)).
 | 
			
		||||
 | 
			
		||||
While collections store the objects they own in heap memory, _creating new collections will not
 | 
			
		||||
allocate on the heap_. This is a bit weird; if we call `Vec::new()`, the assembly shows a
 | 
			
		||||
corresponding call to `real_drop_in_place`:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
pub fn my_vec() {
 | 
			
		||||
    // Drop in place at line 481
 | 
			
		||||
    Vec::<u8>::new();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/1WkNtC)
 | 
			
		||||
 | 
			
		||||
But because the vector has no elements to manage, no calls to the allocator will ever be dispatched:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
			
		||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    // Turn on panicking if we allocate on the heap
 | 
			
		||||
    DO_PANIC.store(true, Ordering::SeqCst);
 | 
			
		||||
 | 
			
		||||
    // Interesting bit happens here
 | 
			
		||||
    let x: Vec<u8> = Vec::new();
 | 
			
		||||
    drop(x);
 | 
			
		||||
 | 
			
		||||
    // Turn panicking back off, some deallocations occur
 | 
			
		||||
    // after main as well.
 | 
			
		||||
    DO_PANIC.store(false, Ordering::SeqCst);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static A: PanicAllocator = PanicAllocator;
 | 
			
		||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
 | 
			
		||||
struct PanicAllocator;
 | 
			
		||||
 | 
			
		||||
unsafe impl GlobalAlloc for PanicAllocator {
 | 
			
		||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
			
		||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
			
		||||
            panic!("Unexpected allocation.");
 | 
			
		||||
        }
 | 
			
		||||
        System.alloc(layout)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
			
		||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
			
		||||
            panic!("Unexpected deallocation.");
 | 
			
		||||
        }
 | 
			
		||||
        System.dealloc(ptr, layout);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
--
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=831a297d176d015b1f9ace01ae416cc6)
 | 
			
		||||
 | 
			
		||||
Other standard library types follow the same behavior; make sure to check out
 | 
			
		||||
[`HashMap::new()`](https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html#method.new),
 | 
			
		||||
and [`String::new()`](https://doc.rust-lang.org/std/string/struct.String.html#method.new).
 | 
			
		||||
 | 
			
		||||
# Heap Alternatives
 | 
			
		||||
 | 
			
		||||
While it is a bit strange to speak of the stack after spending time with the heap, it's worth
 | 
			
		||||
pointing out that some heap-allocated objects in Rust have stack-based counterparts provided by
 | 
			
		||||
other crates. If you have need of the functionality, but want to avoid allocating, there are
 | 
			
		||||
typically alternatives available.
 | 
			
		||||
 | 
			
		||||
When it comes to some standard library smart pointers
 | 
			
		||||
([`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html) and
 | 
			
		||||
[`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)), stack-based alternatives are
 | 
			
		||||
provided in crates like [parking_lot](https://crates.io/crates/parking_lot) and
 | 
			
		||||
[spin](https://crates.io/crates/spin). You can check out
 | 
			
		||||
[`lock_api::RwLock`](https://docs.rs/lock_api/0.1.5/lock_api/struct.RwLock.html),
 | 
			
		||||
[`lock_api::Mutex`](https://docs.rs/lock_api/0.1.5/lock_api/struct.Mutex.html), and
 | 
			
		||||
[`spin::Once`](https://mvdnes.github.io/rust-docs/spin-rs/spin/struct.Once.html) if you're in need
 | 
			
		||||
of synchronization primitives.
 | 
			
		||||
 | 
			
		||||
[thread_id](https://crates.io/crates/thread-id) may be necessary if you're implementing an allocator
 | 
			
		||||
because [`thread::current().id()`](https://doc.rust-lang.org/std/thread/struct.ThreadId.html) uses a
 | 
			
		||||
[`thread_local!` structure](https://doc.rust-lang.org/stable/src/std/sys_common/thread_info.rs.html#17-36)
 | 
			
		||||
that needs heap allocation.
 | 
			
		||||
 | 
			
		||||
# Tracing Allocators
 | 
			
		||||
 | 
			
		||||
When writing performance-sensitive code, there's no alternative to measuring your code. If you
 | 
			
		||||
didn't write a benchmark,
 | 
			
		||||
[you don't care about its performance](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263).
 | 
			
		||||
You should never rely on your instincts when
 | 
			
		||||
[a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
 | 
			
		||||
 | 
			
		||||
Similarly, there's great work going on in Rust with allocators that keep track of what they're doing
 | 
			
		||||
(like [`alloc_counter`](https://crates.io/crates/alloc_counter)). When it comes to tracking heap
 | 
			
		||||
behavior, it's easy to make mistakes; please write tests and make sure you have tools to guard
 | 
			
		||||
against future issues.
 | 
			
		||||
@ -1,148 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Compiler Optimizations: What It's Done Lately"
 | 
			
		||||
description: "A lot. The answer is a lot."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
**Update 2019-02-10**: When debugging a
 | 
			
		||||
[related issue](https://gitlab.com/sio4/code/alloc-counter/issues/1), it was discovered that the
 | 
			
		||||
original code worked because LLVM optimized out the entire function, rather than just the allocation
 | 
			
		||||
segments. The code has been updated with proper use of
 | 
			
		||||
[`read_volatile`](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html), and a previous section
 | 
			
		||||
on vector capacity has been removed.
 | 
			
		||||
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Up to this point, we've been discussing memory usage in the Rust language by focusing on simple
 | 
			
		||||
rules that are mostly right for small chunks of code. We've spent time showing how those rules work
 | 
			
		||||
themselves out in practice, and become familiar with reading the assembly code needed to see each
 | 
			
		||||
memory type (global, stack, heap) in action.
 | 
			
		||||
 | 
			
		||||
Throughout the series so far, we've put a handicap on the code. In the name of consistent and
 | 
			
		||||
understandable results, we've asked the compiler to pretty please leave the training wheels on. Now
 | 
			
		||||
is the time where we throw out all the rules and take off the kid gloves. As it turns out, both the
 | 
			
		||||
Rust compiler and the LLVM optimizers are incredibly sophisticated, and we'll step back and let them
 | 
			
		||||
do their job.
 | 
			
		||||
 | 
			
		||||
Similar to
 | 
			
		||||
["What Has My Compiler Done For Me Lately?"](https://www.youtube.com/watch?v=bSkpMdDe4g4), we're
 | 
			
		||||
focusing on interesting things the Rust language (and LLVM!) can do with memory management. We'll
 | 
			
		||||
still be looking at assembly code to understand what's going on, but it's important to mention
 | 
			
		||||
again: **please use automated tools like [alloc-counter](https://crates.io/crates/alloc_counter) to
 | 
			
		||||
double-check memory behavior if it's something you care about**. It's far too easy to misread
 | 
			
		||||
assembly in large code sections; you should always verify behavior if you care about memory usage.
 | 
			
		||||
 | 
			
		||||
The guiding principle as we move forward is this: _optimizing compilers won't produce worse programs
 | 
			
		||||
than we started with._ There won't be any situations where stack allocations get moved to heap
 | 
			
		||||
allocations. There will, however, be an opera of optimization.
 | 
			
		||||
 | 
			
		||||
# The Case of the Disappearing Box
 | 
			
		||||
 | 
			
		||||
Our first optimization comes when LLVM can reason that the lifetime of an object is sufficiently
 | 
			
		||||
short that heap allocations aren't necessary. In these cases, LLVM will move the allocation to the
 | 
			
		||||
stack instead! The way this interacts with `#[inline]` attributes is a bit opaque, but the important
 | 
			
		||||
part is that LLVM can sometimes do better than the baseline Rust language:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
			
		||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
			
		||||
 | 
			
		||||
pub fn cmp(x: u32) {
 | 
			
		||||
    // Turn on panicking if we allocate on the heap
 | 
			
		||||
    DO_PANIC.store(true, Ordering::SeqCst);
 | 
			
		||||
 | 
			
		||||
    // The compiler is able to see through the constant `Box`
 | 
			
		||||
    // and directly compare `x` to 24 - assembly line 73
 | 
			
		||||
    let y = Box::new(24);
 | 
			
		||||
    let equals = x == *y;
 | 
			
		||||
 | 
			
		||||
    // This call to drop is eliminated
 | 
			
		||||
    drop(y);
 | 
			
		||||
 | 
			
		||||
    // Need to mark the comparison result as volatile so that
 | 
			
		||||
    // LLVM doesn't strip out all the code. If `y` is marked
 | 
			
		||||
    // volatile instead, allocation will be forced.
 | 
			
		||||
    unsafe { std::ptr::read_volatile(&equals) };
 | 
			
		||||
 | 
			
		||||
    // Turn off panicking, as there are some deallocations
 | 
			
		||||
    // when we exit main.
 | 
			
		||||
    DO_PANIC.store(false, Ordering::SeqCst);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn main() {
 | 
			
		||||
    cmp(12)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[global_allocator]
 | 
			
		||||
static A: PanicAllocator = PanicAllocator;
 | 
			
		||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
 | 
			
		||||
struct PanicAllocator;
 | 
			
		||||
 | 
			
		||||
unsafe impl GlobalAlloc for PanicAllocator {
 | 
			
		||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
			
		||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
			
		||||
            panic!("Unexpected allocation.");
 | 
			
		||||
        }
 | 
			
		||||
        System.alloc(layout)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
			
		||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
			
		||||
            panic!("Unexpected deallocation.");
 | 
			
		||||
        }
 | 
			
		||||
        System.dealloc(ptr, layout);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/BZ_Yp3)
 | 
			
		||||
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4a765f753183d5b919f62c71d2109d5d)
 | 
			
		||||
 | 
			
		||||
# Dr. Array or: How I Learned to Love the Optimizer
 | 
			
		||||
 | 
			
		||||
Finally, this isn't so much about LLVM figuring out different memory behavior, but LLVM stripping
 | 
			
		||||
out code that doesn't do anything. Optimizations of this type have a lot of nuance to them; if
 | 
			
		||||
you're not careful, they can make your benchmarks look
 | 
			
		||||
[impossibly good](https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=1199). In Rust, the
 | 
			
		||||
`black_box` function (implemented in both
 | 
			
		||||
[`libtest`](https://doc.rust-lang.org/1.1.0/test/fn.black_box.html) and
 | 
			
		||||
[`criterion`](https://docs.rs/criterion/0.2.10/criterion/fn.black_box.html)) will tell the compiler
 | 
			
		||||
to disable this kind of optimization. But if you let LLVM remove unnecessary code, you can end up
 | 
			
		||||
running programs that previously caused errors:
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct TwoFiftySix {
 | 
			
		||||
    _a: [u64; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct EightK {
 | 
			
		||||
    _a: [TwoFiftySix; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct TwoFiftySixK {
 | 
			
		||||
    _a: [EightK; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[derive(Default)]
 | 
			
		||||
struct EightM {
 | 
			
		||||
    _a: [TwoFiftySixK; 32]
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pub fn main() {
 | 
			
		||||
    // Normally this blows up because we can't reserve size on stack
 | 
			
		||||
    // for the `EightM` struct. But because the compiler notices we
 | 
			
		||||
    // never do anything with `_x`, it optimizes out the stack storage
 | 
			
		||||
    // and the program completes successfully.
 | 
			
		||||
    let _x = EightM::default();
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
-- [Compiler Explorer](https://godbolt.org/z/daHn7P)
 | 
			
		||||
 | 
			
		||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4c253bf26072119896ab93c6ef064dc0)
 | 
			
		||||
@ -1,35 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Summary: What are the Allocation Rules?"
 | 
			
		||||
description: "A synopsis and reference."
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust, understanding-allocations]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
While there's a lot of interesting detail captured in this series, it's often helpful to have a
 | 
			
		||||
document that answers some "yes/no" questions. You may not care about what an `Iterator` looks like
 | 
			
		||||
in assembly, you just need to know whether it allocates an object on the heap or not. And while Rust
 | 
			
		||||
will prioritize the fastest behavior it can, here are the rules for each memory type:
 | 
			
		||||
 | 
			
		||||
**Heap Allocation**:
 | 
			
		||||
 | 
			
		||||
- Smart pointers (`Box`, `Rc`, `Mutex`, etc.) allocate their contents in heap memory.
 | 
			
		||||
- Collections (`HashMap`, `Vec`, `String`, etc.) allocate their contents in heap memory.
 | 
			
		||||
- Some smart pointers in the standard library have counterparts in other crates that don't need heap
 | 
			
		||||
  memory. If possible, use those.
 | 
			
		||||
 | 
			
		||||
**Stack Allocation**:
 | 
			
		||||
 | 
			
		||||
- Everything not using a smart pointer will be allocated on the stack.
 | 
			
		||||
- Structs, enums, iterators, arrays, and closures are all stack allocated.
 | 
			
		||||
- Cell types (`RefCell`) behave like smart pointers, but are stack-allocated.
 | 
			
		||||
- Inlining (`#[inline]`) will not affect allocation behavior for better or worse.
 | 
			
		||||
- Types that are marked `Copy` are guaranteed to have their contents stack-allocated.
 | 
			
		||||
 | 
			
		||||
**Global Allocation**:
 | 
			
		||||
 | 
			
		||||
- `const` is a fixed value; the compiler is allowed to copy it wherever useful.
 | 
			
		||||
- `static` is a fixed reference; the compiler will guarantee it is unique.
 | 
			
		||||
 | 
			
		||||
 --
 | 
			
		||||
[Raph Levien](https://docs.google.com/presentation/d/1q-c7UAyrUlM-eZyTo1pd8SZ0qwA_wYxmPZVOQkoDmH4/edit?usp=sharing)
 | 
			
		||||
@ -1,52 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Making Bread"
 | 
			
		||||
description: "...because I've got some free time now. 🍞"
 | 
			
		||||
category:
 | 
			
		||||
tags: [baking]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Having recently started my "gardening leave" between positions, I have some more personal time
 | 
			
		||||
available. I'm planning to stay productive, contributing to some open-source projects, but it also
 | 
			
		||||
occurred to me that despite [talking about](https://speice.io/2018/05/hello.html) bread pics, this
 | 
			
		||||
blog has been purely technical. Maybe I'll change the site title from "The Old Speice Guy" to "Bites
 | 
			
		||||
and Bytes"?
 | 
			
		||||
 | 
			
		||||
Either way, I'm baking a little bit again, and figured it was worth taking a quick break to focus on
 | 
			
		||||
some lighter material. I recently learned two critically important lessons: first, the temperature
 | 
			
		||||
of the dough when you put the yeast in makes a huge difference.
 | 
			
		||||
 | 
			
		||||
Previously, when I wasn't paying attention to dough temperature:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
Compared with what happens when I put the dough in the microwave for a defrost cycle because the
 | 
			
		||||
water I used wasn't warm enough:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
I mean, just look at the bubbles!
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
After shaping the dough, I've got two loaves ready:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
Now, the recipe normally calls for a Dutch Oven to bake the bread because it keeps the dough from
 | 
			
		||||
drying out in the oven. Because I don't own a Dutch Oven, I typically put a casserole dish on the
 | 
			
		||||
bottom rack and fill it with water so there's still some moisture in the oven. This time, I forgot
 | 
			
		||||
to add the water and learned my second lesson: never add room-temperature water to a glass dish
 | 
			
		||||
that's currently at 500 degrees.
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
Needless to say, trying to pull out sharp glass from an incredibly hot oven is not what I expected
 | 
			
		||||
to be doing during my gardening leave.
 | 
			
		||||
 | 
			
		||||
In the end, the bread crust wasn't great, but the bread itself turned out pretty alright:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
I've been writing a lot more during this break, so I'm looking forward to sharing that in the
 | 
			
		||||
future. In the meantime, I'm planning on making a sandwich.
 | 
			
		||||
@ -1,296 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "On Building High Performance Systems"
 | 
			
		||||
description: ""
 | 
			
		||||
category:
 | 
			
		||||
tags: []
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
**Update 2019-09-21**: Added notes on `isolcpus` and `systemd` affinity.
 | 
			
		||||
 | 
			
		||||
Prior to working in the trading industry, my assumption was that High Frequency Trading (HFT) is
 | 
			
		||||
made up of people who have access to secret techniques mortal developers could only dream of. There
 | 
			
		||||
had to be some secret art that could only be learned if one had an appropriately tragic backstory:
 | 
			
		||||
 | 
			
		||||
<img src="/assets/images/2019-04-24-kung-fu.webp" alt="kung-fu fight">
 | 
			
		||||
> How I assumed HFT people learn their secret techniques
 | 
			
		||||
 | 
			
		||||
How else do you explain people working on systems that complete the round trip of market data in to
 | 
			
		||||
orders out (a.k.a. tick-to-trade) consistently within
 | 
			
		||||
[750-800 nanoseconds](https://stackoverflow.com/a/22082528/1454178)? In roughly the time it takes a
 | 
			
		||||
computer to access
 | 
			
		||||
[main memory 8 times](https://people.eecs.berkeley.edu/~rcs/research/interactive_latency.html),
 | 
			
		||||
trading systems are capable of reading the market data packets, deciding what orders to send, doing
 | 
			
		||||
risk checks, creating new packets for exchange-specific protocols, and putting those packets on the
 | 
			
		||||
wire.
 | 
			
		||||
 | 
			
		||||
Having now worked in the trading industry, I can confirm the developers aren't super-human; I've
 | 
			
		||||
made some simple mistakes at the very least. Instead, what shows up in public discussions is that
 | 
			
		||||
philosophy, not technique, separates high-performance systems from everything else.
 | 
			
		||||
Performance-critical systems don't rely on "this one cool C++ optimization trick" to make code fast
 | 
			
		||||
(though micro-optimizations have their place); there's a lot more to worry about than just the code
 | 
			
		||||
written for the project.
 | 
			
		||||
 | 
			
		||||
The framework I'd propose is this: **If you want to build high-performance systems, focus first on
 | 
			
		||||
reducing performance variance** (reducing the gap between the fastest and slowest runs of the same
 | 
			
		||||
code), **and only look at average latency once variance is at an acceptable level**.
 | 
			
		||||
 | 
			
		||||
Don't get me wrong, I'm a much happier person when things are fast. Computer goes from booting in 20
 | 
			
		||||
seconds down to 10 because I installed a solid-state drive? Awesome. But if every fifth day it takes
 | 
			
		||||
a full minute to boot because of corrupted sectors? Not so great. Average speed over the course of a
 | 
			
		||||
week is the same in each situation, but you're painfully aware of that minute when it happens. When
 | 
			
		||||
it comes to code, the principle is the same: speeding up a function by an average of 10 milliseconds
 | 
			
		||||
doesn't mean much if there's a 100ms difference between your fastest and slowest runs. When
 | 
			
		||||
performance matters, you need to respond quickly _every time_, not just in aggregate.
 | 
			
		||||
High-performance systems should first optimize for time variance. Once you're consistent at the time
 | 
			
		||||
scale you care about, then focus on improving average time.
 | 
			
		||||
 | 
			
		||||
This focus on variance shows up all the time in industry too (emphasis added in all quotes below):
 | 
			
		||||
 | 
			
		||||
- In [marketing materials](https://business.nasdaq.com/market-tech/marketplaces/trading) for
 | 
			
		||||
  NASDAQ's matching engine, the most performance-sensitive component of the exchange, dependability
 | 
			
		||||
  is highlighted in addition to instantaneous metrics:
 | 
			
		||||
 | 
			
		||||
  > Able to **consistently sustain** an order rate of over 100,000 orders per second at sub-40
 | 
			
		||||
  > microsecond average latency
 | 
			
		||||
 | 
			
		||||
- The [Aeron](https://github.com/real-logic/aeron) message bus has this to say about performance:
 | 
			
		||||
 | 
			
		||||
  > Performance is the key focus. Aeron is designed to be the highest throughput with the lowest and
 | 
			
		||||
  > **most predictable latency possible** of any messaging system
 | 
			
		||||
 | 
			
		||||
- The company PolySync, which is working on autonomous vehicles,
 | 
			
		||||
  [mentions why](https://polysync.io/blog/session-types-for-hearty-codecs/) they picked their
 | 
			
		||||
  specific messaging format:
 | 
			
		||||
 | 
			
		||||
  > In general, high performance is almost always desirable for serialization. But in the world of
 | 
			
		||||
  > autonomous vehicles, **steady timing performance is even more important** than peak throughput.
 | 
			
		||||
  > This is because safe operation is sensitive to timing outliers. Nobody wants the system that
 | 
			
		||||
  > decides when to slam on the brakes to occasionally take 100 times longer than usual to encode
 | 
			
		||||
  > its commands.
 | 
			
		||||
 | 
			
		||||
- [Solarflare](https://solarflare.com/), which makes highly-specialized network hardware, points out
 | 
			
		||||
  variance (jitter) as a big concern for
 | 
			
		||||
  [electronic trading](https://solarflare.com/electronic-trading/):
 | 
			
		||||
  > The high stakes world of electronic trading, investment banks, market makers, hedge funds and
 | 
			
		||||
  > exchanges demand the **lowest possible latency and jitter** while utilizing the highest
 | 
			
		||||
  > bandwidth and return on their investment.
 | 
			
		||||
 | 
			
		||||
And to further clarify: we're not discussing _total run-time_, but variance of total run-time. There
 | 
			
		||||
are situations where it's not reasonably possible to make things faster, and you'd much rather be
 | 
			
		||||
consistent. For example, trading firms use
 | 
			
		||||
[wireless networks](https://sniperinmahwah.wordpress.com/2017/06/07/network-effects-part-i/) because
 | 
			
		||||
the speed of light through air is faster than through fiber-optic cables. There's still at _absolute
 | 
			
		||||
minimum_ a [~33.76 millisecond](http://tinyurl.com/y2vd7tn8) delay required to send data between,
 | 
			
		||||
say,
 | 
			
		||||
[Chicago and Tokyo](https://www.theice.com/market-data/connectivity-and-feeds/wireless/tokyo-chicago).
 | 
			
		||||
If a trading system in Chicago calls the function for "send order to Tokyo" and waits to see if a
 | 
			
		||||
trade occurs, there's a physical limit to how long that will take. In this situation, the focus is
 | 
			
		||||
on keeping variance of _additional processing_ to a minimum, since speed of light is the limiting
 | 
			
		||||
factor.
 | 
			
		||||
 | 
			
		||||
So how does one go about looking for and eliminating performance variance? To tell the truth, I
 | 
			
		||||
don't think a systematic answer or flow-chart exists. There's no substitute for (A) building a deep
 | 
			
		||||
understanding of the entire technology stack, and (B) actually measuring system performance (though
 | 
			
		||||
(C) watching a lot of [CppCon](https://www.youtube.com/channel/UCMlGfpWw-RUdWX_JbLCukXg) videos for
 | 
			
		||||
inspiration never hurt). Even then, every project cares about performance to a different degree; you
 | 
			
		||||
may need to build an entire
 | 
			
		||||
[replica production system](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=3015) to
 | 
			
		||||
accurately benchmark at nanosecond precision, or you may be content to simply
 | 
			
		||||
[avoid garbage collection](https://www.youtube.com/watch?v=BD9cRbxWQx8&feature=youtu.be&t=1335) in
 | 
			
		||||
your Java code.
 | 
			
		||||
 | 
			
		||||
Even though everyone has different needs, there are still common things to look for when trying to
 | 
			
		||||
isolate and eliminate variance. In no particular order, these are my focus areas when thinking about
 | 
			
		||||
high-performance systems:
 | 
			
		||||
 | 
			
		||||
## Language-specific
 | 
			
		||||
 | 
			
		||||
**Garbage Collection**: How often does garbage collection happen? When is it triggered? What are the
 | 
			
		||||
impacts?
 | 
			
		||||
 | 
			
		||||
- [In Python](https://rushter.com/blog/python-garbage-collector/), individual objects are collected
 | 
			
		||||
  if the reference count reaches 0, and each generation is collected if
 | 
			
		||||
  `num_alloc - num_dealloc > gc_threshold` whenever an allocation happens. The GIL is acquired for
 | 
			
		||||
  the duration of generational collection.
 | 
			
		||||
- Java has
 | 
			
		||||
  [many](https://docs.oracle.com/en/java/javase/12/gctuning/parallel-collector1.html#GUID-DCDD6E46-0406-41D1-AB49-FB96A50EB9CE)
 | 
			
		||||
  [different](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector.html#GUID-ED3AB6D3-FD9B-4447-9EDF-983ED2F7A573)
 | 
			
		||||
  [collection](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector-tuning.html#GUID-90E30ACA-8040-432E-B3A0-1E0440AB556A)
 | 
			
		||||
  [algorithms](https://docs.oracle.com/en/java/javase/12/gctuning/z-garbage-collector1.html#GUID-A5A42691-095E-47BA-B6DC-FB4E5FAA43D0)
 | 
			
		||||
  to choose from, each with different characteristics. The default algorithms (Parallel GC in Java
 | 
			
		||||
  8, G1 in Java 9) freeze the JVM while collecting, while more recent algorithms
 | 
			
		||||
  ([ZGC](https://wiki.openjdk.java.net/display/zgc) and
 | 
			
		||||
  [Shenandoah](https://wiki.openjdk.java.net/display/shenandoah)) are designed to keep "stop the
 | 
			
		||||
  world" to a minimum by doing collection work in parallel.
 | 
			
		||||
 | 
			
		||||
**Allocation**: Every language has a different way of interacting with "heap" memory, but the
 | 
			
		||||
principle is the same: running the allocator to allocate/deallocate memory takes time that can often
 | 
			
		||||
be put to better use. Understanding when your language interacts with the allocator is crucial, and
 | 
			
		||||
not always obvious. For example: C++ and Rust don't allocate heap memory for iterators, but Java
 | 
			
		||||
does (meaning potential GC pauses). Take time to understand heap behavior (I made a
 | 
			
		||||
[guide for Rust](/2019/02/understanding-allocations-in-rust.html)), and look into alternative
 | 
			
		||||
allocators ([jemalloc](http://jemalloc.net/),
 | 
			
		||||
[tcmalloc](https://gperftools.github.io/gperftools/tcmalloc.html)) that might run faster than the
 | 
			
		||||
operating system default.
 | 
			
		||||
 | 
			
		||||
**Data Layout**: How your data is arranged in memory matters;
 | 
			
		||||
[data-oriented design](https://www.youtube.com/watch?v=yy8jQgmhbAU) and
 | 
			
		||||
[cache locality](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=1185) can have huge
 | 
			
		||||
impacts on performance. The C family of languages (C, value types in C#, C++) and Rust all have
 | 
			
		||||
guarantees about the shape every object takes in memory that others (e.g. Java and Python) can't
 | 
			
		||||
make. [Cachegrind](http://valgrind.org/docs/manual/cg-manual.html) and kernel
 | 
			
		||||
[perf](https://perf.wiki.kernel.org/index.php/Main_Page) counters are both great for understanding
 | 
			
		||||
how performance relates to memory layout.
 | 
			
		||||
 | 
			
		||||
**Just-In-Time Compilation**: Languages that are compiled on the fly (LuaJIT, C#, Java, PyPy) are
 | 
			
		||||
great because they optimize your program for how it's actually being used, rather than how a
 | 
			
		||||
compiler expects it to be used. However, there's a variance problem if the program stops executing
 | 
			
		||||
while waiting for translation from VM bytecode to native code. As a remedy, many languages support
 | 
			
		||||
ahead-of-time compilation in addition to the JIT versions
 | 
			
		||||
([CoreRT](https://github.com/dotnet/corert) in C# and [GraalVM](https://www.graalvm.org/) in Java).
 | 
			
		||||
On the other hand, LLVM supports
 | 
			
		||||
[Profile Guided Optimization](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization),
 | 
			
		||||
which theoretically brings JIT benefits to non-JIT languages. Finally, be careful to avoid comparing
 | 
			
		||||
apples and oranges during benchmarks; you don't want your code to suddenly speed up because the JIT
 | 
			
		||||
compiler kicked in.
 | 
			
		||||
 | 
			
		||||
**Programming Tricks**: These won't make or break performance, but can be useful in specific
 | 
			
		||||
circumstances. For example, C++ can use
 | 
			
		||||
[templates instead of branches](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=1206)
 | 
			
		||||
in critical sections.
 | 
			
		||||
 | 
			
		||||
## Kernel
 | 
			
		||||
 | 
			
		||||
Code you wrote is almost certainly not the _only_ code running on your hardware. There are many ways
 | 
			
		||||
the operating system interacts with your program, from interrupts to system calls, that are
 | 
			
		||||
important to watch for. These are written from a Linux perspective, but Windows does typically have
 | 
			
		||||
equivalent functionality.
 | 
			
		||||
 | 
			
		||||
**Scheduling**: The kernel is normally free to schedule any process on any core, so it's important
 | 
			
		||||
to reserve CPU cores exclusively for the important programs. There are a few parts to this: first,
 | 
			
		||||
limit the CPU cores that non-critical processes are allowed to run on by excluding cores from
 | 
			
		||||
scheduling
 | 
			
		||||
([`isolcpus`](https://www.linuxtopia.org/online_books/linux_kernel/kernel_configuration/re46.html)
 | 
			
		||||
kernel command-line option), or by setting the `init` process CPU affinity
 | 
			
		||||
([`systemd` example](https://access.redhat.com/solutions/2884991)). Second, set critical processes
 | 
			
		||||
to run on the isolated cores by setting the
 | 
			
		||||
[processor affinity](https://en.wikipedia.org/wiki/Processor_affinity) using
 | 
			
		||||
[taskset](https://linux.die.net/man/1/taskset). Finally, use
 | 
			
		||||
[`NO_HZ`](https://github.com/torvalds/linux/blob/master/Documentation/timers/NO_HZ.txt) or
 | 
			
		||||
[`chrt`](https://linux.die.net/man/1/chrt) to disable scheduling interrupts. Turning off
 | 
			
		||||
hyper-threading is also likely beneficial.
 | 
			
		||||
 | 
			
		||||
**System calls**: Reading from a UNIX socket? Writing to a file? In addition to not knowing how long
 | 
			
		||||
the I/O operation takes, these all trigger expensive
 | 
			
		||||
[system calls (syscalls)](https://en.wikipedia.org/wiki/System_call). To handle these, the CPU must
 | 
			
		||||
[context switch](https://en.wikipedia.org/wiki/Context_switch) to the kernel, let the kernel
 | 
			
		||||
operation complete, then context switch back to your program. We'd rather keep these
 | 
			
		||||
[to a minimum](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) (see
 | 
			
		||||
timestamp 18:20). [Strace](https://linux.die.net/man/1/strace) is your friend for understanding when
 | 
			
		||||
and where syscalls happen.
 | 
			
		||||
 | 
			
		||||
**Signal Handling**: Far less likely to be an issue, but signals do trigger a context switch if your
 | 
			
		||||
code has a handler registered. This will be highly dependent on the application, but you can
 | 
			
		||||
[block signals](https://www.linuxprogrammingblog.com/all-about-linux-signals?page=show#Blocking_signals)
 | 
			
		||||
if it's an issue.
 | 
			
		||||
 | 
			
		||||
**Interrupts**: System interrupts are how devices connected to your computer notify the CPU that
 | 
			
		||||
something has happened. The CPU will then choose a processor core to pause and context switch to the
 | 
			
		||||
OS to handle the interrupt. Make sure that
 | 
			
		||||
[SMP affinity](http://www.alexonlinux.com/smp-affinity-and-proper-interrupt-handling-in-linux) is
 | 
			
		||||
set so that interrupts are handled on a CPU core not running the program you care about.
 | 
			
		||||
 | 
			
		||||
**[NUMA](https://www.kernel.org/doc/html/latest/vm/numa.html)**: While NUMA is good at making
 | 
			
		||||
multi-cell systems transparent, there are variance implications; if the kernel moves a process
 | 
			
		||||
across nodes, future memory accesses must wait for the controller on the original node. Use
 | 
			
		||||
[numactl](https://linux.die.net/man/8/numactl) to handle memory-/cpu-cell pinning so this doesn't
 | 
			
		||||
happen.
 | 
			
		||||
 | 
			
		||||
## Hardware
 | 
			
		||||
 | 
			
		||||
**CPU Pipelining/Speculation**: Speculative execution in modern processors gave us vulnerabilities
 | 
			
		||||
like Spectre, but it also gave us performance improvements like
 | 
			
		||||
[branch prediction](https://stackoverflow.com/a/11227902/1454178). And if the CPU mis-speculates
 | 
			
		||||
your code, there's variance associated with rewind and replay. While the compiler knows a lot about
 | 
			
		||||
how your CPU [pipelines instructions](https://youtu.be/nAbCKa0FzjQ?t=4467), code can be
 | 
			
		||||
[structured to help](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=755) the branch
 | 
			
		||||
predictor.
 | 
			
		||||
 | 
			
		||||
**Paging**: For most systems, virtual memory is incredible. Applications live in their own worlds,
 | 
			
		||||
and the CPU/[MMU](https://en.wikipedia.org/wiki/Memory_management_unit) figures out the details.
 | 
			
		||||
However, there's a variance penalty associated with memory paging and caching; if you access more
 | 
			
		||||
memory pages than the [TLB](https://en.wikipedia.org/wiki/Translation_lookaside_buffer) can store,
 | 
			
		||||
you'll have to wait for the page walk. Kernel perf tools are necessary to figure out if this is an
 | 
			
		||||
issue, but using [huge pages](https://blog.pythian.com/performance-tuning-hugepages-in-linux/) can
 | 
			
		||||
reduce TLB burdens. Alternately, running applications in a hypervisor like
 | 
			
		||||
[Jailhouse](https://github.com/siemens/jailhouse) allows one to skip virtual memory entirely, but
 | 
			
		||||
this is probably more work than the benefits are worth.
 | 
			
		||||
 | 
			
		||||
**Network Interfaces**: When more than one computer is involved, variance can go up dramatically.
 | 
			
		||||
Tuning kernel
 | 
			
		||||
[network parameters](https://github.com/leandromoreira/linux-network-performance-parameters) may be
 | 
			
		||||
helpful, but modern systems more frequently opt to skip the kernel altogether with a technique
 | 
			
		||||
called [kernel bypass](https://blog.cloudflare.com/kernel-bypass/). This typically requires
 | 
			
		||||
specialized hardware and [drivers](https://www.openonload.org/), but even industries like
 | 
			
		||||
[telecom](https://www.bbc.co.uk/rd/blog/2018-04-high-speed-networking-open-source-kernel-bypass) are
 | 
			
		||||
finding the benefits.
 | 
			
		||||
 | 
			
		||||
## Networks
 | 
			
		||||
 | 
			
		||||
**Routing**: There's a reason financial firms are willing to pay
 | 
			
		||||
[millions of euros](https://sniperinmahwah.wordpress.com/2019/03/26/4-les-moeres-english-version/)
 | 
			
		||||
for rights to a small plot of land - having a straight-line connection from point A to point B means
 | 
			
		||||
the path their data takes is the shortest possible. In contrast, there are currently 6 computers in
 | 
			
		||||
between me and Google, but that may change at any moment if my ISP realizes a
 | 
			
		||||
[more efficient route](https://en.wikipedia.org/wiki/Border_Gateway_Protocol) is available. Whether
 | 
			
		||||
it's using
 | 
			
		||||
[research-quality equipment](https://sniperinmahwah.wordpress.com/2018/05/07/shortwave-trading-part-i-the-west-chicago-tower-mystery/)
 | 
			
		||||
for shortwave radio, or just making sure there's no data inadvertently going between data centers,
 | 
			
		||||
routing matters.
 | 
			
		||||
 | 
			
		||||
**Protocol**: TCP as a network protocol is awesome: guaranteed and in-order delivery, flow control,
 | 
			
		||||
and congestion control all built in. But these attributes make the most sense when networking
 | 
			
		||||
infrastructure is lossy; for systems that expect nearly all packets to be delivered correctly, the
 | 
			
		||||
setup handshaking and packet acknowledgment are just overhead. Using UDP (unicast or multicast) may
 | 
			
		||||
make sense in these contexts as it avoids the chatter needed to track connection state, and
 | 
			
		||||
[gap-fill](https://iextrading.com/docs/IEX%20Transport%20Specification.pdf)
 | 
			
		||||
[strategies](http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/moldudp64.pdf)
 | 
			
		||||
can handle the rest.
 | 
			
		||||
 | 
			
		||||
**Switching**: Many routers/switches handle packets using "store-and-forward" behavior: wait for the
 | 
			
		||||
whole packet, validate checksums, and then send to the next device. In variance terms, the time
 | 
			
		||||
needed to move data between two nodes is proportional to the size of that data; the switch must
 | 
			
		||||
"store" all data before it can calculate checksums and "forward" to the next node. With
 | 
			
		||||
["cut-through"](https://www.networkworld.com/article/2241573/latency-and-jitter--cut-through-design-pays-off-for-arista--blade.html)
 | 
			
		||||
designs, switches will begin forwarding data as soon as they know where the destination is,
 | 
			
		||||
checksums be damned. This means there's a fixed cost (at the switch) for network traffic, no matter
 | 
			
		||||
the size.
 | 
			
		||||
 | 
			
		||||
# Final Thoughts
 | 
			
		||||
 | 
			
		||||
High-performance systems, regardless of industry, are not magical. They do require extreme precision
 | 
			
		||||
and attention to detail, but they're designed, built, and operated by regular people, using a lot of
 | 
			
		||||
tools that are publicly available. Interested in seeing how context switching affects performance of
 | 
			
		||||
your benchmarks? `taskset` should be installed in all modern Linux distributions, and can be used to
 | 
			
		||||
make sure the OS never migrates your process. Curious how often garbage collection triggers during a
 | 
			
		||||
crucial operation? Your language of choice will typically expose details of its operations
 | 
			
		||||
([Python](https://docs.python.org/3/library/gc.html),
 | 
			
		||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/vmoptions-jsp-140102.html#DebuggingOptions)).
 | 
			
		||||
Want to know how hard your program is stressing the TLB? Use `perf record` and look for
 | 
			
		||||
`dtlb_load_misses.miss_causes_a_walk`.
 | 
			
		||||
 | 
			
		||||
Two final guiding questions, then: first, before attempting to apply some of the technology above to
 | 
			
		||||
your own systems, can you identify
 | 
			
		||||
[where/when you care](http://wiki.c2.com/?PrematureOptimization) about "high-performance"? As an
 | 
			
		||||
example, if parts of a system rely on humans pushing buttons, CPU pinning won't have any measurable
 | 
			
		||||
effect. Humans are already far too slow to react in time. Second, if you're using benchmarks, are
 | 
			
		||||
they being designed in a way that's actually helpful? Tools like
 | 
			
		||||
[Criterion](http://www.serpentine.com/criterion/) (also in
 | 
			
		||||
[Rust](https://github.com/bheisler/criterion.rs)) and Google's
 | 
			
		||||
[Benchmark](https://github.com/google/benchmark) output not only average run time, but variance as
 | 
			
		||||
well; your benchmarking environment is subject to the same concerns your production environment is.
 | 
			
		||||
 | 
			
		||||
Finally, I believe high-performance systems are a matter of philosophy, not necessarily technique.
 | 
			
		||||
Rigorous focus on variance is the first step, and there are plenty of ways to measure and mitigate
 | 
			
		||||
it; once that's at an acceptable level, then optimize for speed.
 | 
			
		||||
@ -1,263 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Binary Format Shootout"
 | 
			
		||||
description: "Cap'n Proto vs. Flatbuffers vs. SBE"
 | 
			
		||||
category:
 | 
			
		||||
tags: [rust]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
I've found that in many personal projects,
 | 
			
		||||
[analysis paralysis](https://en.wikipedia.org/wiki/Analysis_paralysis) is particularly deadly.
 | 
			
		||||
Making good decisions in the beginning avoids pain and suffering later; if extra research prevents
 | 
			
		||||
future problems, I'm happy to continue ~~procrastinating~~ researching indefinitely.
 | 
			
		||||
 | 
			
		||||
So let's say you're in need of a binary serialization format. Data will be going over the network,
 | 
			
		||||
not just in memory, so having a schema document and code generation is a must. Performance is
 | 
			
		||||
crucial, so formats that support zero-copy de/serialization are given priority. And the more
 | 
			
		||||
languages supported, the better; I use Rust, but can't predict what other languages this could
 | 
			
		||||
interact with.
 | 
			
		||||
 | 
			
		||||
Given these requirements, the candidates I could find were:
 | 
			
		||||
 | 
			
		||||
1. [Cap'n Proto](https://capnproto.org/) has been around the longest, and is the most established
 | 
			
		||||
2. [Flatbuffers](https://google.github.io/flatbuffers/) is the newest, and claims to have a simpler
 | 
			
		||||
   encoding
 | 
			
		||||
3. [Simple Binary Encoding](https://github.com/real-logic/simple-binary-encoding) has the simplest
 | 
			
		||||
   encoding, but the Rust implementation is unmaintained
 | 
			
		||||
 | 
			
		||||
Any one of these will satisfy the project requirements: easy to transmit over a network, reasonably
 | 
			
		||||
fast, and polyglot support. But how do you actually pick one? It's impossible to know what issues
 | 
			
		||||
will follow that choice, so I tend to avoid commitment until the last possible moment.
 | 
			
		||||
 | 
			
		||||
Still, a choice must be made. Instead of worrying about which is "the best," I decided to build a
 | 
			
		||||
small proof-of-concept system in each format and pit them against each other. All code can be found
 | 
			
		||||
in the [repository](https://github.com/speice-io/marketdata-shootout) for this post.
 | 
			
		||||
 | 
			
		||||
We'll discuss more in detail, but a quick preview of the results:
 | 
			
		||||
 | 
			
		||||
- Cap'n Proto: Theoretically performs incredibly well, but the implementation had issues
 | 
			
		||||
- Flatbuffers: Has some quirks, but largely lived up to its "zero-copy" promises
 | 
			
		||||
- SBE: Best median and worst-case performance, but the message structure has a limited feature set
 | 
			
		||||
 | 
			
		||||
# Prologue: Binary Parsing with Nom
 | 
			
		||||
 | 
			
		||||
Our benchmark system will be a simple data processor; given depth-of-book market data from
 | 
			
		||||
[IEX](https://iextrading.com/trading/market-data/#deep), serialize each message into the schema
 | 
			
		||||
format, read it back, and calculate total size of stock traded and the lowest/highest quoted prices.
 | 
			
		||||
This test isn't complex, but is representative of the project I need a binary format for.
 | 
			
		||||
 | 
			
		||||
But before we make it to that point, we have to actually read in the market data. To do so, I'm
 | 
			
		||||
using a library called [`nom`](https://github.com/Geal/nom). Version 5.0 was recently released and
 | 
			
		||||
brought some big changes, so this was an opportunity to build a non-trivial program and get
 | 
			
		||||
familiar.
 | 
			
		||||
 | 
			
		||||
If you don't already know about `nom`, it's a "parser combinator" library. By combining different smaller
 | 
			
		||||
parsers, you can assemble a parser to handle complex structures without writing tedious code by
 | 
			
		||||
hand. For example, when parsing
 | 
			
		||||
[PCAP files](https://www.winpcap.org/ntar/draft/PCAP-DumpFileFormat.html#rfc.section.3.3):
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
   0                   1                   2                   3
 | 
			
		||||
   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 | 
			
		||||
   +---------------------------------------------------------------+
 | 
			
		||||
 0 |                    Block Type = 0x00000006                    |
 | 
			
		||||
   +---------------------------------------------------------------+
 | 
			
		||||
 4 |                      Block Total Length                       |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
 8 |                         Interface ID                          |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
12 |                        Timestamp (High)                       |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
16 |                        Timestamp (Low)                        |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
20 |                         Captured Len                          |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
24 |                          Packet Len                           |
 | 
			
		||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
			
		||||
   |                          Packet Data                          |
 | 
			
		||||
   |                              ...                              |
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
...you can build a parser in `nom` that looks like
 | 
			
		||||
[this](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/parsers.rs#L59-L93):
 | 
			
		||||
 | 
			
		||||
```rust
 | 
			
		||||
const ENHANCED_PACKET: [u8; 4] = [0x06, 0x00, 0x00, 0x00];
 | 
			
		||||
pub fn enhanced_packet_block(input: &[u8]) -> IResult<&[u8], &[u8]> {
 | 
			
		||||
    let (
 | 
			
		||||
        remaining,
 | 
			
		||||
        (
 | 
			
		||||
            block_type,
 | 
			
		||||
            block_len,
 | 
			
		||||
            interface_id,
 | 
			
		||||
            timestamp_high,
 | 
			
		||||
            timestamp_low,
 | 
			
		||||
            captured_len,
 | 
			
		||||
            packet_len,
 | 
			
		||||
        ),
 | 
			
		||||
    ) = tuple((
 | 
			
		||||
        tag(ENHANCED_PACKET),
 | 
			
		||||
        le_u32,
 | 
			
		||||
        le_u32,
 | 
			
		||||
        le_u32,
 | 
			
		||||
        le_u32,
 | 
			
		||||
        le_u32,
 | 
			
		||||
        le_u32,
 | 
			
		||||
    ))(input)?;
 | 
			
		||||
 | 
			
		||||
    let (remaining, packet_data) = take(captured_len)(remaining)?;
 | 
			
		||||
    Ok((remaining, packet_data))
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
While this example isn't too interesting, more complex formats (like IEX market data) are where
 | 
			
		||||
[`nom` really shines](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/iex.rs).
 | 
			
		||||
 | 
			
		||||
Ultimately, because the `nom` code in this shootout was the same for all formats, we're not too
 | 
			
		||||
interested in its performance. Still, it's worth mentioning that building the market data parser was
 | 
			
		||||
actually fun; I didn't have to write tons of boring code by hand.
 | 
			
		||||
 | 
			
		||||
# Part 1: Cap'n Proto
 | 
			
		||||
 | 
			
		||||
Now it's time to get into the meaty part of the story. Cap'n Proto was the first format I tried
 | 
			
		||||
because of how long it has supported Rust (thanks to [dwrensha](https://github.com/dwrensha) for
 | 
			
		||||
maintaining the Rust port since
 | 
			
		||||
[2014!](https://github.com/capnproto/capnproto-rust/releases/tag/rustc-0.10)). However, I had a ton
 | 
			
		||||
of performance concerns once I started using it.
 | 
			
		||||
 | 
			
		||||
To serialize new messages, Cap'n Proto uses a "builder" object. This builder allocates memory on the
 | 
			
		||||
heap to hold the message content, but because builders
 | 
			
		||||
[can't be re-used](https://github.com/capnproto/capnproto-rust/issues/111), we have to allocate a
 | 
			
		||||
new buffer for every single message. I was able to work around this with a
 | 
			
		||||
[special builder](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/capnp_runner.rs#L17-L51)
 | 
			
		||||
that could re-use the buffer, but it required reading through Cap'n Proto's
 | 
			
		||||
[benchmarks](https://github.com/capnproto/capnproto-rust/blob/master/benchmark/benchmark.rs#L124-L156)
 | 
			
		||||
to find an example, and used
 | 
			
		||||
[`std::mem::transmute`](https://doc.rust-lang.org/std/mem/fn.transmute.html) to bypass Rust's borrow
 | 
			
		||||
checker.
 | 
			
		||||
 | 
			
		||||
The process of reading messages was better, but still had issues. Cap'n Proto has two message
 | 
			
		||||
encodings: a ["packed"](https://capnproto.org/encoding.html#packing) representation, and an
 | 
			
		||||
"unpacked" version. When reading "packed" messages, we need a buffer to unpack the message into
 | 
			
		||||
before we can use it; Cap'n Proto allocates a new buffer for each message we unpack, and I wasn't
 | 
			
		||||
able to figure out a way around that. In contrast, the unpacked message format should be where Cap'n
 | 
			
		||||
Proto shines; its main selling point is that there's [no decoding step](https://capnproto.org/).
 | 
			
		||||
However, accomplishing zero-copy deserialization required code in the private API
 | 
			
		||||
([since fixed](https://github.com/capnproto/capnproto-rust/issues/148)), and we allocate a vector on
 | 
			
		||||
every read for the segment table.
 | 
			
		||||
 | 
			
		||||
In the end, I put in significant work to make Cap'n Proto as fast as possible, but there were too
 | 
			
		||||
many issues for me to feel comfortable using it long-term.
 | 
			
		||||
 | 
			
		||||
# Part 2: Flatbuffers
 | 
			
		||||
 | 
			
		||||
This is the new kid on the block. After a
 | 
			
		||||
[first attempt](https://github.com/google/flatbuffers/pull/3894) didn't pan out, official Rust support
 | 
			
		||||
was [recently launched](https://github.com/google/flatbuffers/pull/4898). Flatbuffers intends to
 | 
			
		||||
address the same problems as Cap'n Proto: high-performance, polyglot, binary messaging. The
 | 
			
		||||
difference is that Flatbuffers claims to have a simpler wire format and
 | 
			
		||||
[more flexibility](https://google.github.io/flatbuffers/flatbuffers_benchmarks.html).
 | 
			
		||||
 | 
			
		||||
On the whole, I enjoyed using Flatbuffers; the [tooling](https://crates.io/crates/flatc-rust) is
 | 
			
		||||
nice, and unlike Cap'n Proto, parsing messages was actually zero-copy and zero-allocation. However,
 | 
			
		||||
there were still some issues.
 | 
			
		||||
 | 
			
		||||
First, Flatbuffers (at least in Rust) can't handle nested vectors. This is a problem for formats
 | 
			
		||||
like the following:
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
table Message {
 | 
			
		||||
  symbol: string;
 | 
			
		||||
}
 | 
			
		||||
table MultiMessage {
 | 
			
		||||
  messages:[Message];
 | 
			
		||||
}
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
We want to create a `MultiMessage` which contains a vector of `Message`, and each `Message` itself
 | 
			
		||||
contains a vector (the `string` type). I was able to work around this by
 | 
			
		||||
[caching `Message` elements](https://github.com/speice-io/marketdata-shootout/blob/e9d07d148bf36a211a6f86802b313c4918377d1b/src/flatbuffers_runner.rs#L83)
 | 
			
		||||
in a `SmallVec` before building the final `MultiMessage`, but it was a painful process that I
 | 
			
		||||
believe contributed to poor serialization performance.
 | 
			
		||||
 | 
			
		||||
Second, streaming support in Flatbuffers seems to be something of an
 | 
			
		||||
[afterthought](https://github.com/google/flatbuffers/issues/3898). Where Cap'n Proto in Rust handles
 | 
			
		||||
reading messages from a stream as part of the API, Flatbuffers just sticks a `u32` at the front of
 | 
			
		||||
each message to indicate the size. Not specifically a problem, but calculating message size without
 | 
			
		||||
that tag is nigh on impossible.
 | 
			
		||||
 | 
			
		||||
Ultimately, I enjoyed using Flatbuffers, and had to do significantly less work to make it perform
 | 
			
		||||
well.
 | 
			
		||||
 | 
			
		||||
# Part 3: Simple Binary Encoding
 | 
			
		||||
 | 
			
		||||
Support for SBE was added by the author of one of my favorite
 | 
			
		||||
[Rust blog posts](https://web.archive.org/web/20190427124806/https://polysync.io/blog/session-types-for-hearty-codecs/).
 | 
			
		||||
I've [talked previously]({% post_url 2019-06-31-high-performance-systems %}) about how important
 | 
			
		||||
variance is in high-performance systems, so it was encouraging to read about a format that
 | 
			
		||||
[directly addressed](https://github.com/real-logic/simple-binary-encoding/wiki/Why-Low-Latency) my
 | 
			
		||||
concerns. SBE has by far the simplest binary format, but it does make some tradeoffs.
 | 
			
		||||
 | 
			
		||||
Both Cap'n Proto and Flatbuffers use [message offsets](https://capnproto.org/encoding.html#structs)
 | 
			
		||||
to handle variable-length data, [unions](https://capnproto.org/language.html#unions), and various
 | 
			
		||||
other features. In contrast, messages in SBE are essentially
 | 
			
		||||
[just structs](https://github.com/real-logic/simple-binary-encoding/blob/master/sbe-samples/src/main/resources/example-schema.xml);
 | 
			
		||||
variable-length data is supported, but there's no union type.
 | 
			
		||||
 | 
			
		||||
As mentioned in the beginning, the Rust port of SBE works well, but is
 | 
			
		||||
[essentially unmaintained](https://users.rust-lang.org/t/zero-cost-abstraction-frontier-no-copy-low-allocation-ordered-decoding/11515/9).
 | 
			
		||||
However, if you don't need union types, and can accept that schemas are XML documents, it's still
 | 
			
		||||
worth using. SBE's implementation had the best streaming support of all formats I tested, and
 | 
			
		||||
doesn't trigger allocation during de/serialization.
 | 
			
		||||
 | 
			
		||||
# Results
 | 
			
		||||
 | 
			
		||||
After building a test harness
 | 
			
		||||
[for](https://github.com/speice-io/marketdata-shootout/blob/master/src/capnp_runner.rs)
 | 
			
		||||
[each](https://github.com/speice-io/marketdata-shootout/blob/master/src/flatbuffers_runner.rs)
 | 
			
		||||
[format](https://github.com/speice-io/marketdata-shootout/blob/master/src/sbe_runner.rs), it was
 | 
			
		||||
time to actually take them for a spin. I used
 | 
			
		||||
[this script](https://github.com/speice-io/marketdata-shootout/blob/master/run_shootout.sh) to run
 | 
			
		||||
the benchmarks, and the raw results are
 | 
			
		||||
[here](https://github.com/speice-io/marketdata-shootout/blob/master/shootout.csv). All data reported
 | 
			
		||||
below is the average of 10 runs on a single day of IEX data. Results were validated to make sure
 | 
			
		||||
that each format parsed the data correctly.
 | 
			
		||||
 | 
			
		||||
## Serialization
 | 
			
		||||
 | 
			
		||||
This test measures, on a
 | 
			
		||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L268-L272),
 | 
			
		||||
how long it takes to serialize the IEX message into the desired format and write to a pre-allocated
 | 
			
		||||
buffer.
 | 
			
		||||
 | 
			
		||||
| Schema               | Median | 99th Pctl | 99.9th Pctl | Total  |
 | 
			
		||||
| :------------------- | :----- | :-------- | :---------- | :----- |
 | 
			
		||||
| Cap'n Proto Packed   | 413ns  | 1751ns    | 2943ns      | 14.80s |
 | 
			
		||||
| Cap'n Proto Unpacked | 273ns  | 1828ns    | 2836ns      | 10.65s |
 | 
			
		||||
| Flatbuffers          | 355ns  | 2185ns    | 3497ns      | 14.31s |
 | 
			
		||||
| SBE                  | 91ns   | 1535ns    | 2423ns      | 3.91s  |
 | 
			
		||||
 | 
			
		||||
## Deserialization
 | 
			
		||||
 | 
			
		||||
This test measures, on a
 | 
			
		||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L294-L298),
 | 
			
		||||
how long it takes to read the previously-serialized message and perform some basic aggregation. The
 | 
			
		||||
aggregation code is the same for each format, so any performance differences are due solely to the
 | 
			
		||||
format implementation.
 | 
			
		||||
 | 
			
		||||
| Schema               | Median | 99th Pctl | 99.9th Pctl | Total  |
 | 
			
		||||
| :------------------- | :----- | :-------- | :---------- | :----- |
 | 
			
		||||
| Cap'n Proto Packed   | 539ns  | 1216ns    | 2599ns      | 18.92s |
 | 
			
		||||
| Cap'n Proto Unpacked | 366ns  | 737ns     | 1583ns      | 12.32s |
 | 
			
		||||
| Flatbuffers          | 173ns  | 421ns     | 1007ns      | 6.00s  |
 | 
			
		||||
| SBE                  | 116ns  | 286ns     | 659ns       | 4.05s  |
 | 
			
		||||
 | 
			
		||||
# Conclusion
 | 
			
		||||
 | 
			
		||||
Building a benchmark turned out to be incredibly helpful in making a decision; because a "union"
 | 
			
		||||
type isn't important to me, I can be confident that SBE best addresses my needs.
 | 
			
		||||
 | 
			
		||||
While SBE was the fastest in terms of both median and worst-case performance, its worst case
 | 
			
		||||
performance was proportionately far higher than any other format. It seems that
 | 
			
		||||
de/serialization time scales with message size, but I'll need to do some more research to understand
 | 
			
		||||
what exactly is going on.
 | 
			
		||||
@ -1,370 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "Release the GIL"
 | 
			
		||||
description: "Strategies for Parallelism in Python"
 | 
			
		||||
category:
 | 
			
		||||
tags: [python]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
Complaining about the [Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock)
 | 
			
		||||
(GIL) seems like a rite of passage for Python developers. It's easy to criticize a design decision
 | 
			
		||||
made before multi-core CPUs were widely available, but the fact that it's still around indicates
 | 
			
		||||
that it generally works [Good](https://wiki.c2.com/?PrematureOptimization)
 | 
			
		||||
[Enough](https://wiki.c2.com/?YouArentGonnaNeedIt). Besides, there are simple and effective
 | 
			
		||||
workarounds; it's not hard to start a
 | 
			
		||||
[new process](https://docs.python.org/3/library/multiprocessing.html) and use message passing to
 | 
			
		||||
synchronize code running in parallel.
 | 
			
		||||
 | 
			
		||||
Still, wouldn't it be nice to have more than a single active interpreter thread? In an age of
 | 
			
		||||
asynchronicity and _M:N_ threading, Python seems lacking. The ideal scenario is to take advantage of
 | 
			
		||||
both Python's productivity and the modern CPU's parallel capabilities.
 | 
			
		||||
 | 
			
		||||
Presented below are two strategies for releasing the GIL's icy grip without giving up on what makes
 | 
			
		||||
Python a nice language to start with. Bear in mind: these are just the tools; no claim is made about
 | 
			
		||||
whether it's a good idea to use them. Very often, unlocking the GIL is an
 | 
			
		||||
[XY problem](https://en.wikipedia.org/wiki/XY_problem); you want application performance, and the
 | 
			
		||||
GIL seems like an obvious bottleneck. Remember that any gains from running code in parallel come at
 | 
			
		||||
the expense of project complexity; messing with the GIL is ultimately messing with Python's memory
 | 
			
		||||
model.
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%load_ext Cython
 | 
			
		||||
from numba import jit
 | 
			
		||||
 | 
			
		||||
N = 1_000_000_000
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
# Cython
 | 
			
		||||
 | 
			
		||||
Put simply, [Cython](https://cython.org/) is a programming language that looks a lot like Python,
 | 
			
		||||
gets [transpiled](https://en.wikipedia.org/wiki/Source-to-source_compiler) to C/C++, and integrates
 | 
			
		||||
well with the [CPython](https://en.wikipedia.org/wiki/CPython) API. It's great for building Python
 | 
			
		||||
wrappers to C and C++ libraries, writing optimized code for numerical processing, and tons more. And
 | 
			
		||||
when it comes to managing the GIL, there are two special features:
 | 
			
		||||
 | 
			
		||||
- The `nogil`
 | 
			
		||||
  [function annotation](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#declaring-a-function-as-callable-without-the-gil)
 | 
			
		||||
  asserts that a Cython function is safe to use without the GIL, and compilation will fail if it
 | 
			
		||||
  interacts with Python in an unsafe manner
 | 
			
		||||
- The `with nogil`
 | 
			
		||||
  [context manager](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#releasing-the-gil)
 | 
			
		||||
  explicitly unlocks the CPython GIL while active
 | 
			
		||||
 | 
			
		||||
Whenever Cython code runs inside a `with nogil` block on a separate thread, the Python interpreter
 | 
			
		||||
is unblocked and allowed to continue work elsewhere. We'll define a "busy work" function that
 | 
			
		||||
demonstrates this principle in action:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%cython
 | 
			
		||||
 | 
			
		||||
# Annotating a function with `nogil` indicates only that it is safe
 | 
			
		||||
# to call in a `with nogil` block. It *does not* release the GIL.
 | 
			
		||||
cdef unsigned long fibonacci(unsigned long n) nogil:
 | 
			
		||||
    if n <= 1:
 | 
			
		||||
        return n
 | 
			
		||||
 | 
			
		||||
    cdef unsigned long a = 0, b = 1, c = 0
 | 
			
		||||
 | 
			
		||||
    c = a + b
 | 
			
		||||
    for _i in range(2, n):
 | 
			
		||||
        a = b
 | 
			
		||||
        b = c
 | 
			
		||||
        c = a + b
 | 
			
		||||
 | 
			
		||||
    return c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cython_nogil(unsigned long n):
 | 
			
		||||
    # Explicitly release the GIL while running `fibonacci`
 | 
			
		||||
    with nogil:
 | 
			
		||||
        value = fibonacci(n)
 | 
			
		||||
 | 
			
		||||
    return value
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cython_gil(unsigned long n):
 | 
			
		||||
    # Because the GIL is not explicitly released, it implicitly
 | 
			
		||||
    # remains acquired when running the `fibonacci` function
 | 
			
		||||
    return fibonacci(n)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
First, let's time how long it takes Cython to calculate the billionth Fibonacci number:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
_ = cython_gil(N);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 365 ms, sys: 0 ns, total: 365 ms
 | 
			
		||||
> Wall time: 372 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
_ = cython_nogil(N);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 381 ms, sys: 0 ns, total: 381 ms
 | 
			
		||||
> Wall time: 388 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
Both versions (with and without GIL) take effectively the same amount of time to run. Even when
 | 
			
		||||
running this calculation in parallel on separate threads, it is expected that the run time will
 | 
			
		||||
double because only one thread can be active at a time:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
from threading import Thread
 | 
			
		||||
 | 
			
		||||
# Create the two threads to run on
 | 
			
		||||
t1 = Thread(target=cython_gil, args=[N])
 | 
			
		||||
t2 = Thread(target=cython_gil, args=[N])
 | 
			
		||||
# Start the threads
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
# Wait for the threads to finish
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 641 ms, sys: 5.62 ms, total: 647 ms
 | 
			
		||||
> Wall time: 645 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
However, if the first thread releases the GIL, the second thread is free to acquire it and run in
 | 
			
		||||
parallel:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
 | 
			
		||||
t1 = Thread(target=cython_nogil, args=[N])
 | 
			
		||||
t2 = Thread(target=cython_gil, args=[N])
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 717 ms, sys: 372 µs, total: 718 ms
 | 
			
		||||
> Wall time: 358 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
Because `user` time represents the sum of processing time on all threads, it doesn't change much.
 | 
			
		||||
The ["wall time"](https://en.wikipedia.org/wiki/Elapsed_real_time) has been cut roughly in half
 | 
			
		||||
because each function is running simultaneously.
 | 
			
		||||
 | 
			
		||||
Keep in mind that the **order in which threads are started** makes a difference!
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
 | 
			
		||||
# Note that the GIL-locked version is started first
 | 
			
		||||
t1 = Thread(target=cython_gil, args=[N])
 | 
			
		||||
t2 = Thread(target=cython_nogil, args=[N])
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 667 ms, sys: 0 ns, total: 667 ms
 | 
			
		||||
> Wall time: 672 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
Even though the second thread releases the GIL while running, it can't start until the first has
 | 
			
		||||
completed. Thus, the overall runtime is effectively the same as running two GIL-locked threads.
 | 
			
		||||
 | 
			
		||||
Finally, be aware that attempting to unlock the GIL from a thread that doesn't own it will crash the
 | 
			
		||||
**interpreter**, not just the thread attempting the unlock:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%cython
 | 
			
		||||
 | 
			
		||||
cdef int cython_recurse(int n) nogil:
 | 
			
		||||
    if n <= 0:
 | 
			
		||||
        return 0
 | 
			
		||||
 | 
			
		||||
    with nogil:
 | 
			
		||||
        return cython_recurse(n - 1)
 | 
			
		||||
 | 
			
		||||
cython_recurse(2)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> Fatal Python error: PyEval_SaveThread: NULL tstate
 | 
			
		||||
> 
 | 
			
		||||
> Thread 0x00007f499effd700 (most recent call first):
 | 
			
		||||
>   File "/home/bspeice/.virtualenvs/release-the-gil/lib/python3.7/site-packages/ipykernel/parentpoller.py", line 39 in run
 | 
			
		||||
>   File "/usr/lib/python3.7/threading.py", line 926 in _bootstrap_inner
 | 
			
		||||
>   File "/usr/lib/python3.7/threading.py", line 890 in _bootstrap
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
In practice, avoiding this issue is simple. First, `nogil` functions probably shouldn't contain
 | 
			
		||||
`with nogil` blocks. Second, Cython can
 | 
			
		||||
[conditionally acquire/release](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#conditional-acquiring-releasing-the-gil)
 | 
			
		||||
the GIL, so these conditions can be used to synchronize access. Finally, Cython's documentation for
 | 
			
		||||
[external C code](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#acquiring-and-releasing-the-gil)
 | 
			
		||||
contains more detail on how to safely manage the GIL.
 | 
			
		||||
 | 
			
		||||
To conclude: use Cython's `nogil` annotation to assert that functions are safe for calling when the
 | 
			
		||||
GIL is unlocked, and `with nogil` to actually unlock the GIL and run those functions.
 | 
			
		||||
 | 
			
		||||
# Numba
 | 
			
		||||
 | 
			
		||||
Like Cython, [Numba](https://numba.pydata.org/) is a "compiled Python." Where Cython works by
 | 
			
		||||
compiling a Python-like language to C/C++, Numba compiles Python bytecode _directly to machine code_
 | 
			
		||||
at runtime. Behavior is controlled with a special `@jit` decorator; calling a decorated function
 | 
			
		||||
first compiles it to machine code before running. Calling the function a second time re-uses that
 | 
			
		||||
machine code unless the argument types have changed.
 | 
			
		||||
 | 
			
		||||
Numba works best when a `nopython=True` argument is added to the `@jit` decorator; functions
 | 
			
		||||
compiled in [`nopython`](http://numba.pydata.org/numba-doc/latest/user/jit.html?#nopython) mode
 | 
			
		||||
avoid the CPython API and have performance comparable to C. Further, adding `nogil=True` to the
 | 
			
		||||
`@jit` decorator unlocks the GIL while that function is running. Note that `nogil` and `nopython`
 | 
			
		||||
are separate arguments; while it is necessary for code to be compiled in `nopython` mode in order to
 | 
			
		||||
release the lock, the GIL will remain locked if `nogil=False` (the default).
 | 
			
		||||
 | 
			
		||||
Let's repeat the same experiment, this time using Numba instead of Cython:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
# The `int` type annotation is only for humans and is ignored
 | 
			
		||||
# by Numba.
 | 
			
		||||
@jit(nopython=True, nogil=True)
 | 
			
		||||
def numba_nogil(n: int) -> int:
 | 
			
		||||
    if n <= 1:
 | 
			
		||||
        return n
 | 
			
		||||
 | 
			
		||||
    a = 0
 | 
			
		||||
    b = 1
 | 
			
		||||
 | 
			
		||||
    c = a + b
 | 
			
		||||
    for _i in range(2, n):
 | 
			
		||||
        a = b
 | 
			
		||||
        b = c
 | 
			
		||||
        c = a + b
 | 
			
		||||
 | 
			
		||||
    return c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Run using `nopython` mode to receive a performance boost,
 | 
			
		||||
# but GIL remains locked due to `nogil=False` by default.
 | 
			
		||||
@jit(nopython=True)
 | 
			
		||||
def numba_gil(n: int) -> int:
 | 
			
		||||
    if n <= 1:
 | 
			
		||||
        return n
 | 
			
		||||
 | 
			
		||||
    a = 0
 | 
			
		||||
    b = 1
 | 
			
		||||
 | 
			
		||||
    c = a + b
 | 
			
		||||
    for _i in range(2, n):
 | 
			
		||||
        a = b
 | 
			
		||||
        b = c
 | 
			
		||||
        c = a + b
 | 
			
		||||
 | 
			
		||||
    return c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Call each function once to force compilation; we don't want
 | 
			
		||||
# the timing statistics to include how long it takes to compile.
 | 
			
		||||
numba_nogil(N)
 | 
			
		||||
numba_gil(N);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
We'll perform the same tests as above; first, figure out how long it takes the function to run:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
_ = numba_gil(N)
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 253 ms, sys: 258 µs, total: 253 ms
 | 
			
		||||
> Wall time: 251 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
<span style="font-size: .8em">
 | 
			
		||||
Aside: it's not immediately clear why Numba takes ~20% less time to run than Cython for code that should be
 | 
			
		||||
effectively identical after compilation.
 | 
			
		||||
</span>
 | 
			
		||||
 | 
			
		||||
When running two GIL-locked threads, the result (as expected) takes around twice as long to compute:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
t1 = Thread(target=numba_gil, args=[N])
 | 
			
		||||
t2 = Thread(target=numba_gil, args=[N])
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 541 ms, sys: 3.96 ms, total: 545 ms
 | 
			
		||||
> Wall time: 541 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
But if the GIL-unlocking thread starts first, both threads run in parallel:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
t1 = Thread(target=numba_nogil, args=[N])
 | 
			
		||||
t2 = Thread(target=numba_gil, args=[N])
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 551 ms, sys: 7.77 ms, total: 559 ms
 | 
			
		||||
> Wall time: 279 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
Just like Cython, starting the GIL-locked thread first leads to poor performance:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
%%time
 | 
			
		||||
t1 = Thread(target=numba_gil, args=[N])
 | 
			
		||||
t2 = Thread(target=numba_nogil, args=[N])
 | 
			
		||||
t1.start(); t2.start()
 | 
			
		||||
t1.join(); t2.join()
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
> <pre>
 | 
			
		||||
> CPU times: user 524 ms, sys: 0 ns, total: 524 ms
 | 
			
		||||
> Wall time: 522 ms
 | 
			
		||||
> </pre>
 | 
			
		||||
 | 
			
		||||
Finally, unlike Cython, Numba will unlock the GIL if and only if it is currently acquired;
 | 
			
		||||
recursively calling `@jit(nogil=True)` functions is perfectly safe:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
from numba import jit
 | 
			
		||||
 | 
			
		||||
@jit(nopython=True, nogil=True)
 | 
			
		||||
def numba_recurse(n: int) -> int:
 | 
			
		||||
    if n <= 0:
 | 
			
		||||
        return 0
 | 
			
		||||
 | 
			
		||||
    return numba_recurse(n - 1)
 | 
			
		||||
 | 
			
		||||
numba_recurse(2);
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
# Conclusion
 | 
			
		||||
 | 
			
		||||
Before finishing, it's important to address pain points that will show up if these techniques are
 | 
			
		||||
used in a more realistic project:
 | 
			
		||||
 | 
			
		||||
First, code running in a GIL-free context will likely also need non-trivial data structures;
 | 
			
		||||
GIL-free functions aren't useful if they're constantly interacting with Python objects whose access
 | 
			
		||||
requires the GIL. Cython provides
 | 
			
		||||
[extension types](http://docs.cython.org/en/latest/src/tutorial/cdef_classes.html) and Numba
 | 
			
		||||
provides a [`@jitclass`](https://numba.pydata.org/numba-doc/dev/user/jitclass.html) decorator to
 | 
			
		||||
address this need.
 | 
			
		||||
 | 
			
		||||
Second, building and distributing applications that make use of Cython/Numba can be complicated.
 | 
			
		||||
Cython packages require running the compiler, (potentially) linking/packaging external dependencies,
 | 
			
		||||
and distributing a binary wheel. Numba is generally simpler because the code being distributed is
 | 
			
		||||
pure Python, but can be tricky since errors aren't detected until runtime.
 | 
			
		||||
 | 
			
		||||
Finally, while unlocking the GIL is often a solution in search of a problem, both Cython and Numba
 | 
			
		||||
provide tools to directly manage the GIL when appropriate. This enables true parallelism (not just
 | 
			
		||||
[concurrency](https://stackoverflow.com/a/1050257)) that is impossible in vanilla Python.
 | 
			
		||||
@ -1,60 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
layout: post
 | 
			
		||||
title: "The webpack industrial complex"
 | 
			
		||||
description: "Reflections on a new project"
 | 
			
		||||
category:
 | 
			
		||||
tags: [webpack, react, vite]
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
This started because I wanted to build a synthesizer. Setting a goal of "digital DX7" was ambitious, but I needed something unrelated to the day job. Beyond that, working with audio seemed like a good challenge. I enjoy performance-focused code, and performance problems in audio are conspicuous. Building a web project was an obvious choice because of the Web Audio API documentation and independence from a large Digital Audio Workstation (DAW).
 | 
			
		||||
 | 
			
		||||
The project was soon derailed trying to sort out technical issues unrelated to the original purpose. Finding a resolution was a frustrating journey, and it's still not clear whether those problems were my fault. As a result, I'm writing this to make sense of what happened, to serve as a case study/reference material, and to salvage something from the process.
 | 
			
		||||
 | 
			
		||||
## Starting strong
 | 
			
		||||
 | 
			
		||||
The sole starting requirement was to write everything in TypeScript. Not because of project scale, but because guardrails help with unfamiliar territory. Keeping that in mind, the first question was: how does one start a new project? All I actually need is "compile TypeScript, show it in a browser."
 | 
			
		||||
 | 
			
		||||
Create React App (CRA) came to the rescue and the rest of that evening was a joy. My TypeScript/JavaScript skills were rusty, but the online documentation was helpful. I had never understood the appeal of JSX (why put a DOM in JavaScript?) until it made connecting an `onEvent` handler and a function easy.
 | 
			
		||||
 | 
			
		||||
Some quick dimensional analysis later and there was a sine wave oscillator playing A=440 through the speakers. I specifically remember thinking "modern browsers are magical."
 | 
			
		||||
 | 
			
		||||
## Continuing on
 | 
			
		||||
 | 
			
		||||
Now comes the first mistake: I began to worry about "scale" before encountering an actual problem. Rather than rendering audio in the main thread, why not use audio worklets and render in a background thread instead?
 | 
			
		||||
 | 
			
		||||
The first sign something was amiss came from the TypeScript compiler errors showing the audio worklet API [was missing](https://github.com/microsoft/TypeScript/issues/28308). After searching through GitHub issues and (unsuccessfully) tweaking the `tsconfig.json` settings, I settled on installing a package and moving on.
 | 
			
		||||
 | 
			
		||||
The next problem came from actually using the API. Worklets must load from separate "modules," but it wasn't clear how to guarantee the worklet code stayed separate from the application. I saw recommendations to use `new URL(<local path>, import.meta.url)` and it worked! Well, kind of:
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
That file has the audio processor code, so why does it get served with `Content-Type: video/mp2t`?
 | 
			
		||||
 | 
			
		||||
## Floundering about
 | 
			
		||||
 | 
			
		||||
Now comes the second mistake: even though I didn't understand the error, I ignored recommendations to [just use JavaScript](https://hackernoon.com/implementing-audioworklets-with-react-8a80a470474) and stuck by the original TypeScript requirement.
 | 
			
		||||
 | 
			
		||||
I tried different project structures. Moving the worklet code to a new folder didn't help, nor did setting up a monorepo and placing it in a new package.
 | 
			
		||||
 | 
			
		||||
I tried three different CRA tools (`react-app-rewired`, `craco`, `customize-cra`) but got the same problem. Each has varying levels of compatibility with recent CRA versions, so it wasn't clear if I had the right solution but implemented it incorrectly. After attempting to eject the application and panicking after seeing the configuration, I abandoned that as well.
 | 
			
		||||
 | 
			
		||||
I tried changing the webpack configuration: using [new](https://github.com/webpack/webpack/issues/11543#issuecomment-917673256) [loaders](https://github.com/popelenkow/worker-url), setting [asset rules](https://github.com/webpack/webpack/discussions/14093#discussioncomment-1257149), even [changing how webpack detects worker resources](https://github.com/webpack/webpack/issues/11543#issuecomment-826897590). In hindsight, entry points may have been the answer. But because CRA actively resists attempts to change its webpack configuration, and I couldn't find audio worklet examples in any other framework, I gave up.
 | 
			
		||||
 | 
			
		||||
I tried so many application frameworks. Next.js looked like a good candidate, but added its own [bespoke webpack complexity](https://github.com/vercel/next.js/issues/24907) to the existing confusion. Astro had the best "getting started" experience, but I refuse to install an IDE-specific plugin. I first used Deno while exploring Lume, but it couldn't import the audio worklet types (maybe because of module compatibility?). Each framework was unique in its own way (shout-out to SvelteKit) but I couldn't figure out how to make them work.
 | 
			
		||||
 | 
			
		||||
## Learning and reflecting
 | 
			
		||||
 | 
			
		||||
I ended up using Vite and vite-plugin-react-pages to handle both "build the app" and "bundle worklets," but the specific tool choice isn't important. Instead, the focus should be on lessons learned.
 | 
			
		||||
 | 
			
		||||
For myself:
 | 
			
		||||
 | 
			
		||||
- I'm obsessed with tooling, to the point it can derail the original goal. While it comes from a good place (for example: "types are awesome"), it can get in the way of more important work
 | 
			
		||||
- I tend to reach for online resources right after seeing a new problem. While finding help online is often faster, spending time understanding the problem would have been more productive than cycling through (often outdated) blog posts
 | 
			
		||||
 | 
			
		||||
For the tools:
 | 
			
		||||
 | 
			
		||||
- Resource bundling is great and solves a genuine challenge. I've heard too many horror stories of developers writing modules by hand to believe this is unnecessary complexity
 | 
			
		||||
- Webpack is a build system and modern frameworks are deeply dependent on it (hence the "webpack industrial complex"). While this often saves users from unnecessary complexity, there's no path forward if something breaks
 | 
			
		||||
- There's little ability to mix and match tools across frameworks. Next.js and Gatsby let users extend webpack, but because each framework adds its own modules, changes aren't portable. After spending a week looking at webpack, I had an example running with parcel in thirty minutes, but couldn't integrate it
 | 
			
		||||
 | 
			
		||||
In the end, learning new systems is fun, but a focus on tools that "just work" can leave users out in the cold if they break down.
 | 
			
		||||
@ -1,15 +0,0 @@
 | 
			
		||||
@font-face {
 | 
			
		||||
    font-family: 'JetBrains Mono';
 | 
			
		||||
    src: url('/assets/font/JetBrainsMono-Regular.woff2') format('woff2'),
 | 
			
		||||
         url('/assets/font/JetBrainsMono-Regular.woff') format('woff');
 | 
			
		||||
    font-weight: normal;
 | 
			
		||||
    font-style: normal;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@font-face {
 | 
			
		||||
    font-family: 'Lato';
 | 
			
		||||
    src: url('/assets/font/lato-regular-webfont.woff2') format('woff2'),
 | 
			
		||||
         url('/assets/font/lato-regular-webfont.woff') format('woff');
 | 
			
		||||
    font-weight: normal;
 | 
			
		||||
    font-style: normal;
 | 
			
		||||
}
 | 
			
		||||
@ -1,119 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
---
 | 
			
		||||
 | 
			
		||||
// Import the theme rules
 | 
			
		||||
@import "theme";
 | 
			
		||||
 | 
			
		||||
body {
 | 
			
		||||
    max-width: 100%;
 | 
			
		||||
    overflow-x: hidden;
 | 
			
		||||
    font-family: 'Lato', sans-serif;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.navbar {
 | 
			
		||||
    color: $gray;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.separator {
 | 
			
		||||
    margin-right: .45rem;
 | 
			
		||||
    margin-left: .25rem;
 | 
			
		||||
    color: #000;
 | 
			
		||||
    &:after {
 | 
			
		||||
        content: '\00a0/';
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
header {
 | 
			
		||||
    padding-top: 80px;
 | 
			
		||||
    padding-bottom: 0;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
header h1,h2 {
 | 
			
		||||
    color: #000;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.post-description {
 | 
			
		||||
    color: #555;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.post-container a {
 | 
			
		||||
    color: #555;
 | 
			
		||||
    border-bottom-color: $gray;
 | 
			
		||||
    border-bottom-style: dotted;
 | 
			
		||||
    border-bottom-width: 1px;
 | 
			
		||||
 | 
			
		||||
    position: relative;
 | 
			
		||||
    display: inline-block;
 | 
			
		||||
    padding: 1px 1px;
 | 
			
		||||
    transition: color ease 0.3s;
 | 
			
		||||
 | 
			
		||||
    &::after {
 | 
			
		||||
      content: '';
 | 
			
		||||
      position: absolute;
 | 
			
		||||
      z-index: -1;
 | 
			
		||||
      width: 100%;
 | 
			
		||||
      height: 0%;
 | 
			
		||||
      left: 0;
 | 
			
		||||
      bottom: 0;
 | 
			
		||||
      background-color: $gray;
 | 
			
		||||
      transition: all ease 0.3s;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    &:hover {
 | 
			
		||||
      color: #fff;
 | 
			
		||||
      border-bottom-style: solid;
 | 
			
		||||
      &::after {
 | 
			
		||||
        height: 100%;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
body pre {
 | 
			
		||||
    font-size: 15px;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pre.highlight, code {
 | 
			
		||||
    font-family: 'JetBrains Mono', monospace;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
div.highlighter-rouge {
 | 
			
		||||
    // Default theme uses `width: 100vw`, which while cool, does cause the page
 | 
			
		||||
    // to exceed screen width and trigger horizontal scrolling. No bueno.
 | 
			
		||||
    width: 99vw;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.post-date {
 | 
			
		||||
    // On the front page, make sure titles don't force wrapping the date box content
 | 
			
		||||
    text-align: right;
 | 
			
		||||
    white-space: nowrap;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
blockquote {
 | 
			
		||||
    color: #555;
 | 
			
		||||
    right: 100px;
 | 
			
		||||
    margin-left: 0;
 | 
			
		||||
    padding-left: 1.8rem;
 | 
			
		||||
    border-left: 5px solid $gray;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
.post-nav {
 | 
			
		||||
    /* Insert your custom styling here. Example:
 | 
			
		||||
 | 
			
		||||
       font-size: 14px;
 | 
			
		||||
    */
 | 
			
		||||
    display: flex;
 | 
			
		||||
    margin-top: 1em;
 | 
			
		||||
    margin-bottom: 1em;
 | 
			
		||||
}
 | 
			
		||||
.post-nav div {
 | 
			
		||||
    /* flex-grow, flex-shrink, flex-basis */
 | 
			
		||||
    flex: 1 1 0;
 | 
			
		||||
}
 | 
			
		||||
.post-nav-next {
 | 
			
		||||
    text-align: right;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
th, td {
 | 
			
		||||
    border-bottom: 1px solid $gray;
 | 
			
		||||
    padding: 0.75em;
 | 
			
		||||
}
 | 
			
		||||
| 
		 Before Width: | Height: | Size: 840 KiB  | 
| 
		 Before Width: | Height: | Size: 926 KiB  | 
| 
		 Before Width: | Height: | Size: 165 KiB  | 
| 
		 Before Width: | Height: | Size: 50 KiB  | 
| 
		 Before Width: | Height: | Size: 48 KiB  | 
| 
		 Before Width: | Height: | Size: 71 KiB  | 
| 
		 Before Width: | Height: | Size: 68 KiB  | 
| 
		 Before Width: | Height: | Size: 23 KiB  | 
| 
		 Before Width: | Height: | Size: 24 KiB  | 
| 
		 Before Width: | Height: | Size: 124 KiB  | 
| 
		 Before Width: | Height: | Size: 145 KiB  | 
| 
		 Before Width: | Height: | Size: 135 KiB  | 
| 
		 Before Width: | Height: | Size: 138 KiB  | 
| 
		 Before Width: | Height: | Size: 98 KiB  | 
| 
		 Before Width: | Height: | Size: 134 KiB  | 
| 
		 Before Width: | Height: | Size: 426 KiB  | 
| 
		 Before Width: | Height: | Size: 304 KiB  | 
| 
		 Before Width: | Height: | Size: 344 KiB  | 
| 
		 Before Width: | Height: | Size: 296 KiB  | 
| 
		 Before Width: | Height: | Size: 377 KiB  | 
| 
		 Before Width: | Height: | Size: 169 KiB  | 
| 
		 Before Width: | Height: | Size: 140 KiB  | 
| 
		 Before Width: | Height: | Size: 194 KiB  | 
| 
		 Before Width: | Height: | Size: 48 KiB  | 
							
								
								
									
										6
									
								
								index.md
									
									
									
									
									
								
							
							
						
						@ -1,6 +0,0 @@
 | 
			
		||||
---
 | 
			
		||||
# Feel free to add content and custom Front Matter to this file.
 | 
			
		||||
# To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults
 | 
			
		||||
 | 
			
		||||
layout: home
 | 
			
		||||
---
 | 
			
		||||
							
								
								
									
										3163
									
								
								package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										32
									
								
								package.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,32 @@
 | 
			
		||||
{
 | 
			
		||||
  "name": "speice.io",
 | 
			
		||||
  "private": true,
 | 
			
		||||
  "version": "0.0.0",
 | 
			
		||||
  "type": "module",
 | 
			
		||||
  "scripts": {
 | 
			
		||||
    "dev": "vite",
 | 
			
		||||
    "build": "tsc && vite build",
 | 
			
		||||
    "preview": "vite preview",
 | 
			
		||||
    "prepare": "husky install"
 | 
			
		||||
  },
 | 
			
		||||
  "dependencies": {
 | 
			
		||||
    "react": "^18.2.0",
 | 
			
		||||
    "react-dom": "^18.2.0"
 | 
			
		||||
  },
 | 
			
		||||
  "devDependencies": {
 | 
			
		||||
    "@bspeice/vite-plugin-blog": "^1.1.0",
 | 
			
		||||
    "@mdx-js/rollup": "^2.3.0",
 | 
			
		||||
    "@types/react": "^18.0.28",
 | 
			
		||||
    "@types/react-dom": "^18.0.11",
 | 
			
		||||
    "@vitejs/plugin-react-swc": "^3.0.0",
 | 
			
		||||
    "husky": "^8.0.3",
 | 
			
		||||
    "pretty-quick": "^3.1.3",
 | 
			
		||||
    "typescript": "^4.9.3",
 | 
			
		||||
    "vite": "^4.2.0"
 | 
			
		||||
  },
 | 
			
		||||
  "husky": {
 | 
			
		||||
    "hooks": {
 | 
			
		||||
      "pre-commit": "pretty-quick --staged"
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										9
									
								
								pages/index.tsx
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,9 @@
 | 
			
		||||
import React from "react";
 | 
			
		||||
 | 
			
		||||
export default function Page() {
 | 
			
		||||
    return (
 | 
			
		||||
        <>
 | 
			
		||||
            <p>Is this thing on?</p>
 | 
			
		||||
        </>
 | 
			
		||||
    );
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										19
									
								
								tsconfig.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,19 @@
 | 
			
		||||
{
 | 
			
		||||
  "compilerOptions": {
 | 
			
		||||
    "target": "ESNext",
 | 
			
		||||
    "useDefineForClassFields": true,
 | 
			
		||||
    "lib": ["DOM", "DOM.Iterable", "ESNext"],
 | 
			
		||||
    "allowJs": false,
 | 
			
		||||
    "skipLibCheck": true,
 | 
			
		||||
    "esModuleInterop": false,
 | 
			
		||||
    "allowSyntheticDefaultImports": true,
 | 
			
		||||
    "strict": true,
 | 
			
		||||
    "forceConsistentCasingInFileNames": true,
 | 
			
		||||
    "module": "ESNext",
 | 
			
		||||
    "moduleResolution": "Node",
 | 
			
		||||
    "resolveJsonModule": true,
 | 
			
		||||
    "isolatedModules": true,
 | 
			
		||||
    "noEmit": true,
 | 
			
		||||
    "jsx": "react-jsx"
 | 
			
		||||
  },
 | 
			
		||||
}
 | 
			
		||||
							
								
								
									
										14
									
								
								vite.config.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,14 @@
 | 
			
		||||
import { defineConfig } from 'vite'
 | 
			
		||||
import blog from "@bspeice/vite-plugin-blog"
 | 
			
		||||
import mdx from "@mdx-js/rollup"
 | 
			
		||||
import react from '@vitejs/plugin-react-swc'
 | 
			
		||||
 | 
			
		||||
export default defineConfig({
 | 
			
		||||
  plugins: [
 | 
			
		||||
    blog({
 | 
			
		||||
      "/": "/pages/index"
 | 
			
		||||
    }),
 | 
			
		||||
    mdx(),
 | 
			
		||||
    react()
 | 
			
		||||
  ],
 | 
			
		||||
})
 | 
			
		||||