Initial commit for new blog generator
@ -1,6 +1,2 @@
 | 
				
			|||||||
FROM mcr.microsoft.com/vscode/devcontainers/ruby:0-2.7-bullseye
 | 
					ARG VARIANT=16-bullseye
 | 
				
			||||||
 | 
					FROM mcr.microsoft.com/vscode/devcontainers/typescript-node:0-${VARIANT}
 | 
				
			||||||
RUN wget https://github.com/errata-ai/vale/releases/download/v2.21.0/vale_2.21.0_Linux_64-bit.tar.gz -O /tmp/vale.tar.gz \
 | 
					 | 
				
			||||||
 && cd /usr/local/bin \
 | 
					 | 
				
			||||||
 && tar xf /tmp/vale.tar.gz \
 | 
					 | 
				
			||||||
 && rm /tmp/vale.tar.gz
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,13 +1,33 @@
 | 
				
			|||||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
 | 
					// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
 | 
				
			||||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/ruby
 | 
					// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/typescript-node
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	"name": "Ruby",
 | 
					  "name": "Node.js & TypeScript",
 | 
				
			||||||
	"build": {
 | 
					  "build": {
 | 
				
			||||||
		"dockerfile": "Dockerfile"
 | 
					    "dockerfile": "Dockerfile",
 | 
				
			||||||
	},
 | 
					    // Update 'VARIANT' to pick a Node version: 18, 16, 14.
 | 
				
			||||||
	"runArgs": ["--userns=keep-id"],
 | 
					    // Append -bullseye or -buster to pin to an OS version.
 | 
				
			||||||
 | 
					    // Use -bullseye variants on local on arm64/Apple Silicon.
 | 
				
			||||||
 | 
					    "args": {
 | 
				
			||||||
 | 
					      "VARIANT": "18-bullseye"
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "runArgs": ["--userns=keep-id"],
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	"remoteUser": "vscode",
 | 
					  // Configure tool-specific properties.
 | 
				
			||||||
	"containerUser": "vscode",
 | 
					  "customizations": {
 | 
				
			||||||
	"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
 | 
					    // Configure properties specific to VS Code.
 | 
				
			||||||
 | 
					    "vscode": {
 | 
				
			||||||
 | 
					      // Add the IDs of extensions you want installed when the container is created.
 | 
				
			||||||
 | 
					      "extensions": ["dbaeumer.vscode-eslint"]
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Use 'forwardPorts' to make a list of ports inside the container available locally.
 | 
				
			||||||
 | 
					  // "forwardPorts": [],
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Use 'postCreateCommand' to run commands after the container is created.
 | 
				
			||||||
 | 
					  // "postCreateCommand": "yarn install",
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Comment out to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root.
 | 
				
			||||||
 | 
					  "workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										32
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						@ -1,8 +1,24 @@
 | 
				
			|||||||
_site/
 | 
					# Logs
 | 
				
			||||||
.swp
 | 
					logs
 | 
				
			||||||
.sass-cache/
 | 
					*.log
 | 
				
			||||||
.jekyll-metadata
 | 
					npm-debug.log*
 | 
				
			||||||
.bundle/
 | 
					yarn-debug.log*
 | 
				
			||||||
vendor/
 | 
					yarn-error.log*
 | 
				
			||||||
.styles/
 | 
					pnpm-debug.log*
 | 
				
			||||||
.vscode/
 | 
					lerna-debug.log*
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					node_modules
 | 
				
			||||||
 | 
					dist
 | 
				
			||||||
 | 
					dist-ssr
 | 
				
			||||||
 | 
					*.local
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Editor directories and files
 | 
				
			||||||
 | 
					.vscode/*
 | 
				
			||||||
 | 
					!.vscode/extensions.json
 | 
				
			||||||
 | 
					.idea
 | 
				
			||||||
 | 
					.DS_Store
 | 
				
			||||||
 | 
					*.suo
 | 
				
			||||||
 | 
					*.ntvs*
 | 
				
			||||||
 | 
					*.njsproj
 | 
				
			||||||
 | 
					*.sln
 | 
				
			||||||
 | 
					*.sw?
 | 
				
			||||||
 | 
				
			|||||||
@ -1,7 +0,0 @@
 | 
				
			|||||||
StylesPath = .styles
 | 
					 | 
				
			||||||
MinAlertLevel = suggestion
 | 
					 | 
				
			||||||
Packages = Microsoft, write-good
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[*]
 | 
					 | 
				
			||||||
BasedOnStyles = Vale, Microsoft, write-good
 | 
					 | 
				
			||||||
write-good.E-Prime = NO
 | 
					 | 
				
			||||||
							
								
								
									
										24
									
								
								404.html
									
									
									
									
									
								
							
							
						
						@ -1,24 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: page
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<style type="text/css" media="screen">
 | 
					 | 
				
			||||||
  .container {
 | 
					 | 
				
			||||||
    margin: 10px auto;
 | 
					 | 
				
			||||||
    max-width: 600px;
 | 
					 | 
				
			||||||
    text-align: center;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  h1 {
 | 
					 | 
				
			||||||
    margin: 30px 0;
 | 
					 | 
				
			||||||
    font-size: 4em;
 | 
					 | 
				
			||||||
    line-height: 1;
 | 
					 | 
				
			||||||
    letter-spacing: -1px;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
</style>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<div class="container">
 | 
					 | 
				
			||||||
  <h1>404</h1>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  <p><strong>Page not found :(</strong></p>
 | 
					 | 
				
			||||||
  <p>The requested page could not be found.</p>
 | 
					 | 
				
			||||||
</div>
 | 
					 | 
				
			||||||
							
								
								
									
										29
									
								
								Gemfile
									
									
									
									
									
								
							
							
						
						@ -1,29 +0,0 @@
 | 
				
			|||||||
source "https://rubygems.org"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Hello! This is where you manage which Jekyll version is used to run.
 | 
					 | 
				
			||||||
# When you want to use a different version, change it below, save the
 | 
					 | 
				
			||||||
# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
#     bundle exec jekyll serve
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# This will help ensure the proper Jekyll version is running.
 | 
					 | 
				
			||||||
# Happy Jekylling!
 | 
					 | 
				
			||||||
gem "jekyll", "~> 3.8.3"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
gem "texture"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
 | 
					 | 
				
			||||||
# uncomment the line below. To upgrade, run `bundle update github-pages`.
 | 
					 | 
				
			||||||
# gem "github-pages", group: :jekyll_plugins
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# If you have any plugins, put them here!
 | 
					 | 
				
			||||||
group :jekyll_plugins do
 | 
					 | 
				
			||||||
  gem "jekyll-feed", "~> 0.6"
 | 
					 | 
				
			||||||
  gem "jekyll-remote-theme"
 | 
					 | 
				
			||||||
end
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
 | 
					 | 
				
			||||||
gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Performance-booster for watching directories on Windows
 | 
					 | 
				
			||||||
gem "wdm", "~> 0.1.0" if Gem.win_platform?
 | 
					 | 
				
			||||||
							
								
								
									
										78
									
								
								Gemfile.lock
									
									
									
									
									
								
							
							
						
						@ -1,78 +0,0 @@
 | 
				
			|||||||
GEM
 | 
					 | 
				
			||||||
  remote: https://rubygems.org/
 | 
					 | 
				
			||||||
  specs:
 | 
					 | 
				
			||||||
    addressable (2.7.0)
 | 
					 | 
				
			||||||
      public_suffix (>= 2.0.2, < 5.0)
 | 
					 | 
				
			||||||
    colorator (1.1.0)
 | 
					 | 
				
			||||||
    concurrent-ruby (1.1.6)
 | 
					 | 
				
			||||||
    em-websocket (0.5.1)
 | 
					 | 
				
			||||||
      eventmachine (>= 0.12.9)
 | 
					 | 
				
			||||||
      http_parser.rb (~> 0.6.0)
 | 
					 | 
				
			||||||
    eventmachine (1.2.7)
 | 
					 | 
				
			||||||
    ffi (1.12.2)
 | 
					 | 
				
			||||||
    forwardable-extended (2.6.0)
 | 
					 | 
				
			||||||
    http_parser.rb (0.6.0)
 | 
					 | 
				
			||||||
    i18n (0.9.5)
 | 
					 | 
				
			||||||
      concurrent-ruby (~> 1.0)
 | 
					 | 
				
			||||||
    jekyll (3.8.6)
 | 
					 | 
				
			||||||
      addressable (~> 2.4)
 | 
					 | 
				
			||||||
      colorator (~> 1.0)
 | 
					 | 
				
			||||||
      em-websocket (~> 0.5)
 | 
					 | 
				
			||||||
      i18n (~> 0.7)
 | 
					 | 
				
			||||||
      jekyll-sass-converter (~> 1.0)
 | 
					 | 
				
			||||||
      jekyll-watch (~> 2.0)
 | 
					 | 
				
			||||||
      kramdown (~> 1.14)
 | 
					 | 
				
			||||||
      liquid (~> 4.0)
 | 
					 | 
				
			||||||
      mercenary (~> 0.3.3)
 | 
					 | 
				
			||||||
      pathutil (~> 0.9)
 | 
					 | 
				
			||||||
      rouge (>= 1.7, < 4)
 | 
					 | 
				
			||||||
      safe_yaml (~> 1.0)
 | 
					 | 
				
			||||||
    jekyll-feed (0.13.0)
 | 
					 | 
				
			||||||
      jekyll (>= 3.7, < 5.0)
 | 
					 | 
				
			||||||
    jekyll-remote-theme (0.4.2)
 | 
					 | 
				
			||||||
      addressable (~> 2.0)
 | 
					 | 
				
			||||||
      jekyll (>= 3.5, < 5.0)
 | 
					 | 
				
			||||||
      jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
 | 
					 | 
				
			||||||
      rubyzip (>= 1.3.0, < 3.0)
 | 
					 | 
				
			||||||
    jekyll-sass-converter (1.5.2)
 | 
					 | 
				
			||||||
      sass (~> 3.4)
 | 
					 | 
				
			||||||
    jekyll-seo-tag (2.6.1)
 | 
					 | 
				
			||||||
      jekyll (>= 3.3, < 5.0)
 | 
					 | 
				
			||||||
    jekyll-watch (2.2.1)
 | 
					 | 
				
			||||||
      listen (~> 3.0)
 | 
					 | 
				
			||||||
    kramdown (1.17.0)
 | 
					 | 
				
			||||||
    liquid (4.0.3)
 | 
					 | 
				
			||||||
    listen (3.2.1)
 | 
					 | 
				
			||||||
      rb-fsevent (~> 0.10, >= 0.10.3)
 | 
					 | 
				
			||||||
      rb-inotify (~> 0.9, >= 0.9.10)
 | 
					 | 
				
			||||||
    mercenary (0.3.6)
 | 
					 | 
				
			||||||
    pathutil (0.16.2)
 | 
					 | 
				
			||||||
      forwardable-extended (~> 2.6)
 | 
					 | 
				
			||||||
    public_suffix (4.0.4)
 | 
					 | 
				
			||||||
    rb-fsevent (0.10.3)
 | 
					 | 
				
			||||||
    rb-inotify (0.10.1)
 | 
					 | 
				
			||||||
      ffi (~> 1.0)
 | 
					 | 
				
			||||||
    rouge (3.17.0)
 | 
					 | 
				
			||||||
    rubyzip (2.3.0)
 | 
					 | 
				
			||||||
    safe_yaml (1.0.5)
 | 
					 | 
				
			||||||
    sass (3.7.4)
 | 
					 | 
				
			||||||
      sass-listen (~> 4.0.0)
 | 
					 | 
				
			||||||
    sass-listen (4.0.0)
 | 
					 | 
				
			||||||
      rb-fsevent (~> 0.9, >= 0.9.4)
 | 
					 | 
				
			||||||
      rb-inotify (~> 0.9, >= 0.9.7)
 | 
					 | 
				
			||||||
    texture (0.3)
 | 
					 | 
				
			||||||
      jekyll (~> 3.7)
 | 
					 | 
				
			||||||
      jekyll-seo-tag (~> 2.1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
PLATFORMS
 | 
					 | 
				
			||||||
  ruby
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
DEPENDENCIES
 | 
					 | 
				
			||||||
  jekyll (~> 3.8.3)
 | 
					 | 
				
			||||||
  jekyll-feed (~> 0.6)
 | 
					 | 
				
			||||||
  jekyll-remote-theme
 | 
					 | 
				
			||||||
  texture
 | 
					 | 
				
			||||||
  tzinfo-data
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
BUNDLED WITH
 | 
					 | 
				
			||||||
   2.1.4
 | 
					 | 
				
			||||||
							
								
								
									
										44
									
								
								_config.yml
									
									
									
									
									
								
							
							
						
						@ -1,44 +0,0 @@
 | 
				
			|||||||
# Welcome to Jekyll!
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# This config file is meant for settings that affect your whole blog, values
 | 
					 | 
				
			||||||
# which you are expected to set up once and rarely edit after that. If you find
 | 
					 | 
				
			||||||
# yourself editing this file very often, consider using Jekyll's data files
 | 
					 | 
				
			||||||
# feature for the data you need to update frequently.
 | 
					 | 
				
			||||||
#
 | 
					 | 
				
			||||||
# For technical reasons, this file is *NOT* reloaded automatically when you use
 | 
					 | 
				
			||||||
# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Site settings
 | 
					 | 
				
			||||||
# These are used to personalize your new site. If you look in the HTML files,
 | 
					 | 
				
			||||||
# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
 | 
					 | 
				
			||||||
# You can create any custom variable you would like, and they will be accessible
 | 
					 | 
				
			||||||
# in the templates via {{ site.myvariable }}.
 | 
					 | 
				
			||||||
title: speice.io
 | 
					 | 
				
			||||||
description: The Old Speice Guy
 | 
					 | 
				
			||||||
email: bradlee@speice.io
 | 
					 | 
				
			||||||
baseurl: "" # the subpath of your site, e.g. /blog
 | 
					 | 
				
			||||||
url: "https://speice.io/" # the base hostname & protocol for your site, e.g. http://example.com
 | 
					 | 
				
			||||||
github_username:  bspeice
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Build settings
 | 
					 | 
				
			||||||
markdown: kramdown
 | 
					 | 
				
			||||||
# theme: texture
 | 
					 | 
				
			||||||
remote_theme: thelehhman/texture
 | 
					 | 
				
			||||||
plugins:
 | 
					 | 
				
			||||||
  - jekyll-feed
 | 
					 | 
				
			||||||
  - jekyll-remote-theme
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
include: [_pages]
 | 
					 | 
				
			||||||
permalink: /:year/:month/:title.html
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Exclude from processing.
 | 
					 | 
				
			||||||
# The following items will not be processed, by default. Create a custom list
 | 
					 | 
				
			||||||
# to override the default setting.
 | 
					 | 
				
			||||||
# exclude:
 | 
					 | 
				
			||||||
#   - Gemfile
 | 
					 | 
				
			||||||
#   - Gemfile.lock
 | 
					 | 
				
			||||||
#   - node_modules
 | 
					 | 
				
			||||||
#   - vendor/bundle/
 | 
					 | 
				
			||||||
#   - vendor/cache/
 | 
					 | 
				
			||||||
#   - vendor/gems/
 | 
					 | 
				
			||||||
#   - vendor/ruby/
 | 
					 | 
				
			||||||
@ -1,23 +0,0 @@
 | 
				
			|||||||
{% if page.layout == 'post' %}
 | 
					 | 
				
			||||||
{% comment %}Thanks to https://www.bytedude.com/jekyll-previous-and-next-posts/{% endcomment %}
 | 
					 | 
				
			||||||
<div class="container">
 | 
					 | 
				
			||||||
    <hr>
 | 
					 | 
				
			||||||
    <div class="post-nav">
 | 
					 | 
				
			||||||
        <div>
 | 
					 | 
				
			||||||
            {% if page.previous.url %}
 | 
					 | 
				
			||||||
            <a href="{{page.previous.url}}">« {{page.previous.title}}</a>
 | 
					 | 
				
			||||||
            {% endif %}
 | 
					 | 
				
			||||||
        </div>
 | 
					 | 
				
			||||||
        <div class="post-nav-next">
 | 
					 | 
				
			||||||
            {% if page.next.url %}
 | 
					 | 
				
			||||||
            <a href="{{page.next.url}}">{{page.next.title}} »</a>
 | 
					 | 
				
			||||||
            {% endif %}
 | 
					 | 
				
			||||||
        </div>
 | 
					 | 
				
			||||||
    </div>
 | 
					 | 
				
			||||||
</div>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<script type="text/javascript"
 | 
					 | 
				
			||||||
    src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
 | 
					 | 
				
			||||||
    </script>
 | 
					 | 
				
			||||||
{% endif %}
 | 
					 | 
				
			||||||
@ -1,7 +0,0 @@
 | 
				
			|||||||
<meta charset="UTF-8">
 | 
					 | 
				
			||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
					 | 
				
			||||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
 | 
					 | 
				
			||||||
<link rel="stylesheet" href="{{ "/assets/css/style.css" | relative_url }}">
 | 
					 | 
				
			||||||
<link rel="stylesheet" href="{{ "/assets/css/fonts.css" | prepend: site.baseurl }}">
 | 
					 | 
				
			||||||
<title>{{ page.title | default: site.title }}</title>
 | 
					 | 
				
			||||||
{% seo %}
 | 
					 | 
				
			||||||
@ -1,7 +0,0 @@
 | 
				
			|||||||
<div class="navbar">
 | 
					 | 
				
			||||||
    <a href="{{ "/" | prepend: site.baseurl }}">Home</a>
 | 
					 | 
				
			||||||
    <span class="separator"></span>
 | 
					 | 
				
			||||||
    <a href="{{ "/about/" | prepend: site.baseurl }}">About</a>
 | 
					 | 
				
			||||||
    <span class="separator"></span>
 | 
					 | 
				
			||||||
    <a href="{{ "/feed.xml" | prepend: site.baseurl }}">RSS</a>
 | 
					 | 
				
			||||||
</div>
 | 
					 | 
				
			||||||
@ -1,15 +0,0 @@
 | 
				
			|||||||
<div class="container">
 | 
					 | 
				
			||||||
    <h2>{{ site.title }}</h1>
 | 
					 | 
				
			||||||
    <h1>{{ site.description }}</h2>
 | 
					 | 
				
			||||||
    <ul class="social">
 | 
					 | 
				
			||||||
        {%- if site.texture.social_links.github -%}
 | 
					 | 
				
			||||||
            <a href="https://github.com/{{ site.texture.social_links.github }}"><li><i class="icon-github-circled"></i></li></a>
 | 
					 | 
				
			||||||
        {%- endif -%}
 | 
					 | 
				
			||||||
        {%- if site.texture.social_links.linkedIn -%}
 | 
					 | 
				
			||||||
            <a href="https://linkedin.com/{{ site.texture.social_links.linkedIn }}"><li><i class="icon-linkedin-squared"></i></li></a>
 | 
					 | 
				
			||||||
        {%- endif -%}
 | 
					 | 
				
			||||||
        {%- if site.texture.social_links.twitter -%}
 | 
					 | 
				
			||||||
            <a href="https://twitter.com/{{ site.texture.social_links.twitter }}"><li><i class="icon-twitter-squared"></i></li></a>
 | 
					 | 
				
			||||||
        {%- endif -%}
 | 
					 | 
				
			||||||
    </ul>
 | 
					 | 
				
			||||||
</div>
 | 
					 | 
				
			||||||
@ -1,13 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: page
 | 
					 | 
				
			||||||
title: About
 | 
					 | 
				
			||||||
permalink: /about/
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Developer currently living in New York City.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Best ways to get in contact:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Email: [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
					 | 
				
			||||||
- Github: [bspeice](https://github.com/bspeice)
 | 
					 | 
				
			||||||
- LinkedIn: [bradleespeice](https://www.linkedin.com/in/bradleespeice/)
 | 
					 | 
				
			||||||
@ -1,38 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Hello!"
 | 
					 | 
				
			||||||
description: ""
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: []
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I'll do what I can to keep this short, there's plenty of other things we both should be doing right
 | 
					 | 
				
			||||||
now.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you're here for the bread pics, and to marvel in some other culinary side projects, I've got you
 | 
					 | 
				
			||||||
covered:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And no, I'm not posting pictures of earlier attempts that ended up turning into rocks in the oven.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Okay, just one:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you're here for keeping up with the man Bradlee Speice, got plenty of that too. Plus some
 | 
					 | 
				
			||||||
up-coming super-nerdy posts about how I'm changing the world.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And if you're not here for those things: don't have a lot for you, sorry. But you're welcome to let
 | 
					 | 
				
			||||||
me know what needs to change.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I'm looking forward to making this a place to talk about what's going on in life, I hope you'll
 | 
					 | 
				
			||||||
stick it out with me. The best way to follow what's going on is on my [About](/about/) page, but if
 | 
					 | 
				
			||||||
you want the joy of clicking links, here's a few good ones:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Email (people still use this?): [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
					 | 
				
			||||||
- Mastodon (nerd Twitter): [@bradlee](https://mastodon.social/@bradlee)
 | 
					 | 
				
			||||||
- Chat (RiotIM): [@bspeice:matrix.com](https://matrix.to/#/@bspeice:matrix.com)
 | 
					 | 
				
			||||||
- The comments section (not for people with sanity intact): ↓↓↓
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Thanks, and keep it amazing.
 | 
					 | 
				
			||||||
@ -1,177 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "What I Learned: Porting Dateutil Parser to Rust"
 | 
					 | 
				
			||||||
description: ""
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [dtparse, rust]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Hi. I'm Bradlee.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I've mostly been a lurker in Rust for a while, making a couple small contributions here and there.
 | 
					 | 
				
			||||||
So launching [dtparse](https://github.com/bspeice/dtparse) feels like nice step towards becoming a
 | 
					 | 
				
			||||||
functioning member of society. But not too much, because then you know people start asking you to
 | 
					 | 
				
			||||||
pay bills, and ain't nobody got time for that.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But I built dtparse, and you can read about my thoughts on the process. Or don't. I won't tell you
 | 
					 | 
				
			||||||
what to do with your life (but you should totally keep reading).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Slow down, what?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OK, fine, I guess I should start with _why_ someone would do this.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[Dateutil](https://github.com/dateutil/dateutil) is a Python library for handling dates. The
 | 
					 | 
				
			||||||
standard library support for time in Python is kinda dope, but there are a lot of extras that go
 | 
					 | 
				
			||||||
into making it useful beyond just the [datetime](https://docs.python.org/3.6/library/datetime.html)
 | 
					 | 
				
			||||||
module. `dateutil.parser` specifically is code to take all the super-weird time formats people come
 | 
					 | 
				
			||||||
up with and turn them into something actually useful.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Date/time parsing, it turns out, is just like everything else involving
 | 
					 | 
				
			||||||
[computers](https://infiniteundo.com/post/25326999628/falsehoods-programmers-believe-about-time) and
 | 
					 | 
				
			||||||
[time](https://infiniteundo.com/post/25509354022/more-falsehoods-programmers-believe-about-time): it
 | 
					 | 
				
			||||||
feels like it shouldn't be that difficult to do, until you try to do it, and you realize that people
 | 
					 | 
				
			||||||
suck and this is why
 | 
					 | 
				
			||||||
[we can't we have nice things](https://zachholman.com/talk/utc-is-enough-for-everyone-right). But
 | 
					 | 
				
			||||||
alas, we'll try and make contemporary art out of the rubble and give it a pretentious name like
 | 
					 | 
				
			||||||
_Time_.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> [Time](https://www.goodfreephotos.com/united-states/montana/elkhorn/remains-of-the-mining-operation-elkhorn.jpg.php)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
What makes `dateutil.parser` great is that there's single function with a single argument that
 | 
					 | 
				
			||||||
drives what programmers interact with:
 | 
					 | 
				
			||||||
[`parse(timestr)`](https://github.com/dateutil/dateutil/blob/6dde5d6298cfb81a4c594a38439462799ed2aef2/dateutil/parser/_parser.py#L1258).
 | 
					 | 
				
			||||||
It takes in the time as a string, and gives you back a reasonable "look, this is the best anyone can
 | 
					 | 
				
			||||||
possibly do to make sense of your input" value. It doesn't expect much of you.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[And now it's in Rust.](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L1332)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Lost in Translation
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Having worked at a bulge-bracket bank watching Java programmers try to be Python programmers, I'm
 | 
					 | 
				
			||||||
admittedly hesitant to publish Python code that's trying to be Rust. Interestingly, Rust code can
 | 
					 | 
				
			||||||
actually do a great job of mimicking Python. It's certainly not idiomatic Rust, but I've had better
 | 
					 | 
				
			||||||
experiences than
 | 
					 | 
				
			||||||
[this guy](https://webcache.googleusercontent.com/search?q=cache:wkYMpktJtnUJ:https://jackstouffer.com/blog/porting_dateutil.html+&cd=3&hl=en&ct=clnk&gl=us)
 | 
					 | 
				
			||||||
who attempted the same thing for D. These are the actual take-aways:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When transcribing code, **stay as close to the original library as possible**. I'm talking about
 | 
					 | 
				
			||||||
using the same variable names, same access patterns, the whole shebang. It's way too easy to make a
 | 
					 | 
				
			||||||
couple of typos, and all of a sudden your code blows up in new and exciting ways. Having a reference
 | 
					 | 
				
			||||||
manual for verbatim what your code should be means that you don't spend that long debugging
 | 
					 | 
				
			||||||
complicated logic, you're more looking for typos.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Also, **don't use nice Rust things like enums**. While
 | 
					 | 
				
			||||||
[one time it worked out OK for me](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L88-L94),
 | 
					 | 
				
			||||||
I also managed to shoot myself in the foot a couple times because `dateutil` stores AM/PM as a
 | 
					 | 
				
			||||||
boolean and I mixed up which was true, and which was false (side note: AM is false, PM is true). In
 | 
					 | 
				
			||||||
general, writing nice code _should not be a first-pass priority_ when you're just trying to recreate
 | 
					 | 
				
			||||||
the same functionality.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Exceptions are a pain.** Make peace with it. Python code is just allowed to skip stack frames. So
 | 
					 | 
				
			||||||
when a co-worker told me "Rust is getting try-catch syntax" I properly freaked out. Turns out
 | 
					 | 
				
			||||||
[he's not quite right](https://github.com/rust-lang/rfcs/pull/243), and I'm OK with that. And while
 | 
					 | 
				
			||||||
`dateutil` is pretty well-behaved about not skipping multiple stack frames,
 | 
					 | 
				
			||||||
[130-line try-catch blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L730-L865)
 | 
					 | 
				
			||||||
take a while to verify.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
As another Python quirk, **be very careful about
 | 
					 | 
				
			||||||
[long nested if-elif-else blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L494-L568)**.
 | 
					 | 
				
			||||||
I used to think that Python's whitespace was just there to get you to format your code correctly. I
 | 
					 | 
				
			||||||
think that no longer. It's way too easy to close a block too early and have incredibly weird issues
 | 
					 | 
				
			||||||
in the logic. Make sure you use an editor that displays indentation levels so you can keep things
 | 
					 | 
				
			||||||
straight.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Rust macros are not free.** I originally had the
 | 
					 | 
				
			||||||
[main test body](https://github.com/bspeice/dtparse/blob/b0e737f088eca8e83ab4244c6621a2797d247697/tests/compat.rs#L63-L217)
 | 
					 | 
				
			||||||
wrapped up in a macro using [pyo3](https://github.com/PyO3/PyO3). It took two minutes to compile.
 | 
					 | 
				
			||||||
After
 | 
					 | 
				
			||||||
[moving things to a function](https://github.com/bspeice/dtparse/blob/e017018295c670e4b6c6ee1cfff00dbb233db47d/tests/compat.rs#L76-L205)
 | 
					 | 
				
			||||||
compile times dropped down to ~5 seconds. Turns out 150 lines \* 100 tests = a lot of redundant code
 | 
					 | 
				
			||||||
to be compiled. My new rule of thumb is that any macros longer than 10-15 lines are actually
 | 
					 | 
				
			||||||
functions that need to be liberated, man.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, **I really miss list comprehensions and dictionary comprehensions.** As a quick comparison,
 | 
					 | 
				
			||||||
see
 | 
					 | 
				
			||||||
[this dateutil code](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L476)
 | 
					 | 
				
			||||||
and
 | 
					 | 
				
			||||||
[the implementation in Rust](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L619-L629).
 | 
					 | 
				
			||||||
I probably wrote it wrong, and I'm sorry. Ultimately though, I hope that these comprehensions can be
 | 
					 | 
				
			||||||
added through macros or syntax extensions. Either way, they're expressive, save typing, and are
 | 
					 | 
				
			||||||
super-readable. Let's get more of that.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Using a young language
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now, Rust is exciting and new, which means that there's opportunity to make a substantive impact. On
 | 
					 | 
				
			||||||
more than one occasion though, I've had issues navigating the Rust ecosystem.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
What I'll call the "canonical library" is still being built. In Python, if you need datetime
 | 
					 | 
				
			||||||
parsing, you use `dateutil`. If you want `decimal` types, it's already in the
 | 
					 | 
				
			||||||
[standard library](https://docs.python.org/3.6/library/decimal.html). While I might've gotten away
 | 
					 | 
				
			||||||
with `f64`, `dateutil` uses decimals, and I wanted to follow the principle of **staying as close to
 | 
					 | 
				
			||||||
the original library as possible**. Thus began my quest to find a decimal library in Rust. What I
 | 
					 | 
				
			||||||
quickly found was summarized in a comment:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> Writing a BigDecimal is easy. Writing a _good_ BigDecimal is hard.
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> [-cmr](https://github.com/rust-lang/rust/issues/8937#issuecomment-34582794)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In practice, this means that there are at least [4](https://crates.io/crates/bigdecimal)
 | 
					 | 
				
			||||||
[different](https://crates.io/crates/rust_decimal)
 | 
					 | 
				
			||||||
[implementations](https://crates.io/crates/decimal) [available](https://crates.io/crates/decimate).
 | 
					 | 
				
			||||||
And that's a lot of decisions to worry about when all I'm thinking is "why can't
 | 
					 | 
				
			||||||
[calendar reform](https://en.wikipedia.org/wiki/Calendar_reform) be a thing" and I'm forced to dig
 | 
					 | 
				
			||||||
through a [couple](https://github.com/rust-lang/rust/issues/8937#issuecomment-31661916)
 | 
					 | 
				
			||||||
[different](https://github.com/rust-lang/rfcs/issues/334)
 | 
					 | 
				
			||||||
[threads](https://github.com/rust-num/num/issues/8) to figure out if the library I'm looking at is dead
 | 
					 | 
				
			||||||
or just stable.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And even when the "canonical library" exists, there's no guarantees that it will be well-maintained.
 | 
					 | 
				
			||||||
[Chrono](https://github.com/chronotope/chrono) is the _de facto_ date/time library in Rust, and just
 | 
					 | 
				
			||||||
released version 0.4.4 like two days ago. Meanwhile,
 | 
					 | 
				
			||||||
[chrono-tz](https://github.com/chronotope/chrono-tz) appears to be dead in the water even though
 | 
					 | 
				
			||||||
[there are people happy to help maintain it](https://github.com/chronotope/chrono-tz/issues/19). I
 | 
					 | 
				
			||||||
know relatively little about it, but it appears that most of the release process is automated;
 | 
					 | 
				
			||||||
keeping that up to date should be a no-brainer.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Trial Maintenance Policy
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Specifically given "maintenance" being an
 | 
					 | 
				
			||||||
[oft-discussed](https://www.reddit.com/r/rust/comments/48540g/thoughts_on_initiators_vs_maintainers/)
 | 
					 | 
				
			||||||
issue, I'm going to try out the following policy to keep things moving on `dtparse`:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Issues/PRs needing _maintainer_ feedback will be updated at least weekly. I want to make sure
 | 
					 | 
				
			||||||
   nobody's blocking on me.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. To keep issues/PRs needing _contributor_ feedback moving, I'm going to (kindly) ask the
 | 
					 | 
				
			||||||
   contributor to check in after two weeks, and close the issue without resolution if I hear nothing
 | 
					 | 
				
			||||||
   back after a month.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The second point I think has the potential to be a bit controversial, so I'm happy to receive
 | 
					 | 
				
			||||||
feedback on that. And if a contributor responds with "hey, still working on it, had a kid and I'm
 | 
					 | 
				
			||||||
running on 30 seconds of sleep a night," then first: congratulations on sustaining human life. And
 | 
					 | 
				
			||||||
second: I don't mind keeping those requests going indefinitely. I just want to try and balance
 | 
					 | 
				
			||||||
keeping things moving with giving people the necessary time they need.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I should also note that I'm still getting some best practices in place - CONTRIBUTING and
 | 
					 | 
				
			||||||
CONTRIBUTORS files need to be added, as well as issue/PR templates. In progress. None of us are
 | 
					 | 
				
			||||||
perfect.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Roadmap and Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So if I've now built a `dateutil`-compatible parser, we're done, right? Of course not! That's not
 | 
					 | 
				
			||||||
nearly ambitious enough.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Ultimately, I'd love to have a library that's capable of parsing everything the Linux `date` command
 | 
					 | 
				
			||||||
can do (and not `date` on OSX, because seriously, BSD coreutils are the worst). I know Rust has a
 | 
					 | 
				
			||||||
coreutils rewrite going on, and `dtparse` would potentially be an interesting candidate since it
 | 
					 | 
				
			||||||
doesn't bring in a lot of extra dependencies. [`humantime`](https://crates.io/crates/humantime)
 | 
					 | 
				
			||||||
could help pick up some of the (current) slack in dtparse, so maybe we can share and care with each
 | 
					 | 
				
			||||||
other?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
All in all, I'm mostly hoping that nobody's already done this and I haven't spent a bit over a month
 | 
					 | 
				
			||||||
on redundant code. So if it exists, tell me. I need to know, but be nice about it, because I'm going
 | 
					 | 
				
			||||||
to take it hard.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And in the mean time, I'm looking forward to building more. Onwards.
 | 
					 | 
				
			||||||
@ -1,323 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Primitives in Rust are Weird (and Cool)"
 | 
					 | 
				
			||||||
description: "but mostly weird."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, c, java, python, x86]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I wrote a really small Rust program a while back because I was curious. I was 100% convinced it
 | 
					 | 
				
			||||||
couldn't possibly run:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    println!("{}", 8.to_string())
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And to my complete befuddlement, it compiled, ran, and produced a completely sensible output. The
 | 
					 | 
				
			||||||
reason I was so surprised has to do with how Rust treats a special category of things I'm going to
 | 
					 | 
				
			||||||
call _primitives_. In the current version of the Rust book, you'll see them referred to as
 | 
					 | 
				
			||||||
[scalars][rust_scalar], and in older versions they'll be called [primitives][rust_primitive], but
 | 
					 | 
				
			||||||
we're going to stick with the name _primitive_ for the time being. Explaining why this program is so
 | 
					 | 
				
			||||||
cool requires talking about a number of other programming languages, and keeping a consistent
 | 
					 | 
				
			||||||
terminology makes things easier.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**You've been warned:** this is going to be a tedious post about a relatively minor issue that
 | 
					 | 
				
			||||||
involves Java, Python, C, and x86 Assembly. And also me pretending like I know what I'm talking
 | 
					 | 
				
			||||||
about with assembly.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Defining primitives (Java)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The reason I'm using the name _primitive_ comes from how much of my life is Java right now. Spoiler
 | 
					 | 
				
			||||||
alert: a lot of it. And for the most part I like Java, but I digress. In Java, there's a special
 | 
					 | 
				
			||||||
name for some specific types of values:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> ```
 | 
					 | 
				
			||||||
> bool    char    byte
 | 
					 | 
				
			||||||
> short   int     long
 | 
					 | 
				
			||||||
> float   double
 | 
					 | 
				
			||||||
> ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
They are referred to as [primitives][java_primitive]. And relative to the other bits of Java,
 | 
					 | 
				
			||||||
they have two unique features. First, they don't have to worry about the
 | 
					 | 
				
			||||||
[billion-dollar mistake](https://en.wikipedia.org/wiki/Tony_Hoare#Apologies_and_retractions);
 | 
					 | 
				
			||||||
primitives in Java can never be `null`. Second: *they can't have instance methods*.
 | 
					 | 
				
			||||||
Remember that Rust program from earlier? Java has no idea what to do with it:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```java
 | 
					 | 
				
			||||||
class Main {
 | 
					 | 
				
			||||||
    public static void main(String[] args) {
 | 
					 | 
				
			||||||
        int x = 8;
 | 
					 | 
				
			||||||
        System.out.println(x.toString()); // Triggers a compiler error
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
````
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The error is:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
Main.java:5: error: int cannot be dereferenced
 | 
					 | 
				
			||||||
        System.out.println(x.toString());
 | 
					 | 
				
			||||||
                            ^
 | 
					 | 
				
			||||||
1 error
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Specifically, Java's [`Object`](https://docs.oracle.com/javase/10/docs/api/java/lang/Object.html)
 | 
					 | 
				
			||||||
and things that inherit from it are pointers under the hood, and we have to dereference them before
 | 
					 | 
				
			||||||
the fields and methods they define can be used. In contrast, _primitive types are just values_ -
 | 
					 | 
				
			||||||
there's nothing to be dereferenced. In memory, they're just a sequence of bits.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If we really want, we can turn the `int` into an
 | 
					 | 
				
			||||||
[`Integer`](https://docs.oracle.com/javase/10/docs/api/java/lang/Integer.html) and then dereference
 | 
					 | 
				
			||||||
it, but it's a bit wasteful:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```java
 | 
					 | 
				
			||||||
class Main {
 | 
					 | 
				
			||||||
    public static void main(String[] args) {
 | 
					 | 
				
			||||||
        int x = 8;
 | 
					 | 
				
			||||||
        Integer y = Integer.valueOf(x);
 | 
					 | 
				
			||||||
        System.out.println(y.toString());
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This creates the variable `y` of type `Integer` (which inherits `Object`), and at run time we
 | 
					 | 
				
			||||||
dereference `y` to locate the `toString()` function and call it. Rust obviously handles things a bit
 | 
					 | 
				
			||||||
differently, but we have to dig into the low-level details to see it in action.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Low Level Handling of Primitives (C)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We first need to build a foundation for reading and understanding the assembly code the final answer
 | 
					 | 
				
			||||||
requires. Let's begin with showing how the `C` language (and your computer) thinks about "primitive"
 | 
					 | 
				
			||||||
values in memory:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```c
 | 
					 | 
				
			||||||
void my_function(int num) {}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
int main() {
 | 
					 | 
				
			||||||
    int x = 8;
 | 
					 | 
				
			||||||
    my_function(x);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The [compiler explorer](https://godbolt.org/z/lgNYcc) gives us an easy way of showing off the
 | 
					 | 
				
			||||||
assembly-level code that's generated: <span style="font-size:.6em">whose output has been lightly
 | 
					 | 
				
			||||||
edited</span>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```nasm
 | 
					 | 
				
			||||||
main:
 | 
					 | 
				
			||||||
        push    rbp
 | 
					 | 
				
			||||||
        mov     rbp, rsp
 | 
					 | 
				
			||||||
        sub     rsp, 16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ; We assign the value `8` to `x` here
 | 
					 | 
				
			||||||
        mov     DWORD PTR [rbp-4], 8
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ; And copy the bits making up `x` to a location
 | 
					 | 
				
			||||||
        ; `my_function` can access (`edi`)
 | 
					 | 
				
			||||||
        mov     eax, DWORD PTR [rbp-4]
 | 
					 | 
				
			||||||
        mov     edi, eax
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ; Call `my_function` and give it control
 | 
					 | 
				
			||||||
        call    my_function
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        mov     eax, 0
 | 
					 | 
				
			||||||
        leave
 | 
					 | 
				
			||||||
        ret
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
my_function:
 | 
					 | 
				
			||||||
        push    rbp
 | 
					 | 
				
			||||||
        mov     rbp, rsp
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        ; Copy the bits out of the pre-determined location (`edi`)
 | 
					 | 
				
			||||||
        ; to somewhere we can use
 | 
					 | 
				
			||||||
        mov     DWORD PTR [rbp-4], edi
 | 
					 | 
				
			||||||
        nop
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        pop     rbp
 | 
					 | 
				
			||||||
        ret
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
At a really low level of memory, we're copying bits around using the [`mov`][x86_guide] instruction;
 | 
					 | 
				
			||||||
nothing crazy. But to show how similar Rust is, let's take a look at our program translated from C
 | 
					 | 
				
			||||||
to Rust:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn my_function(x: i32) {}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    let x = 8;
 | 
					 | 
				
			||||||
    my_function(x)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And the assembly generated when we stick it in the
 | 
					 | 
				
			||||||
[compiler explorer](https://godbolt.org/z/cAlmk0): <span style="font-size:.6em">again, lightly
 | 
					 | 
				
			||||||
edited</span>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```nasm
 | 
					 | 
				
			||||||
example::main:
 | 
					 | 
				
			||||||
  push rax
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ; Look familiar? We're copying bits to a location for `my_function`
 | 
					 | 
				
			||||||
  ; The compiler just optimizes out holding `x` in memory
 | 
					 | 
				
			||||||
  mov edi, 8
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ; Call `my_function` and give it control
 | 
					 | 
				
			||||||
  call example::my_function
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  pop rax
 | 
					 | 
				
			||||||
  ret
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
example::my_function:
 | 
					 | 
				
			||||||
  sub rsp, 4
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ; And copying those bits again, just like in C
 | 
					 | 
				
			||||||
  mov dword ptr [rsp], edi
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  add rsp, 4
 | 
					 | 
				
			||||||
  ret
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The generated Rust assembly is functionally pretty close to the C assembly: _When working with
 | 
					 | 
				
			||||||
primitives, we're just dealing with bits in memory_.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In Java we have to dereference a pointer to call its functions; in Rust, there's no pointer to
 | 
					 | 
				
			||||||
dereference. So what exactly is going on with this `.to_string()` function call?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# impl primitive (and Python)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now it's time to <strike>reveal my trap card</strike> show the revelation that tied all this
 | 
					 | 
				
			||||||
together: _Rust has implementations for its primitive types._ That's right, `impl` blocks aren't
 | 
					 | 
				
			||||||
only for `structs` and `traits`, primitives get them too. Don't believe me? Check out
 | 
					 | 
				
			||||||
[u32](https://doc.rust-lang.org/std/primitive.u32.html),
 | 
					 | 
				
			||||||
[f64](https://doc.rust-lang.org/std/primitive.f64.html) and
 | 
					 | 
				
			||||||
[char](https://doc.rust-lang.org/std/primitive.char.html) as examples.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But the really interesting bit is how Rust turns those `impl` blocks into assembly. Let's break out
 | 
					 | 
				
			||||||
the [compiler explorer](https://godbolt.org/z/6LBEwq) once again:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
pub fn main() {
 | 
					 | 
				
			||||||
    8.to_string()
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And the interesting bits in the assembly: <span style="font-size:.6em">heavily trimmed down</span>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```nasm
 | 
					 | 
				
			||||||
example::main:
 | 
					 | 
				
			||||||
  sub rsp, 24
 | 
					 | 
				
			||||||
  mov rdi, rsp
 | 
					 | 
				
			||||||
  lea rax, [rip + .Lbyte_str.u]
 | 
					 | 
				
			||||||
  mov rsi, rax
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  ; Cool stuff right here
 | 
					 | 
				
			||||||
  call <T as alloc::string::ToString>::to_string@PLT
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  mov rdi, rsp
 | 
					 | 
				
			||||||
  call core::ptr::drop_in_place
 | 
					 | 
				
			||||||
  add rsp, 24
 | 
					 | 
				
			||||||
  ret
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now, this assembly is a bit more complicated, but here's the big revelation: **we're calling
 | 
					 | 
				
			||||||
`to_string()` as a function that exists all on its own, and giving it the instance of `8`**. Instead
 | 
					 | 
				
			||||||
of thinking of the value 8 as an instance of `u32` and then peeking in to find the location of the
 | 
					 | 
				
			||||||
function we want to call (like Java), we have a function that exists outside of the instance and
 | 
					 | 
				
			||||||
just give that function the value `8`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This is an incredibly technical detail, but the interesting idea I had was this: _if `to_string()`
 | 
					 | 
				
			||||||
is a static function, can I refer to the unbound function and give it an instance?_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Better explained in code (and a [compiler explorer](https://godbolt.org/z/fJY-gA) link because I
 | 
					 | 
				
			||||||
seriously love this thing):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
struct MyVal {
 | 
					 | 
				
			||||||
    x: u32
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
impl MyVal {
 | 
					 | 
				
			||||||
    fn to_string(&self) -> String {
 | 
					 | 
				
			||||||
        self.x.to_string()
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn main() {
 | 
					 | 
				
			||||||
    let my_val = MyVal { x: 8 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // THESE ARE THE SAME
 | 
					 | 
				
			||||||
    my_val.to_string();
 | 
					 | 
				
			||||||
    MyVal::to_string(&my_val);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Rust is totally fine "binding" the function call to the instance, and also as a static.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
MIND == BLOWN.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Python does the same thing where I can both call functions bound to their instances and also call as
 | 
					 | 
				
			||||||
an unbound function where I give it the instance:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
class MyClass():
 | 
					 | 
				
			||||||
    x = 24
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def my_function(self):
 | 
					 | 
				
			||||||
        print(self.x)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
m = MyClass()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
m.my_function()
 | 
					 | 
				
			||||||
MyClass.my_function(m)
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And Python tries to make you _think_ that primitives can have instance methods...
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
>>> dir(8)
 | 
					 | 
				
			||||||
['__abs__', '__add__', '__and__', '__class__', '__cmp__', '__coerce__',
 | 
					 | 
				
			||||||
'__delattr__', '__div__', '__divmod__', '__doc__', '__float__', '__floordiv__',
 | 
					 | 
				
			||||||
...
 | 
					 | 
				
			||||||
'__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__',
 | 
					 | 
				
			||||||
...]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
>>> # Theoretically `8.__str__()` should exist, but:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
>>> 8.__str__()
 | 
					 | 
				
			||||||
  File "<stdin>", line 1
 | 
					 | 
				
			||||||
    8.__str__()
 | 
					 | 
				
			||||||
             ^
 | 
					 | 
				
			||||||
SyntaxError: invalid syntax
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
>>> # It will run if we assign it first though:
 | 
					 | 
				
			||||||
>>> x = 8
 | 
					 | 
				
			||||||
>>> x.__str__()
 | 
					 | 
				
			||||||
'8'
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...but in practice it's a bit complicated.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So while Python handles binding instance methods in a way similar to Rust, it's still not able to
 | 
					 | 
				
			||||||
run the example we started with.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This was a super-roundabout way of demonstrating it, but the way Rust handles incredibly minor
 | 
					 | 
				
			||||||
details like primitives leads to really cool effects. Primitives are optimized like C in how they
 | 
					 | 
				
			||||||
have a space-efficient memory layout, yet the language still has a lot of features I enjoy in Python
 | 
					 | 
				
			||||||
(like both instance and late binding).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And when you put it together, there are areas where Rust does cool things nobody else can; as a
 | 
					 | 
				
			||||||
quirky feature of Rust's type system, `8.to_string()` is actually valid code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now go forth and fool your friends into thinking you know assembly. This is all I've got.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[x86_guide]: http://www.cs.virginia.edu/~evans/cs216/guides/x86.html
 | 
					 | 
				
			||||||
[java_primitive]: https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html
 | 
					 | 
				
			||||||
[rust_scalar]: https://doc.rust-lang.org/book/second-edition/ch03-02-data-types.html#scalar-types
 | 
					 | 
				
			||||||
[rust_primitive]: https://doc.rust-lang.org/book/first-edition/primitive-types.html
 | 
					 | 
				
			||||||
@ -1,294 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Isomorphic Desktop Apps with Rust"
 | 
					 | 
				
			||||||
description: "Electron + WASM = ☣"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, javascript, webassembly]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Forgive me, but this is going to be a bit of a schizophrenic post. I both despise Javascript and the
 | 
					 | 
				
			||||||
modern ECMAScript ecosystem, and I'm stunned by its success doing some really cool things. It's
 | 
					 | 
				
			||||||
[this duality](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) that's
 | 
					 | 
				
			||||||
led me to a couple of (very) late nights over the past weeks trying to reconcile myself as I
 | 
					 | 
				
			||||||
bootstrap a simple desktop application.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See, as much as
 | 
					 | 
				
			||||||
[Webassembly isn't trying to replace Javascript](https://webassembly.org/docs/faq/#is-webassembly-trying-to-replace-javascript),
 | 
					 | 
				
			||||||
**I want Javascript gone**. There are plenty of people who don't share my views, and they are
 | 
					 | 
				
			||||||
probably nicer and more fun at parties. But I cringe every time "Webpack" is mentioned, and I think
 | 
					 | 
				
			||||||
it's hilarious that the
 | 
					 | 
				
			||||||
[language specification](https://ecma-international.org/publications/standards/Ecma-402.htm)
 | 
					 | 
				
			||||||
dramatically outpaces anyone's
 | 
					 | 
				
			||||||
[actual implementation](https://kangax.github.io/compat-table/es2016plus/). The answer to this
 | 
					 | 
				
			||||||
conundrum is of course to recompile code from newer versions of the language to older versions _of
 | 
					 | 
				
			||||||
the same language_ before running. At least [Babel] is a nice tongue-in-cheek reference.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Yet for as much hate as [Electron] receives, it does a stunningly good job at solving a really hard
 | 
					 | 
				
			||||||
problem: _how the hell do I put a button on the screen and react when the user clicks it_? GUI
 | 
					 | 
				
			||||||
programming is hard, straight up. But if browsers are already able to run everywhere, why don't we
 | 
					 | 
				
			||||||
take advantage of someone else solving the hard problems for us? I don't like that I have to use
 | 
					 | 
				
			||||||
Javascript for it, but I really don't feel inclined to whip out good ol' [wxWidgets].
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now there are other native solutions ([libui-rs], [conrod], [oh hey wxWidgets again!][wxrust]), but
 | 
					 | 
				
			||||||
those also have their own issues with distribution, styling, etc. With Electron, I can
 | 
					 | 
				
			||||||
`yarn create electron-app my-app` and just get going, knowing that packaging/upgrades/etc. are built
 | 
					 | 
				
			||||||
in.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
My question is: given recent innovations with WASM, _are we Electron yet_?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
No, not really.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Instead, **what would it take to get to a point where we can skip Javascript in Electron apps?**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Setting the Stage
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Truth is, WASM/Webassembly is a pretty new technology and I'm a total beginner in this area. There
 | 
					 | 
				
			||||||
may already be solutions to the issues I discuss, but I'm totally unaware of them, so I'm going to
 | 
					 | 
				
			||||||
try and organize what I did manage to discover.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I should also mention that the content and things I'm talking about here are not intended to be
 | 
					 | 
				
			||||||
prescriptive, but more "if someone else is interested, what do we already know doesn't work?" _I
 | 
					 | 
				
			||||||
expect everything in this post to be obsolete within two months._ Even over the course of writing
 | 
					 | 
				
			||||||
this, [a separate blog post](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/) had
 | 
					 | 
				
			||||||
to be modified because [upstream changes](https://github.com/WebAssembly/binaryen/pull/1642) broke a
 | 
					 | 
				
			||||||
[Rust tool](https://github.com/rustwasm/wasm-bindgen/pull/787) the post tried to use. The post
 | 
					 | 
				
			||||||
ultimately
 | 
					 | 
				
			||||||
[got updated](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/#comment-477), **but
 | 
					 | 
				
			||||||
all this happened within the span of a week.** Things are moving quickly.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I'll also note that we're going to skip [asm.js] and [emscripten]. Truth be told, I couldn't get
 | 
					 | 
				
			||||||
either of these to output anything, and so I'm just going to say
 | 
					 | 
				
			||||||
[here be dragons.](https://en.wikipedia.org/wiki/Here_be_dragons) Everything I'm discussing here
 | 
					 | 
				
			||||||
uses the `wasm32-unknown-unknown` target.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The code that I _did_ get running is available
 | 
					 | 
				
			||||||
[over here](https://github.com/speice-io/isomorphic-rust). Feel free to use it as a starting point,
 | 
					 | 
				
			||||||
but I'm mostly including the link as a reference for the things that were attempted.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# An Example Running Application
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So, I did _technically_ get a running application:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...which you can also try out if you want:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sh
 | 
					 | 
				
			||||||
git clone https://github.com/speice-io/isomorphic-rust.git
 | 
					 | 
				
			||||||
cd isomorphic_rust/percy
 | 
					 | 
				
			||||||
yarn install && yarn start
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...but I wouldn't really call it a "high quality" starting point to base future work on. It's mostly
 | 
					 | 
				
			||||||
there to prove this is possible in the first place. And that's something to be proud of! There's a
 | 
					 | 
				
			||||||
huge amount of engineering that went into showing a window with the text "It's alive!".
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There's also a lot of usability issues that prevent me from recommending anyone try Electron and
 | 
					 | 
				
			||||||
WASM apps at the moment, and I think that's the more important thing to discuss.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Issue the First: Complicated Toolchains
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I quickly established that [wasm-bindgen] was necessary to "link" my Rust code to Javascript. At
 | 
					 | 
				
			||||||
that point you've got an Electron app that starts an HTML page which ultimately fetches your WASM
 | 
					 | 
				
			||||||
blob. To keep things simple, the goal was to package everything using [webpack] so that I could just
 | 
					 | 
				
			||||||
load a `bundle.js` file on the page. That decision was to be the last thing that kinda worked in
 | 
					 | 
				
			||||||
this process.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The first issue
 | 
					 | 
				
			||||||
[I ran into](https://www.reddit.com/r/rust/comments/98lpun/unable_to_load_wasm_for_electron_application/)
 | 
					 | 
				
			||||||
while attempting to bundle everything via `webpack` is a detail in the WASM spec:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> This function accepts a Response object, or a promise for one, and ... **[if it] does not match
 | 
					 | 
				
			||||||
> the `application/wasm` MIME type**, the returned promise will be rejected with a TypeError;
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> [WebAssembly - Additional Web Embedding API](https://webassembly.org/docs/web/#additional-web-embedding-api)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Specifically, if you try and load a WASM blob without the MIME type set, you'll get an error. On the
 | 
					 | 
				
			||||||
web this isn't a huge issue, as the server can set MIME types when delivering the blob. With
 | 
					 | 
				
			||||||
Electron, you're resolving things with a `file://` URL and thus can't control the MIME type:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There are a couple of solutions depending on how far into the deep end you care to venture:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Embed a static file server in your Electron application
 | 
					 | 
				
			||||||
- Use a [custom protocol](https://electronjs.org/docs/api/protocol) and custom protocol handler
 | 
					 | 
				
			||||||
- Host your WASM blob on a website that you resolve at runtime
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But all these are pretty bad solutions and defeat the purpose of using WASM in the first place.
 | 
					 | 
				
			||||||
Instead, my workaround was to
 | 
					 | 
				
			||||||
[open a PR with `webpack`](https://github.com/webpack/webpack/issues/7918) and use regex to remove
 | 
					 | 
				
			||||||
calls to `instantiateStreaming` in the
 | 
					 | 
				
			||||||
[build script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/build.sh#L21-L25):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sh
 | 
					 | 
				
			||||||
cargo +nightly build --target=wasm32-unknown-unknown && \
 | 
					 | 
				
			||||||
    wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
 | 
					 | 
				
			||||||
    # Have to use --mode=development so we can patch out the call to instantiateStreaming
 | 
					 | 
				
			||||||
    "$DIR/node_modules/webpack-cli/bin/cli.js" --mode=development "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js" && \
 | 
					 | 
				
			||||||
    sed -i 's/.*instantiateStreaming.*//g' "$APP_DIR/bundle.js"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Once that lands, the
 | 
					 | 
				
			||||||
[build process](https://github.com/speice-io/isomorphic-rust/blob/master/percy_patched_webpack/build.sh#L24-L27)
 | 
					 | 
				
			||||||
becomes much simpler:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sh
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cargo +nightly build --target=wasm32-unknown-unknown && \
 | 
					 | 
				
			||||||
    wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
 | 
					 | 
				
			||||||
    "$DIR/node_modules/webpack-cli/bin/cli.js" --mode=production "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js"
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But we're not done yet! After we compile Rust into WASM and link WASM to Javascript (via
 | 
					 | 
				
			||||||
`wasm-bindgen` and `webpack`), we still have to make an Electron app. For this purpose I used a
 | 
					 | 
				
			||||||
starter app from [Electron Forge], and then a
 | 
					 | 
				
			||||||
[`prestart` script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
 | 
					 | 
				
			||||||
to actually handle starting the application.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The
 | 
					 | 
				
			||||||
[final toolchain](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
 | 
					 | 
				
			||||||
looks something like this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `yarn start` triggers the `prestart` script
 | 
					 | 
				
			||||||
- `prestart` checks for missing tools (`wasm-bindgen-cli`, etc.) and then:
 | 
					 | 
				
			||||||
  - Uses `cargo` to compile the Rust code into WASM
 | 
					 | 
				
			||||||
  - Uses `wasm-bindgen` to link the WASM blob into a Javascript file with exported symbols
 | 
					 | 
				
			||||||
  - Uses `webpack` to bundle the page start script with the Javascript we just generated
 | 
					 | 
				
			||||||
    - Uses `babel` under the hood to compile the `wasm-bindgen` code down from ES6 into something
 | 
					 | 
				
			||||||
      browser-compatible
 | 
					 | 
				
			||||||
- The `start` script runs an Electron Forge handler to do some sanity checks
 | 
					 | 
				
			||||||
- Electron actually starts
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...which is complicated. I think more work needs to be done to either build a high-quality starter
 | 
					 | 
				
			||||||
app that can manage these steps, or another tool that "just handles" the complexity of linking a
 | 
					 | 
				
			||||||
compiled WASM file into something the Electron browser can run.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Issue the Second: WASM tools in Rust
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For as much as I didn't enjoy the Javascript tooling needed to interface with Rust, the Rust-only
 | 
					 | 
				
			||||||
bits aren't any better at the moment. I get it, a lot of projects are just starting off, and that
 | 
					 | 
				
			||||||
leads to a fragmented ecosystem. Here's what I can recommend as a starting point:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Don't check in your `Cargo.lock` files to version control. If there's a disagreement between the
 | 
					 | 
				
			||||||
version of `wasm-bindgen-cli` you have installed and the `wasm-bindgen` you're compiling with in
 | 
					 | 
				
			||||||
`Cargo.lock`, you get a nasty error:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
it looks like the Rust project used to create this wasm file was linked against
 | 
					 | 
				
			||||||
a different version of wasm-bindgen than this binary:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
rust wasm file: 0.2.21
 | 
					 | 
				
			||||||
    this binary: 0.2.17
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Currently the bindgen format is unstable enough that these two version must
 | 
					 | 
				
			||||||
exactly match, so it's required that these two version are kept in sync by
 | 
					 | 
				
			||||||
either updating the wasm-bindgen dependency or this binary.
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Not that I ever managed to run into this myself (_coughs nervously_).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There are two projects attempting to be "application frameworks": [percy] and [yew]. Between those,
 | 
					 | 
				
			||||||
I managed to get [two](https://github.com/speice-io/isomorphic-rust/tree/master/percy)
 | 
					 | 
				
			||||||
[examples](https://github.com/speice-io/isomorphic-rust/tree/master/percy_patched_webpack) running
 | 
					 | 
				
			||||||
using `percy`, but was unable to get an
 | 
					 | 
				
			||||||
[example](https://github.com/speice-io/isomorphic-rust/tree/master/yew) running with `yew` because
 | 
					 | 
				
			||||||
of issues with "missing modules" during the `webpack` step:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sh
 | 
					 | 
				
			||||||
ERROR in ./dist/electron_yew_wasm_bg.wasm
 | 
					 | 
				
			||||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/yew/dist'
 | 
					 | 
				
			||||||
 @ ./dist/electron_yew_wasm_bg.wasm
 | 
					 | 
				
			||||||
 @ ./dist/electron_yew_wasm.js
 | 
					 | 
				
			||||||
 @ ./dist/app.js
 | 
					 | 
				
			||||||
 @ ./dist/app_loader.js
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you want to work with the browser APIs directly, your choices are [percy-webapis] or [stdweb] (or
 | 
					 | 
				
			||||||
eventually [web-sys]). See above for my `percy` examples, but when I tried
 | 
					 | 
				
			||||||
[an example with `stdweb`](https://github.com/speice-io/isomorphic-rust/tree/master/stdweb), I was
 | 
					 | 
				
			||||||
unable to get it running:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```sh
 | 
					 | 
				
			||||||
ERROR in ./dist/stdweb_electron_bg.wasm
 | 
					 | 
				
			||||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/stdweb/dist'
 | 
					 | 
				
			||||||
 @ ./dist/stdweb_electron_bg.wasm
 | 
					 | 
				
			||||||
 @ ./dist/stdweb_electron.js
 | 
					 | 
				
			||||||
 @ ./dist/app_loader.js
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
At this point I'm pretty convinced that `stdweb` is causing issues for `yew` as well, but can't
 | 
					 | 
				
			||||||
prove it.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I did also get a [minimal example](https://github.com/speice-io/isomorphic-rust/tree/master/minimal)
 | 
					 | 
				
			||||||
running that doesn't depend on any tools besides `wasm-bindgen`. However, it requires manually
 | 
					 | 
				
			||||||
writing "`extern C`" blocks for everything you need from the browser. Es no bueno.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, from a tools and platform view, there are two up-and-coming packages that should be
 | 
					 | 
				
			||||||
mentioned: [js-sys] and [web-sys]. Their purpose is to be fundamental building blocks that exposes
 | 
					 | 
				
			||||||
the browser's APIs to Rust. If you're interested in building an app framework from scratch, these
 | 
					 | 
				
			||||||
should give you the most flexibility. I didn't touch either in my research, though I expect them to
 | 
					 | 
				
			||||||
be essential long-term.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So there's a lot in play from the Rust side of things, and it's just going to take some time to
 | 
					 | 
				
			||||||
figure out what works and what doesn't.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Issue the Third: Known Unknowns
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Alright, so after I managed to get an application started, I stopped there. It was a good deal of
 | 
					 | 
				
			||||||
effort to chain together even a proof of concept, and at this point I'd rather learn [Typescript]
 | 
					 | 
				
			||||||
than keep trying to maintain an incredibly brittle pipeline. Blasphemy, I know...
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The important point I want to make is that there's a lot unknown about how any of this holds up
 | 
					 | 
				
			||||||
outside proofs of concept. Things I didn't attempt:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Testing
 | 
					 | 
				
			||||||
- Packaging
 | 
					 | 
				
			||||||
- Updates
 | 
					 | 
				
			||||||
- Literally anything related to why I wanted to use Electron in the first place
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# What it Would Take
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Much as I don't like Javascript, the tools are too shaky for me to recommend mixing Electron and
 | 
					 | 
				
			||||||
WASM at the moment. There's a lot of innovation happening, so who knows? Someone might have an
 | 
					 | 
				
			||||||
application in production a couple months from now. But at the moment, I'm personally going to stay
 | 
					 | 
				
			||||||
away.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Let's finish with a wishlist then - here are the things that I think need to happen before
 | 
					 | 
				
			||||||
Electron/WASM/Rust can become a thing:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Webpack still needs some updates. The necessary work is in progress, but hasn't landed yet
 | 
					 | 
				
			||||||
  ([#7983](https://github.com/webpack/webpack/pull/7983))
 | 
					 | 
				
			||||||
- Browser API libraries (`web-sys` and `stdweb`) need to make sure they can support running in
 | 
					 | 
				
			||||||
  Electron (see module error above)
 | 
					 | 
				
			||||||
- Projects need to stabilize. There's talk of `stdweb` being turned into a Rust API
 | 
					 | 
				
			||||||
  [on top of web-sys](https://github.com/rustwasm/team/issues/226#issuecomment-418475778), and percy
 | 
					 | 
				
			||||||
  [moving to web-sys](https://github.com/chinedufn/percy/issues/24), both of which are big changes
 | 
					 | 
				
			||||||
- `wasm-bindgen` is great, but still in the "move fast and break things" phase
 | 
					 | 
				
			||||||
- A good "boilerplate" app would dramatically simplify the start-up costs;
 | 
					 | 
				
			||||||
  [electron-react-boilerplate](https://github.com/chentsulin/electron-react-boilerplate) comes to
 | 
					 | 
				
			||||||
  mind as a good project to imitate
 | 
					 | 
				
			||||||
- More blog posts/contributors! I think Electron + Rust could be cool, but I have no idea what I'm
 | 
					 | 
				
			||||||
  doing
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[wxwidgets]: https://wxwidgets.org/
 | 
					 | 
				
			||||||
[libui-rs]: https://github.com/LeoTindall/libui-rs/
 | 
					 | 
				
			||||||
[electron]: https://electronjs.org/
 | 
					 | 
				
			||||||
[babel]: https://babeljs.io/
 | 
					 | 
				
			||||||
[wxrust]: https://github.com/kenz-gelsoft/wxRust
 | 
					 | 
				
			||||||
[wasm-bindgen]: https://github.com/rustwasm/wasm-bindgen
 | 
					 | 
				
			||||||
[js-sys]: https://crates.io/crates/js-sys
 | 
					 | 
				
			||||||
[percy-webapis]: https://crates.io/crates/percy-webapis
 | 
					 | 
				
			||||||
[stdweb]: https://crates.io/crates/stdweb
 | 
					 | 
				
			||||||
[web-sys]: https://crates.io/crates/web-sys
 | 
					 | 
				
			||||||
[percy]: https://chinedufn.github.io/percy/
 | 
					 | 
				
			||||||
[virtual-dom-rs]: https://crates.io/crates/virtual-dom-rs
 | 
					 | 
				
			||||||
[yew]: https://github.com/DenisKolodin/yew
 | 
					 | 
				
			||||||
[react]: https://reactjs.org/
 | 
					 | 
				
			||||||
[elm]: http://elm-lang.org/
 | 
					 | 
				
			||||||
[asm.js]: http://asmjs.org/
 | 
					 | 
				
			||||||
[emscripten]: https://kripken.github.io/emscripten-site/
 | 
					 | 
				
			||||||
[typescript]: https://www.typescriptlang.org/
 | 
					 | 
				
			||||||
[electron forge]: https://electronforge.io/
 | 
					 | 
				
			||||||
[conrod]: https://github.com/PistonDevelopers/conrod
 | 
					 | 
				
			||||||
[webpack]: https://webpack.js.org/
 | 
					 | 
				
			||||||
@ -1,168 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "A Case Study in Heaptrack"
 | 
					 | 
				
			||||||
description: "...because you don't need no garbage collection"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: []
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
One of my earliest conversations about programming went like this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> Programmers have it too easy these days. They should learn to develop in low memory environments
 | 
					 | 
				
			||||||
> and be more efficient.
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> -- My Father (paraphrased)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...though it's not like the first code I wrote was for a
 | 
					 | 
				
			||||||
[graphing calculator](https://education.ti.com/en/products/calculators/graphing-calculators/ti-84-plus-se)
 | 
					 | 
				
			||||||
packing a whole 24KB of RAM. By the way, _what are you doing on my lawn?_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The principle remains though: be efficient with the resources you have, because
 | 
					 | 
				
			||||||
[what Intel giveth, Microsoft taketh away](http://exo-blog.blogspot.com/2007/09/what-intel-giveth-microsoft-taketh-away.html).
 | 
					 | 
				
			||||||
My professional work is focused on this kind of efficiency; low-latency financial markets demand
 | 
					 | 
				
			||||||
that you understand at a deep level _exactly_ what your code is doing. As I continue experimenting
 | 
					 | 
				
			||||||
with Rust for personal projects, it's exciting to bring a utilitarian mindset with me: there's
 | 
					 | 
				
			||||||
flexibility for the times I pretend to have a garbage collector, and flexibility for the times that
 | 
					 | 
				
			||||||
I really care about how memory is used.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This post is a (small) case study in how I went from the former to the latter. And ultimately, it's
 | 
					 | 
				
			||||||
intended to be a starting toolkit to empower analysis of your own code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Curiosity
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When I first started building the [dtparse] crate, my intention was to mirror as closely as possible
 | 
					 | 
				
			||||||
the equivalent [Python library][dateutil]. Python, as you may know, is garbage collected. Very
 | 
					 | 
				
			||||||
rarely is memory usage considered in Python, and I likewise wasn't paying too much attention when
 | 
					 | 
				
			||||||
`dtparse` was first being built.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This lackadaisical approach to memory works well enough, and I'm not planning on making `dtparse`
 | 
					 | 
				
			||||||
hyper-efficient. But every so often, I've wondered: "what exactly is going on in memory?" With the
 | 
					 | 
				
			||||||
advent of Rust 1.28 and the
 | 
					 | 
				
			||||||
[Global Allocator trait](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html), I had a really
 | 
					 | 
				
			||||||
great idea: _build a custom allocator that allows you to track your own allocations._ That way, you
 | 
					 | 
				
			||||||
can do things like writing tests for both correct results and correct memory usage. I gave it a
 | 
					 | 
				
			||||||
[shot][qadapt], but learned very quickly: **never write your own allocator**. It went from "fun
 | 
					 | 
				
			||||||
weekend project" to "I have literally no idea what my computer is doing" at breakneck speed.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Instead, I'll highlight a separate path I took to make sense of my memory usage: [heaptrack].
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Turning on the System Allocator
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This is the hardest part of the post. Because Rust uses
 | 
					 | 
				
			||||||
[its own allocator](https://github.com/rust-lang/rust/pull/27400#issue-41256384) by default,
 | 
					 | 
				
			||||||
`heaptrack` is unable to properly record unmodified Rust code. To remedy this, we'll make use of the
 | 
					 | 
				
			||||||
`#[global_allocator]` attribute.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Specifically, in `lib.rs` or `main.rs`, add this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::alloc::System;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static GLOBAL: System = System;
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...and that's it. Everything else comes essentially for free.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Running heaptrack
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Assuming you've installed heaptrack <span style="font-size: .6em;">(Homebrew in Mac, package manager
 | 
					 | 
				
			||||||
in Linux, ??? in Windows)</span>, all that's left is to fire up your application:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
heaptrack my_application
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It's that easy. After the program finishes, you'll see a file in your local directory with a name
 | 
					 | 
				
			||||||
like `heaptrack.my_application.XXXX.gz`. If you load that up in `heaptrack_gui`, you'll see
 | 
					 | 
				
			||||||
something like this:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And even these pretty colors:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Reading Flamegraphs
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To make sense of our memory usage, we're going to focus on that last picture - it's called a
 | 
					 | 
				
			||||||
["flamegraph"](http://www.brendangregg.com/flamegraphs.html). These charts are typically used to
 | 
					 | 
				
			||||||
show how much time your program spends executing each function, but they're used here to show how
 | 
					 | 
				
			||||||
much memory was allocated during those functions instead.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For example, we can see that all executions happened during the `main` function:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...and within that, all allocations happened during `dtparse::parse`:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...and within _that_, allocations happened in two different places:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now I apologize that it's hard to see, but there's one area specifically that stuck out as an issue:
 | 
					 | 
				
			||||||
**what the heck is the `Default` thing doing?**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Optimizing dtparse
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
See, I knew that there were some allocations during calls to `dtparse::parse`, but I was totally
 | 
					 | 
				
			||||||
wrong about where the bulk of allocations occurred in my program. Let me post the code and see if
 | 
					 | 
				
			||||||
you can spot the mistake:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
/// Main entry point for using `dtparse`.
 | 
					 | 
				
			||||||
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
 | 
					 | 
				
			||||||
    let res = Parser::default().parse(
 | 
					 | 
				
			||||||
        timestr, None, None, false, false,
 | 
					 | 
				
			||||||
        None, false,
 | 
					 | 
				
			||||||
        &HashMap::new(),
 | 
					 | 
				
			||||||
    )?;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Ok((res.0, res.1))
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> [dtparse](https://github.com/bspeice/dtparse/blob/4d7c5dd99572823fa4a390b483c38ab020a2172f/src/lib.rs#L1286)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Because `Parser::parse` requires a mutable reference to itself, I have to create a new
 | 
					 | 
				
			||||||
`Parser::default` every time it receives a string. This is excessive! We'd rather have an immutable
 | 
					 | 
				
			||||||
parser that can be re-used, and avoid allocating memory in the first place.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Armed with that information, I put some time in to
 | 
					 | 
				
			||||||
[make the parser immutable](https://github.com/bspeice/dtparse/commit/741afa34517d6bc1155713bbc5d66905fea13fad#diff-b4aea3e418ccdb71239b96952d9cddb6).
 | 
					 | 
				
			||||||
Now that I can re-use the same parser over and over, the allocations disappear:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In total, we went from requiring 2 MB of memory in
 | 
					 | 
				
			||||||
[version 1.0.2](https://crates.io/crates/dtparse/1.0.2):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
All the way down to 300KB in [version 1.0.3](https://crates.io/crates/dtparse/1.0.3):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In the end, you don't need to write a custom allocator to be efficient with memory, great tools
 | 
					 | 
				
			||||||
already exist to help you understand what your program is doing.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Use them.**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Given that [Moore's Law](https://en.wikipedia.org/wiki/Moore%27s_law) is
 | 
					 | 
				
			||||||
[dead](https://www.technologyreview.com/s/601441/moores-law-is-dead-now-what/), we've all got to do
 | 
					 | 
				
			||||||
our part to take back what Microsoft stole.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[dtparse]: https://crates.io/crates/dtparse
 | 
					 | 
				
			||||||
[dateutil]: https://github.com/dateutil/dateutil
 | 
					 | 
				
			||||||
[heaptrack]: https://github.com/KDE/heaptrack
 | 
					 | 
				
			||||||
[qadapt]: https://crates.io/crates/qadapt
 | 
					 | 
				
			||||||
@ -1,34 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: 'More "What Companies Really Mean"'
 | 
					 | 
				
			||||||
description: 'when they ask "Why should we hire you?"'
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: []
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I recently stumbled across a phenomenal small article entitled
 | 
					 | 
				
			||||||
[What Startups Really Mean By "Why Should We Hire You?"](https://angel.co/blog/what-startups-really-mean-by-why-should-we-hire-you).
 | 
					 | 
				
			||||||
Having been interviewed by smaller companies (though not exactly startups), the questions and
 | 
					 | 
				
			||||||
subtexts are the same. There's often a question behind the question that you're actually trying to
 | 
					 | 
				
			||||||
answer, and I wish I spotted the nuance earlier in my career.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Let me also make note of one more question/euphemism I've come across:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# How do you feel about Production Support?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Translation**: _We're a fairly small team, and when things break on an evening/weekend/Christmas
 | 
					 | 
				
			||||||
Day, can we call on you to be there?_
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I've met decidedly few people in my life who truly enjoy the "ops" side of "devops". They're
 | 
					 | 
				
			||||||
incredibly good at taking an impossible problem, pre-existing knowledge of arcane arts, and turning
 | 
					 | 
				
			||||||
that into a functioning system at the end. And if they all left for lunch, we probably wouldn't make
 | 
					 | 
				
			||||||
it out the door before the zombie apocalypse.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Larger organizations (in my experience, 500+ person organizations) have the luxury of hiring people
 | 
					 | 
				
			||||||
who either enjoy that, or play along nicely enough that our systems keep working.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Small teams have no such luck. If you're interviewing at a small company, especially as a "data
 | 
					 | 
				
			||||||
scientist" or other somesuch position, be aware that systems can and do spontaneously combust at the
 | 
					 | 
				
			||||||
most inopportune moments.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Terrible-but-popular answers include**: _It's a part of the job, and I'm happy to contribute._
 | 
					 | 
				
			||||||
@ -1,218 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "QADAPT - debug_assert! for your memory usage"
 | 
					 | 
				
			||||||
description: "...and why you want an allocator that goes 💥."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: []
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I think it's part of the human condition to ignore perfectly good advice when it comes our way. A
 | 
					 | 
				
			||||||
bit over a month ago, I was dispensing sage wisdom for the ages:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> I had a really great idea: build a custom allocator that allows you to track your own allocations.
 | 
					 | 
				
			||||||
> I gave it a shot, but learned very quickly: **never write your own allocator.**
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> -- [me](/2018/10/case-study-optimization.html)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I proceeded to ignore it, because we never really learn from our mistakes.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There's another part of the human condition that derives joy from seeing things explode.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<iframe src="https://giphy.com/embed/YA6dmVW0gfIw8" width="480" height="336" frameBorder="0"></iframe>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And _that's_ the part I'm going to focus on.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Why an Allocator?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So why, after complaining about allocators, would I still want to write one? There are three reasons
 | 
					 | 
				
			||||||
for that:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Allocation/dropping is slow
 | 
					 | 
				
			||||||
2. It's difficult to know exactly when Rust will allocate or drop, especially when using code that
 | 
					 | 
				
			||||||
   you did not write
 | 
					 | 
				
			||||||
3. I want automated tools to verify behavior, instead of inspecting by hand
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When I say "slow," it's important to define the terms. If you're writing web applications, you'll
 | 
					 | 
				
			||||||
spend orders of magnitude more time waiting for the database than you will the allocator. However,
 | 
					 | 
				
			||||||
there's still plenty of code where micro- or nano-seconds matter; think
 | 
					 | 
				
			||||||
[finance](https://www.youtube.com/watch?v=NH1Tta7purM),
 | 
					 | 
				
			||||||
[real-time audio](https://www.reddit.com/r/rust/comments/9hg7yj/synthesizer_progress_update/e6c291f),
 | 
					 | 
				
			||||||
[self-driving cars](https://polysync.io/blog/session-types-for-hearty-codecs/), and
 | 
					 | 
				
			||||||
[networking](https://carllerche.github.io/bytes/bytes/index.html). In these situations it's simply
 | 
					 | 
				
			||||||
unacceptable for you to spend time doing things that are not your program, and waiting on the
 | 
					 | 
				
			||||||
allocator is not cool.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
As I continue to learn Rust, it's difficult for me to predict where exactly allocations will happen.
 | 
					 | 
				
			||||||
So, I propose we play a quick trivia game: **Does this code invoke the allocator?**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Example 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn my_function() {
 | 
					 | 
				
			||||||
    let v: Vec<u8> = Vec::new();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**No**: Rust [knows how big](https://doc.rust-lang.org/std/mem/fn.size_of.html) the `Vec` type is,
 | 
					 | 
				
			||||||
and reserves a fixed amount of memory on the stack for the `v` vector. However, if we wanted to
 | 
					 | 
				
			||||||
reserve extra space (using `Vec::with_capacity`) the allocator would get invoked.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Example 2
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn my_function() {
 | 
					 | 
				
			||||||
    let v: Box<Vec<u8>> = Box::new(Vec::new());
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Yes**: Because Boxes allow us to work with things that are of unknown size, it has to allocate on
 | 
					 | 
				
			||||||
the heap. While the `Box` is unnecessary in this snippet (release builds will optimize out the
 | 
					 | 
				
			||||||
allocation), reserving heap space more generally is needed to pass a dynamically sized type to
 | 
					 | 
				
			||||||
another function.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Example 3
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn my_function(v: Vec<u8>) {
 | 
					 | 
				
			||||||
    v.push(5);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Maybe**: Depending on whether the Vector we were given has space available, we may or may not
 | 
					 | 
				
			||||||
allocate. Especially when dealing with code that you did not author, it's difficult to verify that
 | 
					 | 
				
			||||||
things behave as you expect them to.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Blowing Things Up
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So, how exactly does QADAPT solve these problems? **Whenever an allocation or drop occurs in code
 | 
					 | 
				
			||||||
marked allocation-safe, QADAPT triggers a thread panic.** We don't want to let the program continue
 | 
					 | 
				
			||||||
as if nothing strange happened, _we want things to explode_.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
However, you don't want code to panic in production because of circumstances you didn't predict.
 | 
					 | 
				
			||||||
Just like [`debug_assert!`](https://doc.rust-lang.org/std/macro.debug_assert.html), **QADAPT will
 | 
					 | 
				
			||||||
strip out its own code when building in release mode to guarantee no panics and no performance
 | 
					 | 
				
			||||||
impact.**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, there are three ways to have QADAPT check that your code will not invoke the allocator:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Using a procedural macro
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The easiest method, watch an entire function for allocator invocation:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use qadapt::no_alloc;
 | 
					 | 
				
			||||||
use qadapt::QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static Q: QADAPT = QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[no_alloc]
 | 
					 | 
				
			||||||
fn push_vec(v: &mut Vec<u8>) {
 | 
					 | 
				
			||||||
    // This triggers a panic if v.len() == v.capacity()
 | 
					 | 
				
			||||||
    v.push(5);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    let mut v = Vec::with_capacity(1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // This will *not* trigger a panic
 | 
					 | 
				
			||||||
    push_vec(&mut v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // This *will* trigger a panic
 | 
					 | 
				
			||||||
    push_vec(&mut v);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Using a regular macro
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For times when you need more precision:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use qadapt::assert_no_alloc;
 | 
					 | 
				
			||||||
use qadapt::QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static Q: QADAPT = QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    let mut v = Vec::with_capacity(1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // No allocations here, we already have space reserved
 | 
					 | 
				
			||||||
    assert_no_alloc!(v.push(5));
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Even though we remove an item, it doesn't trigger a drop
 | 
					 | 
				
			||||||
    // because it's a scalar. If it were a `Box<_>` type,
 | 
					 | 
				
			||||||
    // a drop would trigger.
 | 
					 | 
				
			||||||
    assert_no_alloc!({
 | 
					 | 
				
			||||||
        v.pop().unwrap();
 | 
					 | 
				
			||||||
    });
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Using function calls
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Both the most precise and most tedious:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use qadapt::enter_protected;
 | 
					 | 
				
			||||||
use qadapt::exit_protected;
 | 
					 | 
				
			||||||
use qadapt::QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static Q: QADAPT = QADAPT;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // This triggers an allocation (on non-release builds)
 | 
					 | 
				
			||||||
    let mut v = Vec::with_capacity(1);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    enter_protected();
 | 
					 | 
				
			||||||
    // This does not trigger an allocation because we've reserved size
 | 
					 | 
				
			||||||
    v.push(0);
 | 
					 | 
				
			||||||
    exit_protected();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // This triggers an allocation because we ran out of size,
 | 
					 | 
				
			||||||
    // but doesn't panic because we're no longer protected.
 | 
					 | 
				
			||||||
    v.push(1);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Caveats
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It's important to point out that QADAPT code is synchronous, so please be careful when mixing in
 | 
					 | 
				
			||||||
asynchronous functions:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use futures::future::Future;
 | 
					 | 
				
			||||||
use futures::future::ok;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[no_alloc]
 | 
					 | 
				
			||||||
fn async_capacity() -> impl Future<Item=Vec<u8>, Error=()> {
 | 
					 | 
				
			||||||
    ok(12).and_then(|e| Ok(Vec::with_capacity(e)))
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // This doesn't trigger a panic because the `and_then` closure
 | 
					 | 
				
			||||||
    // wasn't run during the function call.
 | 
					 | 
				
			||||||
    async_capacity();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Still no panic
 | 
					 | 
				
			||||||
    assert_no_alloc!(async_capacity());
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // This will panic because the allocation happens during `unwrap`
 | 
					 | 
				
			||||||
    // in the `assert_no_alloc!` macro
 | 
					 | 
				
			||||||
    assert_no_alloc!(async_capacity().poll().unwrap());
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While there's a lot more to writing high-performance code than managing your usage of the allocator,
 | 
					 | 
				
			||||||
it's critical that you do use the allocator correctly. QADAPT will verify that your code is doing
 | 
					 | 
				
			||||||
what you expect. It's usable even on stable Rust from version 1.31 onward, which isn't the case for
 | 
					 | 
				
			||||||
most allocators. Version 1.0 was released today, and you can check it out over at
 | 
					 | 
				
			||||||
[crates.io](https://crates.io/crates/qadapt) or on [github](https://github.com/bspeice/qadapt).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I'm hoping to write more about high-performance Rust in the future, and I expect that QADAPT will
 | 
					 | 
				
			||||||
help guide that. If there are topics you're interested in, let me know in the comments below!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[qadapt]: https://crates.io/crates/qadapt
 | 
					 | 
				
			||||||
@ -1,113 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Allocations in Rust"
 | 
					 | 
				
			||||||
description: "An introduction to the memory model."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There's an alchemy of distilling complex technical topics into articles and videos that change the
 | 
					 | 
				
			||||||
way programmers see the tools they interact with on a regular basis. I knew what a linker was, but
 | 
					 | 
				
			||||||
there's a staggering amount of complexity in between
 | 
					 | 
				
			||||||
[the OS and `main()`](https://www.youtube.com/watch?v=dOfucXtyEsU). Rust programmers use the
 | 
					 | 
				
			||||||
[`Box`](https://doc.rust-lang.org/stable/std/boxed/struct.Box.html) type all the time, but there's a
 | 
					 | 
				
			||||||
rich history of the Rust language itself wrapped up in
 | 
					 | 
				
			||||||
[how special it is](https://manishearth.github.io/blog/2017/01/10/rust-tidbits-box-is-special/).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In a similar vein, this series attempts to look at code and understand how memory is used; the
 | 
					 | 
				
			||||||
complex choreography of operating system, compiler, and program that frees you to focus on
 | 
					 | 
				
			||||||
functionality far-flung from frivolous book-keeping. The Rust compiler relieves a great deal of the
 | 
					 | 
				
			||||||
cognitive burden associated with memory management, but we're going to step into its world for a
 | 
					 | 
				
			||||||
while.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Let's learn a bit about memory in Rust.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Table of Contents
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This series is intended as both learning and reference material; we'll work through the different
 | 
					 | 
				
			||||||
memory types Rust uses, and explain the implications of each. Ultimately, a summary will be provided
 | 
					 | 
				
			||||||
as a cheat sheet for easy future reference. To that end, a table of contents is in order:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Foreword
 | 
					 | 
				
			||||||
- [Global Memory Usage: The Whole World](/2019/02/the-whole-world.html)
 | 
					 | 
				
			||||||
- [Fixed Memory: Stacking Up](/2019/02/stacking-up.html)
 | 
					 | 
				
			||||||
- [Dynamic Memory: A Heaping Helping](/2019/02/a-heaping-helping.html)
 | 
					 | 
				
			||||||
- [Compiler Optimizations: What It's Done For You Lately](/2019/02/compiler-optimizations.html)
 | 
					 | 
				
			||||||
- [Summary: What Are the Rules?](/2019/02/summary.html)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Foreword
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Rust's three defining features of
 | 
					 | 
				
			||||||
[Performance, Reliability, and Productivity](https://www.rust-lang.org/) are all driven to a great
 | 
					 | 
				
			||||||
degree by how the Rust compiler understands memory usage. Unlike managed memory languages (Java,
 | 
					 | 
				
			||||||
Python), Rust
 | 
					 | 
				
			||||||
[doesn't really](https://words.steveklabnik.com/borrow-checking-escape-analysis-and-the-generational-hypothesis)
 | 
					 | 
				
			||||||
garbage collect; instead, it uses an
 | 
					 | 
				
			||||||
[ownership](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) system to reason about
 | 
					 | 
				
			||||||
how long objects will last in your program. In some cases, if the life of an object is fairly
 | 
					 | 
				
			||||||
transient, Rust can make use of a very fast region called the "stack." When that's not possible,
 | 
					 | 
				
			||||||
Rust uses
 | 
					 | 
				
			||||||
[dynamic (heap) memory](https://en.wikipedia.org/wiki/Memory_management#Dynamic_memory_allocation)
 | 
					 | 
				
			||||||
and the ownership system to ensure you can't accidentally corrupt memory. It's not as fast, but it
 | 
					 | 
				
			||||||
is important to have available.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
That said, there are specific situations in Rust where you'd never need to worry about the
 | 
					 | 
				
			||||||
stack/heap distinction! If you:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Never use `unsafe`
 | 
					 | 
				
			||||||
2. Never use `#![feature(alloc)]` or the [`alloc` crate](https://doc.rust-lang.org/alloc/index.html)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...then it's not possible for you to use dynamic memory!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For some uses of Rust, typically embedded devices, these constraints are OK. They have very limited
 | 
					 | 
				
			||||||
memory, and the program binary size itself may significantly affect what's available! There's no
 | 
					 | 
				
			||||||
operating system able to manage this
 | 
					 | 
				
			||||||
["virtual memory"](https://en.wikipedia.org/wiki/Virtual_memory) thing, but that's not an issue
 | 
					 | 
				
			||||||
because there's only one running application. The
 | 
					 | 
				
			||||||
[embedonomicon](https://docs.rust-embedded.org/embedonomicon/preface.html) is ever in mind, and
 | 
					 | 
				
			||||||
interacting with the "real world" through extra peripherals is accomplished by reading and writing
 | 
					 | 
				
			||||||
to [specific memory addresses](https://bob.cs.sonoma.edu/IntroCompOrg-RPi/sec-gpio-mem.html).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Most Rust programs find these requirements overly burdensome though. C++ developers would struggle
 | 
					 | 
				
			||||||
without access to [`std::vector`](https://en.cppreference.com/w/cpp/container/vector) (except those
 | 
					 | 
				
			||||||
hardcore no-STL people), and Rust developers would struggle without
 | 
					 | 
				
			||||||
[`std::vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html). But with the constraints above,
 | 
					 | 
				
			||||||
`std::vec` is actually a part of the
 | 
					 | 
				
			||||||
[`alloc` crate](https://doc.rust-lang.org/alloc/vec/struct.Vec.html), and thus off-limits. `Box`,
 | 
					 | 
				
			||||||
`Rc`, etc., are also unusable for the same reason.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Whether writing code for embedded devices or not, the important thing in both situations is how much
 | 
					 | 
				
			||||||
you know _before your application starts_ about what its memory usage will look like. In embedded
 | 
					 | 
				
			||||||
devices, there's a small, fixed amount of memory to use. In a browser, you have no idea how large
 | 
					 | 
				
			||||||
[google.com](https://www.google.com)'s home page is until you start trying to download it. The
 | 
					 | 
				
			||||||
compiler uses this knowledge (or lack thereof) to optimize how memory is used; put simply, your code
 | 
					 | 
				
			||||||
runs faster when the compiler can guarantee exactly how much memory your program needs while it's
 | 
					 | 
				
			||||||
running. This series is all about understanding how the compiler reasons about your program, with an
 | 
					 | 
				
			||||||
emphasis on the implications for performance.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now let's address some conditions and caveats before going much further:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- We'll focus on "safe" Rust only; `unsafe` lets you use platform-specific allocation API's
 | 
					 | 
				
			||||||
  ([`malloc`](https://www.tutorialspoint.com/c_standard_library/c_function_malloc.htm)) that we'll
 | 
					 | 
				
			||||||
  ignore.
 | 
					 | 
				
			||||||
- We'll assume a "debug" build of Rust code (what you get with `cargo run` and `cargo test`) and
 | 
					 | 
				
			||||||
  address (pun intended) release mode at the end (`cargo run --release` and `cargo test --release`).
 | 
					 | 
				
			||||||
- All content will be run using Rust 1.32, as that's the highest currently supported in the
 | 
					 | 
				
			||||||
  [Compiler Explorer](https://godbolt.org/). As such, we'll avoid upcoming innovations like
 | 
					 | 
				
			||||||
  [compile-time evaluation of `static`](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md)
 | 
					 | 
				
			||||||
  that are available in nightly.
 | 
					 | 
				
			||||||
- Because of the nature of the content, being able to read assembly is helpful. We'll keep it
 | 
					 | 
				
			||||||
  simple, but I [found](https://stackoverflow.com/a/4584131/1454178) a
 | 
					 | 
				
			||||||
  [refresher](https://stackoverflow.com/a/26026278/1454178) on the `push` and `pop`
 | 
					 | 
				
			||||||
  [instructions](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html) was helpful while writing
 | 
					 | 
				
			||||||
  this.
 | 
					 | 
				
			||||||
- I've tried to be precise in saying only what I can prove using the tools (ASM, docs) that are
 | 
					 | 
				
			||||||
  available, but if there's something said in error it will be corrected expeditiously. Please let
 | 
					 | 
				
			||||||
  me know at [bradlee@speice.io](mailto:bradlee@speice.io)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, I'll do what I can to flag potential future changes but the Rust docs have a notice worth
 | 
					 | 
				
			||||||
repeating:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> Rust does not currently have a rigorously and formally defined memory model.
 | 
					 | 
				
			||||||
>
 | 
					 | 
				
			||||||
> -- [the docs](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html)
 | 
					 | 
				
			||||||
@ -1,337 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Global Memory Usage: The Whole World"
 | 
					 | 
				
			||||||
description: "Static considered slightly less harmful."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The first memory type we'll look at is pretty special: when Rust can prove that a _value_ is fixed
 | 
					 | 
				
			||||||
for the life of a program (`const`), and when a _reference_ is unique for the life of a program
 | 
					 | 
				
			||||||
(`static` as a declaration, not
 | 
					 | 
				
			||||||
[`'static`](https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#the-static-lifetime) as a
 | 
					 | 
				
			||||||
lifetime), we can make use of global memory. This special section of data is embedded directly in
 | 
					 | 
				
			||||||
the program binary so that variables are ready to go once the program loads; no additional
 | 
					 | 
				
			||||||
computation is necessary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Understanding the value/reference distinction is important for reasons we'll go into below, and
 | 
					 | 
				
			||||||
while the
 | 
					 | 
				
			||||||
[full specification](https://github.com/rust-lang/rfcs/blob/master/text/0246-const-vs-static.md) for
 | 
					 | 
				
			||||||
these two keywords is available, we'll take a hands-on approach to the topic.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# **const**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When a _value_ is guaranteed to be unchanging in your program (where "value" may be scalars,
 | 
					 | 
				
			||||||
`struct`s, etc.), you can declare it `const`. This tells the compiler that it's safe to treat the
 | 
					 | 
				
			||||||
value as never changing, and enables some interesting optimizations; not only is there no
 | 
					 | 
				
			||||||
initialization cost to creating the value (it is loaded at the same time as the executable parts of
 | 
					 | 
				
			||||||
your program), but the compiler can also copy the value around if it speeds up the code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The points we need to address when talking about `const` are:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `Const` values are stored in read-only memory - they're impossible to modify.
 | 
					 | 
				
			||||||
- Values resulting from calling a `const fn` are materialized at compile-time.
 | 
					 | 
				
			||||||
- The compiler may (or may not) copy `const` values wherever it chooses.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Read-Only
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The first point is a bit strange - "read-only memory."
 | 
					 | 
				
			||||||
[The Rust book](https://doc.rust-lang.org/book/ch03-01-variables-and-mutability.html#differences-between-variables-and-constants)
 | 
					 | 
				
			||||||
mentions in a couple places that using `mut` with constants is illegal, but it's also important to
 | 
					 | 
				
			||||||
demonstrate just how immutable they are. _Typically_ in Rust you can use
 | 
					 | 
				
			||||||
[interior mutability](https://doc.rust-lang.org/book/ch15-05-interior-mutability.html) to modify
 | 
					 | 
				
			||||||
things that aren't declared `mut`.
 | 
					 | 
				
			||||||
[`RefCell`](https://doc.rust-lang.org/std/cell/struct.RefCell.html) provides an example of this
 | 
					 | 
				
			||||||
pattern in action:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::cell::RefCell;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn my_mutator(cell: &RefCell<u8>) {
 | 
					 | 
				
			||||||
    // Even though we're given an immutable reference,
 | 
					 | 
				
			||||||
    // the `replace` method allows us to modify the inner value.
 | 
					 | 
				
			||||||
    cell.replace(14);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    let cell = RefCell::new(25);
 | 
					 | 
				
			||||||
    // Prints out 25
 | 
					 | 
				
			||||||
    println!("Cell: {:?}", cell);
 | 
					 | 
				
			||||||
    my_mutator(&cell);
 | 
					 | 
				
			||||||
    // Prints out 14
 | 
					 | 
				
			||||||
    println!("Cell: {:?}", cell);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8e4bea1a718edaff4507944e825a54b2)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When `const` is involved though, interior mutability is impossible:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::cell::RefCell;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const CELL: RefCell<u8> = RefCell::new(25);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn my_mutator(cell: &RefCell<u8>) {
 | 
					 | 
				
			||||||
    cell.replace(14);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // First line prints 25 as expected
 | 
					 | 
				
			||||||
    println!("Cell: {:?}", &CELL);
 | 
					 | 
				
			||||||
    my_mutator(&CELL);
 | 
					 | 
				
			||||||
    // Second line *still* prints 25
 | 
					 | 
				
			||||||
    println!("Cell: {:?}", &CELL);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=88fe98110c33c1b3a51e341f48b8ae00)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And a second example using [`Once`](https://doc.rust-lang.org/std/sync/struct.Once.html):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::sync::Once;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const SURPRISE: Once = Once::new();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // This is how `Once` is supposed to be used
 | 
					 | 
				
			||||||
    SURPRISE.call_once(|| println!("Initializing..."));
 | 
					 | 
				
			||||||
    // Because `Once` is a `const` value, we never record it
 | 
					 | 
				
			||||||
    // having been initialized the first time, and this closure
 | 
					 | 
				
			||||||
    // will also execute.
 | 
					 | 
				
			||||||
    SURPRISE.call_once(|| println!("Initializing again???"));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c3cc5979b5e5434eca0f9ec4a06ee0ed)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When the
 | 
					 | 
				
			||||||
[`const` specification](https://github.com/rust-lang/rfcs/blob/26197104b7bb9a5a35db243d639aee6e46d35d75/text/0246-const-vs-static.md)
 | 
					 | 
				
			||||||
refers to ["rvalues"](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3055.pdf), this
 | 
					 | 
				
			||||||
behavior is what they refer to. [Clippy](https://github.com/rust-lang/rust-clippy) will treat this
 | 
					 | 
				
			||||||
as an error, but it's still something to be aware of.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Initialization == Compilation
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The next thing to mention is that `const` values are loaded into memory _as part of your program
 | 
					 | 
				
			||||||
binary_. Because of this, any `const` values declared in your program will be "realized" at
 | 
					 | 
				
			||||||
compile-time; accessing them may trigger a main-memory lookup (with a fixed address, so your CPU may
 | 
					 | 
				
			||||||
be able to prefetch the value), but that's it.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::cell::RefCell;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
const CELL: RefCell<u32> = RefCell::new(24);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn multiply(value: u32) -> u32 {
 | 
					 | 
				
			||||||
    // CELL is stored at `.L__unnamed_1`
 | 
					 | 
				
			||||||
    value * (*CELL.get_mut())
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/Th8boO)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The compiler creates one `RefCell`, uses it everywhere, and never needs to call the `RefCell::new`
 | 
					 | 
				
			||||||
function.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Copying
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If it's helpful though, the compiler can choose to copy `const` values.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
const FACTOR: u32 = 1000;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn multiply(value: u32) -> u32 {
 | 
					 | 
				
			||||||
    // See assembly line 4 for the `mov edi, 1000` instruction
 | 
					 | 
				
			||||||
    value * FACTOR
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn multiply_twice(value: u32) -> u32 {
 | 
					 | 
				
			||||||
    // See assembly lines 22 and 29 for `mov edi, 1000` instructions
 | 
					 | 
				
			||||||
    value * FACTOR * FACTOR
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/ZtS54X)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In this example, the `FACTOR` value is turned into the `mov edi, 1000` instruction in both the
 | 
					 | 
				
			||||||
`multiply` and `multiply_twice` functions; the "1000" value is never "stored" anywhere, as it's
 | 
					 | 
				
			||||||
small enough to inline into the assembly instructions.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, getting the address of a `const` value is possible, but not guaranteed to be unique
 | 
					 | 
				
			||||||
(because the compiler can choose to copy values). I was unable to get non-unique pointers in my
 | 
					 | 
				
			||||||
testing (even using different crates), but the specifications are clear enough: _don't rely on
 | 
					 | 
				
			||||||
pointers to `const` values being consistent_. To be frank, caring about locations for `const` values
 | 
					 | 
				
			||||||
is almost certainly a code smell.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# **static**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Static variables are related to `const` variables, but take a slightly different approach. When we
 | 
					 | 
				
			||||||
declare that a _reference_ is unique for the life of a program, we have a `static` variable
 | 
					 | 
				
			||||||
(unrelated to the `'static` lifetime). Because of the reference/value distinction with
 | 
					 | 
				
			||||||
`const`/`static`, static variables behave much more like typical "global" variables.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But to understand `static`, here's what we'll look at:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `static` variables are globally unique locations in memory.
 | 
					 | 
				
			||||||
- Like `const`, `static` variables are loaded at the same time as your program being read into
 | 
					 | 
				
			||||||
  memory.
 | 
					 | 
				
			||||||
- All `static` variables must implement the
 | 
					 | 
				
			||||||
  [`Sync`](https://doc.rust-lang.org/std/marker/trait.Sync.html) marker trait.
 | 
					 | 
				
			||||||
- Interior mutability is safe and acceptable when using `static` variables.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Memory Uniqueness
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The single biggest difference between `const` and `static` is the guarantees provided about
 | 
					 | 
				
			||||||
uniqueness. Where `const` variables may or may not be copied in code, `static` variables are
 | 
					 | 
				
			||||||
guaranteed to be unique. If we take a previous `const` example and change it to `static`, the
 | 
					 | 
				
			||||||
difference should be clear:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
static FACTOR: u32 = 1000;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn multiply(value: u32) -> u32 {
 | 
					 | 
				
			||||||
    // The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
 | 
					 | 
				
			||||||
    value * FACTOR
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn multiply_twice(value: u32) -> u32 {
 | 
					 | 
				
			||||||
    // The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
 | 
					 | 
				
			||||||
    value * FACTOR * FACTOR
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/uxmiRQ)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Where [previously](#copying) there were plenty of references to multiplying by 1000, the new
 | 
					 | 
				
			||||||
assembly refers to `FACTOR` as a named memory location instead. No initialization work needs to be
 | 
					 | 
				
			||||||
done, but the compiler can no longer prove the value never changes during execution.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Initialization == Compilation
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Next, let's talk about initialization. The simplest case is initializing static variables with
 | 
					 | 
				
			||||||
either scalar or struct notation:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
#[derive(Debug)]
 | 
					 | 
				
			||||||
struct MyStruct {
 | 
					 | 
				
			||||||
    x: u32
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static MY_STRUCT: MyStruct = MyStruct {
 | 
					 | 
				
			||||||
    // You can even reference other statics
 | 
					 | 
				
			||||||
    // declared later
 | 
					 | 
				
			||||||
    x: MY_VAL
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static MY_VAL: u32 = 24;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    println!("Static MyStruct: {:?}", MY_STRUCT);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=b538dbc46076f12db047af4f4403ee6e)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Things can get a bit weirder when using `const fn` though. In most cases, it just works:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
#[derive(Debug)]
 | 
					 | 
				
			||||||
struct MyStruct {
 | 
					 | 
				
			||||||
    x: u32
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
impl MyStruct {
 | 
					 | 
				
			||||||
    const fn new() -> MyStruct {
 | 
					 | 
				
			||||||
        MyStruct { x: 24 }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static MY_STRUCT: MyStruct = MyStruct::new();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    println!("const fn Static MyStruct: {:?}", MY_STRUCT);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8c796a6e7fc273c12115091b707b0255)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
However, there's a caveat: you're currently not allowed to use `const fn` to initialize static
 | 
					 | 
				
			||||||
variables of types that aren't marked `Sync`. For example,
 | 
					 | 
				
			||||||
[`RefCell::new()`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#method.new) is a
 | 
					 | 
				
			||||||
`const fn`, but because
 | 
					 | 
				
			||||||
[`RefCell` isn't `Sync`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#impl-Sync), you'll
 | 
					 | 
				
			||||||
get an error at compile time:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::cell::RefCell;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
 | 
					 | 
				
			||||||
static MY_LOCK: RefCell<u8> = RefCell::new(0);
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c76ef86e473d07117a1700e21fd45560)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It's likely that this will
 | 
					 | 
				
			||||||
[change in the future](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md) though.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## **Sync**
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Which leads well to the next point: static variable types must implement the
 | 
					 | 
				
			||||||
[`Sync` marker](https://doc.rust-lang.org/std/marker/trait.Sync.html). Because they're globally
 | 
					 | 
				
			||||||
unique, it must be safe for you to access static variables from any thread at any time. Most
 | 
					 | 
				
			||||||
`struct` definitions automatically implement the `Sync` trait because they contain only elements
 | 
					 | 
				
			||||||
which themselves implement `Sync` (read more in the
 | 
					 | 
				
			||||||
[Nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). This is why earlier examples could
 | 
					 | 
				
			||||||
get away with initializing statics, even though we never included an `impl Sync for MyStruct` in the
 | 
					 | 
				
			||||||
code. To demonstrate this property, Rust refuses to compile our earlier example if we add a
 | 
					 | 
				
			||||||
non-`Sync` element to the `struct` definition:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::cell::RefCell;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct MyStruct {
 | 
					 | 
				
			||||||
    x: u32,
 | 
					 | 
				
			||||||
    y: RefCell<u8>,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
 | 
					 | 
				
			||||||
static MY_STRUCT: MyStruct = MyStruct {
 | 
					 | 
				
			||||||
    x: 8,
 | 
					 | 
				
			||||||
    y: RefCell::new(8)
 | 
					 | 
				
			||||||
};
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=40074d0248f056c296b662dbbff97cfc)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Interior Mutability
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, while `static mut` variables are allowed, mutating them is an `unsafe` operation. If we
 | 
					 | 
				
			||||||
want to stay in `safe` Rust, we can use interior mutability to accomplish similar goals:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::sync::Once;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// This example adapted from https://doc.rust-lang.org/std/sync/struct.Once.html#method.call_once
 | 
					 | 
				
			||||||
static INIT: Once = Once::new();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // Note that while `INIT` is declared immutable, we're still allowed
 | 
					 | 
				
			||||||
    // to mutate its interior
 | 
					 | 
				
			||||||
    INIT.call_once(|| println!("Initializing..."));
 | 
					 | 
				
			||||||
    // This code won't panic, as the interior of INIT was modified
 | 
					 | 
				
			||||||
    // as part of the previous `call_once`
 | 
					 | 
				
			||||||
    INIT.call_once(|| panic!("INIT was called twice!"));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=3ba003a981a7ed7400240caadd384d59)
 | 
					 | 
				
			||||||
@ -1,601 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Fixed Memory: Stacking Up"
 | 
					 | 
				
			||||||
description: "We don't need no allocator."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
`const` and `static` are perfectly fine, but it's relatively rare that we know at compile-time about
 | 
					 | 
				
			||||||
either values or references that will be the same for the duration of our program. Put another way,
 | 
					 | 
				
			||||||
it's not often the case that either you or your compiler knows how much memory your entire program
 | 
					 | 
				
			||||||
will ever need.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
However, there are still some optimizations the compiler can do if it knows how much memory
 | 
					 | 
				
			||||||
individual functions will need. Specifically, the compiler can make use of "stack" memory (as
 | 
					 | 
				
			||||||
opposed to "heap" memory) which can be managed far faster in both the short- and long-term. When
 | 
					 | 
				
			||||||
requesting memory, the [`push` instruction](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html)
 | 
					 | 
				
			||||||
can typically complete in [1 or 2 cycles](https://agner.org/optimize/instruction_tables.ods) (<1
 | 
					 | 
				
			||||||
nanosecond on modern CPUs). Contrast that to heap memory which requires an allocator (specialized
 | 
					 | 
				
			||||||
software to track what memory is in use) to reserve space. When you're finished with stack memory,
 | 
					 | 
				
			||||||
the `pop` instruction runs in 1-3 cycles, as opposed to an allocator needing to worry about memory
 | 
					 | 
				
			||||||
fragmentation and other issues with the heap. All sorts of incredibly sophisticated techniques have
 | 
					 | 
				
			||||||
been used to design allocators:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- [Garbage Collection](<https://en.wikipedia.org/wiki/Garbage_collection_(computer_science)>)
 | 
					 | 
				
			||||||
  strategies like [Tracing](https://en.wikipedia.org/wiki/Tracing_garbage_collection) (used in
 | 
					 | 
				
			||||||
  [Java](https://www.oracle.com/technetwork/java/javase/tech/g1-intro-jsp-135488.html)) and
 | 
					 | 
				
			||||||
  [Reference counting](https://en.wikipedia.org/wiki/Reference_counting) (used in
 | 
					 | 
				
			||||||
  [Python](https://docs.python.org/3/extending/extending.html#reference-counts))
 | 
					 | 
				
			||||||
- Thread-local structures to prevent locking the allocator in
 | 
					 | 
				
			||||||
  [tcmalloc](https://jamesgolick.com/2013/5/19/how-tcmalloc-works.html)
 | 
					 | 
				
			||||||
- Arena structures used in [jemalloc](http://jemalloc.net/), which
 | 
					 | 
				
			||||||
  [until recently](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default)
 | 
					 | 
				
			||||||
  was the primary allocator for Rust programs!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But no matter how fast your allocator is, the principle remains: the fastest allocator is the one
 | 
					 | 
				
			||||||
you never use. As such, we're not going to discuss how exactly the
 | 
					 | 
				
			||||||
[`push` and `pop` instructions work](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html), but
 | 
					 | 
				
			||||||
we'll focus instead on the conditions that enable the Rust compiler to use faster stack-based
 | 
					 | 
				
			||||||
allocation for variables.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So, **how do we know when Rust will or will not use stack allocation for objects we create?**
 | 
					 | 
				
			||||||
Looking at other languages, it's often easy to delineate between stack and heap. Managed memory
 | 
					 | 
				
			||||||
languages (Python, Java,
 | 
					 | 
				
			||||||
[C#](https://blogs.msdn.microsoft.com/ericlippert/2010/09/30/the-truth-about-value-types/)) place
 | 
					 | 
				
			||||||
everything on the heap. JIT compilers ([PyPy](https://www.pypy.org/),
 | 
					 | 
				
			||||||
[HotSpot](https://www.oracle.com/technetwork/java/javase/tech/index-jsp-136373.html)) may optimize
 | 
					 | 
				
			||||||
some heap allocations away, but you should never assume it will happen. C makes things clear with
 | 
					 | 
				
			||||||
calls to special functions (like [malloc(3)](https://linux.die.net/man/3/malloc)) needed to access
 | 
					 | 
				
			||||||
heap memory. Old C++ has the [`new`](https://stackoverflow.com/a/655086/1454178) keyword, though
 | 
					 | 
				
			||||||
modern C++/C++11 is more complicated with [RAII](https://en.cppreference.com/w/cpp/language/raii).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For Rust, we can summarize as follows: **stack allocation will be used for everything that doesn't
 | 
					 | 
				
			||||||
involve "smart pointers" and collections**. We'll skip over a precise definition of the term "smart
 | 
					 | 
				
			||||||
pointer" for now, and instead discuss what we should watch for to understand when stack and heap
 | 
					 | 
				
			||||||
memory regions are used:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. Stack manipulation instructions (`push`, `pop`, and `add`/`sub` of the `rsp` register) indicate
 | 
					 | 
				
			||||||
   allocation of stack memory:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   ```rust
 | 
					 | 
				
			||||||
   pub fn stack_alloc(x: u32) -> u32 {
 | 
					 | 
				
			||||||
       // Space for `y` is allocated by subtracting from `rsp`,
 | 
					 | 
				
			||||||
       // and then populated
 | 
					 | 
				
			||||||
       let y = [1u8, 2, 3, 4];
 | 
					 | 
				
			||||||
       // Space for `y` is deallocated by adding back to `rsp`
 | 
					 | 
				
			||||||
       x
 | 
					 | 
				
			||||||
   }
 | 
					 | 
				
			||||||
   ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   -- [Compiler Explorer](https://godbolt.org/z/5WSgc9)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
2. Tracking when exactly heap allocation calls occur is difficult. It's typically easier to watch
 | 
					 | 
				
			||||||
   for `call core::ptr::real_drop_in_place`, and infer that a heap allocation happened in the recent
 | 
					 | 
				
			||||||
   past:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   ```rust
 | 
					 | 
				
			||||||
   pub fn heap_alloc(x: usize) -> usize {
 | 
					 | 
				
			||||||
       // Space for elements in a vector has to be allocated
 | 
					 | 
				
			||||||
       // on the heap, and is then de-allocated once the
 | 
					 | 
				
			||||||
       // vector goes out of scope
 | 
					 | 
				
			||||||
       let y: Vec<u8> = Vec::with_capacity(x);
 | 
					 | 
				
			||||||
       x
 | 
					 | 
				
			||||||
   }
 | 
					 | 
				
			||||||
   ```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
   -- [Compiler Explorer](https://godbolt.org/z/epfgoQ) (`real_drop_in_place` happens on line 1317)
 | 
					 | 
				
			||||||
   <span style="font-size: .8em">Note: While the
 | 
					 | 
				
			||||||
   [`Drop` trait](https://doc.rust-lang.org/std/ops/trait.Drop.html) is
 | 
					 | 
				
			||||||
   [called for stack-allocated objects](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=87edf374d8983816eb3d8cfeac657b46),
 | 
					 | 
				
			||||||
   the Rust standard library only defines `Drop` implementations for types that involve heap
 | 
					 | 
				
			||||||
   allocation.</span>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
3. If you don't want to inspect the assembly, use a custom allocator that's able to track and alert
 | 
					 | 
				
			||||||
   when heap allocations occur. Crates like
 | 
					 | 
				
			||||||
   [`alloc_counter`](https://crates.io/crates/alloc_counter) are designed for exactly this purpose.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
With all that in mind, let's talk about situations in which we're guaranteed to use stack memory:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Structs are created on the stack.
 | 
					 | 
				
			||||||
- Function arguments are passed on the stack, meaning the
 | 
					 | 
				
			||||||
  [`#[inline]` attribute](https://doc.rust-lang.org/reference/attributes.html#inline-attribute) will
 | 
					 | 
				
			||||||
  not change the memory region used.
 | 
					 | 
				
			||||||
- Enums and unions are stack-allocated.
 | 
					 | 
				
			||||||
- [Arrays](https://doc.rust-lang.org/std/primitive.array.html) are always stack-allocated.
 | 
					 | 
				
			||||||
- Closures capture their arguments on the stack.
 | 
					 | 
				
			||||||
- Generics will use stack allocation, even with dynamic dispatch.
 | 
					 | 
				
			||||||
- [`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html) types are guaranteed to be
 | 
					 | 
				
			||||||
  stack-allocated, and copying them will be done in stack memory.
 | 
					 | 
				
			||||||
- [`Iterator`s](https://doc.rust-lang.org/std/iter/trait.Iterator.html) in the standard library are
 | 
					 | 
				
			||||||
  stack-allocated even when iterating over heap-based collections.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Structs
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The simplest case comes first. When creating vanilla `struct` objects, we use stack memory to hold
 | 
					 | 
				
			||||||
their contents:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
struct Point {
 | 
					 | 
				
			||||||
    x: u64,
 | 
					 | 
				
			||||||
    y: u64,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct Line {
 | 
					 | 
				
			||||||
    a: Point,
 | 
					 | 
				
			||||||
    b: Point,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn make_line() {
 | 
					 | 
				
			||||||
    // `origin` is stored in the first 16 bytes of memory
 | 
					 | 
				
			||||||
    // starting at location `rsp`
 | 
					 | 
				
			||||||
    let origin = Point { x: 0, y: 0 };
 | 
					 | 
				
			||||||
    // `point` makes up the next 16 bytes of memory
 | 
					 | 
				
			||||||
    let point = Point { x: 1, y: 2 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // When creating `ray`, we just move the content out of
 | 
					 | 
				
			||||||
    // `origin` and `point` into the next 32 bytes of memory
 | 
					 | 
				
			||||||
    let ray = Line { a: origin, b: point };
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/vri9BE)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Note that while some extra-fancy instructions are used for memory manipulation in the assembly, the
 | 
					 | 
				
			||||||
`sub rsp, 64` instruction indicates we're still working with the stack.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Function arguments
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Have you ever wondered how functions communicate with each other? Like, once the variables are given
 | 
					 | 
				
			||||||
to you, everything's fine. But how do you "give" those variables to another function? How do you get
 | 
					 | 
				
			||||||
the results back afterward? The answer: the compiler arranges memory and assembly instructions using
 | 
					 | 
				
			||||||
a pre-determined [calling convention](http://llvm.org/docs/LangRef.html#calling-conventions). This
 | 
					 | 
				
			||||||
convention governs the rules around where arguments needed by a function will be located (either in
 | 
					 | 
				
			||||||
memory offsets relative to the stack pointer `rsp`, or in other registers), and where the results
 | 
					 | 
				
			||||||
can be found once the function has finished. And when multiple languages agree on what the calling
 | 
					 | 
				
			||||||
conventions are, you can do things like having [Go call Rust code](https://blog.filippo.io/rustgo/)!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Put simply: it's the compiler's job to figure out how to call other functions, and you can assume
 | 
					 | 
				
			||||||
that the compiler is good at its job.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We can see this in action using a simple example:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
struct Point {
 | 
					 | 
				
			||||||
    x: i64,
 | 
					 | 
				
			||||||
    y: i64,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// We use integer division operations to keep
 | 
					 | 
				
			||||||
// the assembly clean, understanding the result
 | 
					 | 
				
			||||||
// isn't accurate.
 | 
					 | 
				
			||||||
fn distance(a: &Point, b: &Point) -> i64 {
 | 
					 | 
				
			||||||
    // Immediately subtract from `rsp` the bytes needed
 | 
					 | 
				
			||||||
    // to hold all the intermediate results - this is
 | 
					 | 
				
			||||||
    // the stack allocation step
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // The compiler used the `rdi` and `rsi` registers
 | 
					 | 
				
			||||||
    // to pass our arguments, so read them in
 | 
					 | 
				
			||||||
    let x1 = a.x;
 | 
					 | 
				
			||||||
    let x2 = b.x;
 | 
					 | 
				
			||||||
    let y1 = a.y;
 | 
					 | 
				
			||||||
    let y2 = b.y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Do the actual math work
 | 
					 | 
				
			||||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
					 | 
				
			||||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
					 | 
				
			||||||
    let squared = x_pow + y_pow;
 | 
					 | 
				
			||||||
    squared / squared
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Our final result will be stored in the `rax` register
 | 
					 | 
				
			||||||
    // so that our caller knows where to retrieve it.
 | 
					 | 
				
			||||||
    // Finally, add back to `rsp` the stack memory that is
 | 
					 | 
				
			||||||
    // now ready to be used by other functions.
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn total_distance() {
 | 
					 | 
				
			||||||
    let start = Point { x: 1, y: 2 };
 | 
					 | 
				
			||||||
    let middle = Point { x: 3, y: 4 };
 | 
					 | 
				
			||||||
    let end = Point { x: 5, y: 6 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let _dist_1 = distance(&start, &middle);
 | 
					 | 
				
			||||||
    let _dist_2 = distance(&middle, &end);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/Qmx4ST)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
As a consequence of function arguments never using heap memory, we can also infer that functions
 | 
					 | 
				
			||||||
using the `#[inline]` attributes also do not heap allocate. But better than inferring, we can look
 | 
					 | 
				
			||||||
at the assembly to prove it:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
struct Point {
 | 
					 | 
				
			||||||
    x: i64,
 | 
					 | 
				
			||||||
    y: i64,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Note that there is no `distance` function in the assembly output,
 | 
					 | 
				
			||||||
// and the total line count goes from 229 with inlining off
 | 
					 | 
				
			||||||
// to 306 with inline on. Even still, no heap allocations occur.
 | 
					 | 
				
			||||||
#[inline(always)]
 | 
					 | 
				
			||||||
fn distance(a: &Point, b: &Point) -> i64 {
 | 
					 | 
				
			||||||
    let x1 = a.x;
 | 
					 | 
				
			||||||
    let x2 = b.x;
 | 
					 | 
				
			||||||
    let y1 = a.y;
 | 
					 | 
				
			||||||
    let y2 = b.y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
					 | 
				
			||||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
					 | 
				
			||||||
    let squared = x_pow + y_pow;
 | 
					 | 
				
			||||||
    squared / squared
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn total_distance() {
 | 
					 | 
				
			||||||
    let start = Point { x: 1, y: 2 };
 | 
					 | 
				
			||||||
    let middle = Point { x: 3, y: 4 };
 | 
					 | 
				
			||||||
    let end = Point { x: 5, y: 6 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let _dist_1 = distance(&start, &middle);
 | 
					 | 
				
			||||||
    let _dist_2 = distance(&middle, &end);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/30Sh66)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, passing by value (arguments with type
 | 
					 | 
				
			||||||
[`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html)) and passing by reference (either
 | 
					 | 
				
			||||||
moving ownership or passing a pointer) may have slightly different layouts in assembly, but will
 | 
					 | 
				
			||||||
still use either stack memory or CPU registers:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
pub struct Point {
 | 
					 | 
				
			||||||
    x: i64,
 | 
					 | 
				
			||||||
    y: i64,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Moving values
 | 
					 | 
				
			||||||
pub fn distance_moved(a: Point, b: Point) -> i64 {
 | 
					 | 
				
			||||||
    let x1 = a.x;
 | 
					 | 
				
			||||||
    let x2 = b.x;
 | 
					 | 
				
			||||||
    let y1 = a.y;
 | 
					 | 
				
			||||||
    let y2 = b.y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
					 | 
				
			||||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
					 | 
				
			||||||
    let squared = x_pow + y_pow;
 | 
					 | 
				
			||||||
    squared / squared
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// Borrowing values has two extra `mov` instructions on lines 21 and 22
 | 
					 | 
				
			||||||
pub fn distance_borrowed(a: &Point, b: &Point) -> i64 {
 | 
					 | 
				
			||||||
    let x1 = a.x;
 | 
					 | 
				
			||||||
    let x2 = b.x;
 | 
					 | 
				
			||||||
    let y1 = a.y;
 | 
					 | 
				
			||||||
    let y2 = b.y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let x_pow = (x1 - x2) * (x1 - x2);
 | 
					 | 
				
			||||||
    let y_pow = (y1 - y2) * (y1 - y2);
 | 
					 | 
				
			||||||
    let squared = x_pow + y_pow;
 | 
					 | 
				
			||||||
    squared / squared
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/06hGiv)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Enums
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you've ever worried that wrapping your types in
 | 
					 | 
				
			||||||
[`Option`](https://doc.rust-lang.org/stable/core/option/enum.Option.html) or
 | 
					 | 
				
			||||||
[`Result`](https://doc.rust-lang.org/stable/core/result/enum.Result.html) would finally make them
 | 
					 | 
				
			||||||
large enough that Rust decides to use heap allocation instead, fear no longer: `enum` and union
 | 
					 | 
				
			||||||
types don't use heap allocation:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
enum MyEnum {
 | 
					 | 
				
			||||||
    Small(u8),
 | 
					 | 
				
			||||||
    Large(u64)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct MyStruct {
 | 
					 | 
				
			||||||
    x: MyEnum,
 | 
					 | 
				
			||||||
    y: MyEnum,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn enum_compare() {
 | 
					 | 
				
			||||||
    let x = MyEnum::Small(0);
 | 
					 | 
				
			||||||
    let y = MyEnum::Large(0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let z = MyStruct { x, y };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let opt = Option::Some(z);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/HK7zBx)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Because the size of an `enum` is the size of its largest element plus a flag, the compiler can
 | 
					 | 
				
			||||||
predict how much memory is used no matter which variant of an enum is currently stored in a
 | 
					 | 
				
			||||||
variable. Thus, enums and unions have no need of heap allocation. There's unfortunately not a great
 | 
					 | 
				
			||||||
way to show this in assembly, so I'll instead point you to the
 | 
					 | 
				
			||||||
[`core::mem::size_of`](https://doc.rust-lang.org/stable/core/mem/fn.size_of.html#size-of-enums)
 | 
					 | 
				
			||||||
documentation.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Arrays
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The array type is guaranteed to be stack allocated, which is why the array size must be declared.
 | 
					 | 
				
			||||||
Interestingly enough, this can be used to cause safe Rust programs to crash:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
// 256 bytes
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct TwoFiftySix {
 | 
					 | 
				
			||||||
    _a: [u64; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// 8 kilobytes
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct EightK {
 | 
					 | 
				
			||||||
    _a: [TwoFiftySix; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// 256 kilobytes
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct TwoFiftySixK {
 | 
					 | 
				
			||||||
    _a: [EightK; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// 8 megabytes - exceeds space typically provided for the stack,
 | 
					 | 
				
			||||||
// though the kernel can be instructed to allocate more.
 | 
					 | 
				
			||||||
// On Linux, you can check stack size using `ulimit -s`
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct EightM {
 | 
					 | 
				
			||||||
    _a: [TwoFiftySixK; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // Because we already have things in stack memory
 | 
					 | 
				
			||||||
    // (like the current function call stack), allocating another
 | 
					 | 
				
			||||||
    // eight megabytes of stack memory crashes the program
 | 
					 | 
				
			||||||
    let _x = EightM::default();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=587a6380a4914bcbcef4192c90c01dc4)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
There aren't any security implications of this (no memory corruption occurs), but it's good to note
 | 
					 | 
				
			||||||
that the Rust compiler won't move arrays into heap memory even if they can be reasonably expected to
 | 
					 | 
				
			||||||
overflow the stack.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Closures
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Rules for how anonymous functions capture their arguments are typically language-specific. In Java,
 | 
					 | 
				
			||||||
[Lambda Expressions](https://docs.oracle.com/javase/tutorial/java/javaOO/lambdaexpressions.html) are
 | 
					 | 
				
			||||||
actually objects created on the heap that capture local primitives by copying, and capture local
 | 
					 | 
				
			||||||
non-primitives as (`final`) references.
 | 
					 | 
				
			||||||
[Python](https://docs.python.org/3.7/reference/expressions.html#lambda) and
 | 
					 | 
				
			||||||
[JavaScript](https://javascriptweblog.wordpress.com/2010/10/25/understanding-javascript-closures/)
 | 
					 | 
				
			||||||
both bind _everything_ by reference normally, but Python can also
 | 
					 | 
				
			||||||
[capture values](https://stackoverflow.com/a/235764/1454178) and JavaScript has
 | 
					 | 
				
			||||||
[Arrow functions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In Rust, arguments to closures are the same as arguments to other functions; closures are simply
 | 
					 | 
				
			||||||
functions that don't have a declared name. Some weird ordering of the stack may be required to
 | 
					 | 
				
			||||||
handle them, but it's the compiler's responsibility to figure that out.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Each example below has the same effect, but a different assembly implementation. In the simplest
 | 
					 | 
				
			||||||
case, we immediately run a closure returned by another function. Because we don't store a reference
 | 
					 | 
				
			||||||
to the closure, the stack memory needed to store the captured values is contiguous:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn my_func() -> impl FnOnce() {
 | 
					 | 
				
			||||||
    let x = 24;
 | 
					 | 
				
			||||||
    // Note that this closure in assembly looks exactly like
 | 
					 | 
				
			||||||
    // any other function; you even use the `call` instruction
 | 
					 | 
				
			||||||
    // to start running it.
 | 
					 | 
				
			||||||
    move || { x; }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn immediate() {
 | 
					 | 
				
			||||||
    my_func()();
 | 
					 | 
				
			||||||
    my_func()();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/mgJ2zl), 25 total assembly instructions
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If we store a reference to the closure, the Rust compiler keeps values it needs in the stack memory
 | 
					 | 
				
			||||||
of the original function. Getting the details right is a bit harder, so the instruction count goes
 | 
					 | 
				
			||||||
up even though this code is functionally equivalent to our original example:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
pub fn simple_reference() {
 | 
					 | 
				
			||||||
    let x = my_func();
 | 
					 | 
				
			||||||
    let y = my_func();
 | 
					 | 
				
			||||||
    y();
 | 
					 | 
				
			||||||
    x();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/K_dj5n), 55 total assembly instructions
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Even things like variable order can make a difference in instruction count:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
pub fn complex() {
 | 
					 | 
				
			||||||
    let x = my_func();
 | 
					 | 
				
			||||||
    let y = my_func();
 | 
					 | 
				
			||||||
    x();
 | 
					 | 
				
			||||||
    y();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/p37qFl), 70 total assembly instructions
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In every circumstance though, the compiler ensured that no heap allocations were necessary.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Generics
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Traits in Rust come in two broad forms: static dispatch (monomorphization, `impl Trait`) and dynamic
 | 
					 | 
				
			||||||
dispatch (trait objects, `dyn Trait`). While dynamic dispatch is often _associated_ with trait
 | 
					 | 
				
			||||||
objects being stored in the heap, dynamic dispatch can be used with stack allocated objects as well:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
trait GetInt {
 | 
					 | 
				
			||||||
    fn get_int(&self) -> u64;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// vtable stored at section L__unnamed_1
 | 
					 | 
				
			||||||
struct WhyNotU8 {
 | 
					 | 
				
			||||||
    x: u8
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
impl GetInt for WhyNotU8 {
 | 
					 | 
				
			||||||
    fn get_int(&self) -> u64 {
 | 
					 | 
				
			||||||
        self.x as u64
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// vtable stored at section L__unnamed_2
 | 
					 | 
				
			||||||
struct ActualU64 {
 | 
					 | 
				
			||||||
    x: u64
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
impl GetInt for ActualU64 {
 | 
					 | 
				
			||||||
    fn get_int(&self) -> u64 {
 | 
					 | 
				
			||||||
        self.x
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// `&dyn` declares that we want to use dynamic dispatch
 | 
					 | 
				
			||||||
// rather than monomorphization, so there is only one
 | 
					 | 
				
			||||||
// `retrieve_int` function that shows up in the final assembly.
 | 
					 | 
				
			||||||
// If we used generics, there would be one implementation of
 | 
					 | 
				
			||||||
// `retrieve_int` for each type that implements `GetInt`.
 | 
					 | 
				
			||||||
pub fn retrieve_int(u: &dyn GetInt) {
 | 
					 | 
				
			||||||
    // In the assembly, we just call an address given to us
 | 
					 | 
				
			||||||
    // in the `rsi` register and hope that it was set up
 | 
					 | 
				
			||||||
    // correctly when this function was invoked.
 | 
					 | 
				
			||||||
    let x = u.get_int();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn do_call() {
 | 
					 | 
				
			||||||
    // Note that even though the vtable for `WhyNotU8` and
 | 
					 | 
				
			||||||
    // `ActualU64` includes a pointer to
 | 
					 | 
				
			||||||
    // `core::ptr::real_drop_in_place`, it is never invoked.
 | 
					 | 
				
			||||||
    let a = WhyNotU8 { x: 0 };
 | 
					 | 
				
			||||||
    let b = ActualU64 { x: 0 };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    retrieve_int(&a);
 | 
					 | 
				
			||||||
    retrieve_int(&b);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/u_yguS)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It's hard to imagine practical situations where dynamic dispatch would be used for objects that
 | 
					 | 
				
			||||||
aren't heap allocated, but it technically can be done.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Copy types
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Understanding move semantics and copy semantics in Rust is weird at first. The Rust docs
 | 
					 | 
				
			||||||
[go into detail](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html) far better than can
 | 
					 | 
				
			||||||
be addressed here, so I'll leave them to do the job. From a memory perspective though, their
 | 
					 | 
				
			||||||
guideline is reasonable:
 | 
					 | 
				
			||||||
[if your type can implement `Copy`, it should](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html#when-should-my-type-be-copy).
 | 
					 | 
				
			||||||
While there are potential speed tradeoffs to _benchmark_ when discussing `Copy` (move semantics for
 | 
					 | 
				
			||||||
stack objects vs. copying stack pointers vs. copying stack `struct`s), _it's impossible for `Copy`
 | 
					 | 
				
			||||||
to introduce a heap allocation_.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But why is this the case? Fundamentally, it's because the language controls what `Copy` means -
 | 
					 | 
				
			||||||
["the behavior of `Copy` is not overloadable"](https://doc.rust-lang.org/std/marker/trait.Copy.html#whats-the-difference-between-copy-and-clone)
 | 
					 | 
				
			||||||
because it's a marker trait. From there we'll note that a type
 | 
					 | 
				
			||||||
[can implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#when-can-my-type-be-copy)
 | 
					 | 
				
			||||||
if (and only if) its components implement `Copy`, and that
 | 
					 | 
				
			||||||
[no heap-allocated types implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#implementors).
 | 
					 | 
				
			||||||
Thus, assignments involving heap types are always move semantics, and new heap allocations won't
 | 
					 | 
				
			||||||
occur because of implicit operator behavior.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
#[derive(Clone)]
 | 
					 | 
				
			||||||
struct Cloneable {
 | 
					 | 
				
			||||||
    x: Box<u64>
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// error[E0204]: the trait `Copy` may not be implemented for this type
 | 
					 | 
				
			||||||
#[derive(Copy, Clone)]
 | 
					 | 
				
			||||||
struct NotCopyable {
 | 
					 | 
				
			||||||
    x: Box<u64>
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/VToRuK)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Iterators
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In managed memory languages (like
 | 
					 | 
				
			||||||
[Java](https://www.youtube.com/watch?v=bSkpMdDe4g4&feature=youtu.be&t=357)), there's a subtle
 | 
					 | 
				
			||||||
difference between these two code samples:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```java
 | 
					 | 
				
			||||||
public static long sum_for(List<Long> vals) {
 | 
					 | 
				
			||||||
    long sum = 0;
 | 
					 | 
				
			||||||
    // Regular for loop
 | 
					 | 
				
			||||||
    for (int i = 0; i < vals.size(); i++) {
 | 
					 | 
				
			||||||
        sum += vals.get(i);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return sum;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
public static long sum_foreach(List<Long> vals) {
 | 
					 | 
				
			||||||
    long sum = 0;
 | 
					 | 
				
			||||||
    // "Foreach" loop - uses iteration
 | 
					 | 
				
			||||||
    for (Long l : vals) {
 | 
					 | 
				
			||||||
        sum += l;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return sum;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In the `sum_for` function, nothing terribly interesting happens. In `sum_foreach`, an object of type
 | 
					 | 
				
			||||||
[`Iterator`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Iterator.html)
 | 
					 | 
				
			||||||
is allocated on the heap, and will eventually be garbage-collected. This isn't a great design;
 | 
					 | 
				
			||||||
iterators are often transient objects that you need during a function and can discard once the
 | 
					 | 
				
			||||||
function ends. Sounds exactly like the issue stack-allocated objects address, no?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In Rust, iterators are allocated on the stack. The objects to iterate over are almost certainly in
 | 
					 | 
				
			||||||
heap memory, but the iterator itself
 | 
					 | 
				
			||||||
([`Iter`](https://doc.rust-lang.org/std/slice/struct.Iter.html)) doesn't need to use the heap. In
 | 
					 | 
				
			||||||
each of the examples below we iterate over a collection, but never use heap allocation:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::collections::HashMap;
 | 
					 | 
				
			||||||
// There's a lot of assembly generated, but if you search in the text,
 | 
					 | 
				
			||||||
// there are no references to `real_drop_in_place` anywhere.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn sum_vec(x: &Vec<u32>) {
 | 
					 | 
				
			||||||
    let mut s = 0;
 | 
					 | 
				
			||||||
    // Basic iteration over vectors doesn't need allocation
 | 
					 | 
				
			||||||
    for y in x {
 | 
					 | 
				
			||||||
        s += y;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn sum_enumerate(x: &Vec<u32>) {
 | 
					 | 
				
			||||||
    let mut s = 0;
 | 
					 | 
				
			||||||
    // More complex iterators are just fine too
 | 
					 | 
				
			||||||
    for (_i, y) in x.iter().enumerate() {
 | 
					 | 
				
			||||||
        s += y;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn sum_hm(x: &HashMap<u32, u32>) {
 | 
					 | 
				
			||||||
    let mut s = 0;
 | 
					 | 
				
			||||||
    // And it's not just Vec, all types will allocate the iterator
 | 
					 | 
				
			||||||
    // on stack memory
 | 
					 | 
				
			||||||
    for y in x.values() {
 | 
					 | 
				
			||||||
        s += y;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/FTT3CT)
 | 
					 | 
				
			||||||
@ -1,254 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Dynamic Memory: A Heaping Helping"
 | 
					 | 
				
			||||||
description: "The reason Rust exists."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and
 | 
					 | 
				
			||||||
some languages go to extreme lengths to protect users from themselves (Java, Python). In Rust, how
 | 
					 | 
				
			||||||
the language uses dynamic memory (also referred to as the **heap**) is governed by a system called _ownership_.
 | 
					 | 
				
			||||||
And as the docs mention, ownership
 | 
					 | 
				
			||||||
[is Rust's most unique feature](https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The heap is used in two situations; when the compiler is unable to predict either the _total size of
 | 
					 | 
				
			||||||
memory needed_, or _how long the memory is needed for_, it allocates space in the heap. This happens
 | 
					 | 
				
			||||||
pretty frequently; if you want to download the Google home page, you won't know how large it is
 | 
					 | 
				
			||||||
until your program runs. And when you're finished with Google, the memory is deallocated so it can be
 | 
					 | 
				
			||||||
used to store other webpages. If you're interested in a slightly longer explanation of the heap,
 | 
					 | 
				
			||||||
check out
 | 
					 | 
				
			||||||
[The Stack and the Heap](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html#the-stack-and-the-heap)
 | 
					 | 
				
			||||||
in Rust's documentation.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We won't go into detail on how the heap is managed; the
 | 
					 | 
				
			||||||
[ownership documentation](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) does a
 | 
					 | 
				
			||||||
phenomenal job explaining both the "why" and "how" of memory management. Instead, we're going to
 | 
					 | 
				
			||||||
focus on understanding "when" heap allocations occur in Rust.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To start off, take a guess for how many allocations happen in the program below:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
fn main() {}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
It's obviously a trick question; while no heap allocations occur as a result of that code, the setup
 | 
					 | 
				
			||||||
needed to call `main` does allocate on the heap. Here's a way to show it:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
#![feature(integer_atomics)]
 | 
					 | 
				
			||||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
					 | 
				
			||||||
use std::sync::atomic::{AtomicU64, Ordering};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static ALLOCATION_COUNT: AtomicU64 = AtomicU64::new(0);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
struct CountingAllocator;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
unsafe impl GlobalAlloc for CountingAllocator {
 | 
					 | 
				
			||||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
					 | 
				
			||||||
        ALLOCATION_COUNT.fetch_add(1, Ordering::SeqCst);
 | 
					 | 
				
			||||||
        System.alloc(layout)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
					 | 
				
			||||||
        System.dealloc(ptr, layout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static A: CountingAllocator = CountingAllocator;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    let x = ALLOCATION_COUNT.fetch_add(0, Ordering::SeqCst);
 | 
					 | 
				
			||||||
    println!("There were {} allocations before calling main!", x);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2018&gist=fb5060025ba79fc0f906b65a4ef8eb8e)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
As of the time of writing, there are five allocations that happen before `main` is ever called.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But when we want to understand more practically where heap allocation happens, we'll follow this
 | 
					 | 
				
			||||||
guide:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Smart pointers hold their contents in the heap
 | 
					 | 
				
			||||||
- Collections are smart pointers for many objects at a time, and reallocate when they need to grow
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, there are two "addendum" issues that are important to address when discussing Rust and the
 | 
					 | 
				
			||||||
heap:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Non-heap alternatives to many standard library types are available.
 | 
					 | 
				
			||||||
- Special allocators to track memory behavior should be used to benchmark code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Smart pointers
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The first thing to note are the "smart pointer" types. When you have data that must outlive the
 | 
					 | 
				
			||||||
scope in which it is declared, or your data is of unknown or dynamic size, you'll make use of these
 | 
					 | 
				
			||||||
types.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The term [smart pointer](https://en.wikipedia.org/wiki/Smart_pointer) comes from C++, and while it's
 | 
					 | 
				
			||||||
closely linked to a general design pattern of
 | 
					 | 
				
			||||||
["Resource Acquisition Is Initialization"](https://en.cppreference.com/w/cpp/language/raii), we'll
 | 
					 | 
				
			||||||
use it here specifically to describe objects that are responsible for managing ownership of data
 | 
					 | 
				
			||||||
allocated on the heap. The smart pointers available in the `alloc` crate should look mostly
 | 
					 | 
				
			||||||
familiar:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- [`Box`](https://doc.rust-lang.org/alloc/boxed/struct.Box.html)
 | 
					 | 
				
			||||||
- [`Rc`](https://doc.rust-lang.org/alloc/rc/struct.Rc.html)
 | 
					 | 
				
			||||||
- [`Arc`](https://doc.rust-lang.org/alloc/sync/struct.Arc.html)
 | 
					 | 
				
			||||||
- [`Cow`](https://doc.rust-lang.org/alloc/borrow/enum.Cow.html)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The [standard library](https://doc.rust-lang.org/std/) also defines some smart pointers to manage
 | 
					 | 
				
			||||||
heap objects, though more than can be covered here. Some examples are:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- [`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html)
 | 
					 | 
				
			||||||
- [`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, there is one ["gotcha"](https://www.merriam-webster.com/dictionary/gotcha): **cell types**
 | 
					 | 
				
			||||||
(like [`RefCell`](https://doc.rust-lang.org/stable/core/cell/struct.RefCell.html)) look and behave
 | 
					 | 
				
			||||||
similarly, but **don't involve heap allocation**. The
 | 
					 | 
				
			||||||
[`core::cell` docs](https://doc.rust-lang.org/stable/core/cell/index.html) have more information.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When a smart pointer is created, the data it is given is placed in heap memory and the location of
 | 
					 | 
				
			||||||
that data is recorded in the smart pointer. Once the smart pointer has determined it's safe to
 | 
					 | 
				
			||||||
deallocate that memory (when a `Box` has
 | 
					 | 
				
			||||||
[gone out of scope](https://doc.rust-lang.org/stable/std/boxed/index.html) or a reference count
 | 
					 | 
				
			||||||
[goes to zero](https://doc.rust-lang.org/alloc/rc/index.html)), the heap space is reclaimed. We can
 | 
					 | 
				
			||||||
prove these types use heap memory by looking at code:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::rc::Rc;
 | 
					 | 
				
			||||||
use std::sync::Arc;
 | 
					 | 
				
			||||||
use std::borrow::Cow;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn my_box() {
 | 
					 | 
				
			||||||
    // Drop at assembly line 1640
 | 
					 | 
				
			||||||
    Box::new(0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn my_rc() {
 | 
					 | 
				
			||||||
    // Drop at assembly line 1650
 | 
					 | 
				
			||||||
    Rc::new(0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn my_arc() {
 | 
					 | 
				
			||||||
    // Drop at assembly line 1660
 | 
					 | 
				
			||||||
    Arc::new(0);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn my_cow() {
 | 
					 | 
				
			||||||
    // Drop at assembly line 1672
 | 
					 | 
				
			||||||
    Cow::from("drop");
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/4AMQug)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Collections
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Collection types use heap memory because their contents have dynamic size; they will request more
 | 
					 | 
				
			||||||
memory [when needed](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve), and can
 | 
					 | 
				
			||||||
[release memory](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit) when it's
 | 
					 | 
				
			||||||
no longer necessary. This dynamic property forces Rust to heap allocate everything these collections contain. In
 | 
					 | 
				
			||||||
a way, **collections are smart pointers for many objects at a time**. Common types that fall under
 | 
					 | 
				
			||||||
this umbrella are [`Vec`](https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html),
 | 
					 | 
				
			||||||
[`HashMap`](https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html), and
 | 
					 | 
				
			||||||
[`String`](https://doc.rust-lang.org/stable/alloc/string/struct.String.html) (not
 | 
					 | 
				
			||||||
[`str`](https://doc.rust-lang.org/std/primitive.str.html)).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While collections store the objects they own in heap memory, _creating new collections will not
 | 
					 | 
				
			||||||
allocate on the heap_. This is a bit weird; if we call `Vec::new()`, the assembly shows a
 | 
					 | 
				
			||||||
corresponding call to `real_drop_in_place`:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
pub fn my_vec() {
 | 
					 | 
				
			||||||
    // Drop in place at line 481
 | 
					 | 
				
			||||||
    Vec::<u8>::new();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
-- [Compiler Explorer](https://godbolt.org/z/1WkNtC)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But because the vector has no elements to manage, no calls to the allocator will ever be dispatched:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
					 | 
				
			||||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    // Turn on panicking if we allocate on the heap
 | 
					 | 
				
			||||||
    DO_PANIC.store(true, Ordering::SeqCst);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Interesting bit happens here
 | 
					 | 
				
			||||||
    let x: Vec<u8> = Vec::new();
 | 
					 | 
				
			||||||
    drop(x);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Turn panicking back off, some deallocations occur
 | 
					 | 
				
			||||||
    // after main as well.
 | 
					 | 
				
			||||||
    DO_PANIC.store(false, Ordering::SeqCst);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static A: PanicAllocator = PanicAllocator;
 | 
					 | 
				
			||||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
 | 
					 | 
				
			||||||
struct PanicAllocator;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
unsafe impl GlobalAlloc for PanicAllocator {
 | 
					 | 
				
			||||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
					 | 
				
			||||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
					 | 
				
			||||||
            panic!("Unexpected allocation.");
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        System.alloc(layout)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
					 | 
				
			||||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
					 | 
				
			||||||
            panic!("Unexpected deallocation.");
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        System.dealloc(ptr, layout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
--
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=831a297d176d015b1f9ace01ae416cc6)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Other standard library types follow the same behavior; make sure to check out
 | 
					 | 
				
			||||||
[`HashMap::new()`](https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html#method.new),
 | 
					 | 
				
			||||||
and [`String::new()`](https://doc.rust-lang.org/std/string/struct.String.html#method.new).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Heap Alternatives
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While it is a bit strange to speak of the stack after spending time with the heap, it's worth
 | 
					 | 
				
			||||||
pointing out that some heap-allocated objects in Rust have stack-based counterparts provided by
 | 
					 | 
				
			||||||
other crates. If you have need of the functionality, but want to avoid allocating, there are
 | 
					 | 
				
			||||||
typically alternatives available.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When it comes to some standard library smart pointers
 | 
					 | 
				
			||||||
([`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html) and
 | 
					 | 
				
			||||||
[`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)), stack-based alternatives are
 | 
					 | 
				
			||||||
provided in crates like [parking_lot](https://crates.io/crates/parking_lot) and
 | 
					 | 
				
			||||||
[spin](https://crates.io/crates/spin). You can check out
 | 
					 | 
				
			||||||
[`lock_api::RwLock`](https://docs.rs/lock_api/0.1.5/lock_api/struct.RwLock.html),
 | 
					 | 
				
			||||||
[`lock_api::Mutex`](https://docs.rs/lock_api/0.1.5/lock_api/struct.Mutex.html), and
 | 
					 | 
				
			||||||
[`spin::Once`](https://mvdnes.github.io/rust-docs/spin-rs/spin/struct.Once.html) if you're in need
 | 
					 | 
				
			||||||
of synchronization primitives.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[thread_id](https://crates.io/crates/thread-id) may be necessary if you're implementing an allocator
 | 
					 | 
				
			||||||
because [`thread::current().id()`](https://doc.rust-lang.org/std/thread/struct.ThreadId.html) uses a
 | 
					 | 
				
			||||||
[`thread_local!` structure](https://doc.rust-lang.org/stable/src/std/sys_common/thread_info.rs.html#17-36)
 | 
					 | 
				
			||||||
that needs heap allocation.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Tracing Allocators
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When writing performance-sensitive code, there's no alternative to measuring your code. If you
 | 
					 | 
				
			||||||
didn't write a benchmark,
 | 
					 | 
				
			||||||
[you don't care about its performance](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263).
 | 
					 | 
				
			||||||
You should never rely on your instincts when
 | 
					 | 
				
			||||||
[a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Similarly, there's great work going on in Rust with allocators that keep track of what they're doing
 | 
					 | 
				
			||||||
(like [`alloc_counter`](https://crates.io/crates/alloc_counter)). When it comes to tracking heap
 | 
					 | 
				
			||||||
behavior, it's easy to make mistakes; please write tests and make sure you have tools to guard
 | 
					 | 
				
			||||||
against future issues.
 | 
					 | 
				
			||||||
@ -1,148 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Compiler Optimizations: What It's Done Lately"
 | 
					 | 
				
			||||||
description: "A lot. The answer is a lot."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Update 2019-02-10**: When debugging a
 | 
					 | 
				
			||||||
[related issue](https://gitlab.com/sio4/code/alloc-counter/issues/1), it was discovered that the
 | 
					 | 
				
			||||||
original code worked because LLVM optimized out the entire function, rather than just the allocation
 | 
					 | 
				
			||||||
segments. The code has been updated with proper use of
 | 
					 | 
				
			||||||
[`read_volatile`](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html), and a previous section
 | 
					 | 
				
			||||||
on vector capacity has been removed.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Up to this point, we've been discussing memory usage in the Rust language by focusing on simple
 | 
					 | 
				
			||||||
rules that are mostly right for small chunks of code. We've spent time showing how those rules work
 | 
					 | 
				
			||||||
themselves out in practice, and become familiar with reading the assembly code needed to see each
 | 
					 | 
				
			||||||
memory type (global, stack, heap) in action.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Throughout the series so far, we've put a handicap on the code. In the name of consistent and
 | 
					 | 
				
			||||||
understandable results, we've asked the compiler to pretty please leave the training wheels on. Now
 | 
					 | 
				
			||||||
is the time where we throw out all the rules and take off the kid gloves. As it turns out, both the
 | 
					 | 
				
			||||||
Rust compiler and the LLVM optimizers are incredibly sophisticated, and we'll step back and let them
 | 
					 | 
				
			||||||
do their job.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Similar to
 | 
					 | 
				
			||||||
["What Has My Compiler Done For Me Lately?"](https://www.youtube.com/watch?v=bSkpMdDe4g4), we're
 | 
					 | 
				
			||||||
focusing on interesting things the Rust language (and LLVM!) can do with memory management. We'll
 | 
					 | 
				
			||||||
still be looking at assembly code to understand what's going on, but it's important to mention
 | 
					 | 
				
			||||||
again: **please use automated tools like [alloc-counter](https://crates.io/crates/alloc_counter) to
 | 
					 | 
				
			||||||
double-check memory behavior if it's something you care about**. It's far too easy to mis-read
 | 
					 | 
				
			||||||
assembly in large code sections; you should always verify behavior if you care about memory usage.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The guiding principle as we move forward is this: _optimizing compilers won't produce worse programs
 | 
					 | 
				
			||||||
than we started with._ There won't be any situations where stack allocations get moved to heap
 | 
					 | 
				
			||||||
allocations. There will, however, be an opera of optimization.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# The Case of the Disappearing Box
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Our first optimization comes when LLVM can reason that the lifetime of an object is sufficiently
 | 
					 | 
				
			||||||
short that heap allocations aren't necessary. In these cases, LLVM will move the allocation to the
 | 
					 | 
				
			||||||
stack instead! The way this interacts with `#[inline]` attributes is a bit opaque, but the important
 | 
					 | 
				
			||||||
part is that LLVM can sometimes do better than the baseline Rust language:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
use std::alloc::{GlobalAlloc, Layout, System};
 | 
					 | 
				
			||||||
use std::sync::atomic::{AtomicBool, Ordering};
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn cmp(x: u32) {
 | 
					 | 
				
			||||||
    // Turn on panicking if we allocate on the heap
 | 
					 | 
				
			||||||
    DO_PANIC.store(true, Ordering::SeqCst);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // The compiler is able to see through the constant `Box`
 | 
					 | 
				
			||||||
    // and directly compare `x` to 24 - assembly line 73
 | 
					 | 
				
			||||||
    let y = Box::new(24);
 | 
					 | 
				
			||||||
    let equals = x == *y;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // This call to drop is eliminated
 | 
					 | 
				
			||||||
    drop(y);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Need to mark the comparison result as volatile so that
 | 
					 | 
				
			||||||
    // LLVM doesn't strip out all the code. If `y` is marked
 | 
					 | 
				
			||||||
    // volatile instead, allocation will be forced.
 | 
					 | 
				
			||||||
    unsafe { std::ptr::read_volatile(&equals) };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Turn off panicking, as there are some deallocations
 | 
					 | 
				
			||||||
    // when we exit main.
 | 
					 | 
				
			||||||
    DO_PANIC.store(false, Ordering::SeqCst);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn main() {
 | 
					 | 
				
			||||||
    cmp(12)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[global_allocator]
 | 
					 | 
				
			||||||
static A: PanicAllocator = PanicAllocator;
 | 
					 | 
				
			||||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
 | 
					 | 
				
			||||||
struct PanicAllocator;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
unsafe impl GlobalAlloc for PanicAllocator {
 | 
					 | 
				
			||||||
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
 | 
					 | 
				
			||||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
					 | 
				
			||||||
            panic!("Unexpected allocation.");
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        System.alloc(layout)
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
 | 
					 | 
				
			||||||
        if DO_PANIC.load(Ordering::SeqCst) {
 | 
					 | 
				
			||||||
            panic!("Unexpected deallocation.");
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        System.dealloc(ptr, layout);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## -- [Compiler Explorer](https://godbolt.org/z/BZ_Yp3)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4a765f753183d5b919f62c71d2109d5d)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Dr. Array or: How I Learned to Love the Optimizer
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, this isn't so much about LLVM figuring out different memory behavior, but LLVM stripping
 | 
					 | 
				
			||||||
out code that doesn't do anything. Optimizations of this type have a lot of nuance to them; if
 | 
					 | 
				
			||||||
you're not careful, they can make your benchmarks look
 | 
					 | 
				
			||||||
[impossibly good](https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=1199). In Rust, the
 | 
					 | 
				
			||||||
`black_box` function (implemented in both
 | 
					 | 
				
			||||||
[`libtest`](https://doc.rust-lang.org/1.1.0/test/fn.black_box.html) and
 | 
					 | 
				
			||||||
[`criterion`](https://docs.rs/criterion/0.2.10/criterion/fn.black_box.html)) will tell the compiler
 | 
					 | 
				
			||||||
to disable this kind of optimization. But if you let LLVM remove unnecessary code, you can end up
 | 
					 | 
				
			||||||
running programs that previously caused errors:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct TwoFiftySix {
 | 
					 | 
				
			||||||
    _a: [u64; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct EightK {
 | 
					 | 
				
			||||||
    _a: [TwoFiftySix; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct TwoFiftySixK {
 | 
					 | 
				
			||||||
    _a: [EightK; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[derive(Default)]
 | 
					 | 
				
			||||||
struct EightM {
 | 
					 | 
				
			||||||
    _a: [TwoFiftySixK; 32]
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn main() {
 | 
					 | 
				
			||||||
    // Normally this blows up because we can't reserve size on stack
 | 
					 | 
				
			||||||
    // for the `EightM` struct. But because the compiler notices we
 | 
					 | 
				
			||||||
    // never do anything with `_x`, it optimizes out the stack storage
 | 
					 | 
				
			||||||
    // and the program completes successfully.
 | 
					 | 
				
			||||||
    let _x = EightM::default();
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## -- [Compiler Explorer](https://godbolt.org/z/daHn7P)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4c253bf26072119896ab93c6ef064dc0)
 | 
					 | 
				
			||||||
@ -1,35 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Summary: What are the Allocation Rules?"
 | 
					 | 
				
			||||||
description: "A synopsis and reference."
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust, understanding-allocations]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While there's a lot of interesting detail captured in this series, it's often helpful to have a
 | 
					 | 
				
			||||||
document that answers some "yes/no" questions. You may not care about what an `Iterator` looks like
 | 
					 | 
				
			||||||
in assembly, you just need to know whether it allocates an object on the heap or not. And while Rust
 | 
					 | 
				
			||||||
will prioritize the fastest behavior it can, here are the rules for each memory type:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Heap Allocation**:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Smart pointers (`Box`, `Rc`, `Mutex`, etc.) allocate their contents in heap memory.
 | 
					 | 
				
			||||||
- Collections (`HashMap`, `Vec`, `String`, etc.) allocate their contents in heap memory.
 | 
					 | 
				
			||||||
- Some smart pointers in the standard library have counterparts in other crates that don't need heap
 | 
					 | 
				
			||||||
  memory. If possible, use those.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Stack Allocation**:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Everything not using a smart pointer will be allocated on the stack.
 | 
					 | 
				
			||||||
- Structs, enums, iterators, arrays, and closures are all stack allocated.
 | 
					 | 
				
			||||||
- Cell types (`RefCell`) behave like smart pointers, but are stack-allocated.
 | 
					 | 
				
			||||||
- Inlining (`#[inline]`) will not affect allocation behavior for better or worse.
 | 
					 | 
				
			||||||
- Types that are marked `Copy` are guaranteed to have their contents stack-allocated.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Global Allocation**:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- `const` is a fixed value; the compiler is allowed to copy it wherever useful.
 | 
					 | 
				
			||||||
- `static` is a fixed reference; the compiler will guarantee it is unique.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 --
 | 
					 | 
				
			||||||
[Raph Levien](https://docs.google.com/presentation/d/1q-c7UAyrUlM-eZyTo1pd8SZ0qwA_wYxmPZVOQkoDmH4/edit?usp=sharing)
 | 
					 | 
				
			||||||
@ -1,52 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Making Bread"
 | 
					 | 
				
			||||||
description: "...because I've got some free time now. 🍞"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [baking]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Having recently started my "gardening leave" between positions, I have some more personal time
 | 
					 | 
				
			||||||
available. I'm planning to stay productive, contributing to some open-source projects, but it also
 | 
					 | 
				
			||||||
occurred to me that despite [talking about](https://speice.io/2018/05/hello.html) bread pics, this
 | 
					 | 
				
			||||||
blog has been purely technical. Maybe I'll change the site title from "The Old Speice Guy" to "Bites
 | 
					 | 
				
			||||||
and Bytes"?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Either way, I'm baking a little bit again, and figured it was worth taking a quick break to focus on
 | 
					 | 
				
			||||||
some lighter material. I recently learned two critically important lessons: first, the temperature
 | 
					 | 
				
			||||||
of the dough when you put the yeast in makes a huge difference.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Previously, when I wasn't paying attention to dough temperature:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Compared with what happens when I put the dough in the microwave for a defrost cycle because the
 | 
					 | 
				
			||||||
water I used wasn't warm enough:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I mean, just look at the bubbles!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
After shaping the dough, I've got two loaves ready:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now, the recipe normally calls for a Dutch Oven to bake the bread because it keeps the dough from
 | 
					 | 
				
			||||||
drying out in the oven. Because I don't own a Dutch Oven, I typically put a casserole dish on the
 | 
					 | 
				
			||||||
bottom rack and fill it with water so there's still some moisture in the oven. This time, I forgot
 | 
					 | 
				
			||||||
to add the water and learned my second lesson: never add room-temperature water to a glass dish
 | 
					 | 
				
			||||||
that's currently at 500 degrees.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Needless to say, trying to pull out sharp glass from an incredibly hot oven is not what I expected
 | 
					 | 
				
			||||||
to be doing during my garden leave.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In the end, the bread crust wasn't great, but the bread itself turned out pretty alright:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I've been writing a lot more during this break, so I'm looking forward to sharing that in the
 | 
					 | 
				
			||||||
future. In the meantime, I'm planning on making a sandwich.
 | 
					 | 
				
			||||||
@ -1,296 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "On Building High Performance Systems"
 | 
					 | 
				
			||||||
description: ""
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: []
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Update 2019-09-21**: Added notes on `isolcpus` and `systemd` affinity.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Prior to working in the trading industry, my assumption was that High Frequency Trading (HFT) is
 | 
					 | 
				
			||||||
made up of people who have access to secret techniques mortal developers could only dream of. There
 | 
					 | 
				
			||||||
had to be some secret art that could only be learned if one had an appropriately tragic backstory:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<img src="/assets/images/2019-04-24-kung-fu.webp" alt="kung-fu fight">
 | 
					 | 
				
			||||||
> How I assumed HFT people learn their secret techniques
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
How else do you explain people working on systems that complete the round trip of market data in to
 | 
					 | 
				
			||||||
orders out (a.k.a. tick-to-trade) consistently within
 | 
					 | 
				
			||||||
[750-800 nanoseconds](https://stackoverflow.com/a/22082528/1454178)? In roughly the time it takes a
 | 
					 | 
				
			||||||
computer to access
 | 
					 | 
				
			||||||
[main memory 8 times](https://people.eecs.berkeley.edu/~rcs/research/interactive_latency.html),
 | 
					 | 
				
			||||||
trading systems are capable of reading the market data packets, deciding what orders to send, doing
 | 
					 | 
				
			||||||
risk checks, creating new packets for exchange-specific protocols, and putting those packets on the
 | 
					 | 
				
			||||||
wire.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Having now worked in the trading industry, I can confirm the developers aren't super-human; I've
 | 
					 | 
				
			||||||
made some simple mistakes at the very least. Instead, what shows up in public discussions is that
 | 
					 | 
				
			||||||
philosophy, not technique, separates high-performance systems from everything else.
 | 
					 | 
				
			||||||
Performance-critical systems don't rely on "this one cool C++ optimization trick" to make code fast
 | 
					 | 
				
			||||||
(though micro-optimizations have their place); there's a lot more to worry about than just the code
 | 
					 | 
				
			||||||
written for the project.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The framework I'd propose is this: **If you want to build high-performance systems, focus first on
 | 
					 | 
				
			||||||
reducing performance variance** (reducing the gap between the fastest and slowest runs of the same
 | 
					 | 
				
			||||||
code), **and only look at average latency once variance is at an acceptable level**.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Don't get me wrong, I'm a much happier person when things are fast. Computer goes from booting in 20
 | 
					 | 
				
			||||||
seconds down to 10 because I installed a solid-state drive? Awesome. But if every fifth day it takes
 | 
					 | 
				
			||||||
a full minute to boot because of corrupted sectors? Not so great. Average speed over the course of a
 | 
					 | 
				
			||||||
week is the same in each situation, but you're painfully aware of that minute when it happens. When
 | 
					 | 
				
			||||||
it comes to code, the principle is the same: speeding up a function by an average of 10 milliseconds
 | 
					 | 
				
			||||||
doesn't mean much if there's a 100ms difference between your fastest and slowest runs. When
 | 
					 | 
				
			||||||
performance matters, you need to respond quickly _every time_, not just in aggregate.
 | 
					 | 
				
			||||||
High-performance systems should first optimize for time variance. Once you're consistent at the time
 | 
					 | 
				
			||||||
scale you care about, then focus on improving average time.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This focus on variance shows up all the time in industry too (emphasis added in all quotes below):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- In [marketing materials](https://business.nasdaq.com/market-tech/marketplaces/trading) for
 | 
					 | 
				
			||||||
  NASDAQ's matching engine, the most performance-sensitive component of the exchange, dependability
 | 
					 | 
				
			||||||
  is highlighted in addition to instantaneous metrics:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  > Able to **consistently sustain** an order rate of over 100,000 orders per second at sub-40
 | 
					 | 
				
			||||||
  > microsecond average latency
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- The [Aeron](https://github.com/real-logic/aeron) message bus has this to say about performance:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  > Performance is the key focus. Aeron is designed to be the highest throughput with the lowest and
 | 
					 | 
				
			||||||
  > **most predictable latency possible** of any messaging system
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- The company PolySync, which is working on autonomous vehicles,
 | 
					 | 
				
			||||||
  [mentions why](https://polysync.io/blog/session-types-for-hearty-codecs/) they picked their
 | 
					 | 
				
			||||||
  specific messaging format:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  > In general, high performance is almost always desirable for serialization. But in the world of
 | 
					 | 
				
			||||||
  > autonomous vehicles, **steady timing performance is even more important** than peak throughput.
 | 
					 | 
				
			||||||
  > This is because safe operation is sensitive to timing outliers. Nobody wants the system that
 | 
					 | 
				
			||||||
  > decides when to slam on the brakes to occasionally take 100 times longer than usual to encode
 | 
					 | 
				
			||||||
  > its commands.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- [Solarflare](https://solarflare.com/), which makes highly-specialized network hardware, points out
 | 
					 | 
				
			||||||
  variance (jitter) as a big concern for
 | 
					 | 
				
			||||||
  [electronic trading](https://solarflare.com/electronic-trading/):
 | 
					 | 
				
			||||||
  > The high stakes world of electronic trading, investment banks, market makers, hedge funds and
 | 
					 | 
				
			||||||
  > exchanges demand the **lowest possible latency and jitter** while utilizing the highest
 | 
					 | 
				
			||||||
  > bandwidth and return on their investment.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
And to further clarify: we're not discussing _total run-time_, but variance of total run-time. There
 | 
					 | 
				
			||||||
are situations where it's not reasonably possible to make things faster, and you'd much rather be
 | 
					 | 
				
			||||||
consistent. For example, trading firms use
 | 
					 | 
				
			||||||
[wireless networks](https://sniperinmahwah.wordpress.com/2017/06/07/network-effects-part-i/) because
 | 
					 | 
				
			||||||
the speed of light through air is faster than through fiber-optic cables. There's still at _absolute
 | 
					 | 
				
			||||||
minimum_ a [~33.76 millisecond](http://tinyurl.com/y2vd7tn8) delay required to send data between,
 | 
					 | 
				
			||||||
say,
 | 
					 | 
				
			||||||
[Chicago and Tokyo](https://www.theice.com/market-data/connectivity-and-feeds/wireless/tokyo-chicago).
 | 
					 | 
				
			||||||
If a trading system in Chicago calls the function for "send order to Tokyo" and waits to see if a
 | 
					 | 
				
			||||||
trade occurs, there's a physical limit to how long that will take. In this situation, the focus is
 | 
					 | 
				
			||||||
on keeping variance of _additional processing_ to a minimum, since speed of light is the limiting
 | 
					 | 
				
			||||||
factor.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So how does one go about looking for and eliminating performance variance? To tell the truth, I
 | 
					 | 
				
			||||||
don't think a systematic answer or flow-chart exists. There's no substitute for (A) building a deep
 | 
					 | 
				
			||||||
understanding of the entire technology stack, and (B) actually measuring system performance (though
 | 
					 | 
				
			||||||
(C) watching a lot of [CppCon](https://www.youtube.com/channel/UCMlGfpWw-RUdWX_JbLCukXg) videos for
 | 
					 | 
				
			||||||
inspiration never hurt). Even then, every project cares about performance to a different degree; you
 | 
					 | 
				
			||||||
may need to build an entire
 | 
					 | 
				
			||||||
[replica production system](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=3015) to
 | 
					 | 
				
			||||||
accurately benchmark at nanosecond precision, or you may be content to simply
 | 
					 | 
				
			||||||
[avoid garbage collection](https://www.youtube.com/watch?v=BD9cRbxWQx8&feature=youtu.be&t=1335) in
 | 
					 | 
				
			||||||
your Java code.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Even though everyone has different needs, there are still common things to look for when trying to
 | 
					 | 
				
			||||||
isolate and eliminate variance. In no particular order, these are my focus areas when thinking about
 | 
					 | 
				
			||||||
high-performance systems:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Language-specific
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Garbage Collection**: How often does garbage collection happen? When is it triggered? What are the
 | 
					 | 
				
			||||||
impacts?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- [In Python](https://rushter.com/blog/python-garbage-collector/), individual objects are collected
 | 
					 | 
				
			||||||
  if the reference count reaches 0, and each generation is collected if
 | 
					 | 
				
			||||||
  `num_alloc - num_dealloc > gc_threshold` whenever an allocation happens. The GIL is acquired for
 | 
					 | 
				
			||||||
  the duration of generational collection.
 | 
					 | 
				
			||||||
- Java has
 | 
					 | 
				
			||||||
  [many](https://docs.oracle.com/en/java/javase/12/gctuning/parallel-collector1.html#GUID-DCDD6E46-0406-41D1-AB49-FB96A50EB9CE)
 | 
					 | 
				
			||||||
  [different](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector.html#GUID-ED3AB6D3-FD9B-4447-9EDF-983ED2F7A573)
 | 
					 | 
				
			||||||
  [collection](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector-tuning.html#GUID-90E30ACA-8040-432E-B3A0-1E0440AB556A)
 | 
					 | 
				
			||||||
  [algorithms](https://docs.oracle.com/en/java/javase/12/gctuning/z-garbage-collector1.html#GUID-A5A42691-095E-47BA-B6DC-FB4E5FAA43D0)
 | 
					 | 
				
			||||||
  to choose from, each with different characteristics. The default algorithms (Parallel GC in Java
 | 
					 | 
				
			||||||
  8, G1 in Java 9) freeze the JVM while collecting, while more recent algorithms
 | 
					 | 
				
			||||||
  ([ZGC](https://wiki.openjdk.java.net/display/zgc) and
 | 
					 | 
				
			||||||
  [Shenandoah](https://wiki.openjdk.java.net/display/shenandoah)) are designed to keep "stop the
 | 
					 | 
				
			||||||
  world" to a minimum by doing collection work in parallel.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Allocation**: Every language has a different way of interacting with "heap" memory, but the
 | 
					 | 
				
			||||||
principle is the same: running the allocator to allocate/deallocate memory takes time that can often
 | 
					 | 
				
			||||||
be put to better use. Understanding when your language interacts with the allocator is crucial, and
 | 
					 | 
				
			||||||
not always obvious. For example: C++ and Rust don't allocate heap memory for iterators, but Java
 | 
					 | 
				
			||||||
does (meaning potential GC pauses). Take time to understand heap behavior (I made a
 | 
					 | 
				
			||||||
[a guide for Rust](/2019/02/understanding-allocations-in-rust.html)), and look into alternative
 | 
					 | 
				
			||||||
allocators ([jemalloc](http://jemalloc.net/),
 | 
					 | 
				
			||||||
[tcmalloc](https://gperftools.github.io/gperftools/tcmalloc.html)) that might run faster than the
 | 
					 | 
				
			||||||
operating system default.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Data Layout**: How your data is arranged in memory matters;
 | 
					 | 
				
			||||||
[data-oriented design](https://www.youtube.com/watch?v=yy8jQgmhbAU) and
 | 
					 | 
				
			||||||
[cache locality](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=1185) can have huge
 | 
					 | 
				
			||||||
impacts on performance. The C family of languages (C, value types in C#, C++) and Rust all have
 | 
					 | 
				
			||||||
guarantees about the shape every object takes in memory that others (e.g. Java and Python) can't
 | 
					 | 
				
			||||||
make. [Cachegrind](http://valgrind.org/docs/manual/cg-manual.html) and kernel
 | 
					 | 
				
			||||||
[perf](https://perf.wiki.kernel.org/index.php/Main_Page) counters are both great for understanding
 | 
					 | 
				
			||||||
how performance relates to memory layout.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Just-In-Time Compilation**: Languages that are compiled on the fly (LuaJIT, C#, Java, PyPy) are
 | 
					 | 
				
			||||||
great because they optimize your program for how it's actually being used, rather than how a
 | 
					 | 
				
			||||||
compiler expects it to be used. However, there's a variance problem if the program stops executing
 | 
					 | 
				
			||||||
while waiting for translation from VM bytecode to native code. As a remedy, many languages support
 | 
					 | 
				
			||||||
ahead-of-time compilation in addition to the JIT versions
 | 
					 | 
				
			||||||
([CoreRT](https://github.com/dotnet/corert) in C# and [GraalVM](https://www.graalvm.org/) in Java).
 | 
					 | 
				
			||||||
On the other hand, LLVM supports
 | 
					 | 
				
			||||||
[Profile Guided Optimization](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization),
 | 
					 | 
				
			||||||
which theoretically brings JIT benefits to non-JIT languages. Finally, be careful to avoid comparing
 | 
					 | 
				
			||||||
apples and oranges during benchmarks; you don't want your code to suddenly speed up because the JIT
 | 
					 | 
				
			||||||
compiler kicked in.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Programming Tricks**: These won't make or break performance, but can be useful in specific
 | 
					 | 
				
			||||||
circumstances. For example, C++ can use
 | 
					 | 
				
			||||||
[templates instead of branches](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=1206)
 | 
					 | 
				
			||||||
in critical sections.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Kernel
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Code you wrote is almost certainly not the _only_ code running on your hardware. There are many ways
 | 
					 | 
				
			||||||
the operating system interacts with your program, from interrupts to system calls, that are
 | 
					 | 
				
			||||||
important to watch for. These are written from a Linux perspective, but Windows does typically have
 | 
					 | 
				
			||||||
equivalent functionality.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Scheduling**: The kernel is normally free to schedule any process on any core, so it's important
 | 
					 | 
				
			||||||
to reserve CPU cores exclusively for the important programs. There are a few parts to this: first,
 | 
					 | 
				
			||||||
limit the CPU cores that non-critical processes are allowed to run on by excluding cores from
 | 
					 | 
				
			||||||
scheduling
 | 
					 | 
				
			||||||
([`isolcpus`](https://www.linuxtopia.org/online_books/linux_kernel/kernel_configuration/re46.html)
 | 
					 | 
				
			||||||
kernel command-line option), or by setting the `init` process CPU affinity
 | 
					 | 
				
			||||||
([`systemd` example](https://access.redhat.com/solutions/2884991)). Second, set critical processes
 | 
					 | 
				
			||||||
to run on the isolated cores by setting the
 | 
					 | 
				
			||||||
[processor affinity](https://en.wikipedia.org/wiki/Processor_affinity) using
 | 
					 | 
				
			||||||
[taskset](https://linux.die.net/man/1/taskset). Finally, use
 | 
					 | 
				
			||||||
[`NO_HZ`](https://github.com/torvalds/linux/blob/master/Documentation/timers/NO_HZ.txt) or
 | 
					 | 
				
			||||||
[`chrt`](https://linux.die.net/man/1/chrt) to disable scheduling interrupts. Turning off
 | 
					 | 
				
			||||||
hyper-threading is also likely beneficial.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**System calls**: Reading from a UNIX socket? Writing to a file? In addition to not knowing how long
 | 
					 | 
				
			||||||
the I/O operation takes, these all trigger expensive
 | 
					 | 
				
			||||||
[system calls (syscalls)](https://en.wikipedia.org/wiki/System_call). To handle these, the CPU must
 | 
					 | 
				
			||||||
[context switch](https://en.wikipedia.org/wiki/Context_switch) to the kernel, let the kernel
 | 
					 | 
				
			||||||
operation complete, then context switch back to your program. We'd rather keep these
 | 
					 | 
				
			||||||
[to a minimum](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) (see
 | 
					 | 
				
			||||||
timestamp 18:20). [Strace](https://linux.die.net/man/1/strace) is your friend for understanding when
 | 
					 | 
				
			||||||
and where syscalls happen.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Signal Handling**: Far less likely to be an issue, but signals do trigger a context switch if your
 | 
					 | 
				
			||||||
code has a handler registered. This will be highly dependent on the application, but you can
 | 
					 | 
				
			||||||
[block signals](https://www.linuxprogrammingblog.com/all-about-linux-signals?page=show#Blocking_signals)
 | 
					 | 
				
			||||||
if it's an issue.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Interrupts**: System interrupts are how devices connected to your computer notify the CPU that
 | 
					 | 
				
			||||||
something has happened. The CPU will then choose a processor core to pause and context switch to the
 | 
					 | 
				
			||||||
OS to handle the interrupt. Make sure that
 | 
					 | 
				
			||||||
[SMP affinity](http://www.alexonlinux.com/smp-affinity-and-proper-interrupt-handling-in-linux) is
 | 
					 | 
				
			||||||
set so that interrupts are handled on a CPU core not running the program you care about.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**[NUMA](https://www.kernel.org/doc/html/latest/vm/numa.html)**: While NUMA is good at making
 | 
					 | 
				
			||||||
multi-cell systems transparent, there are variance implications; if the kernel moves a process
 | 
					 | 
				
			||||||
across nodes, future memory accesses must wait for the controller on the original node. Use
 | 
					 | 
				
			||||||
[numactl](https://linux.die.net/man/8/numactl) to handle memory-/cpu-cell pinning so this doesn't
 | 
					 | 
				
			||||||
happen.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Hardware
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**CPU Pipelining/Speculation**: Speculative execution in modern processors gave us vulnerabilities
 | 
					 | 
				
			||||||
like Spectre, but it also gave us performance improvements like
 | 
					 | 
				
			||||||
[branch prediction](https://stackoverflow.com/a/11227902/1454178). And if the CPU mis-speculates
 | 
					 | 
				
			||||||
your code, there's variance associated with rewind and replay. While the compiler knows a lot about
 | 
					 | 
				
			||||||
how your CPU [pipelines instructions](https://youtu.be/nAbCKa0FzjQ?t=4467), code can be
 | 
					 | 
				
			||||||
[structured to help](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=755) the branch
 | 
					 | 
				
			||||||
predictor.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Paging**: For most systems, virtual memory is incredible. Applications live in their own worlds,
 | 
					 | 
				
			||||||
and the CPU/[MMU](https://en.wikipedia.org/wiki/Memory_management_unit) figures out the details.
 | 
					 | 
				
			||||||
However, there's a variance penalty associated with memory paging and caching; if you access more
 | 
					 | 
				
			||||||
memory pages than the [TLB](https://en.wikipedia.org/wiki/Translation_lookaside_buffer) can store,
 | 
					 | 
				
			||||||
you'll have to wait for the page walk. Kernel perf tools are necessary to figure out if this is an
 | 
					 | 
				
			||||||
issue, but using [huge pages](https://blog.pythian.com/performance-tuning-hugepages-in-linux/) can
 | 
					 | 
				
			||||||
reduce TLB burdens. Alternately, running applications in a hypervisor like
 | 
					 | 
				
			||||||
[Jailhouse](https://github.com/siemens/jailhouse) allows one to skip virtual memory entirely, but
 | 
					 | 
				
			||||||
this is probably more work than the benefits are worth.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Network Interfaces**: When more than one computer is involved, variance can go up dramatically.
 | 
					 | 
				
			||||||
Tuning kernel
 | 
					 | 
				
			||||||
[network parameters](https://github.com/leandromoreira/linux-network-performance-parameters) may be
 | 
					 | 
				
			||||||
helpful, but modern systems more frequently opt to skip the kernel altogether with a technique
 | 
					 | 
				
			||||||
called [kernel bypass](https://blog.cloudflare.com/kernel-bypass/). This typically requires
 | 
					 | 
				
			||||||
specialized hardware and [drivers](https://www.openonload.org/), but even industries like
 | 
					 | 
				
			||||||
[telecom](https://www.bbc.co.uk/rd/blog/2018-04-high-speed-networking-open-source-kernel-bypass) are
 | 
					 | 
				
			||||||
finding the benefits.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Networks
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Routing**: There's a reason financial firms are willing to pay
 | 
					 | 
				
			||||||
[millions of euros](https://sniperinmahwah.wordpress.com/2019/03/26/4-les-moeres-english-version/)
 | 
					 | 
				
			||||||
for rights to a small plot of land - having a straight-line connection from point A to point B means
 | 
					 | 
				
			||||||
the path their data takes is the shortest possible. In contrast, there are currently 6 computers in
 | 
					 | 
				
			||||||
between me and Google, but that may change at any moment if my ISP realizes a
 | 
					 | 
				
			||||||
[more efficient route](https://en.wikipedia.org/wiki/Border_Gateway_Protocol) is available. Whether
 | 
					 | 
				
			||||||
it's using
 | 
					 | 
				
			||||||
[research-quality equipment](https://sniperinmahwah.wordpress.com/2018/05/07/shortwave-trading-part-i-the-west-chicago-tower-mystery/)
 | 
					 | 
				
			||||||
for shortwave radio, or just making sure there's no data inadvertently going between data centers,
 | 
					 | 
				
			||||||
routing matters.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Protocol**: TCP as a network protocol is awesome: guaranteed and in-order delivery, flow control,
 | 
					 | 
				
			||||||
and congestion control all built in. But these attributes make the most sense when networking
 | 
					 | 
				
			||||||
infrastructure is lossy; for systems that expect nearly all packets to be delivered correctly, the
 | 
					 | 
				
			||||||
setup handshaking and packet acknowledgment are just overhead. Using UDP (unicast or multicast) may
 | 
					 | 
				
			||||||
make sense in these contexts as it avoids the chatter needed to track connection state, and
 | 
					 | 
				
			||||||
[gap-fill](https://iextrading.com/docs/IEX%20Transport%20Specification.pdf)
 | 
					 | 
				
			||||||
[strategies](http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/moldudp64.pdf)
 | 
					 | 
				
			||||||
can handle the rest.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
**Switching**: Many routers/switches handle packets using "store-and-forward" behavior: wait for the
 | 
					 | 
				
			||||||
whole packet, validate checksums, and then send to the next device. In variance terms, the time
 | 
					 | 
				
			||||||
needed to move data between two nodes is proportional to the size of that data; the switch must
 | 
					 | 
				
			||||||
"store" all data before it can calculate checksums and "forward" to the next node. With
 | 
					 | 
				
			||||||
["cut-through"](https://www.networkworld.com/article/2241573/latency-and-jitter--cut-through-design-pays-off-for-arista--blade.html)
 | 
					 | 
				
			||||||
designs, switches will begin forwarding data as soon as they know where the destination is,
 | 
					 | 
				
			||||||
checksums be damned. This means there's a fixed cost (at the switch) for network traffic, no matter
 | 
					 | 
				
			||||||
the size.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Final Thoughts
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
High-performance systems, regardless of industry, are not magical. They do require extreme precision
 | 
					 | 
				
			||||||
and attention to detail, but they're designed, built, and operated by regular people, using a lot of
 | 
					 | 
				
			||||||
tools that are publicly available. Interested in seeing how context switching affects performance of
 | 
					 | 
				
			||||||
your benchmarks? `taskset` should be installed in all modern Linux distributions, and can be used to
 | 
					 | 
				
			||||||
make sure the OS never migrates your process. Curious how often garbage collection triggers during a
 | 
					 | 
				
			||||||
crucial operation? Your language of choice will typically expose details of its operations
 | 
					 | 
				
			||||||
([Python](https://docs.python.org/3/library/gc.html),
 | 
					 | 
				
			||||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/vmoptions-jsp-140102.html#DebuggingOptions)).
 | 
					 | 
				
			||||||
Want to know how hard your program is stressing the TLB? Use `perf record` and look for
 | 
					 | 
				
			||||||
`dtlb_load_misses.miss_causes_a_walk`.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Two final guiding questions, then: first, before attempting to apply some of the technology above to
 | 
					 | 
				
			||||||
your own systems, can you first identify
 | 
					 | 
				
			||||||
[where/when you care](http://wiki.c2.com/?PrematureOptimization) about "high-performance"? As an
 | 
					 | 
				
			||||||
example, if parts of a system rely on humans pushing buttons, CPU pinning won't have any measurable
 | 
					 | 
				
			||||||
effect. Humans are already far too slow to react in time. Second, if you're using benchmarks, are
 | 
					 | 
				
			||||||
they being designed in a way that's actually helpful? Tools like
 | 
					 | 
				
			||||||
[Criterion](http://www.serpentine.com/criterion/) (also in
 | 
					 | 
				
			||||||
[Rust](https://github.com/bheisler/criterion.rs)) and Google's
 | 
					 | 
				
			||||||
[Benchmark](https://github.com/google/benchmark) output not only average run time, but variance as
 | 
					 | 
				
			||||||
well; your benchmarking environment is subject to the same concerns your production environment is.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, I believe high-performance systems are a matter of philosophy, not necessarily technique.
 | 
					 | 
				
			||||||
Rigorous focus on variance is the first step, and there are plenty of ways to measure and mitigate
 | 
					 | 
				
			||||||
it; once that's at an acceptable level, then optimize for speed.
 | 
					 | 
				
			||||||
@ -1,263 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Binary Format Shootout"
 | 
					 | 
				
			||||||
description: "Cap'n Proto vs. Flatbuffers vs. SBE"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [rust]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I've found that in many personal projects,
 | 
					 | 
				
			||||||
[analysis paralysis](https://en.wikipedia.org/wiki/Analysis_paralysis) is particularly deadly.
 | 
					 | 
				
			||||||
Making good decisions in the beginning avoids pain and suffering later; if extra research prevents
 | 
					 | 
				
			||||||
future problems, I'm happy to continue ~~procrastinating~~ researching indefinitely.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
So let's say you're in need of a binary serialization format. Data will be going over the network,
 | 
					 | 
				
			||||||
not just in memory, so having a schema document and code generation is a must. Performance is
 | 
					 | 
				
			||||||
crucial, so formats that support zero-copy de/serialization are given priority. And the more
 | 
					 | 
				
			||||||
languages supported, the better; I use Rust, but can't predict what other languages this could
 | 
					 | 
				
			||||||
interact with.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Given these requirements, the candidates I could find were:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
1. [Cap'n Proto](https://capnproto.org/) has been around the longest, and is the most established
 | 
					 | 
				
			||||||
2. [Flatbuffers](https://google.github.io/flatbuffers/) is the newest, and claims to have a simpler
 | 
					 | 
				
			||||||
   encoding
 | 
					 | 
				
			||||||
3. [Simple Binary Encoding](https://github.com/real-logic/simple-binary-encoding) has the simplest
 | 
					 | 
				
			||||||
   encoding, but the Rust implementation is unmaintained
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Any one of these will satisfy the project requirements: easy to transmit over a network, reasonably
 | 
					 | 
				
			||||||
fast, and polyglot support. But how do you actually pick one? It's impossible to know what issues
 | 
					 | 
				
			||||||
will follow that choice, so I tend to avoid commitment until the last possible moment.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Still, a choice must be made. Instead of worrying about which is "the best," I decided to build a
 | 
					 | 
				
			||||||
small proof-of-concept system in each format and pit them against each other. All code can be found
 | 
					 | 
				
			||||||
in the [repository](https://github.com/speice-io/marketdata-shootout) for this post.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We'll discuss more in detail, but a quick preview of the results:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Cap'n Proto: Theoretically performs incredibly well, the implementation had issues
 | 
					 | 
				
			||||||
- Flatbuffers: Has some quirks, but largely lived up to its "zero-copy" promises
 | 
					 | 
				
			||||||
- SBE: Best median and worst-case performance, but the message structure has a limited feature set
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Prologue: Binary Parsing with Nom
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Our benchmark system will be a simple data processor; given depth-of-book market data from
 | 
					 | 
				
			||||||
[IEX](https://iextrading.com/trading/market-data/#deep), serialize each message into the schema
 | 
					 | 
				
			||||||
format, read it back, and calculate total size of stock traded and the lowest/highest quoted prices.
 | 
					 | 
				
			||||||
This test isn't complex, but is representative of the project I need a binary format for.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But before we make it to that point, we have to actually read in the market data. To do so, I'm
 | 
					 | 
				
			||||||
using a library called [`nom`](https://github.com/Geal/nom). Version 5.0 was recently released and
 | 
					 | 
				
			||||||
brought some big changes, so this was an opportunity to build a non-trivial program and get
 | 
					 | 
				
			||||||
familiar.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
If you don't already know about `nom`, it's a "parser generator". By combining different smaller
 | 
					 | 
				
			||||||
parsers, you can assemble a parser to handle complex structures without writing tedious code by
 | 
					 | 
				
			||||||
hand. For example, when parsing
 | 
					 | 
				
			||||||
[PCAP files](https://www.winpcap.org/ntar/draft/PCAP-DumpFileFormat.html#rfc.section.3.3):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
   0                   1                   2                   3
 | 
					 | 
				
			||||||
   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 | 
					 | 
				
			||||||
   +---------------------------------------------------------------+
 | 
					 | 
				
			||||||
 0 |                    Block Type = 0x00000006                    |
 | 
					 | 
				
			||||||
   +---------------------------------------------------------------+
 | 
					 | 
				
			||||||
 4 |                      Block Total Length                       |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
 8 |                         Interface ID                          |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
12 |                        Timestamp (High)                       |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
16 |                        Timestamp (Low)                        |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
20 |                         Captured Len                          |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
24 |                          Packet Len                           |
 | 
					 | 
				
			||||||
   +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
 | 
					 | 
				
			||||||
   |                          Packet Data                          |
 | 
					 | 
				
			||||||
   |                              ...                              |
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
...you can build a parser in `nom` that looks like
 | 
					 | 
				
			||||||
[this](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/parsers.rs#L59-L93):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```rust
 | 
					 | 
				
			||||||
const ENHANCED_PACKET: [u8; 4] = [0x06, 0x00, 0x00, 0x00];
 | 
					 | 
				
			||||||
pub fn enhanced_packet_block(input: &[u8]) -> IResult<&[u8], &[u8]> {
 | 
					 | 
				
			||||||
    let (
 | 
					 | 
				
			||||||
        remaining,
 | 
					 | 
				
			||||||
        (
 | 
					 | 
				
			||||||
            block_type,
 | 
					 | 
				
			||||||
            block_len,
 | 
					 | 
				
			||||||
            interface_id,
 | 
					 | 
				
			||||||
            timestamp_high,
 | 
					 | 
				
			||||||
            timestamp_low,
 | 
					 | 
				
			||||||
            captured_len,
 | 
					 | 
				
			||||||
            packet_len,
 | 
					 | 
				
			||||||
        ),
 | 
					 | 
				
			||||||
    ) = tuple((
 | 
					 | 
				
			||||||
        tag(ENHANCED_PACKET),
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
        le_u32,
 | 
					 | 
				
			||||||
    ))(input)?;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    let (remaining, packet_data) = take(captured_len)(remaining)?;
 | 
					 | 
				
			||||||
    Ok((remaining, packet_data))
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While this example isn't too interesting, more complex formats (like IEX market data) are where
 | 
					 | 
				
			||||||
[`nom` really shines](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/iex.rs).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Ultimately, because the `nom` code in this shootout was the same for all formats, we're not too
 | 
					 | 
				
			||||||
interested in its performance. Still, it's worth mentioning that building the market data parser was
 | 
					 | 
				
			||||||
actually fun; I didn't have to write tons of boring code by hand.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Part 1: Cap'n Proto
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now it's time to get into the meaty part of the story. Cap'n Proto was the first format I tried
 | 
					 | 
				
			||||||
because of how long it has supported Rust (thanks to [dwrensha](https://github.com/dwrensha) for
 | 
					 | 
				
			||||||
maintaining the Rust port since
 | 
					 | 
				
			||||||
[2014!](https://github.com/capnproto/capnproto-rust/releases/tag/rustc-0.10)). However, I had a ton
 | 
					 | 
				
			||||||
of performance concerns once I started using it.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To serialize new messages, Cap'n Proto uses a "builder" object. This builder allocates memory on the
 | 
					 | 
				
			||||||
heap to hold the message content, but because builders
 | 
					 | 
				
			||||||
[can't be re-used](https://github.com/capnproto/capnproto-rust/issues/111), we have to allocate a
 | 
					 | 
				
			||||||
new buffer for every single message. I was able to work around this with a
 | 
					 | 
				
			||||||
[special builder](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/capnp_runner.rs#L17-L51)
 | 
					 | 
				
			||||||
that could re-use the buffer, but it required reading through Cap'n Proto's
 | 
					 | 
				
			||||||
[benchmarks](https://github.com/capnproto/capnproto-rust/blob/master/benchmark/benchmark.rs#L124-L156)
 | 
					 | 
				
			||||||
to find an example, and used
 | 
					 | 
				
			||||||
[`std::mem::transmute`](https://doc.rust-lang.org/std/mem/fn.transmute.html) to bypass Rust's borrow
 | 
					 | 
				
			||||||
checker.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The process of reading messages was better, but still had issues. Cap'n Proto has two message
 | 
					 | 
				
			||||||
encodings: a ["packed"](https://capnproto.org/encoding.html#packing) representation, and an
 | 
					 | 
				
			||||||
"unpacked" version. When reading "packed" messages, we need a buffer to unpack the message into
 | 
					 | 
				
			||||||
before we can use it; Cap'n Proto allocates a new buffer for each message we unpack, and I wasn't
 | 
					 | 
				
			||||||
able to figure out a way around that. In contrast, the unpacked message format should be where Cap'n
 | 
					 | 
				
			||||||
Proto shines; its main selling point is that there's [no decoding step](https://capnproto.org/).
 | 
					 | 
				
			||||||
However, accomplishing zero-copy deserialization required code in the private API
 | 
					 | 
				
			||||||
([since fixed](https://github.com/capnproto/capnproto-rust/issues/148)), and we allocate a vector on
 | 
					 | 
				
			||||||
every read for the segment table.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In the end, I put in significant work to make Cap'n Proto as fast as possible, but there were too
 | 
					 | 
				
			||||||
many issues for me to feel comfortable using it long-term.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Part 2: Flatbuffers
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This is the new kid on the block. After a
 | 
					 | 
				
			||||||
[first attempt](https://github.com/google/flatbuffers/pull/3894) didn't pan out, official support
 | 
					 | 
				
			||||||
was [recently launched](https://github.com/google/flatbuffers/pull/4898). Flatbuffers intends to
 | 
					 | 
				
			||||||
address the same problems as Cap'n Proto: high-performance, polyglot, binary messaging. The
 | 
					 | 
				
			||||||
difference is that Flatbuffers claims to have a simpler wire format and
 | 
					 | 
				
			||||||
[more flexibility](https://google.github.io/flatbuffers/flatbuffers_benchmarks.html).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
On the whole, I enjoyed using Flatbuffers; the [tooling](https://crates.io/crates/flatc-rust) is
 | 
					 | 
				
			||||||
nice, and unlike Cap'n Proto, parsing messages was actually zero-copy and zero-allocation. However,
 | 
					 | 
				
			||||||
there were still some issues.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
First, Flatbuffers (at least in Rust) can't handle nested vectors. This is a problem for formats
 | 
					 | 
				
			||||||
like the following:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
table Message {
 | 
					 | 
				
			||||||
  symbol: string;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
table MultiMessage {
 | 
					 | 
				
			||||||
  messages:[Message];
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We want to create a `MultiMessage` which contains a vector of `Message`, and each `Message` itself
 | 
					 | 
				
			||||||
contains a vector (the `string` type). I was able to work around this by
 | 
					 | 
				
			||||||
[caching `Message` elements](https://github.com/speice-io/marketdata-shootout/blob/e9d07d148bf36a211a6f86802b313c4918377d1b/src/flatbuffers_runner.rs#L83)
 | 
					 | 
				
			||||||
in a `SmallVec` before building the final `MultiMessage`, but it was a painful process that I
 | 
					 | 
				
			||||||
believe contributed to poor serialization performance.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Second, streaming support in Flatbuffers seems to be something of an
 | 
					 | 
				
			||||||
[afterthought](https://github.com/google/flatbuffers/issues/3898). Where Cap'n Proto in Rust handles
 | 
					 | 
				
			||||||
reading messages from a stream as part of the API, Flatbuffers just sticks a `u32` at the front of
 | 
					 | 
				
			||||||
each message to indicate the size. Not specifically a problem, but calculating message size without
 | 
					 | 
				
			||||||
that tag is nigh on impossible.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Ultimately, I enjoyed using Flatbuffers, and had to do significantly less work to make it perform
 | 
					 | 
				
			||||||
well.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Part 3: Simple Binary Encoding
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Support for SBE was added by the author of one of my favorite
 | 
					 | 
				
			||||||
[Rust blog posts](https://web.archive.org/web/20190427124806/https://polysync.io/blog/session-types-for-hearty-codecs/).
 | 
					 | 
				
			||||||
I've [talked previously]({% post_url 2019-06-31-high-performance-systems %}) about how important
 | 
					 | 
				
			||||||
variance is in high-performance systems, so it was encouraging to read about a format that
 | 
					 | 
				
			||||||
[directly addressed](https://github.com/real-logic/simple-binary-encoding/wiki/Why-Low-Latency) my
 | 
					 | 
				
			||||||
concerns. SBE has by far the simplest binary format, but it does make some tradeoffs.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Both Cap'n Proto and Flatbuffers use [message offsets](https://capnproto.org/encoding.html#structs)
 | 
					 | 
				
			||||||
to handle variable-length data, [unions](https://capnproto.org/language.html#unions), and various
 | 
					 | 
				
			||||||
other features. In contrast, messages in SBE are essentially
 | 
					 | 
				
			||||||
[just structs](https://github.com/real-logic/simple-binary-encoding/blob/master/sbe-samples/src/main/resources/example-schema.xml);
 | 
					 | 
				
			||||||
variable-length data is supported, but there's no union type.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
As mentioned in the beginning, the Rust port of SBE works well, but is
 | 
					 | 
				
			||||||
[essentially unmaintained](https://users.rust-lang.org/t/zero-cost-abstraction-frontier-no-copy-low-allocation-ordered-decoding/11515/9).
 | 
					 | 
				
			||||||
However, if you don't need union types, and can accept that schemas are XML documents, it's still
 | 
					 | 
				
			||||||
worth using. SBE's implementation had the best streaming support of all formats I tested, and
 | 
					 | 
				
			||||||
doesn't trigger allocation during de/serialization.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Results
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
After building a test harness
 | 
					 | 
				
			||||||
[for](https://github.com/speice-io/marketdata-shootout/blob/master/src/capnp_runner.rs)
 | 
					 | 
				
			||||||
[each](https://github.com/speice-io/marketdata-shootout/blob/master/src/flatbuffers_runner.rs)
 | 
					 | 
				
			||||||
[format](https://github.com/speice-io/marketdata-shootout/blob/master/src/sbe_runner.rs), it was
 | 
					 | 
				
			||||||
time to actually take them for a spin. I used
 | 
					 | 
				
			||||||
[this script](https://github.com/speice-io/marketdata-shootout/blob/master/run_shootout.sh) to run
 | 
					 | 
				
			||||||
the benchmarks, and the raw results are
 | 
					 | 
				
			||||||
[here](https://github.com/speice-io/marketdata-shootout/blob/master/shootout.csv). All data reported
 | 
					 | 
				
			||||||
below is the average of 10 runs on a single day of IEX data. Results were validated to make sure
 | 
					 | 
				
			||||||
that each format parsed the data correctly.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Serialization
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This test measures, on a
 | 
					 | 
				
			||||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L268-L272),
 | 
					 | 
				
			||||||
how long it takes to serialize the IEX message into the desired format and write to a pre-allocated
 | 
					 | 
				
			||||||
buffer.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
| Schema               | Median | 99th Pctl | 99.9th Pctl | Total  |
 | 
					 | 
				
			||||||
| :------------------- | :----- | :-------- | :---------- | :----- |
 | 
					 | 
				
			||||||
| Cap'n Proto Packed   | 413ns  | 1751ns    | 2943ns      | 14.80s |
 | 
					 | 
				
			||||||
| Cap'n Proto Unpacked | 273ns  | 1828ns    | 2836ns      | 10.65s |
 | 
					 | 
				
			||||||
| Flatbuffers          | 355ns  | 2185ns    | 3497ns      | 14.31s |
 | 
					 | 
				
			||||||
| SBE                  | 91ns   | 1535ns    | 2423ns      | 3.91s  |
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Deserialization
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This test measures, on a
 | 
					 | 
				
			||||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L294-L298),
 | 
					 | 
				
			||||||
how long it takes to read the previously-serialized message and perform some basic aggregation. The
 | 
					 | 
				
			||||||
aggregation code is the same for each format, so any performance differences are due solely to the
 | 
					 | 
				
			||||||
format implementation.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
| Schema               | Median | 99th Pctl | 99.9th Pctl | Total  |
 | 
					 | 
				
			||||||
| :------------------- | :----- | :-------- | :---------- | :----- |
 | 
					 | 
				
			||||||
| Cap'n Proto Packed   | 539ns  | 1216ns    | 2599ns      | 18.92s |
 | 
					 | 
				
			||||||
| Cap'n Proto Unpacked | 366ns  | 737ns     | 1583ns      | 12.32s |
 | 
					 | 
				
			||||||
| Flatbuffers          | 173ns  | 421ns     | 1007ns      | 6.00s  |
 | 
					 | 
				
			||||||
| SBE                  | 116ns  | 286ns     | 659ns       | 4.05s  |
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Building a benchmark turned out to be incredibly helpful in making a decision; because a "union"
 | 
					 | 
				
			||||||
type isn't important to me, I can be confident that SBE best addresses my needs.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
While SBE was the fastest in terms of both median and worst-case performance, its worst case
 | 
					 | 
				
			||||||
performance was proportionately much further from its median than any other format's. It seems that
 | 
					 | 
				
			||||||
de/serialization time scales with message size, but I'll need to do some more research to understand
 | 
					 | 
				
			||||||
what exactly is going on.
 | 
					 | 
				
			||||||
@ -1,370 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "Release the GIL"
 | 
					 | 
				
			||||||
description: "Strategies for Parallelism in Python"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [python]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Complaining about the [Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock)
 | 
					 | 
				
			||||||
(GIL) seems like a rite of passage for Python developers. It's easy to criticize a design decision
 | 
					 | 
				
			||||||
made before multi-core CPU's were widely available, but the fact that it's still around indicates
 | 
					 | 
				
			||||||
that it generally works [Good](https://wiki.c2.com/?PrematureOptimization)
 | 
					 | 
				
			||||||
[Enough](https://wiki.c2.com/?YouArentGonnaNeedIt). Besides, there are simple and effective
 | 
					 | 
				
			||||||
workarounds; it's not hard to start a
 | 
					 | 
				
			||||||
[new process](https://docs.python.org/3/library/multiprocessing.html) and use message passing to
 | 
					 | 
				
			||||||
synchronize code running in parallel.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Still, wouldn't it be nice to have more than a single active interpreter thread? In an age of
 | 
					 | 
				
			||||||
asynchronicity and _M:N_ threading, Python seems lacking. The ideal scenario is to take advantage of
 | 
					 | 
				
			||||||
both Python's productivity and the modern CPU's parallel capabilities.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Presented below are two strategies for releasing the GIL's icy grip without giving up on what makes
 | 
					 | 
				
			||||||
Python a nice language to start with. Bear in mind: these are just the tools, no claim is made about
 | 
					 | 
				
			||||||
whether it's a good idea to use them. Very often, unlocking the GIL is an
 | 
					 | 
				
			||||||
[XY problem](https://en.wikipedia.org/wiki/XY_problem); you want application performance, and the
 | 
					 | 
				
			||||||
GIL seems like an obvious bottleneck. Remember that any gains from running code in parallel come at
 | 
					 | 
				
			||||||
the expense of project complexity; messing with the GIL is ultimately messing with Python's memory
 | 
					 | 
				
			||||||
model.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%load_ext Cython
 | 
					 | 
				
			||||||
from numba import jit
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
N = 1_000_000_000
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Cython
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Put simply, [Cython](https://cython.org/) is a programming language that looks a lot like Python,
 | 
					 | 
				
			||||||
gets [transpiled](https://en.wikipedia.org/wiki/Source-to-source_compiler) to C/C++, and integrates
 | 
					 | 
				
			||||||
well with the [CPython](https://en.wikipedia.org/wiki/CPython) API. It's great for building Python
 | 
					 | 
				
			||||||
wrappers to C and C++ libraries, writing optimized code for numerical processing, and tons more. And
 | 
					 | 
				
			||||||
when it comes to managing the GIL, there are two special features:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- The `nogil`
 | 
					 | 
				
			||||||
  [function annotation](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#declaring-a-function-as-callable-without-the-gil)
 | 
					 | 
				
			||||||
  asserts that a Cython function is safe to use without the GIL, and compilation will fail if it
 | 
					 | 
				
			||||||
  interacts with Python in an unsafe manner
 | 
					 | 
				
			||||||
- The `with nogil`
 | 
					 | 
				
			||||||
  [context manager](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#releasing-the-gil)
 | 
					 | 
				
			||||||
  explicitly unlocks the CPython GIL while active
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Whenever Cython code runs inside a `with nogil` block on a separate thread, the Python interpreter
 | 
					 | 
				
			||||||
is unblocked and allowed to continue work elsewhere. We'll define a "busy work" function that
 | 
					 | 
				
			||||||
demonstrates this principle in action:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%cython
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Annotating a function with `nogil` indicates only that it is safe
 | 
					 | 
				
			||||||
# to call in a `with nogil` block. It *does not* release the GIL.
 | 
					 | 
				
			||||||
cdef unsigned long fibonacci(unsigned long n) nogil:
 | 
					 | 
				
			||||||
    if n <= 1:
 | 
					 | 
				
			||||||
        return n
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    cdef unsigned long a = 0, b = 1, c = 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    c = a + b
 | 
					 | 
				
			||||||
    for _i in range(2, n):
 | 
					 | 
				
			||||||
        a = b
 | 
					 | 
				
			||||||
        b = c
 | 
					 | 
				
			||||||
        c = a + b
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def cython_nogil(unsigned long n):
 | 
					 | 
				
			||||||
    # Explicitly release the GIL while running `fibonacci`
 | 
					 | 
				
			||||||
    with nogil:
 | 
					 | 
				
			||||||
        value = fibonacci(n)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return value
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def cython_gil(unsigned long n):
 | 
					 | 
				
			||||||
    # Because the GIL is not explicitly released, it implicitly
 | 
					 | 
				
			||||||
    # remains acquired when running the `fibonacci` function
 | 
					 | 
				
			||||||
    return fibonacci(n)
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
First, let's time how long it takes Cython to calculate the billionth Fibonacci number:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
_ = cython_gil(N);
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 365 ms, sys: 0 ns, total: 365 ms
 | 
					 | 
				
			||||||
> Wall time: 372 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
_ = cython_nogil(N);
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 381 ms, sys: 0 ns, total: 381 ms
 | 
					 | 
				
			||||||
> Wall time: 388 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Both versions (with and without GIL) take effectively the same amount of time to run. Even when
 | 
					 | 
				
			||||||
running this calculation in parallel on separate threads, it is expected that the run time will
 | 
					 | 
				
			||||||
double because only one thread can be active at a time:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
from threading import Thread
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Create the two threads to run on
 | 
					 | 
				
			||||||
t1 = Thread(target=cython_gil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=cython_gil, args=[N])
 | 
					 | 
				
			||||||
# Start the threads
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
# Wait for the threads to finish
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 641 ms, sys: 5.62 ms, total: 647 ms
 | 
					 | 
				
			||||||
> Wall time: 645 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
However, if the first thread releases the GIL, the second thread is free to acquire it and run in
 | 
					 | 
				
			||||||
parallel:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
t1 = Thread(target=cython_nogil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=cython_gil, args=[N])
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 717 ms, sys: 372 µs, total: 718 ms
 | 
					 | 
				
			||||||
> Wall time: 358 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Because `user` time represents the sum of processing time on all threads, it doesn't change much.
 | 
					 | 
				
			||||||
The ["wall time"](https://en.wikipedia.org/wiki/Elapsed_real_time) has been cut roughly in half
 | 
					 | 
				
			||||||
because each function is running simultaneously.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Keep in mind that the **order in which threads are started** makes a difference!
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Note that the GIL-locked version is started first
 | 
					 | 
				
			||||||
t1 = Thread(target=cython_gil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=cython_nogil, args=[N])
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 667 ms, sys: 0 ns, total: 667 ms
 | 
					 | 
				
			||||||
> Wall time: 672 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Even though the second thread releases the GIL while running, it can't start until the first has
 | 
					 | 
				
			||||||
completed. Thus, the overall runtime is effectively the same as running two GIL-locked threads.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, be aware that attempting to unlock the GIL from a thread that doesn't own it will crash the
 | 
					 | 
				
			||||||
**interpreter**, not just the thread attempting the unlock:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%cython
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cdef int cython_recurse(int n) nogil:
 | 
					 | 
				
			||||||
    if n <= 0:
 | 
					 | 
				
			||||||
        return 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    with nogil:
 | 
					 | 
				
			||||||
        return cython_recurse(n - 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cython_recurse(2)
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> Fatal Python error: PyEval_SaveThread: NULL tstate
 | 
					 | 
				
			||||||
> 
 | 
					 | 
				
			||||||
> Thread 0x00007f499effd700 (most recent call first):
 | 
					 | 
				
			||||||
>   File "/home/bspeice/.virtualenvs/release-the-gil/lib/python3.7/site-packages/ipykernel/parentpoller.py", line 39 in run
 | 
					 | 
				
			||||||
>   File "/usr/lib/python3.7/threading.py", line 926 in _bootstrap_inner
 | 
					 | 
				
			||||||
>   File "/usr/lib/python3.7/threading.py", line 890 in _bootstrap
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In practice, avoiding this issue is simple. First, `nogil` functions probably shouldn't contain
 | 
					 | 
				
			||||||
`with nogil` blocks. Second, Cython can
 | 
					 | 
				
			||||||
[conditionally acquire/release](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#conditional-acquiring-releasing-the-gil)
 | 
					 | 
				
			||||||
the GIL, so these conditions can be used to synchronize access. Finally, Cython's documentation for
 | 
					 | 
				
			||||||
[external C code](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#acquiring-and-releasing-the-gil)
 | 
					 | 
				
			||||||
contains more detail on how to safely manage the GIL.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
To conclude: use Cython's `nogil` annotation to assert that functions are safe for calling when the
 | 
					 | 
				
			||||||
GIL is unlocked, and `with nogil` to actually unlock the GIL and run those functions.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Numba
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Like Cython, [Numba](https://numba.pydata.org/) is a "compiled Python." Where Cython works by
 | 
					 | 
				
			||||||
compiling a Python-like language to C/C++, Numba compiles Python bytecode _directly to machine code_
 | 
					 | 
				
			||||||
at runtime. Behavior is controlled with a special `@jit` decorator; calling a decorated function
 | 
					 | 
				
			||||||
first compiles it to machine code before running. Calling the function a second time re-uses that
 | 
					 | 
				
			||||||
machine code unless the argument types have changed.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Numba works best when a `nopython=True` argument is added to the `@jit` decorator; functions
 | 
					 | 
				
			||||||
compiled in [`nopython`](http://numba.pydata.org/numba-doc/latest/user/jit.html?#nopython) mode
 | 
					 | 
				
			||||||
avoid the CPython API and have performance comparable to C. Further, adding `nogil=True` to the
 | 
					 | 
				
			||||||
`@jit` decorator unlocks the GIL while that function is running. Note that `nogil` and `nopython`
 | 
					 | 
				
			||||||
are separate arguments; while it is necessary for code to be compiled in `nopython` mode in order to
 | 
					 | 
				
			||||||
release the lock, the GIL will remain locked if `nogil=False` (the default).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Let's repeat the same experiment, this time using Numba instead of Cython:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
# The `int` type annotation is only for humans and is ignored
 | 
					 | 
				
			||||||
# by Numba.
 | 
					 | 
				
			||||||
@jit(nopython=True, nogil=True)
 | 
					 | 
				
			||||||
def numba_nogil(n: int) -> int:
 | 
					 | 
				
			||||||
    if n <= 1:
 | 
					 | 
				
			||||||
        return n
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    a = 0
 | 
					 | 
				
			||||||
    b = 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    c = a + b
 | 
					 | 
				
			||||||
    for _i in range(2, n):
 | 
					 | 
				
			||||||
        a = b
 | 
					 | 
				
			||||||
        b = c
 | 
					 | 
				
			||||||
        c = a + b
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Run using `nopython` mode to receive a performance boost,
 | 
					 | 
				
			||||||
# but GIL remains locked due to `nogil=False` by default.
 | 
					 | 
				
			||||||
@jit(nopython=True)
 | 
					 | 
				
			||||||
def numba_gil(n: int) -> int:
 | 
					 | 
				
			||||||
    if n <= 1:
 | 
					 | 
				
			||||||
        return n
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    a = 0
 | 
					 | 
				
			||||||
    b = 1
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    c = a + b
 | 
					 | 
				
			||||||
    for _i in range(2, n):
 | 
					 | 
				
			||||||
        a = b
 | 
					 | 
				
			||||||
        b = c
 | 
					 | 
				
			||||||
        c = a + b
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return c
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Call each function once to force compilation; we don't want
 | 
					 | 
				
			||||||
# the timing statistics to include how long it takes to compile.
 | 
					 | 
				
			||||||
numba_nogil(N)
 | 
					 | 
				
			||||||
numba_gil(N);
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
We'll perform the same tests as above; first, figure out how long it takes the function to run:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
_ = numba_gil(N)
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 253 ms, sys: 258 µs, total: 253 ms
 | 
					 | 
				
			||||||
> Wall time: 251 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
<span style="font-size: .8em">
 | 
					 | 
				
			||||||
Aside: it's not immediately clear why Numba takes ~20% less time to run than Cython for code that should be
 | 
					 | 
				
			||||||
effectively identical after compilation.
 | 
					 | 
				
			||||||
</span>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
When running two GIL-locked threads, the result (as expected) takes around twice as long to compute:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
t1 = Thread(target=numba_gil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=numba_gil, args=[N])
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 541 ms, sys: 3.96 ms, total: 545 ms
 | 
					 | 
				
			||||||
> Wall time: 541 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
But if the GIL-unlocking thread starts first, both threads run in parallel:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
t1 = Thread(target=numba_nogil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=numba_gil, args=[N])
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 551 ms, sys: 7.77 ms, total: 559 ms
 | 
					 | 
				
			||||||
> Wall time: 279 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Just like Cython, starting the GIL-locked thread first leads to poor performance:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
%%time
 | 
					 | 
				
			||||||
t1 = Thread(target=numba_gil, args=[N])
 | 
					 | 
				
			||||||
t2 = Thread(target=numba_nogil, args=[N])
 | 
					 | 
				
			||||||
t1.start(); t2.start()
 | 
					 | 
				
			||||||
t1.join(); t2.join()
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
> <pre>
 | 
					 | 
				
			||||||
> CPU times: user 524 ms, sys: 0 ns, total: 524 ms
 | 
					 | 
				
			||||||
> Wall time: 522 ms
 | 
					 | 
				
			||||||
> </pre>
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, unlike Cython, Numba will unlock the GIL if and only if it is currently acquired;
 | 
					 | 
				
			||||||
recursively calling `@jit(nogil=True)` functions is perfectly safe:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
```python
 | 
					 | 
				
			||||||
from numba import jit
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@jit(nopython=True, nogil=True)
 | 
					 | 
				
			||||||
def numba_recurse(n: int) -> int:
 | 
					 | 
				
			||||||
    if n <= 0:
 | 
					 | 
				
			||||||
        return 0
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    return numba_recurse(n - 1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
numba_recurse(2);
 | 
					 | 
				
			||||||
```
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
# Conclusion
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Before finishing, it's important to address pain points that will show up if these techniques are
 | 
					 | 
				
			||||||
used in a more realistic project:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
First, code running in a GIL-free context will likely also need non-trivial data structures;
 | 
					 | 
				
			||||||
GIL-free functions aren't useful if they're constantly interacting with Python objects whose access
 | 
					 | 
				
			||||||
requires the GIL. Cython provides
 | 
					 | 
				
			||||||
[extension types](http://docs.cython.org/en/latest/src/tutorial/cdef_classes.html) and Numba
 | 
					 | 
				
			||||||
provides a [`@jitclass`](https://numba.pydata.org/numba-doc/dev/user/jitclass.html) decorator to
 | 
					 | 
				
			||||||
address this need.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Second, building and distributing applications that make use of Cython/Numba can be complicated.
 | 
					 | 
				
			||||||
Cython packages require running the compiler, (potentially) linking/packaging external dependencies,
 | 
					 | 
				
			||||||
and distributing a binary wheel. Numba is generally simpler because the code being distributed is
 | 
					 | 
				
			||||||
pure Python, but can be tricky since errors aren't detected until runtime.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Finally, while unlocking the GIL is often a solution in search of a problem, both Cython and Numba
 | 
					 | 
				
			||||||
provide tools to directly manage the GIL when appropriate. This enables true parallelism (not just
 | 
					 | 
				
			||||||
[concurrency](https://stackoverflow.com/a/1050257)) that is impossible in vanilla Python.
 | 
					 | 
				
			||||||
@ -1,60 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
layout: post
 | 
					 | 
				
			||||||
title: "The webpack industrial complex"
 | 
					 | 
				
			||||||
description: "Reflections on a new project"
 | 
					 | 
				
			||||||
category:
 | 
					 | 
				
			||||||
tags: [webpack, react, vite]
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
This started because I wanted to build a synthesizer. Setting a goal of "digital DX7" was ambitious, but I needed something unrelated to the day job. Beyond that, working with audio seemed like a good challenge. I enjoy performance-focused code, and performance problems in audio are conspicuous. Building a web project was an obvious choice because of the web audio API documentation and independence from a large Digital Audio Workstation (DAW).
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The project was soon derailed trying to sort out technical issues unrelated to the original purpose. Finding a resolution was a frustrating journey, and it's still not clear whether those problems were my fault. As a result, I'm writing this to try making sense of it, as a case study/reference material, and to salvage something from the process.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Starting strong
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The sole starting requirement was to write everything in TypeScript. Not because of project scale, but because guardrails help with unfamiliar territory. Keeping that in mind, the first question was: how does one start a new project? All I actually need is "compile TypeScript, show it in a browser."
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Create React App (CRA) came to the rescue and the rest of that evening was a joy. My TypeScript/JavaScript skills were rusty, but the online documentation was helpful. I had never understood the appeal of JSX (why put a DOM in JavaScript?) until it made connecting an `onEvent` handler and a function easy.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Some quick dimensional analysis later and there was a sine wave oscillator playing A=440 through the speakers. I specifically remember thinking "modern browsers are magical."
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Continuing on
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now comes the first mistake: I began to worry about "scale" before encountering an actual problem. Rather than rendering audio in the main thread, why not use audio worklets and render in a background thread instead?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The first sign something was amiss came from the TypeScript compiler errors showing the audio worklet API [was missing](https://github.com/microsoft/TypeScript/issues/28308). After searching out Github issues and (unsuccessfully) tweaking the `.tsconfig` settings, I settled on installing a package and moving on.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
The next problem came from actually using the API. Worklets must load from separate "modules," but it wasn't clear how to guarantee the worklet code stayed separate from the application. I saw recommendations to use `new URL(<local path>, import.meta.url)` and it worked! Well, kind of:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||

 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
That file has the audio processor code, so why does it get served with `Content-Type: video/mp2t`?
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Floundering about
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Now comes the second mistake: even though I didn't understand the error, I ignored recommendations to [just use JavaScript](https://hackernoon.com/implementing-audioworklets-with-react-8a80a470474) and stuck by the original TypeScript requirement.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I tried different project structures. Moving the worklet code to a new folder didn't help, nor did setting up a monorepo and placing it in a new package.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I tried three different CRA tools - `react-app-rewired`, `craco`, `customize-cra` - but got the same problem. Each has varying levels of compatibility with recent CRA versions, so it wasn't clear if I had the right solution but implemented it incorrectly. After attempting to eject the application and panicking after seeing the configuration, I abandoned that as well.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I tried changing the webpack configuration: using [new](https://github.com/webpack/webpack/issues/11543#issuecomment-917673256) [loaders](https://github.com/popelenkow/worker-url), setting [asset rules](https://github.com/webpack/webpack/discussions/14093#discussioncomment-1257149), even [changing how webpack detects worker resources](https://github.com/webpack/webpack/issues/11543#issuecomment-826897590). In hindsight, entry points may have been the answer. But because CRA actively resists attempts to change its webpack configuration, and I couldn't find audio worklet examples in any other framework, I gave up.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I tried so many application frameworks. Next.js looked like a good candidate, but added its own [bespoke webpack complexity](https://github.com/vercel/next.js/issues/24907) to the existing confusion. Astro had the best "getting started" experience, but I refuse to install an IDE-specific plugin. I first used Deno while exploring Lume, but it couldn't import the audio worklet types (maybe because of module compatibility?). Each framework was unique in its own way (shout-out to SvelteKit) but I couldn't figure out how to make them work.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
## Learning and reflecting
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
I ended up using Vite and vite-plugin-react-pages to handle both "build the app" and "bundle worklets," but the specific tool choice isn't important. Instead, the focus should be on lessons learned.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For myself:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- I'm obsessed with tooling, to the point it can derail the original goal. While it comes from a good place (for example: "types are awesome"), it can get in the way of more important work
 | 
					 | 
				
			||||||
- I tend to reach for online resources right after seeing a new problem. While finding help online is often faster, spending time understanding the problem would have been more productive than cycling through (often outdated) blog posts
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
For the tools:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
- Resource bundling is great and solves a genuine challenge. I've heard too many horror stories of developers writing modules by hand to believe this is unnecessary complexity
 | 
					 | 
				
			||||||
- Webpack is a build system and modern frameworks are deeply dependent on it (hence the "webpack industrial complex"). While this often saves users from unnecessary complexity, there's no path forward if something breaks
 | 
					 | 
				
			||||||
- There's little ability to mix and match tools across frameworks. Next.js and Gatsby let users extend webpack, but because each framework adds its own modules, changes aren't portable. After spending a week looking at webpack, I had an example running with parcel in thirty minutes, but couldn't integrate it
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
In the end, learning new systems is fun, but a focus on tools that "just work" can leave users out in the cold if they break down.
 | 
					 | 
				
			||||||
@ -1,15 +0,0 @@
 | 
				
			|||||||
@font-face {
 | 
					 | 
				
			||||||
    font-family: 'JetBrains Mono';
 | 
					 | 
				
			||||||
    src: url('/assets/font/JetBrainsMono-Regular.woff2') format('woff2'),
 | 
					 | 
				
			||||||
         url('/assets/font/JetBrainsMono-Regular.woff') format('woff');
 | 
					 | 
				
			||||||
    font-weight: normal;
 | 
					 | 
				
			||||||
    font-style: normal;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@font-face {
 | 
					 | 
				
			||||||
    font-family: 'Lato';
 | 
					 | 
				
			||||||
    src: url('/assets/font/lato-regular-webfont.woff2') format('woff2'),
 | 
					 | 
				
			||||||
         url('/assets/font/lato-regular-webfont.woff') format('woff');
 | 
					 | 
				
			||||||
    font-weight: normal;
 | 
					 | 
				
			||||||
    font-style: normal;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
@ -1,119 +0,0 @@
 | 
				
			|||||||
---
---

// Import the theme rules
@import "theme";

body {
    max-width: 100%;
    overflow-x: hidden;
    font-family: 'Lato', sans-serif;
}

.navbar {
    color: $gray;
}

.separator {
    margin-right: .45rem;
    margin-left: .25rem;
    color: #000;
    &:after {
        content: '\00a0/';
    }
}

header {
    padding-top: 80px;
    padding-bottom: 0;
}
// NOTE: removed a stray `;` that previously followed the `header` block.

// Scope both headings to `header`; the previous `header h1,h2` grouped
// selector unintentionally matched every <h2> on the page.
header h1,
header h2 {
    color: #000;
}

.post-description {
    color: #555;
}

.post-container a {
    color: #555;
    border-bottom-color: $gray;
    border-bottom-style: dotted;
    border-bottom-width: 1px;

    position: relative;
    display: inline-block;
    padding: 1px 1px;
    transition: color ease 0.3s;

    // Fill-from-bottom hover effect: the pseudo-element grows from 0% to
    // 100% height behind the link text.
    &::after {
      content: '';
      position: absolute;
      z-index: -1;
      width: 100%;
      height: 0%;
      left: 0;
      bottom: 0;
      background-color: $gray;
      transition: all ease 0.3s;
    }

    &:hover {
      color: #fff;
      border-bottom-style: solid;
      &::after {
        height: 100%;
      }
    }
}

body pre {
    font-size: 15px;
}

pre.highlight, code {
    font-family: 'JetBrains Mono', monospace;
}

div.highlighter-rouge {
    // Default theme uses `width: 100vw`, which while cool, does cause the page
    // to exceed screen width and trigger horizontal scrolling. No bueno.
    width: 99vw;
}

.post-date {
    // On the front page, make sure titles don't force wrapping the date box content
    text-align: right;
    white-space: nowrap;
}

blockquote {
    color: #555;
    // NOTE(review): `right` has no effect on statically-positioned elements;
    // kept for fidelity, but confirm whether it can be dropped.
    right: 100px;
    margin-left: 0;
    padding-left: 1.8rem;
    border-left: 5px solid $gray;
}

.post-nav {
    /* Insert your custom styling here. Example:

       font-size: 14px;
    */
    display: flex;
    margin-top: 1em;
    margin-bottom: 1em;
}
.post-nav div {
    /* flex-grow, flex-shrink, flex-basis */
    flex: 1 1 0;
}
.post-nav-next {
    text-align: right;
}

th, td {
    border-bottom: 1px solid $gray;
    padding: 0.75em;
}
 | 
					 | 
				
			||||||
| 
		 Before Width: | Height: | Size: 840 KiB  | 
| 
		 Before Width: | Height: | Size: 926 KiB  | 
| 
		 Before Width: | Height: | Size: 165 KiB  | 
| 
		 Before Width: | Height: | Size: 50 KiB  | 
| 
		 Before Width: | Height: | Size: 48 KiB  | 
| 
		 Before Width: | Height: | Size: 71 KiB  | 
| 
		 Before Width: | Height: | Size: 68 KiB  | 
| 
		 Before Width: | Height: | Size: 23 KiB  | 
| 
		 Before Width: | Height: | Size: 24 KiB  | 
| 
		 Before Width: | Height: | Size: 124 KiB  | 
| 
		 Before Width: | Height: | Size: 145 KiB  | 
| 
		 Before Width: | Height: | Size: 135 KiB  | 
| 
		 Before Width: | Height: | Size: 138 KiB  | 
| 
		 Before Width: | Height: | Size: 98 KiB  | 
| 
		 Before Width: | Height: | Size: 134 KiB  | 
| 
		 Before Width: | Height: | Size: 426 KiB  | 
| 
		 Before Width: | Height: | Size: 304 KiB  | 
| 
		 Before Width: | Height: | Size: 344 KiB  | 
| 
		 Before Width: | Height: | Size: 296 KiB  | 
| 
		 Before Width: | Height: | Size: 377 KiB  | 
| 
		 Before Width: | Height: | Size: 169 KiB  | 
| 
		 Before Width: | Height: | Size: 140 KiB  | 
| 
		 Before Width: | Height: | Size: 194 KiB  | 
| 
		 Before Width: | Height: | Size: 48 KiB  | 
							
								
								
									
										6
									
								
								index.md
									
									
									
									
									
								
							
							
						
						@ -1,6 +0,0 @@
 | 
				
			|||||||
---
 | 
					 | 
				
			||||||
# Feel free to add content and custom Front Matter to this file.
 | 
					 | 
				
			||||||
# To modify the layout, see https://jekyllrb.com/docs/themes/#overriding-theme-defaults
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
layout: home
 | 
					 | 
				
			||||||
---
 | 
					 | 
				
			||||||
							
								
								
									
										3163
									
								
								package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
							
								
								
									
										32
									
								
								package.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,32 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					  "name": "speice.io",
 | 
				
			||||||
 | 
					  "private": true,
 | 
				
			||||||
 | 
					  "version": "0.0.0",
 | 
				
			||||||
 | 
					  "type": "module",
 | 
				
			||||||
 | 
					  "scripts": {
 | 
				
			||||||
 | 
					    "dev": "vite",
 | 
				
			||||||
 | 
					    "build": "tsc && vite build",
 | 
				
			||||||
 | 
					    "preview": "vite preview",
 | 
				
			||||||
 | 
					    "prepare": "husky install"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "dependencies": {
 | 
				
			||||||
 | 
					    "react": "^18.2.0",
 | 
				
			||||||
 | 
					    "react-dom": "^18.2.0"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "devDependencies": {
 | 
				
			||||||
 | 
					    "@bspeice/vite-plugin-blog": "^1.1.0",
 | 
				
			||||||
 | 
					    "@mdx-js/rollup": "^2.3.0",
 | 
				
			||||||
 | 
					    "@types/react": "^18.0.28",
 | 
				
			||||||
 | 
					    "@types/react-dom": "^18.0.11",
 | 
				
			||||||
 | 
					    "@vitejs/plugin-react-swc": "^3.0.0",
 | 
				
			||||||
 | 
					    "husky": "^8.0.3",
 | 
				
			||||||
 | 
					    "pretty-quick": "^3.1.3",
 | 
				
			||||||
 | 
					    "typescript": "^4.9.3",
 | 
				
			||||||
 | 
					    "vite": "^4.2.0"
 | 
				
			||||||
 | 
					  },
 | 
				
			||||||
 | 
					  "husky": {
 | 
				
			||||||
 | 
					    "hooks": {
 | 
				
			||||||
 | 
					      "pre-commit": "pretty-quick --staged"
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										9
									
								
								pages/index.tsx
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,9 @@
 | 
				
			|||||||
 | 
					import React from "react";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export default function Page() {
 | 
				
			||||||
 | 
					    return (
 | 
				
			||||||
 | 
					        <>
 | 
				
			||||||
 | 
					            <p>Is this thing on?</p>
 | 
				
			||||||
 | 
					        </>
 | 
				
			||||||
 | 
					    );
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										19
									
								
								tsconfig.json
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,19 @@
 | 
				
			|||||||
 | 
					{
 | 
				
			||||||
 | 
					  "compilerOptions": {
 | 
				
			||||||
 | 
					    "target": "ESNext",
 | 
				
			||||||
 | 
					    "useDefineForClassFields": true,
 | 
				
			||||||
 | 
					    "lib": ["DOM", "DOM.Iterable", "ESNext"],
 | 
				
			||||||
 | 
					    "allowJs": false,
 | 
				
			||||||
 | 
					    "skipLibCheck": true,
 | 
				
			||||||
 | 
					    "esModuleInterop": false,
 | 
				
			||||||
 | 
					    "allowSyntheticDefaultImports": true,
 | 
				
			||||||
 | 
					    "strict": true,
 | 
				
			||||||
 | 
					    "forceConsistentCasingInFileNames": true,
 | 
				
			||||||
 | 
					    "module": "ESNext",
 | 
				
			||||||
 | 
					    "moduleResolution": "Node",
 | 
				
			||||||
 | 
					    "resolveJsonModule": true,
 | 
				
			||||||
 | 
					    "isolatedModules": true,
 | 
				
			||||||
 | 
					    "noEmit": true,
 | 
				
			||||||
 | 
					    "jsx": "react-jsx"
 | 
				
			||||||
 | 
  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
							
								
								
									
										14
									
								
								vite.config.ts
									
									
									
									
									
										Normal file
									
								
							
							
						
						@ -0,0 +1,14 @@
 | 
				
			|||||||
 | 
					import { defineConfig } from 'vite'
 | 
				
			||||||
 | 
					import blog from "@bspeice/vite-plugin-blog"
 | 
				
			||||||
 | 
					import mdx from "@mdx-js/rollup"
 | 
				
			||||||
 | 
					import react from '@vitejs/plugin-react-swc'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					export default defineConfig({
 | 
				
			||||||
 | 
					  plugins: [
 | 
				
			||||||
 | 
					    blog({
 | 
				
			||||||
 | 
					      "/": "/pages/index"
 | 
				
			||||||
 | 
					    }),
 | 
				
			||||||
 | 
					    mdx(),
 | 
				
			||||||
 | 
					    react()
 | 
				
			||||||
 | 
					  ],
 | 
				
			||||||
 | 
					})
 | 
				
			||||||