Deploy website - based on 6dcbc1a72c
@ -1,6 +0,0 @@
|
|||||||
FROM mcr.microsoft.com/vscode/devcontainers/ruby:0-2.7-bullseye
|
|
||||||
|
|
||||||
RUN wget https://github.com/errata-ai/vale/releases/download/v2.21.0/vale_2.21.0_Linux_64-bit.tar.gz -O /tmp/vale.tar.gz \
|
|
||||||
&& cd /usr/local/bin \
|
|
||||||
&& tar xf /tmp/vale.tar.gz \
|
|
||||||
&& rm /tmp/vale.tar.gz
|
|
@ -1,13 +0,0 @@
|
|||||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
|
|
||||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/ruby
|
|
||||||
{
|
|
||||||
"name": "Ruby",
|
|
||||||
"build": {
|
|
||||||
"dockerfile": "Dockerfile"
|
|
||||||
},
|
|
||||||
"runArgs": ["--userns=keep-id"],
|
|
||||||
|
|
||||||
"remoteUser": "vscode",
|
|
||||||
"containerUser": "vscode",
|
|
||||||
"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
|
|
||||||
}
|
|
8
.gitignore
vendored
@ -1,8 +0,0 @@
|
|||||||
_site/
|
|
||||||
.swp
|
|
||||||
.sass-cache/
|
|
||||||
.jekyll-metadata
|
|
||||||
.bundle/
|
|
||||||
vendor/
|
|
||||||
.styles/
|
|
||||||
.vscode/
|
|
@ -1,7 +0,0 @@
|
|||||||
StylesPath = .styles
|
|
||||||
MinAlertLevel = suggestion
|
|
||||||
Packages = Microsoft, write-good
|
|
||||||
|
|
||||||
[*]
|
|
||||||
BasedOnStyles = Vale, Microsoft, write-good
|
|
||||||
write-good.E-Prime = NO
|
|
32
2011/11/webpack-industrial-complex/index.html
Normal file
95
2015/11/autocallable/index.html
Normal file
47
2015/11/welcome/index.html
Normal file
40
2015/12/testing-cramer/index.html
Normal file
41
2016/01/cloudy-in-seattle/index.html
Normal file
30
2016/01/complaining-about-the-weather/index.html
Normal file
75
2016/02/guaranteed-money-maker/index.html
Normal file
48
2016/03/predicting-santander-customer-happiness/index.html
Normal file
59
2016/03/tweet-like-me/index.html
Normal file
83
2016/04/tick-tock/index.html
Normal file
180
2016/05/the-unfair-casino/index.html
Normal file
74
2016/06/event-studies-and-earnings-releases/index.html
Normal file
187
2016/10/rustic-repodcasting/index.html
Normal file
66
2016/11/pca-audio-compression/index.html
Normal file
88
2018/01/captains-cookbook-part-1/index.html
Normal file
75
2018/01/captains-cookbook-part-2/index.html
Normal file
9
2018/05/hello/index.html
Normal file
142
2018/06/dateutil-parser-to-rust/index.html
Normal file
184
2018/09/isomorphic-apps/index.html
Normal file
99
2018/09/primitives-in-rust-are-weird/index.html
Normal file
92
2018/10/case-study-optimization/index.html
Normal file
77
2018/12/allocation-safety/index.html
Normal file
19
2018/12/what-small-business-really-means/index.html
Normal file
46
2019/02/08/compiler-optimizations/index.html
Normal file
122
2019/02/a-heaping-helping/index.html
Normal file
210
2019/02/stacking-up/index.html
Normal file
26
2019/02/summary/index.html
Normal file
133
2019/02/the-whole-world/index.html
Normal file
83
2019/02/understanding-allocations-in-rust/index.html
Normal file
29
2019/05/making-bread/index.html
Normal file
267
2019/06/high-performance-systems/index.html
Normal file
151
2019/09/binary-format-shootout/index.html
Normal file
151
2019/12/release-the-gil/index.html
Normal file
29
Gemfile
@ -1,29 +0,0 @@
|
|||||||
source "https://rubygems.org"
|
|
||||||
|
|
||||||
# Hello! This is where you manage which Jekyll version is used to run.
|
|
||||||
# When you want to use a different version, change it below, save the
|
|
||||||
# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
|
|
||||||
#
|
|
||||||
# bundle exec jekyll serve
|
|
||||||
#
|
|
||||||
# This will help ensure the proper Jekyll version is running.
|
|
||||||
# Happy Jekylling!
|
|
||||||
gem "jekyll", "~> 3.8.3"
|
|
||||||
|
|
||||||
gem "texture"
|
|
||||||
|
|
||||||
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
|
|
||||||
# uncomment the line below. To upgrade, run `bundle update github-pages`.
|
|
||||||
# gem "github-pages", group: :jekyll_plugins
|
|
||||||
|
|
||||||
# If you have any plugins, put them here!
|
|
||||||
group :jekyll_plugins do
|
|
||||||
gem "jekyll-feed", "~> 0.6"
|
|
||||||
gem "jekyll-remote-theme"
|
|
||||||
end
|
|
||||||
|
|
||||||
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
|
|
||||||
gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
|
|
||||||
|
|
||||||
# Performance-booster for watching directories on Windows
|
|
||||||
gem "wdm", "~> 0.1.0" if Gem.win_platform?
|
|
78
Gemfile.lock
@ -1,78 +0,0 @@
|
|||||||
GEM
|
|
||||||
remote: https://rubygems.org/
|
|
||||||
specs:
|
|
||||||
addressable (2.7.0)
|
|
||||||
public_suffix (>= 2.0.2, < 5.0)
|
|
||||||
colorator (1.1.0)
|
|
||||||
concurrent-ruby (1.1.6)
|
|
||||||
em-websocket (0.5.1)
|
|
||||||
eventmachine (>= 0.12.9)
|
|
||||||
http_parser.rb (~> 0.6.0)
|
|
||||||
eventmachine (1.2.7)
|
|
||||||
ffi (1.12.2)
|
|
||||||
forwardable-extended (2.6.0)
|
|
||||||
http_parser.rb (0.6.0)
|
|
||||||
i18n (0.9.5)
|
|
||||||
concurrent-ruby (~> 1.0)
|
|
||||||
jekyll (3.8.6)
|
|
||||||
addressable (~> 2.4)
|
|
||||||
colorator (~> 1.0)
|
|
||||||
em-websocket (~> 0.5)
|
|
||||||
i18n (~> 0.7)
|
|
||||||
jekyll-sass-converter (~> 1.0)
|
|
||||||
jekyll-watch (~> 2.0)
|
|
||||||
kramdown (~> 1.14)
|
|
||||||
liquid (~> 4.0)
|
|
||||||
mercenary (~> 0.3.3)
|
|
||||||
pathutil (~> 0.9)
|
|
||||||
rouge (>= 1.7, < 4)
|
|
||||||
safe_yaml (~> 1.0)
|
|
||||||
jekyll-feed (0.13.0)
|
|
||||||
jekyll (>= 3.7, < 5.0)
|
|
||||||
jekyll-remote-theme (0.4.2)
|
|
||||||
addressable (~> 2.0)
|
|
||||||
jekyll (>= 3.5, < 5.0)
|
|
||||||
jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
|
|
||||||
rubyzip (>= 1.3.0, < 3.0)
|
|
||||||
jekyll-sass-converter (1.5.2)
|
|
||||||
sass (~> 3.4)
|
|
||||||
jekyll-seo-tag (2.6.1)
|
|
||||||
jekyll (>= 3.3, < 5.0)
|
|
||||||
jekyll-watch (2.2.1)
|
|
||||||
listen (~> 3.0)
|
|
||||||
kramdown (1.17.0)
|
|
||||||
liquid (4.0.3)
|
|
||||||
listen (3.2.1)
|
|
||||||
rb-fsevent (~> 0.10, >= 0.10.3)
|
|
||||||
rb-inotify (~> 0.9, >= 0.9.10)
|
|
||||||
mercenary (0.3.6)
|
|
||||||
pathutil (0.16.2)
|
|
||||||
forwardable-extended (~> 2.6)
|
|
||||||
public_suffix (4.0.4)
|
|
||||||
rb-fsevent (0.10.3)
|
|
||||||
rb-inotify (0.10.1)
|
|
||||||
ffi (~> 1.0)
|
|
||||||
rouge (3.17.0)
|
|
||||||
rubyzip (2.3.0)
|
|
||||||
safe_yaml (1.0.5)
|
|
||||||
sass (3.7.4)
|
|
||||||
sass-listen (~> 4.0.0)
|
|
||||||
sass-listen (4.0.0)
|
|
||||||
rb-fsevent (~> 0.9, >= 0.9.4)
|
|
||||||
rb-inotify (~> 0.9, >= 0.9.7)
|
|
||||||
texture (0.3)
|
|
||||||
jekyll (~> 3.7)
|
|
||||||
jekyll-seo-tag (~> 2.1)
|
|
||||||
|
|
||||||
PLATFORMS
|
|
||||||
ruby
|
|
||||||
|
|
||||||
DEPENDENCIES
|
|
||||||
jekyll (~> 3.8.3)
|
|
||||||
jekyll-feed (~> 0.6)
|
|
||||||
jekyll-remote-theme
|
|
||||||
texture
|
|
||||||
tzinfo-data
|
|
||||||
|
|
||||||
BUNDLED WITH
|
|
||||||
2.1.4
|
|
44
_config.yml
@ -1,44 +0,0 @@
|
|||||||
# Welcome to Jekyll!
|
|
||||||
#
|
|
||||||
# This config file is meant for settings that affect your whole blog, values
|
|
||||||
# which you are expected to set up once and rarely edit after that. If you find
|
|
||||||
# yourself editing this file very often, consider using Jekyll's data files
|
|
||||||
# feature for the data you need to update frequently.
|
|
||||||
#
|
|
||||||
# For technical reasons, this file is *NOT* reloaded automatically when you use
|
|
||||||
# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
|
|
||||||
|
|
||||||
# Site settings
|
|
||||||
# These are used to personalize your new site. If you look in the HTML files,
|
|
||||||
# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
|
|
||||||
# You can create any custom variable you would like, and they will be accessible
|
|
||||||
# in the templates via {{ site.myvariable }}.
|
|
||||||
title: speice.io
|
|
||||||
description: The Old Speice Guy
|
|
||||||
email: bradlee@speice.io
|
|
||||||
baseurl: "" # the subpath of your site, e.g. /blog
|
|
||||||
url: "https://speice.io/" # the base hostname & protocol for your site, e.g. http://example.com
|
|
||||||
github_username: bspeice
|
|
||||||
|
|
||||||
# Build settings
|
|
||||||
markdown: kramdown
|
|
||||||
# theme: texture
|
|
||||||
remote_theme: thelehhman/texture
|
|
||||||
plugins:
|
|
||||||
- jekyll-feed
|
|
||||||
- jekyll-remote-theme
|
|
||||||
|
|
||||||
include: [_pages]
|
|
||||||
permalink: /:year/:month/:title.html
|
|
||||||
|
|
||||||
# Exclude from processing.
|
|
||||||
# The following items will not be processed, by default. Create a custom list
|
|
||||||
# to override the default setting.
|
|
||||||
# exclude:
|
|
||||||
# - Gemfile
|
|
||||||
# - Gemfile.lock
|
|
||||||
# - node_modules
|
|
||||||
# - vendor/bundle/
|
|
||||||
# - vendor/cache/
|
|
||||||
# - vendor/gems/
|
|
||||||
# - vendor/ruby/
|
|
@ -1,23 +0,0 @@
|
|||||||
{% if page.layout == 'post' %}
|
|
||||||
{% comment %}Thanks to https://www.bytedude.com/jekyll-previous-and-next-posts/{% endcomment %}
|
|
||||||
<div class="container">
|
|
||||||
<hr>
|
|
||||||
<div class="post-nav">
|
|
||||||
<div>
|
|
||||||
{% if page.previous.url %}
|
|
||||||
<a href="{{page.previous.url}}">« {{page.previous.title}}</a>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
<div class="post-nav-next">
|
|
||||||
{% if page.next.url %}
|
|
||||||
<a href="{{page.next.url}}">{{page.next.title}} »</a>
|
|
||||||
{% endif %}
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
|
|
||||||
<script type="text/javascript"
|
|
||||||
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
|
|
||||||
</script>
|
|
||||||
{% endif %}
|
|
@ -1,7 +0,0 @@
|
|||||||
<meta charset="UTF-8">
|
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
||||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
|
||||||
<link rel="stylesheet" href="{{ "/assets/css/style.css" | relative_url }}">
|
|
||||||
<link rel="stylesheet" href="{{ "/assets/css/fonts.css" | prepend: site.baseurl }}">
|
|
||||||
<title>{{ page.title | default: site.title }}</title>
|
|
||||||
{% seo %}
|
|
@ -1,7 +0,0 @@
|
|||||||
<div class="navbar">
|
|
||||||
<a href="{{ "/" | prepend: site.baseurl }}">Home</a>
|
|
||||||
<span class="separator"></span>
|
|
||||||
<a href="{{ "/about/" | prepend: site.baseurl }}">About</a>
|
|
||||||
<span class="separator"></span>
|
|
||||||
<a href="{{ "/feed.xml" | prepend: site.baseurl }}">RSS</a>
|
|
||||||
</div>
|
|
@ -1,15 +0,0 @@
|
|||||||
<div class="container">
|
|
||||||
<h2>{{ site.title }}</h2>
|
|
||||||
<h1>{{ site.description }}</h1>
|
|
||||||
<ul class="social">
|
|
||||||
{%- if site.texture.social_links.github -%}
|
|
||||||
<a href="https://github.com/{{ site.texture.social_links.github }}"><li><i class="icon-github-circled"></i></li></a>
|
|
||||||
{%- endif -%}
|
|
||||||
{%- if site.texture.social_links.linkedIn -%}
|
|
||||||
<a href="https://linkedin.com/{{ site.texture.social_links.linkedIn }}"><li><i class="icon-linkedin-squared"></i></li></a>
|
|
||||||
{%- endif -%}
|
|
||||||
{%- if site.texture.social_links.twitter -%}
|
|
||||||
<a href="https://twitter.com/{{ site.texture.social_links.twitter }}"><li><i class="icon-twitter-squared"></i></li></a>
|
|
||||||
{%- endif -%}
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
@ -1,13 +0,0 @@
|
|||||||
---
|
|
||||||
layout: page
|
|
||||||
title: About
|
|
||||||
permalink: /about/
|
|
||||||
---
|
|
||||||
|
|
||||||
Developer currently living in New York City.
|
|
||||||
|
|
||||||
Best ways to get in contact:
|
|
||||||
|
|
||||||
- Email: [bradlee@speice.io](mailto:bradlee@speice.io)
|
|
||||||
- Github: [bspeice](https://github.com/bspeice)
|
|
||||||
- LinkedIn: [bradleespeice](https://www.linkedin.com/in/bradleespeice/)
|
|
@ -1,38 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Hello!"
|
|
||||||
description: ""
|
|
||||||
category:
|
|
||||||
tags: []
|
|
||||||
---
|
|
||||||
|
|
||||||
I'll do what I can to keep this short, there's plenty of other things we both should be doing right
|
|
||||||
now.
|
|
||||||
|
|
||||||
If you're here for the bread pics, and to marvel in some other culinary side projects, I've got you
|
|
||||||
covered:
|
|
||||||
|
|
||||||
![Saturday Bread]({{ "/assets/images/2018-05-28-bread.jpg" | absolute_url }})
|
|
||||||
|
|
||||||
And no, I'm not posting pictures of earlier attempts that ended up turning into rocks in the oven.
|
|
||||||
|
|
||||||
Okay, just one:
|
|
||||||
|
|
||||||
![Bread as rock]({{ "/assets/images/2018-05-28-rocks.jpg" | absolute_url }})
|
|
||||||
|
|
||||||
If you're here for keeping up with the man Bradlee Speice, got plenty of that too. Plus some
|
|
||||||
up-coming super-nerdy posts about how I'm changing the world.
|
|
||||||
|
|
||||||
And if you're not here for those things: don't have a lot for you, sorry. But you're welcome to let
|
|
||||||
me know what needs to change.
|
|
||||||
|
|
||||||
I'm looking forward to making this a place to talk about what's going on in life, I hope you'll
|
|
||||||
stick it out with me. The best way to follow what's going on is on my [About](/about/) page, but if
|
|
||||||
you want the joy of clicking links, here's a few good ones:
|
|
||||||
|
|
||||||
- Email (people still use this?): [bradlee@speice.io](mailto:bradlee@speice.io)
|
|
||||||
- Mastodon (nerd Twitter): [@bradlee](https://mastodon.social/@bradlee)
|
|
||||||
- Chat (RiotIM): [@bspeice:matrix.com](https://matrix.to/#/@bspeice:matrix.com)
|
|
||||||
- The comments section (not for people with sanity intact): ↓↓↓
|
|
||||||
|
|
||||||
Thanks, and keep it amazing.
|
|
@ -1,177 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "What I Learned: Porting Dateutil Parser to Rust"
|
|
||||||
description: ""
|
|
||||||
category:
|
|
||||||
tags: [dtparse, rust]
|
|
||||||
---
|
|
||||||
|
|
||||||
Hi. I'm Bradlee.
|
|
||||||
|
|
||||||
I've mostly been a lurker in Rust for a while, making a couple small contributions here and there.
|
|
||||||
So launching [dtparse](https://github.com/bspeice/dtparse) feels like a nice step towards becoming a
|
|
||||||
functioning member of society. But not too much, because then you know people start asking you to
|
|
||||||
pay bills, and ain't nobody got time for that.
|
|
||||||
|
|
||||||
But I built dtparse, and you can read about my thoughts on the process. Or don't. I won't tell you
|
|
||||||
what to do with your life (but you should totally keep reading).
|
|
||||||
|
|
||||||
# Slow down, what?
|
|
||||||
|
|
||||||
OK, fine, I guess I should start with _why_ someone would do this.
|
|
||||||
|
|
||||||
[Dateutil](https://github.com/dateutil/dateutil) is a Python library for handling dates. The
|
|
||||||
standard library support for time in Python is kinda dope, but there are a lot of extras that go
|
|
||||||
into making it useful beyond just the [datetime](https://docs.python.org/3.6/library/datetime.html)
|
|
||||||
module. `dateutil.parser` specifically is code to take all the super-weird time formats people come
|
|
||||||
up with and turn them into something actually useful.
|
|
||||||
|
|
||||||
Date/time parsing, it turns out, is just like everything else involving
|
|
||||||
[computers](https://infiniteundo.com/post/25326999628/falsehoods-programmers-believe-about-time) and
|
|
||||||
[time](https://infiniteundo.com/post/25509354022/more-falsehoods-programmers-believe-about-time): it
|
|
||||||
feels like it shouldn't be that difficult to do, until you try to do it, and you realize that people
|
|
||||||
suck and this is why
|
|
||||||
[we can't have nice things](https://zachholman.com/talk/utc-is-enough-for-everyone-right). But
|
|
||||||
alas, we'll try and make contemporary art out of the rubble and give it a pretentious name like
|
|
||||||
_Time_.
|
|
||||||
|
|
||||||
![A gravel mound](/assets/images/2018-06-25-gravel-mound.jpg)
|
|
||||||
|
|
||||||
> [Time](https://www.goodfreephotos.com/united-states/montana/elkhorn/remains-of-the-mining-operation-elkhorn.jpg.php)
|
|
||||||
|
|
||||||
What makes `dateutil.parser` great is that there's a single function with a single argument that
|
|
||||||
drives what programmers interact with:
|
|
||||||
[`parse(timestr)`](https://github.com/dateutil/dateutil/blob/6dde5d6298cfb81a4c594a38439462799ed2aef2/dateutil/parser/_parser.py#L1258).
|
|
||||||
It takes in the time as a string, and gives you back a reasonable "look, this is the best anyone can
|
|
||||||
possibly do to make sense of your input" value. It doesn't expect much of you.
|
|
||||||
|
|
||||||
[And now it's in Rust.](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L1332)
|
|
||||||
|
|
||||||
# Lost in Translation
|
|
||||||
|
|
||||||
Having worked at a bulge-bracket bank watching Java programmers try to be Python programmers, I'm
|
|
||||||
admittedly hesitant to publish Python code that's trying to be Rust. Interestingly, Rust code can
|
|
||||||
actually do a great job of mimicking Python. It's certainly not idiomatic Rust, but I've had better
|
|
||||||
experiences than
|
|
||||||
[this guy](https://webcache.googleusercontent.com/search?q=cache:wkYMpktJtnUJ:https://jackstouffer.com/blog/porting_dateutil.html+&cd=3&hl=en&ct=clnk&gl=us)
|
|
||||||
who attempted the same thing for D. These are the actual take-aways:
|
|
||||||
|
|
||||||
When transcribing code, **stay as close to the original library as possible**. I'm talking about
|
|
||||||
using the same variable names, same access patterns, the whole shebang. It's way too easy to make a
|
|
||||||
couple of typos, and all of a sudden your code blows up in new and exciting ways. Having a reference
|
|
||||||
manual for verbatim what your code should be means that you don't spend that long debugging
|
|
||||||
complicated logic, you're more looking for typos.
|
|
||||||
|
|
||||||
Also, **don't use nice Rust things like enums**. While
|
|
||||||
[one time it worked out OK for me](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L88-L94),
|
|
||||||
I also managed to shoot myself in the foot a couple times because `dateutil` stores AM/PM as a
|
|
||||||
boolean and I mixed up which was true, and which was false (side note: AM is false, PM is true). In
|
|
||||||
general, writing nice code _should not be a first-pass priority_ when you're just trying to recreate
|
|
||||||
the same functionality.
|
|
||||||
|
|
||||||
**Exceptions are a pain.** Make peace with it. Python code is just allowed to skip stack frames. So
|
|
||||||
when a co-worker told me "Rust is getting try-catch syntax" I properly freaked out. Turns out
|
|
||||||
[he's not quite right](https://github.com/rust-lang/rfcs/pull/243), and I'm OK with that. And while
|
|
||||||
`dateutil` is pretty well-behaved about not skipping multiple stack frames,
|
|
||||||
[130-line try-catch blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L730-L865)
|
|
||||||
take a while to verify.
|
|
||||||
|
|
||||||
As another Python quirk, **be very careful about
|
|
||||||
[long nested if-elif-else blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L494-L568)**.
|
|
||||||
I used to think that Python's whitespace was just there to get you to format your code correctly. I
|
|
||||||
think that no longer. It's way too easy to close a block too early and have incredibly weird issues
|
|
||||||
in the logic. Make sure you use an editor that displays indentation levels so you can keep things
|
|
||||||
straight.
|
|
||||||
|
|
||||||
**Rust macros are not free.** I originally had the
|
|
||||||
[main test body](https://github.com/bspeice/dtparse/blob/b0e737f088eca8e83ab4244c6621a2797d247697/tests/compat.rs#L63-L217)
|
|
||||||
wrapped up in a macro using [pyo3](https://github.com/PyO3/PyO3). It took two minutes to compile.
|
|
||||||
After
|
|
||||||
[moving things to a function](https://github.com/bspeice/dtparse/blob/e017018295c670e4b6c6ee1cfff00dbb233db47d/tests/compat.rs#L76-L205)
|
|
||||||
compile times dropped down to ~5 seconds. Turns out 150 lines \* 100 tests = a lot of redundant code
|
|
||||||
to be compiled. My new rule of thumb is that any macros longer than 10-15 lines are actually
|
|
||||||
functions that need to be liberated, man.
|
|
||||||
|
|
||||||
Finally, **I really miss list comprehensions and dictionary comprehensions.** As a quick comparison,
|
|
||||||
see
|
|
||||||
[this dateutil code](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L476)
|
|
||||||
and
|
|
||||||
[the implementation in Rust](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L619-L629).
|
|
||||||
I probably wrote it wrong, and I'm sorry. Ultimately though, I hope that these comprehensions can be
|
|
||||||
added through macros or syntax extensions. Either way, they're expressive, save typing, and are
|
|
||||||
super-readable. Let's get more of that.
|
|
||||||
|
|
||||||
# Using a young language
|
|
||||||
|
|
||||||
Now, Rust is exciting and new, which means that there's opportunity to make a substantive impact. On
|
|
||||||
more than one occasion though, I've had issues navigating the Rust ecosystem.
|
|
||||||
|
|
||||||
What I'll call the "canonical library" is still being built. In Python, if you need datetime
|
|
||||||
parsing, you use `dateutil`. If you want `decimal` types, it's already in the
|
|
||||||
[standard library](https://docs.python.org/3.6/library/decimal.html). While I might've gotten away
|
|
||||||
with `f64`, `dateutil` uses decimals, and I wanted to follow the principle of **staying as close to
|
|
||||||
the original library as possible**. Thus began my quest to find a decimal library in Rust. What I
|
|
||||||
quickly found was summarized in a comment:
|
|
||||||
|
|
||||||
> Writing a BigDecimal is easy. Writing a _good_ BigDecimal is hard.
|
|
||||||
>
|
|
||||||
> [-cmr](https://github.com/rust-lang/rust/issues/8937#issuecomment-34582794)
|
|
||||||
|
|
||||||
In practice, this means that there are at least [4](https://crates.io/crates/bigdecimal)
|
|
||||||
[different](https://crates.io/crates/rust_decimal)
|
|
||||||
[implementations](https://crates.io/crates/decimal) [available](https://crates.io/crates/decimate).
|
|
||||||
And that's a lot of decisions to worry about when all I'm thinking is "why can't
|
|
||||||
[calendar reform](https://en.wikipedia.org/wiki/Calendar_reform) be a thing" and I'm forced to dig
|
|
||||||
through a [couple](https://github.com/rust-lang/rust/issues/8937#issuecomment-31661916)
|
|
||||||
[different](https://github.com/rust-lang/rfcs/issues/334)
|
|
||||||
[threads](https://github.com/rust-num/num/issues/8) to figure out if the library I'm looking at is dead
|
|
||||||
or just stable.
|
|
||||||
|
|
||||||
And even when the "canonical library" exists, there's no guarantees that it will be well-maintained.
|
|
||||||
[Chrono](https://github.com/chronotope/chrono) is the _de facto_ date/time library in Rust, and just
|
|
||||||
released version 0.4.4 like two days ago. Meanwhile,
|
|
||||||
[chrono-tz](https://github.com/chronotope/chrono-tz) appears to be dead in the water even though
|
|
||||||
[there are people happy to help maintain it](https://github.com/chronotope/chrono-tz/issues/19). I
|
|
||||||
know relatively little about it, but it appears that most of the release process is automated;
|
|
||||||
keeping that up to date should be a no-brainer.
|
|
||||||
|
|
||||||
## Trial Maintenance Policy
|
|
||||||
|
|
||||||
Specifically given "maintenance" being an
|
|
||||||
[oft-discussed](https://www.reddit.com/r/rust/comments/48540g/thoughts_on_initiators_vs_maintainers/)
|
|
||||||
issue, I'm going to try out the following policy to keep things moving on `dtparse`:
|
|
||||||
|
|
||||||
1. Issues/PRs needing _maintainer_ feedback will be updated at least weekly. I want to make sure
|
|
||||||
nobody's blocking on me.
|
|
||||||
|
|
||||||
2. To keep issues/PRs needing _contributor_ feedback moving, I'm going to (kindly) ask the
|
|
||||||
contributor to check in after two weeks, and close the issue without resolution if I hear nothing
|
|
||||||
back after a month.
|
|
||||||
|
|
||||||
The second point I think has the potential to be a bit controversial, so I'm happy to receive
|
|
||||||
feedback on that. And if a contributor responds with "hey, still working on it, had a kid and I'm
|
|
||||||
running on 30 seconds of sleep a night," then first: congratulations on sustaining human life. And
|
|
||||||
second: I don't mind keeping those requests going indefinitely. I just want to try and balance
|
|
||||||
keeping things moving with giving people the necessary time they need.
|
|
||||||
|
|
||||||
I should also note that I'm still getting some best practices in place - CONTRIBUTING and
|
|
||||||
CONTRIBUTORS files need to be added, as well as issue/PR templates. In progress. None of us are
|
|
||||||
perfect.
|
|
||||||
|
|
||||||
# Roadmap and Conclusion
|
|
||||||
|
|
||||||
So if I've now built a `dateutil`-compatible parser, we're done, right? Of course not! That's not
|
|
||||||
nearly ambitious enough.
|
|
||||||
|
|
||||||
Ultimately, I'd love to have a library that's capable of parsing everything the Linux `date` command
|
|
||||||
can do (and not `date` on OSX, because seriously, BSD coreutils are the worst). I know Rust has a
|
|
||||||
coreutils rewrite going on, and `dtparse` would potentially be an interesting candidate since it
|
|
||||||
doesn't bring in a lot of extra dependencies. [`humantime`](https://crates.io/crates/humantime)
|
|
||||||
could help pick up some of the (current) slack in dtparse, so maybe we can share and care with each
|
|
||||||
other?
|
|
||||||
|
|
||||||
All in all, I'm mostly hoping that nobody's already done this and I haven't spent a bit over a month
|
|
||||||
on redundant code. So if it exists, tell me. I need to know, but be nice about it, because I'm going
|
|
||||||
to take it hard.
|
|
||||||
|
|
||||||
And in the meantime, I'm looking forward to building more. Onwards.
|
|
@ -1,323 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Primitives in Rust are Weird (and Cool)"
|
|
||||||
description: "but mostly weird."
|
|
||||||
category:
|
|
||||||
tags: [rust, c, java, python, x86]
|
|
||||||
---
|
|
||||||
|
|
||||||
I wrote a really small Rust program a while back because I was curious. I was 100% convinced it
|
|
||||||
couldn't possibly run:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn main() {
|
|
||||||
println!("{}", 8.to_string())
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
And to my complete befuddlement, it compiled, ran, and produced a completely sensible output. The
|
|
||||||
reason I was so surprised has to do with how Rust treats a special category of things I'm going to
|
|
||||||
call _primitives_. In the current version of the Rust book, you'll see them referred to as
|
|
||||||
[scalars][rust_scalar], and in older versions they'll be called [primitives][rust_primitive], but
|
|
||||||
we're going to stick with the name _primitive_ for the time being. Explaining why this program is so
|
|
||||||
cool requires talking about a number of other programming languages, and keeping a consistent
|
|
||||||
terminology makes things easier.
|
|
||||||
|
|
||||||
**You've been warned:** this is going to be a tedious post about a relatively minor issue that
|
|
||||||
involves Java, Python, C, and x86 Assembly. And also me pretending like I know what I'm talking
|
|
||||||
about with assembly.
|
|
||||||
|
|
||||||
# Defining primitives (Java)
|
|
||||||
|
|
||||||
The reason I'm using the name _primitive_ comes from how much of my life is Java right now. Spoiler
|
|
||||||
alert: a lot of it. And for the most part I like Java, but I digress. In Java, there's a special
|
|
||||||
name for some specific types of values:
|
|
||||||
|
|
||||||
> ```
|
|
||||||
> boolean char byte
|
|
||||||
> short int long
|
|
||||||
> float double
|
|
||||||
> ```
|
|
||||||
|
|
||||||
|
|
||||||
They are referred to as [primitives][java_primitive]. And relative to the other bits of Java,
|
|
||||||
they have two unique features. First, they don't have to worry about the
|
|
||||||
[billion-dollar mistake](https://en.wikipedia.org/wiki/Tony_Hoare#Apologies_and_retractions);
|
|
||||||
primitives in Java can never be `null`. Second: *they can't have instance methods*.
|
|
||||||
Remember that Rust program from earlier? Java has no idea what to do with it:
|
|
||||||
|
|
||||||
```java
|
|
||||||
class Main {
|
|
||||||
public static void main(String[] args) {
|
|
||||||
int x = 8;
|
|
||||||
System.out.println(x.toString()); // Triggers a compiler error
|
|
||||||
}
|
|
||||||
}
|
|
||||||
````
|
|
||||||
|
|
||||||
The error is:
|
|
||||||
|
|
||||||
```
|
|
||||||
Main.java:5: error: int cannot be dereferenced
|
|
||||||
System.out.println(x.toString());
|
|
||||||
^
|
|
||||||
1 error
|
|
||||||
```
|
|
||||||
|
|
||||||
Specifically, Java's [`Object`](https://docs.oracle.com/javase/10/docs/api/java/lang/Object.html)
|
|
||||||
and things that inherit from it are pointers under the hood, and we have to dereference them before
|
|
||||||
the fields and methods they define can be used. In contrast, _primitive types are just values_ -
|
|
||||||
there's nothing to be dereferenced. In memory, they're just a sequence of bits.
|
|
||||||
|
|
||||||
If we really want, we can turn the `int` into an
|
|
||||||
[`Integer`](https://docs.oracle.com/javase/10/docs/api/java/lang/Integer.html) and then dereference
|
|
||||||
it, but it's a bit wasteful:
|
|
||||||
|
|
||||||
```java
|
|
||||||
class Main {
|
|
||||||
public static void main(String[] args) {
|
|
||||||
int x = 8;
|
|
||||||
Integer y = Integer.valueOf(x);
|
|
||||||
System.out.println(y.toString());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
This creates the variable `y` of type `Integer` (which inherits `Object`), and at run time we
|
|
||||||
dereference `y` to locate the `toString()` function and call it. Rust obviously handles things a bit
|
|
||||||
differently, but we have to dig into the low-level details to see it in action.
|
|
||||||
|
|
||||||
# Low Level Handling of Primitives (C)
|
|
||||||
|
|
||||||
We first need to build a foundation for reading and understanding the assembly code the final answer
|
|
||||||
requires. Let's begin with showing how the `C` language (and your computer) thinks about "primitive"
|
|
||||||
values in memory:
|
|
||||||
|
|
||||||
```c
|
|
||||||
void my_function(int num) {}
|
|
||||||
|
|
||||||
int main() {
|
|
||||||
int x = 8;
|
|
||||||
my_function(x);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
The [compiler explorer](https://godbolt.org/z/lgNYcc) gives us an easy way of showing off the
|
|
||||||
assembly-level code that's generated: <span style="font-size:.6em">whose output has been lightly
|
|
||||||
edited</span>
|
|
||||||
|
|
||||||
```nasm
|
|
||||||
main:
|
|
||||||
push rbp
|
|
||||||
mov rbp, rsp
|
|
||||||
sub rsp, 16
|
|
||||||
|
|
||||||
; We assign the value `8` to `x` here
|
|
||||||
mov DWORD PTR [rbp-4], 8
|
|
||||||
|
|
||||||
; And copy the bits making up `x` to a location
|
|
||||||
; `my_function` can access (`edi`)
|
|
||||||
mov eax, DWORD PTR [rbp-4]
|
|
||||||
mov edi, eax
|
|
||||||
|
|
||||||
; Call `my_function` and give it control
|
|
||||||
call my_function
|
|
||||||
|
|
||||||
mov eax, 0
|
|
||||||
leave
|
|
||||||
ret
|
|
||||||
|
|
||||||
my_function:
|
|
||||||
push rbp
|
|
||||||
mov rbp, rsp
|
|
||||||
|
|
||||||
; Copy the bits out of the pre-determined location (`edi`)
|
|
||||||
; to somewhere we can use
|
|
||||||
mov DWORD PTR [rbp-4], edi
|
|
||||||
nop
|
|
||||||
|
|
||||||
pop rbp
|
|
||||||
ret
|
|
||||||
```
|
|
||||||
|
|
||||||
At a really low level of memory, we're copying bits around using the [`mov`][x86_guide] instruction;
|
|
||||||
nothing crazy. But to show how similar Rust is, let's take a look at our program translated from C
|
|
||||||
to Rust:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn my_function(x: i32) {}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let x = 8;
|
|
||||||
my_function(x)
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
And the assembly generated when we stick it in the
|
|
||||||
[compiler explorer](https://godbolt.org/z/cAlmk0): <span style="font-size:.6em">again, lightly
|
|
||||||
edited</span>
|
|
||||||
|
|
||||||
```nasm
|
|
||||||
example::main:
|
|
||||||
push rax
|
|
||||||
|
|
||||||
; Look familiar? We're copying bits to a location for `my_function`
|
|
||||||
; The compiler just optimizes out holding `x` in memory
|
|
||||||
mov edi, 8
|
|
||||||
|
|
||||||
; Call `my_function` and give it control
|
|
||||||
call example::my_function
|
|
||||||
|
|
||||||
pop rax
|
|
||||||
ret
|
|
||||||
|
|
||||||
example::my_function:
|
|
||||||
sub rsp, 4
|
|
||||||
|
|
||||||
; And copying those bits again, just like in C
|
|
||||||
mov dword ptr [rsp], edi
|
|
||||||
|
|
||||||
add rsp, 4
|
|
||||||
ret
|
|
||||||
```
|
|
||||||
|
|
||||||
The generated Rust assembly is functionally pretty close to the C assembly: _When working with
|
|
||||||
primitives, we're just dealing with bits in memory_.
|
|
||||||
|
|
||||||
In Java we have to dereference a pointer to call its functions; in Rust, there's no pointer to
|
|
||||||
dereference. So what exactly is going on with this `.to_string()` function call?
|
|
||||||
|
|
||||||
# impl primitive (and Python)
|
|
||||||
|
|
||||||
Now it's time to <strike>reveal my trap card</strike> show the revelation that tied all this
|
|
||||||
together: _Rust has implementations for its primitive types._ That's right, `impl` blocks aren't
|
|
||||||
only for `structs` and `traits`, primitives get them too. Don't believe me? Check out
|
|
||||||
[u32](https://doc.rust-lang.org/std/primitive.u32.html),
|
|
||||||
[f64](https://doc.rust-lang.org/std/primitive.f64.html) and
|
|
||||||
[char](https://doc.rust-lang.org/std/primitive.char.html) as examples.
|
|
||||||
|
|
||||||
But the really interesting bit is how Rust turns those `impl` blocks into assembly. Let's break out
|
|
||||||
the [compiler explorer](https://godbolt.org/z/6LBEwq) once again:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn main() {
|
|
||||||
8.to_string()
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
And the interesting bits in the assembly: <span style="font-size:.6em">heavily trimmed down</span>
|
|
||||||
|
|
||||||
```nasm
|
|
||||||
example::main:
|
|
||||||
sub rsp, 24
|
|
||||||
mov rdi, rsp
|
|
||||||
lea rax, [rip + .Lbyte_str.u]
|
|
||||||
mov rsi, rax
|
|
||||||
|
|
||||||
; Cool stuff right here
|
|
||||||
call <T as alloc::string::ToString>::to_string@PLT
|
|
||||||
|
|
||||||
mov rdi, rsp
|
|
||||||
call core::ptr::drop_in_place
|
|
||||||
add rsp, 24
|
|
||||||
ret
|
|
||||||
```
|
|
||||||
|
|
||||||
Now, this assembly is a bit more complicated, but here's the big revelation: **we're calling
|
|
||||||
`to_string()` as a function that exists all on its own, and giving it the instance of `8`**. Instead
|
|
||||||
of thinking of the value 8 as an instance of `u32` and then peeking in to find the location of the
|
|
||||||
function we want to call (like Java), we have a function that exists outside of the instance and
|
|
||||||
just give that function the value `8`.
|
|
||||||
|
|
||||||
This is an incredibly technical detail, but the interesting idea I had was this: _if `to_string()`
|
|
||||||
is a static function, can I refer to the unbound function and give it an instance?_
|
|
||||||
|
|
||||||
Better explained in code (and a [compiler explorer](https://godbolt.org/z/fJY-gA) link because I
|
|
||||||
seriously love this thing):
|
|
||||||
|
|
||||||
```rust
|
|
||||||
struct MyVal {
|
|
||||||
x: u32
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MyVal {
|
|
||||||
fn to_string(&self) -> String {
|
|
||||||
self.x.to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn main() {
|
|
||||||
let my_val = MyVal { x: 8 };
|
|
||||||
|
|
||||||
// THESE ARE THE SAME
|
|
||||||
my_val.to_string();
|
|
||||||
MyVal::to_string(&my_val);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Rust is totally fine "binding" the function call to the instance, and also as a static.
|
|
||||||
|
|
||||||
MIND == BLOWN.
|
|
||||||
|
|
||||||
Python does the same thing where I can both call functions bound to their instances and also call as
|
|
||||||
an unbound function where I give it the instance:
|
|
||||||
|
|
||||||
```python
|
|
||||||
class MyClass():
|
|
||||||
x = 24
|
|
||||||
|
|
||||||
def my_function(self):
|
|
||||||
print(self.x)
|
|
||||||
|
|
||||||
m = MyClass()
|
|
||||||
|
|
||||||
m.my_function()
|
|
||||||
MyClass.my_function(m)
|
|
||||||
```
|
|
||||||
|
|
||||||
And Python tries to make you _think_ that primitives can have instance methods...
|
|
||||||
|
|
||||||
```python
|
|
||||||
>>> dir(8)
|
|
||||||
['__abs__', '__add__', '__and__', '__class__', '__cmp__', '__coerce__',
|
|
||||||
'__delattr__', '__div__', '__divmod__', '__doc__', '__float__', '__floordiv__',
|
|
||||||
...
|
|
||||||
'__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__',
|
|
||||||
...]
|
|
||||||
|
|
||||||
>>> # Theoretically `8.__str__()` should exist, but:
|
|
||||||
|
|
||||||
>>> 8.__str__()
|
|
||||||
File "<stdin>", line 1
|
|
||||||
8.__str__()
|
|
||||||
^
|
|
||||||
SyntaxError: invalid syntax
|
|
||||||
|
|
||||||
>>> # It will run if we assign it first though:
|
|
||||||
>>> x = 8
|
|
||||||
>>> x.__str__()
|
|
||||||
'8'
|
|
||||||
```
|
|
||||||
|
|
||||||
...but in practice it's a bit complicated.
|
|
||||||
|
|
||||||
So while Python handles binding instance methods in a way similar to Rust, it's still not able to
|
|
||||||
run the example we started with.
|
|
||||||
|
|
||||||
# Conclusion
|
|
||||||
|
|
||||||
This was a super-roundabout way of demonstrating it, but the way Rust handles incredibly minor
|
|
||||||
details like primitives leads to really cool effects. Primitives are optimized like C in how they
|
|
||||||
have a space-efficient memory layout, yet the language still has a lot of features I enjoy in Python
|
|
||||||
(like both instance and late binding).
|
|
||||||
|
|
||||||
And when you put it together, there are areas where Rust does cool things nobody else can; as a
|
|
||||||
quirky feature of Rust's type system, `8.to_string()` is actually valid code.
|
|
||||||
|
|
||||||
Now go forth and fool your friends into thinking you know assembly. This is all I've got.
|
|
||||||
|
|
||||||
[x86_guide]: http://www.cs.virginia.edu/~evans/cs216/guides/x86.html
|
|
||||||
[java_primitive]: https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html
|
|
||||||
[rust_scalar]: https://doc.rust-lang.org/book/second-edition/ch03-02-data-types.html#scalar-types
|
|
||||||
[rust_primitive]: https://doc.rust-lang.org/book/first-edition/primitive-types.html
|
|
@ -1,294 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Isomorphic Desktop Apps with Rust"
|
|
||||||
description: "Electron + WASM = ☣"
|
|
||||||
category:
|
|
||||||
tags: [rust, javascript, webassembly]
|
|
||||||
---
|
|
||||||
|
|
||||||
Forgive me, but this is going to be a bit of a schizophrenic post. I both despise Javascript and the
|
|
||||||
modern ECMAScript ecosystem, and I'm stunned by its success doing some really cool things. It's
|
|
||||||
[this duality](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) that's
|
|
||||||
led me to a couple of (very) late nights over the past weeks trying to reconcile myself as I
|
|
||||||
bootstrap a simple desktop application.
|
|
||||||
|
|
||||||
See, as much as
|
|
||||||
[Webassembly isn't trying to replace Javascript](https://webassembly.org/docs/faq/#is-webassembly-trying-to-replace-javascript),
|
|
||||||
**I want Javascript gone**. There are plenty of people who don't share my views, and they are
|
|
||||||
probably nicer and more fun at parties. But I cringe every time "Webpack" is mentioned, and I think
|
|
||||||
it's hilarious that the
|
|
||||||
[language specification](https://ecma-international.org/publications/standards/Ecma-402.htm)
|
|
||||||
dramatically outpaces anyone's
|
|
||||||
[actual implementation](https://kangax.github.io/compat-table/es2016plus/). The answer to this
|
|
||||||
conundrum is of course to recompile code from newer versions of the language to older versions _of
|
|
||||||
the same language_ before running. At least [Babel] is a nice tongue-in-cheek reference.
|
|
||||||
|
|
||||||
Yet for as much hate as [Electron] receives, it does a stunningly good job at solving a really hard
|
|
||||||
problem: _how the hell do I put a button on the screen and react when the user clicks it_? GUI
|
|
||||||
programming is hard, straight up. But if browsers are already able to run everywhere, why don't we
|
|
||||||
take advantage of someone else solving the hard problems for us? I don't like that I have to use
|
|
||||||
Javascript for it, but I really don't feel inclined to whip out good ol' [wxWidgets].
|
|
||||||
|
|
||||||
Now there are other native solutions ([libui-rs], [conrod], [oh hey wxWidgets again!][wxrust]), but
|
|
||||||
those also have their own issues with distribution, styling, etc. With Electron, I can
|
|
||||||
`yarn create electron-app my-app` and just get going, knowing that packaging/upgrades/etc. are built
|
|
||||||
in.
|
|
||||||
|
|
||||||
My question is: given recent innovations with WASM, _are we Electron yet_?
|
|
||||||
|
|
||||||
No, not really.
|
|
||||||
|
|
||||||
Instead, **what would it take to get to a point where we can skip Javascript in Electron apps?**
|
|
||||||
|
|
||||||
# Setting the Stage
|
|
||||||
|
|
||||||
Truth is, WASM/Webassembly is a pretty new technology and I'm a total beginner in this area. There
|
|
||||||
may already be solutions to the issues I discuss, but I'm totally unaware of them, so I'm going to
|
|
||||||
try and organize what I did manage to discover.
|
|
||||||
|
|
||||||
I should also mention that the content and things I'm talking about here are not intended to be
|
|
||||||
prescriptive, but more "if someone else is interested, what do we already know doesn't work?" _I
|
|
||||||
expect everything in this post to be obsolete within two months._ Even over the course of writing
|
|
||||||
this, [a separate blog post](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/) had
|
|
||||||
to be modified because [upstream changes](https://github.com/WebAssembly/binaryen/pull/1642) broke a
|
|
||||||
[Rust tool](https://github.com/rustwasm/wasm-bindgen/pull/787) the post tried to use. The post
|
|
||||||
ultimately
|
|
||||||
[got updated](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/#comment-477), **but
|
|
||||||
all this happened within the span of a week.** Things are moving quickly.
|
|
||||||
|
|
||||||
I'll also note that we're going to skip [asm.js] and [emscripten]. Truth be told, I couldn't get
|
|
||||||
either of these to output anything, and so I'm just going to say
|
|
||||||
[here be dragons.](https://en.wikipedia.org/wiki/Here_be_dragons) Everything I'm discussing here
|
|
||||||
uses the `wasm32-unknown-unknown` target.
|
|
||||||
|
|
||||||
The code that I _did_ get running is available
|
|
||||||
[over here](https://github.com/speice-io/isomorphic-rust). Feel free to use it as a starting point,
|
|
||||||
but I'm mostly including the link as a reference for the things that were attempted.
|
|
||||||
|
|
||||||
# An Example Running Application
|
|
||||||
|
|
||||||
So, I did _technically_ get a running application:
|
|
||||||
|
|
||||||
![Electron app using WASM](/assets/images/2018-09-15-electron-percy-wasm.png)
|
|
||||||
|
|
||||||
...which you can also try out if you want:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
git clone https://github.com/speice-io/isomorphic-rust.git
|
|
||||||
cd isomorphic-rust/percy
|
|
||||||
yarn install && yarn start
|
|
||||||
```
|
|
||||||
|
|
||||||
...but I wouldn't really call it a "high quality" starting point to base future work on. It's mostly
|
|
||||||
there to prove this is possible in the first place. And that's something to be proud of! There's a
|
|
||||||
huge amount of engineering that went into showing a window with the text "It's alive!".
|
|
||||||
|
|
||||||
There are also a lot of usability issues that prevent me from recommending anyone try Electron and
|
|
||||||
WASM apps at the moment, and I think that's the more important thing to discuss.
|
|
||||||
|
|
||||||
# Issue the First: Complicated Toolchains
|
|
||||||
|
|
||||||
I quickly established that [wasm-bindgen] was necessary to "link" my Rust code to Javascript. At
|
|
||||||
that point you've got an Electron app that starts an HTML page which ultimately fetches your WASM
|
|
||||||
blob. To keep things simple, the goal was to package everything using [webpack] so that I could just
|
|
||||||
load a `bundle.js` file on the page. That decision was to be the last thing that kinda worked in
|
|
||||||
this process.
|
|
||||||
|
|
||||||
The first issue
|
|
||||||
[I ran into](https://www.reddit.com/r/rust/comments/98lpun/unable_to_load_wasm_for_electron_application/)
|
|
||||||
while attempting to bundle everything via `webpack` is a detail in the WASM spec:
|
|
||||||
|
|
||||||
> This function accepts a Response object, or a promise for one, and ... **[if it] does not match
|
|
||||||
> the `application/wasm` MIME type**, the returned promise will be rejected with a TypeError;
|
|
||||||
>
|
|
||||||
> [WebAssembly - Additional Web Embedding API](https://webassembly.org/docs/web/#additional-web-embedding-api)
|
|
||||||
|
|
||||||
Specifically, if you try and load a WASM blob without the MIME type set, you'll get an error. On the
|
|
||||||
web this isn't a huge issue, as the server can set MIME types when delivering the blob. With
|
|
||||||
Electron, you're resolving things with a `file://` URL and thus can't control the MIME type:
|
|
||||||
|
|
||||||
![TypeError: Incorrect response MIME type. Expected 'application/wasm'.](/assets/images/2018-09-15-incorrect-MIME-type.png)
|
|
||||||
|
|
||||||
There are a couple of solutions depending on how far into the deep end you care to venture:
|
|
||||||
|
|
||||||
- Embed a static file server in your Electron application
|
|
||||||
- Use a [custom protocol](https://electronjs.org/docs/api/protocol) and custom protocol handler
|
|
||||||
- Host your WASM blob on a website that you resolve at runtime
|
|
||||||
|
|
||||||
But all these are pretty bad solutions and defeat the purpose of using WASM in the first place.
|
|
||||||
Instead, my workaround was to
|
|
||||||
[open a PR with `webpack`](https://github.com/webpack/webpack/issues/7918) and use regex to remove
|
|
||||||
calls to `instantiateStreaming` in the
|
|
||||||
[build script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/build.sh#L21-L25):
|
|
||||||
|
|
||||||
```sh
|
|
||||||
cargo +nightly build --target=wasm32-unknown-unknown && \
|
|
||||||
wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
|
|
||||||
# Have to use --mode=development so we can patch out the call to instantiateStreaming
|
|
||||||
"$DIR/node_modules/webpack-cli/bin/cli.js" --mode=development "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js" && \
|
|
||||||
sed -i 's/.*instantiateStreaming.*//g' "$APP_DIR/bundle.js"
|
|
||||||
```
|
|
||||||
|
|
||||||
Once that lands, the
|
|
||||||
[build process](https://github.com/speice-io/isomorphic-rust/blob/master/percy_patched_webpack/build.sh#L24-L27)
|
|
||||||
becomes much simpler:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
|
|
||||||
cargo +nightly build --target=wasm32-unknown-unknown && \
|
|
||||||
wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
|
|
||||||
"$DIR/node_modules/webpack-cli/bin/cli.js" --mode=production "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js"
|
|
||||||
```
|
|
||||||
|
|
||||||
But we're not done yet! After we compile Rust into WASM and link WASM to Javascript (via
|
|
||||||
`wasm-bindgen` and `webpack`), we still have to make an Electron app. For this purpose I used a
|
|
||||||
starter app from [Electron Forge], and then a
|
|
||||||
[`prestart` script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
|
|
||||||
to actually handle starting the application.
|
|
||||||
|
|
||||||
The
|
|
||||||
[final toolchain](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
|
|
||||||
looks something like this:
|
|
||||||
|
|
||||||
- `yarn start` triggers the `prestart` script
|
|
||||||
- `prestart` checks for missing tools (`wasm-bindgen-cli`, etc.) and then:
|
|
||||||
- Uses `cargo` to compile the Rust code into WASM
|
|
||||||
- Uses `wasm-bindgen` to link the WASM blob into a Javascript file with exported symbols
|
|
||||||
- Uses `webpack` to bundle the page start script with the Javascript we just generated
|
|
||||||
- Uses `babel` under the hood to compile the `wasm-bindgen` code down from ES6 into something
|
|
||||||
browser-compatible
|
|
||||||
- The `start` script runs an Electron Forge handler to do some sanity checks
|
|
||||||
- Electron actually starts
|
|
||||||
|
|
||||||
...which is complicated. I think more work needs to be done to either build a high-quality starter
|
|
||||||
app that can manage these steps, or another tool that "just handles" the complexity of linking a
|
|
||||||
compiled WASM file into something the Electron browser can run.
|
|
||||||
|
|
||||||
# Issue the Second: WASM tools in Rust
|
|
||||||
|
|
||||||
For as much as I didn't enjoy the Javascript tooling needed to interface with Rust, the Rust-only
|
|
||||||
bits aren't any better at the moment. I get it, a lot of projects are just starting off, and that
|
|
||||||
leads to a fragmented ecosystem. Here's what I can recommend as a starting point:
|
|
||||||
|
|
||||||
Don't check in your `Cargo.lock` files to version control. If there's a disagreement between the
|
|
||||||
version of `wasm-bindgen-cli` you have installed and the `wasm-bindgen` you're compiling with in
|
|
||||||
`Cargo.lock`, you get a nasty error:
|
|
||||||
|
|
||||||
```
|
|
||||||
it looks like the Rust project used to create this wasm file was linked against
|
|
||||||
a different version of wasm-bindgen than this binary:
|
|
||||||
|
|
||||||
rust wasm file: 0.2.21
|
|
||||||
this binary: 0.2.17
|
|
||||||
|
|
||||||
Currently the bindgen format is unstable enough that these two version must
|
|
||||||
exactly match, so it's required that these two version are kept in sync by
|
|
||||||
either updating the wasm-bindgen dependency or this binary.
|
|
||||||
```
|
|
||||||
|
|
||||||
Not that I ever managed to run into this myself (_coughs nervously_).
|
|
||||||
|
|
||||||
There are two projects attempting to be "application frameworks": [percy] and [yew]. Between those,
|
|
||||||
I managed to get [two](https://github.com/speice-io/isomorphic-rust/tree/master/percy)
|
|
||||||
[examples](https://github.com/speice-io/isomorphic-rust/tree/master/percy_patched_webpack) running
|
|
||||||
using `percy`, but was unable to get an
|
|
||||||
[example](https://github.com/speice-io/isomorphic-rust/tree/master/yew) running with `yew` because
|
|
||||||
of issues with "missing modules" during the `webpack` step:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
ERROR in ./dist/electron_yew_wasm_bg.wasm
|
|
||||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/yew/dist'
|
|
||||||
@ ./dist/electron_yew_wasm_bg.wasm
|
|
||||||
@ ./dist/electron_yew_wasm.js
|
|
||||||
@ ./dist/app.js
|
|
||||||
@ ./dist/app_loader.js
|
|
||||||
```
|
|
||||||
|
|
||||||
If you want to work with the browser APIs directly, your choices are [percy-webapis] or [stdweb] (or
|
|
||||||
eventually [web-sys]). See above for my `percy` examples, but when I tried
|
|
||||||
[an example with `stdweb`](https://github.com/speice-io/isomorphic-rust/tree/master/stdweb), I was
|
|
||||||
unable to get it running:
|
|
||||||
|
|
||||||
```sh
|
|
||||||
ERROR in ./dist/stdweb_electron_bg.wasm
|
|
||||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/stdweb/dist'
|
|
||||||
@ ./dist/stdweb_electron_bg.wasm
|
|
||||||
@ ./dist/stdweb_electron.js
|
|
||||||
@ ./dist/app_loader.js
|
|
||||||
```
|
|
||||||
|
|
||||||
At this point I'm pretty convinced that `stdweb` is causing issues for `yew` as well, but can't
|
|
||||||
prove it.
|
|
||||||
|
|
||||||
I did also get a [minimal example](https://github.com/speice-io/isomorphic-rust/tree/master/minimal)
|
|
||||||
running that doesn't depend on any tools besides `wasm-bindgen`. However, it requires manually
|
|
||||||
writing "`extern C`" blocks for everything you need from the browser. Es no bueno.
|
|
||||||
|
|
||||||
Finally, from a tools and platform view, there are two up-and-coming packages that should be
|
|
||||||
mentioned: [js-sys] and [web-sys]. Their purpose is to be fundamental building blocks that expose
|
|
||||||
the browser's APIs to Rust. If you're interested in building an app framework from scratch, these
|
|
||||||
should give you the most flexibility. I didn't touch either in my research, though I expect them to
|
|
||||||
be essential long-term.
|
|
||||||
|
|
||||||
So there's a lot in play from the Rust side of things, and it's just going to take some time to
|
|
||||||
figure out what works and what doesn't.
|
|
||||||
|
|
||||||
# Issue the Third: Known Unknowns
|
|
||||||
|
|
||||||
Alright, so after I managed to get an application started, I stopped there. It was a good deal of
|
|
||||||
effort to chain together even a proof of concept, and at this point I'd rather learn [Typescript]
|
|
||||||
than keep trying to maintain an incredibly brittle pipeline. Blasphemy, I know...
|
|
||||||
|
|
||||||
The important point I want to make is that there's a lot unknown about how any of this holds up
|
|
||||||
outside proofs of concept. Things I didn't attempt:
|
|
||||||
|
|
||||||
- Testing
|
|
||||||
- Packaging
|
|
||||||
- Updates
|
|
||||||
- Literally anything related to why I wanted to use Electron in the first place
|
|
||||||
|
|
||||||
# What it Would Take
|
|
||||||
|
|
||||||
Much as I don't like Javascript, the tools are too shaky for me to recommend mixing Electron and
|
|
||||||
WASM at the moment. There's a lot of innovation happening, so who knows? Someone might have an
|
|
||||||
application in production a couple months from now. But at the moment, I'm personally going to stay
|
|
||||||
away.
|
|
||||||
|
|
||||||
Let's finish with a wishlist then - here are the things that I think need to happen before
|
|
||||||
Electron/WASM/Rust can become a thing:
|
|
||||||
|
|
||||||
- Webpack still needs some updates. The necessary work is in progress, but hasn't landed yet
|
|
||||||
([#7983](https://github.com/webpack/webpack/pull/7983))
|
|
||||||
- Browser API libraries (`web-sys` and `stdweb`) need to make sure they can support running in
|
|
||||||
Electron (see module error above)
|
|
||||||
- Projects need to stabilize. There's talk of `stdweb` being turned into a Rust API
|
|
||||||
[on top of web-sys](https://github.com/rustwasm/team/issues/226#issuecomment-418475778), and percy
|
|
||||||
[moving to web-sys](https://github.com/chinedufn/percy/issues/24), both of which are big changes
|
|
||||||
- `wasm-bindgen` is great, but still in the "move fast and break things" phase
|
|
||||||
- A good "boilerplate" app would dramatically simplify the start-up costs;
|
|
||||||
[electron-react-boilerplate](https://github.com/chentsulin/electron-react-boilerplate) comes to
|
|
||||||
mind as a good project to imitate
|
|
||||||
- More blog posts/contributors! I think Electron + Rust could be cool, but I have no idea what I'm
|
|
||||||
doing
|
|
||||||
|
|
||||||
[wxwidgets]: https://wxwidgets.org/
|
|
||||||
[libui-rs]: https://github.com/LeoTindall/libui-rs/
|
|
||||||
[electron]: https://electronjs.org/
|
|
||||||
[babel]: https://babeljs.io/
|
|
||||||
[wxrust]: https://github.com/kenz-gelsoft/wxRust
|
|
||||||
[wasm-bindgen]: https://github.com/rustwasm/wasm-bindgen
|
|
||||||
[js-sys]: https://crates.io/crates/js-sys
|
|
||||||
[percy-webapis]: https://crates.io/crates/percy-webapis
|
|
||||||
[stdweb]: https://crates.io/crates/stdweb
|
|
||||||
[web-sys]: https://crates.io/crates/web-sys
|
|
||||||
[percy]: https://chinedufn.github.io/percy/
|
|
||||||
[virtual-dom-rs]: https://crates.io/crates/virtual-dom-rs
|
|
||||||
[yew]: https://github.com/DenisKolodin/yew
|
|
||||||
[react]: https://reactjs.org/
|
|
||||||
[elm]: http://elm-lang.org/
|
|
||||||
[asm.js]: http://asmjs.org/
|
|
||||||
[emscripten]: https://kripken.github.io/emscripten-site/
|
|
||||||
[typescript]: https://www.typescriptlang.org/
|
|
||||||
[electron forge]: https://electronforge.io/
|
|
||||||
[conrod]: https://github.com/PistonDevelopers/conrod
|
|
||||||
[webpack]: https://webpack.js.org/
|
|
@ -1,168 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "A Case Study in Heaptrack"
|
|
||||||
description: "...because you don't need no garbage collection"
|
|
||||||
category:
|
|
||||||
tags: []
|
|
||||||
---
|
|
||||||
|
|
||||||
One of my earliest conversations about programming went like this:
|
|
||||||
|
|
||||||
> Programmers have it too easy these days. They should learn to develop in low memory environments
|
|
||||||
> and be more efficient.
|
|
||||||
>
|
|
||||||
> -- My Father (paraphrased)
|
|
||||||
|
|
||||||
...though it's not like the first code I wrote was for a
|
|
||||||
[graphing calculator](https://education.ti.com/en/products/calculators/graphing-calculators/ti-84-plus-se)
|
|
||||||
packing a whole 24KB of RAM. By the way, _what are you doing on my lawn?_
|
|
||||||
|
|
||||||
The principle remains though: be efficient with the resources you have, because
|
|
||||||
[what Intel giveth, Microsoft taketh away](http://exo-blog.blogspot.com/2007/09/what-intel-giveth-microsoft-taketh-away.html).
|
|
||||||
My professional work is focused on this kind of efficiency; low-latency financial markets demand
|
|
||||||
that you understand at a deep level _exactly_ what your code is doing. As I continue experimenting
|
|
||||||
with Rust for personal projects, it's exciting to bring a utilitarian mindset with me: there's
|
|
||||||
flexibility for the times I pretend to have a garbage collector, and flexibility for the times that
|
|
||||||
I really care about how memory is used.
|
|
||||||
|
|
||||||
This post is a (small) case study in how I went from the former to the latter. And ultimately, it's
|
|
||||||
intended to be a starting toolkit to empower analysis of your own code.
|
|
||||||
|
|
||||||
# Curiosity
|
|
||||||
|
|
||||||
When I first started building the [dtparse] crate, my intention was to mirror as closely as possible
|
|
||||||
the equivalent [Python library][dateutil]. Python, as you may know, is garbage collected. Very
|
|
||||||
rarely is memory usage considered in Python, and I likewise wasn't paying too much attention when
|
|
||||||
`dtparse` was first being built.
|
|
||||||
|
|
||||||
This lackadaisical approach to memory works well enough, and I'm not planning on making `dtparse`
|
|
||||||
hyper-efficient. But every so often, I've wondered: "what exactly is going on in memory?" With the
|
|
||||||
advent of Rust 1.28 and the
|
|
||||||
[Global Allocator trait](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html), I had a really
|
|
||||||
great idea: _build a custom allocator that allows you to track your own allocations._ That way, you
|
|
||||||
can do things like writing tests for both correct results and correct memory usage. I gave it a
|
|
||||||
[shot][qadapt], but learned very quickly: **never write your own allocator**. It went from "fun
|
|
||||||
weekend project" to "I have literally no idea what my computer is doing" at breakneck speed.
|
|
||||||
|
|
||||||
Instead, I'll highlight a separate path I took to make sense of my memory usage: [heaptrack].
|
|
||||||
|
|
||||||
# Turning on the System Allocator
|
|
||||||
|
|
||||||
This is the hardest part of the post. Because Rust uses
|
|
||||||
[its own allocator](https://github.com/rust-lang/rust/pull/27400#issue-41256384) by default,
|
|
||||||
`heaptrack` is unable to properly record unmodified Rust code. To remedy this, we'll make use of the
|
|
||||||
`#[global_allocator]` attribute.
|
|
||||||
|
|
||||||
Specifically, in `lib.rs` or `main.rs`, add this:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::alloc::System;
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static GLOBAL: System = System;
|
|
||||||
```
|
|
||||||
|
|
||||||
...and that's it. Everything else comes essentially for free.
|
|
||||||
|
|
||||||
# Running heaptrack
|
|
||||||
|
|
||||||
Assuming you've installed heaptrack <span style="font-size: .6em;">(Homebrew in Mac, package manager
|
|
||||||
in Linux, ??? in Windows)</span>, all that's left is to fire up your application:
|
|
||||||
|
|
||||||
```
|
|
||||||
heaptrack my_application
|
|
||||||
```
|
|
||||||
|
|
||||||
It's that easy. After the program finishes, you'll see a file in your local directory with a name
|
|
||||||
like `heaptrack.my_application.XXXX.gz`. If you load that up in `heaptrack_gui`, you'll see
|
|
||||||
something like this:
|
|
||||||
|
|
||||||
![heaptrack](/assets/images/2018-10-heaptrack/heaptrack-before.png)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
And even these pretty colors:
|
|
||||||
|
|
||||||
![pretty colors](/assets/images/2018-10-heaptrack/heaptrack-flamegraph.png)
|
|
||||||
|
|
||||||
# Reading Flamegraphs
|
|
||||||
|
|
||||||
To make sense of our memory usage, we're going to focus on that last picture - it's called a
|
|
||||||
["flamegraph"](http://www.brendangregg.com/flamegraphs.html). These charts are typically used to
|
|
||||||
show how much time your program spends executing each function, but they're used here to show how
|
|
||||||
much memory was allocated during those functions instead.
|
|
||||||
|
|
||||||
For example, we can see that all allocations happened during the `main` function:
|
|
||||||
|
|
||||||
![allocations in main](/assets/images/2018-10-heaptrack/heaptrack-main-colorized.png)
|
|
||||||
|
|
||||||
...and within that, all allocations happened during `dtparse::parse`:
|
|
||||||
|
|
||||||
![allocations in dtparse](/assets/images/2018-10-heaptrack/heaptrack-dtparse-colorized.png)
|
|
||||||
|
|
||||||
...and within _that_, allocations happened in two different places:
|
|
||||||
|
|
||||||
![allocations in parseinfo](/assets/images/2018-10-heaptrack/heaptrack-parseinfo-colorized.png)
|
|
||||||
|
|
||||||
Now I apologize that it's hard to see, but there's one area specifically that stuck out as an issue:
|
|
||||||
**what the heck is the `Default` thing doing?**
|
|
||||||
|
|
||||||
![pretty colors](/assets/images/2018-10-heaptrack/heaptrack-flamegraph-default.png)
|
|
||||||
|
|
||||||
# Optimizing dtparse
|
|
||||||
|
|
||||||
See, I knew that there were some allocations during calls to `dtparse::parse`, but I was totally
|
|
||||||
wrong about where the bulk of allocations occurred in my program. Let me post the code and see if
|
|
||||||
you can spot the mistake:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
/// Main entry point for using `dtparse`.
|
|
||||||
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
|
|
||||||
let res = Parser::default().parse(
|
|
||||||
timestr, None, None, false, false,
|
|
||||||
None, false,
|
|
||||||
&HashMap::new(),
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok((res.0, res.1))
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
> [dtparse](https://github.com/bspeice/dtparse/blob/4d7c5dd99572823fa4a390b483c38ab020a2172f/src/lib.rs#L1286)
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
Because `Parser::parse` requires a mutable reference to itself, I have to create a new
|
|
||||||
`Parser::default` every time it receives a string. This is excessive! We'd rather have an immutable
|
|
||||||
parser that can be re-used, and avoid allocating memory in the first place.
|
|
||||||
|
|
||||||
Armed with that information, I put some time in to
|
|
||||||
[make the parser immutable](https://github.com/bspeice/dtparse/commit/741afa34517d6bc1155713bbc5d66905fea13fad#diff-b4aea3e418ccdb71239b96952d9cddb6).
|
|
||||||
Now that I can re-use the same parser over and over, the allocations disappear:
|
|
||||||
|
|
||||||
![allocations cleaned up](/assets/images/2018-10-heaptrack/heaptrack-flamegraph-after.png)
|
|
||||||
|
|
||||||
In total, we went from requiring 2 MB of memory in
|
|
||||||
[version 1.0.2](https://crates.io/crates/dtparse/1.0.2):
|
|
||||||
|
|
||||||
![memory before](/assets/images/2018-10-heaptrack/heaptrack-closeup.png)
|
|
||||||
|
|
||||||
All the way down to 300KB in [version 1.0.3](https://crates.io/crates/dtparse/1.0.3):
|
|
||||||
|
|
||||||
![memory after](/assets/images/2018-10-heaptrack/heaptrack-closeup-after.png)
|
|
||||||
|
|
||||||
# Conclusion
|
|
||||||
|
|
||||||
In the end, you don't need to write a custom allocator to be efficient with memory; great tools
|
|
||||||
already exist to help you understand what your program is doing.
|
|
||||||
|
|
||||||
**Use them.**
|
|
||||||
|
|
||||||
Given that [Moore's Law](https://en.wikipedia.org/wiki/Moore%27s_law) is
|
|
||||||
[dead](https://www.technologyreview.com/s/601441/moores-law-is-dead-now-what/), we've all got to do
|
|
||||||
our part to take back what Microsoft stole.
|
|
||||||
|
|
||||||
[dtparse]: https://crates.io/crates/dtparse
|
|
||||||
[dateutil]: https://github.com/dateutil/dateutil
|
|
||||||
[heaptrack]: https://github.com/KDE/heaptrack
|
|
||||||
[qadapt]: https://crates.io/crates/qadapt
|
|
@ -1,34 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: 'More "What Companies Really Mean"'
|
|
||||||
description: 'when they ask "Why should we hire you?"'
|
|
||||||
category:
|
|
||||||
tags: []
|
|
||||||
---
|
|
||||||
|
|
||||||
I recently stumbled across a phenomenal small article entitled
|
|
||||||
[What Startups Really Mean By "Why Should We Hire You?"](https://angel.co/blog/what-startups-really-mean-by-why-should-we-hire-you).
|
|
||||||
Having been interviewed by smaller companies (though not exactly startups), the questions and
|
|
||||||
subtexts are the same. There's often a question behind the question that you're actually trying to
|
|
||||||
answer, and I wish I had spotted the nuance earlier in my career.
|
|
||||||
|
|
||||||
Let me also make note of one more question/euphemism I've come across:
|
|
||||||
|
|
||||||
# How do you feel about Production Support?
|
|
||||||
|
|
||||||
**Translation**: _We're a fairly small team, and when things break on an evening/weekend/Christmas
|
|
||||||
Day, can we call on you to be there?_
|
|
||||||
|
|
||||||
I've met decidedly few people in my life who truly enjoy the "ops" side of "devops". They're
|
|
||||||
incredibly good at taking an impossible problem, pre-existing knowledge of arcane arts, and turning
|
|
||||||
that into a functioning system at the end. And if they all left for lunch, we probably wouldn't make
|
|
||||||
it out the door before the zombie apocalypse.
|
|
||||||
|
|
||||||
Larger organizations (in my experience, 500+ person organizations) have the luxury of hiring people
|
|
||||||
who either enjoy that, or play along nicely enough that our systems keep working.
|
|
||||||
|
|
||||||
Small teams have no such luck. If you're interviewing at a small company, especially as a "data
|
|
||||||
scientist" or other somesuch position, be aware that systems can and do spontaneously combust at the
|
|
||||||
most inopportune moments.
|
|
||||||
|
|
||||||
**Terrible-but-popular answers include**: _It's a part of the job, and I'm happy to contribute._
|
|
@ -1,218 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "QADAPT - debug_assert! for your memory usage"
|
|
||||||
description: "...and why you want an allocator that goes 💥."
|
|
||||||
category:
|
|
||||||
tags: []
|
|
||||||
---
|
|
||||||
|
|
||||||
I think it's part of the human condition to ignore perfectly good advice when it comes our way. A
|
|
||||||
bit over a month ago, I was dispensing sage wisdom for the ages:
|
|
||||||
|
|
||||||
> I had a really great idea: build a custom allocator that allows you to track your own allocations.
|
|
||||||
> I gave it a shot, but learned very quickly: **never write your own allocator.**
|
|
||||||
>
|
|
||||||
> -- [me](/2018/10/case-study-optimization.html)
|
|
||||||
|
|
||||||
I proceeded to ignore it, because we never really learn from our mistakes.
|
|
||||||
|
|
||||||
There's another part of the human condition that derives joy from seeing things explode.
|
|
||||||
|
|
||||||
<iframe src="https://giphy.com/embed/YA6dmVW0gfIw8" width="480" height="336" frameBorder="0"></iframe>
|
|
||||||
|
|
||||||
And _that's_ the part I'm going to focus on.
|
|
||||||
|
|
||||||
# Why an Allocator?
|
|
||||||
|
|
||||||
So why, after complaining about allocators, would I still want to write one? There are three reasons
|
|
||||||
for that:
|
|
||||||
|
|
||||||
1. Allocation/dropping is slow
|
|
||||||
2. It's difficult to know exactly when Rust will allocate or drop, especially when using code that
|
|
||||||
you did not write
|
|
||||||
3. I want automated tools to verify behavior, instead of inspecting by hand
|
|
||||||
|
|
||||||
When I say "slow," it's important to define the terms. If you're writing web applications, you'll
|
|
||||||
spend orders of magnitude more time waiting for the database than you will the allocator. However,
|
|
||||||
there's still plenty of code where micro- or nano-seconds matter; think
|
|
||||||
[finance](https://www.youtube.com/watch?v=NH1Tta7purM),
|
|
||||||
[real-time audio](https://www.reddit.com/r/rust/comments/9hg7yj/synthesizer_progress_update/e6c291f),
|
|
||||||
[self-driving cars](https://polysync.io/blog/session-types-for-hearty-codecs/), and
|
|
||||||
[networking](https://carllerche.github.io/bytes/bytes/index.html). In these situations it's simply
|
|
||||||
unacceptable for you to spend time doing things that are not your program, and waiting on the
|
|
||||||
allocator is not cool.
|
|
||||||
|
|
||||||
As I continue to learn Rust, it's difficult for me to predict where exactly allocations will happen.
|
|
||||||
So, I propose we play a quick trivia game: **Does this code invoke the allocator?**
|
|
||||||
|
|
||||||
## Example 1
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn my_function() {
|
|
||||||
let v: Vec<u8> = Vec::new();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**No**: Rust [knows how big](https://doc.rust-lang.org/std/mem/fn.size_of.html) the `Vec` type is,
|
|
||||||
and reserves a fixed amount of memory on the stack for the `v` vector. However, if we wanted to
|
|
||||||
reserve extra space (using `Vec::with_capacity`) the allocator would get invoked.
|
|
||||||
|
|
||||||
## Example 2
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn my_function() {
|
|
||||||
let v: Box<Vec<u8>> = Box::new(Vec::new());
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Yes**: Because Boxes allow us to work with things that are of unknown size, they have to allocate on
|
|
||||||
the heap. While the `Box` is unnecessary in this snippet (release builds will optimize out the
|
|
||||||
allocation), reserving heap space more generally is needed to pass a dynamically sized type to
|
|
||||||
another function.
|
|
||||||
|
|
||||||
## Example 3
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn my_function(v: Vec<u8>) {
|
|
||||||
v.push(5);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**Maybe**: Depending on whether the Vector we were given has space available, we may or may not
|
|
||||||
allocate. Especially when dealing with code that you did not author, it's difficult to verify that
|
|
||||||
things behave as you expect them to.
|
|
||||||
|
|
||||||
# Blowing Things Up
|
|
||||||
|
|
||||||
So, how exactly does QADAPT solve these problems? **Whenever an allocation or drop occurs in code
|
|
||||||
marked allocation-safe, QADAPT triggers a thread panic.** We don't want to let the program continue
|
|
||||||
as if nothing strange happened, _we want things to explode_.
|
|
||||||
|
|
||||||
However, you don't want code to panic in production because of circumstances you didn't predict.
|
|
||||||
Just like [`debug_assert!`](https://doc.rust-lang.org/std/macro.debug_assert.html), **QADAPT will
|
|
||||||
strip out its own code when building in release mode to guarantee no panics and no performance
|
|
||||||
impact.**
|
|
||||||
|
|
||||||
Finally, there are three ways to have QADAPT check that your code will not invoke the allocator:
|
|
||||||
|
|
||||||
## Using a procedural macro
|
|
||||||
|
|
||||||
The easiest method, watch an entire function for allocator invocation:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use qadapt::no_alloc;
|
|
||||||
use qadapt::QADAPT;
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static Q: QADAPT = QADAPT;
|
|
||||||
|
|
||||||
#[no_alloc]
|
|
||||||
fn push_vec(v: &mut Vec<u8>) {
|
|
||||||
// This triggers a panic if v.len() == v.capacity()
|
|
||||||
v.push(5);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let v = Vec::with_capacity(1);
|
|
||||||
|
|
||||||
// This will *not* trigger a panic
|
|
||||||
push_vec(&v);
|
|
||||||
|
|
||||||
// This *will* trigger a panic
|
|
||||||
push_vec(&v);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Using a regular macro
|
|
||||||
|
|
||||||
For times when you need more precision:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use qadapt::assert_no_alloc;
|
|
||||||
use qadapt::QADAPT;
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static Q: QADAPT = QADAPT;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let v = Vec::with_capacity(1);
|
|
||||||
|
|
||||||
// No allocations here, we already have space reserved
|
|
||||||
assert_no_alloc!(v.push(5));
|
|
||||||
|
|
||||||
// Even though we remove an item, it doesn't trigger a drop
|
|
||||||
// because it's a scalar. If it were a `Box<_>` type,
|
|
||||||
// a drop would trigger.
|
|
||||||
assert_no_alloc!({
|
|
||||||
v.pop().unwrap();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Using function calls
|
|
||||||
|
|
||||||
Both the most precise and most tedious:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use qadapt::enter_protected;
|
|
||||||
use qadapt::exit_protected;
|
|
||||||
use qadapt::QADAPT;
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static Q: QADAPT = QADAPT;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// This triggers an allocation (on non-release builds)
|
|
||||||
let v = Vec::with_capacity(1);
|
|
||||||
|
|
||||||
enter_protected();
|
|
||||||
// This does not trigger an allocation because we've reserved size
|
|
||||||
v.push(0);
|
|
||||||
exit_protected();
|
|
||||||
|
|
||||||
// This triggers an allocation because we ran out of size,
|
|
||||||
// but doesn't panic because we're no longer protected.
|
|
||||||
v.push(1);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
## Caveats
|
|
||||||
|
|
||||||
It's important to point out that QADAPT code is synchronous, so please be careful when mixing in
|
|
||||||
asynchronous functions:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use futures::future::Future;
|
|
||||||
use futures::future::ok;
|
|
||||||
|
|
||||||
#[no_alloc]
|
|
||||||
fn async_capacity() -> impl Future<Item=Vec<u8>, Error=()> {
|
|
||||||
ok(12).and_then(|e| Ok(Vec::with_capacity(e)))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// This doesn't trigger a panic because the `and_then` closure
|
|
||||||
// wasn't run during the function call.
|
|
||||||
async_capacity();
|
|
||||||
|
|
||||||
// Still no panic
|
|
||||||
assert_no_alloc!(async_capacity());
|
|
||||||
|
|
||||||
// This will panic because the allocation happens during `unwrap`
|
|
||||||
// in the `assert_no_alloc!` macro
|
|
||||||
assert_no_alloc!(async_capacity().poll().unwrap());
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
# Conclusion
|
|
||||||
|
|
||||||
While there's a lot more to writing high-performance code than managing your usage of the allocator,
|
|
||||||
it's critical that you do use the allocator correctly. QADAPT will verify that your code is doing
|
|
||||||
what you expect. It's usable even on stable Rust from version 1.31 onward, which isn't the case for
|
|
||||||
most allocators. Version 1.0 was released today, and you can check it out over at
|
|
||||||
[crates.io](https://crates.io/crates/qadapt) or on [github](https://github.com/bspeice/qadapt).
|
|
||||||
|
|
||||||
I'm hoping to write more about high-performance Rust in the future, and I expect that QADAPT will
|
|
||||||
help guide that. If there are topics you're interested in, let me know in the comments below!
|
|
||||||
|
|
||||||
[qadapt]: https://crates.io/crates/qadapt
|
|
@ -1,113 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Allocations in Rust"
|
|
||||||
description: "An introduction to the memory model."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
There's an alchemy of distilling complex technical topics into articles and videos that change the
|
|
||||||
way programmers see the tools they interact with on a regular basis. I knew what a linker was, but
|
|
||||||
there's a staggering amount of complexity in between
|
|
||||||
[the OS and `main()`](https://www.youtube.com/watch?v=dOfucXtyEsU). Rust programmers use the
|
|
||||||
[`Box`](https://doc.rust-lang.org/stable/std/boxed/struct.Box.html) type all the time, but there's a
|
|
||||||
rich history of the Rust language itself wrapped up in
|
|
||||||
[how special it is](https://manishearth.github.io/blog/2017/01/10/rust-tidbits-box-is-special/).
|
|
||||||
|
|
||||||
In a similar vein, this series attempts to look at code and understand how memory is used; the
|
|
||||||
complex choreography of operating system, compiler, and program that frees you to focus on
|
|
||||||
functionality far-flung from frivolous book-keeping. The Rust compiler relieves a great deal of the
|
|
||||||
cognitive burden associated with memory management, but we're going to step into its world for a
|
|
||||||
while.
|
|
||||||
|
|
||||||
Let's learn a bit about memory in Rust.
|
|
||||||
|
|
||||||
# Table of Contents
|
|
||||||
|
|
||||||
This series is intended as both learning and reference material; we'll work through the different
|
|
||||||
memory types Rust uses, and explain the implications of each. Ultimately, a summary will be provided
|
|
||||||
as a cheat sheet for easy future reference. To that end, a table of contents is in order:
|
|
||||||
|
|
||||||
- Foreword
|
|
||||||
- [Global Memory Usage: The Whole World](/2019/02/the-whole-world.html)
|
|
||||||
- [Fixed Memory: Stacking Up](/2019/02/stacking-up.html)
|
|
||||||
- [Dynamic Memory: A Heaping Helping](/2019/02/a-heaping-helping.html)
|
|
||||||
- [Compiler Optimizations: What It's Done For You Lately](/2019/02/compiler-optimizations.html)
|
|
||||||
- [Summary: What Are the Rules?](/2019/02/summary.html)
|
|
||||||
|
|
||||||
# Foreword
|
|
||||||
|
|
||||||
Rust's three defining features of
|
|
||||||
[Performance, Reliability, and Productivity](https://www.rust-lang.org/) are all driven to a great
|
|
||||||
degree by how the Rust compiler understands memory usage. Unlike managed memory languages (Java,
|
|
||||||
Python), Rust
|
|
||||||
[doesn't really](https://words.steveklabnik.com/borrow-checking-escape-analysis-and-the-generational-hypothesis)
|
|
||||||
garbage collect; instead, it uses an
|
|
||||||
[ownership](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) system to reason about
|
|
||||||
how long objects will last in your program. In some cases, if the life of an object is fairly
|
|
||||||
transient, Rust can make use of a very fast region called the "stack." When that's not possible,
|
|
||||||
Rust uses
|
|
||||||
[dynamic (heap) memory](https://en.wikipedia.org/wiki/Memory_management#Dynamic_memory_allocation)
|
|
||||||
and the ownership system to ensure you can't accidentally corrupt memory. It's not as fast, but it
|
|
||||||
is important to have available.
|
|
||||||
|
|
||||||
That said, there are specific situations in Rust where you'd never need to worry about the
|
|
||||||
stack/heap distinction! If you:
|
|
||||||
|
|
||||||
1. Never use `unsafe`
|
|
||||||
2. Never use `#![feature(alloc)]` or the [`alloc` crate](https://doc.rust-lang.org/alloc/index.html)
|
|
||||||
|
|
||||||
...then it's not possible for you to use dynamic memory!
|
|
||||||
|
|
||||||
For some uses of Rust, typically embedded devices, these constraints are OK. They have very limited
|
|
||||||
memory, and the program binary size itself may significantly affect what's available! There's no
|
|
||||||
operating system able to manage this
|
|
||||||
["virtual memory"](https://en.wikipedia.org/wiki/Virtual_memory) thing, but that's not an issue
|
|
||||||
because there's only one running application. The
|
|
||||||
[embedonomicon](https://docs.rust-embedded.org/embedonomicon/preface.html) is ever in mind, and
|
|
||||||
interacting with the "real world" through extra peripherals is accomplished by reading and writing
|
|
||||||
to [specific memory addresses](https://bob.cs.sonoma.edu/IntroCompOrg-RPi/sec-gpio-mem.html).
|
|
||||||
|
|
||||||
Most Rust programs find these requirements overly burdensome though. C++ developers would struggle
|
|
||||||
without access to [`std::vector`](https://en.cppreference.com/w/cpp/container/vector) (except those
|
|
||||||
hardcore no-STL people), and Rust developers would struggle without
|
|
||||||
[`std::vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html). But with the constraints above,
|
|
||||||
`std::vec` is actually a part of the
|
|
||||||
[`alloc` crate](https://doc.rust-lang.org/alloc/vec/struct.Vec.html), and thus off-limits. `Box`,
|
|
||||||
`Rc`, etc., are also unusable for the same reason.
|
|
||||||
|
|
||||||
Whether writing code for embedded devices or not, the important thing in both situations is how much
|
|
||||||
you know _before your application starts_ about what its memory usage will look like. In embedded
|
|
||||||
devices, there's a small, fixed amount of memory to use. In a browser, you have no idea how large
|
|
||||||
[google.com](https://www.google.com)'s home page is until you start trying to download it. The
|
|
||||||
compiler uses this knowledge (or lack thereof) to optimize how memory is used; put simply, your code
|
|
||||||
runs faster when the compiler can guarantee exactly how much memory your program needs while it's
|
|
||||||
running. This series is all about understanding how the compiler reasons about your program, with an
|
|
||||||
emphasis on the implications for performance.
|
|
||||||
|
|
||||||
Now let's address some conditions and caveats before going much further:
|
|
||||||
|
|
||||||
- We'll focus on "safe" Rust only; `unsafe` lets you use platform-specific allocation APIs
|
|
||||||
([`malloc`](https://www.tutorialspoint.com/c_standard_library/c_function_malloc.htm)) that we'll
|
|
||||||
ignore.
|
|
||||||
- We'll assume a "debug" build of Rust code (what you get with `cargo run` and `cargo test`) and
|
|
||||||
address (pun intended) release mode at the end (`cargo run --release` and `cargo test --release`).
|
|
||||||
- All content will be run using Rust 1.32, as that's the highest currently supported in the
|
|
||||||
[Compiler Explorer](https://godbolt.org/). As such, we'll avoid upcoming innovations like
|
|
||||||
[compile-time evaluation of `static`](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md)
|
|
||||||
that are available in nightly.
|
|
||||||
- Because of the nature of the content, being able to read assembly is helpful. We'll keep it
|
|
||||||
simple, but I [found](https://stackoverflow.com/a/4584131/1454178) a
|
|
||||||
[refresher](https://stackoverflow.com/a/26026278/1454178) on the `push` and `pop`
|
|
||||||
[instructions](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html) was helpful while writing
|
|
||||||
this.
|
|
||||||
- I've tried to be precise in saying only what I can prove using the tools (ASM, docs) that are
|
|
||||||
available, but if there's something said in error it will be corrected expeditiously. Please let
|
|
||||||
me know at [bradlee@speice.io](mailto:bradlee@speice.io)
|
|
||||||
|
|
||||||
Finally, I'll do what I can to flag potential future changes but the Rust docs have a notice worth
|
|
||||||
repeating:
|
|
||||||
|
|
||||||
> Rust does not currently have a rigorously and formally defined memory model.
|
|
||||||
>
|
|
||||||
> -- [the docs](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html)
|
|
@ -1,337 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Global Memory Usage: The Whole World"
|
|
||||||
description: "Static considered slightly less harmful."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
The first memory type we'll look at is pretty special: when Rust can prove that a _value_ is fixed
|
|
||||||
for the life of a program (`const`), and when a _reference_ is unique for the life of a program
|
|
||||||
(`static` as a declaration, not
|
|
||||||
[`'static`](https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#the-static-lifetime) as a
|
|
||||||
lifetime), we can make use of global memory. This special section of data is embedded directly in
|
|
||||||
the program binary so that variables are ready to go once the program loads; no additional
|
|
||||||
computation is necessary.
|
|
||||||
|
|
||||||
Understanding the value/reference distinction is important for reasons we'll go into below, and
|
|
||||||
while the
|
|
||||||
[full specification](https://github.com/rust-lang/rfcs/blob/master/text/0246-const-vs-static.md) for
|
|
||||||
these two keywords is available, we'll take a hands-on approach to the topic.
|
|
||||||
|
|
||||||
# **const**
|
|
||||||
|
|
||||||
When a _value_ is guaranteed to be unchanging in your program (where "value" may be scalars,
|
|
||||||
`struct`s, etc.), you can declare it `const`. This tells the compiler that it's safe to treat the
|
|
||||||
value as never changing, and enables some interesting optimizations; not only is there no
|
|
||||||
initialization cost to creating the value (it is loaded at the same time as the executable parts of
|
|
||||||
your program), but the compiler can also copy the value around if it speeds up the code.
|
|
||||||
|
|
||||||
The points we need to address when talking about `const` are:
|
|
||||||
|
|
||||||
- `const` values are stored in read-only memory - they're impossible to modify.
|
|
||||||
- Values resulting from calling a `const fn` are materialized at compile-time.
|
|
||||||
- The compiler may (or may not) copy `const` values wherever it chooses.
|
|
||||||
|
|
||||||
## Read-Only
|
|
||||||
|
|
||||||
The first point is a bit strange - "read-only memory."
|
|
||||||
[The Rust book](https://doc.rust-lang.org/book/ch03-01-variables-and-mutability.html#differences-between-variables-and-constants)
|
|
||||||
mentions in a couple places that using `mut` with constants is illegal, but it's also important to
|
|
||||||
demonstrate just how immutable they are. _Typically_ in Rust you can use
|
|
||||||
[interior mutability](https://doc.rust-lang.org/book/ch15-05-interior-mutability.html) to modify
|
|
||||||
things that aren't declared `mut`.
|
|
||||||
[`RefCell`](https://doc.rust-lang.org/std/cell/struct.RefCell.html) provides an example of this
|
|
||||||
pattern in action:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::cell::RefCell;
|
|
||||||
|
|
||||||
fn my_mutator(cell: &RefCell<u8>) {
|
|
||||||
// Even though we're given an immutable reference,
|
|
||||||
// the `replace` method allows us to modify the inner value.
|
|
||||||
cell.replace(14);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let cell = RefCell::new(25);
|
|
||||||
// Prints out 25
|
|
||||||
println!("Cell: {:?}", cell);
|
|
||||||
my_mutator(&cell);
|
|
||||||
// Prints out 14
|
|
||||||
println!("Cell: {:?}", cell);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8e4bea1a718edaff4507944e825a54b2)
|
|
||||||
|
|
||||||
When `const` is involved though, interior mutability is impossible:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::cell::RefCell;
|
|
||||||
|
|
||||||
const CELL: RefCell<u8> = RefCell::new(25);
|
|
||||||
|
|
||||||
fn my_mutator(cell: &RefCell<u8>) {
|
|
||||||
cell.replace(14);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// First line prints 25 as expected
|
|
||||||
println!("Cell: {:?}", &CELL);
|
|
||||||
my_mutator(&CELL);
|
|
||||||
// Second line *still* prints 25
|
|
||||||
println!("Cell: {:?}", &CELL);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=88fe98110c33c1b3a51e341f48b8ae00)
|
|
||||||
|
|
||||||
And a second example using [`Once`](https://doc.rust-lang.org/std/sync/struct.Once.html):
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::sync::Once;
|
|
||||||
|
|
||||||
const SURPRISE: Once = Once::new();
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// This is how `Once` is supposed to be used
|
|
||||||
SURPRISE.call_once(|| println!("Initializing..."));
|
|
||||||
// Because `Once` is a `const` value, we never record it
|
|
||||||
// having been initialized the first time, and this closure
|
|
||||||
// will also execute.
|
|
||||||
SURPRISE.call_once(|| println!("Initializing again???"));
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c3cc5979b5e5434eca0f9ec4a06ee0ed)
|
|
||||||
|
|
||||||
When the
|
|
||||||
[`const` specification](https://github.com/rust-lang/rfcs/blob/26197104b7bb9a5a35db243d639aee6e46d35d75/text/0246-const-vs-static.md)
|
|
||||||
refers to ["rvalues"](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3055.pdf), this
|
|
||||||
behavior is what they refer to. [Clippy](https://github.com/rust-lang/rust-clippy) will treat this
|
|
||||||
as an error, but it's still something to be aware of.
|
|
||||||
|
|
||||||
## Initialization == Compilation
|
|
||||||
|
|
||||||
The next thing to mention is that `const` values are loaded into memory _as part of your program
|
|
||||||
binary_. Because of this, any `const` values declared in your program will be "realized" at
|
|
||||||
compile-time; accessing them may trigger a main-memory lookup (with a fixed address, so your CPU may
|
|
||||||
be able to prefetch the value), but that's it.
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::cell::RefCell;
|
|
||||||
|
|
||||||
const CELL: RefCell<u32> = RefCell::new(24);
|
|
||||||
|
|
||||||
pub fn multiply(value: u32) -> u32 {
|
|
||||||
// CELL is stored at `.L__unnamed_1`
|
|
||||||
value * (*CELL.get_mut())
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/Th8boO)
|
|
||||||
|
|
||||||
The compiler creates one `RefCell`, uses it everywhere, and never needs to call the `RefCell::new`
|
|
||||||
function.
|
|
||||||
|
|
||||||
## Copying
|
|
||||||
|
|
||||||
If it's helpful though, the compiler can choose to copy `const` values.
|
|
||||||
|
|
||||||
```rust
|
|
||||||
const FACTOR: u32 = 1000;
|
|
||||||
|
|
||||||
pub fn multiply(value: u32) -> u32 {
|
|
||||||
// See assembly line 4 for the `mov edi, 1000` instruction
|
|
||||||
value * FACTOR
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn multiply_twice(value: u32) -> u32 {
|
|
||||||
// See assembly lines 22 and 29 for `mov edi, 1000` instructions
|
|
||||||
value * FACTOR * FACTOR
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/ZtS54X)
|
|
||||||
|
|
||||||
In this example, the `FACTOR` value is turned into the `mov edi, 1000` instruction in both the
|
|
||||||
`multiply` and `multiply_twice` functions; the "1000" value is never "stored" anywhere, as it's
|
|
||||||
small enough to inline into the assembly instructions.
|
|
||||||
|
|
||||||
Finally, getting the address of a `const` value is possible, but not guaranteed to be unique
|
|
||||||
(because the compiler can choose to copy values). I was unable to get non-unique pointers in my
|
|
||||||
testing (even using different crates), but the specifications are clear enough: _don't rely on
|
|
||||||
pointers to `const` values being consistent_. To be frank, caring about locations for `const` values
|
|
||||||
is almost certainly a code smell.
|
|
||||||
|
|
||||||
# **static**
|
|
||||||
|
|
||||||
Static variables are related to `const` variables, but take a slightly different approach. When we
|
|
||||||
declare that a _reference_ is unique for the life of a program, we have a `static` variable
|
|
||||||
(unrelated to the `'static` lifetime). Because of the reference/value distinction with
|
|
||||||
`const`/`static`, static variables behave much more like typical "global" variables.
|
|
||||||
|
|
||||||
But to understand `static`, here's what we'll look at:
|
|
||||||
|
|
||||||
- `static` variables are globally unique locations in memory.
|
|
||||||
- Like `const`, `static` variables are loaded at the same time your program is read into
|
|
||||||
memory.
|
|
||||||
- All `static` variables must implement the
|
|
||||||
[`Sync`](https://doc.rust-lang.org/std/marker/trait.Sync.html) marker trait.
|
|
||||||
- Interior mutability is safe and acceptable when using `static` variables.
|
|
||||||
|
|
||||||
## Memory Uniqueness
|
|
||||||
|
|
||||||
The single biggest difference between `const` and `static` is the guarantees provided about
|
|
||||||
uniqueness. Where `const` variables may or may not be copied in code, `static` variables are
|
|
||||||
guaranteed to be unique. If we take a previous `const` example and change it to `static`, the
|
|
||||||
difference should be clear:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
static FACTOR: u32 = 1000;
|
|
||||||
|
|
||||||
pub fn multiply(value: u32) -> u32 {
|
|
||||||
// The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
|
|
||||||
value * FACTOR
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn multiply_twice(value: u32) -> u32 {
|
|
||||||
// The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
|
|
||||||
value * FACTOR * FACTOR
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/uxmiRQ)
|
|
||||||
|
|
||||||
Where [previously](#copying) there were plenty of references to multiplying by 1000, the new
|
|
||||||
assembly refers to `FACTOR` as a named memory location instead. No initialization work needs to be
|
|
||||||
done, but the compiler can no longer prove the value never changes during execution.
|
|
||||||
|
|
||||||
## Initialization == Compilation
|
|
||||||
|
|
||||||
Next, let's talk about initialization. The simplest case is initializing static variables with
|
|
||||||
either scalar or struct notation:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct MyStruct {
|
|
||||||
x: u32
|
|
||||||
}
|
|
||||||
|
|
||||||
static MY_STRUCT: MyStruct = MyStruct {
|
|
||||||
// You can even reference other statics
|
|
||||||
// declared later
|
|
||||||
x: MY_VAL
|
|
||||||
};
|
|
||||||
|
|
||||||
static MY_VAL: u32 = 24;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
println!("Static MyStruct: {:?}", MY_STRUCT);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=b538dbc46076f12db047af4f4403ee6e)
|
|
||||||
|
|
||||||
Things can get a bit weirder when using `const fn` though. In most cases, it just works:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
#[derive(Debug)]
|
|
||||||
struct MyStruct {
|
|
||||||
x: u32
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MyStruct {
|
|
||||||
const fn new() -> MyStruct {
|
|
||||||
MyStruct { x: 24 }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static MY_STRUCT: MyStruct = MyStruct::new();
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
println!("const fn Static MyStruct: {:?}", MY_STRUCT);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8c796a6e7fc273c12115091b707b0255)
|
|
||||||
|
|
||||||
However, there's a caveat: you're currently not allowed to use `const fn` to initialize static
|
|
||||||
variables of types that aren't marked `Sync`. For example,
|
|
||||||
[`RefCell::new()`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#method.new) is a
|
|
||||||
`const fn`, but because
|
|
||||||
[`RefCell` isn't `Sync`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#impl-Sync), you'll
|
|
||||||
get an error at compile time:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::cell::RefCell;
|
|
||||||
|
|
||||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
|
|
||||||
static MY_LOCK: RefCell<u8> = RefCell::new(0);
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c76ef86e473d07117a1700e21fd45560)
|
|
||||||
|
|
||||||
It's likely that this will
|
|
||||||
[change in the future](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md) though.
|
|
||||||
|
|
||||||
## **Sync**
|
|
||||||
|
|
||||||
Which leads well to the next point: static variable types must implement the
|
|
||||||
[`Sync` marker](https://doc.rust-lang.org/std/marker/trait.Sync.html). Because they're globally
|
|
||||||
unique, it must be safe for you to access static variables from any thread at any time. Most
|
|
||||||
`struct` definitions automatically implement the `Sync` trait because they contain only elements
|
|
||||||
which themselves implement `Sync` (read more in the
|
|
||||||
[Nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). This is why earlier examples could
|
|
||||||
get away with initializing statics, even though we never included an `impl Sync for MyStruct` in the
|
|
||||||
code. To demonstrate this property, Rust refuses to compile our earlier example if we add a
|
|
||||||
non-`Sync` element to the `struct` definition:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::cell::RefCell;
|
|
||||||
|
|
||||||
struct MyStruct {
|
|
||||||
x: u32,
|
|
||||||
y: RefCell<u8>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
|
|
||||||
static MY_STRUCT: MyStruct = MyStruct {
|
|
||||||
x: 8,
|
|
||||||
y: RefCell::new(8)
|
|
||||||
};
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=40074d0248f056c296b662dbbff97cfc)
|
|
||||||
|
|
||||||
## Interior Mutability
|
|
||||||
|
|
||||||
Finally, while `static mut` variables are allowed, mutating them is an `unsafe` operation. If we
|
|
||||||
want to stay in `safe` Rust, we can use interior mutability to accomplish similar goals:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::sync::Once;
|
|
||||||
|
|
||||||
// This example adapted from https://doc.rust-lang.org/std/sync/struct.Once.html#method.call_once
|
|
||||||
static INIT: Once = Once::new();
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// Note that while `INIT` is declared immutable, we're still allowed
|
|
||||||
// to mutate its interior
|
|
||||||
INIT.call_once(|| println!("Initializing..."));
|
|
||||||
// This code won't panic, as the interior of INIT was modified
|
|
||||||
// as part of the previous `call_once`
|
|
||||||
INIT.call_once(|| panic!("INIT was called twice!"));
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=3ba003a981a7ed7400240caadd384d59)
|
|
@ -1,601 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Fixed Memory: Stacking Up"
|
|
||||||
description: "We don't need no allocator."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
`const` and `static` are perfectly fine, but it's relatively rare that we know at compile-time about
|
|
||||||
either values or references that will be the same for the duration of our program. Put another way,
|
|
||||||
it's not often the case that either you or your compiler knows how much memory your entire program
|
|
||||||
will ever need.
|
|
||||||
|
|
||||||
However, there are still some optimizations the compiler can do if it knows how much memory
|
|
||||||
individual functions will need. Specifically, the compiler can make use of "stack" memory (as
|
|
||||||
opposed to "heap" memory) which can be managed far faster in both the short- and long-term. When
|
|
||||||
requesting memory, the [`push` instruction](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html)
|
|
||||||
can typically complete in [1 or 2 cycles](https://agner.org/optimize/instruction_tables.ods) (<1
|
|
||||||
nanosecond on modern CPUs). Contrast that to heap memory which requires an allocator (specialized
|
|
||||||
software to track what memory is in use) to reserve space. When you're finished with stack memory,
|
|
||||||
the `pop` instruction runs in 1-3 cycles, as opposed to an allocator needing to worry about memory
|
|
||||||
fragmentation and other issues with the heap. All sorts of incredibly sophisticated techniques have
|
|
||||||
been used to design allocators:
|
|
||||||
|
|
||||||
- [Garbage Collection](<https://en.wikipedia.org/wiki/Garbage_collection_(computer_science)>)
|
|
||||||
strategies like [Tracing](https://en.wikipedia.org/wiki/Tracing_garbage_collection) (used in
|
|
||||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/g1-intro-jsp-135488.html)) and
|
|
||||||
[Reference counting](https://en.wikipedia.org/wiki/Reference_counting) (used in
|
|
||||||
[Python](https://docs.python.org/3/extending/extending.html#reference-counts))
|
|
||||||
- Thread-local structures to prevent locking the allocator in
|
|
||||||
[tcmalloc](https://jamesgolick.com/2013/5/19/how-tcmalloc-works.html)
|
|
||||||
- Arena structures used in [jemalloc](http://jemalloc.net/), which
|
|
||||||
[until recently](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default)
|
|
||||||
was the primary allocator for Rust programs!
|
|
||||||
|
|
||||||
But no matter how fast your allocator is, the principle remains: the fastest allocator is the one
|
|
||||||
you never use. As such, we're not going to discuss how exactly the
|
|
||||||
[`push` and `pop` instructions work](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html), but
|
|
||||||
we'll focus instead on the conditions that enable the Rust compiler to use faster stack-based
|
|
||||||
allocation for variables.
|
|
||||||
|
|
||||||
So, **how do we know when Rust will or will not use stack allocation for objects we create?**
|
|
||||||
Looking at other languages, it's often easy to delineate between stack and heap. Managed memory
|
|
||||||
languages (Python, Java,
|
|
||||||
[C#](https://blogs.msdn.microsoft.com/ericlippert/2010/09/30/the-truth-about-value-types/)) place
|
|
||||||
everything on the heap. JIT compilers ([PyPy](https://www.pypy.org/),
|
|
||||||
[HotSpot](https://www.oracle.com/technetwork/java/javase/tech/index-jsp-136373.html)) may optimize
|
|
||||||
some heap allocations away, but you should never assume it will happen. C makes things clear with
|
|
||||||
calls to special functions (like [malloc(3)](https://linux.die.net/man/3/malloc)) needed to access
|
|
||||||
heap memory. Old C++ has the [`new`](https://stackoverflow.com/a/655086/1454178) keyword, though
|
|
||||||
modern C++/C++11 is more complicated with [RAII](https://en.cppreference.com/w/cpp/language/raii).
|
|
||||||
|
|
||||||
For Rust, we can summarize as follows: **stack allocation will be used for everything that doesn't
|
|
||||||
involve "smart pointers" and collections**. We'll skip over a precise definition of the term "smart
|
|
||||||
pointer" for now, and instead discuss what we should watch for to understand when stack and heap
|
|
||||||
memory regions are used:
|
|
||||||
|
|
||||||
1. Stack manipulation instructions (`push`, `pop`, and `add`/`sub` of the `rsp` register) indicate
|
|
||||||
allocation of stack memory:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn stack_alloc(x: u32) -> u32 {
|
|
||||||
// Space for `y` is allocated by subtracting from `rsp`,
|
|
||||||
// and then populated
|
|
||||||
let y = [1u8, 2, 3, 4];
|
|
||||||
// Space for `y` is deallocated by adding back to `rsp`
|
|
||||||
x
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/5WSgc9)
|
|
||||||
|
|
||||||
2. Tracking when exactly heap allocation calls occur is difficult. It's typically easier to watch
|
|
||||||
for `call core::ptr::real_drop_in_place`, and infer that a heap allocation happened in the recent
|
|
||||||
past:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn heap_alloc(x: usize) -> usize {
|
|
||||||
// Space for elements in a vector has to be allocated
|
|
||||||
// on the heap, and is then de-allocated once the
|
|
||||||
// vector goes out of scope
|
|
||||||
let y: Vec<u8> = Vec::with_capacity(x);
|
|
||||||
x
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/epfgoQ) (`real_drop_in_place` happens on line 1317)
|
|
||||||
<span style="font-size: .8em">Note: While the
|
|
||||||
[`Drop` trait](https://doc.rust-lang.org/std/ops/trait.Drop.html) is
|
|
||||||
[called for stack-allocated objects](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=87edf374d8983816eb3d8cfeac657b46),
|
|
||||||
the Rust standard library only defines `Drop` implementations for types that involve heap
|
|
||||||
allocation.</span>
|
|
||||||
|
|
||||||
3. If you don't want to inspect the assembly, use a custom allocator that's able to track and alert
|
|
||||||
when heap allocations occur. Crates like
|
|
||||||
[`alloc_counter`](https://crates.io/crates/alloc_counter) are designed for exactly this purpose.
|
|
||||||
|
|
||||||
With all that in mind, let's talk about situations in which we're guaranteed to use stack memory:
|
|
||||||
|
|
||||||
- Structs are created on the stack.
|
|
||||||
- Function arguments are passed on the stack, meaning the
|
|
||||||
[`#[inline]` attribute](https://doc.rust-lang.org/reference/attributes.html#inline-attribute) will
|
|
||||||
not change the memory region used.
|
|
||||||
- Enums and unions are stack-allocated.
|
|
||||||
- [Arrays](https://doc.rust-lang.org/std/primitive.array.html) are always stack-allocated.
|
|
||||||
- Closures capture their arguments on the stack.
|
|
||||||
- Generics will use stack allocation, even with dynamic dispatch.
|
|
||||||
- [`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html) types are guaranteed to be
|
|
||||||
stack-allocated, and copying them will be done in stack memory.
|
|
||||||
- [`Iterator`s](https://doc.rust-lang.org/std/iter/trait.Iterator.html) in the standard library are
|
|
||||||
stack-allocated even when iterating over heap-based collections.
|
|
||||||
|
|
||||||
# Structs
|
|
||||||
|
|
||||||
The simplest case comes first. When creating vanilla `struct` objects, we use stack memory to hold
|
|
||||||
their contents:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
struct Point {
|
|
||||||
x: u64,
|
|
||||||
y: u64,
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Line {
|
|
||||||
a: Point,
|
|
||||||
b: Point,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn make_line() {
|
|
||||||
// `origin` is stored in the first 16 bytes of memory
|
|
||||||
// starting at location `rsp`
|
|
||||||
let origin = Point { x: 0, y: 0 };
|
|
||||||
// `point` makes up the next 16 bytes of memory
|
|
||||||
let point = Point { x: 1, y: 2 };
|
|
||||||
|
|
||||||
// When creating `ray`, we just move the content out of
|
|
||||||
// `origin` and `point` into the next 32 bytes of memory
|
|
||||||
let ray = Line { a: origin, b: point };
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/vri9BE)
|
|
||||||
|
|
||||||
Note that while some extra-fancy instructions are used for memory manipulation in the assembly, the
|
|
||||||
`sub rsp, 64` instruction indicates we're still working with the stack.
|
|
||||||
|
|
||||||
# Function arguments
|
|
||||||
|
|
||||||
Have you ever wondered how functions communicate with each other? Like, once the variables are given
|
|
||||||
to you, everything's fine. But how do you "give" those variables to another function? How do you get
|
|
||||||
the results back afterward? The answer: the compiler arranges memory and assembly instructions using
|
|
||||||
a pre-determined [calling convention](http://llvm.org/docs/LangRef.html#calling-conventions). This
|
|
||||||
convention governs the rules around where arguments needed by a function will be located (either in
|
|
||||||
memory offsets relative to the stack pointer `rsp`, or in other registers), and where the results
|
|
||||||
can be found once the function has finished. And when multiple languages agree on what the calling
|
|
||||||
conventions are, you can do things like having [Go call Rust code](https://blog.filippo.io/rustgo/)!
|
|
||||||
|
|
||||||
Put simply: it's the compiler's job to figure out how to call other functions, and you can assume
|
|
||||||
that the compiler is good at its job.
|
|
||||||
|
|
||||||
We can see this in action using a simple example:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
struct Point {
|
|
||||||
x: i64,
|
|
||||||
y: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
// We use integer division operations to keep
|
|
||||||
// the assembly clean, understanding the result
|
|
||||||
// isn't accurate.
|
|
||||||
fn distance(a: &Point, b: &Point) -> i64 {
|
|
||||||
// Immediately subtract from `rsp` the bytes needed
|
|
||||||
// to hold all the intermediate results - this is
|
|
||||||
// the stack allocation step
|
|
||||||
|
|
||||||
// The compiler used the `rdi` and `rsi` registers
|
|
||||||
// to pass our arguments, so read them in
|
|
||||||
let x1 = a.x;
|
|
||||||
let x2 = b.x;
|
|
||||||
let y1 = a.y;
|
|
||||||
let y2 = b.y;
|
|
||||||
|
|
||||||
// Do the actual math work
|
|
||||||
let x_pow = (x1 - x2) * (x1 - x2);
|
|
||||||
let y_pow = (y1 - y2) * (y1 - y2);
|
|
||||||
let squared = x_pow + y_pow;
|
|
||||||
squared / squared
|
|
||||||
|
|
||||||
// Our final result will be stored in the `rax` register
|
|
||||||
// so that our caller knows where to retrieve it.
|
|
||||||
// Finally, add back to `rsp` the stack memory that is
|
|
||||||
// now ready to be used by other functions.
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn total_distance() {
|
|
||||||
let start = Point { x: 1, y: 2 };
|
|
||||||
let middle = Point { x: 3, y: 4 };
|
|
||||||
let end = Point { x: 5, y: 6 };
|
|
||||||
|
|
||||||
let _dist_1 = distance(&start, &middle);
|
|
||||||
let _dist_2 = distance(&middle, &end);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/Qmx4ST)
|
|
||||||
|
|
||||||
As a consequence of function arguments never using heap memory, we can also infer that functions
|
|
||||||
using the `#[inline]` attribute also do not heap allocate. But better than inferring, we can look
|
|
||||||
at the assembly to prove it:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
struct Point {
|
|
||||||
x: i64,
|
|
||||||
y: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note that there is no `distance` function in the assembly output,
|
|
||||||
// and the total line count goes from 229 with inlining off
|
|
||||||
// to 306 with inline on. Even still, no heap allocations occur.
|
|
||||||
#[inline(always)]
|
|
||||||
fn distance(a: &Point, b: &Point) -> i64 {
|
|
||||||
let x1 = a.x;
|
|
||||||
let x2 = b.x;
|
|
||||||
let y1 = a.y;
|
|
||||||
let y2 = b.y;
|
|
||||||
|
|
||||||
let x_pow = (a.x - b.x) * (a.x - b.x);
|
|
||||||
let y_pow = (a.y - b.y) * (a.y - b.y);
|
|
||||||
let squared = x_pow + y_pow;
|
|
||||||
squared / squared
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn total_distance() {
|
|
||||||
let start = Point { x: 1, y: 2 };
|
|
||||||
let middle = Point { x: 3, y: 4 };
|
|
||||||
let end = Point { x: 5, y: 6 };
|
|
||||||
|
|
||||||
let _dist_1 = distance(&start, &middle);
|
|
||||||
let _dist_2 = distance(&middle, &end);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/30Sh66)
|
|
||||||
|
|
||||||
Finally, passing by value (arguments with type
|
|
||||||
[`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html)) and passing by reference (either
|
|
||||||
moving ownership or passing a pointer) may have slightly different layouts in assembly, but will
|
|
||||||
still use either stack memory or CPU registers:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub struct Point {
|
|
||||||
x: i64,
|
|
||||||
y: i64,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Moving values
|
|
||||||
pub fn distance_moved(a: Point, b: Point) -> i64 {
|
|
||||||
let x1 = a.x;
|
|
||||||
let x2 = b.x;
|
|
||||||
let y1 = a.y;
|
|
||||||
let y2 = b.y;
|
|
||||||
|
|
||||||
let x_pow = (x1 - x2) * (x1 - x2);
|
|
||||||
let y_pow = (y1 - y2) * (y1 - y2);
|
|
||||||
let squared = x_pow + y_pow;
|
|
||||||
squared / squared
|
|
||||||
}
|
|
||||||
|
|
||||||
// Borrowing values has two extra `mov` instructions on lines 21 and 22
|
|
||||||
pub fn distance_borrowed(a: &Point, b: &Point) -> i64 {
|
|
||||||
let x1 = a.x;
|
|
||||||
let x2 = b.x;
|
|
||||||
let y1 = a.y;
|
|
||||||
let y2 = b.y;
|
|
||||||
|
|
||||||
let x_pow = (x1 - x2) * (x1 - x2);
|
|
||||||
let y_pow = (y1 - y2) * (y1 - y2);
|
|
||||||
let squared = x_pow + y_pow;
|
|
||||||
squared / squared
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/06hGiv)
|
|
||||||
|
|
||||||
# Enums
|
|
||||||
|
|
||||||
If you've ever worried that wrapping your types in
|
|
||||||
[`Option`](https://doc.rust-lang.org/stable/core/option/enum.Option.html) or
|
|
||||||
[`Result`](https://doc.rust-lang.org/stable/core/result/enum.Result.html) would finally make them
|
|
||||||
large enough that Rust decides to use heap allocation instead, fear no longer: `enum` and union
|
|
||||||
types don't use heap allocation:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
enum MyEnum {
|
|
||||||
Small(u8),
|
|
||||||
Large(u64)
|
|
||||||
}
|
|
||||||
|
|
||||||
struct MyStruct {
|
|
||||||
x: MyEnum,
|
|
||||||
y: MyEnum,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn enum_compare() {
|
|
||||||
let x = MyEnum::Small(0);
|
|
||||||
let y = MyEnum::Large(0);
|
|
||||||
|
|
||||||
let z = MyStruct { x, y };
|
|
||||||
|
|
||||||
let opt = Option::Some(z);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/HK7zBx)
|
|
||||||
|
|
||||||
Because the size of an `enum` is the size of its largest element plus a flag, the compiler can
|
|
||||||
predict how much memory is used no matter which variant of an enum is currently stored in a
|
|
||||||
variable. Thus, enums and unions have no need of heap allocation. There's unfortunately not a great
|
|
||||||
way to show this in assembly, so I'll instead point you to the
|
|
||||||
[`core::mem::size_of`](https://doc.rust-lang.org/stable/core/mem/fn.size_of.html#size-of-enums)
|
|
||||||
documentation.
|
|
||||||
|
|
||||||
# Arrays
|
|
||||||
|
|
||||||
The array type is guaranteed to be stack allocated, which is why the array size must be declared.
|
|
||||||
Interestingly enough, this can be used to cause safe Rust programs to crash:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
// 256 bytes
|
|
||||||
#[derive(Default)]
|
|
||||||
struct TwoFiftySix {
|
|
||||||
_a: [u64; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 8 kilobytes
|
|
||||||
#[derive(Default)]
|
|
||||||
struct EightK {
|
|
||||||
_a: [TwoFiftySix; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 256 kilobytes
|
|
||||||
#[derive(Default)]
|
|
||||||
struct TwoFiftySixK {
|
|
||||||
_a: [EightK; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
// 8 megabytes - exceeds space typically provided for the stack,
|
|
||||||
// though the kernel can be instructed to allocate more.
|
|
||||||
// On Linux, you can check stack size using `ulimit -s`
|
|
||||||
#[derive(Default)]
|
|
||||||
struct EightM {
|
|
||||||
_a: [TwoFiftySixK; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// Because we already have things in stack memory
|
|
||||||
// (like the current function call stack), allocating another
|
|
||||||
// eight megabytes of stack memory crashes the program
|
|
||||||
let _x = EightM::default();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=587a6380a4914bcbcef4192c90c01dc4)
|
|
||||||
|
|
||||||
There aren't any security implications of this (no memory corruption occurs), but it's good to note
|
|
||||||
that the Rust compiler won't move arrays into heap memory even if they can be reasonably expected to
|
|
||||||
overflow the stack.
|
|
||||||
|
|
||||||
# Closures
|
|
||||||
|
|
||||||
Rules for how anonymous functions capture their arguments are typically language-specific. In Java,
|
|
||||||
[Lambda Expressions](https://docs.oracle.com/javase/tutorial/java/javaOO/lambdaexpressions.html) are
|
|
||||||
actually objects created on the heap that capture local primitives by copying, and capture local
|
|
||||||
non-primitives as (`final`) references.
|
|
||||||
[Python](https://docs.python.org/3.7/reference/expressions.html#lambda) and
|
|
||||||
[JavaScript](https://javascriptweblog.wordpress.com/2010/10/25/understanding-javascript-closures/)
|
|
||||||
both bind _everything_ by reference normally, but Python can also
|
|
||||||
[capture values](https://stackoverflow.com/a/235764/1454178) and JavaScript has
|
|
||||||
[Arrow functions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions).
|
|
||||||
|
|
||||||
In Rust, arguments to closures are the same as arguments to other functions; closures are simply
|
|
||||||
functions that don't have a declared name. Some weird ordering of the stack may be required to
|
|
||||||
handle them, but it's the compiler's responsibility to figure that out.
|
|
||||||
|
|
||||||
Each example below has the same effect, but a different assembly implementation. In the simplest
|
|
||||||
case, we immediately run a closure returned by another function. Because we don't store a reference
|
|
||||||
to the closure, the stack memory needed to store the captured values is contiguous:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn my_func() -> impl FnOnce() {
|
|
||||||
let x = 24;
|
|
||||||
// Note that this closure in assembly looks exactly like
|
|
||||||
// any other function; you even use the `call` instruction
|
|
||||||
// to start running it.
|
|
||||||
move || { x; }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn immediate() {
|
|
||||||
my_func()();
|
|
||||||
my_func()();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/mgJ2zl), 25 total assembly instructions
|
|
||||||
|
|
||||||
If we store a reference to the closure, the Rust compiler keeps values it needs in the stack memory
|
|
||||||
of the original function. Getting the details right is a bit harder, so the instruction count goes
|
|
||||||
up even though this code is functionally equivalent to our original example:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn simple_reference() {
|
|
||||||
let x = my_func();
|
|
||||||
let y = my_func();
|
|
||||||
y();
|
|
||||||
x();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/K_dj5n), 55 total assembly instructions
|
|
||||||
|
|
||||||
Even things like variable order can make a difference in instruction count:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn complex() {
|
|
||||||
let x = my_func();
|
|
||||||
let y = my_func();
|
|
||||||
x();
|
|
||||||
y();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/p37qFl), 70 total assembly instructions
|
|
||||||
|
|
||||||
In every circumstance though, the compiler ensured that no heap allocations were necessary.
|
|
||||||
|
|
||||||
# Generics
|
|
||||||
|
|
||||||
Traits in Rust come in two broad forms: static dispatch (monomorphization, `impl Trait`) and dynamic
|
|
||||||
dispatch (trait objects, `dyn Trait`). While dynamic dispatch is often _associated_ with trait
|
|
||||||
objects being stored in the heap, dynamic dispatch can be used with stack allocated objects as well:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
trait GetInt {
|
|
||||||
fn get_int(&self) -> u64;
|
|
||||||
}
|
|
||||||
|
|
||||||
// vtable stored at section L__unnamed_1
|
|
||||||
struct WhyNotU8 {
|
|
||||||
x: u8
|
|
||||||
}
|
|
||||||
impl GetInt for WhyNotU8 {
|
|
||||||
fn get_int(&self) -> u64 {
|
|
||||||
self.x as u64
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// vtable stored at section L__unnamed_2
|
|
||||||
struct ActualU64 {
|
|
||||||
x: u64
|
|
||||||
}
|
|
||||||
impl GetInt for ActualU64 {
|
|
||||||
fn get_int(&self) -> u64 {
|
|
||||||
self.x
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// `&dyn` declares that we want to use dynamic dispatch
|
|
||||||
// rather than monomorphization, so there is only one
|
|
||||||
// `retrieve_int` function that shows up in the final assembly.
|
|
||||||
// If we used generics, there would be one implementation of
|
|
||||||
// `retrieve_int` for each type that implements `GetInt`.
|
|
||||||
pub fn retrieve_int(u: &dyn GetInt) {
|
|
||||||
// In the assembly, we just call an address given to us
|
|
||||||
// in the `rsi` register and hope that it was set up
|
|
||||||
// correctly when this function was invoked.
|
|
||||||
let x = u.get_int();
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn do_call() {
|
|
||||||
// Note that even though the vtable for `WhyNotU8` and
|
|
||||||
// `ActualU64` includes a pointer to
|
|
||||||
// `core::ptr::real_drop_in_place`, it is never invoked.
|
|
||||||
let a = WhyNotU8 { x: 0 };
|
|
||||||
let b = ActualU64 { x: 0 };
|
|
||||||
|
|
||||||
retrieve_int(&a);
|
|
||||||
retrieve_int(&b);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/u_yguS)
|
|
||||||
|
|
||||||
It's hard to imagine practical situations where dynamic dispatch would be used for objects that
|
|
||||||
aren't heap allocated, but it technically can be done.
|
|
||||||
|
|
||||||
# Copy types
|
|
||||||
|
|
||||||
Understanding move semantics and copy semantics in Rust is weird at first. The Rust docs
|
|
||||||
[go into detail](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html) far better than can
|
|
||||||
be addressed here, so I'll leave them to do the job. From a memory perspective though, their
|
|
||||||
guideline is reasonable:
|
|
||||||
[if your type can implement `Copy`, it should](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html#when-should-my-type-be-copy).
|
|
||||||
While there are potential speed tradeoffs to _benchmark_ when discussing `Copy` (move semantics for
|
|
||||||
stack objects vs. copying stack pointers vs. copying stack `struct`s), _it's impossible for `Copy`
|
|
||||||
to introduce a heap allocation_.
|
|
||||||
|
|
||||||
But why is this the case? Fundamentally, it's because the language controls what `Copy` means -
|
|
||||||
["the behavior of `Copy` is not overloadable"](https://doc.rust-lang.org/std/marker/trait.Copy.html#whats-the-difference-between-copy-and-clone)
|
|
||||||
because it's a marker trait. From there we'll note that a type
|
|
||||||
[can implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#when-can-my-type-be-copy)
|
|
||||||
if (and only if) its components implement `Copy`, and that
|
|
||||||
[no heap-allocated types implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#implementors).
|
|
||||||
Thus, assignments involving heap types are always move semantics, and new heap allocations won't
|
|
||||||
occur because of implicit operator behavior.
|
|
||||||
|
|
||||||
```rust
|
|
||||||
#[derive(Clone)]
|
|
||||||
struct Cloneable {
|
|
||||||
x: Box<u64>
|
|
||||||
}
|
|
||||||
|
|
||||||
// error[E0204]: the trait `Copy` may not be implemented for this type
|
|
||||||
#[derive(Copy, Clone)]
|
|
||||||
struct NotCopyable {
|
|
||||||
x: Box<u64>
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/VToRuK)
|
|
||||||
|
|
||||||
# Iterators
|
|
||||||
|
|
||||||
In managed memory languages (like
|
|
||||||
[Java](https://www.youtube.com/watch?v=bSkpMdDe4g4&feature=youtu.be&t=357)), there's a subtle
|
|
||||||
difference between these two code samples:
|
|
||||||
|
|
||||||
```java
|
|
||||||
public static int sum_for(List<Long> vals) {
|
|
||||||
long sum = 0;
|
|
||||||
// Regular for loop
|
|
||||||
for (int i = 0; i < vals.length; i++) {
|
|
||||||
sum += vals[i];
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static int sum_foreach(List<Long> vals) {
|
|
||||||
long sum = 0;
|
|
||||||
// "Foreach" loop - uses iteration
|
|
||||||
for (Long l : vals) {
|
|
||||||
sum += l;
|
|
||||||
}
|
|
||||||
return sum;
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
In the `sum_for` function, nothing terribly interesting happens. In `sum_foreach`, an object of type
|
|
||||||
[`Iterator`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Iterator.html)
|
|
||||||
is allocated on the heap, and will eventually be garbage-collected. This isn't a great design;
|
|
||||||
iterators are often transient objects that you need during a function and can discard once the
|
|
||||||
function ends. Sounds exactly like the issue stack-allocated objects address, no?
|
|
||||||
|
|
||||||
In Rust, iterators are allocated on the stack. The objects to iterate over are almost certainly in
|
|
||||||
heap memory, but the iterator itself
|
|
||||||
([`Iter`](https://doc.rust-lang.org/std/slice/struct.Iter.html)) doesn't need to use the heap. In
|
|
||||||
each of the examples below we iterate over a collection, but never use heap allocation:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::collections::HashMap;
|
|
||||||
// There's a lot of assembly generated, but if you search in the text,
|
|
||||||
// there are no references to `real_drop_in_place` anywhere.
|
|
||||||
|
|
||||||
pub fn sum_vec(x: &Vec<u32>) {
|
|
||||||
let mut s = 0;
|
|
||||||
// Basic iteration over vectors doesn't need allocation
|
|
||||||
for y in x {
|
|
||||||
s += y;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sum_enumerate(x: &Vec<u32>) {
|
|
||||||
let mut s = 0;
|
|
||||||
// More complex iterators are just fine too
|
|
||||||
for (_i, y) in x.iter().enumerate() {
|
|
||||||
s += y;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn sum_hm(x: &HashMap<u32, u32>) {
|
|
||||||
let mut s = 0;
|
|
||||||
// And it's not just Vec, all types will allocate the iterator
|
|
||||||
// on stack memory
|
|
||||||
for y in x.values() {
|
|
||||||
s += y;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/FTT3CT)
|
|
@ -1,254 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Dynamic Memory: A Heaping Helping"
|
|
||||||
description: "The reason Rust exists."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and
|
|
||||||
some languages go to extreme lengths to protect users from themselves (Java, Python). In Rust, how
|
|
||||||
the language uses dynamic memory (also referred to as the **heap**) is governed by a system called _ownership_.
|
|
||||||
And as the docs mention, ownership
|
|
||||||
[is Rust's most unique feature](https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html).
|
|
||||||
|
|
||||||
The heap is used in two situations: when the compiler is unable to predict either the _total size of
|
|
||||||
memory needed_, or _how long the memory is needed for_, it allocates space in the heap. This happens
|
|
||||||
pretty frequently; if you want to download the Google home page, you won't know how large it is
|
|
||||||
until your program runs. And when you're finished with Google, we deallocate the memory so it can be
|
|
||||||
used to store other webpages. If you're interested in a slightly longer explanation of the heap,
|
|
||||||
check out
|
|
||||||
[The Stack and the Heap](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html#the-stack-and-the-heap)
|
|
||||||
in Rust's documentation.
|
|
||||||
|
|
||||||
We won't go into detail on how the heap is managed; the
|
|
||||||
[ownership documentation](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) does a
|
|
||||||
phenomenal job explaining both the "why" and "how" of memory management. Instead, we're going to
|
|
||||||
focus on understanding "when" heap allocations occur in Rust.
|
|
||||||
|
|
||||||
To start off, take a guess for how many allocations happen in the program below:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
fn main() {}
|
|
||||||
```
|
|
||||||
|
|
||||||
It's obviously a trick question; while no heap allocations occur as a result of that code, the setup
|
|
||||||
needed to call `main` does allocate on the heap. Here's a way to show it:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
#![feature(integer_atomics)]
|
|
||||||
use std::alloc::{GlobalAlloc, Layout, System};
|
|
||||||
use std::sync::atomic::{AtomicU64, Ordering};
|
|
||||||
|
|
||||||
static ALLOCATION_COUNT: AtomicU64 = AtomicU64::new(0);
|
|
||||||
|
|
||||||
struct CountingAllocator;
|
|
||||||
|
|
||||||
unsafe impl GlobalAlloc for CountingAllocator {
|
|
||||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
||||||
ALLOCATION_COUNT.fetch_add(1, Ordering::SeqCst);
|
|
||||||
System.alloc(layout)
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
||||||
System.dealloc(ptr, layout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static A: CountingAllocator = CountingAllocator;
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
let x = ALLOCATION_COUNT.fetch_add(0, Ordering::SeqCst);
|
|
||||||
println!("There were {} allocations before calling main!", x);
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2018&gist=fb5060025ba79fc0f906b65a4ef8eb8e)
|
|
||||||
|
|
||||||
As of the time of writing, there are five allocations that happen before `main` is ever called.
|
|
||||||
|
|
||||||
But when we want to understand more practically where heap allocation happens, we'll follow this
|
|
||||||
guide:
|
|
||||||
|
|
||||||
- Smart pointers hold their contents in the heap
|
|
||||||
- Collections are smart pointers for many objects at a time, and reallocate when they need to grow
|
|
||||||
|
|
||||||
Finally, there are two "addendum" issues that are important to address when discussing Rust and the
|
|
||||||
heap:
|
|
||||||
|
|
||||||
- Non-heap alternatives to many standard library types are available.
|
|
||||||
- Special allocators to track memory behavior should be used to benchmark code.
|
|
||||||
|
|
||||||
# Smart pointers
|
|
||||||
|
|
||||||
The first things to note are the "smart pointer" types. When you have data that must outlive the
|
|
||||||
scope in which it is declared, or your data is of unknown or dynamic size, you'll make use of these
|
|
||||||
types.
|
|
||||||
|
|
||||||
The term [smart pointer](https://en.wikipedia.org/wiki/Smart_pointer) comes from C++, and while it's
|
|
||||||
closely linked to a general design pattern of
|
|
||||||
["Resource Acquisition Is Initialization"](https://en.cppreference.com/w/cpp/language/raii), we'll
|
|
||||||
use it here specifically to describe objects that are responsible for managing ownership of data
|
|
||||||
allocated on the heap. The smart pointers available in the `alloc` crate should look mostly
|
|
||||||
familiar:
|
|
||||||
|
|
||||||
- [`Box`](https://doc.rust-lang.org/alloc/boxed/struct.Box.html)
|
|
||||||
- [`Rc`](https://doc.rust-lang.org/alloc/rc/struct.Rc.html)
|
|
||||||
- [`Arc`](https://doc.rust-lang.org/alloc/sync/struct.Arc.html)
|
|
||||||
- [`Cow`](https://doc.rust-lang.org/alloc/borrow/enum.Cow.html)
|
|
||||||
|
|
||||||
The [standard library](https://doc.rust-lang.org/std/) also defines some smart pointers to manage
|
|
||||||
heap objects, though more than can be covered here. Some examples are:
|
|
||||||
|
|
||||||
- [`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html)
|
|
||||||
- [`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)
|
|
||||||
|
|
||||||
Finally, there is one ["gotcha"](https://www.merriam-webster.com/dictionary/gotcha): **cell types**
|
|
||||||
(like [`RefCell`](https://doc.rust-lang.org/stable/core/cell/struct.RefCell.html)) look and behave
|
|
||||||
similarly, but **don't involve heap allocation**. The
|
|
||||||
[`core::cell` docs](https://doc.rust-lang.org/stable/core/cell/index.html) have more information.
|
|
||||||
|
|
||||||
When a smart pointer is created, the data it is given is placed in heap memory and the location of
|
|
||||||
that data is recorded in the smart pointer. Once the smart pointer has determined it's safe to
|
|
||||||
deallocate that memory (when a `Box` has
|
|
||||||
[gone out of scope](https://doc.rust-lang.org/stable/std/boxed/index.html) or a reference count
|
|
||||||
[goes to zero](https://doc.rust-lang.org/alloc/rc/index.html)), the heap space is reclaimed. We can
|
|
||||||
prove these types use heap memory by looking at code:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::rc::Rc;
|
|
||||||
use std::sync::Arc;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
|
|
||||||
pub fn my_box() {
|
|
||||||
// Drop at assembly line 1640
|
|
||||||
Box::new(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn my_rc() {
|
|
||||||
// Drop at assembly line 1650
|
|
||||||
Rc::new(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn my_arc() {
|
|
||||||
// Drop at assembly line 1660
|
|
||||||
Arc::new(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn my_cow() {
|
|
||||||
// Drop at assembly line 1672
|
|
||||||
Cow::from("drop");
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/4AMQug)
|
|
||||||
|
|
||||||
# Collections
|
|
||||||
|
|
||||||
Collection types use heap memory because their contents have dynamic size; they will request more
|
|
||||||
memory [when needed](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve), and can
|
|
||||||
[release memory](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit) when it's
|
|
||||||
no longer necessary. This dynamic property forces Rust to heap allocate everything they contain. In
|
|
||||||
a way, **collections are smart pointers for many objects at a time**. Common types that fall under
|
|
||||||
this umbrella are [`Vec`](https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html),
|
|
||||||
[`HashMap`](https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html), and
|
|
||||||
[`String`](https://doc.rust-lang.org/stable/alloc/string/struct.String.html) (not
|
|
||||||
[`str`](https://doc.rust-lang.org/std/primitive.str.html)).
|
|
||||||
|
|
||||||
While collections store the objects they own in heap memory, _creating new collections will not
|
|
||||||
allocate on the heap_. This is a bit weird; if we call `Vec::new()`, the assembly shows a
|
|
||||||
corresponding call to `real_drop_in_place`:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
pub fn my_vec() {
|
|
||||||
// Drop in place at line 481
|
|
||||||
Vec::<u8>::new();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/1WkNtC)
|
|
||||||
|
|
||||||
But because the vector has no elements to manage, no calls to the allocator will ever be dispatched:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::alloc::{GlobalAlloc, Layout, System};
|
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
// Turn on panicking if we allocate on the heap
|
|
||||||
DO_PANIC.store(true, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// Interesting bit happens here
|
|
||||||
let x: Vec<u8> = Vec::new();
|
|
||||||
drop(x);
|
|
||||||
|
|
||||||
// Turn panicking back off, some deallocations occur
|
|
||||||
// after main as well.
|
|
||||||
DO_PANIC.store(false, Ordering::SeqCst);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static A: PanicAllocator = PanicAllocator;
|
|
||||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
|
|
||||||
struct PanicAllocator;
|
|
||||||
|
|
||||||
unsafe impl GlobalAlloc for PanicAllocator {
|
|
||||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
||||||
if DO_PANIC.load(Ordering::SeqCst) {
|
|
||||||
panic!("Unexpected allocation.");
|
|
||||||
}
|
|
||||||
System.alloc(layout)
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
||||||
if DO_PANIC.load(Ordering::SeqCst) {
|
|
||||||
panic!("Unexpected deallocation.");
|
|
||||||
}
|
|
||||||
System.dealloc(ptr, layout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
--
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=831a297d176d015b1f9ace01ae416cc6)
|
|
||||||
|
|
||||||
Other standard library types follow the same behavior; make sure to check out
|
|
||||||
[`HashMap::new()`](https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html#method.new),
|
|
||||||
and [`String::new()`](https://doc.rust-lang.org/std/string/struct.String.html#method.new).
|
|
||||||
|
|
||||||
# Heap Alternatives
|
|
||||||
|
|
||||||
While it is a bit strange to speak of the stack after spending time with the heap, it's worth
|
|
||||||
pointing out that some heap-allocated objects in Rust have stack-based counterparts provided by
|
|
||||||
other crates. If you have need of the functionality, but want to avoid allocating, there are
|
|
||||||
typically alternatives available.
|
|
||||||
|
|
||||||
When it comes to some standard library smart pointers
|
|
||||||
([`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html) and
|
|
||||||
[`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)), stack-based alternatives are
|
|
||||||
provided in crates like [parking_lot](https://crates.io/crates/parking_lot) and
|
|
||||||
[spin](https://crates.io/crates/spin). You can check out
|
|
||||||
[`lock_api::RwLock`](https://docs.rs/lock_api/0.1.5/lock_api/struct.RwLock.html),
|
|
||||||
[`lock_api::Mutex`](https://docs.rs/lock_api/0.1.5/lock_api/struct.Mutex.html), and
|
|
||||||
[`spin::Once`](https://mvdnes.github.io/rust-docs/spin-rs/spin/struct.Once.html) if you're in need
|
|
||||||
of synchronization primitives.
|
|
||||||
|
|
||||||
[thread_id](https://crates.io/crates/thread-id) may be necessary if you're implementing an allocator
|
|
||||||
because [`thread::current().id()`](https://doc.rust-lang.org/std/thread/struct.ThreadId.html) uses a
|
|
||||||
[`thread_local!` structure](https://doc.rust-lang.org/stable/src/std/sys_common/thread_info.rs.html#17-36)
|
|
||||||
that needs heap allocation.
|
|
||||||
|
|
||||||
# Tracing Allocators
|
|
||||||
|
|
||||||
When writing performance-sensitive code, there's no alternative to measuring your code. If you
|
|
||||||
didn't write a benchmark,
|
|
||||||
[you don't care about its performance](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263).
|
|
||||||
You should never rely on your instincts when
|
|
||||||
[a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
|
|
||||||
|
|
||||||
Similarly, there's great work going on in Rust with allocators that keep track of what they're doing
|
|
||||||
(like [`alloc_counter`](https://crates.io/crates/alloc_counter)). When it comes to tracking heap
|
|
||||||
behavior, it's easy to make mistakes; please write tests and make sure you have tools to guard
|
|
||||||
against future issues.
|
|
@ -1,148 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Compiler Optimizations: What It's Done Lately"
|
|
||||||
description: "A lot. The answer is a lot."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
**Update 2019-02-10**: When debugging a
|
|
||||||
[related issue](https://gitlab.com/sio4/code/alloc-counter/issues/1), it was discovered that the
|
|
||||||
original code worked because LLVM optimized out the entire function, rather than just the allocation
|
|
||||||
segments. The code has been updated with proper use of
|
|
||||||
[`read_volatile`](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html), and a previous section
|
|
||||||
on vector capacity has been removed.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
Up to this point, we've been discussing memory usage in the Rust language by focusing on simple
|
|
||||||
rules that are mostly right for small chunks of code. We've spent time showing how those rules work
|
|
||||||
themselves out in practice, and become familiar with reading the assembly code needed to see each
|
|
||||||
memory type (global, stack, heap) in action.
|
|
||||||
|
|
||||||
Throughout the series so far, we've put a handicap on the code. In the name of consistent and
|
|
||||||
understandable results, we've asked the compiler to pretty please leave the training wheels on. Now
|
|
||||||
is the time where we throw out all the rules and take off the kid gloves. As it turns out, both the
|
|
||||||
Rust compiler and the LLVM optimizers are incredibly sophisticated, and we'll step back and let them
|
|
||||||
do their job.
|
|
||||||
|
|
||||||
Similar to
|
|
||||||
["What Has My Compiler Done For Me Lately?"](https://www.youtube.com/watch?v=bSkpMdDe4g4), we're
|
|
||||||
focusing on interesting things the Rust language (and LLVM!) can do with memory management. We'll
|
|
||||||
still be looking at assembly code to understand what's going on, but it's important to mention
|
|
||||||
again: **please use automated tools like [alloc-counter](https://crates.io/crates/alloc_counter) to
|
|
||||||
double-check memory behavior if it's something you care about**. It's far too easy to misread
|
|
||||||
assembly in large code sections; you should always verify behavior if you care about memory usage.
|
|
||||||
|
|
||||||
The guiding principle as we move forward is this: _optimizing compilers won't produce worse programs
|
|
||||||
than we started with._ There won't be any situations where stack allocations get moved to heap
|
|
||||||
allocations. There will, however, be an opera of optimization.
|
|
||||||
|
|
||||||
# The Case of the Disappearing Box
|
|
||||||
|
|
||||||
Our first optimization comes when LLVM can reason that the lifetime of an object is sufficiently
|
|
||||||
short that heap allocations aren't necessary. In these cases, LLVM will move the allocation to the
|
|
||||||
stack instead! The way this interacts with `#[inline]` attributes is a bit opaque, but the important
|
|
||||||
part is that LLVM can sometimes do better than the baseline Rust language:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
use std::alloc::{GlobalAlloc, Layout, System};
|
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
|
||||||
|
|
||||||
pub fn cmp(x: u32) {
|
|
||||||
// Turn on panicking if we allocate on the heap
|
|
||||||
DO_PANIC.store(true, Ordering::SeqCst);
|
|
||||||
|
|
||||||
// The compiler is able to see through the constant `Box`
|
|
||||||
// and directly compare `x` to 24 - assembly line 73
|
|
||||||
let y = Box::new(24);
|
|
||||||
let equals = x == *y;
|
|
||||||
|
|
||||||
// This call to drop is eliminated
|
|
||||||
drop(y);
|
|
||||||
|
|
||||||
// Need to mark the comparison result as volatile so that
|
|
||||||
// LLVM doesn't strip out all the code. If `y` is marked
|
|
||||||
// volatile instead, allocation will be forced.
|
|
||||||
unsafe { std::ptr::read_volatile(&equals) };
|
|
||||||
|
|
||||||
// Turn off panicking, as there are some deallocations
|
|
||||||
// when we exit main.
|
|
||||||
DO_PANIC.store(false, Ordering::SeqCst);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn main() {
|
|
||||||
cmp(12)
|
|
||||||
}
|
|
||||||
|
|
||||||
#[global_allocator]
|
|
||||||
static A: PanicAllocator = PanicAllocator;
|
|
||||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
|
|
||||||
struct PanicAllocator;
|
|
||||||
|
|
||||||
unsafe impl GlobalAlloc for PanicAllocator {
|
|
||||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
|
||||||
if DO_PANIC.load(Ordering::SeqCst) {
|
|
||||||
panic!("Unexpected allocation.");
|
|
||||||
}
|
|
||||||
System.alloc(layout)
|
|
||||||
}
|
|
||||||
|
|
||||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
|
||||||
if DO_PANIC.load(Ordering::SeqCst) {
|
|
||||||
panic!("Unexpected deallocation.");
|
|
||||||
}
|
|
||||||
System.dealloc(ptr, layout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/BZ_Yp3)
|
|
||||||
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4a765f753183d5b919f62c71d2109d5d)
|
|
||||||
|
|
||||||
# Dr. Array or: How I Learned to Love the Optimizer
|
|
||||||
|
|
||||||
Finally, this isn't so much about LLVM figuring out different memory behavior, but LLVM stripping
|
|
||||||
out code that doesn't do anything. Optimizations of this type have a lot of nuance to them; if
|
|
||||||
you're not careful, they can make your benchmarks look
|
|
||||||
[impossibly good](https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=1199). In Rust, the
|
|
||||||
`black_box` function (implemented in both
|
|
||||||
[`libtest`](https://doc.rust-lang.org/1.1.0/test/fn.black_box.html) and
|
|
||||||
[`criterion`](https://docs.rs/criterion/0.2.10/criterion/fn.black_box.html)) will tell the compiler
|
|
||||||
to disable this kind of optimization. But if you let LLVM remove unnecessary code, you can end up
|
|
||||||
running programs that previously caused errors:
|
|
||||||
|
|
||||||
```rust
|
|
||||||
#[derive(Default)]
|
|
||||||
struct TwoFiftySix {
|
|
||||||
_a: [u64; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct EightK {
|
|
||||||
_a: [TwoFiftySix; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct TwoFiftySixK {
|
|
||||||
_a: [EightK; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default)]
|
|
||||||
struct EightM {
|
|
||||||
_a: [TwoFiftySixK; 32]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn main() {
|
|
||||||
// Normally this blows up because we can't reserve size on stack
|
|
||||||
// for the `EightM` struct. But because the compiler notices we
|
|
||||||
// never do anything with `_x`, it optimizes out the stack storage
|
|
||||||
// and the program completes successfully.
|
|
||||||
let _x = EightM::default();
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
-- [Compiler Explorer](https://godbolt.org/z/daHn7P)
|
|
||||||
|
|
||||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4c253bf26072119896ab93c6ef064dc0)
|
|
@ -1,35 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Summary: What are the Allocation Rules?"
|
|
||||||
description: "A synopsis and reference."
|
|
||||||
category:
|
|
||||||
tags: [rust, understanding-allocations]
|
|
||||||
---
|
|
||||||
|
|
||||||
While there's a lot of interesting detail captured in this series, it's often helpful to have a
|
|
||||||
document that answers some "yes/no" questions. You may not care about what an `Iterator` looks like
|
|
||||||
in assembly; you just need to know whether it allocates an object on the heap or not. And while Rust
|
|
||||||
will prioritize the fastest behavior it can, here are the rules for each memory type:
|
|
||||||
|
|
||||||
**Heap Allocation**:
|
|
||||||
|
|
||||||
- Smart pointers (`Box`, `Rc`, `Mutex`, etc.) allocate their contents in heap memory.
|
|
||||||
- Collections (`HashMap`, `Vec`, `String`, etc.) allocate their contents in heap memory.
|
|
||||||
- Some smart pointers in the standard library have counterparts in other crates that don't need heap
|
|
||||||
memory. If possible, use those.
|
|
||||||
|
|
||||||
**Stack Allocation**:
|
|
||||||
|
|
||||||
- Everything not using a smart pointer will be allocated on the stack.
|
|
||||||
- Structs, enums, iterators, arrays, and closures are all stack allocated.
|
|
||||||
- Cell types (`RefCell`) behave like smart pointers, but are stack-allocated.
|
|
||||||
- Inlining (`#[inline]`) will not affect allocation behavior for better or worse.
|
|
||||||
- Types that are marked `Copy` are guaranteed to have their contents stack-allocated.
|
|
||||||
|
|
||||||
**Global Allocation**:
|
|
||||||
|
|
||||||
- `const` is a fixed value; the compiler is allowed to copy it wherever useful.
|
|
||||||
- `static` is a fixed reference; the compiler will guarantee it is unique.
|
|
||||||
|
|
||||||
![Container Sizes in Rust](/assets/images/2019-02-04-container-size.svg) --
|
|
||||||
[Raph Levien](https://docs.google.com/presentation/d/1q-c7UAyrUlM-eZyTo1pd8SZ0qwA_wYxmPZVOQkoDmH4/edit?usp=sharing)
|
|
@ -1,52 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Making Bread"
|
|
||||||
description: "...because I've got some free time now. 🍞"
|
|
||||||
category:
|
|
||||||
tags: [baking]
|
|
||||||
---
|
|
||||||
|
|
||||||
Having recently started my "gardening leave" between positions, I have some more personal time
|
|
||||||
available. I'm planning to stay productive, contributing to some open-source projects, but it also
|
|
||||||
occurred to me that despite [talking about](https://speice.io/2018/05/hello.html) bread pics, this
|
|
||||||
blog has been purely technical. Maybe I'll change the site title from "The Old Speice Guy" to "Bites
|
|
||||||
and Bytes"?
|
|
||||||
|
|
||||||
Either way, I'm baking a little bit again, and figured it was worth taking a quick break to focus on
|
|
||||||
some lighter material. I recently learned two critically important lessons: first, the temperature
|
|
||||||
of the dough when you put the yeast in makes a huge difference.
|
|
||||||
|
|
||||||
Previously, when I wasn't paying attention to dough temperature:
|
|
||||||
|
|
||||||
![Whole wheat dough](/assets/images/2019-05-03-making-bread/whole-wheat-not-rising.jpg)
|
|
||||||
|
|
||||||
Compared with what happens when I put the dough in the microwave for a defrost cycle because the
|
|
||||||
water I used wasn't warm enough:
|
|
||||||
|
|
||||||
![White dough](/assets/images/2019-05-03-making-bread/white-dough-rising-before-fold.jpg)
|
|
||||||
|
|
||||||
I mean, just look at the bubbles!
|
|
||||||
|
|
||||||
![White dough with bubbles](/assets/images/2019-05-03-making-bread/white-dough-rising-after-fold.jpg)
|
|
||||||
|
|
||||||
After shaping the dough, I've got two loaves ready:
|
|
||||||
|
|
||||||
![Shaped loaves](/assets/images/2019-05-03-making-bread/shaped-loaves.jpg)
|
|
||||||
|
|
||||||
Now, the recipe normally calls for a Dutch Oven to bake the bread because it keeps the dough from
|
|
||||||
drying out in the oven. Because I don't own a Dutch Oven, I typically put a casserole dish on the
|
|
||||||
bottom rack and fill it with water so there's still some moisture in the oven. This time, I forgot
|
|
||||||
to add the water and learned my second lesson: never add room-temperature water to a glass dish
|
|
||||||
that's currently at 500 degrees.
|
|
||||||
|
|
||||||
![Shattered glass dish](/assets/images/2019-05-03-making-bread/shattered-glass.jpg)
|
|
||||||
|
|
||||||
Needless to say, trying to pull out sharp glass from an incredibly hot oven is not what I expected
|
|
||||||
to be doing during my gardening leave.
|
|
||||||
|
|
||||||
In the end, the bread crust wasn't great, but the bread itself turned out pretty alright:
|
|
||||||
|
|
||||||
![Baked bread](/assets/images/2019-05-03-making-bread/final-product.jpg)
|
|
||||||
|
|
||||||
I've been writing a lot more during this break, so I'm looking forward to sharing that in the
|
|
||||||
future. In the meantime, I'm planning on making a sandwich.
|
|
@ -1,296 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "On Building High Performance Systems"
|
|
||||||
description: ""
|
|
||||||
category:
|
|
||||||
tags: []
|
|
||||||
---
|
|
||||||
|
|
||||||
**Update 2019-09-21**: Added notes on `isolcpus` and `systemd` affinity.
|
|
||||||
|
|
||||||
Prior to working in the trading industry, my assumption was that High Frequency Trading (HFT) is
|
|
||||||
made up of people who have access to secret techniques mortal developers could only dream of. There
|
|
||||||
had to be some secret art that could only be learned if one had an appropriately tragic backstory:
|
|
||||||
|
|
||||||
<img src="/assets/images/2019-04-24-kung-fu.webp" alt="kung-fu fight">
|
|
||||||
> How I assumed HFT people learn their secret techniques
|
|
||||||
|
|
||||||
How else do you explain people working on systems that complete the round trip of market data in to
|
|
||||||
orders out (a.k.a. tick-to-trade) consistently within
|
|
||||||
[750-800 nanoseconds](https://stackoverflow.com/a/22082528/1454178)? In roughly the time it takes a
|
|
||||||
computer to access
|
|
||||||
[main memory 8 times](https://people.eecs.berkeley.edu/~rcs/research/interactive_latency.html),
|
|
||||||
trading systems are capable of reading the market data packets, deciding what orders to send, doing
|
|
||||||
risk checks, creating new packets for exchange-specific protocols, and putting those packets on the
|
|
||||||
wire.
|
|
||||||
|
|
||||||
Having now worked in the trading industry, I can confirm the developers aren't super-human; I've
|
|
||||||
made some simple mistakes at the very least. Instead, what shows up in public discussions is that
|
|
||||||
philosophy, not technique, separates high-performance systems from everything else.
|
|
||||||
Performance-critical systems don't rely on "this one cool C++ optimization trick" to make code fast
|
|
||||||
(though micro-optimizations have their place); there's a lot more to worry about than just the code
|
|
||||||
written for the project.
|
|
||||||
|
|
||||||
The framework I'd propose is this: **If you want to build high-performance systems, focus first on
|
|
||||||
reducing performance variance** (reducing the gap between the fastest and slowest runs of the same
|
|
||||||
code), **and only look at average latency once variance is at an acceptable level**.
|
|
||||||
|
|
||||||
Don't get me wrong, I'm a much happier person when things are fast. Computer goes from booting in 20
|
|
||||||
seconds down to 10 because I installed a solid-state drive? Awesome. But if every fifth day it takes
|
|
||||||
a full minute to boot because of corrupted sectors? Not so great. Average speed over the course of a
|
|
||||||
week is the same in each situation, but you're painfully aware of that minute when it happens. When
|
|
||||||
it comes to code, the principle is the same: speeding up a function by an average of 10 milliseconds
|
|
||||||
doesn't mean much if there's a 100ms difference between your fastest and slowest runs. When
|
|
||||||
performance matters, you need to respond quickly _every time_, not just in aggregate.
|
|
||||||
High-performance systems should first optimize for time variance. Once you're consistent at the time
|
|
||||||
scale you care about, then focus on improving average time.
|
|
||||||
|
|
||||||
This focus on variance shows up all the time in industry too (emphasis added in all quotes below):
|
|
||||||
|
|
||||||
- In [marketing materials](https://business.nasdaq.com/market-tech/marketplaces/trading) for
|
|
||||||
NASDAQ's matching engine, the most performance-sensitive component of the exchange, dependability
|
|
||||||
is highlighted in addition to instantaneous metrics:
|
|
||||||
|
|
||||||
> Able to **consistently sustain** an order rate of over 100,000 orders per second at sub-40
|
|
||||||
> microsecond average latency
|
|
||||||
|
|
||||||
- The [Aeron](https://github.com/real-logic/aeron) message bus has this to say about performance:
|
|
||||||
|
|
||||||
> Performance is the key focus. Aeron is designed to be the highest throughput with the lowest and
|
|
||||||
> **most predictable latency possible** of any messaging system
|
|
||||||
|
|
||||||
- The company PolySync, which is working on autonomous vehicles,
|
|
||||||
[mentions why](https://polysync.io/blog/session-types-for-hearty-codecs/) they picked their
|
|
||||||
specific messaging format:
|
|
||||||
|
|
||||||
> In general, high performance is almost always desirable for serialization. But in the world of
|
|
||||||
> autonomous vehicles, **steady timing performance is even more important** than peak throughput.
|
|
||||||
> This is because safe operation is sensitive to timing outliers. Nobody wants the system that
|
|
||||||
> decides when to slam on the brakes to occasionally take 100 times longer than usual to encode
|
|
||||||
> its commands.
|
|
||||||
|
|
||||||
- [Solarflare](https://solarflare.com/), which makes highly-specialized network hardware, points out
|
|
||||||
variance (jitter) as a big concern for
|
|
||||||
[electronic trading](https://solarflare.com/electronic-trading/):
|
|
||||||
> The high stakes world of electronic trading, investment banks, market makers, hedge funds and
|
|
||||||
> exchanges demand the **lowest possible latency and jitter** while utilizing the highest
|
|
||||||
> bandwidth and return on their investment.
|
|
||||||
|
|
||||||
And to further clarify: we're not discussing _total run-time_, but variance of total run-time. There
|
|
||||||
are situations where it's not reasonably possible to make things faster, and you'd much rather be
|
|
||||||
consistent. For example, trading firms use
|
|
||||||
[wireless networks](https://sniperinmahwah.wordpress.com/2017/06/07/network-effects-part-i/) because
|
|
||||||
the speed of light through air is faster than through fiber-optic cables. There's still at _absolute
|
|
||||||
minimum_ a [~33.76 millisecond](http://tinyurl.com/y2vd7tn8) delay required to send data between,
|
|
||||||
say,
|
|
||||||
[Chicago and Tokyo](https://www.theice.com/market-data/connectivity-and-feeds/wireless/tokyo-chicago).
|
|
||||||
If a trading system in Chicago calls the function for "send order to Tokyo" and waits to see if a
|
|
||||||
trade occurs, there's a physical limit to how long that will take. In this situation, the focus is
|
|
||||||
on keeping variance of _additional processing_ to a minimum, since speed of light is the limiting
|
|
||||||
factor.
|
|
||||||
|
|
||||||
So how does one go about looking for and eliminating performance variance? To tell the truth, I
|
|
||||||
don't think a systematic answer or flow-chart exists. There's no substitute for (A) building a deep
|
|
||||||
understanding of the entire technology stack, and (B) actually measuring system performance (though
|
|
||||||
(C) watching a lot of [CppCon](https://www.youtube.com/channel/UCMlGfpWw-RUdWX_JbLCukXg) videos for
|
|
||||||
inspiration never hurt). Even then, every project cares about performance to a different degree; you
|
|
||||||
may need to build an entire
|
|
||||||
[replica production system](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=3015) to
|
|
||||||
accurately benchmark at nanosecond precision, or you may be content to simply
|
|
||||||
[avoid garbage collection](https://www.youtube.com/watch?v=BD9cRbxWQx8&feature=youtu.be&t=1335) in
|
|
||||||
your Java code.
|
|
||||||
|
|
||||||
Even though everyone has different needs, there are still common things to look for when trying to
|
|
||||||
isolate and eliminate variance. In no particular order, these are my focus areas when thinking about
|
|
||||||
high-performance systems:
|
|
||||||
|
|
||||||
## Language-specific
|
|
||||||
|
|
||||||
**Garbage Collection**: How often does garbage collection happen? When is it triggered? What are the
|
|
||||||
impacts?
|
|
||||||
|
|
||||||
- [In Python](https://rushter.com/blog/python-garbage-collector/), individual objects are collected
|
|
||||||
if the reference count reaches 0, and each generation is collected if
|
|
||||||
`num_alloc - num_dealloc > gc_threshold` whenever an allocation happens. The GIL is acquired for
|
|
||||||
the duration of generational collection.
|
|
||||||
- Java has
|
|
||||||
[many](https://docs.oracle.com/en/java/javase/12/gctuning/parallel-collector1.html#GUID-DCDD6E46-0406-41D1-AB49-FB96A50EB9CE)
|
|
||||||
[different](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector.html#GUID-ED3AB6D3-FD9B-4447-9EDF-983ED2F7A573)
|
|
||||||
[collection](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector-tuning.html#GUID-90E30ACA-8040-432E-B3A0-1E0440AB556A)
|
|
||||||
[algorithms](https://docs.oracle.com/en/java/javase/12/gctuning/z-garbage-collector1.html#GUID-A5A42691-095E-47BA-B6DC-FB4E5FAA43D0)
|
|
||||||
to choose from, each with different characteristics. The default algorithms (Parallel GC in Java
|
|
||||||
8, G1 in Java 9) freeze the JVM while collecting, while more recent algorithms
|
|
||||||
([ZGC](https://wiki.openjdk.java.net/display/zgc) and
|
|
||||||
[Shenandoah](https://wiki.openjdk.java.net/display/shenandoah)) are designed to keep "stop the
|
|
||||||
world" to a minimum by doing collection work concurrently with the application.
|
|
||||||
|
|
||||||
**Allocation**: Every language has a different way of interacting with "heap" memory, but the
|
|
||||||
principle is the same: running the allocator to allocate/deallocate memory takes time that can often
|
|
||||||
be put to better use. Understanding when your language interacts with the allocator is crucial, and
|
|
||||||
not always obvious. For example: C++ and Rust don't allocate heap memory for iterators, but Java
|
|
||||||
does (meaning potential GC pauses). Take time to understand heap behavior (I made a
|
|
||||||
[guide for Rust](/2019/02/understanding-allocations-in-rust.html)), and look into alternative
|
|
||||||
allocators ([jemalloc](http://jemalloc.net/),
|
|
||||||
[tcmalloc](https://gperftools.github.io/gperftools/tcmalloc.html)) that might run faster than the
|
|
||||||
operating system default.
|
|
||||||
|
|
||||||
**Data Layout**: How your data is arranged in memory matters;
|
|
||||||
[data-oriented design](https://www.youtube.com/watch?v=yy8jQgmhbAU) and
|
|
||||||
[cache locality](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=1185) can have huge
|
|
||||||
impacts on performance. The C family of languages (C, value types in C#, C++) and Rust all have
|
|
||||||
guarantees about the shape every object takes in memory that others (e.g. Java and Python) can't
|
|
||||||
make. [Cachegrind](http://valgrind.org/docs/manual/cg-manual.html) and kernel
|
|
||||||
[perf](https://perf.wiki.kernel.org/index.php/Main_Page) counters are both great for understanding
|
|
||||||
how performance relates to memory layout.
|
|
||||||
|
|
||||||
**Just-In-Time Compilation**: Languages that are compiled on the fly (LuaJIT, C#, Java, PyPy) are
|
|
||||||
great because they optimize your program for how it's actually being used, rather than how a
|
|
||||||
compiler expects it to be used. However, there's a variance problem if the program stops executing
|
|
||||||
while waiting for translation from VM bytecode to native code. As a remedy, many languages support
|
|
||||||
ahead-of-time compilation in addition to the JIT versions
|
|
||||||
([CoreRT](https://github.com/dotnet/corert) in C# and [GraalVM](https://www.graalvm.org/) in Java).
|
|
||||||
On the other hand, LLVM supports
|
|
||||||
[Profile Guided Optimization](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization),
|
|
||||||
which theoretically brings JIT benefits to non-JIT languages. Finally, be careful to avoid comparing
|
|
||||||
apples and oranges during benchmarks; you don't want your code to suddenly speed up because the JIT
|
|
||||||
compiler kicked in.
|
|
||||||
|
|
||||||
**Programming Tricks**: These won't make or break performance, but can be useful in specific
|
|
||||||
circumstances. For example, C++ can use
|
|
||||||
[templates instead of branches](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=1206)
|
|
||||||
in critical sections.
|
|
||||||
|
|
||||||
## Kernel
|
|
||||||
|
|
||||||
Code you wrote is almost certainly not the _only_ code running on your hardware. There are many ways
|
|
||||||
the operating system interacts with your program, from interrupts to system calls, that are
|
|
||||||
important to watch for. These are written from a Linux perspective, but Windows does typically have
|
|
||||||
equivalent functionality.
|
|
||||||
|
|
||||||
**Scheduling**: The kernel is normally free to schedule any process on any core, so it's important
|
|
||||||
to reserve CPU cores exclusively for the important programs. There are a few parts to this: first,
|
|
||||||
limit the CPU cores that non-critical processes are allowed to run on by excluding cores from
|
|
||||||
scheduling
|
|
||||||
([`isolcpus`](https://www.linuxtopia.org/online_books/linux_kernel/kernel_configuration/re46.html)
|
|
||||||
kernel command-line option), or by setting the `init` process CPU affinity
|
|
||||||
([`systemd` example](https://access.redhat.com/solutions/2884991)). Second, set critical processes
|
|
||||||
to run on the isolated cores by setting the
|
|
||||||
[processor affinity](https://en.wikipedia.org/wiki/Processor_affinity) using
|
|
||||||
[taskset](https://linux.die.net/man/1/taskset). Finally, use
|
|
||||||
[`NO_HZ`](https://github.com/torvalds/linux/blob/master/Documentation/timers/NO_HZ.txt) or
|
|
||||||
[`chrt`](https://linux.die.net/man/1/chrt) to disable scheduling interrupts. Turning off
|
|
||||||
hyper-threading is also likely beneficial.
|
|
||||||
|
|
||||||
**System calls**: Reading from a UNIX socket? Writing to a file? In addition to not knowing how long
|
|
||||||
the I/O operation takes, these all trigger expensive
|
|
||||||
[system calls (syscalls)](https://en.wikipedia.org/wiki/System_call). To handle these, the CPU must
|
|
||||||
[context switch](https://en.wikipedia.org/wiki/Context_switch) to the kernel, let the kernel
|
|
||||||
operation complete, then context switch back to your program. We'd rather keep these
|
|
||||||
[to a minimum](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) (see
|
|
||||||
timestamp 18:20). [Strace](https://linux.die.net/man/1/strace) is your friend for understanding when
|
|
||||||
and where syscalls happen.
|
|
||||||
|
|
||||||
**Signal Handling**: Far less likely to be an issue, but signals do trigger a context switch if your
|
|
||||||
code has a handler registered. This will be highly dependent on the application, but you can
|
|
||||||
[block signals](https://www.linuxprogrammingblog.com/all-about-linux-signals?page=show#Blocking_signals)
|
|
||||||
if it's an issue.
|
|
||||||
|
|
||||||
**Interrupts**: System interrupts are how devices connected to your computer notify the CPU that
|
|
||||||
something has happened. The CPU will then choose a processor core to pause and context switch to the
|
|
||||||
OS to handle the interrupt. Make sure that
|
|
||||||
[SMP affinity](http://www.alexonlinux.com/smp-affinity-and-proper-interrupt-handling-in-linux) is
|
|
||||||
set so that interrupts are handled on a CPU core not running the program you care about.
|
|
||||||
|
|
||||||
**[NUMA](https://www.kernel.org/doc/html/latest/vm/numa.html)**: While NUMA is good at making
|
|
||||||
multi-cell systems transparent, there are variance implications; if the kernel moves a process
|
|
||||||
across nodes, future memory accesses must wait for the controller on the original node. Use
|
|
||||||
[numactl](https://linux.die.net/man/8/numactl) to handle memory-/cpu-cell pinning so this doesn't
|
|
||||||
happen.
|
|
||||||
|
|
||||||
## Hardware
|
|
||||||
|
|
||||||
**CPU Pipelining/Speculation**: Speculative execution in modern processors gave us vulnerabilities
|
|
||||||
like Spectre, but it also gave us performance improvements like
|
|
||||||
[branch prediction](https://stackoverflow.com/a/11227902/1454178). And if the CPU mis-speculates
|
|
||||||
your code, there's variance associated with rewind and replay. While the compiler knows a lot about
|
|
||||||
how your CPU [pipelines instructions](https://youtu.be/nAbCKa0FzjQ?t=4467), code can be
|
|
||||||
[structured to help](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=755) the branch
|
|
||||||
predictor.
|
|
||||||
|
|
||||||
**Paging**: For most systems, virtual memory is incredible. Applications live in their own worlds,
|
|
||||||
and the CPU/[MMU](https://en.wikipedia.org/wiki/Memory_management_unit) figures out the details.
|
|
||||||
However, there's a variance penalty associated with memory paging and caching; if you access more
|
|
||||||
memory pages than the [TLB](https://en.wikipedia.org/wiki/Translation_lookaside_buffer) can store,
|
|
||||||
you'll have to wait for the page walk. Kernel perf tools are necessary to figure out if this is an
|
|
||||||
issue, but using [huge pages](https://blog.pythian.com/performance-tuning-hugepages-in-linux/) can
|
|
||||||
reduce TLB burdens. Alternately, running applications in a hypervisor like
|
|
||||||
[Jailhouse](https://github.com/siemens/jailhouse) allows one to skip virtual memory entirely, but
|
|
||||||
this is probably more work than the benefits are worth.
|
|
||||||
|
|
||||||
**Network Interfaces**: When more than one computer is involved, variance can go up dramatically.
|
|
||||||
Tuning kernel
|
|
||||||
[network parameters](https://github.com/leandromoreira/linux-network-performance-parameters) may be
|
|
||||||
helpful, but modern systems more frequently opt to skip the kernel altogether with a technique
|
|
||||||
called [kernel bypass](https://blog.cloudflare.com/kernel-bypass/). This typically requires
|
|
||||||
specialized hardware and [drivers](https://www.openonload.org/), but even industries like
|
|
||||||
[telecom](https://www.bbc.co.uk/rd/blog/2018-04-high-speed-networking-open-source-kernel-bypass) are
|
|
||||||
finding the benefits.
|
|
||||||
|
|
||||||
## Networks
|
|
||||||
|
|
||||||
**Routing**: There's a reason financial firms are willing to pay
|
|
||||||
[millions of euros](https://sniperinmahwah.wordpress.com/2019/03/26/4-les-moeres-english-version/)
|
|
||||||
for rights to a small plot of land - having a straight-line connection from point A to point B means
|
|
||||||
the path their data takes is the shortest possible. In contrast, there are currently 6 computers in
|
|
||||||
between me and Google, but that may change at any moment if my ISP realizes a
|
|
||||||
[more efficient route](https://en.wikipedia.org/wiki/Border_Gateway_Protocol) is available. Whether
|
|
||||||
it's using
|
|
||||||
[research-quality equipment](https://sniperinmahwah.wordpress.com/2018/05/07/shortwave-trading-part-i-the-west-chicago-tower-mystery/)
|
|
||||||
for shortwave radio, or just making sure there's no data inadvertently going between data centers,
|
|
||||||
routing matters.
|
|
||||||
|
|
||||||
**Protocol**: TCP as a network protocol is awesome: guaranteed and in-order delivery, flow control,
|
|
||||||
and congestion control all built in. But these attributes make the most sense when networking
|
|
||||||
infrastructure is lossy; for systems that expect nearly all packets to be delivered correctly, the
|
|
||||||
setup handshaking and packet acknowledgment are just overhead. Using UDP (unicast or multicast) may
|
|
||||||
make sense in these contexts as it avoids the chatter needed to track connection state, and
|
|
||||||
[gap-fill](https://iextrading.com/docs/IEX%20Transport%20Specification.pdf)
|
|
||||||
[strategies](http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/moldudp64.pdf)
|
|
||||||
can handle the rest.
|
|
||||||
|
|
||||||
**Switching**: Many routers/switches handle packets using "store-and-forward" behavior: wait for the
|
|
||||||
whole packet, validate checksums, and then send to the next device. In variance terms, the time
|
|
||||||
needed to move data between two nodes is proportional to the size of that data; the switch must
|
|
||||||
"store" all data before it can calculate checksums and "forward" to the next node. With
|
|
||||||
["cut-through"](https://www.networkworld.com/article/2241573/latency-and-jitter--cut-through-design-pays-off-for-arista--blade.html)
|
|
||||||
designs, switches will begin forwarding data as soon as they know where the destination is,
|
|
||||||
checksums be damned. This means there's a fixed cost (at the switch) for network traffic, no matter
|
|
||||||
the size.
|
|
||||||
|
|
||||||
# Final Thoughts
|
|
||||||
|
|
||||||
High-performance systems, regardless of industry, are not magical. They do require extreme precision
|
|
||||||
and attention to detail, but they're designed, built, and operated by regular people, using a lot of
|
|
||||||
tools that are publicly available. Interested in seeing how context switching affects performance of
|
|
||||||
your benchmarks? `taskset` should be installed in all modern Linux distributions, and can be used to
|
|
||||||
make sure the OS never migrates your process. Curious how often garbage collection triggers during a
|
|
||||||
crucial operation? Your language of choice will typically expose details of its operations
|
|
||||||
([Python](https://docs.python.org/3/library/gc.html),
|
|
||||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/vmoptions-jsp-140102.html#DebuggingOptions)).
|
|
||||||
Want to know how hard your program is stressing the TLB? Use `perf record` and look for
|
|
||||||
`dtlb_load_misses.miss_causes_a_walk`.
|
|
||||||
|
|
||||||
Two final guiding questions, then: first, before attempting to apply some of the technology above to
|
|
||||||
your own systems, can you first identify
|
|
||||||
[where/when you care](http://wiki.c2.com/?PrematureOptimization) about "high-performance"? As an
|
|
||||||
example, if parts of a system rely on humans pushing buttons, CPU pinning won't have any measurable
|
|
||||||
effect. Humans are already far too slow to react in time. Second, if you're using benchmarks, are
|
|
||||||
they being designed in a way that's actually helpful? Tools like
|
|
||||||
[Criterion](http://www.serpentine.com/criterion/) (also in
|
|
||||||
[Rust](https://github.com/bheisler/criterion.rs)) and Google's
|
|
||||||
[Benchmark](https://github.com/google/benchmark) output not only average run time, but variance as
|
|
||||||
well; your benchmarking environment is subject to the same concerns your production environment is.
|
|
||||||
|
|
||||||
Finally, I believe high-performance systems are a matter of philosophy, not necessarily technique.
|
|
||||||
Rigorous focus on variance is the first step, and there are plenty of ways to measure and mitigate
|
|
||||||
it; once that's at an acceptable level, then optimize for speed.
|
|
@ -1,263 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Binary Format Shootout"
|
|
||||||
description: "Cap'n Proto vs. Flatbuffers vs. SBE"
|
|
||||||
category:
|
|
||||||
tags: [rust]
|
|
||||||
---
|
|
||||||
|
|
||||||
I've found that in many personal projects,
|
|
||||||
[analysis paralysis](https://en.wikipedia.org/wiki/Analysis_paralysis) is particularly deadly.
|
|
||||||
Making good decisions in the beginning avoids pain and suffering later; if extra research prevents
|
|
||||||
future problems, I'm happy to continue ~~procrastinating~~ researching indefinitely.
|
|
||||||
|
|
||||||
So let's say you're in need of a binary serialization format. Data will be going over the network,
|
|
||||||
not just in memory, so having a schema document and code generation is a must. Performance is
|
|
||||||
crucial, so formats that support zero-copy de/serialization are given priority. And the more
|
|
||||||
languages supported, the better; I use Rust, but can't predict what other languages this could
|
|
||||||
interact with.
|
|
||||||
|
|
||||||
Given these requirements, the candidates I could find were:
|
|
||||||
|
|
||||||
1. [Cap'n Proto](https://capnproto.org/) has been around the longest, and is the most established
|
|
||||||
2. [Flatbuffers](https://google.github.io/flatbuffers/) is the newest, and claims to have a simpler
|
|
||||||
encoding
|
|
||||||
3. [Simple Binary Encoding](https://github.com/real-logic/simple-binary-encoding) has the simplest
|
|
||||||
encoding, but the Rust implementation is unmaintained
|
|
||||||
|
|
||||||
Any one of these will satisfy the project requirements: easy to transmit over a network, reasonably
|
|
||||||
fast, and polyglot support. But how do you actually pick one? It's impossible to know what issues
|
|
||||||
will follow that choice, so I tend to avoid commitment until the last possible moment.
|
|
||||||
|
|
||||||
Still, a choice must be made. Instead of worrying about which is "the best," I decided to build a
|
|
||||||
small proof-of-concept system in each format and pit them against each other. All code can be found
|
|
||||||
in the [repository](https://github.com/speice-io/marketdata-shootout) for this post.
|
|
||||||
|
|
||||||
We'll discuss more in detail, but a quick preview of the results:
|
|
||||||
|
|
||||||
- Cap'n Proto: Theoretically performs incredibly well, but the implementation had issues
|
|
||||||
- Flatbuffers: Has some quirks, but largely lived up to its "zero-copy" promises
|
|
||||||
- SBE: Best median and worst-case performance, but the message structure has a limited feature set
|
|
||||||
|
|
||||||
# Prologue: Binary Parsing with Nom
|
|
||||||
|
|
||||||
Our benchmark system will be a simple data processor; given depth-of-book market data from
|
|
||||||
[IEX](https://iextrading.com/trading/market-data/#deep), serialize each message into the schema
|
|
||||||
format, read it back, and calculate total size of stock traded and the lowest/highest quoted prices.
|
|
||||||
This test isn't complex, but is representative of the project I need a binary format for.
|
|
||||||
|
|
||||||
But before we make it to that point, we have to actually read in the market data. To do so, I'm
|
|
||||||
using a library called [`nom`](https://github.com/Geal/nom). Version 5.0 was recently released and
|
|
||||||
brought some big changes, so this was an opportunity to build a non-trivial program and get
|
|
||||||
familiar.
|
|
||||||
|
|
||||||
If you don't already know about `nom`, it's a "parser combinator" library. By combining different smaller
|
|
||||||
parsers, you can assemble a parser to handle complex structures without writing tedious code by
|
|
||||||
hand. For example, when parsing
|
|
||||||
[PCAP files](https://www.winpcap.org/ntar/draft/PCAP-DumpFileFormat.html#rfc.section.3.3):
|
|
||||||
|
|
||||||
```
|
|
||||||
0 1 2 3
|
|
||||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
|
||||||
+---------------------------------------------------------------+
|
|
||||||
0 | Block Type = 0x00000006 |
|
|
||||||
+---------------------------------------------------------------+
|
|
||||||
4 | Block Total Length |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
8 | Interface ID |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
12 | Timestamp (High) |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
16 | Timestamp (Low) |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
20 | Captured Len |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
24 | Packet Len |
|
|
||||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
|
||||||
| Packet Data |
|
|
||||||
| ... |
|
|
||||||
```
|
|
||||||
|
|
||||||
...you can build a parser in `nom` that looks like
|
|
||||||
[this](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/parsers.rs#L59-L93):
|
|
||||||
|
|
||||||
```rust
|
|
||||||
const ENHANCED_PACKET: [u8; 4] = [0x06, 0x00, 0x00, 0x00];
|
|
||||||
pub fn enhanced_packet_block(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
|
||||||
let (
|
|
||||||
remaining,
|
|
||||||
(
|
|
||||||
block_type,
|
|
||||||
block_len,
|
|
||||||
interface_id,
|
|
||||||
timestamp_high,
|
|
||||||
timestamp_low,
|
|
||||||
captured_len,
|
|
||||||
packet_len,
|
|
||||||
),
|
|
||||||
) = tuple((
|
|
||||||
tag(ENHANCED_PACKET),
|
|
||||||
le_u32,
|
|
||||||
le_u32,
|
|
||||||
le_u32,
|
|
||||||
le_u32,
|
|
||||||
le_u32,
|
|
||||||
le_u32,
|
|
||||||
))(input)?;
|
|
||||||
|
|
||||||
let (remaining, packet_data) = take(captured_len)(remaining)?;
|
|
||||||
Ok((remaining, packet_data))
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
While this example isn't too interesting, more complex formats (like IEX market data) are where
|
|
||||||
[`nom` really shines](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/iex.rs).
|
|
||||||
|
|
||||||
Ultimately, because the `nom` code in this shootout was the same for all formats, we're not too
|
|
||||||
interested in its performance. Still, it's worth mentioning that building the market data parser was
|
|
||||||
actually fun; I didn't have to write tons of boring code by hand.
|
|
||||||
|
|
||||||
# Part 1: Cap'n Proto
|
|
||||||
|
|
||||||
Now it's time to get into the meaty part of the story. Cap'n Proto was the first format I tried
|
|
||||||
because of how long it has supported Rust (thanks to [dwrensha](https://github.com/dwrensha) for
|
|
||||||
maintaining the Rust port since
|
|
||||||
[2014!](https://github.com/capnproto/capnproto-rust/releases/tag/rustc-0.10)). However, I had a ton
|
|
||||||
of performance concerns once I started using it.
|
|
||||||
|
|
||||||
To serialize new messages, Cap'n Proto uses a "builder" object. This builder allocates memory on the
|
|
||||||
heap to hold the message content, but because builders
|
|
||||||
[can't be re-used](https://github.com/capnproto/capnproto-rust/issues/111), we have to allocate a
|
|
||||||
new buffer for every single message. I was able to work around this with a
|
|
||||||
[special builder](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/capnp_runner.rs#L17-L51)
|
|
||||||
that could re-use the buffer, but it required reading through Cap'n Proto's
|
|
||||||
[benchmarks](https://github.com/capnproto/capnproto-rust/blob/master/benchmark/benchmark.rs#L124-L156)
|
|
||||||
to find an example, and used
|
|
||||||
[`std::mem::transmute`](https://doc.rust-lang.org/std/mem/fn.transmute.html) to bypass Rust's borrow
|
|
||||||
checker.
|
|
||||||
|
|
||||||
The process of reading messages was better, but still had issues. Cap'n Proto has two message
|
|
||||||
encodings: a ["packed"](https://capnproto.org/encoding.html#packing) representation, and an
|
|
||||||
"unpacked" version. When reading "packed" messages, we need a buffer to unpack the message into
|
|
||||||
before we can use it; Cap'n Proto allocates a new buffer for each message we unpack, and I wasn't
|
|
||||||
able to figure out a way around that. In contrast, the unpacked message format should be where Cap'n
|
|
||||||
Proto shines; its main selling point is that there's [no decoding step](https://capnproto.org/).
|
|
||||||
However, accomplishing zero-copy deserialization required code in the private API
|
|
||||||
([since fixed](https://github.com/capnproto/capnproto-rust/issues/148)), and we allocate a vector on
|
|
||||||
every read for the segment table.
|
|
||||||
|
|
||||||
In the end, I put in significant work to make Cap'n Proto as fast as possible, but there were too
|
|
||||||
many issues for me to feel comfortable using it long-term.
|
|
||||||
|
|
||||||
# Part 2: Flatbuffers
|
|
||||||
|
|
||||||
This is the new kid on the block. After a
|
|
||||||
[first attempt](https://github.com/google/flatbuffers/pull/3894) didn't pan out, official support
|
|
||||||
was [recently launched](https://github.com/google/flatbuffers/pull/4898). Flatbuffers intends to
|
|
||||||
address the same problems as Cap'n Proto: high-performance, polyglot, binary messaging. The
|
|
||||||
difference is that Flatbuffers claims to have a simpler wire format and
|
|
||||||
[more flexibility](https://google.github.io/flatbuffers/flatbuffers_benchmarks.html).
|
|
||||||
|
|
||||||
On the whole, I enjoyed using Flatbuffers; the [tooling](https://crates.io/crates/flatc-rust) is
|
|
||||||
nice, and unlike Cap'n Proto, parsing messages was actually zero-copy and zero-allocation. However,
|
|
||||||
there were still some issues.
|
|
||||||
|
|
||||||
First, Flatbuffers (at least in Rust) can't handle nested vectors. This is a problem for formats
|
|
||||||
like the following:
|
|
||||||
|
|
||||||
```
|
|
||||||
table Message {
|
|
||||||
symbol: string;
|
|
||||||
}
|
|
||||||
table MultiMessage {
|
|
||||||
messages:[Message];
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
We want to create a `MultiMessage` which contains a vector of `Message`, and each `Message` itself
|
|
||||||
contains a vector (the `string` type). I was able to work around this by
|
|
||||||
[caching `Message` elements](https://github.com/speice-io/marketdata-shootout/blob/e9d07d148bf36a211a6f86802b313c4918377d1b/src/flatbuffers_runner.rs#L83)
|
|
||||||
in a `SmallVec` before building the final `MultiMessage`, but it was a painful process that I
|
|
||||||
believe contributed to poor serialization performance.
|
|
||||||
|
|
||||||
Second, streaming support in Flatbuffers seems to be something of an
|
|
||||||
[afterthought](https://github.com/google/flatbuffers/issues/3898). Where Cap'n Proto in Rust handles
|
|
||||||
reading messages from a stream as part of the API, Flatbuffers just sticks a `u32` at the front of
|
|
||||||
each message to indicate the size. Not specifically a problem, but calculating message size without
|
|
||||||
that tag is nigh on impossible.
|
|
||||||
|
|
||||||
Ultimately, I enjoyed using Flatbuffers, and had to do significantly less work to make it perform
|
|
||||||
well.
|
|
||||||
|
|
||||||
# Part 3: Simple Binary Encoding
|
|
||||||
|
|
||||||
Support for SBE was added by the author of one of my favorite
|
|
||||||
[Rust blog posts](https://web.archive.org/web/20190427124806/https://polysync.io/blog/session-types-for-hearty-codecs/).
|
|
||||||
I've [talked previously]({% post_url 2019-06-31-high-performance-systems %}) about how important
|
|
||||||
variance is in high-performance systems, so it was encouraging to read about a format that
|
|
||||||
[directly addressed](https://github.com/real-logic/simple-binary-encoding/wiki/Why-Low-Latency) my
|
|
||||||
concerns. SBE has by far the simplest binary format, but it does make some tradeoffs.
|
|
||||||
|
|
||||||
Both Cap'n Proto and Flatbuffers use [message offsets](https://capnproto.org/encoding.html#structs)
|
|
||||||
to handle variable-length data, [unions](https://capnproto.org/language.html#unions), and various
|
|
||||||
other features. In contrast, messages in SBE are essentially
|
|
||||||
[just structs](https://github.com/real-logic/simple-binary-encoding/blob/master/sbe-samples/src/main/resources/example-schema.xml);
|
|
||||||
variable-length data is supported, but there's no union type.
|
|
||||||
|
|
||||||
As mentioned in the beginning, the Rust port of SBE works well, but is
|
|
||||||
[essentially unmaintained](https://users.rust-lang.org/t/zero-cost-abstraction-frontier-no-copy-low-allocation-ordered-decoding/11515/9).
|
|
||||||
However, if you don't need union types, and can accept that schemas are XML documents, it's still
|
|
||||||
worth using. SBE's implementation had the best streaming support of all formats I tested, and
|
|
||||||
doesn't trigger allocation during de/serialization.
|
|
||||||
|
|
||||||
# Results
|
|
||||||
|
|
||||||
After building a test harness
|
|
||||||
[for](https://github.com/speice-io/marketdata-shootout/blob/master/src/capnp_runner.rs)
|
|
||||||
[each](https://github.com/speice-io/marketdata-shootout/blob/master/src/flatbuffers_runner.rs)
|
|
||||||
[format](https://github.com/speice-io/marketdata-shootout/blob/master/src/sbe_runner.rs), it was
|
|
||||||
time to actually take them for a spin. I used
|
|
||||||
[this script](https://github.com/speice-io/marketdata-shootout/blob/master/run_shootout.sh) to run
|
|
||||||
the benchmarks, and the raw results are
|
|
||||||
[here](https://github.com/speice-io/marketdata-shootout/blob/master/shootout.csv). All data reported
|
|
||||||
below is the average of 10 runs on a single day of IEX data. Results were validated to make sure
|
|
||||||
that each format parsed the data correctly.
|
|
||||||
|
|
||||||
## Serialization
|
|
||||||
|
|
||||||
This test measures, on a
|
|
||||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L268-L272),
|
|
||||||
how long it takes to serialize the IEX message into the desired format and write to a pre-allocated
|
|
||||||
buffer.
|
|
||||||
|
|
||||||
| Schema | Median | 99th Pctl | 99.9th Pctl | Total |
|
|
||||||
| :------------------- | :----- | :-------- | :---------- | :----- |
|
|
||||||
| Cap'n Proto Packed | 413ns | 1751ns | 2943ns | 14.80s |
|
|
||||||
| Cap'n Proto Unpacked | 273ns | 1828ns | 2836ns | 10.65s |
|
|
||||||
| Flatbuffers | 355ns | 2185ns | 3497ns | 14.31s |
|
|
||||||
| SBE | 91ns | 1535ns | 2423ns | 3.91s |
|
|
||||||
|
|
||||||
## Deserialization
|
|
||||||
|
|
||||||
This test measures, on a
|
|
||||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L294-L298),
|
|
||||||
how long it takes to read the previously-serialized message and perform some basic aggregation. The
|
|
||||||
aggregation code is the same for each format, so any performance differences are due solely to the
|
|
||||||
format implementation.
|
|
||||||
|
|
||||||
| Schema | Median | 99th Pctl | 99.9th Pctl | Total |
|
|
||||||
| :------------------- | :----- | :-------- | :---------- | :----- |
|
|
||||||
| Cap'n Proto Packed | 539ns | 1216ns | 2599ns | 18.92s |
|
|
||||||
| Cap'n Proto Unpacked | 366ns | 737ns | 1583ns | 12.32s |
|
|
||||||
| Flatbuffers | 173ns | 421ns | 1007ns | 6.00s |
|
|
||||||
| SBE | 116ns | 286ns | 659ns | 4.05s |
|
|
||||||
|
|
||||||
# Conclusion
|
|
||||||
|
|
||||||
Building a benchmark turned out to be incredibly helpful in making a decision; because a "union"
|
|
||||||
type isn't important to me, I can be confident that SBE best addresses my needs.
|
|
||||||
|
|
||||||
While SBE was the fastest in terms of both median and worst-case performance, its worst-case
|
|
||||||
performance was proportionately far higher than any other format. It seems that
|
|
||||||
de/serialization time scales with message size, but I'll need to do some more research to understand
|
|
||||||
what exactly is going on.
|
|
@ -1,370 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "Release the GIL"
|
|
||||||
description: "Strategies for Parallelism in Python"
|
|
||||||
category:
|
|
||||||
tags: [python]
|
|
||||||
---
|
|
||||||
|
|
||||||
Complaining about the [Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock)
|
|
||||||
(GIL) seems like a rite of passage for Python developers. It's easy to criticize a design decision
|
|
||||||
made before multi-core CPUs were widely available, but the fact that it's still around indicates
|
|
||||||
that it generally works [Good](https://wiki.c2.com/?PrematureOptimization)
|
|
||||||
[Enough](https://wiki.c2.com/?YouArentGonnaNeedIt). Besides, there are simple and effective
|
|
||||||
workarounds; it's not hard to start a
|
|
||||||
[new process](https://docs.python.org/3/library/multiprocessing.html) and use message passing to
|
|
||||||
synchronize code running in parallel.
|
|
||||||
|
|
||||||
Still, wouldn't it be nice to have more than a single active interpreter thread? In an age of
|
|
||||||
asynchronicity and _M:N_ threading, Python seems lacking. The ideal scenario is to take advantage of
|
|
||||||
both Python's productivity and the modern CPU's parallel capabilities.
|
|
||||||
|
|
||||||
Presented below are two strategies for releasing the GIL's icy grip without giving up on what makes
|
|
||||||
Python a nice language to start with. Bear in mind: these are just the tools; no claim is made about
|
|
||||||
whether it's a good idea to use them. Very often, unlocking the GIL is an
|
|
||||||
[XY problem](https://en.wikipedia.org/wiki/XY_problem); you want application performance, and the
|
|
||||||
GIL seems like an obvious bottleneck. Remember that any gains from running code in parallel come at
|
|
||||||
the expense of project complexity; messing with the GIL is ultimately messing with Python's memory
|
|
||||||
model.
|
|
||||||
|
|
||||||
```python
|
|
||||||
%load_ext Cython
|
|
||||||
from numba import jit
|
|
||||||
|
|
||||||
N = 1_000_000_000
|
|
||||||
```
|
|
||||||
|
|
||||||
# Cython
|
|
||||||
|
|
||||||
Put simply, [Cython](https://cython.org/) is a programming language that looks a lot like Python,
|
|
||||||
gets [transpiled](https://en.wikipedia.org/wiki/Source-to-source_compiler) to C/C++, and integrates
|
|
||||||
well with the [CPython](https://en.wikipedia.org/wiki/CPython) API. It's great for building Python
|
|
||||||
wrappers to C and C++ libraries, writing optimized code for numerical processing, and tons more. And
|
|
||||||
when it comes to managing the GIL, there are two special features:
|
|
||||||
|
|
||||||
- The `nogil`
|
|
||||||
[function annotation](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#declaring-a-function-as-callable-without-the-gil)
|
|
||||||
asserts that a Cython function is safe to use without the GIL, and compilation will fail if it
|
|
||||||
interacts with Python in an unsafe manner
|
|
||||||
- The `with nogil`
|
|
||||||
[context manager](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#releasing-the-gil)
|
|
||||||
explicitly unlocks the CPython GIL while active
|
|
||||||
|
|
||||||
Whenever Cython code runs inside a `with nogil` block on a separate thread, the Python interpreter
|
|
||||||
is unblocked and allowed to continue work elsewhere. We'll define a "busy work" function that
|
|
||||||
demonstrates this principle in action:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%cython
|
|
||||||
|
|
||||||
# Annotating a function with `nogil` indicates only that it is safe
|
|
||||||
# to call in a `with nogil` block. It *does not* release the GIL.
|
|
||||||
cdef unsigned long fibonacci(unsigned long n) nogil:
|
|
||||||
if n <= 1:
|
|
||||||
return n
|
|
||||||
|
|
||||||
cdef unsigned long a = 0, b = 1, c = 0
|
|
||||||
|
|
||||||
c = a + b
|
|
||||||
for _i in range(2, n):
|
|
||||||
a = b
|
|
||||||
b = c
|
|
||||||
c = a + b
|
|
||||||
|
|
||||||
return c
|
|
||||||
|
|
||||||
|
|
||||||
def cython_nogil(unsigned long n):
|
|
||||||
# Explicitly release the GIL while running `fibonacci`
|
|
||||||
with nogil:
|
|
||||||
value = fibonacci(n)
|
|
||||||
|
|
||||||
return value
|
|
||||||
|
|
||||||
|
|
||||||
def cython_gil(unsigned long n):
|
|
||||||
# Because the GIL is not explicitly released, it implicitly
|
|
||||||
# remains acquired when running the `fibonacci` function
|
|
||||||
return fibonacci(n)
|
|
||||||
```
|
|
||||||
|
|
||||||
First, let's time how long it takes Cython to calculate the billionth Fibonacci number:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
_ = cython_gil(N);
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 365 ms, sys: 0 ns, total: 365 ms
|
|
||||||
> Wall time: 372 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
_ = cython_nogil(N);
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 381 ms, sys: 0 ns, total: 381 ms
|
|
||||||
> Wall time: 388 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
Both versions (with and without GIL) take effectively the same amount of time to run. Even when
|
|
||||||
running this calculation in parallel on separate threads, it is expected that the run time will
|
|
||||||
double because only one thread can be active at a time:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
from threading import Thread
|
|
||||||
|
|
||||||
# Create the two threads to run on
|
|
||||||
t1 = Thread(target=cython_gil, args=[N])
|
|
||||||
t2 = Thread(target=cython_gil, args=[N])
|
|
||||||
# Start the threads
|
|
||||||
t1.start(); t2.start()
|
|
||||||
# Wait for the threads to finish
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 641 ms, sys: 5.62 ms, total: 647 ms
|
|
||||||
> Wall time: 645 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
However, if the first thread releases the GIL, the second thread is free to acquire it and run in
|
|
||||||
parallel:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
|
|
||||||
t1 = Thread(target=cython_nogil, args=[N])
|
|
||||||
t2 = Thread(target=cython_gil, args=[N])
|
|
||||||
t1.start(); t2.start()
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 717 ms, sys: 372 µs, total: 718 ms
|
|
||||||
> Wall time: 358 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
Because `user` time represents the sum of processing time on all threads, it doesn't change much.
|
|
||||||
The ["wall time"](https://en.wikipedia.org/wiki/Elapsed_real_time) has been cut roughly in half
|
|
||||||
because each function is running simultaneously.
|
|
||||||
|
|
||||||
Keep in mind that the **order in which threads are started** makes a difference!
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
|
|
||||||
# Note that the GIL-locked version is started first
|
|
||||||
t1 = Thread(target=cython_gil, args=[N])
|
|
||||||
t2 = Thread(target=cython_nogil, args=[N])
|
|
||||||
t1.start(); t2.start()
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 667 ms, sys: 0 ns, total: 667 ms
|
|
||||||
> Wall time: 672 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
Even though the second thread releases the GIL while running, it can't start until the first has
|
|
||||||
completed. Thus, the overall runtime is effectively the same as running two GIL-locked threads.
|
|
||||||
|
|
||||||
Finally, be aware that attempting to unlock the GIL from a thread that doesn't own it will crash the
|
|
||||||
**interpreter**, not just the thread attempting the unlock:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%cython
|
|
||||||
|
|
||||||
cdef int cython_recurse(int n) nogil:
|
|
||||||
if n <= 0:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
with nogil:
|
|
||||||
return cython_recurse(n - 1)
|
|
||||||
|
|
||||||
cython_recurse(2)
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> Fatal Python error: PyEval_SaveThread: NULL tstate
|
|
||||||
>
|
|
||||||
> Thread 0x00007f499effd700 (most recent call first):
|
|
||||||
> File "/home/bspeice/.virtualenvs/release-the-gil/lib/python3.7/site-packages/ipykernel/parentpoller.py", line 39 in run
|
|
||||||
> File "/usr/lib/python3.7/threading.py", line 926 in _bootstrap_inner
|
|
||||||
> File "/usr/lib/python3.7/threading.py", line 890 in _bootstrap
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
In practice, avoiding this issue is simple. First, `nogil` functions probably shouldn't contain
|
|
||||||
`with nogil` blocks. Second, Cython can
|
|
||||||
[conditionally acquire/release](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#conditional-acquiring-releasing-the-gil)
|
|
||||||
the GIL, so these conditions can be used to synchronize access. Finally, Cython's documentation for
|
|
||||||
[external C code](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#acquiring-and-releasing-the-gil)
|
|
||||||
contains more detail on how to safely manage the GIL.
|
|
||||||
|
|
||||||
To conclude: use Cython's `nogil` annotation to assert that functions are safe for calling when the
|
|
||||||
GIL is unlocked, and `with nogil` to actually unlock the GIL and run those functions.
|
|
||||||
|
|
||||||
# Numba
|
|
||||||
|
|
||||||
Like Cython, [Numba](https://numba.pydata.org/) is a "compiled Python." Where Cython works by
|
|
||||||
compiling a Python-like language to C/C++, Numba compiles Python bytecode _directly to machine code_
|
|
||||||
at runtime. Behavior is controlled with a special `@jit` decorator; calling a decorated function
|
|
||||||
first compiles it to machine code before running. Calling the function a second time re-uses that
|
|
||||||
machine code unless the argument types have changed.
|
|
||||||
|
|
||||||
Numba works best when a `nopython=True` argument is added to the `@jit` decorator; functions
|
|
||||||
compiled in [`nopython`](http://numba.pydata.org/numba-doc/latest/user/jit.html?#nopython) mode
|
|
||||||
avoid the CPython API and have performance comparable to C. Further, adding `nogil=True` to the
|
|
||||||
`@jit` decorator unlocks the GIL while that function is running. Note that `nogil` and `nopython`
|
|
||||||
are separate arguments; while it is necessary for code to be compiled in `nopython` mode in order to
|
|
||||||
release the lock, the GIL will remain locked if `nogil=False` (the default).
|
|
||||||
|
|
||||||
Let's repeat the same experiment, this time using Numba instead of Cython:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# The `int` type annotation is only for humans and is ignored
|
|
||||||
# by Numba.
|
|
||||||
@jit(nopython=True, nogil=True)
|
|
||||||
def numba_nogil(n: int) -> int:
|
|
||||||
if n <= 1:
|
|
||||||
return n
|
|
||||||
|
|
||||||
a = 0
|
|
||||||
b = 1
|
|
||||||
|
|
||||||
c = a + b
|
|
||||||
for _i in range(2, n):
|
|
||||||
a = b
|
|
||||||
b = c
|
|
||||||
c = a + b
|
|
||||||
|
|
||||||
return c
|
|
||||||
|
|
||||||
|
|
||||||
# Run using `nopython` mode to receive a performance boost,
|
|
||||||
# but GIL remains locked due to `nogil=False` by default.
|
|
||||||
@jit(nopython=True)
|
|
||||||
def numba_gil(n: int) -> int:
|
|
||||||
if n <= 1:
|
|
||||||
return n
|
|
||||||
|
|
||||||
a = 0
|
|
||||||
b = 1
|
|
||||||
|
|
||||||
c = a + b
|
|
||||||
for _i in range(2, n):
|
|
||||||
a = b
|
|
||||||
b = c
|
|
||||||
c = a + b
|
|
||||||
|
|
||||||
return c
|
|
||||||
|
|
||||||
|
|
||||||
# Call each function once to force compilation; we don't want
|
|
||||||
# the timing statistics to include how long it takes to compile.
|
|
||||||
numba_nogil(N)
|
|
||||||
numba_gil(N);
|
|
||||||
```
|
|
||||||
|
|
||||||
We'll perform the same tests as above; first, figure out how long it takes the function to run:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
_ = numba_gil(N)
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 253 ms, sys: 258 µs, total: 253 ms
|
|
||||||
> Wall time: 251 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
<span style="font-size: .8em">
|
|
||||||
Aside: it's not immediately clear why Numba takes ~20% less time to run than Cython for code that should be
|
|
||||||
effectively identical after compilation.
|
|
||||||
</span>
|
|
||||||
|
|
||||||
When running two GIL-locked threads, the result (as expected) takes around twice as long to compute:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
t1 = Thread(target=numba_gil, args=[N])
|
|
||||||
t2 = Thread(target=numba_gil, args=[N])
|
|
||||||
t1.start(); t2.start()
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 541 ms, sys: 3.96 ms, total: 545 ms
|
|
||||||
> Wall time: 541 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
But if the GIL-unlocking thread starts first, both threads run in parallel:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
t1 = Thread(target=numba_nogil, args=[N])
|
|
||||||
t2 = Thread(target=numba_gil, args=[N])
|
|
||||||
t1.start(); t2.start()
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 551 ms, sys: 7.77 ms, total: 559 ms
|
|
||||||
> Wall time: 279 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
Just like Cython, starting the GIL-locked thread first leads to poor performance:
|
|
||||||
|
|
||||||
```python
|
|
||||||
%%time
|
|
||||||
t1 = Thread(target=numba_gil, args=[N])
|
|
||||||
t2 = Thread(target=numba_nogil, args=[N])
|
|
||||||
t1.start(); t2.start()
|
|
||||||
t1.join(); t2.join()
|
|
||||||
```
|
|
||||||
|
|
||||||
> <pre>
|
|
||||||
> CPU times: user 524 ms, sys: 0 ns, total: 524 ms
|
|
||||||
> Wall time: 522 ms
|
|
||||||
> </pre>
|
|
||||||
|
|
||||||
Finally, unlike Cython, Numba will unlock the GIL if and only if it is currently acquired;
|
|
||||||
recursively calling `@jit(nogil=True)` functions is perfectly safe:
|
|
||||||
|
|
||||||
```python
|
|
||||||
from numba import jit
|
|
||||||
|
|
||||||
@jit(nopython=True, nogil=True)
|
|
||||||
def numba_recurse(n: int) -> int:
|
|
||||||
if n <= 0:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
return numba_recurse(n - 1)
|
|
||||||
|
|
||||||
numba_recurse(2);
|
|
||||||
```
|
|
||||||
|
|
||||||
# Conclusion
|
|
||||||
|
|
||||||
Before finishing, it's important to address pain points that will show up if these techniques are
|
|
||||||
used in a more realistic project:
|
|
||||||
|
|
||||||
First, code running in a GIL-free context will likely also need non-trivial data structures;
|
|
||||||
GIL-free functions aren't useful if they're constantly interacting with Python objects whose access
|
|
||||||
requires the GIL. Cython provides
|
|
||||||
[extension types](http://docs.cython.org/en/latest/src/tutorial/cdef_classes.html) and Numba
|
|
||||||
provides a [`@jitclass`](https://numba.pydata.org/numba-doc/dev/user/jitclass.html) decorator to
|
|
||||||
address this need.
|
|
||||||
|
|
||||||
Second, building and distributing applications that make use of Cython/Numba can be complicated.
|
|
||||||
Cython packages require running the compiler, (potentially) linking/packaging external dependencies,
|
|
||||||
and distributing a binary wheel. Numba is generally simpler because the code being distributed is
|
|
||||||
pure Python, but can be tricky since errors aren't detected until runtime.
|
|
||||||
|
|
||||||
Finally, while unlocking the GIL is often a solution in search of a problem, both Cython and Numba
|
|
||||||
provide tools to directly manage the GIL when appropriate. This enables true parallelism (not just
|
|
||||||
[concurrency](https://stackoverflow.com/a/1050257)) that is impossible in vanilla Python.
|
|
@ -1,60 +0,0 @@
|
|||||||
---
|
|
||||||
layout: post
|
|
||||||
title: "The webpack industrial complex"
|
|
||||||
description: "Reflections on a new project"
|
|
||||||
category:
|
|
||||||
tags: [webpack, react, vite]
|
|
||||||
---
|
|
||||||
|
|
||||||
This started because I wanted to build a synthesizer. Setting a goal of "digital DX7" was ambitious, but I needed something unrelated to the day job. Beyond that, working with audio seemed like a good challenge. I enjoy performance-focused code, and performance problems in audio are conspicuous. Building a web project was an obvious choice because of the Web Audio API documentation and independence from a large Digital Audio Workstation (DAW).
|
|
||||||
|
|
||||||
The project was soon derailed trying to sort out technical issues unrelated to the original purpose. Finding a resolution was a frustrating journey, and it's still not clear whether those problems were my fault. As a result, I'm writing this to try making sense of it, as a case study/reference material, and to salvage something from the process.
|
|
||||||
|
|
||||||
## Starting strong
|
|
||||||
|
|
||||||
The sole starting requirement was to write everything in TypeScript. Not because of project scale, but because guardrails help with unfamiliar territory. Keeping that in mind, the first question was: how does one start a new project? All I actually need is "compile TypeScript, show it in a browser."
|
|
||||||
|
|
||||||
Create React App (CRA) came to the rescue and the rest of that evening was a joy. My TypeScript/JavaScript skills were rusty, but the online documentation was helpful. I had never understood the appeal of JSX (why put a DOM in JavaScript?) until it made connecting an `onEvent` handler and a function easy.
|
|
||||||
|
|
||||||
Some quick dimensional analysis later and there was a sine wave oscillator playing A=440 through the speakers. I specifically remember thinking "modern browsers are magical."
|
|
||||||
|
|
||||||
## Continuing on
|
|
||||||
|
|
||||||
Now comes the first mistake: I began to worry about "scale" before encountering an actual problem. Rather than rendering audio in the main thread, why not use audio worklets and render in a background thread instead?
|
|
||||||
|
|
||||||
The first sign something was amiss came from the TypeScript compiler errors showing the audio worklet API [was missing](https://github.com/microsoft/TypeScript/issues/28308). After searching out GitHub issues and (unsuccessfully) tweaking the `tsconfig.json` settings, I settled on installing a package and moving on.
|
|
||||||
|
|
||||||
The next problem came from actually using the API. Worklets must load from separate "modules," but it wasn't clear how to guarantee the worklet code stayed separate from the application. I saw recommendations to use `new URL(<local path>, import.meta.url)` and it worked! Well, kind of:
|
|
||||||
|
|
||||||
![Browser error](/assets/images/2022-11-20-video_mp2t.png)
|
|
||||||
|
|
||||||
That file has the audio processor code, so why does it get served with `Content-Type: video/mp2t`?
|
|
||||||
|
|
||||||
## Floundering about
|
|
||||||
|
|
||||||
Now comes the second mistake: even though I didn't understand the error, I ignored recommendations to [just use JavaScript](https://hackernoon.com/implementing-audioworklets-with-react-8a80a470474) and stuck by the original TypeScript requirement.
|
|
||||||
|
|
||||||
I tried different project structures. Moving the worklet code to a new folder didn't help, nor did setting up a monorepo and placing it in a new package.
|
|
||||||
|
|
||||||
I tried three different CRA tools - `react-app-rewired`, `craco`, `customize-react-app` - but got the same problem. Each has varying levels of compatibility with recent CRA versions, so it wasn't clear if I had the right solution but implemented it incorrectly. After attempting to eject the application and panicking after seeing the configuration, I abandoned that as well.
|
|
||||||
|
|
||||||
I tried changing the webpack configuration: using [new](https://github.com/webpack/webpack/issues/11543#issuecomment-917673256) [loaders](https://github.com/popelenkow/worker-url), setting [asset rules](https://github.com/webpack/webpack/discussions/14093#discussioncomment-1257149), even [changing how webpack detects worker resources](https://github.com/webpack/webpack/issues/11543#issuecomment-826897590). In hindsight, entry points may have been the answer. But because CRA actively resists attempts to change its webpack configuration, and I couldn't find audio worklet examples in any other framework, I gave up.
|
|
||||||
|
|
||||||
I tried so many application frameworks. Next.js looked like a good candidate, but added its own [bespoke webpack complexity](https://github.com/vercel/next.js/issues/24907) to the existing confusion. Astro had the best "getting started" experience, but I refuse to install an IDE-specific plugin. I first used Deno while exploring Lume, but it couldn't import the audio worklet types (maybe because of module compatibility?). Each framework was unique in its own way (shout-out to SvelteKit) but I couldn't figure out how to make them work.
|
|
||||||
|
|
||||||
## Learning and reflecting
|
|
||||||
|
|
||||||
I ended up using Vite and vite-plugin-react-pages to handle both "build the app" and "bundle worklets," but the specific tool choice isn't important. Instead, the focus should be on lessons learned.
|
|
||||||
|
|
||||||
For myself:
|
|
||||||
|
|
||||||
- I'm obsessed with tooling, to the point it can derail the original goal. While it comes from a good place (for example: "types are awesome"), it can get in the way of more important work
|
|
||||||
- I tend to reach for online resources right after seeing a new problem. While finding help online is often faster, spending time understanding the problem would have been more productive than cycling through (often outdated) blog posts
|
|
||||||
|
|
||||||
For the tools:
|
|
||||||
|
|
||||||
- Resource bundling is great and solves a genuine challenge. I've heard too many horror stories of developers writing modules by hand to believe this is unnecessary complexity
|
|
||||||
- Webpack is a build system and modern frameworks are deeply dependent on it (hence the "webpack industrial complex"). While this often saves users from unnecessary complexity, there's no path forward if something breaks
|
|
||||||
- There's little ability to mix and match tools across frameworks. Next.js and Gatsby let users extend webpack, but because each framework adds its own modules, changes aren't portable. After spending a week looking at webpack, I had an example running with parcel in thirty minutes, but couldn't integrate it
|
|
||||||
|
|
||||||
In the end, learning new systems is fun, but a focus on tools that "just work" can leave users out in the cold if they break down.
|
|
1
archive/index.html
Normal file
@ -1,15 +0,0 @@
|
|||||||
/* Self-hosted JetBrains Mono (normal weight, upright), offered as WOFF2 with
   a WOFF fallback. */
@font-face {
  font-family: 'JetBrains Mono';
  font-style: normal;
  font-weight: normal;
  /* Render text in the fallback face right away and swap when the web font
     has loaded, instead of leaving text invisible during the download. */
  font-display: swap;
  src: url('/assets/font/JetBrainsMono-Regular.woff2') format('woff2'),
       url('/assets/font/JetBrainsMono-Regular.woff') format('woff');
}
|
|
||||||
|
|
||||||
/* Self-hosted Lato (normal weight, upright), offered as WOFF2 with a WOFF
   fallback. */
@font-face {
  font-family: 'Lato';
  font-style: normal;
  font-weight: normal;
  /* Render text in the fallback face right away and swap when the web font
     has loaded, instead of leaving text invisible during the download. */
  font-display: swap;
  src: url('/assets/font/lato-regular-webfont.woff2') format('woff2'),
       url('/assets/font/lato-regular-webfont.woff') format('woff');
}
|
|
@ -1,119 +0,0 @@
|
|||||||
---
|
|
||||||
---
|
|
||||||
|
|
||||||
// Import the theme rules
|
|
||||||
@import "theme";
|
|
||||||
|
|
||||||
// Page-wide defaults: Lato text, capped at the viewport width, with any
// horizontal overflow clipped rather than scrolled.
body {
  font-family: 'Lato', sans-serif;
  max-width: 100%;
  overflow-x: hidden;
}
|
|
||||||
|
|
||||||
// Navigation bar text uses the $gray variable.
.navbar { color: $gray; }
|
|
||||||
|
|
||||||
// Black divider with asymmetric horizontal margins.
.separator {
  color: #000;
  margin-left: .25rem;
  margin-right: .45rem;
}

// Generated content: a non-breaking space followed by "/".
.separator:after {
  content: '\00a0/';
}
|
|
||||||
|
|
||||||
// Page header: 80px of padding above, none below.
header {
  padding-top: 80px;
  padding-bottom: 0;
}
|
|
||||||
|
|
||||||
// Black headings.
// NOTE(review): only `h1` is scoped to `header`; the second selector is a bare
// `h2` and therefore matches every h2 on the page. Confirm whether
// `header h2` was intended before narrowing it.
header h1,
h2 {
  color: #000;
}
|
|
||||||
|
|
||||||
// Post descriptions are set in a muted dark grey.
.post-description { color: #555; }
|
|
||||||
|
|
||||||
// Links inside a post: dark grey text with a dotted $gray underline. On hover
// the text turns white, the underline becomes solid, and a $gray backdrop
// grows from the bottom edge to fill the link.
.post-container a {
  position: relative;
  display: inline-block;
  padding: 1px;
  color: #555;
  border-bottom: 1px dotted $gray;
  transition: color 0.3s ease;

  // Zero-height backdrop anchored to the bottom-left corner, stacked behind
  // the link text.
  &::after {
    content: '';
    position: absolute;
    bottom: 0;
    left: 0;
    z-index: -1;
    width: 100%;
    height: 0%;
    background-color: $gray;
    transition: all 0.3s ease;
  }

  &:hover {
    color: #fff;
    border-bottom-style: solid;

    // Expand the backdrop to the full height of the link.
    &::after {
      height: 100%;
    }
  }
}
|
|
||||||
|
|
||||||
// Fixed 15px text size for preformatted blocks.
body pre { font-size: 15px; }
|
|
||||||
|
|
||||||
// Highlighted blocks and inline code use JetBrains Mono, falling back to the
// generic monospace family.
pre.highlight,
code {
  font-family: 'JetBrains Mono', monospace;
}
|
|
||||||
|
|
||||||
// The default theme sets `width: 100vw` here, which pushes the page past the
// screen width and triggers horizontal scrolling; 99vw stays just inside it.
div.highlighter-rouge { width: 99vw; }
|
|
||||||
|
|
||||||
// Front-page date box: right-aligned and never wrapped, so long titles cannot
// force the date onto multiple lines.
.post-date {
  white-space: nowrap;
  text-align: right;
}
|
|
||||||
|
|
||||||
// Quotes: muted text, flush with the left margin, set off by a thick $gray
// rule and 1.8rem of inner padding.
// NOTE(review): `right` only applies to positioned elements and no `position`
// is declared here; presumably the imported theme sets one. Confirm.
blockquote {
  margin-left: 0;
  padding-left: 1.8rem;
  border-left: 5px solid $gray;
  color: #555;
  right: 100px;
}
|
|
||||||
|
|
||||||
// Post navigation container: a flex row with 1em of space above and below.
.post-nav {
  margin-top: 1em;
  margin-bottom: 1em;
  display: flex;
}
|
|
||||||
// Each child div shares the row equally: flex-grow 1, flex-shrink 1,
// flex-basis 0.
.post-nav div { flex: 1 1 0; }
|
|
||||||
// Right-align the contents of the "next" navigation cell.
.post-nav-next { text-align: right; }
|
|
||||||
|
|
||||||
// Table cells: 0.75em of padding with a thin $gray rule underneath each cell.
th,
td {
  padding: 0.75em;
  border-bottom: 1px solid $gray;
}
|
|
1
assets/css/styles.ae6ff4a3.css
Normal file
BIN
assets/images/1-0d5e8450555296218deb0517b80440f3.png
Normal file
After Width: | Height: | Size: 117 KiB |
BIN
assets/images/1-2d6670430a11b01011e4c231ea594db1.png
Normal file
After Width: | Height: | Size: 98 KiB |
BIN
assets/images/10-b7987a0ff93705d5045057cbdaa2ede9.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
assets/images/2-062e1e47a07f200ff3b1531a02812bc7.png
Normal file
After Width: | Height: | Size: 136 KiB |
BIN
assets/images/2-46bb7cc9cf739d97050c199eedced1a7.png
Normal file
After Width: | Height: | Size: 94 KiB |
Before Width: | Height: | Size: 71 KiB |
BIN
assets/images/3-2f5c483659f81d741809de6d095bd577.png
Normal file
After Width: | Height: | Size: 110 KiB |
BIN
assets/images/3-eea635f8cfe4a12ae649ceb6c984e0cd.png
Normal file
After Width: | Height: | Size: 27 KiB |
BIN
assets/images/4-63dc81954b1604cfa91f4c789da144a5.png
Normal file
After Width: | Height: | Size: 100 KiB |
BIN
assets/images/4-b4c3dbfa10b1997706bc271ca71e2ff5.png
Normal file
After Width: | Height: | Size: 160 KiB |
BIN
assets/images/5-8f10acd82b2f025abe57cb93d435a25f.png
Normal file
After Width: | Height: | Size: 136 KiB |
BIN
assets/images/5-ae210d26729cea1700924579adf2c44c.png
Normal file
After Width: | Height: | Size: 97 KiB |
BIN
assets/images/6-456ca1125f48947cf3c1c13722af95a0.png
Normal file
After Width: | Height: | Size: 18 KiB |
BIN
assets/images/6-f07e72ff0b4639453034c75b2e62faba.png
Normal file
After Width: | Height: | Size: 101 KiB |
BIN
assets/images/7-e0793eed6c42845d8ce4e3e79c1d44d8.png
Normal file
After Width: | Height: | Size: 97 KiB |
BIN
assets/images/8-3eb2ad63e4c40b6717ee4516223d73ed.png
Normal file
After Width: | Height: | Size: 113 KiB |
BIN
assets/images/9-630bd32c43e654f068e3c3bea79810e5.png
Normal file
After Width: | Height: | Size: 103 KiB |
After Width: | Height: | Size: 22 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 81 KiB |
After Width: | Height: | Size: 29 KiB |
After Width: | Height: | Size: 13 KiB |
After Width: | Height: | Size: 95 KiB |
After Width: | Height: | Size: 24 KiB |