Initial docusaurus
@ -1,6 +0,0 @@
|
||||
FROM mcr.microsoft.com/vscode/devcontainers/ruby:0-2.7-bullseye
|
||||
|
||||
RUN wget https://github.com/errata-ai/vale/releases/download/v2.21.0/vale_2.21.0_Linux_64-bit.tar.gz -O /tmp/vale.tar.gz \
|
||||
&& cd /usr/local/bin \
|
||||
&& tar xf /tmp/vale.tar.gz \
|
||||
&& rm /tmp/vale.tar.gz
|
@ -1,13 +1,19 @@
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
|
||||
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/ruby
|
||||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
|
||||
// README at: https://github.com/devcontainers/templates/tree/main/src/typescript-node
|
||||
{
|
||||
"name": "Ruby",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
"name": "Node.js & TypeScript",
|
||||
"image": "mcr.microsoft.com/devcontainers/typescript-node:1-22-bookworm",
|
||||
"runArgs": ["--userns=keep-id"],
|
||||
|
||||
"remoteUser": "vscode",
|
||||
"containerUser": "vscode",
|
||||
"workspaceMount": "source=${localWorkspaceFolder},target=/workspaces/${localWorkspaceFolderBasename},type=bind,Z"
|
||||
"containerUser": "node",
|
||||
"containerEnv": {
|
||||
"HOME": "/home/node"
|
||||
},
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ChrisChinchilla.vale-vscode"
|
||||
]
|
||||
}
|
||||
},
|
||||
"onCreateCommand": "bash -c 'mkdir -p ~/.local/bin && wget -qO- https://github.com/errata-ai/vale/releases/download/v2.21.0/vale_2.21.0_Linux_64-bit.tar.gz | tar xz -C ~/.local/bin'"
|
||||
}
|
||||
|
28
.gitignore
vendored
@ -1,8 +1,20 @@
|
||||
_site/
|
||||
.swp
|
||||
.sass-cache/
|
||||
.jekyll-metadata
|
||||
.bundle/
|
||||
vendor/
|
||||
.styles/
|
||||
.vscode/
|
||||
# Dependencies
|
||||
/node_modules
|
||||
|
||||
# Production
|
||||
/build
|
||||
|
||||
# Generated files
|
||||
.docusaurus
|
||||
.cache-loader
|
||||
|
||||
# Misc
|
||||
.DS_Store
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
9
.styles/Microsoft/AMPM.yml
Normal file
@ -0,0 +1,9 @@
|
||||
extends: existence
|
||||
message: Use 'AM' or 'PM' (preceded by a space).
|
||||
link: https://docs.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/term-collections/date-time-terms
|
||||
level: error
|
||||
nonword: true
|
||||
tokens:
|
||||
- '\d{1,2}[AP]M'
|
||||
- '\d{1,2} ?[ap]m'
|
||||
- '\d{1,2} ?[aApP]\.[mM]\.'
|
30
.styles/Microsoft/Accessibility.yml
Normal file
@ -0,0 +1,30 @@
|
||||
extends: existence
|
||||
message: "Don't use language (such as '%s') that defines people by their disability."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/term-collections/accessibility-terms
|
||||
level: suggestion
|
||||
ignorecase: true
|
||||
tokens:
|
||||
- a victim of
|
||||
- able-bodied
|
||||
- an epileptic
|
||||
- birth defect
|
||||
- crippled
|
||||
- differently abled
|
||||
- disabled
|
||||
- dumb
|
||||
- handicapped
|
||||
- handicaps
|
||||
- healthy person
|
||||
- hearing-impaired
|
||||
- lame
|
||||
- maimed
|
||||
- mentally handicapped
|
||||
- missing a limb
|
||||
- mute
|
||||
- non-verbal
|
||||
- normal person
|
||||
- sight-impaired
|
||||
- slow learner
|
||||
- stricken with
|
||||
- suffers from
|
||||
- vision-impaired
|
64
.styles/Microsoft/Acronyms.yml
Normal file
@ -0,0 +1,64 @@
|
||||
extends: conditional
|
||||
message: "'%s' has no definition."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/acronyms
|
||||
level: suggestion
|
||||
ignorecase: false
|
||||
# Ensures that the existence of 'first' implies the existence of 'second'.
|
||||
first: '\b([A-Z]{3,5})\b'
|
||||
second: '(?:\b[A-Z][a-z]+ )+\(([A-Z]{3,5})\)'
|
||||
# ... with the exception of these:
|
||||
exceptions:
|
||||
- API
|
||||
- ASP
|
||||
- CLI
|
||||
- CPU
|
||||
- CSS
|
||||
- CSV
|
||||
- DEBUG
|
||||
- DOM
|
||||
- DPI
|
||||
- FAQ
|
||||
- GCC
|
||||
- GDB
|
||||
- GET
|
||||
- GPU
|
||||
- GTK
|
||||
- GUI
|
||||
- HTML
|
||||
- HTTP
|
||||
- HTTPS
|
||||
- IDE
|
||||
- JAR
|
||||
- JSON
|
||||
- JSX
|
||||
- LESS
|
||||
- LLDB
|
||||
- NET
|
||||
- NOTE
|
||||
- NVDA
|
||||
- OSS
|
||||
- PATH
|
||||
- PDF
|
||||
- PHP
|
||||
- POST
|
||||
- RAM
|
||||
- REPL
|
||||
- RSA
|
||||
- SCM
|
||||
- SCSS
|
||||
- SDK
|
||||
- SQL
|
||||
- SSH
|
||||
- SSL
|
||||
- SVG
|
||||
- TBD
|
||||
- TCP
|
||||
- TODO
|
||||
- URI
|
||||
- URL
|
||||
- USB
|
||||
- UTF
|
||||
- XML
|
||||
- XSS
|
||||
- YAML
|
||||
- ZIP
|
272
.styles/Microsoft/Adverbs.yml
Normal file
@ -0,0 +1,272 @@
|
||||
extends: existence
|
||||
message: "Remove '%s' if it's not important to the meaning of the statement."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/word-choice/use-simple-words-concise-sentences
|
||||
ignorecase: true
|
||||
level: warning
|
||||
action:
|
||||
name: remove
|
||||
tokens:
|
||||
- abnormally
|
||||
- absentmindedly
|
||||
- accidentally
|
||||
- adventurously
|
||||
- anxiously
|
||||
- arrogantly
|
||||
- awkwardly
|
||||
- bashfully
|
||||
- beautifully
|
||||
- bitterly
|
||||
- bleakly
|
||||
- blindly
|
||||
- blissfully
|
||||
- boastfully
|
||||
- boldly
|
||||
- bravely
|
||||
- briefly
|
||||
- brightly
|
||||
- briskly
|
||||
- broadly
|
||||
- busily
|
||||
- calmly
|
||||
- carefully
|
||||
- carelessly
|
||||
- cautiously
|
||||
- cheerfully
|
||||
- cleverly
|
||||
- closely
|
||||
- coaxingly
|
||||
- colorfully
|
||||
- continually
|
||||
- coolly
|
||||
- courageously
|
||||
- crossly
|
||||
- cruelly
|
||||
- curiously
|
||||
- daintily
|
||||
- dearly
|
||||
- deceivingly
|
||||
- deeply
|
||||
- defiantly
|
||||
- deliberately
|
||||
- delightfully
|
||||
- diligently
|
||||
- dimly
|
||||
- doubtfully
|
||||
- dreamily
|
||||
- easily
|
||||
- effectively
|
||||
- elegantly
|
||||
- energetically
|
||||
- enormously
|
||||
- enthusiastically
|
||||
- excitedly
|
||||
- extremely
|
||||
- fairly
|
||||
- faithfully
|
||||
- famously
|
||||
- ferociously
|
||||
- fervently
|
||||
- fiercely
|
||||
- fondly
|
||||
- foolishly
|
||||
- fortunately
|
||||
- frankly
|
||||
- frantically
|
||||
- freely
|
||||
- frenetically
|
||||
- frightfully
|
||||
- furiously
|
||||
- generally
|
||||
- generously
|
||||
- gently
|
||||
- gladly
|
||||
- gleefully
|
||||
- gracefully
|
||||
- gratefully
|
||||
- greatly
|
||||
- greedily
|
||||
- happily
|
||||
- hastily
|
||||
- healthily
|
||||
- heavily
|
||||
- helplessly
|
||||
- honestly
|
||||
- hopelessly
|
||||
- hungrily
|
||||
- innocently
|
||||
- inquisitively
|
||||
- intensely
|
||||
- intently
|
||||
- interestingly
|
||||
- inwardly
|
||||
- irritably
|
||||
- jaggedly
|
||||
- jealously
|
||||
- jovially
|
||||
- joyfully
|
||||
- joyously
|
||||
- jubilantly
|
||||
- judgmentally
|
||||
- justly
|
||||
- keenly
|
||||
- kiddingly
|
||||
- kindheartedly
|
||||
- knavishly
|
||||
- knowingly
|
||||
- knowledgeably
|
||||
- lazily
|
||||
- lightly
|
||||
- limply
|
||||
- lively
|
||||
- loftily
|
||||
- longingly
|
||||
- loosely
|
||||
- loudly
|
||||
- lovingly
|
||||
- loyally
|
||||
- madly
|
||||
- majestically
|
||||
- meaningfully
|
||||
- mechanically
|
||||
- merrily
|
||||
- miserably
|
||||
- mockingly
|
||||
- mortally
|
||||
- mysteriously
|
||||
- naturally
|
||||
- nearly
|
||||
- neatly
|
||||
- nervously
|
||||
- nicely
|
||||
- noisily
|
||||
- obediently
|
||||
- obnoxiously
|
||||
- oddly
|
||||
- offensively
|
||||
- optimistically
|
||||
- overconfidently
|
||||
- painfully
|
||||
- partially
|
||||
- patiently
|
||||
- perfectly
|
||||
- playfully
|
||||
- politely
|
||||
- poorly
|
||||
- positively
|
||||
- potentially
|
||||
- powerfully
|
||||
- promptly
|
||||
- properly
|
||||
- punctually
|
||||
- quaintly
|
||||
- queasily
|
||||
- queerly
|
||||
- questionably
|
||||
- quickly
|
||||
- quietly
|
||||
- quirkily
|
||||
- quite
|
||||
- quizzically
|
||||
- randomly
|
||||
- rapidly
|
||||
- rarely
|
||||
- readily
|
||||
- really
|
||||
- reassuringly
|
||||
- recklessly
|
||||
- regularly
|
||||
- reluctantly
|
||||
- repeatedly
|
||||
- reproachfully
|
||||
- restfully
|
||||
- righteously
|
||||
- rightfully
|
||||
- rigidly
|
||||
- roughly
|
||||
- rudely
|
||||
- safely
|
||||
- scarcely
|
||||
- scarily
|
||||
- searchingly
|
||||
- sedately
|
||||
- seemingly
|
||||
- selfishly
|
||||
- separately
|
||||
- seriously
|
||||
- shakily
|
||||
- sharply
|
||||
- sheepishly
|
||||
- shrilly
|
||||
- shyly
|
||||
- silently
|
||||
- sleepily
|
||||
- slowly
|
||||
- smoothly
|
||||
- softly
|
||||
- solemnly
|
||||
- solidly
|
||||
- speedily
|
||||
- stealthily
|
||||
- sternly
|
||||
- strictly
|
||||
- suddenly
|
||||
- supposedly
|
||||
- surprisingly
|
||||
- suspiciously
|
||||
- sweetly
|
||||
- swiftly
|
||||
- sympathetically
|
||||
- tenderly
|
||||
- tensely
|
||||
- terribly
|
||||
- thankfully
|
||||
- thoroughly
|
||||
- thoughtfully
|
||||
- tightly
|
||||
- tremendously
|
||||
- triumphantly
|
||||
- truthfully
|
||||
- ultimately
|
||||
- unabashedly
|
||||
- unaccountably
|
||||
- unbearably
|
||||
- unethically
|
||||
- unexpectedly
|
||||
- unfortunately
|
||||
- unimpressively
|
||||
- unnaturally
|
||||
- unnecessarily
|
||||
- urgently
|
||||
- usefully
|
||||
- uselessly
|
||||
- utterly
|
||||
- vacantly
|
||||
- vaguely
|
||||
- vainly
|
||||
- valiantly
|
||||
- vastly
|
||||
- verbally
|
||||
- very
|
||||
- viciously
|
||||
- victoriously
|
||||
- violently
|
||||
- vivaciously
|
||||
- voluntarily
|
||||
- warmly
|
||||
- weakly
|
||||
- wearily
|
||||
- wetly
|
||||
- wholly
|
||||
- wildly
|
||||
- willfully
|
||||
- wisely
|
||||
- woefully
|
||||
- wonderfully
|
||||
- worriedly
|
||||
- yawningly
|
||||
- yearningly
|
||||
- yieldingly
|
||||
- youthfully
|
||||
- zealously
|
||||
- zestfully
|
||||
- zestily
|
11
.styles/Microsoft/Auto.yml
Normal file
@ -0,0 +1,11 @@
|
||||
extends: existence
|
||||
message: "In general, don't hyphenate '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/a/auto
|
||||
ignorecase: true
|
||||
level: error
|
||||
action:
|
||||
name: convert
|
||||
params:
|
||||
- simple
|
||||
tokens:
|
||||
- 'auto-\w+'
|
14
.styles/Microsoft/Avoid.yml
Normal file
@ -0,0 +1,14 @@
|
||||
extends: existence
|
||||
message: "Don't use '%s'. See the A-Z word list for details."
|
||||
# See the A-Z word list
|
||||
link: https://docs.microsoft.com/en-us/style-guide
|
||||
ignorecase: true
|
||||
level: error
|
||||
tokens:
|
||||
- abortion
|
||||
- and so on
|
||||
- app(?:lication)?s? (?:developer|program)
|
||||
- app(?:lication)? file
|
||||
- backbone
|
||||
- backend
|
||||
- contiguous selection
|
50
.styles/Microsoft/Contractions.yml
Normal file
@ -0,0 +1,50 @@
|
||||
extends: substitution
|
||||
message: "Use '%s' instead of '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/word-choice/use-contractions
|
||||
level: error
|
||||
ignorecase: true
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
are not: aren't
|
||||
cannot: can't
|
||||
could not: couldn't
|
||||
did not: didn't
|
||||
do not: don't
|
||||
does not: doesn't
|
||||
has not: hasn't
|
||||
have not: haven't
|
||||
how is: how's
|
||||
is not: isn't
|
||||
|
||||
'it is(?!\.)': it's
|
||||
'it''s(?=\.)': it is
|
||||
|
||||
should not: shouldn't
|
||||
|
||||
"that is(?![.,])": that's
|
||||
'that''s(?=\.)': that is
|
||||
|
||||
'they are(?!\.)': they're
|
||||
'they''re(?=\.)': they are
|
||||
|
||||
was not: wasn't
|
||||
|
||||
'we are(?!\.)': we're
|
||||
'we''re(?=\.)': we are
|
||||
|
||||
'we have(?!\.)': we've
|
||||
'we''ve(?=\.)': we have
|
||||
|
||||
were not: weren't
|
||||
|
||||
'what is(?!\.)': what's
|
||||
'what''s(?=\.)': what is
|
||||
|
||||
'when is(?!\.)': when's
|
||||
'when''s(?=\.)': when is
|
||||
|
||||
'where is(?!\.)': where's
|
||||
'where''s(?=\.)': where is
|
||||
|
||||
will not: won't
|
13
.styles/Microsoft/Dashes.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: existence
|
||||
message: "Remove the spaces around '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/dashes-hyphens/emes
|
||||
ignorecase: true
|
||||
nonword: true
|
||||
level: error
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- trim
|
||||
- " "
|
||||
tokens:
|
||||
- '\s[—–]\s|\s[—–]|[—–]\s'
|
8
.styles/Microsoft/DateFormat.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: Use 'July 31, 2016' format, not '%s'.
|
||||
link: https://docs.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/term-collections/date-time-terms
|
||||
ignorecase: true
|
||||
level: error
|
||||
nonword: true
|
||||
tokens:
|
||||
- '\d{1,2} (?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?) \d{4}'
|
40
.styles/Microsoft/DateNumbers.yml
Normal file
@ -0,0 +1,40 @@
|
||||
extends: existence
|
||||
message: "Don't use ordinal numbers for dates."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers#numbers-in-dates
|
||||
level: error
|
||||
nonword: true
|
||||
ignorecase: true
|
||||
raw:
|
||||
- \b(?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|Jun(?:e)?|Jul(?:y)?|Aug(?:ust)?|Sep(?:tember)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\b\s*
|
||||
tokens:
|
||||
- first
|
||||
- second
|
||||
- third
|
||||
- fourth
|
||||
- fifth
|
||||
- sixth
|
||||
- seventh
|
||||
- eighth
|
||||
- ninth
|
||||
- tenth
|
||||
- eleventh
|
||||
- twelfth
|
||||
- thirteenth
|
||||
- fourteenth
|
||||
- fifteenth
|
||||
- sixteenth
|
||||
- seventeenth
|
||||
- eighteenth
|
||||
- nineteenth
|
||||
- twentieth
|
||||
- twenty-first
|
||||
- twenty-second
|
||||
- twenty-third
|
||||
- twenty-fourth
|
||||
- twenty-fifth
|
||||
- twenty-sixth
|
||||
- twenty-seventh
|
||||
- twenty-eighth
|
||||
- twenty-ninth
|
||||
- thirtieth
|
||||
- thirty-first
|
8
.styles/Microsoft/DateOrder.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "Always spell out the name of the month."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers#numbers-in-dates
|
||||
ignorecase: true
|
||||
level: error
|
||||
nonword: true
|
||||
tokens:
|
||||
- '\b\d{1,2}/\d{1,2}/(?:\d{4}|\d{2})\b'
|
9
.styles/Microsoft/Ellipses.yml
Normal file
@ -0,0 +1,9 @@
|
||||
extends: existence
|
||||
message: "In general, don't use an ellipsis."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/ellipses
|
||||
nonword: true
|
||||
level: warning
|
||||
action:
|
||||
name: remove
|
||||
tokens:
|
||||
- '\.\.\.'
|
16
.styles/Microsoft/FirstPerson.yml
Normal file
@ -0,0 +1,16 @@
|
||||
extends: existence
|
||||
message: "Use first person (such as '%s') sparingly."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/grammar/person
|
||||
ignorecase: true
|
||||
level: warning
|
||||
nonword: true
|
||||
tokens:
|
||||
- (?:^|\s)I(?=\s)
|
||||
- (?:^|\s)I(?=,\s)
|
||||
- \bI'd\b
|
||||
- \bI'll\b
|
||||
- \bI'm\b
|
||||
- \bI've\b
|
||||
- \bme\b
|
||||
- \bmy\b
|
||||
- \bmine\b
|
13
.styles/Microsoft/Foreign.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: substitution
|
||||
message: "Use '%s' instead of '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/word-choice/use-us-spelling-avoid-non-english-words
|
||||
ignorecase: true
|
||||
level: error
|
||||
nonword: true
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
'\b(?:eg|e\.g\.)[\s,]': for example
|
||||
'\b(?:ie|i\.e\.)[\s,]': that is
|
||||
'\b(?:viz\.)[\s,]': namely
|
||||
'\b(?:ergo)[\s,]': therefore
|
8
.styles/Microsoft/Gender.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "Don't use '%s'."
|
||||
link: https://github.com/MicrosoftDocs/microsoft-style-guide/blob/master/styleguide/grammar/nouns-pronouns.md#pronouns-and-gender
|
||||
level: error
|
||||
ignorecase: true
|
||||
tokens:
|
||||
- he/she
|
||||
- s/he
|
42
.styles/Microsoft/GenderBias.yml
Normal file
@ -0,0 +1,42 @@
|
||||
extends: substitution
|
||||
message: "Consider using '%s' instead of '%s'."
|
||||
ignorecase: true
|
||||
level: error
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
(?:alumna|alumnus): graduate
|
||||
(?:alumnae|alumni): graduates
|
||||
air(?:m[ae]n|wom[ae]n): pilot(s)
|
||||
anchor(?:m[ae]n|wom[ae]n): anchor(s)
|
||||
authoress: author
|
||||
camera(?:m[ae]n|wom[ae]n): camera operator(s)
|
||||
door(?:m[ae]n|wom[ae]n): concierge(s)
|
||||
draft(?:m[ae]n|wom[ae]n): drafter(s)
|
||||
fire(?:m[ae]n|wom[ae]n): firefighter(s)
|
||||
fisher(?:m[ae]n|wom[ae]n): fisher(s)
|
||||
fresh(?:m[ae]n|wom[ae]n): first-year student(s)
|
||||
garbage(?:m[ae]n|wom[ae]n): waste collector(s)
|
||||
lady lawyer: lawyer
|
||||
ladylike: courteous
|
||||
mail(?:m[ae]n|wom[ae]n): mail carriers
|
||||
man and wife: husband and wife
|
||||
man enough: strong enough
|
||||
mankind: human kind
|
||||
manmade: manufactured
|
||||
manpower: personnel
|
||||
middle(?:m[ae]n|wom[ae]n): intermediary
|
||||
news(?:m[ae]n|wom[ae]n): journalist(s)
|
||||
ombuds(?:man|woman): ombuds
|
||||
oneupmanship: upstaging
|
||||
poetess: poet
|
||||
police(?:m[ae]n|wom[ae]n): police officer(s)
|
||||
repair(?:m[ae]n|wom[ae]n): technician(s)
|
||||
sales(?:m[ae]n|wom[ae]n): salesperson or sales people
|
||||
service(?:m[ae]n|wom[ae]n): soldier(s)
|
||||
steward(?:ess)?: flight attendant
|
||||
tribes(?:m[ae]n|wom[ae]n): tribe member(s)
|
||||
waitress: waiter
|
||||
woman doctor: doctor
|
||||
woman scientist[s]?: scientist(s)
|
||||
work(?:m[ae]n|wom[ae]n): worker(s)
|
11
.styles/Microsoft/GeneralURL.yml
Normal file
@ -0,0 +1,11 @@
|
||||
extends: existence
|
||||
message: "For a general audience, use 'address' rather than 'URL'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/urls-web-addresses
|
||||
level: warning
|
||||
action:
|
||||
name: replace
|
||||
params:
|
||||
- URL
|
||||
- address
|
||||
tokens:
|
||||
- URL
|
7
.styles/Microsoft/HeadingAcronyms.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: existence
|
||||
message: "Avoid using acronyms in a title or heading."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/acronyms#be-careful-with-acronyms-in-titles-and-headings
|
||||
level: warning
|
||||
scope: heading
|
||||
tokens:
|
||||
- '[A-Z]{2,4}'
|
8
.styles/Microsoft/HeadingColons.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "Capitalize '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/colons
|
||||
nonword: true
|
||||
level: error
|
||||
scope: heading
|
||||
tokens:
|
||||
- ':\s[a-z]'
|
13
.styles/Microsoft/HeadingPunctuation.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: existence
|
||||
message: "Don't use end punctuation in headings."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/periods
|
||||
nonword: true
|
||||
level: warning
|
||||
scope: heading
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- trim_right
|
||||
- ".?!"
|
||||
tokens:
|
||||
- "[a-z][.?!]$"
|
28
.styles/Microsoft/Headings.yml
Normal file
@ -0,0 +1,28 @@
|
||||
extends: capitalization
|
||||
message: "'%s' should use sentence-style capitalization."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/capitalization
|
||||
level: suggestion
|
||||
scope: heading
|
||||
match: $sentence
|
||||
indicators:
|
||||
- ':'
|
||||
exceptions:
|
||||
- Azure
|
||||
- CLI
|
||||
- Code
|
||||
- Cosmos
|
||||
- Docker
|
||||
- Emmet
|
||||
- I
|
||||
- Kubernetes
|
||||
- Linux
|
||||
- macOS
|
||||
- Marketplace
|
||||
- MongoDB
|
||||
- REPL
|
||||
- Studio
|
||||
- TypeScript
|
||||
- URLs
|
||||
- Visual
|
||||
- VS
|
||||
- Windows
|
14
.styles/Microsoft/Hyphens.yml
Normal file
@ -0,0 +1,14 @@
|
||||
extends: existence
|
||||
message: "'%s' doesn't need a hyphen."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/dashes-hyphens/hyphens
|
||||
level: warning
|
||||
ignorecase: false
|
||||
nonword: true
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- regex
|
||||
- "-"
|
||||
- " "
|
||||
tokens:
|
||||
- '\b[^\s-]+ly-\w+\b'
|
13
.styles/Microsoft/Negative.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: existence
|
||||
message: "Form a negative number with an en dash, not a hyphen."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers
|
||||
nonword: true
|
||||
level: error
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- regex
|
||||
- "-"
|
||||
- "–"
|
||||
tokens:
|
||||
- '(?<=\s)-\d+(?:\.\d+)?\b'
|
13
.styles/Microsoft/Ordinal.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: existence
|
||||
message: "Don't add -ly to an ordinal number."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers
|
||||
level: error
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- trim
|
||||
- ly
|
||||
tokens:
|
||||
- firstly
|
||||
- secondly
|
||||
- thirdly
|
8
.styles/Microsoft/OxfordComma.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "Use the Oxford comma in '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/commas
|
||||
scope: sentence
|
||||
level: suggestion
|
||||
nonword: true
|
||||
tokens:
|
||||
- '(?:[^\s,]+,){1,} \w+ (?:and|or) \w+[.?!]'
|
183
.styles/Microsoft/Passive.yml
Normal file
@ -0,0 +1,183 @@
|
||||
extends: existence
|
||||
message: "'%s' looks like passive voice."
|
||||
ignorecase: true
|
||||
level: suggestion
|
||||
raw:
|
||||
- \b(am|are|were|being|is|been|was|be)\b\s*
|
||||
tokens:
|
||||
- '[\w]+ed'
|
||||
- awoken
|
||||
- beat
|
||||
- become
|
||||
- been
|
||||
- begun
|
||||
- bent
|
||||
- beset
|
||||
- bet
|
||||
- bid
|
||||
- bidden
|
||||
- bitten
|
||||
- bled
|
||||
- blown
|
||||
- born
|
||||
- bought
|
||||
- bound
|
||||
- bred
|
||||
- broadcast
|
||||
- broken
|
||||
- brought
|
||||
- built
|
||||
- burnt
|
||||
- burst
|
||||
- cast
|
||||
- caught
|
||||
- chosen
|
||||
- clung
|
||||
- come
|
||||
- cost
|
||||
- crept
|
||||
- cut
|
||||
- dealt
|
||||
- dived
|
||||
- done
|
||||
- drawn
|
||||
- dreamt
|
||||
- driven
|
||||
- drunk
|
||||
- dug
|
||||
- eaten
|
||||
- fallen
|
||||
- fed
|
||||
- felt
|
||||
- fit
|
||||
- fled
|
||||
- flown
|
||||
- flung
|
||||
- forbidden
|
||||
- foregone
|
||||
- forgiven
|
||||
- forgotten
|
||||
- forsaken
|
||||
- fought
|
||||
- found
|
||||
- frozen
|
||||
- given
|
||||
- gone
|
||||
- gotten
|
||||
- ground
|
||||
- grown
|
||||
- heard
|
||||
- held
|
||||
- hidden
|
||||
- hit
|
||||
- hung
|
||||
- hurt
|
||||
- kept
|
||||
- knelt
|
||||
- knit
|
||||
- known
|
||||
- laid
|
||||
- lain
|
||||
- leapt
|
||||
- learnt
|
||||
- led
|
||||
- left
|
||||
- lent
|
||||
- let
|
||||
- lighted
|
||||
- lost
|
||||
- made
|
||||
- meant
|
||||
- met
|
||||
- misspelt
|
||||
- mistaken
|
||||
- mown
|
||||
- overcome
|
||||
- overdone
|
||||
- overtaken
|
||||
- overthrown
|
||||
- paid
|
||||
- pled
|
||||
- proven
|
||||
- put
|
||||
- quit
|
||||
- read
|
||||
- rid
|
||||
- ridden
|
||||
- risen
|
||||
- run
|
||||
- rung
|
||||
- said
|
||||
- sat
|
||||
- sawn
|
||||
- seen
|
||||
- sent
|
||||
- set
|
||||
- sewn
|
||||
- shaken
|
||||
- shaven
|
||||
- shed
|
||||
- shod
|
||||
- shone
|
||||
- shorn
|
||||
- shot
|
||||
- shown
|
||||
- shrunk
|
||||
- shut
|
||||
- slain
|
||||
- slept
|
||||
- slid
|
||||
- slit
|
||||
- slung
|
||||
- smitten
|
||||
- sold
|
||||
- sought
|
||||
- sown
|
||||
- sped
|
||||
- spent
|
||||
- spilt
|
||||
- spit
|
||||
- split
|
||||
- spoken
|
||||
- spread
|
||||
- sprung
|
||||
- spun
|
||||
- stolen
|
||||
- stood
|
||||
- stridden
|
||||
- striven
|
||||
- struck
|
||||
- strung
|
||||
- stuck
|
||||
- stung
|
||||
- stunk
|
||||
- sung
|
||||
- sunk
|
||||
- swept
|
||||
- swollen
|
||||
- sworn
|
||||
- swum
|
||||
- swung
|
||||
- taken
|
||||
- taught
|
||||
- thought
|
||||
- thrived
|
||||
- thrown
|
||||
- thrust
|
||||
- told
|
||||
- torn
|
||||
- trodden
|
||||
- understood
|
||||
- upheld
|
||||
- upset
|
||||
- wed
|
||||
- wept
|
||||
- withheld
|
||||
- withstood
|
||||
- woken
|
||||
- won
|
||||
- worn
|
||||
- wound
|
||||
- woven
|
||||
- written
|
||||
- wrung
|
7
.styles/Microsoft/Percentages.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: existence
|
||||
message: "Use a numeral plus the units."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers
|
||||
nonword: true
|
||||
level: error
|
||||
tokens:
|
||||
- '\b[a-zA-Z]+\spercent\b'
|
7
.styles/Microsoft/Plurals.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: existence
|
||||
message: "Don't add '%s' to a singular noun. Use plural instead."
|
||||
ignorecase: true
|
||||
level: error
|
||||
link: https://learn.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/s/s-es
|
||||
raw:
|
||||
- '\(s\)|\(es\)'
|
7
.styles/Microsoft/Quotes.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: existence
|
||||
message: 'Punctuation should be inside the quotes.'
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/quotation-marks
|
||||
level: error
|
||||
nonword: true
|
||||
tokens:
|
||||
- '["“][^"”“]+["”][.,]'
|
13
.styles/Microsoft/RangeTime.yml
Normal file
@ -0,0 +1,13 @@
|
||||
extends: existence
|
||||
message: "Use 'to' instead of a dash in '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/numbers
|
||||
nonword: true
|
||||
level: error
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- regex
|
||||
- "[-–]"
|
||||
- "to"
|
||||
tokens:
|
||||
- '\b(?:AM|PM)\s?[-–]\s?.+(?:AM|PM)\b'
|
8
.styles/Microsoft/Semicolon.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "Try to simplify this sentence."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/semicolons
|
||||
nonword: true
|
||||
scope: sentence
|
||||
level: suggestion
|
||||
tokens:
|
||||
- ';'
|
7
.styles/Microsoft/SentenceLength.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: occurrence
|
||||
message: "Try to keep sentences short (< 30 words)."
|
||||
scope: sentence
|
||||
level: suggestion
|
||||
max: 30
|
||||
token: \b(\w+)\b
|
||||
|
8
.styles/Microsoft/Spacing.yml
Normal file
@ -0,0 +1,8 @@
|
||||
extends: existence
|
||||
message: "'%s' should have one space."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/periods
|
||||
level: error
|
||||
nonword: true
|
||||
tokens:
|
||||
- '[a-z][.?!] {2,}[A-Z]'
|
||||
- '[a-z][.?!][A-Z]'
|
7
.styles/Microsoft/Suspended.yml
Normal file
@ -0,0 +1,7 @@
|
||||
extends: existence
|
||||
message: "Don't use '%s' unless space is limited."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/punctuation/dashes-hyphens/hyphens
|
||||
ignorecase: true
|
||||
level: warning
|
||||
tokens:
|
||||
- '\w+- and \w+-'
|
42
.styles/Microsoft/Terms.yml
Normal file
@ -0,0 +1,42 @@
|
||||
extends: substitution
|
||||
message: "Prefer '%s' over '%s'."
|
||||
# Term preferences should follow the Microsoft style guide; for example, see:
|
||||
link: https://learn.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/a/adapter
|
||||
level: warning
|
||||
ignorecase: true
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
"(?:agent|virtual assistant|intelligent personal assistant)": personal digital assistant
|
||||
"(?:assembler|machine language)": assembly language
|
||||
"(?:drive C:|drive C>|C: drive)": drive C
|
||||
"(?:internet bot|web robot)s?": bot(s)
|
||||
"(?:microsoft cloud|the cloud)": cloud
|
||||
"(?:mobile|smart) ?phone": phone
|
||||
"24/7": every day
|
||||
"audio(?:-| )book": audiobook
|
||||
"back(?:-| )light": backlight
|
||||
"chat ?bots?": chatbot(s)
|
||||
adaptor: adapter
|
||||
administrate: administer
|
||||
afterwards: afterward
|
||||
alphabetic: alphabetical
|
||||
alphanumerical: alphanumeric
|
||||
an URL: a URL
|
||||
anti-aliasing: antialiasing
|
||||
anti-malware: antimalware
|
||||
anti-spyware: antispyware
|
||||
anti-virus: antivirus
|
||||
appendixes: appendices
|
||||
artificial intelligence: AI
|
||||
caap: CaaP
|
||||
conversation-as-a-platform: conversation as a platform
|
||||
eb: EB
|
||||
gb: GB
|
||||
gbps: Gbps
|
||||
kb: KB
|
||||
keypress: keystroke
|
||||
mb: MB
|
||||
pb: PB
|
||||
tb: TB
|
||||
zb: ZB
|
9
.styles/Microsoft/URLFormat.yml
Normal file
@ -0,0 +1,9 @@
|
||||
extends: substitution
|
||||
message: Use 'of' (not 'for') to describe the relationship of the word URL to a resource.
|
||||
ignorecase: true
|
||||
link: https://learn.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/u/url
|
||||
level: suggestion
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
URL for: URL of
|
16
.styles/Microsoft/Units.yml
Normal file
@ -0,0 +1,16 @@
|
||||
extends: existence
|
||||
message: "Don't spell out the number in '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/a-z-word-list-term-collections/term-collections/units-of-measure-terms
|
||||
level: error
|
||||
raw:
|
||||
- '[a-zA-Z]+\s'
|
||||
tokens:
|
||||
- '(?:centi|milli)?meters'
|
||||
- '(?:kilo)?grams'
|
||||
- '(?:kilo)?meters'
|
||||
- '(?:mega)?pixels'
|
||||
- cm
|
||||
- inches
|
||||
- lb
|
||||
- miles
|
||||
- pounds
|
25
.styles/Microsoft/Vocab.yml
Normal file
@ -0,0 +1,25 @@
|
||||
extends: existence
|
||||
message: "Verify your use of '%s' with the A-Z word list."
|
||||
link: 'https://docs.microsoft.com/en-us/style-guide'
|
||||
level: suggestion
|
||||
ignorecase: true
|
||||
tokens:
|
||||
- above
|
||||
- accessible
|
||||
- actionable
|
||||
- against
|
||||
- alarm
|
||||
- alert
|
||||
- alias
|
||||
- allows?
|
||||
- and/or
|
||||
- as well as
|
||||
- assure
|
||||
- author
|
||||
- avg
|
||||
- beta
|
||||
- ensure
|
||||
- he
|
||||
- insure
|
||||
- sample
|
||||
- she
|
11
.styles/Microsoft/We.yml
Normal file
@ -0,0 +1,11 @@
|
||||
extends: existence
|
||||
message: "Try to avoid using first-person plural like '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/grammar/person#avoid-first-person-plural
|
||||
level: warning
|
||||
ignorecase: true
|
||||
tokens:
|
||||
- we
|
||||
- we'(?:ve|re)
|
||||
- ours?
|
||||
- us
|
||||
- let's
|
127
.styles/Microsoft/Wordiness.yml
Normal file
@ -0,0 +1,127 @@
|
||||
extends: substitution
|
||||
message: "Consider using '%s' instead of '%s'."
|
||||
link: https://docs.microsoft.com/en-us/style-guide/word-choice/use-simple-words-concise-sentences
|
||||
ignorecase: true
|
||||
level: suggestion
|
||||
action:
|
||||
name: replace
|
||||
swap:
|
||||
(?:extract|take away|eliminate): remove
|
||||
(?:in order to|as a means to): to
|
||||
(?:inform|let me know): tell
|
||||
(?:previous|prior) to: before
|
||||
(?:utilize|make use of): use
|
||||
a (?:large )?majority of: most
|
||||
a (?:large )?number of: many
|
||||
a myriad of: myriad
|
||||
adversely impact: hurt
|
||||
all across: across
|
||||
all of (?!a sudden|these): all
|
||||
all of a sudden: suddenly
|
||||
all of these: these
|
||||
all-time record: record
|
||||
almost all: most
|
||||
almost never: seldom
|
||||
along the lines of: similar to
|
||||
an adequate number of: enough
|
||||
an appreciable number of: many
|
||||
an estimated: about
|
||||
any and all: all
|
||||
are in agreement: agree
|
||||
as a matter of fact: in fact
|
||||
as a means of: to
|
||||
as a result of: because of
|
||||
as of yet: yet
|
||||
as per: per
|
||||
at a later date: later
|
||||
at all times: always
|
||||
at the present time: now
|
||||
at this point in time: at this point
|
||||
based in large part on: based on
|
||||
based on the fact that: because
|
||||
basic necessity: necessity
|
||||
because of the fact that: because
|
||||
came to a realization: realized
|
||||
came to an abrupt end: ended abruptly
|
||||
carry out an evaluation of: evaluate
|
||||
close down: close
|
||||
closed down: closed
|
||||
complete stranger: stranger
|
||||
completely separate: separate
|
||||
concerning the matter of: regarding
|
||||
conduct a review of: review
|
||||
conduct an investigation: investigate
|
||||
conduct experiments: experiment
|
||||
continue on: continue
|
||||
despite the fact that: although
|
||||
disappear from sight: disappear
|
||||
doomed to fail: doomed
|
||||
drag and drop: drag
|
||||
drag-and-drop: drag
|
||||
due to the fact that: because
|
||||
during the period of: during
|
||||
during the time that: while
|
||||
emergency situation: emergency
|
||||
establish connectivity: connect
|
||||
except when: unless
|
||||
excessive number: too many
|
||||
extend an invitation: invite
|
||||
fall down: fall
|
||||
fell down: fell
|
||||
for the duration of: during
|
||||
gather together: gather
|
||||
has the ability to: can
|
||||
has the capacity to: can
|
||||
has the opportunity to: could
|
||||
hold a meeting: meet
|
||||
if this is not the case: if not
|
||||
in a careful manner: carefully
|
||||
in a thoughtful manner: thoughtfully
|
||||
in a timely manner: timely
|
||||
in addition: also
|
||||
in an effort to: to
|
||||
in between: between
|
||||
in lieu of: instead of
|
||||
in many cases: often
|
||||
in most cases: usually
|
||||
in order to: to
|
||||
in some cases: sometimes
|
||||
in spite of the fact that: although
|
||||
in spite of: despite
|
||||
in the (?:very )?near future: soon
|
||||
in the event that: if
|
||||
in the neighborhood of: roughly
|
||||
in the vicinity of: close to
|
||||
it would appear that: apparently
|
||||
lift up: lift
|
||||
made reference to: referred to
|
||||
make reference to: refer to
|
||||
mix together: mix
|
||||
none at all: none
|
||||
not in a position to: unable
|
||||
not possible: impossible
|
||||
of major importance: important
|
||||
perform an assessment of: assess
|
||||
pertaining to: about
|
||||
place an order: order
|
||||
plays a key role in: is essential to
|
||||
present time: now
|
||||
readily apparent: apparent
|
||||
some of the: some
|
||||
span across: span
|
||||
subsequent to: after
|
||||
successfully complete: complete
|
||||
'sufficient number(?: of)?': enough
|
||||
take action: act
|
||||
take into account: consider
|
||||
the question as to whether: whether
|
||||
there is no doubt but that: doubtless
|
||||
this day and age: this age
|
||||
this is a subject that: this subject
|
||||
time (?:frame|period): time
|
||||
under the provisions of: under
|
||||
until such time as: until
|
||||
used for fuel purposes: used for fuel
|
||||
whether or not: whether
|
||||
with regard to: regarding
|
||||
with the exception of: except for
|
4
.styles/Microsoft/meta.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"feed": "https://github.com/errata-ai/Microsoft/releases.atom",
|
||||
"vale_version": ">=1.0.0"
|
||||
}
|
702
.styles/write-good/Cliches.yml
Normal file
@ -0,0 +1,702 @@
|
||||
extends: existence
|
||||
message: "Try to avoid using clichés like '%s'."
|
||||
ignorecase: true
|
||||
level: warning
|
||||
tokens:
|
||||
- a chip off the old block
|
||||
- a clean slate
|
||||
- a dark and stormy night
|
||||
- a far cry
|
||||
- a fine kettle of fish
|
||||
- a loose cannon
|
||||
- a penny saved is a penny earned
|
||||
- a tough row to hoe
|
||||
- a word to the wise
|
||||
- ace in the hole
|
||||
- acid test
|
||||
- add insult to injury
|
||||
- against all odds
|
||||
- air your dirty laundry
|
||||
- all fun and games
|
||||
- all in a day's work
|
||||
- all talk, no action
|
||||
- all thumbs
|
||||
- all your eggs in one basket
|
||||
- all's fair in love and war
|
||||
- all's well that ends well
|
||||
- almighty dollar
|
||||
- American as apple pie
|
||||
- an axe to grind
|
||||
- another day, another dollar
|
||||
- armed to the teeth
|
||||
- as luck would have it
|
||||
- as old as time
|
||||
- as the crow flies
|
||||
- at loose ends
|
||||
- at my wits end
|
||||
- avoid like the plague
|
||||
- babe in the woods
|
||||
- back against the wall
|
||||
- back in the saddle
|
||||
- back to square one
|
||||
- back to the drawing board
|
||||
- bad to the bone
|
||||
- badge of honor
|
||||
- bald faced liar
|
||||
- ballpark figure
|
||||
- banging your head against a brick wall
|
||||
- baptism by fire
|
||||
- barking up the wrong tree
|
||||
- bat out of hell
|
||||
- be all and end all
|
||||
- beat a dead horse
|
||||
- beat around the bush
|
||||
- been there, done that
|
||||
- beggars can't be choosers
|
||||
- behind the eight ball
|
||||
- bend over backwards
|
||||
- benefit of the doubt
|
||||
- bent out of shape
|
||||
- best thing since sliced bread
|
||||
- bet your bottom dollar
|
||||
- better half
|
||||
- better late than never
|
||||
- better mousetrap
|
||||
- better safe than sorry
|
||||
- between a rock and a hard place
|
||||
- beyond the pale
|
||||
- bide your time
|
||||
- big as life
|
||||
- big cheese
|
||||
- big fish in a small pond
|
||||
- big man on campus
|
||||
- bigger they are the harder they fall
|
||||
- bird in the hand
|
||||
- bird's eye view
|
||||
- birds and the bees
|
||||
- birds of a feather flock together
|
||||
- bit the hand that feeds you
|
||||
- bite the bullet
|
||||
- bite the dust
|
||||
- bitten off more than he can chew
|
||||
- black as coal
|
||||
- black as pitch
|
||||
- black as the ace of spades
|
||||
- blast from the past
|
||||
- bleeding heart
|
||||
- blessing in disguise
|
||||
- blind ambition
|
||||
- blind as a bat
|
||||
- blind leading the blind
|
||||
- blood is thicker than water
|
||||
- blood sweat and tears
|
||||
- blow off steam
|
||||
- blow your own horn
|
||||
- blushing bride
|
||||
- boils down to
|
||||
- bolt from the blue
|
||||
- bone to pick
|
||||
- bored stiff
|
||||
- bored to tears
|
||||
- bottomless pit
|
||||
- boys will be boys
|
||||
- bright and early
|
||||
- brings home the bacon
|
||||
- broad across the beam
|
||||
- broken record
|
||||
- brought back to reality
|
||||
- bull by the horns
|
||||
- bull in a china shop
|
||||
- burn the midnight oil
|
||||
- burning question
|
||||
- burning the candle at both ends
|
||||
- burst your bubble
|
||||
- bury the hatchet
|
||||
- busy as a bee
|
||||
- by hook or by crook
|
||||
- call a spade a spade
|
||||
- called onto the carpet
|
||||
- calm before the storm
|
||||
- can of worms
|
||||
- can't cut the mustard
|
||||
- can't hold a candle to
|
||||
- case of mistaken identity
|
||||
- cat got your tongue
|
||||
- cat's meow
|
||||
- caught in the crossfire
|
||||
- caught red-handed
|
||||
- checkered past
|
||||
- chomping at the bit
|
||||
- cleanliness is next to godliness
|
||||
- clear as a bell
|
||||
- clear as mud
|
||||
- close to the vest
|
||||
- cock and bull story
|
||||
- cold shoulder
|
||||
- come hell or high water
|
||||
- cool as a cucumber
|
||||
- cool, calm, and collected
|
||||
- cost a king's ransom
|
||||
- count your blessings
|
||||
- crack of dawn
|
||||
- crash course
|
||||
- creature comforts
|
||||
- cross that bridge when you come to it
|
||||
- crushing blow
|
||||
- cry like a baby
|
||||
- cry me a river
|
||||
- cry over spilt milk
|
||||
- crystal clear
|
||||
- curiosity killed the cat
|
||||
- cut and dried
|
||||
- cut through the red tape
|
||||
- cut to the chase
|
||||
- cute as a bugs ear
|
||||
- cute as a button
|
||||
- cute as a puppy
|
||||
- cuts to the quick
|
||||
- dark before the dawn
|
||||
- day in, day out
|
||||
- dead as a doornail
|
||||
- devil is in the details
|
||||
- dime a dozen
|
||||
- divide and conquer
|
||||
- dog and pony show
|
||||
- dog days
|
||||
- dog eat dog
|
||||
- dog tired
|
||||
- don't burn your bridges
|
||||
- don't count your chickens
|
||||
- don't look a gift horse in the mouth
|
||||
- don't rock the boat
|
||||
- don't step on anyone's toes
|
||||
- don't take any wooden nickels
|
||||
- down and out
|
||||
- down at the heels
|
||||
- down in the dumps
|
||||
- down the hatch
|
||||
- down to earth
|
||||
- draw the line
|
||||
- dressed to kill
|
||||
- dressed to the nines
|
||||
- drives me up the wall
|
||||
- dull as dishwater
|
||||
- dyed in the wool
|
||||
- eagle eye
|
||||
- ear to the ground
|
||||
- early bird catches the worm
|
||||
- easier said than done
|
||||
- easy as pie
|
||||
- eat your heart out
|
||||
- eat your words
|
||||
- eleventh hour
|
||||
- even the playing field
|
||||
- every dog has its day
|
||||
- every fiber of my being
|
||||
- everything but the kitchen sink
|
||||
- eye for an eye
|
||||
- face the music
|
||||
- facts of life
|
||||
- fair weather friend
|
||||
- fall by the wayside
|
||||
- fan the flames
|
||||
- feast or famine
|
||||
- feather your nest
|
||||
- feathered friends
|
||||
- few and far between
|
||||
- fifteen minutes of fame
|
||||
- filthy vermin
|
||||
- fine kettle of fish
|
||||
- fish out of water
|
||||
- fishing for a compliment
|
||||
- fit as a fiddle
|
||||
- fit the bill
|
||||
- fit to be tied
|
||||
- flash in the pan
|
||||
- flat as a pancake
|
||||
- flip your lid
|
||||
- flog a dead horse
|
||||
- fly by night
|
||||
- fly the coop
|
||||
- follow your heart
|
||||
- for all intents and purposes
|
||||
- for the birds
|
||||
- for what it's worth
|
||||
- force of nature
|
||||
- force to be reckoned with
|
||||
- forgive and forget
|
||||
- fox in the henhouse
|
||||
- free and easy
|
||||
- free as a bird
|
||||
- fresh as a daisy
|
||||
- full steam ahead
|
||||
- fun in the sun
|
||||
- garbage in, garbage out
|
||||
- gentle as a lamb
|
||||
- get a kick out of
|
||||
- get a leg up
|
||||
- get down and dirty
|
||||
- get the lead out
|
||||
- get to the bottom of
|
||||
- get your feet wet
|
||||
- gets my goat
|
||||
- gilding the lily
|
||||
- give and take
|
||||
- go against the grain
|
||||
- go at it tooth and nail
|
||||
- go for broke
|
||||
- go him one better
|
||||
- go the extra mile
|
||||
- go with the flow
|
||||
- goes without saying
|
||||
- good as gold
|
||||
- good deed for the day
|
||||
- good things come to those who wait
|
||||
- good time was had by all
|
||||
- good times were had by all
|
||||
- greased lightning
|
||||
- greek to me
|
||||
- green thumb
|
||||
- green-eyed monster
|
||||
- grist for the mill
|
||||
- growing like a weed
|
||||
- hair of the dog
|
||||
- hand to mouth
|
||||
- happy as a clam
|
||||
- happy as a lark
|
||||
- hasn't a clue
|
||||
- have a nice day
|
||||
- have high hopes
|
||||
- have the last laugh
|
||||
- haven't got a row to hoe
|
||||
- head honcho
|
||||
- head over heels
|
||||
- hear a pin drop
|
||||
- heard it through the grapevine
|
||||
- heart's content
|
||||
- heavy as lead
|
||||
- hem and haw
|
||||
- high and dry
|
||||
- high and mighty
|
||||
- high as a kite
|
||||
- hit paydirt
|
||||
- hold your head up high
|
||||
- hold your horses
|
||||
- hold your own
|
||||
- hold your tongue
|
||||
- honest as the day is long
|
||||
- horns of a dilemma
|
||||
- horse of a different color
|
||||
- hot under the collar
|
||||
- hour of need
|
||||
- I beg to differ
|
||||
- icing on the cake
|
||||
- if the shoe fits
|
||||
- if the shoe were on the other foot
|
||||
- in a jam
|
||||
- in a jiffy
|
||||
- in a nutshell
|
||||
- in a pig's eye
|
||||
- in a pinch
|
||||
- in a word
|
||||
- in hot water
|
||||
- in the gutter
|
||||
- in the nick of time
|
||||
- in the thick of it
|
||||
- in your dreams
|
||||
- it ain't over till the fat lady sings
|
||||
- it goes without saying
|
||||
- it takes all kinds
|
||||
- it takes one to know one
|
||||
- it's a small world
|
||||
- it's only a matter of time
|
||||
- ivory tower
|
||||
- Jack of all trades
|
||||
- jockey for position
|
||||
- jog your memory
|
||||
- joined at the hip
|
||||
- judge a book by its cover
|
||||
- jump down your throat
|
||||
- jump in with both feet
|
||||
- jump on the bandwagon
|
||||
- jump the gun
|
||||
- jump to conclusions
|
||||
- just a hop, skip, and a jump
|
||||
- just the ticket
|
||||
- justice is blind
|
||||
- keep a stiff upper lip
|
||||
- keep an eye on
|
||||
- keep it simple, stupid
|
||||
- keep the home fires burning
|
||||
- keep up with the Joneses
|
||||
- keep your chin up
|
||||
- keep your fingers crossed
|
||||
- kick the bucket
|
||||
- kick up your heels
|
||||
- kick your feet up
|
||||
- kid in a candy store
|
||||
- kill two birds with one stone
|
||||
- kiss of death
|
||||
- knock it out of the park
|
||||
- knock on wood
|
||||
- knock your socks off
|
||||
- know him from Adam
|
||||
- know the ropes
|
||||
- know the score
|
||||
- knuckle down
|
||||
- knuckle sandwich
|
||||
- knuckle under
|
||||
- labor of love
|
||||
- ladder of success
|
||||
- land on your feet
|
||||
- lap of luxury
|
||||
- last but not least
|
||||
- last hurrah
|
||||
- last-ditch effort
|
||||
- law of the jungle
|
||||
- law of the land
|
||||
- lay down the law
|
||||
- leaps and bounds
|
||||
- let sleeping dogs lie
|
||||
- let the cat out of the bag
|
||||
- let the good times roll
|
||||
- let your hair down
|
||||
- let's talk turkey
|
||||
- letter perfect
|
||||
- lick your wounds
|
||||
- lies like a rug
|
||||
- life's a bitch
|
||||
- life's a grind
|
||||
- light at the end of the tunnel
|
||||
- lighter than a feather
|
||||
- lighter than air
|
||||
- like clockwork
|
||||
- like father like son
|
||||
- like taking candy from a baby
|
||||
- like there's no tomorrow
|
||||
- lion's share
|
||||
- live and learn
|
||||
- live and let live
|
||||
- long and short of it
|
||||
- long lost love
|
||||
- look before you leap
|
||||
- look down your nose
|
||||
- look what the cat dragged in
|
||||
- looking a gift horse in the mouth
|
||||
- looks like death warmed over
|
||||
- loose cannon
|
||||
- lose your head
|
||||
- lose your temper
|
||||
- loud as a horn
|
||||
- lounge lizard
|
||||
- loved and lost
|
||||
- low man on the totem pole
|
||||
- luck of the draw
|
||||
- luck of the Irish
|
||||
- make hay while the sun shines
|
||||
- make money hand over fist
|
||||
- make my day
|
||||
- make the best of a bad situation
|
||||
- make the best of it
|
||||
- make your blood boil
|
||||
- man of few words
|
||||
- man's best friend
|
||||
- mark my words
|
||||
- meaningful dialogue
|
||||
- missed the boat on that one
|
||||
- moment in the sun
|
||||
- moment of glory
|
||||
- moment of truth
|
||||
- money to burn
|
||||
- more power to you
|
||||
- more than one way to skin a cat
|
||||
- movers and shakers
|
||||
- moving experience
|
||||
- naked as a jaybird
|
||||
- naked truth
|
||||
- neat as a pin
|
||||
- needle in a haystack
|
||||
- needless to say
|
||||
- neither here nor there
|
||||
- never look back
|
||||
- never say never
|
||||
- nip and tuck
|
||||
- nip it in the bud
|
||||
- no guts, no glory
|
||||
- no love lost
|
||||
- no pain, no gain
|
||||
- no skin off my back
|
||||
- no stone unturned
|
||||
- no time like the present
|
||||
- no use crying over spilled milk
|
||||
- nose to the grindstone
|
||||
- not a hope in hell
|
||||
- not a minute's peace
|
||||
- not in my backyard
|
||||
- not playing with a full deck
|
||||
- not the end of the world
|
||||
- not written in stone
|
||||
- nothing to sneeze at
|
||||
- nothing ventured nothing gained
|
||||
- now we're cooking
|
||||
- off the top of my head
|
||||
- off the wagon
|
||||
- off the wall
|
||||
- old hat
|
||||
- older and wiser
|
||||
- older than dirt
|
||||
- older than Methuselah
|
||||
- on a roll
|
||||
- on cloud nine
|
||||
- on pins and needles
|
||||
- on the bandwagon
|
||||
- on the money
|
||||
- on the nose
|
||||
- on the rocks
|
||||
- on the spot
|
||||
- on the tip of my tongue
|
||||
- on the wagon
|
||||
- on thin ice
|
||||
- once bitten, twice shy
|
||||
- one bad apple doesn't spoil the bushel
|
||||
- one born every minute
|
||||
- one brick short
|
||||
- one foot in the grave
|
||||
- one in a million
|
||||
- one red cent
|
||||
- only game in town
|
||||
- open a can of worms
|
||||
- open and shut case
|
||||
- open the flood gates
|
||||
- opportunity doesn't knock twice
|
||||
- out of pocket
|
||||
- out of sight, out of mind
|
||||
- out of the frying pan into the fire
|
||||
- out of the woods
|
||||
- out on a limb
|
||||
- over a barrel
|
||||
- over the hump
|
||||
- pain and suffering
|
||||
- pain in the
|
||||
- panic button
|
||||
- par for the course
|
||||
- part and parcel
|
||||
- party pooper
|
||||
- pass the buck
|
||||
- patience is a virtue
|
||||
- pay through the nose
|
||||
- penny pincher
|
||||
- perfect storm
|
||||
- pig in a poke
|
||||
- pile it on
|
||||
- pillar of the community
|
||||
- pin your hopes on
|
||||
- pitter patter of little feet
|
||||
- plain as day
|
||||
- plain as the nose on your face
|
||||
- play by the rules
|
||||
- play your cards right
|
||||
- playing the field
|
||||
- playing with fire
|
||||
- pleased as punch
|
||||
- plenty of fish in the sea
|
||||
- point with pride
|
||||
- poor as a church mouse
|
||||
- pot calling the kettle black
|
||||
- pretty as a picture
|
||||
- pull a fast one
|
||||
- pull your punches
|
||||
- pulling your leg
|
||||
- pure as the driven snow
|
||||
- put it in a nutshell
|
||||
- put one over on you
|
||||
- put the cart before the horse
|
||||
- put the pedal to the metal
|
||||
- put your best foot forward
|
||||
- put your foot down
|
||||
- quick as a bunny
|
||||
- quick as a lick
|
||||
- quick as a wink
|
||||
- quick as lightning
|
||||
- quiet as a dormouse
|
||||
- rags to riches
|
||||
- raining buckets
|
||||
- raining cats and dogs
|
||||
- rank and file
|
||||
- rat race
|
||||
- reap what you sow
|
||||
- red as a beet
|
||||
- red herring
|
||||
- reinvent the wheel
|
||||
- rich and famous
|
||||
- rings a bell
|
||||
- ripe old age
|
||||
- ripped me off
|
||||
- rise and shine
|
||||
- road to hell is paved with good intentions
|
||||
- rob Peter to pay Paul
|
||||
- roll over in the grave
|
||||
- rub the wrong way
|
||||
- ruled the roost
|
||||
- running in circles
|
||||
- sad but true
|
||||
- sadder but wiser
|
||||
- salt of the earth
|
||||
- scared stiff
|
||||
- scared to death
|
||||
- sealed with a kiss
|
||||
- second to none
|
||||
- see eye to eye
|
||||
- seen the light
|
||||
- seize the day
|
||||
- set the record straight
|
||||
- set the world on fire
|
||||
- set your teeth on edge
|
||||
- sharp as a tack
|
||||
- shoot for the moon
|
||||
- shoot the breeze
|
||||
- shot in the dark
|
||||
- shoulder to the wheel
|
||||
- sick as a dog
|
||||
- sigh of relief
|
||||
- signed, sealed, and delivered
|
||||
- sink or swim
|
||||
- six of one, half a dozen of another
|
||||
- skating on thin ice
|
||||
- slept like a log
|
||||
- slinging mud
|
||||
- slippery as an eel
|
||||
- slow as molasses
|
||||
- smart as a whip
|
||||
- smooth as a baby's bottom
|
||||
- sneaking suspicion
|
||||
- snug as a bug in a rug
|
||||
- sow wild oats
|
||||
- spare the rod, spoil the child
|
||||
- speak of the devil
|
||||
- spilled the beans
|
||||
- spinning your wheels
|
||||
- spitting image of
|
||||
- spoke with relish
|
||||
- spread like wildfire
|
||||
- spring to life
|
||||
- squeaky wheel gets the grease
|
||||
- stands out like a sore thumb
|
||||
- start from scratch
|
||||
- stick in the mud
|
||||
- still waters run deep
|
||||
- stitch in time
|
||||
- stop and smell the roses
|
||||
- straight as an arrow
|
||||
- straw that broke the camel's back
|
||||
- strong as an ox
|
||||
- stubborn as a mule
|
||||
- stuff that dreams are made of
|
||||
- stuffed shirt
|
||||
- sweating blood
|
||||
- sweating bullets
|
||||
- take a load off
|
||||
- take one for the team
|
||||
- take the bait
|
||||
- take the bull by the horns
|
||||
- take the plunge
|
||||
- takes one to know one
|
||||
- takes two to tango
|
||||
- the more the merrier
|
||||
- the real deal
|
||||
- the real McCoy
|
||||
- the red carpet treatment
|
||||
- the same old story
|
||||
- there is no accounting for taste
|
||||
- thick as a brick
|
||||
- thick as thieves
|
||||
- thin as a rail
|
||||
- think outside of the box
|
||||
- third time's the charm
|
||||
- this day and age
|
||||
- this hurts me worse than it hurts you
|
||||
- this point in time
|
||||
- three sheets to the wind
|
||||
- through thick and thin
|
||||
- throw in the towel
|
||||
- tie one on
|
||||
- tighter than a drum
|
||||
- time and time again
|
||||
- time is of the essence
|
||||
- tip of the iceberg
|
||||
- tired but happy
|
||||
- to coin a phrase
|
||||
- to each his own
|
||||
- to make a long story short
|
||||
- to the best of my knowledge
|
||||
- toe the line
|
||||
- tongue in cheek
|
||||
- too good to be true
|
||||
- too hot to handle
|
||||
- too numerous to mention
|
||||
- touch with a ten foot pole
|
||||
- tough as nails
|
||||
- trial and error
|
||||
- trials and tribulations
|
||||
- tried and true
|
||||
- trip down memory lane
|
||||
- twist of fate
|
||||
- two cents worth
|
||||
- two peas in a pod
|
||||
- ugly as sin
|
||||
- under the counter
|
||||
- under the gun
|
||||
- under the same roof
|
||||
- under the weather
|
||||
- until the cows come home
|
||||
- unvarnished truth
|
||||
- up the creek
|
||||
- uphill battle
|
||||
- upper crust
|
||||
- upset the applecart
|
||||
- vain attempt
|
||||
- vain effort
|
||||
- vanquish the enemy
|
||||
- vested interest
|
||||
- waiting for the other shoe to drop
|
||||
- wakeup call
|
||||
- warm welcome
|
||||
- watch your p's and q's
|
||||
- watch your tongue
|
||||
- watching the clock
|
||||
- water under the bridge
|
||||
- weather the storm
|
||||
- weed them out
|
||||
- week of Sundays
|
||||
- went belly up
|
||||
- wet behind the ears
|
||||
- what goes around comes around
|
||||
- what you see is what you get
|
||||
- when it rains, it pours
|
||||
- when push comes to shove
|
||||
- when the cat's away
|
||||
- when the going gets tough, the tough get going
|
||||
- white as a sheet
|
||||
- whole ball of wax
|
||||
- whole hog
|
||||
- whole nine yards
|
||||
- wild goose chase
|
||||
- will wonders never cease?
|
||||
- wisdom of the ages
|
||||
- wise as an owl
|
||||
- wolf at the door
|
||||
- words fail me
|
||||
- work like a dog
|
||||
- world weary
|
||||
- worst nightmare
|
||||
- worth its weight in gold
|
||||
- wrong side of the bed
|
||||
- yanking your chain
|
||||
- yappy as a dog
|
||||
- years young
|
||||
- you are what you eat
|
||||
- you can run but you can't hide
|
||||
- you only live once
|
||||
- you're the boss
|
||||
- young and foolish
|
||||
- young and vibrant
|
32
.styles/write-good/E-Prime.yml
Normal file
@ -0,0 +1,32 @@
|
||||
extends: existence
|
||||
message: "Try to avoid using '%s'."
|
||||
ignorecase: true
|
||||
level: suggestion
|
||||
tokens:
|
||||
- am
|
||||
- are
|
||||
- aren't
|
||||
- be
|
||||
- been
|
||||
- being
|
||||
- he's
|
||||
- here's
|
||||
- here's
|
||||
- how's
|
||||
- i'm
|
||||
- is
|
||||
- isn't
|
||||
- it's
|
||||
- she's
|
||||
- that's
|
||||
- there's
|
||||
- they're
|
||||
- was
|
||||
- wasn't
|
||||
- we're
|
||||
- were
|
||||
- weren't
|
||||
- what's
|
||||
- where's
|
||||
- who's
|
||||
- you're
|
11
.styles/write-good/Illusions.yml
Normal file
@ -0,0 +1,11 @@
|
||||
extends: repetition
|
||||
message: "'%s' is repeated!"
|
||||
level: warning
|
||||
alpha: true
|
||||
action:
|
||||
name: edit
|
||||
params:
|
||||
- truncate
|
||||
- " "
|
||||
tokens:
|
||||
- '[^\s]+'
|
183
.styles/write-good/Passive.yml
Normal file
@ -0,0 +1,183 @@
|
||||
extends: existence
|
||||
message: "'%s' may be passive voice. Use active voice if you can."
|
||||
ignorecase: true
|
||||
level: warning
|
||||
raw:
|
||||
- \b(am|are|were|being|is|been|was|be)\b\s*
|
||||
tokens:
|
||||
- '[\w]+ed'
|
||||
- awoken
|
||||
- beat
|
||||
- become
|
||||
- been
|
||||
- begun
|
||||
- bent
|
||||
- beset
|
||||
- bet
|
||||
- bid
|
||||
- bidden
|
||||
- bitten
|
||||
- bled
|
||||
- blown
|
||||
- born
|
||||
- bought
|
||||
- bound
|
||||
- bred
|
||||
- broadcast
|
||||
- broken
|
||||
- brought
|
||||
- built
|
||||
- burnt
|
||||
- burst
|
||||
- cast
|
||||
- caught
|
||||
- chosen
|
||||
- clung
|
||||
- come
|
||||
- cost
|
||||
- crept
|
||||
- cut
|
||||
- dealt
|
||||
- dived
|
||||
- done
|
||||
- drawn
|
||||
- dreamt
|
||||
- driven
|
||||
- drunk
|
||||
- dug
|
||||
- eaten
|
||||
- fallen
|
||||
- fed
|
||||
- felt
|
||||
- fit
|
||||
- fled
|
||||
- flown
|
||||
- flung
|
||||
- forbidden
|
||||
- foregone
|
||||
- forgiven
|
||||
- forgotten
|
||||
- forsaken
|
||||
- fought
|
||||
- found
|
||||
- frozen
|
||||
- given
|
||||
- gone
|
||||
- gotten
|
||||
- ground
|
||||
- grown
|
||||
- heard
|
||||
- held
|
||||
- hidden
|
||||
- hit
|
||||
- hung
|
||||
- hurt
|
||||
- kept
|
||||
- knelt
|
||||
- knit
|
||||
- known
|
||||
- laid
|
||||
- lain
|
||||
- leapt
|
||||
- learnt
|
||||
- led
|
||||
- left
|
||||
- lent
|
||||
- let
|
||||
- lighted
|
||||
- lost
|
||||
- made
|
||||
- meant
|
||||
- met
|
||||
- misspelt
|
||||
- mistaken
|
||||
- mown
|
||||
- overcome
|
||||
- overdone
|
||||
- overtaken
|
||||
- overthrown
|
||||
- paid
|
||||
- pled
|
||||
- proven
|
||||
- put
|
||||
- quit
|
||||
- read
|
||||
- rid
|
||||
- ridden
|
||||
- risen
|
||||
- run
|
||||
- rung
|
||||
- said
|
||||
- sat
|
||||
- sawn
|
||||
- seen
|
||||
- sent
|
||||
- set
|
||||
- sewn
|
||||
- shaken
|
||||
- shaven
|
||||
- shed
|
||||
- shod
|
||||
- shone
|
||||
- shorn
|
||||
- shot
|
||||
- shown
|
||||
- shrunk
|
||||
- shut
|
||||
- slain
|
||||
- slept
|
||||
- slid
|
||||
- slit
|
||||
- slung
|
||||
- smitten
|
||||
- sold
|
||||
- sought
|
||||
- sown
|
||||
- sped
|
||||
- spent
|
||||
- spilt
|
||||
- spit
|
||||
- split
|
||||
- spoken
|
||||
- spread
|
||||
- sprung
|
||||
- spun
|
||||
- stolen
|
||||
- stood
|
||||
- stridden
|
||||
- striven
|
||||
- struck
|
||||
- strung
|
||||
- stuck
|
||||
- stung
|
||||
- stunk
|
||||
- sung
|
||||
- sunk
|
||||
- swept
|
||||
- swollen
|
||||
- sworn
|
||||
- swum
|
||||
- swung
|
||||
- taken
|
||||
- taught
|
||||
- thought
|
||||
- thrived
|
||||
- thrown
|
||||
- thrust
|
||||
- told
|
||||
- torn
|
||||
- trodden
|
||||
- understood
|
||||
- upheld
|
||||
- upset
|
||||
- wed
|
||||
- wept
|
||||
- withheld
|
||||
- withstood
|
||||
- woken
|
||||
- won
|
||||
- worn
|
||||
- wound
|
||||
- woven
|
||||
- written
|
||||
- wrung
|
27
.styles/write-good/README.md
Normal file
@ -0,0 +1,27 @@
|
||||
Based on [write-good](https://github.com/btford/write-good).
|
||||
|
||||
> Naive linter for English prose for developers who can't write good and wanna learn to do other stuff good too.
|
||||
|
||||
```
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2014 Brian Ford
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
```
|
5
.styles/write-good/So.yml
Normal file
@ -0,0 +1,5 @@
|
||||
extends: existence
|
||||
message: "Don't start a sentence with '%s'."
|
||||
level: error
|
||||
raw:
|
||||
- '(?:[;-]\s)so[\s,]|\bSo[\s,]'
|
6
.styles/write-good/ThereIs.yml
Normal file
@ -0,0 +1,6 @@
|
||||
extends: existence
|
||||
message: "Don't start a sentence with '%s'."
|
||||
ignorecase: false
|
||||
level: error
|
||||
raw:
|
||||
- '(?:[;-]\s)There\s(is|are)|\bThere\s(is|are)\b'
|
221
.styles/write-good/TooWordy.yml
Normal file
@ -0,0 +1,221 @@
|
||||
extends: existence
|
||||
message: "'%s' is too wordy."
|
||||
ignorecase: true
|
||||
level: warning
|
||||
tokens:
|
||||
- a number of
|
||||
- abundance
|
||||
- accede to
|
||||
- accelerate
|
||||
- accentuate
|
||||
- accompany
|
||||
- accomplish
|
||||
- accorded
|
||||
- accrue
|
||||
- acquiesce
|
||||
- acquire
|
||||
- additional
|
||||
- adjacent to
|
||||
- adjustment
|
||||
- admissible
|
||||
- advantageous
|
||||
- adversely impact
|
||||
- advise
|
||||
- aforementioned
|
||||
- aggregate
|
||||
- aircraft
|
||||
- all of
|
||||
- all things considered
|
||||
- alleviate
|
||||
- allocate
|
||||
- along the lines of
|
||||
- already existing
|
||||
- alternatively
|
||||
- amazing
|
||||
- ameliorate
|
||||
- anticipate
|
||||
- apparent
|
||||
- appreciable
|
||||
- as a matter of fact
|
||||
- as a means of
|
||||
- as far as I'm concerned
|
||||
- as of yet
|
||||
- as to
|
||||
- as yet
|
||||
- ascertain
|
||||
- assistance
|
||||
- at the present time
|
||||
- at this time
|
||||
- attain
|
||||
- attributable to
|
||||
- authorize
|
||||
- because of the fact that
|
||||
- belated
|
||||
- benefit from
|
||||
- bestow
|
||||
- by means of
|
||||
- by virtue of
|
||||
- by virtue of the fact that
|
||||
- cease
|
||||
- close proximity
|
||||
- commence
|
||||
- comply with
|
||||
- concerning
|
||||
- consequently
|
||||
- consolidate
|
||||
- constitutes
|
||||
- demonstrate
|
||||
- depart
|
||||
- designate
|
||||
- discontinue
|
||||
- due to the fact that
|
||||
- each and every
|
||||
- economical
|
||||
- eliminate
|
||||
- elucidate
|
||||
- employ
|
||||
- endeavor
|
||||
- enumerate
|
||||
- equitable
|
||||
- equivalent
|
||||
- evaluate
|
||||
- evidenced
|
||||
- exclusively
|
||||
- expedite
|
||||
- expend
|
||||
- expiration
|
||||
- facilitate
|
||||
- factual evidence
|
||||
- feasible
|
||||
- finalize
|
||||
- first and foremost
|
||||
- for all intents and purposes
|
||||
- for the most part
|
||||
- for the purpose of
|
||||
- forfeit
|
||||
- formulate
|
||||
- have a tendency to
|
||||
- honest truth
|
||||
- however
|
||||
- if and when
|
||||
- impacted
|
||||
- implement
|
||||
- in a manner of speaking
|
||||
- in a timely manner
|
||||
- in a very real sense
|
||||
- in accordance with
|
||||
- in addition
|
||||
- in all likelihood
|
||||
- in an effort to
|
||||
- in between
|
||||
- in excess of
|
||||
- in lieu of
|
||||
- in light of the fact that
|
||||
- in many cases
|
||||
- in my opinion
|
||||
- in order to
|
||||
- in regard to
|
||||
- in some instances
|
||||
- in terms of
|
||||
- in the case of
|
||||
- in the event that
|
||||
- in the final analysis
|
||||
- in the nature of
|
||||
- in the near future
|
||||
- in the process of
|
||||
- inception
|
||||
- incumbent upon
|
||||
- indicate
|
||||
- indication
|
||||
- initiate
|
||||
- irregardless
|
||||
- is applicable to
|
||||
- is authorized to
|
||||
- is responsible for
|
||||
- it is
|
||||
- it is essential
|
||||
- it seems that
|
||||
- it was
|
||||
- magnitude
|
||||
- maximum
|
||||
- methodology
|
||||
- minimize
|
||||
- minimum
|
||||
- modify
|
||||
- monitor
|
||||
- multiple
|
||||
- necessitate
|
||||
- nevertheless
|
||||
- not certain
|
||||
- not many
|
||||
- not often
|
||||
- not unless
|
||||
- not unlike
|
||||
- notwithstanding
|
||||
- null and void
|
||||
- numerous
|
||||
- objective
|
||||
- obligate
|
||||
- obtain
|
||||
- on the contrary
|
||||
- on the other hand
|
||||
- one particular
|
||||
- optimum
|
||||
- overall
|
||||
- owing to the fact that
|
||||
- participate
|
||||
- particulars
|
||||
- pass away
|
||||
- pertaining to
|
||||
- point in time
|
||||
- portion
|
||||
- possess
|
||||
- preclude
|
||||
- previously
|
||||
- prior to
|
||||
- prioritize
|
||||
- procure
|
||||
- proficiency
|
||||
- provided that
|
||||
- purchase
|
||||
- put simply
|
||||
- readily apparent
|
||||
- refer back
|
||||
- regarding
|
||||
- relocate
|
||||
- remainder
|
||||
- remuneration
|
||||
- requirement
|
||||
- reside
|
||||
- residence
|
||||
- retain
|
||||
- satisfy
|
||||
- shall
|
||||
- should you wish
|
||||
- similar to
|
||||
- solicit
|
||||
- span across
|
||||
- strategize
|
||||
- subsequent
|
||||
- substantial
|
||||
- successfully complete
|
||||
- sufficient
|
||||
- terminate
|
||||
- the month of
|
||||
- the point I am trying to make
|
||||
- therefore
|
||||
- time period
|
||||
- took advantage of
|
||||
- transmit
|
||||
- transpire
|
||||
- type of
|
||||
- until such time as
|
||||
- utilization
|
||||
- utilize
|
||||
- validate
|
||||
- various different
|
||||
- what I mean to say is
|
||||
- whether or not
|
||||
- with respect to
|
||||
- with the exception of
|
||||
- witnessed
|
29
.styles/write-good/Weasel.yml
Normal file
@ -0,0 +1,29 @@
|
||||
extends: existence
|
||||
message: "'%s' is a weasel word!"
|
||||
ignorecase: true
|
||||
level: warning
|
||||
tokens:
|
||||
- clearly
|
||||
- completely
|
||||
- exceedingly
|
||||
- excellent
|
||||
- extremely
|
||||
- fairly
|
||||
- huge
|
||||
- interestingly
|
||||
- is a number
|
||||
- largely
|
||||
- mostly
|
||||
- obviously
|
||||
- quite
|
||||
- relatively
|
||||
- remarkably
|
||||
- several
|
||||
- significantly
|
||||
- substantially
|
||||
- surprisingly
|
||||
- tiny
|
||||
- usually
|
||||
- various
|
||||
- vast
|
||||
- very
|
4
.styles/write-good/meta.json
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"feed": "https://github.com/errata-ai/write-good/releases.atom",
|
||||
"vale_version": ">=1.0.0"
|
||||
}
|
@ -2,6 +2,6 @@ StylesPath = .styles
|
||||
MinAlertLevel = suggestion
|
||||
Packages = Microsoft, write-good
|
||||
|
||||
[*]
|
||||
[*.{md,mdx}]
|
||||
BasedOnStyles = Vale, Microsoft, write-good
|
||||
write-good.E-Prime = NO
|
24
404.html
@ -1,24 +0,0 @@
|
||||
---
|
||||
layout: page
|
||||
---
|
||||
|
||||
<style type="text/css" media="screen">
|
||||
.container {
|
||||
margin: 10px auto;
|
||||
max-width: 600px;
|
||||
text-align: center;
|
||||
}
|
||||
h1 {
|
||||
margin: 30px 0;
|
||||
font-size: 4em;
|
||||
line-height: 1;
|
||||
letter-spacing: -1px;
|
||||
}
|
||||
</style>
|
||||
|
||||
<div class="container">
|
||||
<h1>404</h1>
|
||||
|
||||
<p><strong>Page not found :(</strong></p>
|
||||
<p>The requested page could not be found.</p>
|
||||
</div>
|
29
Gemfile
@ -1,29 +0,0 @@
|
||||
source "https://rubygems.org"
|
||||
|
||||
# Hello! This is where you manage which Jekyll version is used to run.
|
||||
# When you want to use a different version, change it below, save the
|
||||
# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
|
||||
#
|
||||
# bundle exec jekyll serve
|
||||
#
|
||||
# This will help ensure the proper Jekyll version is running.
|
||||
# Happy Jekylling!
|
||||
gem "jekyll", "~> 3.8.3"
|
||||
|
||||
gem "texture"
|
||||
|
||||
# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
|
||||
# uncomment the line below. To upgrade, run `bundle update github-pages`.
|
||||
# gem "github-pages", group: :jekyll_plugins
|
||||
|
||||
# If you have any plugins, put them here!
|
||||
group :jekyll_plugins do
|
||||
gem "jekyll-feed", "~> 0.6"
|
||||
gem "jekyll-remote-theme"
|
||||
end
|
||||
|
||||
# Windows does not include zoneinfo files, so bundle the tzinfo-data gem
|
||||
gem "tzinfo-data", platforms: [:mingw, :mswin, :x64_mingw, :jruby]
|
||||
|
||||
# Performance-booster for watching directories on Windows
|
||||
gem "wdm", "~> 0.1.0" if Gem.win_platform?
|
78
Gemfile.lock
@ -1,78 +0,0 @@
|
||||
GEM
|
||||
remote: https://rubygems.org/
|
||||
specs:
|
||||
addressable (2.7.0)
|
||||
public_suffix (>= 2.0.2, < 5.0)
|
||||
colorator (1.1.0)
|
||||
concurrent-ruby (1.1.6)
|
||||
em-websocket (0.5.1)
|
||||
eventmachine (>= 0.12.9)
|
||||
http_parser.rb (~> 0.6.0)
|
||||
eventmachine (1.2.7)
|
||||
ffi (1.12.2)
|
||||
forwardable-extended (2.6.0)
|
||||
http_parser.rb (0.6.0)
|
||||
i18n (0.9.5)
|
||||
concurrent-ruby (~> 1.0)
|
||||
jekyll (3.8.6)
|
||||
addressable (~> 2.4)
|
||||
colorator (~> 1.0)
|
||||
em-websocket (~> 0.5)
|
||||
i18n (~> 0.7)
|
||||
jekyll-sass-converter (~> 1.0)
|
||||
jekyll-watch (~> 2.0)
|
||||
kramdown (~> 1.14)
|
||||
liquid (~> 4.0)
|
||||
mercenary (~> 0.3.3)
|
||||
pathutil (~> 0.9)
|
||||
rouge (>= 1.7, < 4)
|
||||
safe_yaml (~> 1.0)
|
||||
jekyll-feed (0.13.0)
|
||||
jekyll (>= 3.7, < 5.0)
|
||||
jekyll-remote-theme (0.4.2)
|
||||
addressable (~> 2.0)
|
||||
jekyll (>= 3.5, < 5.0)
|
||||
jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
|
||||
rubyzip (>= 1.3.0, < 3.0)
|
||||
jekyll-sass-converter (1.5.2)
|
||||
sass (~> 3.4)
|
||||
jekyll-seo-tag (2.6.1)
|
||||
jekyll (>= 3.3, < 5.0)
|
||||
jekyll-watch (2.2.1)
|
||||
listen (~> 3.0)
|
||||
kramdown (1.17.0)
|
||||
liquid (4.0.3)
|
||||
listen (3.2.1)
|
||||
rb-fsevent (~> 0.10, >= 0.10.3)
|
||||
rb-inotify (~> 0.9, >= 0.9.10)
|
||||
mercenary (0.3.6)
|
||||
pathutil (0.16.2)
|
||||
forwardable-extended (~> 2.6)
|
||||
public_suffix (4.0.4)
|
||||
rb-fsevent (0.10.3)
|
||||
rb-inotify (0.10.1)
|
||||
ffi (~> 1.0)
|
||||
rouge (3.17.0)
|
||||
rubyzip (2.3.0)
|
||||
safe_yaml (1.0.5)
|
||||
sass (3.7.4)
|
||||
sass-listen (~> 4.0.0)
|
||||
sass-listen (4.0.0)
|
||||
rb-fsevent (~> 0.9, >= 0.9.4)
|
||||
rb-inotify (~> 0.9, >= 0.9.7)
|
||||
texture (0.3)
|
||||
jekyll (~> 3.7)
|
||||
jekyll-seo-tag (~> 2.1)
|
||||
|
||||
PLATFORMS
|
||||
ruby
|
||||
|
||||
DEPENDENCIES
|
||||
jekyll (~> 3.8.3)
|
||||
jekyll-feed (~> 0.6)
|
||||
jekyll-remote-theme
|
||||
texture
|
||||
tzinfo-data
|
||||
|
||||
BUNDLED WITH
|
||||
2.1.4
|
41
README.md
Normal file
@ -0,0 +1,41 @@
|
||||
# Website
|
||||
|
||||
This website is built using [Docusaurus](https://docusaurus.io/), a modern static website generator.
|
||||
|
||||
### Installation
|
||||
|
||||
```
|
||||
$ yarn
|
||||
```
|
||||
|
||||
### Local Development
|
||||
|
||||
```
|
||||
$ yarn start
|
||||
```
|
||||
|
||||
This command starts a local development server and opens up a browser window. Most changes are reflected live without having to restart the server.
|
||||
|
||||
### Build
|
||||
|
||||
```
|
||||
$ yarn build
|
||||
```
|
||||
|
||||
This command generates static content into the `build` directory and can be served using any static contents hosting service.
|
||||
|
||||
### Deployment
|
||||
|
||||
Using SSH:
|
||||
|
||||
```
|
||||
$ USE_SSH=true yarn deploy
|
||||
```
|
||||
|
||||
Not using SSH:
|
||||
|
||||
```
|
||||
$ GIT_USER=<Your GitHub username> yarn deploy
|
||||
```
|
||||
|
||||
If you are using GitHub pages for hosting, this command is a convenient way to build the website and push to the `gh-pages` branch.
|
44
_config.yml
@ -1,44 +0,0 @@
|
||||
# Welcome to Jekyll!
|
||||
#
|
||||
# This config file is meant for settings that affect your whole blog, values
|
||||
# which you are expected to set up once and rarely edit after that. If you find
|
||||
# yourself editing this file very often, consider using Jekyll's data files
|
||||
# feature for the data you need to update frequently.
|
||||
#
|
||||
# For technical reasons, this file is *NOT* reloaded automatically when you use
|
||||
# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
|
||||
|
||||
# Site settings
|
||||
# These are used to personalize your new site. If you look in the HTML files,
|
||||
# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
|
||||
# You can create any custom variable you would like, and they will be accessible
|
||||
# in the templates via {{ site.myvariable }}.
|
||||
title: speice.io
|
||||
description: The Old Speice Guy
|
||||
email: bradlee@speice.io
|
||||
baseurl: "" # the subpath of your site, e.g. /blog
|
||||
url: "https://speice.io/" # the base hostname & protocol for your site, e.g. http://example.com
|
||||
github_username: bspeice
|
||||
|
||||
# Build settings
|
||||
markdown: kramdown
|
||||
# theme: texture
|
||||
remote_theme: thelehhman/texture
|
||||
plugins:
|
||||
- jekyll-feed
|
||||
- jekyll-remote-theme
|
||||
|
||||
include: [_pages]
|
||||
permalink: /:year/:month/:title.html
|
||||
|
||||
# Exclude from processing.
|
||||
# The following items will not be processed, by default. Create a custom list
|
||||
# to override the default setting.
|
||||
# exclude:
|
||||
# - Gemfile
|
||||
# - Gemfile.lock
|
||||
# - node_modules
|
||||
# - vendor/bundle/
|
||||
# - vendor/cache/
|
||||
# - vendor/gems/
|
||||
# - vendor/ruby/
|
@ -1,23 +0,0 @@
|
||||
{% if page.layout == 'post' %}
|
||||
{% comment %}Thanks to https://www.bytedude.com/jekyll-previous-and-next-posts/{% endcomment %}
|
||||
<div class="container">
|
||||
<hr>
|
||||
<div class="post-nav">
|
||||
<div>
|
||||
{% if page.previous.url %}
|
||||
<a href="{{page.previous.url}}">« {{page.previous.title}}</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
<div class="post-nav-next">
|
||||
{% if page.next.url %}
|
||||
<a href="{{page.next.url}}">{{page.next.title}} »</a>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<script type="text/javascript"
|
||||
src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js?config=TeX-AMS-MML_HTMLorMML">
|
||||
</script>
|
||||
{% endif %}
|
@ -1,7 +0,0 @@
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<meta http-equiv="X-UA-Compatible" content="ie=edge">
|
||||
<link rel="stylesheet" href="{{ "/assets/css/style.css" | relative_url }}">
|
||||
<link rel="stylesheet" href="{{ "/assets/css/fonts.css" | prepend: site.baseurl }}">
|
||||
<title>{{ page.title | default: site.title }}</title>
|
||||
{% seo %}
|
@ -1,7 +0,0 @@
|
||||
<div class="navbar">
|
||||
<a href="{{ "/" | prepend: site.baseurl }}">Home</a>
|
||||
<span class="separator"></span>
|
||||
<a href="{{ "/about/" | prepend: site.baseurl }}">About</a>
|
||||
<span class="separator"></span>
|
||||
<a href="{{ "/feed.xml" | prepend: site.baseurl }}">RSS</a>
|
||||
</div>
|
@ -1,15 +0,0 @@
|
||||
<div class="container">
|
||||
<h2>{{ site.title }}</h2>
|
||||
<h1>{{ site.description }}</h1>
|
||||
<ul class="social">
|
||||
{%- if site.texture.social_links.github -%}
|
||||
<a href="https://github.com/{{ site.texture.social_links.github }}"><li><i class="icon-github-circled"></i></li></a>
|
||||
{%- endif -%}
|
||||
{%- if site.texture.social_links.linkedIn -%}
|
||||
<a href="https://linkedin.com/{{ site.texture.social_links.linkedIn }}"><li><i class="icon-linkedin-squared"></i></li></a>
|
||||
{%- endif -%}
|
||||
{%- if site.texture.social_links.twitter -%}
|
||||
<a href="https://twitter.com/{{ site.texture.social_links.twitter }}"><li><i class="icon-twitter-squared"></i></li></a>
|
||||
{%- endif -%}
|
||||
</ul>
|
||||
</div>
|
@ -1,13 +0,0 @@
|
||||
---
|
||||
layout: page
|
||||
title: About
|
||||
permalink: /about/
|
||||
---
|
||||
|
||||
Developer currently living in New York City.
|
||||
|
||||
Best ways to get in contact:
|
||||
|
||||
- Email: [bradlee@speice.io](mailto:bradlee@speice.io)
|
||||
- GitHub: [bspeice](https://github.com/bspeice)
|
||||
- LinkedIn: [bradleespeice](https://www.linkedin.com/in/bradleespeice/)
|
@ -1,38 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Hello!"
|
||||
description: ""
|
||||
category:
|
||||
tags: []
|
||||
---
|
||||
|
||||
I'll do what I can to keep this short, there's plenty of other things we both should be doing right
|
||||
now.
|
||||
|
||||
If you're here for the bread pics, and to marvel in some other culinary side projects, I've got you
|
||||
covered:
|
||||
|
||||
![Saturday Bread]({{ "/assets/images/2018-05-28-bread.jpg" | absolute_url }})
|
||||
|
||||
And no, I'm not posting pictures of earlier attempts that ended up turning into rocks in the oven.
|
||||
|
||||
Okay, just one:
|
||||
|
||||
![Bread as rock]({{ "/assets/images/2018-05-28-rocks.jpg" | absolute_url }})
|
||||
|
||||
If you're here for keeping up with the man Bradlee Speice, got plenty of that too. Plus some
|
||||
up-coming super-nerdy posts about how I'm changing the world.
|
||||
|
||||
And if you're not here for those things: don't have a lot for you, sorry. But you're welcome to let
|
||||
me know what needs to change.
|
||||
|
||||
I'm looking forward to making this a place to talk about what's going on in life, I hope you'll
|
||||
stick it out with me. The best way to follow what's going on is on my [About](/about/) page, but if
|
||||
you want the joy of clicking links, here's a few good ones:
|
||||
|
||||
- Email (people still use this?): [bradlee@speice.io](mailto:bradlee@speice.io)
|
||||
- Mastodon (nerd Twitter): [@bradlee](https://mastodon.social/@bradlee)
|
||||
- Chat (RiotIM): [@bspeice:matrix.com](https://matrix.to/#/@bspeice:matrix.com)
|
||||
- The comments section (not for people with sanity intact): ↓↓↓
|
||||
|
||||
Thanks, and keep it amazing.
|
@ -1,177 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "What I Learned: Porting Dateutil Parser to Rust"
|
||||
description: ""
|
||||
category:
|
||||
tags: [dtparse, rust]
|
||||
---
|
||||
|
||||
Hi. I'm Bradlee.
|
||||
|
||||
I've mostly been a lurker in Rust for a while, making a couple small contributions here and there.
|
||||
So launching [dtparse](https://github.com/bspeice/dtparse) feels like a nice step towards becoming a
|
||||
functioning member of society. But not too much, because then you know people start asking you to
|
||||
pay bills, and ain't nobody got time for that.
|
||||
|
||||
But I built dtparse, and you can read about my thoughts on the process. Or don't. I won't tell you
|
||||
what to do with your life (but you should totally keep reading).
|
||||
|
||||
# Slow down, what?
|
||||
|
||||
OK, fine, I guess I should start with _why_ someone would do this.
|
||||
|
||||
[Dateutil](https://github.com/dateutil/dateutil) is a Python library for handling dates. The
|
||||
standard library support for time in Python is kinda dope, but there are a lot of extras that go
|
||||
into making it useful beyond just the [datetime](https://docs.python.org/3.6/library/datetime.html)
|
||||
module. `dateutil.parser` specifically is code to take all the super-weird time formats people come
|
||||
up with and turn them into something actually useful.
|
||||
|
||||
Date/time parsing, it turns out, is just like everything else involving
|
||||
[computers](https://infiniteundo.com/post/25326999628/falsehoods-programmers-believe-about-time) and
|
||||
[time](https://infiniteundo.com/post/25509354022/more-falsehoods-programmers-believe-about-time): it
|
||||
feels like it shouldn't be that difficult to do, until you try to do it, and you realize that people
|
||||
suck and this is why
|
||||
[we can't have nice things](https://zachholman.com/talk/utc-is-enough-for-everyone-right). But
|
||||
alas, we'll try and make contemporary art out of the rubble and give it a pretentious name like
|
||||
_Time_.
|
||||
|
||||
![A gravel mound](/assets/images/2018-06-25-gravel-mound.jpg)
|
||||
|
||||
> [Time](https://www.goodfreephotos.com/united-states/montana/elkhorn/remains-of-the-mining-operation-elkhorn.jpg.php)
|
||||
|
||||
What makes `dateutil.parser` great is that there's a single function with a single argument that
|
||||
drives what programmers interact with:
|
||||
[`parse(timestr)`](https://github.com/dateutil/dateutil/blob/6dde5d6298cfb81a4c594a38439462799ed2aef2/dateutil/parser/_parser.py#L1258).
|
||||
It takes in the time as a string, and gives you back a reasonable "look, this is the best anyone can
|
||||
possibly do to make sense of your input" value. It doesn't expect much of you.
|
||||
|
||||
[And now it's in Rust.](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L1332)
|
||||
|
||||
# Lost in Translation
|
||||
|
||||
Having worked at a bulge-bracket bank watching Java programmers try to be Python programmers, I'm
|
||||
admittedly hesitant to publish Python code that's trying to be Rust. Interestingly, Rust code can
|
||||
actually do a great job of mimicking Python. It's certainly not idiomatic Rust, but I've had better
|
||||
experiences than
|
||||
[this guy](https://webcache.googleusercontent.com/search?q=cache:wkYMpktJtnUJ:https://jackstouffer.com/blog/porting_dateutil.html+&cd=3&hl=en&ct=clnk&gl=us)
|
||||
who attempted the same thing for D. These are the actual take-aways:
|
||||
|
||||
When transcribing code, **stay as close to the original library as possible**. I'm talking about
|
||||
using the same variable names, same access patterns, the whole shebang. It's way too easy to make a
|
||||
couple of typos, and all of a sudden your code blows up in new and exciting ways. Having a reference
|
||||
manual for verbatim what your code should be means that you don't spend that long debugging
|
||||
complicated logic, you're more looking for typos.
|
||||
|
||||
Also, **don't use nice Rust things like enums**. While
|
||||
[one time it worked out OK for me](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L88-L94),
|
||||
I also managed to shoot myself in the foot a couple times because `dateutil` stores AM/PM as a
|
||||
boolean and I mixed up which was true, and which was false (side note: AM is false, PM is true). In
|
||||
general, writing nice code _should not be a first-pass priority_ when you're just trying to recreate
|
||||
the same functionality.
|
||||
|
||||
**Exceptions are a pain.** Make peace with it. Python code is just allowed to skip stack frames. So
|
||||
when a co-worker told me "Rust is getting try-catch syntax" I properly freaked out. Turns out
|
||||
[he's not quite right](https://github.com/rust-lang/rfcs/pull/243), and I'm OK with that. And while
|
||||
`dateutil` is pretty well-behaved about not skipping multiple stack frames,
|
||||
[130-line try-catch blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L730-L865)
|
||||
take a while to verify.
|
||||
|
||||
As another Python quirk, **be very careful about
|
||||
[long nested if-elif-else blocks](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L494-L568)**.
|
||||
I used to think that Python's whitespace was just there to get you to format your code correctly. I
|
||||
think that no longer. It's way too easy to close a block too early and have incredibly weird issues
|
||||
in the logic. Make sure you use an editor that displays indentation levels so you can keep things
|
||||
straight.
|
||||
|
||||
**Rust macros are not free.** I originally had the
|
||||
[main test body](https://github.com/bspeice/dtparse/blob/b0e737f088eca8e83ab4244c6621a2797d247697/tests/compat.rs#L63-L217)
|
||||
wrapped up in a macro using [pyo3](https://github.com/PyO3/PyO3). It took two minutes to compile.
|
||||
After
|
||||
[moving things to a function](https://github.com/bspeice/dtparse/blob/e017018295c670e4b6c6ee1cfff00dbb233db47d/tests/compat.rs#L76-L205)
|
||||
compile times dropped down to ~5 seconds. Turns out 150 lines \* 100 tests = a lot of redundant code
|
||||
to be compiled. My new rule of thumb is that any macros longer than 10-15 lines are actually
|
||||
functions that need to be liberated, man.
|
||||
|
||||
Finally, **I really miss list comprehensions and dictionary comprehensions.** As a quick comparison,
|
||||
see
|
||||
[this dateutil code](https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L476)
|
||||
and
|
||||
[the implementation in Rust](https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L619-L629).
|
||||
I probably wrote it wrong, and I'm sorry. Ultimately though, I hope that these comprehensions can be
|
||||
added through macros or syntax extensions. Either way, they're expressive, save typing, and are
|
||||
super-readable. Let's get more of that.
|
||||
|
||||
# Using a young language
|
||||
|
||||
Now, Rust is exciting and new, which means that there's opportunity to make a substantive impact. On
|
||||
more than one occasion though, I've had issues navigating the Rust ecosystem.
|
||||
|
||||
What I'll call the "canonical library" is still being built. In Python, if you need datetime
|
||||
parsing, you use `dateutil`. If you want `decimal` types, it's already in the
|
||||
[standard library](https://docs.python.org/3.6/library/decimal.html). While I might've gotten away
|
||||
with `f64`, `dateutil` uses decimals, and I wanted to follow the principle of **staying as close to
|
||||
the original library as possible**. Thus began my quest to find a decimal library in Rust. What I
|
||||
quickly found was summarized in a comment:
|
||||
|
||||
> Writing a BigDecimal is easy. Writing a _good_ BigDecimal is hard.
|
||||
>
|
||||
> [-cmr](https://github.com/rust-lang/rust/issues/8937#issuecomment-34582794)
|
||||
|
||||
In practice, this means that there are at least [4](https://crates.io/crates/bigdecimal)
|
||||
[different](https://crates.io/crates/rust_decimal)
|
||||
[implementations](https://crates.io/crates/decimal) [available](https://crates.io/crates/decimate).
|
||||
And that's a lot of decisions to worry about when all I'm thinking is "why can't
|
||||
[calendar reform](https://en.wikipedia.org/wiki/Calendar_reform) be a thing" and I'm forced to dig
|
||||
through a [couple](https://github.com/rust-lang/rust/issues/8937#issuecomment-31661916)
|
||||
[different](https://github.com/rust-lang/rfcs/issues/334)
|
||||
[threads](https://github.com/rust-num/num/issues/8) to figure out if the library I'm looking at is dead
|
||||
or just stable.
|
||||
|
||||
And even when the "canonical library" exists, there's no guarantee that it will be well-maintained.
|
||||
[Chrono](https://github.com/chronotope/chrono) is the _de facto_ date/time library in Rust, and just
|
||||
released version 0.4.4 like two days ago. Meanwhile,
|
||||
[chrono-tz](https://github.com/chronotope/chrono-tz) appears to be dead in the water even though
|
||||
[there are people happy to help maintain it](https://github.com/chronotope/chrono-tz/issues/19). I
|
||||
know relatively little about it, but it appears that most of the release process is automated;
|
||||
keeping that up to date should be a no-brainer.
|
||||
|
||||
## Trial Maintenance Policy
|
||||
|
||||
Specifically given "maintenance" being an
|
||||
[oft-discussed](https://www.reddit.com/r/rust/comments/48540g/thoughts_on_initiators_vs_maintainers/)
|
||||
issue, I'm going to try out the following policy to keep things moving on `dtparse`:
|
||||
|
||||
1. Issues/PRs needing _maintainer_ feedback will be updated at least weekly. I want to make sure
|
||||
nobody's blocking on me.
|
||||
|
||||
2. To keep issues/PRs needing _contributor_ feedback moving, I'm going to (kindly) ask the
|
||||
contributor to check in after two weeks, and close the issue without resolution if I hear nothing
|
||||
back after a month.
|
||||
|
||||
The second point I think has the potential to be a bit controversial, so I'm happy to receive
|
||||
feedback on that. And if a contributor responds with "hey, still working on it, had a kid and I'm
|
||||
running on 30 seconds of sleep a night," then first: congratulations on sustaining human life. And
|
||||
second: I don't mind keeping those requests going indefinitely. I just want to try and balance
|
||||
keeping things moving with giving people the necessary time they need.
|
||||
|
||||
I should also note that I'm still getting some best practices in place - CONTRIBUTING and
|
||||
CONTRIBUTORS files need to be added, as well as issue/PR templates. In progress. None of us are
|
||||
perfect.
|
||||
|
||||
# Roadmap and Conclusion
|
||||
|
||||
So if I've now built a `dateutil`-compatible parser, we're done, right? Of course not! That's not
|
||||
nearly ambitious enough.
|
||||
|
||||
Ultimately, I'd love to have a library that's capable of parsing everything the Linux `date` command
|
||||
can do (and not `date` on OSX, because seriously, BSD coreutils are the worst). I know Rust has a
|
||||
coreutils rewrite going on, and `dtparse` would potentially be an interesting candidate since it
|
||||
doesn't bring in a lot of extra dependencies. [`humantime`](https://crates.io/crates/humantime)
|
||||
could help pick up some of the (current) slack in dtparse, so maybe we can share and care with each
|
||||
other?
|
||||
|
||||
All in all, I'm mostly hoping that nobody's already done this and I haven't spent a bit over a month
|
||||
on redundant code. So if it exists, tell me. I need to know, but be nice about it, because I'm going
|
||||
to take it hard.
|
||||
|
||||
And in the meantime, I'm looking forward to building more. Onwards.
|
@ -1,323 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Primitives in Rust are Weird (and Cool)"
|
||||
description: "but mostly weird."
|
||||
category:
|
||||
tags: [rust, c, java, python, x86]
|
||||
---
|
||||
|
||||
I wrote a really small Rust program a while back because I was curious. I was 100% convinced it
|
||||
couldn't possibly run:
|
||||
|
||||
```rust
|
||||
fn main() {
|
||||
println!("{}", 8.to_string())
|
||||
}
|
||||
```
|
||||
|
||||
And to my complete befuddlement, it compiled, ran, and produced a completely sensible output. The
|
||||
reason I was so surprised has to do with how Rust treats a special category of things I'm going to
|
||||
call _primitives_. In the current version of the Rust book, you'll see them referred to as
|
||||
[scalars][rust_scalar], and in older versions they'll be called [primitives][rust_primitive], but
|
||||
we're going to stick with the name _primitive_ for the time being. Explaining why this program is so
|
||||
cool requires talking about a number of other programming languages, and keeping a consistent
|
||||
terminology makes things easier.
|
||||
|
||||
**You've been warned:** this is going to be a tedious post about a relatively minor issue that
|
||||
involves Java, Python, C, and x86 Assembly. And also me pretending like I know what I'm talking
|
||||
about with assembly.
|
||||
|
||||
# Defining primitives (Java)
|
||||
|
||||
The reason I'm using the name _primitive_ comes from how much of my life is Java right now. Spoiler
|
||||
alert: a lot of it. And for the most part I like Java, but I digress. In Java, there's a special
|
||||
name for some specific types of values:
|
||||
|
||||
> ```
|
||||
> bool char byte
|
||||
> short int long
|
||||
> float double
|
||||
> ```
|
||||
|
||||
|
||||
|
||||
They are referred to as [primitives][java_primitive]. And relative to the other bits of Java,
|
||||
they have two unique features. First, they don't have to worry about the
|
||||
[billion-dollar mistake](https://en.wikipedia.org/wiki/Tony_Hoare#Apologies_and_retractions);
|
||||
primitives in Java can never be `null`. Second: *they can't have instance methods*.
|
||||
Remember that Rust program from earlier? Java has no idea what to do with it:
|
||||
|
||||
```java
|
||||
class Main {
|
||||
public static void main(String[] args) {
|
||||
int x = 8;
|
||||
System.out.println(x.toString()); // Triggers a compiler error
|
||||
}
|
||||
}
|
||||
````
|
||||
|
||||
The error is:
|
||||
|
||||
```
|
||||
Main.java:5: error: int cannot be dereferenced
|
||||
System.out.println(x.toString());
|
||||
^
|
||||
1 error
|
||||
```
|
||||
|
||||
Specifically, Java's [`Object`](https://docs.oracle.com/javase/10/docs/api/java/lang/Object.html)
|
||||
and things that inherit from it are pointers under the hood, and we have to dereference them before
|
||||
the fields and methods they define can be used. In contrast, _primitive types are just values_ -
|
||||
there's nothing to be dereferenced. In memory, they're just a sequence of bits.
|
||||
|
||||
If we really want, we can turn the `int` into an
|
||||
[`Integer`](https://docs.oracle.com/javase/10/docs/api/java/lang/Integer.html) and then dereference
|
||||
it, but it's a bit wasteful:
|
||||
|
||||
```java
|
||||
class Main {
|
||||
public static void main(String[] args) {
|
||||
int x = 8;
|
||||
Integer y = Integer.valueOf(x);
|
||||
System.out.println(y.toString());
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
This creates the variable `y` of type `Integer` (which inherits `Object`), and at run time we
|
||||
dereference `y` to locate the `toString()` function and call it. Rust obviously handles things a bit
|
||||
differently, but we have to dig into the low-level details to see it in action.
|
||||
|
||||
# Low Level Handling of Primitives (C)
|
||||
|
||||
We first need to build a foundation for reading and understanding the assembly code the final answer
|
||||
requires. Let's begin with showing how the `C` language (and your computer) thinks about "primitive"
|
||||
values in memory:
|
||||
|
||||
```c
|
||||
void my_function(int num) {}
|
||||
|
||||
int main() {
|
||||
int x = 8;
|
||||
my_function(x);
|
||||
}
|
||||
```
|
||||
|
||||
The [compiler explorer](https://godbolt.org/z/lgNYcc) gives us an easy way of showing off the
|
||||
assembly-level code that's generated: <span style="font-size:.6em">whose output has been lightly
|
||||
edited</span>
|
||||
|
||||
```nasm
|
||||
main:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
sub rsp, 16
|
||||
|
||||
; We assign the value `8` to `x` here
|
||||
mov DWORD PTR [rbp-4], 8
|
||||
|
||||
; And copy the bits making up `x` to a location
|
||||
; `my_function` can access (`edi`)
|
||||
mov eax, DWORD PTR [rbp-4]
|
||||
mov edi, eax
|
||||
|
||||
; Call `my_function` and give it control
|
||||
call my_function
|
||||
|
||||
mov eax, 0
|
||||
leave
|
||||
ret
|
||||
|
||||
my_function:
|
||||
push rbp
|
||||
mov rbp, rsp
|
||||
|
||||
; Copy the bits out of the pre-determined location (`edi`)
|
||||
; to somewhere we can use
|
||||
mov DWORD PTR [rbp-4], edi
|
||||
nop
|
||||
|
||||
pop rbp
|
||||
ret
|
||||
```
|
||||
|
||||
At a really low level of memory, we're copying bits around using the [`mov`][x86_guide] instruction;
|
||||
nothing crazy. But to show how similar Rust is, let's take a look at our program translated from C
|
||||
to Rust:
|
||||
|
||||
```rust
|
||||
fn my_function(x: i32) {}
|
||||
|
||||
fn main() {
|
||||
let x = 8;
|
||||
my_function(x)
|
||||
}
|
||||
```
|
||||
|
||||
And the assembly generated when we stick it in the
|
||||
[compiler explorer](https://godbolt.org/z/cAlmk0): <span style="font-size:.6em">again, lightly
|
||||
edited</span>
|
||||
|
||||
```nasm
|
||||
example::main:
|
||||
push rax
|
||||
|
||||
; Look familiar? We're copying bits to a location for `my_function`
|
||||
; The compiler just optimizes out holding `x` in memory
|
||||
mov edi, 8
|
||||
|
||||
; Call `my_function` and give it control
|
||||
call example::my_function
|
||||
|
||||
pop rax
|
||||
ret
|
||||
|
||||
example::my_function:
|
||||
sub rsp, 4
|
||||
|
||||
; And copying those bits again, just like in C
|
||||
mov dword ptr [rsp], edi
|
||||
|
||||
add rsp, 4
|
||||
ret
|
||||
```
|
||||
|
||||
The generated Rust assembly is functionally pretty close to the C assembly: _When working with
|
||||
primitives, we're just dealing with bits in memory_.
|
||||
|
||||
In Java we have to dereference a pointer to call its functions; in Rust, there's no pointer to
|
||||
dereference. So what exactly is going on with this `.to_string()` function call?
|
||||
|
||||
# impl primitive (and Python)
|
||||
|
||||
Now it's time to <strike>reveal my trap card</strike> show the revelation that tied all this
|
||||
together: _Rust has implementations for its primitive types._ That's right, `impl` blocks aren't
|
||||
only for `structs` and `traits`, primitives get them too. Don't believe me? Check out
|
||||
[u32](https://doc.rust-lang.org/std/primitive.u32.html),
|
||||
[f64](https://doc.rust-lang.org/std/primitive.f64.html) and
|
||||
[char](https://doc.rust-lang.org/std/primitive.char.html) as examples.
|
||||
|
||||
But the really interesting bit is how Rust turns those `impl` blocks into assembly. Let's break out
|
||||
the [compiler explorer](https://godbolt.org/z/6LBEwq) once again:
|
||||
|
||||
```rust
|
||||
pub fn main() {
|
||||
8.to_string();
|
||||
}
|
||||
```
|
||||
|
||||
And the interesting bits in the assembly: <span style="font-size:.6em">heavily trimmed down</span>
|
||||
|
||||
```nasm
|
||||
example::main:
|
||||
sub rsp, 24
|
||||
mov rdi, rsp
|
||||
lea rax, [rip + .Lbyte_str.u]
|
||||
mov rsi, rax
|
||||
|
||||
; Cool stuff right here
|
||||
call <T as alloc::string::ToString>::to_string@PLT
|
||||
|
||||
mov rdi, rsp
|
||||
call core::ptr::drop_in_place
|
||||
add rsp, 24
|
||||
ret
|
||||
```
|
||||
|
||||
Now, this assembly is a bit more complicated, but here's the big revelation: **we're calling
|
||||
`to_string()` as a function that exists all on its own, and giving it the instance of `8`**. Instead
|
||||
of thinking of the value 8 as an instance of `i32` and then peeking in to find the location of the
|
||||
function we want to call (like Java), we have a function that exists outside of the instance and
|
||||
just give that function the value `8`.
|
||||
|
||||
This is an incredibly technical detail, but the interesting idea I had was this: _if `to_string()`
|
||||
is a static function, can I refer to the unbound function and give it an instance?_
|
||||
|
||||
Better explained in code (and a [compiler explorer](https://godbolt.org/z/fJY-gA) link because I
|
||||
seriously love this thing):
|
||||
|
||||
```rust
|
||||
struct MyVal {
|
||||
x: u32
|
||||
}
|
||||
|
||||
impl MyVal {
|
||||
fn to_string(&self) -> String {
|
||||
self.x.to_string()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let my_val = MyVal { x: 8 };
|
||||
|
||||
// THESE ARE THE SAME
|
||||
my_val.to_string();
|
||||
MyVal::to_string(&my_val);
|
||||
}
|
||||
```
|
||||
|
||||
Rust is totally fine "binding" the function call to the instance, and also calling it as a static function.
|
||||
|
||||
MIND == BLOWN.
|
||||
|
||||
Python does the same thing, where I can both call functions bound to their instances and also call
|
||||
them as unbound functions where I give them the instance:
|
||||
|
||||
```python
|
||||
class MyClass():
|
||||
x = 24
|
||||
|
||||
def my_function(self):
|
||||
print(self.x)
|
||||
|
||||
m = MyClass()
|
||||
|
||||
m.my_function()
|
||||
MyClass.my_function(m)
|
||||
```
|
||||
|
||||
And Python tries to make you _think_ that primitives can have instance methods...
|
||||
|
||||
```python
|
||||
>>> dir(8)
|
||||
['__abs__', '__add__', '__and__', '__class__', '__cmp__', '__coerce__',
|
||||
'__delattr__', '__div__', '__divmod__', '__doc__', '__float__', '__floordiv__',
|
||||
...
|
||||
'__setattr__', '__sizeof__', '__str__', '__sub__', '__subclasshook__', '__truediv__',
|
||||
...]
|
||||
|
||||
>>> # Theoretically `8.__str__()` should exist, but:
|
||||
|
||||
>>> 8.__str__()
|
||||
File "<stdin>", line 1
|
||||
8.__str__()
|
||||
^
|
||||
SyntaxError: invalid syntax
|
||||
|
||||
>>> # It will run if we assign it first though:
|
||||
>>> x = 8
|
||||
>>> x.__str__()
|
||||
'8'
|
||||
```
|
||||
|
||||
...but in practice it's a bit complicated.
|
||||
|
||||
So while Python handles binding instance methods in a way similar to Rust, it's still not able to
|
||||
run the example we started with.
|
||||
|
||||
# Conclusion
|
||||
|
||||
This was a super-roundabout way of demonstrating it, but the way Rust handles incredibly minor
|
||||
details like primitives leads to really cool effects. Primitives are optimized like C in how they
|
||||
have a space-efficient memory layout, yet the language still has a lot of features I enjoy in Python
|
||||
(like both instance and late binding).
|
||||
|
||||
And when you put it together, there are areas where Rust does cool things nobody else can; as a
|
||||
quirky feature of Rust's type system, `8.to_string()` is actually valid code.
|
||||
|
||||
Now go forth and fool your friends into thinking you know assembly. This is all I've got.
|
||||
|
||||
[x86_guide]: http://www.cs.virginia.edu/~evans/cs216/guides/x86.html
|
||||
[java_primitive]: https://docs.oracle.com/javase/tutorial/java/nutsandbolts/datatypes.html
|
||||
[rust_scalar]: https://doc.rust-lang.org/book/second-edition/ch03-02-data-types.html#scalar-types
|
||||
[rust_primitive]: https://doc.rust-lang.org/book/first-edition/primitive-types.html
|
@ -1,294 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Isomorphic Desktop Apps with Rust"
|
||||
description: "Electron + WASM = ☣"
|
||||
category:
|
||||
tags: [rust, javascript, webassembly]
|
||||
---
|
||||
|
||||
Forgive me, but this is going to be a bit of a schizophrenic post. I both despise Javascript and the
|
||||
modern ECMAScript ecosystem, and I'm stunned by its success doing some really cool things. It's
|
||||
[this duality](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) that's
|
||||
led me to a couple of (very) late nights over the past weeks trying to reconcile myself as I
|
||||
bootstrap a simple desktop application.
|
||||
|
||||
See, as much as
|
||||
[Webassembly isn't trying to replace Javascript](https://webassembly.org/docs/faq/#is-webassembly-trying-to-replace-javascript),
|
||||
**I want Javascript gone**. There are plenty of people who don't share my views, and they are
|
||||
probably nicer and more fun at parties. But I cringe every time "Webpack" is mentioned, and I think
|
||||
it's hilarious that the
|
||||
[language specification](https://ecma-international.org/publications/standards/Ecma-262.htm)
|
||||
dramatically outpaces anyone's
|
||||
[actual implementation](https://kangax.github.io/compat-table/es2016plus/). The answer to this
|
||||
conundrum is of course to recompile code from newer versions of the language to older versions _of
|
||||
the same language_ before running. At least [Babel] is a nice tongue-in-cheek reference.
|
||||
|
||||
Yet for as much hate as [Electron] receives, it does a stunningly good job at solving a really hard
|
||||
problem: _how the hell do I put a button on the screen and react when the user clicks it_? GUI
|
||||
programming is hard, straight up. But if browsers are already able to run everywhere, why don't we
|
||||
take advantage of someone else solving the hard problems for us? I don't like that I have to use
|
||||
Javascript for it, but I really don't feel inclined to whip out good ol' [wxWidgets].
|
||||
|
||||
Now there are other native solutions ([libui-rs], [conrod], [oh hey wxWidgets again!][wxrust]), but
|
||||
those also have their own issues with distribution, styling, etc. With Electron, I can
|
||||
`yarn create electron-app my-app` and just get going, knowing that packaging/upgrades/etc. are built
|
||||
in.
|
||||
|
||||
My question is: given recent innovations with WASM, _are we Electron yet_?
|
||||
|
||||
No, not really.
|
||||
|
||||
Instead, **what would it take to get to a point where we can skip Javascript in Electron apps?**
|
||||
|
||||
# Setting the Stage
|
||||
|
||||
Truth is, WASM/Webassembly is a pretty new technology and I'm a total beginner in this area. There
|
||||
may already be solutions to the issues I discuss, but I'm totally unaware of them, so I'm going to
|
||||
try and organize what I did manage to discover.
|
||||
|
||||
I should also mention that the content and things I'm talking about here are not intended to be
|
||||
prescriptive, but more "if someone else is interested, what do we already know doesn't work?" _I
|
||||
expect everything in this post to be obsolete within two months._ Even over the course of writing
|
||||
this, [a separate blog post](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/) had
|
||||
to be modified because [upstream changes](https://github.com/WebAssembly/binaryen/pull/1642) broke a
|
||||
[Rust tool](https://github.com/rustwasm/wasm-bindgen/pull/787) the post tried to use. The post
|
||||
ultimately
|
||||
[got updated](https://mnt.io/2018/08/28/from-rust-to-beyond-the-asm-js-galaxy/#comment-477), **but
|
||||
all this happened within the span of a week.** Things are moving quickly.
|
||||
|
||||
I'll also note that we're going to skip [asm.js] and [emscripten]. Truth be told, I couldn't get
|
||||
either of these to output anything, and so I'm just going to say
|
||||
[here be dragons.](https://en.wikipedia.org/wiki/Here_be_dragons) Everything I'm discussing here
|
||||
uses the `wasm32-unknown-unknown` target.
|
||||
|
||||
The code that I _did_ get running is available
|
||||
[over here](https://github.com/speice-io/isomorphic-rust). Feel free to use it as a starting point,
|
||||
but I'm mostly including the link as a reference for the things that were attempted.
|
||||
|
||||
# An Example Running Application
|
||||
|
||||
So, I did _technically_ get a running application:
|
||||
|
||||
![Electron app using WASM](/assets/images/2018-09-15-electron-percy-wasm.png)
|
||||
|
||||
...which you can also try out if you want:
|
||||
|
||||
```sh
|
||||
git clone https://github.com/speice-io/isomorphic-rust.git
|
||||
cd isomorphic-rust/percy
|
||||
yarn install && yarn start
|
||||
```
|
||||
|
||||
...but I wouldn't really call it a "high quality" starting point to base future work on. It's mostly
|
||||
there to prove this is possible in the first place. And that's something to be proud of! There's a
|
||||
huge amount of engineering that went into showing a window with the text "It's alive!".
|
||||
|
||||
There are also a lot of usability issues that prevent me from recommending anyone try Electron and
|
||||
WASM apps at the moment, and I think that's the more important thing to discuss.
|
||||
|
||||
# Issue the First: Complicated Toolchains
|
||||
|
||||
I quickly established that [wasm-bindgen] was necessary to "link" my Rust code to Javascript. At
|
||||
that point you've got an Electron app that starts an HTML page which ultimately fetches your WASM
|
||||
blob. To keep things simple, the goal was to package everything using [webpack] so that I could just
|
||||
load a `bundle.js` file on the page. That decision was to be the last thing that kinda worked in
|
||||
this process.
|
||||
|
||||
The first issue
|
||||
[I ran into](https://www.reddit.com/r/rust/comments/98lpun/unable_to_load_wasm_for_electron_application/)
|
||||
while attempting to bundle everything via `webpack` is a detail in the WASM spec:
|
||||
|
||||
> This function accepts a Response object, or a promise for one, and ... **[if it] does not match
|
||||
> the `application/wasm` MIME type**, the returned promise will be rejected with a TypeError;
|
||||
>
|
||||
> [WebAssembly - Additional Web Embedding API](https://webassembly.org/docs/web/#additional-web-embedding-api)
|
||||
|
||||
Specifically, if you try and load a WASM blob without the MIME type set, you'll get an error. On the
|
||||
web this isn't a huge issue, as the server can set MIME types when delivering the blob. With
|
||||
Electron, you're resolving things with a `file://` URL and thus can't control the MIME type:
|
||||
|
||||
![TypeError: Incorrect response MIME type. Expected 'application/wasm'.](/assets/images/2018-09-15-incorrect-MIME-type.png)
|
||||
|
||||
There are a couple of solutions depending on how far into the deep end you care to venture:
|
||||
|
||||
- Embed a static file server in your Electron application
|
||||
- Use a [custom protocol](https://electronjs.org/docs/api/protocol) and custom protocol handler
|
||||
- Host your WASM blob on a website that you resolve at runtime
|
||||
|
||||
But all these are pretty bad solutions and defeat the purpose of using WASM in the first place.
|
||||
Instead, my workaround was to
|
||||
[open a PR with `webpack`](https://github.com/webpack/webpack/issues/7918) and use regex to remove
|
||||
calls to `instantiateStreaming` in the
|
||||
[build script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/build.sh#L21-L25):
|
||||
|
||||
```sh
|
||||
cargo +nightly build --target=wasm32-unknown-unknown && \
|
||||
wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
|
||||
# Have to use --mode=development so we can patch out the call to instantiateStreaming
|
||||
"$DIR/node_modules/webpack-cli/bin/cli.js" --mode=development "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js" && \
|
||||
sed -i 's/.*instantiateStreaming.*//g' "$APP_DIR/bundle.js"
|
||||
```
|
||||
|
||||
Once that lands, the
|
||||
[build process](https://github.com/speice-io/isomorphic-rust/blob/master/percy_patched_webpack/build.sh#L24-L27)
|
||||
becomes much simpler:
|
||||
|
||||
```sh
|
||||
|
||||
cargo +nightly build --target=wasm32-unknown-unknown && \
|
||||
wasm-bindgen "$WASM_DIR/debug/$WASM_NAME.wasm" --out-dir "$APP_DIR" --no-typescript && \
|
||||
"$DIR/node_modules/webpack-cli/bin/cli.js" --mode=production "$APP_DIR/app_loader.js" -o "$APP_DIR/bundle.js"
|
||||
```
|
||||
|
||||
But we're not done yet! After we compile Rust into WASM and link WASM to Javascript (via
|
||||
`wasm-bindgen` and `webpack`), we still have to make an Electron app. For this purpose I used a
|
||||
starter app from [Electron Forge], and then a
|
||||
[`prestart` script](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
|
||||
to actually handle starting the application.
|
||||
|
||||
The
|
||||
[final toolchain](https://github.com/speice-io/isomorphic-rust/blob/master/percy/package.json#L8)
|
||||
looks something like this:
|
||||
|
||||
- `yarn start` triggers the `prestart` script
|
||||
- `prestart` checks for missing tools (`wasm-bindgen-cli`, etc.) and then:
|
||||
- Uses `cargo` to compile the Rust code into WASM
|
||||
- Uses `wasm-bindgen` to link the WASM blob into a Javascript file with exported symbols
|
||||
- Uses `webpack` to bundle the page start script with the Javascript we just generated
|
||||
- Uses `babel` under the hood to compile the `wasm-bindgen` code down from ES6 into something
|
||||
browser-compatible
|
||||
- The `start` script runs an Electron Forge handler to do some sanity checks
|
||||
- Electron actually starts
|
||||
|
||||
...which is complicated. I think more work needs to be done to either build a high-quality starter
|
||||
app that can manage these steps, or another tool that "just handles" the complexity of linking a
|
||||
compiled WASM file into something the Electron browser can run.
|
||||
|
||||
# Issue the Second: WASM tools in Rust
|
||||
|
||||
For as much as I didn't enjoy the Javascript tooling needed to interface with Rust, the Rust-only
|
||||
bits aren't any better at the moment. I get it, a lot of projects are just starting off, and that
|
||||
leads to a fragmented ecosystem. Here's what I can recommend as a starting point:
|
||||
|
||||
Don't check in your `Cargo.lock` files to version control. If there's a disagreement between the
|
||||
version of `wasm-bindgen-cli` you have installed and the `wasm-bindgen` you're compiling with in
|
||||
`Cargo.lock`, you get a nasty error:
|
||||
|
||||
```
|
||||
it looks like the Rust project used to create this wasm file was linked against
|
||||
a different version of wasm-bindgen than this binary:
|
||||
|
||||
rust wasm file: 0.2.21
|
||||
this binary: 0.2.17
|
||||
|
||||
Currently the bindgen format is unstable enough that these two version must
|
||||
exactly match, so it's required that these two version are kept in sync by
|
||||
either updating the wasm-bindgen dependency or this binary.
|
||||
```
|
||||
|
||||
Not that I ever managed to run into this myself (_coughs nervously_).
|
||||
|
||||
There are two projects attempting to be "application frameworks": [percy] and [yew]. Between those,
|
||||
I managed to get [two](https://github.com/speice-io/isomorphic-rust/tree/master/percy)
|
||||
[examples](https://github.com/speice-io/isomorphic-rust/tree/master/percy_patched_webpack) running
|
||||
using `percy`, but was unable to get an
|
||||
[example](https://github.com/speice-io/isomorphic-rust/tree/master/yew) running with `yew` because
|
||||
of issues with "missing modules" during the `webpack` step:
|
||||
|
||||
```sh
|
||||
ERROR in ./dist/electron_yew_wasm_bg.wasm
|
||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/yew/dist'
|
||||
@ ./dist/electron_yew_wasm_bg.wasm
|
||||
@ ./dist/electron_yew_wasm.js
|
||||
@ ./dist/app.js
|
||||
@ ./dist/app_loader.js
|
||||
```
|
||||
|
||||
If you want to work with the browser APIs directly, your choices are [percy-webapis] or [stdweb] (or
|
||||
eventually [web-sys]). See above for my `percy` examples, but when I tried
|
||||
[an example with `stdweb`](https://github.com/speice-io/isomorphic-rust/tree/master/stdweb), I was
|
||||
unable to get it running:
|
||||
|
||||
```sh
|
||||
ERROR in ./dist/stdweb_electron_bg.wasm
|
||||
Module not found: Error: Can't resolve 'env' in '/home/bspeice/Development/isomorphic_rust/stdweb/dist'
|
||||
@ ./dist/stdweb_electron_bg.wasm
|
||||
@ ./dist/stdweb_electron.js
|
||||
@ ./dist/app_loader.js
|
||||
```
|
||||
|
||||
At this point I'm pretty convinced that `stdweb` is causing issues for `yew` as well, but can't
|
||||
prove it.
|
||||
|
||||
I did also get a [minimal example](https://github.com/speice-io/isomorphic-rust/tree/master/minimal)
|
||||
running that doesn't depend on any tools besides `wasm-bindgen`. However, it requires manually
|
||||
writing `extern "C"` blocks for everything you need from the browser. Es no bueno.
|
||||
|
||||
Finally, from a tools and platform view, there are two up-and-coming packages that should be
|
||||
mentioned: [js-sys] and [web-sys]. Their purpose is to be fundamental building blocks that expose
|
||||
the browser's APIs to Rust. If you're interested in building an app framework from scratch, these
|
||||
should give you the most flexibility. I didn't touch either in my research, though I expect them to
|
||||
be essential long-term.
|
||||
|
||||
So there's a lot in play from the Rust side of things, and it's just going to take some time to
|
||||
figure out what works and what doesn't.
|
||||
|
||||
# Issue the Third: Known Unknowns
|
||||
|
||||
Alright, so after I managed to get an application started, I stopped there. It was a good deal of
|
||||
effort to chain together even a proof of concept, and at this point I'd rather learn [Typescript]
|
||||
than keep trying to maintain an incredibly brittle pipeline. Blasphemy, I know...
|
||||
|
||||
The important point I want to make is that there's a lot unknown about how any of this holds up
|
||||
outside proofs of concept. Things I didn't attempt:
|
||||
|
||||
- Testing
|
||||
- Packaging
|
||||
- Updates
|
||||
- Literally anything related to why I wanted to use Electron in the first place
|
||||
|
||||
# What it Would Take
|
||||
|
||||
Much as I don't like Javascript, the tools are too shaky for me to recommend mixing Electron and
|
||||
WASM at the moment. There's a lot of innovation happening, so who knows? Someone might have an
|
||||
application in production a couple months from now. But at the moment, I'm personally going to stay
|
||||
away.
|
||||
|
||||
Let's finish with a wishlist then - here are the things that I think need to happen before
|
||||
Electron/WASM/Rust can become a thing:
|
||||
|
||||
- Webpack still needs some updates. The necessary work is in progress, but hasn't landed yet
|
||||
([#7983](https://github.com/webpack/webpack/pull/7983))
|
||||
- Browser API libraries (`web-sys` and `stdweb`) need to make sure they can support running in
|
||||
Electron (see module error above)
|
||||
- Projects need to stabilize. There's talk of `stdweb` being turned into a Rust API
|
||||
[on top of web-sys](https://github.com/rustwasm/team/issues/226#issuecomment-418475778), and percy
|
||||
[moving to web-sys](https://github.com/chinedufn/percy/issues/24), both of which are big changes
|
||||
- `wasm-bindgen` is great, but still in the "move fast and break things" phase
|
||||
- A good "boilerplate" app would dramatically simplify the start-up costs;
|
||||
[electron-react-boilerplate](https://github.com/chentsulin/electron-react-boilerplate) comes to
|
||||
mind as a good project to imitate
|
||||
- More blog posts/contributors! I think Electron + Rust could be cool, but I have no idea what I'm
|
||||
doing
|
||||
|
||||
[wxwidgets]: https://wxwidgets.org/
|
||||
[libui-rs]: https://github.com/LeoTindall/libui-rs/
|
||||
[electron]: https://electronjs.org/
|
||||
[babel]: https://babeljs.io/
|
||||
[wxrust]: https://github.com/kenz-gelsoft/wxRust
|
||||
[wasm-bindgen]: https://github.com/rustwasm/wasm-bindgen
|
||||
[js-sys]: https://crates.io/crates/js-sys
|
||||
[percy-webapis]: https://crates.io/crates/percy-webapis
|
||||
[stdweb]: https://crates.io/crates/stdweb
|
||||
[web-sys]: https://crates.io/crates/web-sys
|
||||
[percy]: https://chinedufn.github.io/percy/
|
||||
[virtual-dom-rs]: https://crates.io/crates/virtual-dom-rs
|
||||
[yew]: https://github.com/DenisKolodin/yew
|
||||
[react]: https://reactjs.org/
|
||||
[elm]: http://elm-lang.org/
|
||||
[asm.js]: http://asmjs.org/
|
||||
[emscripten]: https://kripken.github.io/emscripten-site/
|
||||
[typescript]: https://www.typescriptlang.org/
|
||||
[electron forge]: https://electronforge.io/
|
||||
[conrod]: https://github.com/PistonDevelopers/conrod
|
||||
[webpack]: https://webpack.js.org/
|
@ -1,168 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "A Case Study in Heaptrack"
|
||||
description: "...because you don't need no garbage collection"
|
||||
category:
|
||||
tags: []
|
||||
---
|
||||
|
||||
One of my earliest conversations about programming went like this:
|
||||
|
||||
> Programmers have it too easy these days. They should learn to develop in low memory environments
|
||||
> and be more efficient.
|
||||
>
|
||||
> -- My Father (paraphrased)
|
||||
|
||||
...though it's not like the first code I wrote was for a
|
||||
[graphing calculator](https://education.ti.com/en/products/calculators/graphing-calculators/ti-84-plus-se)
|
||||
packing a whole 24KB of RAM. By the way, _what are you doing on my lawn?_
|
||||
|
||||
The principle remains though: be efficient with the resources you have, because
|
||||
[what Intel giveth, Microsoft taketh away](http://exo-blog.blogspot.com/2007/09/what-intel-giveth-microsoft-taketh-away.html).
|
||||
My professional work is focused on this kind of efficiency; low-latency financial markets demand
|
||||
that you understand at a deep level _exactly_ what your code is doing. As I continue experimenting
|
||||
with Rust for personal projects, it's exciting to bring a utilitarian mindset with me: there's
|
||||
flexibility for the times I pretend to have a garbage collector, and flexibility for the times that
|
||||
I really care about how memory is used.
|
||||
|
||||
This post is a (small) case study in how I went from the former to the latter. And ultimately, it's
|
||||
intended to be a starting toolkit to empower analysis of your own code.
|
||||
|
||||
# Curiosity
|
||||
|
||||
When I first started building the [dtparse] crate, my intention was to mirror as closely as possible
|
||||
the equivalent [Python library][dateutil]. Python, as you may know, is garbage collected. Very
|
||||
rarely is memory usage considered in Python, and I likewise wasn't paying too much attention when
|
||||
`dtparse` was first being built.
|
||||
|
||||
This lackadaisical approach to memory works well enough, and I'm not planning on making `dtparse`
|
||||
hyper-efficient. But every so often, I've wondered: "what exactly is going on in memory?" With the
|
||||
advent of Rust 1.28 and the
|
||||
[Global Allocator trait](https://doc.rust-lang.org/std/alloc/trait.GlobalAlloc.html), I had a really
|
||||
great idea: _build a custom allocator that allows you to track your own allocations._ That way, you
|
||||
can do things like writing tests for both correct results and correct memory usage. I gave it a
|
||||
[shot][qadapt], but learned very quickly: **never write your own allocator**. It went from "fun
|
||||
weekend project" to "I have literally no idea what my computer is doing" at breakneck speed.
|
||||
|
||||
Instead, I'll highlight a separate path I took to make sense of my memory usage: [heaptrack].
|
||||
|
||||
# Turning on the System Allocator
|
||||
|
||||
This is the hardest part of the post. Because Rust uses
|
||||
[its own allocator](https://github.com/rust-lang/rust/pull/27400#issue-41256384) by default,
|
||||
`heaptrack` is unable to properly record unmodified Rust code. To remedy this, we'll make use of the
|
||||
`#[global_allocator]` attribute.
|
||||
|
||||
Specifically, in `lib.rs` or `main.rs`, add this:
|
||||
|
||||
```rust
|
||||
use std::alloc::System;
|
||||
|
||||
#[global_allocator]
|
||||
static GLOBAL: System = System;
|
||||
```
|
||||
|
||||
...and that's it. Everything else comes essentially for free.
|
||||
|
||||
# Running heaptrack
|
||||
|
||||
Assuming you've installed heaptrack <span style="font-size: .6em;">(Homebrew in Mac, package manager
|
||||
in Linux, ??? in Windows)</span>, all that's left is to fire up your application:
|
||||
|
||||
```
|
||||
heaptrack my_application
|
||||
```
|
||||
|
||||
It's that easy. After the program finishes, you'll see a file in your local directory with a name
|
||||
like `heaptrack.my_application.XXXX.gz`. If you load that up in `heaptrack_gui`, you'll see
|
||||
something like this:
|
||||
|
||||
![heaptrack](/assets/images/2018-10-heaptrack/heaptrack-before.png)
|
||||
|
||||
---
|
||||
|
||||
And even these pretty colors:
|
||||
|
||||
![pretty colors](/assets/images/2018-10-heaptrack/heaptrack-flamegraph.png)
|
||||
|
||||
# Reading Flamegraphs
|
||||
|
||||
To make sense of our memory usage, we're going to focus on that last picture - it's called a
|
||||
["flamegraph"](http://www.brendangregg.com/flamegraphs.html). These charts are typically used to
|
||||
show how much time your program spends executing each function, but they're used here to show how
|
||||
much memory was allocated during those functions instead.
|
||||
|
||||
For example, we can see that all allocations happened during the `main` function:
|
||||
|
||||
![allocations in main](/assets/images/2018-10-heaptrack/heaptrack-main-colorized.png)
|
||||
|
||||
...and within that, all allocations happened during `dtparse::parse`:
|
||||
|
||||
![allocations in dtparse](/assets/images/2018-10-heaptrack/heaptrack-dtparse-colorized.png)
|
||||
|
||||
...and within _that_, allocations happened in two different places:
|
||||
|
||||
![allocations in parseinfo](/assets/images/2018-10-heaptrack/heaptrack-parseinfo-colorized.png)
|
||||
|
||||
Now I apologize that it's hard to see, but there's one area specifically that stuck out as an issue:
|
||||
**what the heck is the `Default` thing doing?**
|
||||
|
||||
![pretty colors](/assets/images/2018-10-heaptrack/heaptrack-flamegraph-default.png)
|
||||
|
||||
# Optimizing dtparse
|
||||
|
||||
See, I knew that there were some allocations during calls to `dtparse::parse`, but I was totally
|
||||
wrong about where the bulk of allocations occurred in my program. Let me post the code and see if
|
||||
you can spot the mistake:
|
||||
|
||||
```rust
|
||||
/// Main entry point for using `dtparse`.
|
||||
pub fn parse(timestr: &str) -> ParseResult<(NaiveDateTime, Option<FixedOffset>)> {
|
||||
let res = Parser::default().parse(
|
||||
timestr, None, None, false, false,
|
||||
None, false,
|
||||
&HashMap::new(),
|
||||
)?;
|
||||
|
||||
Ok((res.0, res.1))
|
||||
}
|
||||
```
|
||||
|
||||
> [dtparse](https://github.com/bspeice/dtparse/blob/4d7c5dd99572823fa4a390b483c38ab020a2172f/src/lib.rs#L1286)
|
||||
|
||||
---
|
||||
|
||||
Because `Parser::parse` requires a mutable reference to itself, I have to create a new
|
||||
`Parser::default` every time it receives a string. This is excessive! We'd rather have an immutable
|
||||
parser that can be re-used, and avoid allocating memory in the first place.
|
||||
|
||||
Armed with that information, I put some time in to
|
||||
[make the parser immutable](https://github.com/bspeice/dtparse/commit/741afa34517d6bc1155713bbc5d66905fea13fad#diff-b4aea3e418ccdb71239b96952d9cddb6).
|
||||
Now that I can re-use the same parser over and over, the allocations disappear:
|
||||
|
||||
![allocations cleaned up](/assets/images/2018-10-heaptrack/heaptrack-flamegraph-after.png)
|
||||
|
||||
In total, we went from requiring 2 MB of memory in
|
||||
[version 1.0.2](https://crates.io/crates/dtparse/1.0.2):
|
||||
|
||||
![memory before](/assets/images/2018-10-heaptrack/heaptrack-closeup.png)
|
||||
|
||||
All the way down to 300KB in [version 1.0.3](https://crates.io/crates/dtparse/1.0.3):
|
||||
|
||||
![memory after](/assets/images/2018-10-heaptrack/heaptrack-closeup-after.png)
|
||||
|
||||
# Conclusion
|
||||
|
||||
In the end, you don't need to write a custom allocator to be efficient with memory; great tools
|
||||
already exist to help you understand what your program is doing.
|
||||
|
||||
**Use them.**
|
||||
|
||||
Given that [Moore's Law](https://en.wikipedia.org/wiki/Moore%27s_law) is
|
||||
[dead](https://www.technologyreview.com/s/601441/moores-law-is-dead-now-what/), we've all got to do
|
||||
our part to take back what Microsoft stole.
|
||||
|
||||
[dtparse]: https://crates.io/crates/dtparse
|
||||
[dateutil]: https://github.com/dateutil/dateutil
|
||||
[heaptrack]: https://github.com/KDE/heaptrack
|
||||
[qadapt]: https://crates.io/crates/qadapt
|
@ -1,34 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: 'More "What Companies Really Mean"'
|
||||
description: 'when they ask "Why should we hire you?"'
|
||||
category:
|
||||
tags: []
|
||||
---
|
||||
|
||||
I recently stumbled across a phenomenal small article entitled
|
||||
[What Startups Really Mean By "Why Should We Hire You?"](https://angel.co/blog/what-startups-really-mean-by-why-should-we-hire-you).
|
||||
Having been interviewed by smaller companies (though not exactly startups), the questions and
|
||||
subtexts are the same. There's often a question behind the question that you're actually trying to
|
||||
answer, and I wish I had spotted the nuance earlier in my career.
|
||||
|
||||
Let me also make note of one more question/euphemism I've come across:
|
||||
|
||||
# How do you feel about Production Support?
|
||||
|
||||
**Translation**: _We're a fairly small team, and when things break on an evening/weekend/Christmas
|
||||
Day, can we call on you to be there?_
|
||||
|
||||
I've met decidedly few people in my life who truly enjoy the "ops" side of "devops". They're
|
||||
incredibly good at taking an impossible problem, pre-existing knowledge of arcane arts, and turning
|
||||
that into a functioning system at the end. And if they all left for lunch, we probably wouldn't make
|
||||
it out the door before the zombie apocalypse.
|
||||
|
||||
Larger organizations (in my experience, 500+ person organizations) have the luxury of hiring people
|
||||
who either enjoy that, or play along nicely enough that our systems keep working.
|
||||
|
||||
Small teams have no such luck. If you're interviewing at a small company, especially as a "data
|
||||
scientist" or other somesuch position, be aware that systems can and do spontaneously combust at the
|
||||
most inopportune moments.
|
||||
|
||||
**Terrible-but-popular answers include**: _It's a part of the job, and I'm happy to contribute._
|
@ -1,218 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "QADAPT - debug_assert! for your memory usage"
|
||||
description: "...and why you want an allocator that goes 💥."
|
||||
category:
|
||||
tags: []
|
||||
---
|
||||
|
||||
I think it's part of the human condition to ignore perfectly good advice when it comes our way. A
|
||||
bit over a month ago, I was dispensing sage wisdom for the ages:
|
||||
|
||||
> I had a really great idea: build a custom allocator that allows you to track your own allocations.
|
||||
> I gave it a shot, but learned very quickly: **never write your own allocator.**
|
||||
>
|
||||
> -- [me](/2018/10/case-study-optimization.html)
|
||||
|
||||
I proceeded to ignore it, because we never really learn from our mistakes.
|
||||
|
||||
There's another part of the human condition that derives joy from seeing things explode.
|
||||
|
||||
<iframe src="https://giphy.com/embed/YA6dmVW0gfIw8" width="480" height="336" frameBorder="0"></iframe>
|
||||
|
||||
And _that's_ the part I'm going to focus on.
|
||||
|
||||
# Why an Allocator?
|
||||
|
||||
So why, after complaining about allocators, would I still want to write one? There are three reasons
|
||||
for that:
|
||||
|
||||
1. Allocation/dropping is slow
|
||||
2. It's difficult to know exactly when Rust will allocate or drop, especially when using code that
|
||||
you did not write
|
||||
3. I want automated tools to verify behavior, instead of inspecting by hand
|
||||
|
||||
When I say "slow," it's important to define the terms. If you're writing web applications, you'll
|
||||
spend orders of magnitude more time waiting for the database than you will the allocator. However,
|
||||
there's still plenty of code where micro- or nano-seconds matter; think
|
||||
[finance](https://www.youtube.com/watch?v=NH1Tta7purM),
|
||||
[real-time audio](https://www.reddit.com/r/rust/comments/9hg7yj/synthesizer_progress_update/e6c291f),
|
||||
[self-driving cars](https://polysync.io/blog/session-types-for-hearty-codecs/), and
|
||||
[networking](https://carllerche.github.io/bytes/bytes/index.html). In these situations it's simply
|
||||
unacceptable for you to spend time doing things that are not your program, and waiting on the
|
||||
allocator is not cool.
|
||||
|
||||
As I continue to learn Rust, it's difficult for me to predict where exactly allocations will happen.
|
||||
So, I propose we play a quick trivia game: **Does this code invoke the allocator?**
|
||||
|
||||
## Example 1
|
||||
|
||||
```rust
|
||||
fn my_function() {
|
||||
let v: Vec<u8> = Vec::new();
|
||||
}
|
||||
```
|
||||
|
||||
**No**: Rust [knows how big](https://doc.rust-lang.org/std/mem/fn.size_of.html) the `Vec` type is,
|
||||
and reserves a fixed amount of memory on the stack for the `v` vector. However, if we wanted to
|
||||
reserve extra space (using `Vec::with_capacity`) the allocator would get invoked.
|
||||
|
||||
## Example 2
|
||||
|
||||
```rust
|
||||
fn my_function() {
|
||||
let v: Box<Vec<u8>> = Box::new(Vec::new());
|
||||
}
|
||||
```
|
||||
|
||||
**Yes**: Because a `Box` allows us to work with things that are of unknown size, it has to allocate on
|
||||
the heap. While the `Box` is unnecessary in this snippet (release builds will optimize out the
|
||||
allocation), reserving heap space more generally is needed to pass a dynamically sized type to
|
||||
another function.
|
||||
|
||||
## Example 3
|
||||
|
||||
```rust
|
||||
fn my_function(v: Vec<u8>) {
|
||||
v.push(5);
|
||||
}
|
||||
```
|
||||
|
||||
**Maybe**: Depending on whether the Vector we were given has space available, we may or may not
|
||||
allocate. Especially when dealing with code that you did not author, it's difficult to verify that
|
||||
things behave as you expect them to.
|
||||
|
||||
# Blowing Things Up
|
||||
|
||||
So, how exactly does QADAPT solve these problems? **Whenever an allocation or drop occurs in code
|
||||
marked allocation-safe, QADAPT triggers a thread panic.** We don't want to let the program continue
|
||||
as if nothing strange happened, _we want things to explode_.
|
||||
|
||||
However, you don't want code to panic in production because of circumstances you didn't predict.
|
||||
Just like [`debug_assert!`](https://doc.rust-lang.org/std/macro.debug_assert.html), **QADAPT will
|
||||
strip out its own code when building in release mode to guarantee no panics and no performance
|
||||
impact.**
|
||||
|
||||
Finally, there are three ways to have QADAPT check that your code will not invoke the allocator:
|
||||
|
||||
## Using a procedural macro
|
||||
|
||||
The easiest method, watch an entire function for allocator invocation:
|
||||
|
||||
```rust
|
||||
use qadapt::no_alloc;
|
||||
use qadapt::QADAPT;
|
||||
|
||||
#[global_allocator]
|
||||
static Q: QADAPT = QADAPT;
|
||||
|
||||
#[no_alloc]
|
||||
fn push_vec(v: &mut Vec<u8>) {
|
||||
// This triggers a panic if v.len() == v.capacity()
|
||||
v.push(5);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let v = Vec::with_capacity(1);
|
||||
|
||||
// This will *not* trigger a panic
|
||||
push_vec(&v);
|
||||
|
||||
// This *will* trigger a panic
|
||||
push_vec(&v);
|
||||
}
|
||||
```
|
||||
|
||||
## Using a regular macro
|
||||
|
||||
For times when you need more precision:
|
||||
|
||||
```rust
|
||||
use qadapt::assert_no_alloc;
|
||||
use qadapt::QADAPT;
|
||||
|
||||
#[global_allocator]
|
||||
static Q: QADAPT = QADAPT;
|
||||
|
||||
fn main() {
|
||||
let v = Vec::with_capacity(1);
|
||||
|
||||
// No allocations here, we already have space reserved
|
||||
assert_no_alloc!(v.push(5));
|
||||
|
||||
// Even though we remove an item, it doesn't trigger a drop
|
||||
// because it's a scalar. If it were a `Box<_>` type,
|
||||
// a drop would trigger.
|
||||
assert_no_alloc!({
|
||||
v.pop().unwrap();
|
||||
});
|
||||
}
|
||||
```
|
||||
|
||||
## Using function calls
|
||||
|
||||
Both the most precise and most tedious:
|
||||
|
||||
```rust
|
||||
use qadapt::enter_protected;
|
||||
use qadapt::exit_protected;
|
||||
use qadapt::QADAPT;
|
||||
|
||||
#[global_allocator]
|
||||
static Q: QADAPT = QADAPT;
|
||||
|
||||
fn main() {
|
||||
// This triggers an allocation (on non-release builds)
|
||||
let v = Vec::with_capacity(1);
|
||||
|
||||
enter_protected();
|
||||
// This does not trigger an allocation because we've reserved size
|
||||
v.push(0);
|
||||
exit_protected();
|
||||
|
||||
// This triggers an allocation because we ran out of size,
|
||||
// but doesn't panic because we're no longer protected.
|
||||
v.push(1);
|
||||
}
|
||||
```
|
||||
|
||||
## Caveats
|
||||
|
||||
It's important to point out that QADAPT code is synchronous, so please be careful when mixing in
|
||||
asynchronous functions:
|
||||
|
||||
```rust
|
||||
use futures::future::Future;
|
||||
use futures::future::ok;
|
||||
|
||||
#[no_alloc]
|
||||
fn async_capacity() -> impl Future<Item=Vec<u8>, Error=()> {
|
||||
ok(12).and_then(|e| Ok(Vec::with_capacity(e)))
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// This doesn't trigger a panic because the `and_then` closure
|
||||
// wasn't run during the function call.
|
||||
async_capacity();
|
||||
|
||||
// Still no panic
|
||||
assert_no_alloc!(async_capacity());
|
||||
|
||||
// This will panic because the allocation happens during `unwrap`
|
||||
// in the `assert_no_alloc!` macro
|
||||
assert_no_alloc!(async_capacity().poll().unwrap());
|
||||
}
|
||||
```
|
||||
|
||||
# Conclusion
|
||||
|
||||
While there's a lot more to writing high-performance code than managing your usage of the allocator,
|
||||
it's critical that you do use the allocator correctly. QADAPT will verify that your code is doing
|
||||
what you expect. It's usable even on stable Rust from version 1.31 onward, which isn't the case for
|
||||
most allocators. Version 1.0 was released today, and you can check it out over at
|
||||
[crates.io](https://crates.io/crates/qadapt) or on [github](https://github.com/bspeice/qadapt).
|
||||
|
||||
I'm hoping to write more about high-performance Rust in the future, and I expect that QADAPT will
|
||||
help guide that. If there are topics you're interested in, let me know in the comments below!
|
||||
|
||||
[qadapt]: https://crates.io/crates/qadapt
|
@ -1,113 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Allocations in Rust"
|
||||
description: "An introduction to the memory model."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
There's an alchemy of distilling complex technical topics into articles and videos that change the
|
||||
way programmers see the tools they interact with on a regular basis. I knew what a linker was, but
|
||||
there's a staggering amount of complexity in between
|
||||
[the OS and `main()`](https://www.youtube.com/watch?v=dOfucXtyEsU). Rust programmers use the
|
||||
[`Box`](https://doc.rust-lang.org/stable/std/boxed/struct.Box.html) type all the time, but there's a
|
||||
rich history of the Rust language itself wrapped up in
|
||||
[how special it is](https://manishearth.github.io/blog/2017/01/10/rust-tidbits-box-is-special/).
|
||||
|
||||
In a similar vein, this series attempts to look at code and understand how memory is used; the
|
||||
complex choreography of operating system, compiler, and program that frees you to focus on
|
||||
functionality far-flung from frivolous book-keeping. The Rust compiler relieves a great deal of the
|
||||
cognitive burden associated with memory management, but we're going to step into its world for a
|
||||
while.
|
||||
|
||||
Let's learn a bit about memory in Rust.
|
||||
|
||||
# Table of Contents
|
||||
|
||||
This series is intended as both learning and reference material; we'll work through the different
|
||||
memory types Rust uses, and explain the implications of each. Ultimately, a summary will be provided
|
||||
as a cheat sheet for easy future reference. To that end, a table of contents is in order:
|
||||
|
||||
- Foreword
|
||||
- [Global Memory Usage: The Whole World](/2019/02/the-whole-world.html)
|
||||
- [Fixed Memory: Stacking Up](/2019/02/stacking-up.html)
|
||||
- [Dynamic Memory: A Heaping Helping](/2019/02/a-heaping-helping.html)
|
||||
- [Compiler Optimizations: What It's Done For You Lately](/2019/02/compiler-optimizations.html)
|
||||
- [Summary: What Are the Rules?](/2019/02/summary.html)
|
||||
|
||||
# Foreword
|
||||
|
||||
Rust's three defining features of
|
||||
[Performance, Reliability, and Productivity](https://www.rust-lang.org/) are all driven to a great
|
||||
degree by how the Rust compiler understands memory usage. Unlike managed memory languages (Java,
|
||||
Python), Rust
|
||||
[doesn't really](https://words.steveklabnik.com/borrow-checking-escape-analysis-and-the-generational-hypothesis)
|
||||
garbage collect; instead, it uses an
|
||||
[ownership](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) system to reason about
|
||||
how long objects will last in your program. In some cases, if the life of an object is fairly
|
||||
transient, Rust can make use of a very fast region called the "stack." When that's not possible,
|
||||
Rust uses
|
||||
[dynamic (heap) memory](https://en.wikipedia.org/wiki/Memory_management#Dynamic_memory_allocation)
|
||||
and the ownership system to ensure you can't accidentally corrupt memory. It's not as fast, but it
|
||||
is important to have available.
|
||||
|
||||
That said, there are specific situations in Rust where you'd never need to worry about the
|
||||
stack/heap distinction! If you:
|
||||
|
||||
1. Never use `unsafe`
|
||||
2. Never use `#![feature(alloc)]` or the [`alloc` crate](https://doc.rust-lang.org/alloc/index.html)
|
||||
|
||||
...then it's not possible for you to use dynamic memory!
|
||||
|
||||
For some uses of Rust, typically embedded devices, these constraints are OK. They have very limited
|
||||
memory, and the program binary size itself may significantly affect what's available! There's no
|
||||
operating system able to manage this
|
||||
["virtual memory"](https://en.wikipedia.org/wiki/Virtual_memory) thing, but that's not an issue
|
||||
because there's only one running application. The
|
||||
[embedonomicon](https://docs.rust-embedded.org/embedonomicon/preface.html) is ever in mind, and
|
||||
interacting with the "real world" through extra peripherals is accomplished by reading and writing
|
||||
to [specific memory addresses](https://bob.cs.sonoma.edu/IntroCompOrg-RPi/sec-gpio-mem.html).
|
||||
|
||||
Most Rust programs find these requirements overly burdensome though. C++ developers would struggle
|
||||
without access to [`std::vector`](https://en.cppreference.com/w/cpp/container/vector) (except those
|
||||
hardcore no-STL people), and Rust developers would struggle without
|
||||
[`std::vec`](https://doc.rust-lang.org/std/vec/struct.Vec.html). But with the constraints above,
|
||||
`std::vec` is actually a part of the
|
||||
[`alloc` crate](https://doc.rust-lang.org/alloc/vec/struct.Vec.html), and thus off-limits. `Box`,
|
||||
`Rc`, etc., are also unusable for the same reason.
|
||||
|
||||
Whether writing code for embedded devices or not, the important thing in both situations is how much
|
||||
you know _before your application starts_ about what its memory usage will look like. In embedded
|
||||
devices, there's a small, fixed amount of memory to use. In a browser, you have no idea how large
|
||||
[google.com](https://www.google.com)'s home page is until you start trying to download it. The
|
||||
compiler uses this knowledge (or lack thereof) to optimize how memory is used; put simply, your code
|
||||
runs faster when the compiler can guarantee exactly how much memory your program needs while it's
|
||||
running. This series is all about understanding how the compiler reasons about your program, with an
|
||||
emphasis on the implications for performance.
|
||||
|
||||
Now let's address some conditions and caveats before going much further:
|
||||
|
||||
- We'll focus on "safe" Rust only; `unsafe` lets you use platform-specific allocation APIs
|
||||
([`malloc`](https://www.tutorialspoint.com/c_standard_library/c_function_malloc.htm)) that we'll
|
||||
ignore.
|
||||
- We'll assume a "debug" build of Rust code (what you get with `cargo run` and `cargo test`) and
|
||||
address (pun intended) release mode at the end (`cargo run --release` and `cargo test --release`).
|
||||
- All content will be run using Rust 1.32, as that's the highest currently supported in the
|
||||
[Compiler Explorer](https://godbolt.org/). As such, we'll avoid upcoming innovations like
|
||||
[compile-time evaluation of `static`](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md)
|
||||
that are available in nightly.
|
||||
- Because of the nature of the content, being able to read assembly is helpful. We'll keep it
|
||||
simple, but I [found](https://stackoverflow.com/a/4584131/1454178) a
|
||||
[refresher](https://stackoverflow.com/a/26026278/1454178) on the `push` and `pop`
|
||||
[instructions](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html) was helpful while writing
|
||||
this.
|
||||
- I've tried to be precise in saying only what I can prove using the tools (ASM, docs) that are
|
||||
available, but if there's something said in error it will be corrected expeditiously. Please let
|
||||
me know at [bradlee@speice.io](mailto:bradlee@speice.io)
|
||||
|
||||
Finally, I'll do what I can to flag potential future changes but the Rust docs have a notice worth
|
||||
repeating:
|
||||
|
||||
> Rust does not currently have a rigorously and formally defined memory model.
|
||||
>
|
||||
> -- [the docs](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html)
|
@ -1,337 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Global Memory Usage: The Whole World"
|
||||
description: "Static considered slightly less harmful."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
The first memory type we'll look at is pretty special: when Rust can prove that a _value_ is fixed
|
||||
for the life of a program (`const`), and when a _reference_ is unique for the life of a program
|
||||
(`static` as a declaration, not
|
||||
[`'static`](https://doc.rust-lang.org/book/ch10-03-lifetime-syntax.html#the-static-lifetime) as a
|
||||
lifetime), we can make use of global memory. This special section of data is embedded directly in
|
||||
the program binary so that variables are ready to go once the program loads; no additional
|
||||
computation is necessary.
|
||||
|
||||
Understanding the value/reference distinction is important for reasons we'll go into below, and
|
||||
while the
|
||||
[full specification](https://github.com/rust-lang/rfcs/blob/master/text/0246-const-vs-static.md) for
|
||||
these two keywords is available, we'll take a hands-on approach to the topic.
|
||||
|
||||
# **const**
|
||||
|
||||
When a _value_ is guaranteed to be unchanging in your program (where "value" may be scalars,
|
||||
`struct`s, etc.), you can declare it `const`. This tells the compiler that it's safe to treat the
|
||||
value as never changing, and enables some interesting optimizations; not only is there no
|
||||
initialization cost to creating the value (it is loaded at the same time as the executable parts of
|
||||
your program), but the compiler can also copy the value around if it speeds up the code.
|
||||
|
||||
The points we need to address when talking about `const` are:
|
||||
|
||||
- `const` values are stored in read-only memory - they're impossible to modify.
|
||||
- Values resulting from calling a `const fn` are materialized at compile-time.
|
||||
- The compiler may (or may not) copy `const` values wherever it chooses.
|
||||
|
||||
## Read-Only
|
||||
|
||||
The first point is a bit strange - "read-only memory."
|
||||
[The Rust book](https://doc.rust-lang.org/book/ch03-01-variables-and-mutability.html#differences-between-variables-and-constants)
|
||||
mentions in a couple places that using `mut` with constants is illegal, but it's also important to
|
||||
demonstrate just how immutable they are. _Typically_ in Rust you can use
|
||||
[interior mutability](https://doc.rust-lang.org/book/ch15-05-interior-mutability.html) to modify
|
||||
things that aren't declared `mut`.
|
||||
[`RefCell`](https://doc.rust-lang.org/std/cell/struct.RefCell.html) provides an example of this
|
||||
pattern in action:
|
||||
|
||||
```rust
|
||||
use std::cell::RefCell;
|
||||
|
||||
fn my_mutator(cell: &RefCell<u8>) {
|
||||
// Even though we're given an immutable reference,
|
||||
// the `replace` method allows us to modify the inner value.
|
||||
cell.replace(14);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let cell = RefCell::new(25);
|
||||
// Prints out 25
|
||||
println!("Cell: {:?}", cell);
|
||||
my_mutator(&cell);
|
||||
// Prints out 14
|
||||
println!("Cell: {:?}", cell);
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8e4bea1a718edaff4507944e825a54b2)
|
||||
|
||||
When `const` is involved though, interior mutability is impossible:
|
||||
|
||||
```rust
|
||||
use std::cell::RefCell;
|
||||
|
||||
const CELL: RefCell<u8> = RefCell::new(25);
|
||||
|
||||
fn my_mutator(cell: &RefCell<u8>) {
|
||||
cell.replace(14);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// First line prints 25 as expected
|
||||
println!("Cell: {:?}", &CELL);
|
||||
my_mutator(&CELL);
|
||||
// Second line *still* prints 25
|
||||
println!("Cell: {:?}", &CELL);
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=88fe98110c33c1b3a51e341f48b8ae00)
|
||||
|
||||
And a second example using [`Once`](https://doc.rust-lang.org/std/sync/struct.Once.html):
|
||||
|
||||
```rust
|
||||
use std::sync::Once;
|
||||
|
||||
const SURPRISE: Once = Once::new();
|
||||
|
||||
fn main() {
|
||||
// This is how `Once` is supposed to be used
|
||||
SURPRISE.call_once(|| println!("Initializing..."));
|
||||
// Because `Once` is a `const` value, we never record it
|
||||
// having been initialized the first time, and this closure
|
||||
// will also execute.
|
||||
SURPRISE.call_once(|| println!("Initializing again???"));
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c3cc5979b5e5434eca0f9ec4a06ee0ed)
|
||||
|
||||
When the
|
||||
[`const` specification](https://github.com/rust-lang/rfcs/blob/26197104b7bb9a5a35db243d639aee6e46d35d75/text/0246-const-vs-static.md)
|
||||
refers to ["rvalues"](http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2010/n3055.pdf), this
|
||||
behavior is what they refer to. [Clippy](https://github.com/rust-lang/rust-clippy) will treat this
|
||||
as an error, but it's still something to be aware of.
|
||||
|
||||
## Initialization == Compilation
|
||||
|
||||
The next thing to mention is that `const` values are loaded into memory _as part of your program
|
||||
binary_. Because of this, any `const` values declared in your program will be "realized" at
|
||||
compile-time; accessing them may trigger a main-memory lookup (with a fixed address, so your CPU may
|
||||
be able to prefetch the value), but that's it.
|
||||
|
||||
```rust
|
||||
use std::cell::RefCell;
|
||||
|
||||
const CELL: RefCell<u32> = RefCell::new(24);
|
||||
|
||||
pub fn multiply(value: u32) -> u32 {
|
||||
// CELL is stored at `.L__unnamed_1`
|
||||
value * (*CELL.get_mut())
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/Th8boO)
|
||||
|
||||
The compiler creates one `RefCell`, uses it everywhere, and never needs to call the `RefCell::new`
|
||||
function.
|
||||
|
||||
## Copying
|
||||
|
||||
If it's helpful though, the compiler can choose to copy `const` values.
|
||||
|
||||
```rust
|
||||
const FACTOR: u32 = 1000;
|
||||
|
||||
pub fn multiply(value: u32) -> u32 {
|
||||
// See assembly line 4 for the `mov edi, 1000` instruction
|
||||
value * FACTOR
|
||||
}
|
||||
|
||||
pub fn multiply_twice(value: u32) -> u32 {
|
||||
// See assembly lines 22 and 29 for `mov edi, 1000` instructions
|
||||
value * FACTOR * FACTOR
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/ZtS54X)
|
||||
|
||||
In this example, the `FACTOR` value is turned into the `mov edi, 1000` instruction in both the
|
||||
`multiply` and `multiply_twice` functions; the "1000" value is never "stored" anywhere, as it's
|
||||
small enough to inline into the assembly instructions.
|
||||
|
||||
Finally, getting the address of a `const` value is possible, but not guaranteed to be unique
|
||||
(because the compiler can choose to copy values). I was unable to get non-unique pointers in my
|
||||
testing (even using different crates), but the specifications are clear enough: _don't rely on
|
||||
pointers to `const` values being consistent_. To be frank, caring about locations for `const` values
|
||||
is almost certainly a code smell.
|
||||
|
||||
# **static**
|
||||
|
||||
Static variables are related to `const` variables, but take a slightly different approach. When we
|
||||
declare that a _reference_ is unique for the life of a program, you have a `static` variable
|
||||
(unrelated to the `'static` lifetime). Because of the reference/value distinction with
|
||||
`const`/`static`, static variables behave much more like typical "global" variables.
|
||||
|
||||
But to understand `static`, here's what we'll look at:
|
||||
|
||||
- `static` variables are globally unique locations in memory.
|
||||
- Like `const`, `static` variables are loaded at the same time as your program being read into
|
||||
memory.
|
||||
- All `static` variables must implement the
|
||||
[`Sync`](https://doc.rust-lang.org/std/marker/trait.Sync.html) marker trait.
|
||||
- Interior mutability is safe and acceptable when using `static` variables.
|
||||
|
||||
## Memory Uniqueness
|
||||
|
||||
The single biggest difference between `const` and `static` is the guarantees provided about
|
||||
uniqueness. Where `const` variables may or may not be copied in code, `static` variables are
|
||||
guaranteed to be unique. If we take a previous `const` example and change it to `static`, the
|
||||
difference should be clear:
|
||||
|
||||
```rust
|
||||
static FACTOR: u32 = 1000;
|
||||
|
||||
pub fn multiply(value: u32) -> u32 {
|
||||
// The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
|
||||
value * FACTOR
|
||||
}
|
||||
|
||||
pub fn multiply_twice(value: u32) -> u32 {
|
||||
// The assembly to `mul dword ptr [rip + example::FACTOR]` is how FACTOR gets used
|
||||
value * FACTOR * FACTOR
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/uxmiRQ)
|
||||
|
||||
Where [previously](#copying) there were plenty of references to multiplying by 1000, the new
|
||||
assembly refers to `FACTOR` as a named memory location instead. No initialization work needs to be
|
||||
done, but the compiler can no longer prove the value never changes during execution.
|
||||
|
||||
## Initialization == Compilation
|
||||
|
||||
Next, let's talk about initialization. The simplest case is initializing static variables with
|
||||
either scalar or struct notation:
|
||||
|
||||
```rust
|
||||
#[derive(Debug)]
|
||||
struct MyStruct {
|
||||
x: u32
|
||||
}
|
||||
|
||||
static MY_STRUCT: MyStruct = MyStruct {
|
||||
// You can even reference other statics
|
||||
// declared later
|
||||
x: MY_VAL
|
||||
};
|
||||
|
||||
static MY_VAL: u32 = 24;
|
||||
|
||||
fn main() {
|
||||
println!("Static MyStruct: {:?}", MY_STRUCT);
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=b538dbc46076f12db047af4f4403ee6e)
|
||||
|
||||
Things can get a bit weirder when using `const fn` though. In most cases, it just works:
|
||||
|
||||
```rust
|
||||
#[derive(Debug)]
|
||||
struct MyStruct {
|
||||
x: u32
|
||||
}
|
||||
|
||||
impl MyStruct {
|
||||
const fn new() -> MyStruct {
|
||||
MyStruct { x: 24 }
|
||||
}
|
||||
}
|
||||
|
||||
static MY_STRUCT: MyStruct = MyStruct::new();
|
||||
|
||||
fn main() {
|
||||
println!("const fn Static MyStruct: {:?}", MY_STRUCT);
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=8c796a6e7fc273c12115091b707b0255)
|
||||
|
||||
However, there's a caveat: you're currently not allowed to use `const fn` to initialize static
|
||||
variables of types that aren't marked `Sync`. For example,
|
||||
[`RefCell::new()`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#method.new) is a
|
||||
`const fn`, but because
|
||||
[`RefCell` isn't `Sync`](https://doc.rust-lang.org/std/cell/struct.RefCell.html#impl-Sync), you'll
|
||||
get an error at compile time:
|
||||
|
||||
```rust
|
||||
use std::cell::RefCell;
|
||||
|
||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
|
||||
static MY_LOCK: RefCell<u8> = RefCell::new(0);
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=c76ef86e473d07117a1700e21fd45560)
|
||||
|
||||
It's likely that this will
|
||||
[change in the future](https://github.com/rust-lang/rfcs/blob/master/text/0911-const-fn.md) though.
|
||||
|
||||
## **Sync**
|
||||
|
||||
Which leads well to the next point: static variable types must implement the
|
||||
[`Sync` marker](https://doc.rust-lang.org/std/marker/trait.Sync.html). Because they're globally
|
||||
unique, it must be safe for you to access static variables from any thread at any time. Most
|
||||
`struct` definitions automatically implement the `Sync` trait because they contain only elements
|
||||
which themselves implement `Sync` (read more in the
|
||||
[Nomicon](https://doc.rust-lang.org/nomicon/send-and-sync.html)). This is why earlier examples could
|
||||
get away with initializing statics, even though we never included an `impl Sync for MyStruct` in the
|
||||
code. To demonstrate this property, Rust refuses to compile our earlier example if we add a
|
||||
non-`Sync` element to the `struct` definition:
|
||||
|
||||
```rust
|
||||
use std::cell::RefCell;
|
||||
|
||||
struct MyStruct {
|
||||
x: u32,
|
||||
y: RefCell<u8>,
|
||||
}
|
||||
|
||||
// error[E0277]: `std::cell::RefCell<u8>` cannot be shared between threads safely
|
||||
static MY_STRUCT: MyStruct = MyStruct {
|
||||
x: 8,
|
||||
y: RefCell::new(8)
|
||||
};
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=40074d0248f056c296b662dbbff97cfc)
|
||||
|
||||
## Interior Mutability
|
||||
|
||||
Finally, while `static mut` variables are allowed, mutating them is an `unsafe` operation. If we
|
||||
want to stay in `safe` Rust, we can use interior mutability to accomplish similar goals:
|
||||
|
||||
```rust
|
||||
use std::sync::Once;
|
||||
|
||||
// This example adapted from https://doc.rust-lang.org/std/sync/struct.Once.html#method.call_once
|
||||
static INIT: Once = Once::new();
|
||||
|
||||
fn main() {
|
||||
// Note that while `INIT` is declared immutable, we're still allowed
|
||||
// to mutate its interior
|
||||
INIT.call_once(|| println!("Initializing..."));
|
||||
// This code won't panic, as the interior of INIT was modified
|
||||
// as part of the previous `call_once`
|
||||
INIT.call_once(|| panic!("INIT was called twice!"));
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=3ba003a981a7ed7400240caadd384d59)
|
@ -1,601 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Fixed Memory: Stacking Up"
|
||||
description: "We don't need no allocator."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
`const` and `static` are perfectly fine, but it's relatively rare that we know at compile-time about
|
||||
either values or references that will be the same for the duration of our program. Put another way,
|
||||
it's not often the case that either you or your compiler knows how much memory your entire program
|
||||
will ever need.
|
||||
|
||||
However, there are still some optimizations the compiler can do if it knows how much memory
|
||||
individual functions will need. Specifically, the compiler can make use of "stack" memory (as
|
||||
opposed to "heap" memory) which can be managed far faster in both the short- and long-term. When
|
||||
requesting memory, the [`push` instruction](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html)
|
||||
can typically complete in [1 or 2 cycles](https://agner.org/optimize/instruction_tables.ods) (<1
|
||||
nanosecond on modern CPUs). Contrast that to heap memory which requires an allocator (specialized
|
||||
software to track what memory is in use) to reserve space. When you're finished with stack memory,
|
||||
the `pop` instruction runs in 1-3 cycles, as opposed to an allocator needing to worry about memory
|
||||
fragmentation and other issues with the heap. All sorts of incredibly sophisticated techniques have
|
||||
been used to design allocators:
|
||||
|
||||
- [Garbage Collection](<https://en.wikipedia.org/wiki/Garbage_collection_(computer_science)>)
|
||||
strategies like [Tracing](https://en.wikipedia.org/wiki/Tracing_garbage_collection) (used in
|
||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/g1-intro-jsp-135488.html)) and
|
||||
[Reference counting](https://en.wikipedia.org/wiki/Reference_counting) (used in
|
||||
[Python](https://docs.python.org/3/extending/extending.html#reference-counts))
|
||||
- Thread-local structures to prevent locking the allocator in
|
||||
[tcmalloc](https://jamesgolick.com/2013/5/19/how-tcmalloc-works.html)
|
||||
- Arena structures used in [jemalloc](http://jemalloc.net/), which
|
||||
[until recently](https://blog.rust-lang.org/2019/01/17/Rust-1.32.0.html#jemalloc-is-removed-by-default)
|
||||
was the primary allocator for Rust programs!
|
||||
|
||||
But no matter how fast your allocator is, the principle remains: the fastest allocator is the one
|
||||
you never use. As such, we're not going to discuss how exactly the
|
||||
[`push` and `pop` instructions work](http://www.cs.virginia.edu/~evans/cs216/guides/x86.html), but
|
||||
we'll focus instead on the conditions that enable the Rust compiler to use faster stack-based
|
||||
allocation for variables.
|
||||
|
||||
So, **how do we know when Rust will or will not use stack allocation for objects we create?**
|
||||
Looking at other languages, it's often easy to delineate between stack and heap. Managed memory
|
||||
languages (Python, Java,
|
||||
[C#](https://blogs.msdn.microsoft.com/ericlippert/2010/09/30/the-truth-about-value-types/)) place
|
||||
everything on the heap. JIT compilers ([PyPy](https://www.pypy.org/),
|
||||
[HotSpot](https://www.oracle.com/technetwork/java/javase/tech/index-jsp-136373.html)) may optimize
|
||||
some heap allocations away, but you should never assume it will happen. C makes things clear with
|
||||
calls to special functions (like [malloc(3)](https://linux.die.net/man/3/malloc)) needed to access
|
||||
heap memory. Old C++ has the [`new`](https://stackoverflow.com/a/655086/1454178) keyword, though
|
||||
modern C++/C++11 is more complicated with [RAII](https://en.cppreference.com/w/cpp/language/raii).
|
||||
|
||||
For Rust, we can summarize as follows: **stack allocation will be used for everything that doesn't
|
||||
involve "smart pointers" and collections**. We'll skip over a precise definition of the term "smart
|
||||
pointer" for now, and instead discuss what we should watch for to understand when stack and heap
|
||||
memory regions are used:
|
||||
|
||||
1. Stack manipulation instructions (`push`, `pop`, and `add`/`sub` of the `rsp` register) indicate
|
||||
allocation of stack memory:
|
||||
|
||||
```rust
|
||||
pub fn stack_alloc(x: u32) -> u32 {
|
||||
// Space for `y` is allocated by subtracting from `rsp`,
|
||||
// and then populated
|
||||
let y = [1u8, 2, 3, 4];
|
||||
// Space for `y` is deallocated by adding back to `rsp`
|
||||
x
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/5WSgc9)
|
||||
|
||||
2. Tracking when exactly heap allocation calls occur is difficult. It's typically easier to watch
|
||||
for `call core::ptr::real_drop_in_place`, and infer that a heap allocation happened in the recent
|
||||
past:
|
||||
|
||||
```rust
|
||||
pub fn heap_alloc(x: usize) -> usize {
|
||||
// Space for elements in a vector has to be allocated
|
||||
// on the heap, and is then de-allocated once the
|
||||
// vector goes out of scope
|
||||
let y: Vec<u8> = Vec::with_capacity(x);
|
||||
x
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/epfgoQ) (`real_drop_in_place` happens on line 1317)
|
||||
<span style="font-size: .8em">Note: While the
|
||||
[`Drop` trait](https://doc.rust-lang.org/std/ops/trait.Drop.html) is
|
||||
[called for stack-allocated objects](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=87edf374d8983816eb3d8cfeac657b46),
|
||||
the Rust standard library only defines `Drop` implementations for types that involve heap
|
||||
allocation.</span>
|
||||
|
||||
3. If you don't want to inspect the assembly, use a custom allocator that's able to track and alert
|
||||
when heap allocations occur. Crates like
|
||||
[`alloc_counter`](https://crates.io/crates/alloc_counter) are designed for exactly this purpose.
|
||||
|
||||
With all that in mind, let's talk about situations in which we're guaranteed to use stack memory:
|
||||
|
||||
- Structs are created on the stack.
|
||||
- Function arguments are passed on the stack, meaning the
|
||||
[`#[inline]` attribute](https://doc.rust-lang.org/reference/attributes.html#inline-attribute) will
|
||||
not change the memory region used.
|
||||
- Enums and unions are stack-allocated.
|
||||
- [Arrays](https://doc.rust-lang.org/std/primitive.array.html) are always stack-allocated.
|
||||
- Closures capture their arguments on the stack.
|
||||
- Generics will use stack allocation, even with dynamic dispatch.
|
||||
- [`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html) types are guaranteed to be
|
||||
stack-allocated, and copying them will be done in stack memory.
|
||||
- [`Iterator`s](https://doc.rust-lang.org/std/iter/trait.Iterator.html) in the standard library are
|
||||
stack-allocated even when iterating over heap-based collections.
|
||||
|
||||
# Structs
|
||||
|
||||
The simplest case comes first. When creating vanilla `struct` objects, we use stack memory to hold
|
||||
their contents:
|
||||
|
||||
```rust
|
||||
struct Point {
|
||||
x: u64,
|
||||
y: u64,
|
||||
}
|
||||
|
||||
struct Line {
|
||||
a: Point,
|
||||
b: Point,
|
||||
}
|
||||
|
||||
pub fn make_line() {
|
||||
// `origin` is stored in the first 16 bytes of memory
|
||||
// starting at location `rsp`
|
||||
let origin = Point { x: 0, y: 0 };
|
||||
// `point` makes up the next 16 bytes of memory
|
||||
let point = Point { x: 1, y: 2 };
|
||||
|
||||
// When creating `ray`, we just move the content out of
|
||||
// `origin` and `point` into the next 32 bytes of memory
|
||||
let ray = Line { a: origin, b: point };
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/vri9BE)
|
||||
|
||||
Note that while some extra-fancy instructions are used for memory manipulation in the assembly, the
|
||||
`sub rsp, 64` instruction indicates we're still working with the stack.
|
||||
|
||||
# Function arguments
|
||||
|
||||
Have you ever wondered how functions communicate with each other? Like, once the variables are given
|
||||
to you, everything's fine. But how do you "give" those variables to another function? How do you get
|
||||
the results back afterward? The answer: the compiler arranges memory and assembly instructions using
|
||||
a pre-determined [calling convention](http://llvm.org/docs/LangRef.html#calling-conventions). This
|
||||
convention governs the rules around where arguments needed by a function will be located (either in
|
||||
memory offsets relative to the stack pointer `rsp`, or in other registers), and where the results
|
||||
can be found once the function has finished. And when multiple languages agree on what the calling
|
||||
conventions are, you can do things like having [Go call Rust code](https://blog.filippo.io/rustgo/)!
|
||||
|
||||
Put simply: it's the compiler's job to figure out how to call other functions, and you can assume
|
||||
that the compiler is good at its job.
|
||||
|
||||
We can see this in action using a simple example:
|
||||
|
||||
```rust
|
||||
struct Point {
|
||||
x: i64,
|
||||
y: i64,
|
||||
}
|
||||
|
||||
// We use integer division operations to keep
|
||||
// the assembly clean, understanding the result
|
||||
// isn't accurate.
|
||||
fn distance(a: &Point, b: &Point) -> i64 {
|
||||
// Immediately subtract from `rsp` the bytes needed
|
||||
// to hold all the intermediate results - this is
|
||||
// the stack allocation step
|
||||
|
||||
// The compiler used the `rdi` and `rsi` registers
|
||||
// to pass our arguments, so read them in
|
||||
let x1 = a.x;
|
||||
let x2 = b.x;
|
||||
let y1 = a.y;
|
||||
let y2 = b.y;
|
||||
|
||||
// Do the actual math work
|
||||
let x_pow = (x1 - x2) * (x1 - x2);
|
||||
let y_pow = (y1 - y2) * (y1 - y2);
|
||||
let squared = x_pow + y_pow;
|
||||
squared / squared
|
||||
|
||||
// Our final result will be stored in the `rax` register
|
||||
// so that our caller knows where to retrieve it.
|
||||
// Finally, add back to `rsp` the stack memory that is
|
||||
// now ready to be used by other functions.
|
||||
}
|
||||
|
||||
pub fn total_distance() {
|
||||
let start = Point { x: 1, y: 2 };
|
||||
let middle = Point { x: 3, y: 4 };
|
||||
let end = Point { x: 5, y: 6 };
|
||||
|
||||
let _dist_1 = distance(&start, &middle);
|
||||
let _dist_2 = distance(&middle, &end);
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/Qmx4ST)
|
||||
|
||||
As a consequence of function arguments never using heap memory, we can also infer that functions
|
||||
using the `#[inline]` attribute also do not heap allocate. But better than inferring, we can look
|
||||
at the assembly to prove it:
|
||||
|
||||
```rust
|
||||
struct Point {
|
||||
x: i64,
|
||||
y: i64,
|
||||
}
|
||||
|
||||
// Note that there is no `distance` function in the assembly output,
|
||||
// and the total line count goes from 229 with inlining off
|
||||
// to 306 with inline on. Even still, no heap allocations occur.
|
||||
#[inline(always)]
|
||||
fn distance(a: &Point, b: &Point) -> i64 {
|
||||
let x1 = a.x;
|
||||
let x2 = b.x;
|
||||
let y1 = a.y;
|
||||
let y2 = b.y;
|
||||
|
||||
let x_pow = (a.x - b.x) * (a.x - b.x);
|
||||
let y_pow = (a.y - b.y) * (a.y - b.y);
|
||||
let squared = x_pow + y_pow;
|
||||
squared / squared
|
||||
}
|
||||
|
||||
pub fn total_distance() {
|
||||
let start = Point { x: 1, y: 2 };
|
||||
let middle = Point { x: 3, y: 4 };
|
||||
let end = Point { x: 5, y: 6 };
|
||||
|
||||
let _dist_1 = distance(&start, &middle);
|
||||
let _dist_2 = distance(&middle, &end);
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/30Sh66)
|
||||
|
||||
Finally, passing by value (arguments with type
|
||||
[`Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html)) and passing by reference (either
|
||||
moving ownership or passing a pointer) may have slightly different layouts in assembly, but will
|
||||
still use either stack memory or CPU registers:
|
||||
|
||||
```rust
|
||||
pub struct Point {
|
||||
x: i64,
|
||||
y: i64,
|
||||
}
|
||||
|
||||
// Moving values
|
||||
pub fn distance_moved(a: Point, b: Point) -> i64 {
|
||||
let x1 = a.x;
|
||||
let x2 = b.x;
|
||||
let y1 = a.y;
|
||||
let y2 = b.y;
|
||||
|
||||
let x_pow = (x1 - x2) * (x1 - x2);
|
||||
let y_pow = (y1 - y2) * (y1 - y2);
|
||||
let squared = x_pow + y_pow;
|
||||
squared / squared
|
||||
}
|
||||
|
||||
// Borrowing values has two extra `mov` instructions on lines 21 and 22
|
||||
pub fn distance_borrowed(a: &Point, b: &Point) -> i64 {
|
||||
let x1 = a.x;
|
||||
let x2 = b.x;
|
||||
let y1 = a.y;
|
||||
let y2 = b.y;
|
||||
|
||||
let x_pow = (x1 - x2) * (x1 - x2);
|
||||
let y_pow = (y1 - y2) * (y1 - y2);
|
||||
let squared = x_pow + y_pow;
|
||||
squared / squared
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/06hGiv)
|
||||
|
||||
# Enums
|
||||
|
||||
If you've ever worried that wrapping your types in
|
||||
[`Option`](https://doc.rust-lang.org/stable/core/option/enum.Option.html) or
|
||||
[`Result`](https://doc.rust-lang.org/stable/core/result/enum.Result.html) would finally make them
|
||||
large enough that Rust decides to use heap allocation instead, fear no longer: `enum` and union
|
||||
types don't use heap allocation:
|
||||
|
||||
```rust
|
||||
enum MyEnum {
|
||||
Small(u8),
|
||||
Large(u64)
|
||||
}
|
||||
|
||||
struct MyStruct {
|
||||
x: MyEnum,
|
||||
y: MyEnum,
|
||||
}
|
||||
|
||||
pub fn enum_compare() {
|
||||
let x = MyEnum::Small(0);
|
||||
let y = MyEnum::Large(0);
|
||||
|
||||
let z = MyStruct { x, y };
|
||||
|
||||
let opt = Option::Some(z);
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/HK7zBx)
|
||||
|
||||
Because the size of an `enum` is the size of its largest element plus a flag, the compiler can
|
||||
predict how much memory is used no matter which variant of an enum is currently stored in a
|
||||
variable. Thus, enums and unions have no need of heap allocation. There's unfortunately not a great
|
||||
way to show this in assembly, so I'll instead point you to the
|
||||
[`core::mem::size_of`](https://doc.rust-lang.org/stable/core/mem/fn.size_of.html#size-of-enums)
|
||||
documentation.
|
||||
|
||||
# Arrays
|
||||
|
||||
The array type is guaranteed to be stack allocated, which is why the array size must be declared.
|
||||
Interestingly enough, this can be used to cause safe Rust programs to crash:
|
||||
|
||||
```rust
|
||||
// 256 bytes
|
||||
#[derive(Default)]
|
||||
struct TwoFiftySix {
|
||||
_a: [u64; 32]
|
||||
}
|
||||
|
||||
// 8 kilobytes
|
||||
#[derive(Default)]
|
||||
struct EightK {
|
||||
_a: [TwoFiftySix; 32]
|
||||
}
|
||||
|
||||
// 256 kilobytes
|
||||
#[derive(Default)]
|
||||
struct TwoFiftySixK {
|
||||
_a: [EightK; 32]
|
||||
}
|
||||
|
||||
// 8 megabytes - exceeds space typically provided for the stack,
|
||||
// though the kernel can be instructed to allocate more.
|
||||
// On Linux, you can check stack size using `ulimit -s`
|
||||
#[derive(Default)]
|
||||
struct EightM {
|
||||
_a: [TwoFiftySixK; 32]
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Because we already have things in stack memory
|
||||
// (like the current function call stack), allocating another
|
||||
// eight megabytes of stack memory crashes the program
|
||||
let _x = EightM::default();
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=587a6380a4914bcbcef4192c90c01dc4)
|
||||
|
||||
There aren't any security implications of this (no memory corruption occurs), but it's good to note
|
||||
that the Rust compiler won't move arrays into heap memory even if they can be reasonably expected to
|
||||
overflow the stack.
|
||||
|
||||
# Closures
|
||||
|
||||
Rules for how anonymous functions capture their arguments are typically language-specific. In Java,
|
||||
[Lambda Expressions](https://docs.oracle.com/javase/tutorial/java/javaOO/lambdaexpressions.html) are
|
||||
actually objects created on the heap that capture local primitives by copying, and capture local
|
||||
non-primitives as (`final`) references.
|
||||
[Python](https://docs.python.org/3.7/reference/expressions.html#lambda) and
|
||||
[JavaScript](https://javascriptweblog.wordpress.com/2010/10/25/understanding-javascript-closures/)
|
||||
both bind _everything_ by reference normally, but Python can also
|
||||
[capture values](https://stackoverflow.com/a/235764/1454178) and JavaScript has
|
||||
[Arrow functions](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Functions/Arrow_functions).
|
||||
|
||||
In Rust, arguments to closures are the same as arguments to other functions; closures are simply
|
||||
functions that don't have a declared name. Some weird ordering of the stack may be required to
|
||||
handle them, but it's the compiler's responsibility to figure that out.
|
||||
|
||||
Each example below has the same effect, but a different assembly implementation. In the simplest
|
||||
case, we immediately run a closure returned by another function. Because we don't store a reference
|
||||
to the closure, the stack memory needed to store the captured values is contiguous:
|
||||
|
||||
```rust
|
||||
fn my_func() -> impl FnOnce() {
|
||||
let x = 24;
|
||||
// Note that this closure in assembly looks exactly like
|
||||
// any other function; you even use the `call` instruction
|
||||
// to start running it.
|
||||
move || { x; }
|
||||
}
|
||||
|
||||
pub fn immediate() {
|
||||
my_func()();
|
||||
my_func()();
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/mgJ2zl), 25 total assembly instructions
|
||||
|
||||
If we store a reference to the closure, the Rust compiler keeps values it needs in the stack memory
|
||||
of the original function. Getting the details right is a bit harder, so the instruction count goes
|
||||
up even though this code is functionally equivalent to our original example:
|
||||
|
||||
```rust
|
||||
pub fn simple_reference() {
|
||||
let x = my_func();
|
||||
let y = my_func();
|
||||
y();
|
||||
x();
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/K_dj5n), 55 total assembly instructions
|
||||
|
||||
Even things like variable order can make a difference in instruction count:
|
||||
|
||||
```rust
|
||||
pub fn complex() {
|
||||
let x = my_func();
|
||||
let y = my_func();
|
||||
x();
|
||||
y();
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/p37qFl), 70 total assembly instructions
|
||||
|
||||
In every circumstance though, the compiler ensured that no heap allocations were necessary.
|
||||
|
||||
# Generics
|
||||
|
||||
Traits in Rust come in two broad forms: static dispatch (monomorphization, `impl Trait`) and dynamic
|
||||
dispatch (trait objects, `dyn Trait`). While dynamic dispatch is often _associated_ with trait
|
||||
objects being stored in the heap, dynamic dispatch can be used with stack allocated objects as well:
|
||||
|
||||
```rust
|
||||
trait GetInt {
|
||||
fn get_int(&self) -> u64;
|
||||
}
|
||||
|
||||
// vtable stored at section L__unnamed_1
|
||||
struct WhyNotU8 {
|
||||
x: u8
|
||||
}
|
||||
impl GetInt for WhyNotU8 {
|
||||
fn get_int(&self) -> u64 {
|
||||
self.x as u64
|
||||
}
|
||||
}
|
||||
|
||||
// vtable stored at section L__unnamed_2
|
||||
struct ActualU64 {
|
||||
x: u64
|
||||
}
|
||||
impl GetInt for ActualU64 {
|
||||
fn get_int(&self) -> u64 {
|
||||
self.x
|
||||
}
|
||||
}
|
||||
|
||||
// `&dyn` declares that we want to use dynamic dispatch
|
||||
// rather than monomorphization, so there is only one
|
||||
// `retrieve_int` function that shows up in the final assembly.
|
||||
// If we used generics, there would be one implementation of
|
||||
// `retrieve_int` for each type that implements `GetInt`.
|
||||
pub fn retrieve_int(u: &dyn GetInt) {
|
||||
// In the assembly, we just call an address given to us
|
||||
// in the `rsi` register and hope that it was set up
|
||||
// correctly when this function was invoked.
|
||||
let x = u.get_int();
|
||||
}
|
||||
|
||||
pub fn do_call() {
|
||||
// Note that even though the vtable for `WhyNotU8` and
|
||||
// `ActualU64` includes a pointer to
|
||||
// `core::ptr::real_drop_in_place`, it is never invoked.
|
||||
let a = WhyNotU8 { x: 0 };
|
||||
let b = ActualU64 { x: 0 };
|
||||
|
||||
retrieve_int(&a);
|
||||
retrieve_int(&b);
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/u_yguS)
|
||||
|
||||
It's hard to imagine practical situations where dynamic dispatch would be used for objects that
|
||||
aren't heap allocated, but it technically can be done.
|
||||
|
||||
# Copy types
|
||||
|
||||
Understanding move semantics and copy semantics in Rust is weird at first. The Rust docs
|
||||
[go into detail](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html) far better than can
|
||||
be addressed here, so I'll leave them to do the job. From a memory perspective though, their
|
||||
guideline is reasonable:
|
||||
[if your type can implement `Copy`, it should](https://doc.rust-lang.org/stable/core/marker/trait.Copy.html#when-should-my-type-be-copy).
|
||||
While there are potential speed tradeoffs to _benchmark_ when discussing `Copy` (move semantics for
|
||||
stack objects vs. copying stack pointers vs. copying stack `struct`s), _it's impossible for `Copy`
|
||||
to introduce a heap allocation_.
|
||||
|
||||
But why is this the case? Fundamentally, it's because the language controls what `Copy` means -
|
||||
["the behavior of `Copy` is not overloadable"](https://doc.rust-lang.org/std/marker/trait.Copy.html#whats-the-difference-between-copy-and-clone)
|
||||
because it's a marker trait. From there we'll note that a type
|
||||
[can implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#when-can-my-type-be-copy)
|
||||
if (and only if) its components implement `Copy`, and that
|
||||
[no heap-allocated types implement `Copy`](https://doc.rust-lang.org/std/marker/trait.Copy.html#implementors).
|
||||
Thus, assignments involving heap types are always move semantics, and new heap allocations won't
|
||||
occur because of implicit operator behavior.
|
||||
|
||||
```rust
|
||||
#[derive(Clone)]
|
||||
struct Cloneable {
|
||||
x: Box<u64>
|
||||
}
|
||||
|
||||
// error[E0204]: the trait `Copy` may not be implemented for this type
|
||||
#[derive(Copy, Clone)]
|
||||
struct NotCopyable {
|
||||
x: Box<u64>
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/VToRuK)
|
||||
|
||||
# Iterators
|
||||
|
||||
In managed memory languages (like
|
||||
[Java](https://www.youtube.com/watch?v=bSkpMdDe4g4&feature=youtu.be&t=357)), there's a subtle
|
||||
difference between these two code samples:
|
||||
|
||||
```java
|
||||
public static int sum_for(List<Long> vals) {
|
||||
long sum = 0;
|
||||
// Regular for loop
|
||||
for (int i = 0; i < vals.length; i++) {
|
||||
sum += vals[i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
public static int sum_foreach(List<Long> vals) {
|
||||
long sum = 0;
|
||||
// "Foreach" loop - uses iteration
|
||||
for (Long l : vals) {
|
||||
sum += l;
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
```
|
||||
|
||||
In the `sum_for` function, nothing terribly interesting happens. In `sum_foreach`, an object of type
|
||||
[`Iterator`](https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/Iterator.html)
|
||||
is allocated on the heap, and will eventually be garbage-collected. This isn't a great design;
|
||||
iterators are often transient objects that you need during a function and can discard once the
|
||||
function ends. Sounds exactly like the issue stack-allocated objects address, no?
|
||||
|
||||
In Rust, iterators are allocated on the stack. The objects to iterate over are almost certainly in
|
||||
heap memory, but the iterator itself
|
||||
([`Iter`](https://doc.rust-lang.org/std/slice/struct.Iter.html)) doesn't need to use the heap. In
|
||||
each of the examples below we iterate over a collection, but never use heap allocation:
|
||||
|
||||
```rust
|
||||
use std::collections::HashMap;
|
||||
// There's a lot of assembly generated, but if you search in the text,
|
||||
// there are no references to `real_drop_in_place` anywhere.
|
||||
|
||||
pub fn sum_vec(x: &Vec<u32>) {
|
||||
let mut s = 0;
|
||||
// Basic iteration over vectors doesn't need allocation
|
||||
for y in x {
|
||||
s += y;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sum_enumerate(x: &Vec<u32>) {
|
||||
let mut s = 0;
|
||||
// More complex iterators are just fine too
|
||||
for (_i, y) in x.iter().enumerate() {
|
||||
s += y;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn sum_hm(x: &HashMap<u32, u32>) {
|
||||
let mut s = 0;
|
||||
// And it's not just Vec, all types will allocate the iterator
|
||||
// on stack memory
|
||||
for y in x.values() {
|
||||
s += y;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/FTT3CT)
|
@ -1,254 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Dynamic Memory: A Heaping Helping"
|
||||
description: "The reason Rust exists."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
Managing dynamic memory is hard. Some languages assume users will do it themselves (C, C++), and
|
||||
some languages go to extreme lengths to protect users from themselves (Java, Python). In Rust, how
|
||||
the language uses dynamic memory (also referred to as the **heap**) is governed by a system called _ownership_.
|
||||
And as the docs mention, ownership
|
||||
[is Rust's most unique feature](https://doc.rust-lang.org/book/ch04-00-understanding-ownership.html).
|
||||
|
||||
The heap is used in two situations: when the compiler is unable to predict either the _total size of
|
||||
memory needed_, or _how long the memory is needed for_, it allocates space in the heap. This happens
|
||||
pretty frequently; if you want to download the Google home page, you won't know how large it is
|
||||
until your program runs. And when you're finished with Google, we deallocate the memory so it can be
|
||||
used to store other webpages. If you're interested in a slightly longer explanation of the heap,
|
||||
check out
|
||||
[The Stack and the Heap](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html#the-stack-and-the-heap)
|
||||
in Rust's documentation.
|
||||
|
||||
We won't go into detail on how the heap is managed; the
|
||||
[ownership documentation](https://doc.rust-lang.org/book/ch04-01-what-is-ownership.html) does a
|
||||
phenomenal job explaining both the "why" and "how" of memory management. Instead, we're going to
|
||||
focus on understanding "when" heap allocations occur in Rust.
|
||||
|
||||
To start off, take a guess for how many allocations happen in the program below:
|
||||
|
||||
```rust
|
||||
fn main() {}
|
||||
```
|
||||
|
||||
It's obviously a trick question; while no heap allocations occur as a result of that code, the setup
|
||||
needed to call `main` does allocate on the heap. Here's a way to show it:
|
||||
|
||||
```rust
|
||||
#![feature(integer_atomics)]
|
||||
use std::alloc::{GlobalAlloc, Layout, System};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
|
||||
static ALLOCATION_COUNT: AtomicU64 = AtomicU64::new(0);
|
||||
|
||||
struct CountingAllocator;
|
||||
|
||||
unsafe impl GlobalAlloc for CountingAllocator {
|
||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
||||
ALLOCATION_COUNT.fetch_add(1, Ordering::SeqCst);
|
||||
System.alloc(layout)
|
||||
}
|
||||
|
||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
||||
System.dealloc(ptr, layout);
|
||||
}
|
||||
}
|
||||
|
||||
#[global_allocator]
|
||||
static A: CountingAllocator = CountingAllocator;
|
||||
|
||||
fn main() {
|
||||
let x = ALLOCATION_COUNT.fetch_add(0, Ordering::SeqCst);
|
||||
println!("There were {} allocations before calling main!", x);
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=nightly&mode=debug&edition=2018&gist=fb5060025ba79fc0f906b65a4ef8eb8e)
|
||||
|
||||
As of the time of writing, there are five allocations that happen before `main` is ever called.
|
||||
|
||||
But when we want to understand more practically where heap allocation happens, we'll follow this
|
||||
guide:
|
||||
|
||||
- Smart pointers hold their contents in the heap
|
||||
- Collections are smart pointers for many objects at a time, and reallocate when they need to grow
|
||||
|
||||
Finally, there are two "addendum" issues that are important to address when discussing Rust and the
|
||||
heap:
|
||||
|
||||
- Non-heap alternatives to many standard library types are available.
|
||||
- Special allocators to track memory behavior should be used to benchmark code.
|
||||
|
||||
# Smart pointers
|
||||
|
||||
The first things to note are the "smart pointer" types. When you have data that must outlive the
|
||||
scope in which it is declared, or your data is of unknown or dynamic size, you'll make use of these
|
||||
types.
|
||||
|
||||
The term [smart pointer](https://en.wikipedia.org/wiki/Smart_pointer) comes from C++, and while it's
|
||||
closely linked to a general design pattern of
|
||||
["Resource Acquisition Is Initialization"](https://en.cppreference.com/w/cpp/language/raii), we'll
|
||||
use it here specifically to describe objects that are responsible for managing ownership of data
|
||||
allocated on the heap. The smart pointers available in the `alloc` crate should look mostly
|
||||
familiar:
|
||||
|
||||
- [`Box`](https://doc.rust-lang.org/alloc/boxed/struct.Box.html)
|
||||
- [`Rc`](https://doc.rust-lang.org/alloc/rc/struct.Rc.html)
|
||||
- [`Arc`](https://doc.rust-lang.org/alloc/sync/struct.Arc.html)
|
||||
- [`Cow`](https://doc.rust-lang.org/alloc/borrow/enum.Cow.html)
|
||||
|
||||
The [standard library](https://doc.rust-lang.org/std/) also defines some smart pointers to manage
|
||||
heap objects, though more than can be covered here. Some examples are:
|
||||
|
||||
- [`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html)
|
||||
- [`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)
|
||||
|
||||
Finally, there is one ["gotcha"](https://www.merriam-webster.com/dictionary/gotcha): **cell types**
|
||||
(like [`RefCell`](https://doc.rust-lang.org/stable/core/cell/struct.RefCell.html)) look and behave
|
||||
similarly, but **don't involve heap allocation**. The
|
||||
[`core::cell` docs](https://doc.rust-lang.org/stable/core/cell/index.html) have more information.
|
||||
|
||||
When a smart pointer is created, the data it is given is placed in heap memory and the location of
|
||||
that data is recorded in the smart pointer. Once the smart pointer has determined it's safe to
|
||||
deallocate that memory (when a `Box` has
|
||||
[gone out of scope](https://doc.rust-lang.org/stable/std/boxed/index.html) or a reference count
|
||||
[goes to zero](https://doc.rust-lang.org/alloc/rc/index.html)), the heap space is reclaimed. We can
|
||||
prove these types use heap memory by looking at code:
|
||||
|
||||
```rust
|
||||
use std::rc::Rc;
|
||||
use std::sync::Arc;
|
||||
use std::borrow::Cow;
|
||||
|
||||
pub fn my_box() {
|
||||
// Drop at assembly line 1640
|
||||
Box::new(0);
|
||||
}
|
||||
|
||||
pub fn my_rc() {
|
||||
// Drop at assembly line 1650
|
||||
Rc::new(0);
|
||||
}
|
||||
|
||||
pub fn my_arc() {
|
||||
// Drop at assembly line 1660
|
||||
Arc::new(0);
|
||||
}
|
||||
|
||||
pub fn my_cow() {
|
||||
// Drop at assembly line 1672
|
||||
Cow::from("drop");
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/4AMQug)
|
||||
|
||||
# Collections
|
||||
|
||||
Collection types use heap memory because their contents have dynamic size; they will request more
|
||||
memory [when needed](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.reserve), and can
|
||||
[release memory](https://doc.rust-lang.org/std/vec/struct.Vec.html#method.shrink_to_fit) when it's
|
||||
no longer necessary. This dynamic property forces Rust to heap allocate everything they contain. In
|
||||
a way, **collections are smart pointers for many objects at a time**. Common types that fall under
|
||||
this umbrella are [`Vec`](https://doc.rust-lang.org/stable/alloc/vec/struct.Vec.html),
|
||||
[`HashMap`](https://doc.rust-lang.org/stable/std/collections/struct.HashMap.html), and
|
||||
[`String`](https://doc.rust-lang.org/stable/alloc/string/struct.String.html) (not
|
||||
[`str`](https://doc.rust-lang.org/std/primitive.str.html)).
|
||||
|
||||
While collections store the objects they own in heap memory, _creating new collections will not
|
||||
allocate on the heap_. This is a bit weird; if we call `Vec::new()`, the assembly shows a
|
||||
corresponding call to `real_drop_in_place`:
|
||||
|
||||
```rust
|
||||
pub fn my_vec() {
|
||||
// Drop in place at line 481
|
||||
Vec::<u8>::new();
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/1WkNtC)
|
||||
|
||||
But because the vector has no elements to manage, no calls to the allocator will ever be dispatched:
|
||||
|
||||
```rust
|
||||
use std::alloc::{GlobalAlloc, Layout, System};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
fn main() {
|
||||
// Turn on panicking if we allocate on the heap
|
||||
DO_PANIC.store(true, Ordering::SeqCst);
|
||||
|
||||
// Interesting bit happens here
|
||||
let x: Vec<u8> = Vec::new();
|
||||
drop(x);
|
||||
|
||||
// Turn panicking back off, some deallocations occur
|
||||
// after main as well.
|
||||
DO_PANIC.store(false, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
#[global_allocator]
|
||||
static A: PanicAllocator = PanicAllocator;
|
||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
|
||||
struct PanicAllocator;
|
||||
|
||||
unsafe impl GlobalAlloc for PanicAllocator {
|
||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
||||
if DO_PANIC.load(Ordering::SeqCst) {
|
||||
panic!("Unexpected allocation.");
|
||||
}
|
||||
System.alloc(layout)
|
||||
}
|
||||
|
||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
||||
if DO_PANIC.load(Ordering::SeqCst) {
|
||||
panic!("Unexpected deallocation.");
|
||||
}
|
||||
System.dealloc(ptr, layout);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
--
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=debug&edition=2018&gist=831a297d176d015b1f9ace01ae416cc6)
|
||||
|
||||
Other standard library types follow the same behavior; make sure to check out
|
||||
[`HashMap::new()`](https://doc.rust-lang.org/std/collections/hash_map/struct.HashMap.html#method.new),
|
||||
and [`String::new()`](https://doc.rust-lang.org/std/string/struct.String.html#method.new).
|
||||
|
||||
# Heap Alternatives
|
||||
|
||||
While it is a bit strange to speak of the stack after spending time with the heap, it's worth
|
||||
pointing out that some heap-allocated objects in Rust have stack-based counterparts provided by
|
||||
other crates. If you have need of the functionality, but want to avoid allocating, there are
|
||||
typically alternatives available.
|
||||
|
||||
When it comes to some standard library smart pointers
|
||||
([`RwLock`](https://doc.rust-lang.org/std/sync/struct.RwLock.html) and
|
||||
[`Mutex`](https://doc.rust-lang.org/std/sync/struct.Mutex.html)), stack-based alternatives are
|
||||
provided in crates like [parking_lot](https://crates.io/crates/parking_lot) and
|
||||
[spin](https://crates.io/crates/spin). You can check out
|
||||
[`lock_api::RwLock`](https://docs.rs/lock_api/0.1.5/lock_api/struct.RwLock.html),
|
||||
[`lock_api::Mutex`](https://docs.rs/lock_api/0.1.5/lock_api/struct.Mutex.html), and
|
||||
[`spin::Once`](https://mvdnes.github.io/rust-docs/spin-rs/spin/struct.Once.html) if you're in need
|
||||
of synchronization primitives.
|
||||
|
||||
[thread_id](https://crates.io/crates/thread-id) may be necessary if you're implementing an allocator
|
||||
because [`thread::current().id()`](https://doc.rust-lang.org/std/thread/struct.ThreadId.html) uses a
|
||||
[`thread_local!` structure](https://doc.rust-lang.org/stable/src/std/sys_common/thread_info.rs.html#17-36)
|
||||
that needs heap allocation.
|
||||
|
||||
# Tracing Allocators
|
||||
|
||||
When writing performance-sensitive code, there's no alternative to measuring your code. If you
|
||||
didn't write a benchmark,
|
||||
[you don't care about its performance](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=263).
|
||||
You should never rely on your instincts when
|
||||
[a microsecond is an eternity](https://www.youtube.com/watch?v=NH1Tta7purM).
|
||||
|
||||
Similarly, there's great work going on in Rust with allocators that keep track of what they're doing
|
||||
(like [`alloc_counter`](https://crates.io/crates/alloc_counter)). When it comes to tracking heap
|
||||
behavior, it's easy to make mistakes; please write tests and make sure you have tools to guard
|
||||
against future issues.
|
@ -1,148 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Compiler Optimizations: What It's Done Lately"
|
||||
description: "A lot. The answer is a lot."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
**Update 2019-02-10**: When debugging a
|
||||
[related issue](https://gitlab.com/sio4/code/alloc-counter/issues/1), it was discovered that the
|
||||
original code worked because LLVM optimized out the entire function, rather than just the allocation
|
||||
segments. The code has been updated with proper use of
|
||||
[`read_volatile`](https://doc.rust-lang.org/std/ptr/fn.read_volatile.html), and a previous section
|
||||
on vector capacity has been removed.
|
||||
|
||||
---
|
||||
|
||||
Up to this point, we've been discussing memory usage in the Rust language by focusing on simple
|
||||
rules that are mostly right for small chunks of code. We've spent time showing how those rules work
|
||||
themselves out in practice, and become familiar with reading the assembly code needed to see each
|
||||
memory type (global, stack, heap) in action.
|
||||
|
||||
Throughout the series so far, we've put a handicap on the code. In the name of consistent and
|
||||
understandable results, we've asked the compiler to pretty please leave the training wheels on. Now
|
||||
is the time where we throw out all the rules and take off the kid gloves. As it turns out, both the
|
||||
Rust compiler and the LLVM optimizers are incredibly sophisticated, and we'll step back and let them
|
||||
do their job.
|
||||
|
||||
Similar to
|
||||
["What Has My Compiler Done For Me Lately?"](https://www.youtube.com/watch?v=bSkpMdDe4g4), we're
|
||||
focusing on interesting things the Rust language (and LLVM!) can do with memory management. We'll
|
||||
still be looking at assembly code to understand what's going on, but it's important to mention
|
||||
again: **please use automated tools like [alloc-counter](https://crates.io/crates/alloc_counter) to
|
||||
double-check memory behavior if it's something you care about**. It's far too easy to mis-read
|
||||
assembly in large code sections; you should always verify behavior if you care about memory usage.
|
||||
|
||||
The guiding principle as we move forward is this: _optimizing compilers won't produce worse programs
|
||||
than we started with._ There won't be any situations where stack allocations get moved to heap
|
||||
allocations. There will, however, be an opera of optimization.
|
||||
|
||||
# The Case of the Disappearing Box
|
||||
|
||||
Our first optimization comes when LLVM can reason that the lifetime of an object is sufficiently
|
||||
short that heap allocations aren't necessary. In these cases, LLVM will move the allocation to the
|
||||
stack instead! The way this interacts with `#[inline]` attributes is a bit opaque, but the important
|
||||
part is that LLVM can sometimes do better than the baseline Rust language:
|
||||
|
||||
```rust
|
||||
use std::alloc::{GlobalAlloc, Layout, System};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
pub fn cmp(x: u32) {
|
||||
// Turn on panicking if we allocate on the heap
|
||||
DO_PANIC.store(true, Ordering::SeqCst);
|
||||
|
||||
// The compiler is able to see through the constant `Box`
|
||||
// and directly compare `x` to 24 - assembly line 73
|
||||
let y = Box::new(24);
|
||||
let equals = x == *y;
|
||||
|
||||
// This call to drop is eliminated
|
||||
drop(y);
|
||||
|
||||
// Need to mark the comparison result as volatile so that
|
||||
// LLVM doesn't strip out all the code. If `y` is marked
|
||||
// volatile instead, allocation will be forced.
|
||||
unsafe { std::ptr::read_volatile(&equals) };
|
||||
|
||||
// Turn off panicking, as there are some deallocations
|
||||
// when we exit main.
|
||||
DO_PANIC.store(false, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
fn main() {
|
||||
cmp(12)
|
||||
}
|
||||
|
||||
#[global_allocator]
|
||||
static A: PanicAllocator = PanicAllocator;
|
||||
static DO_PANIC: AtomicBool = AtomicBool::new(false);
|
||||
struct PanicAllocator;
|
||||
|
||||
unsafe impl GlobalAlloc for PanicAllocator {
|
||||
unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
|
||||
if DO_PANIC.load(Ordering::SeqCst) {
|
||||
panic!("Unexpected allocation.");
|
||||
}
|
||||
System.alloc(layout)
|
||||
}
|
||||
|
||||
unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
|
||||
if DO_PANIC.load(Ordering::SeqCst) {
|
||||
panic!("Unexpected deallocation.");
|
||||
}
|
||||
System.dealloc(ptr, layout);
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/BZ_Yp3)
|
||||
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4a765f753183d5b919f62c71d2109d5d)
|
||||
|
||||
# Dr. Array or: How I Learned to Love the Optimizer
|
||||
|
||||
Finally, this isn't so much about LLVM figuring out different memory behavior, but LLVM stripping
|
||||
out code that doesn't do anything. Optimizations of this type have a lot of nuance to them; if
|
||||
you're not careful, they can make your benchmarks look
|
||||
[impossibly good](https://www.youtube.com/watch?v=nXaxk27zwlk&feature=youtu.be&t=1199). In Rust, the
|
||||
`black_box` function (implemented in both
|
||||
[`libtest`](https://doc.rust-lang.org/1.1.0/test/fn.black_box.html) and
|
||||
[`criterion`](https://docs.rs/criterion/0.2.10/criterion/fn.black_box.html)) will tell the compiler
|
||||
to disable this kind of optimization. But if you let LLVM remove unnecessary code, you can end up
|
||||
running programs that previously caused errors:
|
||||
|
||||
```rust
|
||||
#[derive(Default)]
|
||||
struct TwoFiftySix {
|
||||
_a: [u64; 32]
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct EightK {
|
||||
_a: [TwoFiftySix; 32]
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct TwoFiftySixK {
|
||||
_a: [EightK; 32]
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
struct EightM {
|
||||
_a: [TwoFiftySixK; 32]
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
// Normally this blows up because we can't reserve size on stack
|
||||
// for the `EightM` struct. But because the compiler notices we
|
||||
// never do anything with `_x`, it optimizes out the stack storage
|
||||
// and the program completes successfully.
|
||||
let _x = EightM::default();
|
||||
}
|
||||
```
|
||||
|
||||
-- [Compiler Explorer](https://godbolt.org/z/daHn7P)
|
||||
|
||||
[Rust Playground](https://play.rust-lang.org/?version=stable&mode=release&edition=2018&gist=4c253bf26072119896ab93c6ef064dc0)
|
@ -1,35 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Summary: What are the Allocation Rules?"
|
||||
description: "A synopsis and reference."
|
||||
category:
|
||||
tags: [rust, understanding-allocations]
|
||||
---
|
||||
|
||||
While there's a lot of interesting detail captured in this series, it's often helpful to have a
|
||||
document that answers some "yes/no" questions. You may not care about what an `Iterator` looks like
|
||||
in assembly, you just need to know whether it allocates an object on the heap or not. And while Rust
|
||||
will prioritize the fastest behavior it can, here are the rules for each memory type:
|
||||
|
||||
**Heap Allocation**:
|
||||
|
||||
- Smart pointers (`Box`, `Rc`, `Mutex`, etc.) allocate their contents in heap memory.
|
||||
- Collections (`HashMap`, `Vec`, `String`, etc.) allocate their contents in heap memory.
|
||||
- Some smart pointers in the standard library have counterparts in other crates that don't need heap
|
||||
memory. If possible, use those.
|
||||
|
||||
**Stack Allocation**:
|
||||
|
||||
- Everything not using a smart pointer will be allocated on the stack.
|
||||
- Structs, enums, iterators, arrays, and closures are all stack allocated.
|
||||
- Cell types (`RefCell`) behave like smart pointers, but are stack-allocated.
|
||||
- Inlining (`#[inline]`) will not affect allocation behavior for better or worse.
|
||||
- Types that are marked `Copy` are guaranteed to have their contents stack-allocated.
|
||||
|
||||
**Global Allocation**:
|
||||
|
||||
- `const` is a fixed value; the compiler is allowed to copy it wherever useful.
|
||||
- `static` is a fixed reference; the compiler will guarantee it is unique.
|
||||
|
||||
![Container Sizes in Rust](/assets/images/2019-02-04-container-size.svg) --
|
||||
[Raph Levien](https://docs.google.com/presentation/d/1q-c7UAyrUlM-eZyTo1pd8SZ0qwA_wYxmPZVOQkoDmH4/edit?usp=sharing)
|
@ -1,52 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Making Bread"
|
||||
description: "...because I've got some free time now. 🍞"
|
||||
category:
|
||||
tags: [baking]
|
||||
---
|
||||
|
||||
Having recently started my "gardening leave" between positions, I have some more personal time
|
||||
available. I'm planning to stay productive, contributing to some open-source projects, but it also
|
||||
occurred to me that despite [talking about](https://speice.io/2018/05/hello.html) bread pics, this
|
||||
blog has been purely technical. Maybe I'll change the site title from "The Old Speice Guy" to "Bites
|
||||
and Bytes"?
|
||||
|
||||
Either way, I'm baking a little bit again, and figured it was worth taking a quick break to focus on
|
||||
some lighter material. I recently learned two critically important lessons: first, the temperature
|
||||
of the dough when you put the yeast in makes a huge difference.
|
||||
|
||||
Previously, when I wasn't paying attention to dough temperature:
|
||||
|
||||
![Whole wheat dough](/assets/images/2019-05-03-making-bread/whole-wheat-not-rising.jpg)
|
||||
|
||||
Compared with what happens when I put the dough in the microwave for a defrost cycle because the
|
||||
water I used wasn't warm enough:
|
||||
|
||||
![White dough](/assets/images/2019-05-03-making-bread/white-dough-rising-before-fold.jpg)
|
||||
|
||||
I mean, just look at the bubbles!
|
||||
|
||||
![White dough with bubbles](/assets/images/2019-05-03-making-bread/white-dough-rising-after-fold.jpg)
|
||||
|
||||
After shaping the dough, I've got two loaves ready:
|
||||
|
||||
![Shaped loaves](/assets/images/2019-05-03-making-bread/shaped-loaves.jpg)
|
||||
|
||||
Now, the recipe normally calls for a Dutch Oven to bake the bread because it keeps the dough from
|
||||
drying out in the oven. Because I don't own a Dutch Oven, I typically put a casserole dish on the
|
||||
bottom rack and fill it with water so there's still some moisture in the oven. This time, I forgot
|
||||
to add the water and learned my second lesson: never add room-temperature water to a glass dish
|
||||
that's currently at 500 degrees.
|
||||
|
||||
![Shattered glass dish](/assets/images/2019-05-03-making-bread/shattered-glass.jpg)
|
||||
|
||||
Needless to say, trying to pull out sharp glass from an incredibly hot oven is not what I expected
|
||||
to be doing during my gardening leave.
|
||||
|
||||
In the end, the bread crust wasn't great, but the bread itself turned out pretty alright:
|
||||
|
||||
![Baked bread](/assets/images/2019-05-03-making-bread/final-product.jpg)
|
||||
|
||||
I've been writing a lot more during this break, so I'm looking forward to sharing that in the
|
||||
future. In the meantime, I'm planning on making a sandwich.
|
@ -1,296 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "On Building High Performance Systems"
|
||||
description: ""
|
||||
category:
|
||||
tags: []
|
||||
---
|
||||
|
||||
**Update 2019-09-21**: Added notes on `isolcpus` and `systemd` affinity.
|
||||
|
||||
Prior to working in the trading industry, my assumption was that High Frequency Trading (HFT) is
|
||||
made up of people who have access to secret techniques mortal developers could only dream of. There
|
||||
had to be some secret art that could only be learned if one had an appropriately tragic backstory:
|
||||
|
||||
<img src="/assets/images/2019-04-24-kung-fu.webp" alt="kung-fu fight">
|
||||
> How I assumed HFT people learn their secret techniques
|
||||
|
||||
How else do you explain people working on systems that complete the round trip of market data in to
|
||||
orders out (a.k.a. tick-to-trade) consistently within
|
||||
[750-800 nanoseconds](https://stackoverflow.com/a/22082528/1454178)? In roughly the time it takes a
|
||||
computer to access
|
||||
[main memory 8 times](https://people.eecs.berkeley.edu/~rcs/research/interactive_latency.html),
|
||||
trading systems are capable of reading the market data packets, deciding what orders to send, doing
|
||||
risk checks, creating new packets for exchange-specific protocols, and putting those packets on the
|
||||
wire.
|
||||
|
||||
Having now worked in the trading industry, I can confirm the developers aren't super-human; I've
|
||||
made some simple mistakes at the very least. Instead, what shows up in public discussions is that
|
||||
philosophy, not technique, separates high-performance systems from everything else.
|
||||
Performance-critical systems don't rely on "this one cool C++ optimization trick" to make code fast
|
||||
(though micro-optimizations have their place); there's a lot more to worry about than just the code
|
||||
written for the project.
|
||||
|
||||
The framework I'd propose is this: **If you want to build high-performance systems, focus first on
|
||||
reducing performance variance** (reducing the gap between the fastest and slowest runs of the same
|
||||
code), **and only look at average latency once variance is at an acceptable level**.
|
||||
|
||||
Don't get me wrong, I'm a much happier person when things are fast. Computer goes from booting in 20
|
||||
seconds down to 10 because I installed a solid-state drive? Awesome. But if every fifth day it takes
|
||||
a full minute to boot because of corrupted sectors? Not so great. Average speed over the course of a
|
||||
week is the same in each situation, but you're painfully aware of that minute when it happens. When
|
||||
it comes to code, the principle is the same: speeding up a function by an average of 10 milliseconds
|
||||
doesn't mean much if there's a 100ms difference between your fastest and slowest runs. When
|
||||
performance matters, you need to respond quickly _every time_, not just in aggregate.
|
||||
High-performance systems should first optimize for time variance. Once you're consistent at the time
|
||||
scale you care about, then focus on improving average time.
|
||||
|
||||
This focus on variance shows up all the time in industry too (emphasis added in all quotes below):
|
||||
|
||||
- In [marketing materials](https://business.nasdaq.com/market-tech/marketplaces/trading) for
|
||||
NASDAQ's matching engine, the most performance-sensitive component of the exchange, dependability
|
||||
is highlighted in addition to instantaneous metrics:
|
||||
|
||||
> Able to **consistently sustain** an order rate of over 100,000 orders per second at sub-40
|
||||
> microsecond average latency
|
||||
|
||||
- The [Aeron](https://github.com/real-logic/aeron) message bus has this to say about performance:
|
||||
|
||||
> Performance is the key focus. Aeron is designed to be the highest throughput with the lowest and
|
||||
> **most predictable latency possible** of any messaging system
|
||||
|
||||
- The company PolySync, which is working on autonomous vehicles,
|
||||
[mentions why](https://polysync.io/blog/session-types-for-hearty-codecs/) they picked their
|
||||
specific messaging format:
|
||||
|
||||
> In general, high performance is almost always desirable for serialization. But in the world of
|
||||
> autonomous vehicles, **steady timing performance is even more important** than peak throughput.
|
||||
> This is because safe operation is sensitive to timing outliers. Nobody wants the system that
|
||||
> decides when to slam on the brakes to occasionally take 100 times longer than usual to encode
|
||||
> its commands.
|
||||
|
||||
- [Solarflare](https://solarflare.com/), which makes highly-specialized network hardware, points out
|
||||
variance (jitter) as a big concern for
|
||||
[electronic trading](https://solarflare.com/electronic-trading/):
|
||||
> The high stakes world of electronic trading, investment banks, market makers, hedge funds and
|
||||
> exchanges demand the **lowest possible latency and jitter** while utilizing the highest
|
||||
> bandwidth and return on their investment.
|
||||
|
||||
And to further clarify: we're not discussing _total run-time_, but variance of total run-time. There
|
||||
are situations where it's not reasonably possible to make things faster, and you'd much rather be
|
||||
consistent. For example, trading firms use
|
||||
[wireless networks](https://sniperinmahwah.wordpress.com/2017/06/07/network-effects-part-i/) because
|
||||
the speed of light through air is faster than through fiber-optic cables. There's still at _absolute
|
||||
minimum_ a [~33.76 millisecond](http://tinyurl.com/y2vd7tn8) delay required to send data between,
|
||||
say,
|
||||
[Chicago and Tokyo](https://www.theice.com/market-data/connectivity-and-feeds/wireless/tokyo-chicago).
|
||||
If a trading system in Chicago calls the function for "send order to Tokyo" and waits to see if a
|
||||
trade occurs, there's a physical limit to how long that will take. In this situation, the focus is
|
||||
on keeping variance of _additional processing_ to a minimum, since speed of light is the limiting
|
||||
factor.
|
||||
|
||||
So how does one go about looking for and eliminating performance variance? To tell the truth, I
|
||||
don't think a systematic answer or flow-chart exists. There's no substitute for (A) building a deep
|
||||
understanding of the entire technology stack, and (B) actually measuring system performance (though
|
||||
(C) watching a lot of [CppCon](https://www.youtube.com/channel/UCMlGfpWw-RUdWX_JbLCukXg) videos for
|
||||
inspiration never hurt). Even then, every project cares about performance to a different degree; you
|
||||
may need to build an entire
|
||||
[replica production system](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=3015) to
|
||||
accurately benchmark at nanosecond precision, or you may be content to simply
|
||||
[avoid garbage collection](https://www.youtube.com/watch?v=BD9cRbxWQx8&feature=youtu.be&t=1335) in
|
||||
your Java code.
|
||||
|
||||
Even though everyone has different needs, there are still common things to look for when trying to
|
||||
isolate and eliminate variance. In no particular order, these are my focus areas when thinking about
|
||||
high-performance systems:
|
||||
|
||||
## Language-specific
|
||||
|
||||
**Garbage Collection**: How often does garbage collection happen? When is it triggered? What are the
|
||||
impacts?
|
||||
|
||||
- [In Python](https://rushter.com/blog/python-garbage-collector/), individual objects are collected
|
||||
if the reference count reaches 0, and each generation is collected if
|
||||
`num_alloc - num_dealloc > gc_threshold` whenever an allocation happens. The GIL is acquired for
|
||||
the duration of generational collection.
|
||||
- Java has
|
||||
[many](https://docs.oracle.com/en/java/javase/12/gctuning/parallel-collector1.html#GUID-DCDD6E46-0406-41D1-AB49-FB96A50EB9CE)
|
||||
[different](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector.html#GUID-ED3AB6D3-FD9B-4447-9EDF-983ED2F7A573)
|
||||
[collection](https://docs.oracle.com/en/java/javase/12/gctuning/garbage-first-garbage-collector-tuning.html#GUID-90E30ACA-8040-432E-B3A0-1E0440AB556A)
|
||||
[algorithms](https://docs.oracle.com/en/java/javase/12/gctuning/z-garbage-collector1.html#GUID-A5A42691-095E-47BA-B6DC-FB4E5FAA43D0)
|
||||
to choose from, each with different characteristics. The default algorithms (Parallel GC in Java
|
||||
8, G1 in Java 9) freeze the JVM while collecting, while more recent algorithms
|
||||
([ZGC](https://wiki.openjdk.java.net/display/zgc) and
|
||||
[Shenandoah](https://wiki.openjdk.java.net/display/shenandoah)) are designed to keep "stop the
|
||||
world" to a minimum by doing collection work in parallel.
|
||||
|
||||
**Allocation**: Every language has a different way of interacting with "heap" memory, but the
|
||||
principle is the same: running the allocator to allocate/deallocate memory takes time that can often
|
||||
be put to better use. Understanding when your language interacts with the allocator is crucial, and
|
||||
not always obvious. For example: C++ and Rust don't allocate heap memory for iterators, but Java
|
||||
does (meaning potential GC pauses). Take time to understand heap behavior (I made
|
||||
[a guide for Rust](/2019/02/understanding-allocations-in-rust.html)), and look into alternative
|
||||
allocators ([jemalloc](http://jemalloc.net/),
|
||||
[tcmalloc](https://gperftools.github.io/gperftools/tcmalloc.html)) that might run faster than the
|
||||
operating system default.
|
||||
|
||||
**Data Layout**: How your data is arranged in memory matters;
|
||||
[data-oriented design](https://www.youtube.com/watch?v=yy8jQgmhbAU) and
|
||||
[cache locality](https://www.youtube.com/watch?v=2EWejmkKlxs&feature=youtu.be&t=1185) can have huge
|
||||
impacts on performance. The C family of languages (C, value types in C#, C++) and Rust all have
|
||||
guarantees about the shape every object takes in memory that others (e.g. Java and Python) can't
|
||||
make. [Cachegrind](http://valgrind.org/docs/manual/cg-manual.html) and kernel
|
||||
[perf](https://perf.wiki.kernel.org/index.php/Main_Page) counters are both great for understanding
|
||||
how performance relates to memory layout.
|
||||
|
||||
**Just-In-Time Compilation**: Languages that are compiled on the fly (LuaJIT, C#, Java, PyPy) are
|
||||
great because they optimize your program for how it's actually being used, rather than how a
|
||||
compiler expects it to be used. However, there's a variance problem if the program stops executing
|
||||
while waiting for translation from VM bytecode to native code. As a remedy, many languages support
|
||||
ahead-of-time compilation in addition to the JIT versions
|
||||
([CoreRT](https://github.com/dotnet/corert) in C# and [GraalVM](https://www.graalvm.org/) in Java).
|
||||
On the other hand, LLVM supports
|
||||
[Profile Guided Optimization](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization),
|
||||
which theoretically brings JIT benefits to non-JIT languages. Finally, be careful to avoid comparing
|
||||
apples and oranges during benchmarks; you don't want your code to suddenly speed up because the JIT
|
||||
compiler kicked in.
|
||||
|
||||
**Programming Tricks**: These won't make or break performance, but can be useful in specific
|
||||
circumstances. For example, C++ can use
|
||||
[templates instead of branches](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=1206)
|
||||
in critical sections.
|
||||
|
||||
## Kernel
|
||||
|
||||
Code you wrote is almost certainly not the _only_ code running on your hardware. There are many ways
|
||||
the operating system interacts with your program, from interrupts to system calls, that are
|
||||
important to watch for. These are written from a Linux perspective, but Windows does typically have
|
||||
equivalent functionality.
|
||||
|
||||
**Scheduling**: The kernel is normally free to schedule any process on any core, so it's important
|
||||
to reserve CPU cores exclusively for the important programs. There are a few parts to this: first,
|
||||
limit the CPU cores that non-critical processes are allowed to run on by excluding cores from
|
||||
scheduling
|
||||
([`isolcpus`](https://www.linuxtopia.org/online_books/linux_kernel/kernel_configuration/re46.html)
|
||||
kernel command-line option), or by setting the `init` process CPU affinity
|
||||
([`systemd` example](https://access.redhat.com/solutions/2884991)). Second, set critical processes
|
||||
to run on the isolated cores by setting the
|
||||
[processor affinity](https://en.wikipedia.org/wiki/Processor_affinity) using
|
||||
[taskset](https://linux.die.net/man/1/taskset). Finally, use
|
||||
[`NO_HZ`](https://github.com/torvalds/linux/blob/master/Documentation/timers/NO_HZ.txt) or
|
||||
[`chrt`](https://linux.die.net/man/1/chrt) to disable scheduling interrupts. Turning off
|
||||
hyper-threading is also likely beneficial.
|
||||
|
||||
**System calls**: Reading from a UNIX socket? Writing to a file? In addition to not knowing how long
|
||||
the I/O operation takes, these all trigger expensive
|
||||
[system calls (syscalls)](https://en.wikipedia.org/wiki/System_call). To handle these, the CPU must
|
||||
[context switch](https://en.wikipedia.org/wiki/Context_switch) to the kernel, let the kernel
|
||||
operation complete, then context switch back to your program. We'd rather keep these
|
||||
[to a minimum](https://www.destroyallsoftware.com/talks/the-birth-and-death-of-javascript) (see
|
||||
timestamp 18:20). [Strace](https://linux.die.net/man/1/strace) is your friend for understanding when
|
||||
and where syscalls happen.
|
||||
|
||||
**Signal Handling**: Far less likely to be an issue, but signals do trigger a context switch if your
|
||||
code has a handler registered. This will be highly dependent on the application, but you can
|
||||
[block signals](https://www.linuxprogrammingblog.com/all-about-linux-signals?page=show#Blocking_signals)
|
||||
if it's an issue.
|
||||
|
||||
**Interrupts**: System interrupts are how devices connected to your computer notify the CPU that
|
||||
something has happened. The CPU will then choose a processor core to pause and context switch to the
|
||||
OS to handle the interrupt. Make sure that
|
||||
[SMP affinity](http://www.alexonlinux.com/smp-affinity-and-proper-interrupt-handling-in-linux) is
|
||||
set so that interrupts are handled on a CPU core not running the program you care about.
|
||||
|
||||
**[NUMA](https://www.kernel.org/doc/html/latest/vm/numa.html)**: While NUMA is good at making
|
||||
multi-cell systems transparent, there are variance implications; if the kernel moves a process
|
||||
across nodes, future memory accesses must wait for the controller on the original node. Use
|
||||
[numactl](https://linux.die.net/man/8/numactl) to handle memory-/cpu-cell pinning so this doesn't
|
||||
happen.
|
||||
|
||||
## Hardware
|
||||
|
||||
**CPU Pipelining/Speculation**: Speculative execution in modern processors gave us vulnerabilities
|
||||
like Spectre, but it also gave us performance improvements like
|
||||
[branch prediction](https://stackoverflow.com/a/11227902/1454178). And if the CPU mis-speculates
|
||||
your code, there's variance associated with rewind and replay. While the compiler knows a lot about
|
||||
how your CPU [pipelines instructions](https://youtu.be/nAbCKa0FzjQ?t=4467), code can be
|
||||
[structured to help](https://www.youtube.com/watch?v=NH1Tta7purM&feature=youtu.be&t=755) the branch
|
||||
predictor.
|
||||
|
||||
**Paging**: For most systems, virtual memory is incredible. Applications live in their own worlds,
|
||||
and the CPU/[MMU](https://en.wikipedia.org/wiki/Memory_management_unit) figures out the details.
|
||||
However, there's a variance penalty associated with memory paging and caching; if you access more
|
||||
memory pages than the [TLB](https://en.wikipedia.org/wiki/Translation_lookaside_buffer) can store,
|
||||
you'll have to wait for the page walk. Kernel perf tools are necessary to figure out if this is an
|
||||
issue, but using [huge pages](https://blog.pythian.com/performance-tuning-hugepages-in-linux/) can
|
||||
reduce TLB burdens. Alternately, running applications in a hypervisor like
|
||||
[Jailhouse](https://github.com/siemens/jailhouse) allows one to skip virtual memory entirely, but
|
||||
this is probably more work than the benefits are worth.
|
||||
|
||||
**Network Interfaces**: When more than one computer is involved, variance can go up dramatically.
|
||||
Tuning kernel
|
||||
[network parameters](https://github.com/leandromoreira/linux-network-performance-parameters) may be
|
||||
helpful, but modern systems more frequently opt to skip the kernel altogether with a technique
|
||||
called [kernel bypass](https://blog.cloudflare.com/kernel-bypass/). This typically requires
|
||||
specialized hardware and [drivers](https://www.openonload.org/), but even industries like
|
||||
[telecom](https://www.bbc.co.uk/rd/blog/2018-04-high-speed-networking-open-source-kernel-bypass) are
|
||||
finding the benefits.
|
||||
|
||||
## Networks
|
||||
|
||||
**Routing**: There's a reason financial firms are willing to pay
|
||||
[millions of euros](https://sniperinmahwah.wordpress.com/2019/03/26/4-les-moeres-english-version/)
|
||||
for rights to a small plot of land - having a straight-line connection from point A to point B means
|
||||
the path their data takes is the shortest possible. In contrast, there are currently 6 computers in
|
||||
between me and Google, but that may change at any moment if my ISP realizes a
|
||||
[more efficient route](https://en.wikipedia.org/wiki/Border_Gateway_Protocol) is available. Whether
|
||||
it's using
|
||||
[research-quality equipment](https://sniperinmahwah.wordpress.com/2018/05/07/shortwave-trading-part-i-the-west-chicago-tower-mystery/)
|
||||
for shortwave radio, or just making sure there's no data inadvertently going between data centers,
|
||||
routing matters.
|
||||
|
||||
**Protocol**: TCP as a network protocol is awesome: guaranteed and in-order delivery, flow control,
|
||||
and congestion control all built in. But these attributes make the most sense when networking
|
||||
infrastructure is lossy; for systems that expect nearly all packets to be delivered correctly, the
|
||||
setup handshaking and packet acknowledgment are just overhead. Using UDP (unicast or multicast) may
|
||||
make sense in these contexts as it avoids the chatter needed to track connection state, and
|
||||
[gap-fill](https://iextrading.com/docs/IEX%20Transport%20Specification.pdf)
|
||||
[strategies](http://www.nasdaqtrader.com/content/technicalsupport/specifications/dataproducts/moldudp64.pdf)
|
||||
can handle the rest.
|
||||
|
||||
**Switching**: Many routers/switches handle packets using "store-and-forward" behavior: wait for the
|
||||
whole packet, validate checksums, and then send to the next device. In variance terms, the time
|
||||
needed to move data between two nodes is proportional to the size of that data; the switch must
|
||||
"store" all data before it can calculate checksums and "forward" to the next node. With
|
||||
["cut-through"](https://www.networkworld.com/article/2241573/latency-and-jitter--cut-through-design-pays-off-for-arista--blade.html)
|
||||
designs, switches will begin forwarding data as soon as they know where the destination is,
|
||||
checksums be damned. This means there's a fixed cost (at the switch) for network traffic, no matter
|
||||
the size.
|
||||
|
||||
# Final Thoughts
|
||||
|
||||
High-performance systems, regardless of industry, are not magical. They do require extreme precision
|
||||
and attention to detail, but they're designed, built, and operated by regular people, using a lot of
|
||||
tools that are publicly available. Interested in seeing how context switching affects performance of
|
||||
your benchmarks? `taskset` should be installed in all modern Linux distributions, and can be used to
|
||||
make sure the OS never migrates your process. Curious how often garbage collection triggers during a
|
||||
crucial operation? Your language of choice will typically expose details of its operations
|
||||
([Python](https://docs.python.org/3/library/gc.html),
|
||||
[Java](https://www.oracle.com/technetwork/java/javase/tech/vmoptions-jsp-140102.html#DebuggingOptions)).
|
||||
Want to know how hard your program is stressing the TLB? Use `perf record` and look for
|
||||
`dtlb_load_misses.miss_causes_a_walk`.
|
||||
|
||||
Two final guiding questions, then: first, before attempting to apply some of the technology above to
|
||||
your own systems, can you identify
|
||||
[where/when you care](http://wiki.c2.com/?PrematureOptimization) about "high-performance"? As an
|
||||
example, if parts of a system rely on humans pushing buttons, CPU pinning won't have any measurable
|
||||
effect. Humans are already far too slow to react in time. Second, if you're using benchmarks, are
|
||||
they being designed in a way that's actually helpful? Tools like
|
||||
[Criterion](http://www.serpentine.com/criterion/) (also in
|
||||
[Rust](https://github.com/bheisler/criterion.rs)) and Google's
|
||||
[Benchmark](https://github.com/google/benchmark) output not only average run time, but variance as
|
||||
well; your benchmarking environment is subject to the same concerns your production environment is.
|
||||
|
||||
Finally, I believe high-performance systems are a matter of philosophy, not necessarily technique.
|
||||
Rigorous focus on variance is the first step, and there are plenty of ways to measure and mitigate
|
||||
it; once that's at an acceptable level, then optimize for speed.
|
@ -1,263 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Binary Format Shootout"
|
||||
description: "Cap'n Proto vs. Flatbuffers vs. SBE"
|
||||
category:
|
||||
tags: [rust]
|
||||
---
|
||||
|
||||
I've found that in many personal projects,
|
||||
[analysis paralysis](https://en.wikipedia.org/wiki/Analysis_paralysis) is particularly deadly.
|
||||
Making good decisions in the beginning avoids pain and suffering later; if extra research prevents
|
||||
future problems, I'm happy to continue ~~procrastinating~~ researching indefinitely.
|
||||
|
||||
So let's say you're in need of a binary serialization format. Data will be going over the network,
|
||||
not just in memory, so having a schema document and code generation is a must. Performance is
|
||||
crucial, so formats that support zero-copy de/serialization are given priority. And the more
|
||||
languages supported, the better; I use Rust, but can't predict what other languages this could
|
||||
interact with.
|
||||
|
||||
Given these requirements, the candidates I could find were:
|
||||
|
||||
1. [Cap'n Proto](https://capnproto.org/) has been around the longest, and is the most established
|
||||
2. [Flatbuffers](https://google.github.io/flatbuffers/) is the newest, and claims to have a simpler
|
||||
encoding
|
||||
3. [Simple Binary Encoding](https://github.com/real-logic/simple-binary-encoding) has the simplest
|
||||
encoding, but the Rust implementation is unmaintained
|
||||
|
||||
Any one of these will satisfy the project requirements: easy to transmit over a network, reasonably
|
||||
fast, and polyglot support. But how do you actually pick one? It's impossible to know what issues
|
||||
will follow that choice, so I tend to avoid commitment until the last possible moment.
|
||||
|
||||
Still, a choice must be made. Instead of worrying about which is "the best," I decided to build a
|
||||
small proof-of-concept system in each format and pit them against each other. All code can be found
|
||||
in the [repository](https://github.com/speice-io/marketdata-shootout) for this post.
|
||||
|
||||
We'll discuss each in more detail, but here's a quick preview of the results:
|
||||
|
||||
- Cap'n Proto: Theoretically performs incredibly well, but the implementation had issues
|
||||
- Flatbuffers: Has some quirks, but largely lived up to its "zero-copy" promises
|
||||
- SBE: Best median and worst-case performance, but the message structure has a limited feature set
|
||||
|
||||
# Prologue: Binary Parsing with Nom
|
||||
|
||||
Our benchmark system will be a simple data processor; given depth-of-book market data from
|
||||
[IEX](https://iextrading.com/trading/market-data/#deep), serialize each message into the schema
|
||||
format, read it back, and calculate total size of stock traded and the lowest/highest quoted prices.
|
||||
This test isn't complex, but is representative of the project I need a binary format for.
|
||||
|
||||
But before we make it to that point, we have to actually read in the market data. To do so, I'm
|
||||
using a library called [`nom`](https://github.com/Geal/nom). Version 5.0 was recently released and
|
||||
brought some big changes, so this was an opportunity to build a non-trivial program and get
|
||||
familiar.
|
||||
|
||||
If you don't already know about `nom`, it's a "parser combinator" library. By combining different smaller
|
||||
parsers, you can assemble a parser to handle complex structures without writing tedious code by
|
||||
hand. For example, when parsing
|
||||
[PCAP files](https://www.winpcap.org/ntar/draft/PCAP-DumpFileFormat.html#rfc.section.3.3):
|
||||
|
||||
```
|
||||
0 1 2 3
|
||||
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
|
||||
+---------------------------------------------------------------+
|
||||
0 | Block Type = 0x00000006 |
|
||||
+---------------------------------------------------------------+
|
||||
4 | Block Total Length |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
8 | Interface ID |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
12 | Timestamp (High) |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
16 | Timestamp (Low) |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
20 | Captured Len |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
24 | Packet Len |
|
||||
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
|
||||
| Packet Data |
|
||||
| ... |
|
||||
```
|
||||
|
||||
...you can build a parser in `nom` that looks like
|
||||
[this](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/parsers.rs#L59-L93):
|
||||
|
||||
```rust
|
||||
const ENHANCED_PACKET: [u8; 4] = [0x06, 0x00, 0x00, 0x00];
|
||||
pub fn enhanced_packet_block(input: &[u8]) -> IResult<&[u8], &[u8]> {
|
||||
let (
|
||||
remaining,
|
||||
(
|
||||
block_type,
|
||||
block_len,
|
||||
interface_id,
|
||||
timestamp_high,
|
||||
timestamp_low,
|
||||
captured_len,
|
||||
packet_len,
|
||||
),
|
||||
) = tuple((
|
||||
tag(ENHANCED_PACKET),
|
||||
le_u32,
|
||||
le_u32,
|
||||
le_u32,
|
||||
le_u32,
|
||||
le_u32,
|
||||
le_u32,
|
||||
))(input)?;
|
||||
|
||||
let (remaining, packet_data) = take(captured_len)(remaining)?;
|
||||
Ok((remaining, packet_data))
|
||||
}
|
||||
```
|
||||
|
||||
While this example isn't too interesting, more complex formats (like IEX market data) are where
|
||||
[`nom` really shines](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/iex.rs).
|
||||
|
||||
Ultimately, because the `nom` code in this shootout was the same for all formats, we're not too
|
||||
interested in its performance. Still, it's worth mentioning that building the market data parser was
|
||||
actually fun; I didn't have to write tons of boring code by hand.
|
||||
|
||||
# Part 1: Cap'n Proto
|
||||
|
||||
Now it's time to get into the meaty part of the story. Cap'n Proto was the first format I tried
|
||||
because of how long it has supported Rust (thanks to [dwrensha](https://github.com/dwrensha) for
|
||||
maintaining the Rust port since
|
||||
[2014!](https://github.com/capnproto/capnproto-rust/releases/tag/rustc-0.10)). However, I had a ton
|
||||
of performance concerns once I started using it.
|
||||
|
||||
To serialize new messages, Cap'n Proto uses a "builder" object. This builder allocates memory on the
|
||||
heap to hold the message content, but because builders
|
||||
[can't be re-used](https://github.com/capnproto/capnproto-rust/issues/111), we have to allocate a
|
||||
new buffer for every single message. I was able to work around this with a
|
||||
[special builder](https://github.com/speice-io/marketdata-shootout/blob/369613843d39cfdc728e1003123bf87f79422497/src/capnp_runner.rs#L17-L51)
|
||||
that could re-use the buffer, but it required reading through Cap'n Proto's
|
||||
[benchmarks](https://github.com/capnproto/capnproto-rust/blob/master/benchmark/benchmark.rs#L124-L156)
|
||||
to find an example, and used
|
||||
[`std::mem::transmute`](https://doc.rust-lang.org/std/mem/fn.transmute.html) to bypass Rust's borrow
|
||||
checker.
|
||||
|
||||
The process of reading messages was better, but still had issues. Cap'n Proto has two message
|
||||
encodings: a ["packed"](https://capnproto.org/encoding.html#packing) representation, and an
|
||||
"unpacked" version. When reading "packed" messages, we need a buffer to unpack the message into
|
||||
before we can use it; Cap'n Proto allocates a new buffer for each message we unpack, and I wasn't
|
||||
able to figure out a way around that. In contrast, the unpacked message format should be where Cap'n
|
||||
Proto shines; its main selling point is that there's [no decoding step](https://capnproto.org/).
|
||||
However, accomplishing zero-copy deserialization required code in the private API
|
||||
([since fixed](https://github.com/capnproto/capnproto-rust/issues/148)), and we allocate a vector on
|
||||
every read for the segment table.
|
||||
|
||||
In the end, I put in significant work to make Cap'n Proto as fast as possible, but there were too
|
||||
many issues for me to feel comfortable using it long-term.
|
||||
|
||||
# Part 2: Flatbuffers
|
||||
|
||||
This is the new kid on the block. After a
|
||||
[first attempt](https://github.com/google/flatbuffers/pull/3894) didn't pan out, official support
|
||||
was [recently launched](https://github.com/google/flatbuffers/pull/4898). Flatbuffers intends to
|
||||
address the same problems as Cap'n Proto: high-performance, polyglot, binary messaging. The
|
||||
difference is that Flatbuffers claims to have a simpler wire format and
|
||||
[more flexibility](https://google.github.io/flatbuffers/flatbuffers_benchmarks.html).
|
||||
|
||||
On the whole, I enjoyed using Flatbuffers; the [tooling](https://crates.io/crates/flatc-rust) is
|
||||
nice, and unlike Cap'n Proto, parsing messages was actually zero-copy and zero-allocation. However,
|
||||
there were still some issues.
|
||||
|
||||
First, Flatbuffers (at least in Rust) can't handle nested vectors. This is a problem for formats
|
||||
like the following:
|
||||
|
||||
```
|
||||
table Message {
|
||||
symbol: string;
|
||||
}
|
||||
table MultiMessage {
|
||||
messages:[Message];
|
||||
}
|
||||
```
|
||||
|
||||
We want to create a `MultiMessage` which contains a vector of `Message`, and each `Message` itself
|
||||
contains a vector (the `string` type). I was able to work around this by
|
||||
[caching `Message` elements](https://github.com/speice-io/marketdata-shootout/blob/e9d07d148bf36a211a6f86802b313c4918377d1b/src/flatbuffers_runner.rs#L83)
|
||||
in a `SmallVec` before building the final `MultiMessage`, but it was a painful process that I
|
||||
believe contributed to poor serialization performance.
|
||||
|
||||
Second, streaming support in Flatbuffers seems to be something of an
|
||||
[afterthought](https://github.com/google/flatbuffers/issues/3898). Where Cap'n Proto in Rust handles
|
||||
reading messages from a stream as part of the API, Flatbuffers just sticks a `u32` at the front of
|
||||
each message to indicate the size. Not specifically a problem, but calculating message size without
|
||||
that tag is nigh on impossible.
|
||||
|
||||
Ultimately, I enjoyed using Flatbuffers, and had to do significantly less work to make it perform
|
||||
well.
|
||||
|
||||
# Part 3: Simple Binary Encoding
|
||||
|
||||
Support for SBE was added by the author of one of my favorite
|
||||
[Rust blog posts](https://web.archive.org/web/20190427124806/https://polysync.io/blog/session-types-for-hearty-codecs/).
|
||||
I've [talked previously]({% post_url 2019-06-31-high-performance-systems %}) about how important
|
||||
variance is in high-performance systems, so it was encouraging to read about a format that
|
||||
[directly addressed](https://github.com/real-logic/simple-binary-encoding/wiki/Why-Low-Latency) my
|
||||
concerns. SBE has by far the simplest binary format, but it does make some tradeoffs.
|
||||
|
||||
Both Cap'n Proto and Flatbuffers use [message offsets](https://capnproto.org/encoding.html#structs)
|
||||
to handle variable-length data, [unions](https://capnproto.org/language.html#unions), and various
|
||||
other features. In contrast, messages in SBE are essentially
|
||||
[just structs](https://github.com/real-logic/simple-binary-encoding/blob/master/sbe-samples/src/main/resources/example-schema.xml);
|
||||
variable-length data is supported, but there's no union type.
|
||||
|
||||
As mentioned in the beginning, the Rust port of SBE works well, but is
|
||||
[essentially unmaintained](https://users.rust-lang.org/t/zero-cost-abstraction-frontier-no-copy-low-allocation-ordered-decoding/11515/9).
|
||||
However, if you don't need union types, and can accept that schemas are XML documents, it's still
|
||||
worth using. SBE's implementation had the best streaming support of all formats I tested, and
|
||||
doesn't trigger allocation during de/serialization.
|
||||
|
||||
# Results
|
||||
|
||||
After building a test harness
|
||||
[for](https://github.com/speice-io/marketdata-shootout/blob/master/src/capnp_runner.rs)
|
||||
[each](https://github.com/speice-io/marketdata-shootout/blob/master/src/flatbuffers_runner.rs)
|
||||
[format](https://github.com/speice-io/marketdata-shootout/blob/master/src/sbe_runner.rs), it was
|
||||
time to actually take them for a spin. I used
|
||||
[this script](https://github.com/speice-io/marketdata-shootout/blob/master/run_shootout.sh) to run
|
||||
the benchmarks, and the raw results are
|
||||
[here](https://github.com/speice-io/marketdata-shootout/blob/master/shootout.csv). All data reported
|
||||
below is the average of 10 runs on a single day of IEX data. Results were validated to make sure
|
||||
that each format parsed the data correctly.
|
||||
|
||||
## Serialization
|
||||
|
||||
This test measures, on a
|
||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L268-L272),
|
||||
how long it takes to serialize the IEX message into the desired format and write to a pre-allocated
|
||||
buffer.
|
||||
|
||||
| Schema | Median | 99th Pctl | 99.9th Pctl | Total |
|
||||
| :------------------- | :----- | :-------- | :---------- | :----- |
|
||||
| Cap'n Proto Packed | 413ns | 1751ns | 2943ns | 14.80s |
|
||||
| Cap'n Proto Unpacked | 273ns | 1828ns | 2836ns | 10.65s |
|
||||
| Flatbuffers | 355ns | 2185ns | 3497ns | 14.31s |
|
||||
| SBE | 91ns | 1535ns | 2423ns | 3.91s |
|
||||
|
||||
## Deserialization
|
||||
|
||||
This test measures, on a
|
||||
[per-message basis](https://github.com/speice-io/marketdata-shootout/blob/master/src/main.rs#L294-L298),
|
||||
how long it takes to read the previously-serialized message and perform some basic aggregation. The
|
||||
aggregation code is the same for each format, so any performance differences are due solely to the
|
||||
format implementation.
|
||||
|
||||
| Schema | Median | 99th Pctl | 99.9th Pctl | Total |
|
||||
| :------------------- | :----- | :-------- | :---------- | :----- |
|
||||
| Cap'n Proto Packed | 539ns | 1216ns | 2599ns | 18.92s |
|
||||
| Cap'n Proto Unpacked | 366ns | 737ns | 1583ns | 12.32s |
|
||||
| Flatbuffers | 173ns | 421ns | 1007ns | 6.00s |
|
||||
| SBE | 116ns | 286ns | 659ns | 4.05s |
|
||||
|
||||
# Conclusion
|
||||
|
||||
Building a benchmark turned out to be incredibly helpful in making a decision; because a "union"
|
||||
type isn't important to me, I can be confident that SBE best addresses my needs.
|
||||
|
||||
While SBE was the fastest in terms of both median and worst-case performance, its worst-case
|
||||
performance was proportionately far higher than any other format. It seems that
|
||||
de/serialization time scales with message size, but I'll need to do some more research to understand
|
||||
what exactly is going on.
|
@ -1,370 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "Release the GIL"
|
||||
description: "Strategies for Parallelism in Python"
|
||||
category:
|
||||
tags: [python]
|
||||
---
|
||||
|
||||
Complaining about the [Global Interpreter Lock](https://wiki.python.org/moin/GlobalInterpreterLock)
|
||||
(GIL) seems like a rite of passage for Python developers. It's easy to criticize a design decision
|
||||
made before multi-core CPUs were widely available, but the fact that it's still around indicates
|
||||
that it generally works [Good](https://wiki.c2.com/?PrematureOptimization)
|
||||
[Enough](https://wiki.c2.com/?YouArentGonnaNeedIt). Besides, there are simple and effective
|
||||
workarounds; it's not hard to start a
|
||||
[new process](https://docs.python.org/3/library/multiprocessing.html) and use message passing to
|
||||
synchronize code running in parallel.
|
||||
|
||||
Still, wouldn't it be nice to have more than a single active interpreter thread? In an age of
|
||||
asynchronicity and _M:N_ threading, Python seems lacking. The ideal scenario is to take advantage of
|
||||
both Python's productivity and the modern CPU's parallel capabilities.
|
||||
|
||||
Presented below are two strategies for releasing the GIL's icy grip without giving up on what makes
|
||||
Python a nice language to start with. Bear in mind: these are just the tools; no claim is made about
|
||||
whether it's a good idea to use them. Very often, unlocking the GIL is an
|
||||
[XY problem](https://en.wikipedia.org/wiki/XY_problem); you want application performance, and the
|
||||
GIL seems like an obvious bottleneck. Remember that any gains from running code in parallel come at
|
||||
the expense of project complexity; messing with the GIL is ultimately messing with Python's memory
|
||||
model.
|
||||
|
||||
```python
|
||||
%load_ext Cython
|
||||
from numba import jit
|
||||
|
||||
N = 1_000_000_000
|
||||
```
|
||||
|
||||
# Cython
|
||||
|
||||
Put simply, [Cython](https://cython.org/) is a programming language that looks a lot like Python,
|
||||
gets [transpiled](https://en.wikipedia.org/wiki/Source-to-source_compiler) to C/C++, and integrates
|
||||
well with the [CPython](https://en.wikipedia.org/wiki/CPython) API. It's great for building Python
|
||||
wrappers to C and C++ libraries, writing optimized code for numerical processing, and tons more. And
|
||||
when it comes to managing the GIL, there are two special features:
|
||||
|
||||
- The `nogil`
|
||||
[function annotation](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#declaring-a-function-as-callable-without-the-gil)
|
||||
asserts that a Cython function is safe to use without the GIL, and compilation will fail if it
|
||||
interacts with Python in an unsafe manner
|
||||
- The `with nogil`
|
||||
[context manager](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#releasing-the-gil)
|
||||
explicitly unlocks the CPython GIL while active
|
||||
|
||||
Whenever Cython code runs inside a `with nogil` block on a separate thread, the Python interpreter
|
||||
is unblocked and allowed to continue work elsewhere. We'll define a "busy work" function that
|
||||
demonstrates this principle in action:
|
||||
|
||||
```python
|
||||
%%cython
|
||||
|
||||
# Annotating a function with `nogil` indicates only that it is safe
|
||||
# to call in a `with nogil` block. It *does not* release the GIL.
|
||||
cdef unsigned long fibonacci(unsigned long n) nogil:
|
||||
if n <= 1:
|
||||
return n
|
||||
|
||||
cdef unsigned long a = 0, b = 1, c = 0
|
||||
|
||||
c = a + b
|
||||
for _i in range(2, n):
|
||||
a = b
|
||||
b = c
|
||||
c = a + b
|
||||
|
||||
return c
|
||||
|
||||
|
||||
def cython_nogil(unsigned long n):
|
||||
# Explicitly release the GIL while running `fibonacci`
|
||||
with nogil:
|
||||
value = fibonacci(n)
|
||||
|
||||
return value
|
||||
|
||||
|
||||
def cython_gil(unsigned long n):
|
||||
# Because the GIL is not explicitly released, it implicitly
|
||||
# remains acquired when running the `fibonacci` function
|
||||
return fibonacci(n)
|
||||
```
|
||||
|
||||
First, let's time how long it takes Cython to calculate the billionth Fibonacci number:
|
||||
|
||||
```python
|
||||
%%time
|
||||
_ = cython_gil(N);
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 365 ms, sys: 0 ns, total: 365 ms
|
||||
> Wall time: 372 ms
|
||||
> </pre>
|
||||
|
||||
```python
|
||||
%%time
|
||||
_ = cython_nogil(N);
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 381 ms, sys: 0 ns, total: 381 ms
|
||||
> Wall time: 388 ms
|
||||
> </pre>
|
||||
|
||||
Both versions (with and without GIL) take effectively the same amount of time to run. Even when
|
||||
running this calculation in parallel on separate threads, it is expected that the run time will
|
||||
double because only one thread can be active at a time:
|
||||
|
||||
```python
|
||||
%%time
|
||||
from threading import Thread
|
||||
|
||||
# Create the two threads to run on
|
||||
t1 = Thread(target=cython_gil, args=[N])
|
||||
t2 = Thread(target=cython_gil, args=[N])
|
||||
# Start the threads
|
||||
t1.start(); t2.start()
|
||||
# Wait for the threads to finish
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 641 ms, sys: 5.62 ms, total: 647 ms
|
||||
> Wall time: 645 ms
|
||||
> </pre>
|
||||
|
||||
However, if the first thread releases the GIL, the second thread is free to acquire it and run in
|
||||
parallel:
|
||||
|
||||
```python
|
||||
%%time
|
||||
|
||||
t1 = Thread(target=cython_nogil, args=[N])
|
||||
t2 = Thread(target=cython_gil, args=[N])
|
||||
t1.start(); t2.start()
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 717 ms, sys: 372 µs, total: 718 ms
|
||||
> Wall time: 358 ms
|
||||
> </pre>
|
||||
|
||||
Because `user` time represents the sum of processing time on all threads, it doesn't change much.
|
||||
The ["wall time"](https://en.wikipedia.org/wiki/Elapsed_real_time) has been cut roughly in half
|
||||
because each function is running simultaneously.
|
||||
|
||||
Keep in mind that the **order in which threads are started** makes a difference!
|
||||
|
||||
```python
|
||||
%%time
|
||||
|
||||
# Note that the GIL-locked version is started first
|
||||
t1 = Thread(target=cython_gil, args=[N])
|
||||
t2 = Thread(target=cython_nogil, args=[N])
|
||||
t1.start(); t2.start()
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 667 ms, sys: 0 ns, total: 667 ms
|
||||
> Wall time: 672 ms
|
||||
> </pre>
|
||||
|
||||
Even though the second thread releases the GIL while running, it can't start until the first has
|
||||
completed. Thus, the overall runtime is effectively the same as running two GIL-locked threads.
|
||||
|
||||
Finally, be aware that attempting to unlock the GIL from a thread that doesn't own it will crash the
|
||||
**interpreter**, not just the thread attempting the unlock:
|
||||
|
||||
```python
|
||||
%%cython
|
||||
|
||||
cdef int cython_recurse(int n) nogil:
|
||||
if n <= 0:
|
||||
return 0
|
||||
|
||||
with nogil:
|
||||
return cython_recurse(n - 1)
|
||||
|
||||
cython_recurse(2)
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> Fatal Python error: PyEval_SaveThread: NULL tstate
|
||||
>
|
||||
> Thread 0x00007f499effd700 (most recent call first):
|
||||
> File "/home/bspeice/.virtualenvs/release-the-gil/lib/python3.7/site-packages/ipykernel/parentpoller.py", line 39 in run
|
||||
> File "/usr/lib/python3.7/threading.py", line 926 in _bootstrap_inner
|
||||
> File "/usr/lib/python3.7/threading.py", line 890 in _bootstrap
|
||||
> </pre>
|
||||
|
||||
In practice, avoiding this issue is simple. First, `nogil` functions probably shouldn't contain
|
||||
`with nogil` blocks. Second, Cython can
|
||||
[conditionally acquire/release](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#conditional-acquiring-releasing-the-gil)
|
||||
the GIL, so these conditions can be used to synchronize access. Finally, Cython's documentation for
|
||||
[external C code](https://cython.readthedocs.io/en/latest/src/userguide/external_C_code.html#acquiring-and-releasing-the-gil)
|
||||
contains more detail on how to safely manage the GIL.
|
||||
|
||||
To conclude: use Cython's `nogil` annotation to assert that functions are safe for calling when the
|
||||
GIL is unlocked, and `with nogil` to actually unlock the GIL and run those functions.
|
||||
|
||||
# Numba
|
||||
|
||||
Like Cython, [Numba](https://numba.pydata.org/) is a "compiled Python." Where Cython works by
|
||||
compiling a Python-like language to C/C++, Numba compiles Python bytecode _directly to machine code_
|
||||
at runtime. Behavior is controlled with a special `@jit` decorator; calling a decorated function
|
||||
first compiles it to machine code before running. Calling the function a second time re-uses that
|
||||
machine code unless the argument types have changed.
|
||||
|
||||
Numba works best when a `nopython=True` argument is added to the `@jit` decorator; functions
|
||||
compiled in [`nopython`](http://numba.pydata.org/numba-doc/latest/user/jit.html?#nopython) mode
|
||||
avoid the CPython API and have performance comparable to C. Further, adding `nogil=True` to the
|
||||
`@jit` decorator unlocks the GIL while that function is running. Note that `nogil` and `nopython`
|
||||
are separate arguments; while it is necessary for code to be compiled in `nopython` mode in order to
|
||||
release the lock, the GIL will remain locked if `nogil=False` (the default).
|
||||
|
||||
Let's repeat the same experiment, this time using Numba instead of Cython:
|
||||
|
||||
```python
|
||||
# The `int` type annotation is only for humans and is ignored
|
||||
# by Numba.
|
||||
@jit(nopython=True, nogil=True)
|
||||
def numba_nogil(n: int) -> int:
|
||||
if n <= 1:
|
||||
return n
|
||||
|
||||
a = 0
|
||||
b = 1
|
||||
|
||||
c = a + b
|
||||
for _i in range(2, n):
|
||||
a = b
|
||||
b = c
|
||||
c = a + b
|
||||
|
||||
return c
|
||||
|
||||
|
||||
# Run using `nopython` mode to receive a performance boost,
|
||||
# but GIL remains locked due to `nogil=False` by default.
|
||||
@jit(nopython=True)
|
||||
def numba_gil(n: int) -> int:
|
||||
if n <= 1:
|
||||
return n
|
||||
|
||||
a = 0
|
||||
b = 1
|
||||
|
||||
c = a + b
|
||||
for _i in range(2, n):
|
||||
a = b
|
||||
b = c
|
||||
c = a + b
|
||||
|
||||
return c
|
||||
|
||||
|
||||
# Call each function once to force compilation; we don't want
|
||||
# the timing statistics to include how long it takes to compile.
|
||||
numba_nogil(N)
|
||||
numba_gil(N);
|
||||
```
|
||||
|
||||
We'll perform the same tests as above; first, figure out how long it takes the function to run:
|
||||
|
||||
```python
|
||||
%%time
|
||||
_ = numba_gil(N)
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 253 ms, sys: 258 µs, total: 253 ms
|
||||
> Wall time: 251 ms
|
||||
> </pre>
|
||||
|
||||
<span style="font-size: .8em">
|
||||
Aside: it's not immediately clear why Numba takes ~20% less time to run than Cython for code that should be
|
||||
effectively identical after compilation.
|
||||
</span>
|
||||
|
||||
When running two GIL-locked threads, the result (as expected) takes around twice as long to compute:
|
||||
|
||||
```python
|
||||
%%time
|
||||
t1 = Thread(target=numba_gil, args=[N])
|
||||
t2 = Thread(target=numba_gil, args=[N])
|
||||
t1.start(); t2.start()
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 541 ms, sys: 3.96 ms, total: 545 ms
|
||||
> Wall time: 541 ms
|
||||
> </pre>
|
||||
|
||||
But if the GIL-unlocking thread starts first, both threads run in parallel:
|
||||
|
||||
```python
|
||||
%%time
|
||||
t1 = Thread(target=numba_nogil, args=[N])
|
||||
t2 = Thread(target=numba_gil, args=[N])
|
||||
t1.start(); t2.start()
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 551 ms, sys: 7.77 ms, total: 559 ms
|
||||
> Wall time: 279 ms
|
||||
> </pre>
|
||||
|
||||
Just like Cython, starting the GIL-locked thread first leads to poor performance:
|
||||
|
||||
```python
|
||||
%%time
|
||||
t1 = Thread(target=numba_gil, args=[N])
|
||||
t2 = Thread(target=numba_nogil, args=[N])
|
||||
t1.start(); t2.start()
|
||||
t1.join(); t2.join()
|
||||
```
|
||||
|
||||
> <pre>
|
||||
> CPU times: user 524 ms, sys: 0 ns, total: 524 ms
|
||||
> Wall time: 522 ms
|
||||
> </pre>
|
||||
|
||||
Finally, unlike Cython, Numba will unlock the GIL if and only if it is currently acquired;
|
||||
recursively calling `@jit(nogil=True)` functions is perfectly safe:
|
||||
|
||||
```python
|
||||
from numba import jit
|
||||
|
||||
@jit(nopython=True, nogil=True)
|
||||
def numba_recurse(n: int) -> int:
|
||||
if n <= 0:
|
||||
return 0
|
||||
|
||||
return numba_recurse(n - 1)
|
||||
|
||||
numba_recurse(2);
|
||||
```
|
||||
|
||||
# Conclusion
|
||||
|
||||
Before finishing, it's important to address pain points that will show up if these techniques are
|
||||
used in a more realistic project:
|
||||
|
||||
First, code running in a GIL-free context will likely also need non-trivial data structures;
|
||||
GIL-free functions aren't useful if they're constantly interacting with Python objects whose access
|
||||
requires the GIL. Cython provides
|
||||
[extension types](http://docs.cython.org/en/latest/src/tutorial/cdef_classes.html) and Numba
|
||||
provides a [`@jitclass`](https://numba.pydata.org/numba-doc/dev/user/jitclass.html) decorator to
|
||||
address this need.
|
||||
|
||||
Second, building and distributing applications that make use of Cython/Numba can be complicated.
|
||||
Cython packages require running the compiler, (potentially) linking/packaging external dependencies,
|
||||
and distributing a binary wheel. Numba is generally simpler because the code being distributed is
|
||||
pure Python, but can be tricky since errors aren't detected until runtime.
|
||||
|
||||
Finally, while unlocking the GIL is often a solution in search of a problem, both Cython and Numba
|
||||
provide tools to directly manage the GIL when appropriate. This enables true parallelism (not just
|
||||
[concurrency](https://stackoverflow.com/a/1050257)) that is impossible in vanilla Python.
|
@ -1,60 +0,0 @@
|
||||
---
|
||||
layout: post
|
||||
title: "The webpack industrial complex"
|
||||
description: "Reflections on a new project"
|
||||
category:
|
||||
tags: [webpack, react, vite]
|
||||
---
|
||||
|
||||
This started because I wanted to build a synthesizer. Setting a goal of "digital DX7" was ambitious, but I needed something unrelated to the day job. Beyond that, working with audio seemed like a good challenge. I enjoy performance-focused code, and performance problems in audio are conspicuous. Building a web project was an obvious choice because of the Web Audio API documentation and independence from a large Digital Audio Workstation (DAW).
|
||||
|
||||
The project was soon derailed by attempts to sort out technical issues unrelated to the original purpose. Finding a resolution was a frustrating journey, and it's still not clear whether those problems were my fault. As a result, I'm writing this to make sense of what happened, to serve as a case study/reference material, and to salvage something from the process.
|
||||
|
||||
## Starting strong
|
||||
|
||||
The sole starting requirement was to write everything in TypeScript. Not because of project scale, but because guardrails help with unfamiliar territory. Keeping that in mind, the first question was: how does one start a new project? All I actually need is "compile TypeScript, show it in a browser."
|
||||
|
||||
Create React App (CRA) came to the rescue and the rest of that evening was a joy. My TypeScript/JavaScript skills were rusty, but the online documentation was helpful. I had never understood the appeal of JSX (why put a DOM in JavaScript?) until it made connecting an `onEvent` handler and a function easy.
|
||||
|
||||
Some quick dimensional analysis later and there was a sine wave oscillator playing A=440 through the speakers. I specifically remember thinking "modern browsers are magical."
|
||||
|
||||
## Continuing on
|
||||
|
||||
Now comes the first mistake: I began to worry about "scale" before encountering an actual problem. Rather than rendering audio in the main thread, why not use audio worklets and render in a background thread instead?
|
||||
|
||||
The first sign something was amiss came from the TypeScript compiler errors showing the audio worklet API [was missing](https://github.com/microsoft/TypeScript/issues/28308). After searching through GitHub issues and (unsuccessfully) tweaking the `tsconfig.json` settings, I settled on installing a package and moving on.
|
||||
|
||||
The next problem came from actually using the API. Worklets must load from separate "modules," but it wasn't clear how to guarantee the worklet code stayed separate from the application. I saw recommendations to use `new URL(<local path>, import.meta.url)` and it worked! Well, kind of:
|
||||
|
||||
![Browser error](/assets/images/2022-11-20-video_mp2t.png)
|
||||
|
||||
That file has the audio processor code, so why does it get served with `Content-Type: video/mp2t`?
|
||||
|
||||
## Floundering about
|
||||
|
||||
Now comes the second mistake: even though I didn't understand the error, I ignored recommendations to [just use JavaScript](https://hackernoon.com/implementing-audioworklets-with-react-8a80a470474) and stuck by the original TypeScript requirement.
|
||||
|
||||
I tried different project structures. Moving the worklet code to a new folder didn't help, nor did setting up a monorepo and placing it in a new package.
|
||||
|
||||
I tried three different CRA tools - `react-app-rewired`, `craco`, `customize-cra` - but got the same problem. Each has varying levels of compatibility with recent CRA versions, so it wasn't clear if I had the right solution but implemented it incorrectly. After attempting to eject the application and panicking after seeing the configuration, I abandoned that as well.
|
||||
|
||||
I tried changing the webpack configuration: using [new](https://github.com/webpack/webpack/issues/11543#issuecomment-917673256) [loaders](https://github.com/popelenkow/worker-url), setting [asset rules](https://github.com/webpack/webpack/discussions/14093#discussioncomment-1257149), even [changing how webpack detects worker resources](https://github.com/webpack/webpack/issues/11543#issuecomment-826897590). In hindsight, entry points may have been the answer. But because CRA actively resists attempts to change its webpack configuration, and I couldn't find audio worklet examples in any other framework, I gave up.
|
||||
|
||||
I tried so many application frameworks. Next.js looked like a good candidate, but added its own [bespoke webpack complexity](https://github.com/vercel/next.js/issues/24907) to the existing confusion. Astro had the best "getting started" experience, but I refuse to install an IDE-specific plugin. I first used Deno while exploring Lume, but it couldn't import the audio worklet types (maybe because of module compatibility?). Each framework was unique in its own way (shout-out to SvelteKit) but I couldn't figure out how to make them work.
|
||||
|
||||
## Learning and reflecting
|
||||
|
||||
I ended up using Vite and vite-plugin-react-pages to handle both "build the app" and "bundle worklets," but the specific tool choice isn't important. Instead, the focus should be on lessons learned.
|
||||
|
||||
For myself:
|
||||
|
||||
- I'm obsessed with tooling, to the point it can derail the original goal. While it comes from a good place (for example: "types are awesome"), it can get in the way of more important work
|
||||
- I tend to reach for online resources right after seeing a new problem. While finding help online is often faster, spending time understanding the problem would have been more productive than cycling through (often outdated) blog posts
|
||||
|
||||
For the tools:
|
||||
|
||||
- Resource bundling is great and solves a genuine challenge. I've heard too many horror stories of developers writing modules by hand to believe this is unnecessary complexity
|
||||
- Webpack is a build system and modern frameworks are deeply dependent on it (hence the "webpack industrial complex"). While this often saves users from unnecessary complexity, there's no path forward if something breaks
|
||||
- There's little ability to mix and match tools across frameworks. Next.js and Gatsby let users extend webpack, but because each framework adds its own modules, changes aren't portable. After spending a week looking at webpack, I had an example running with Parcel in thirty minutes, but couldn't integrate it
|
||||
|
||||
In the end, learning new systems is fun, but a focus on tools that "just work" can leave users out in the cold if they break down.
|
@ -1,15 +0,0 @@
|
||||
@font-face {
|
||||
font-family: 'JetBrains Mono';
|
||||
src: url('/assets/font/JetBrainsMono-Regular.woff2') format('woff2'),
|
||||
url('/assets/font/JetBrainsMono-Regular.woff') format('woff');
|
||||
font-weight: normal;
|
||||
font-style: normal;
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: 'Lato';
|
||||
src: url('/assets/font/lato-regular-webfont.woff2') format('woff2'),
|
||||
url('/assets/font/lato-regular-webfont.woff') format('woff');
|
||||
font-weight: normal;
|
||||
font-style: normal;
|
||||
}
|
@ -1,119 +0,0 @@
|
||||
---
|
||||
---
|
||||
|
||||
// Import the theme rules
|
||||
@import "theme";
|
||||
|
||||
body {
|
||||
max-width: 100%;
|
||||
overflow-x: hidden;
|
||||
font-family: 'Lato', sans-serif;
|
||||
}
|
||||
|
||||
.navbar {
|
||||
color: $gray;
|
||||
}
|
||||
|
||||
.separator {
|
||||
margin-right: .45rem;
|
||||
margin-left: .25rem;
|
||||
color: #000;
|
||||
&:after {
|
||||
content: '\00a0/';
|
||||
}
|
||||
}
|
||||
|
||||
header {
|
||||
padding-top: 80px;
|
||||
padding-bottom: 0;
|
||||
};
|
||||
|
||||
header h1,h2 {
|
||||
color: #000;
|
||||
}
|
||||
|
||||
.post-description {
|
||||
color: #555;
|
||||
}
|
||||
|
||||
.post-container a {
|
||||
color: #555;
|
||||
border-bottom-color: $gray;
|
||||
border-bottom-style: dotted;
|
||||
border-bottom-width: 1px;
|
||||
|
||||
position: relative;
|
||||
display: inline-block;
|
||||
padding: 1px 1px;
|
||||
transition: color ease 0.3s;
|
||||
|
||||
&::after {
|
||||
content: '';
|
||||
position: absolute;
|
||||
z-index: -1;
|
||||
width: 100%;
|
||||
height: 0%;
|
||||
left: 0;
|
||||
bottom: 0;
|
||||
background-color: $gray;
|
||||
transition: all ease 0.3s;
|
||||
}
|
||||
|
||||
&:hover {
|
||||
color: #fff;
|
||||
border-bottom-style: solid;
|
||||
&::after {
|
||||
height: 100%;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
body pre {
|
||||
font-size: 15px;
|
||||
}
|
||||
|
||||
pre.highlight, code {
|
||||
font-family: 'JetBrains Mono', monospace;
|
||||
}
|
||||
|
||||
div.highlighter-rouge {
|
||||
// Default theme uses `width: 100vw`, which while cool, does cause the page
|
||||
// to exceed screen width and trigger horizontal scrolling. No bueno.
|
||||
width: 99vw;
|
||||
}
|
||||
|
||||
.post-date {
|
||||
// On the front page, make sure titles don't force wrapping the date box content
|
||||
text-align: right;
|
||||
white-space: nowrap;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
color: #555;
|
||||
right: 100px;
|
||||
margin-left: 0;
|
||||
padding-left: 1.8rem;
|
||||
border-left: 5px solid $gray;
|
||||
}
|
||||
|
||||
.post-nav {
|
||||
/* Insert your custom styling here. Example:
|
||||
|
||||
font-size: 14px;
|
||||
*/
|
||||
display: flex;
|
||||
margin-top: 1em;
|
||||
margin-bottom: 1em;
|
||||
}
|
||||
.post-nav div {
|
||||
/* flex-grow, flex-shrink, flex-basis */
|
||||
flex: 1 1 0;
|
||||
}
|
||||
.post-nav-next {
|
||||
text-align: right;
|
||||
}
|
||||
|
||||
th, td {
|
||||
border-bottom: 1px solid $gray;
|
||||
padding: 0.75em;
|
||||
}
|
Before Width: | Height: | Size: 840 KiB |
Before Width: | Height: | Size: 926 KiB |
Before Width: | Height: | Size: 165 KiB |
Before Width: | Height: | Size: 50 KiB |
Before Width: | Height: | Size: 48 KiB |
Before Width: | Height: | Size: 71 KiB |
Before Width: | Height: | Size: 68 KiB |
Before Width: | Height: | Size: 23 KiB |
Before Width: | Height: | Size: 24 KiB |
Before Width: | Height: | Size: 124 KiB |