// speice.io/assets/js/16c8da5a.33a51d92.js
//
// Repository-viewer metadata for the bundled file as originally captured:
// 1 line, 17 KiB, JavaScript.

"use strict";(self.webpackChunkspeice_io=self.webpackChunkspeice_io||[]).push([["95"],{55382:function(e,t,n){n.r(t),n.d(t,{assets:function(){return l},contentTitle:function(){return r},default:function(){return d},frontMatter:function(){return o},metadata:function(){return i},toc:function(){return h}});var i=n(50745),a=n(85893),s=n(50065);let o={slug:"2018/06/dateutil-parser-to-rust",title:"What I learned porting dateutil to Rust",date:new Date("2018-06-25T12:00:00.000Z"),authors:["bspeice"],tags:[]},r=void 0,l={authorsImageUrls:[void 0]},h=[{value:"Slow down, what?",id:"slow-down-what",level:2},{value:"Lost in Translation",id:"lost-in-translation",level:2},{value:"Using a young language",id:"using-a-young-language",level:2},{value:"Trial Maintenance Policy",id:"trial-maintenance-policy",level:2},{value:"Roadmap and Conclusion",id:"roadmap-and-conclusion",level:2}];function c(e){let t={a:"a",blockquote:"blockquote",code:"code",em:"em",h2:"h2",img:"img",li:"li",ol:"ol",p:"p",strong:"strong",...(0,s.a)(),...e.components};return(0,a.jsxs)(a.Fragment,{children:[(0,a.jsxs)(t.p,{children:["I've mostly been a lurker in Rust for a while, making a couple small contributions here and there.\nSo launching ",(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse",children:"dtparse"})," feels like nice step towards becoming a\nfunctioning member of society. But not too much, because then you know people start asking you to\npay bills, and ain't nobody got time for that."]}),"\n",(0,a.jsx)(t.p,{children:"But I built dtparse, and you can read about my thoughts on the process. Or don't. 
I won't tell you\nwhat to do with your life (but you should totally keep reading)."}),"\n",(0,a.jsx)(t.h2,{id:"slow-down-what",children:"Slow down, what?"}),"\n",(0,a.jsxs)(t.p,{children:["OK, fine, I guess I should start with ",(0,a.jsx)(t.em,{children:"why"})," someone would do this."]}),"\n",(0,a.jsxs)(t.p,{children:[(0,a.jsx)(t.a,{href:"https://github.com/dateutil/dateutil",children:"Dateutil"})," is a Python library for handling dates. The\nstandard library support for time in Python is kinda dope, but there are a lot of extras that go\ninto making it useful beyond just the ",(0,a.jsx)(t.a,{href:"https://docs.python.org/3.6/library/datetime.html",children:"datetime"}),"\nmodule. ",(0,a.jsx)(t.code,{children:"dateutil.parser"})," specifically is code to take all the super-weird time formats people come\nup with and turn them into something actually useful."]}),"\n",(0,a.jsxs)(t.p,{children:["Date/time parsing, it turns out, is just like everything else involving\n",(0,a.jsx)(t.a,{href:"https://infiniteundo.com/post/25326999628/falsehoods-programmers-believe-about-time",children:"computers"})," and\n",(0,a.jsx)(t.a,{href:"https://infiniteundo.com/post/25509354022/more-falsehoods-programmers-believe-about-time",children:"time"}),": it\nfeels like it shouldn't be that difficult to do, until you try to do it, and you realize that people\nsuck and this is why\n",(0,a.jsx)(t.a,{href:"https://zachholman.com/talk/utc-is-enough-for-everyone-right",children:"we can't we have nice things"}),". 
But\nalas, we'll try and make contemporary art out of the rubble and give it a pretentious name like\n",(0,a.jsx)(t.em,{children:"Time"}),"."]}),"\n",(0,a.jsx)(t.p,{children:(0,a.jsx)(t.img,{alt:"A gravel mound",src:n(49394).Z+"",width:"800",height:"374"})}),"\n",(0,a.jsxs)(t.blockquote,{children:["\n",(0,a.jsx)(t.p,{children:(0,a.jsx)(t.a,{href:"https://www.goodfreephotos.com/united-states/montana/elkhorn/remains-of-the-mining-operation-elkhorn.jpg.php",children:"Time"})}),"\n"]}),"\n",(0,a.jsxs)(t.p,{children:["What makes ",(0,a.jsx)(t.code,{children:"dateutil.parser"})," great is that there's single function with a single argument that\ndrives what programmers interact with:\n",(0,a.jsx)(t.a,{href:"https://github.com/dateutil/dateutil/blob/6dde5d6298cfb81a4c594a38439462799ed2aef2/dateutil/parser/_parser.py#L1258",children:(0,a.jsx)(t.code,{children:"parse(timestr)"})}),'.\nIt takes in the time as a string, and gives you back a reasonable "look, this is the best anyone can\npossibly do to make sense of your input" value. It doesn\'t expect much of you.']}),"\n",(0,a.jsx)(t.p,{children:(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L1332",children:"And now it's in Rust."})}),"\n",(0,a.jsx)(t.h2,{id:"lost-in-translation",children:"Lost in Translation"}),"\n",(0,a.jsxs)(t.p,{children:["Having worked at a bulge-bracket bank watching Java programmers try to be Python programmers, I'm\nadmittedly hesitant to publish Python code that's trying to be Rust. Interestingly, Rust code can\nactually do a great job of mimicking Python. It's certainly not idiomatic Rust, but I've had better\nexperiences than\n",(0,a.jsx)(t.a,{href:"https://webcache.googleusercontent.com/search?q=cache:wkYMpktJtnUJ:https://jackstouffer.com/blog/porting_dateutil.html+&cd=3&hl=en&ct=clnk&gl=us",children:"this guy"}),"\nwho attempted the same thing for D. 
These are the actual take-aways:"]}),"\n",(0,a.jsxs)(t.p,{children:["When transcribing code, ",(0,a.jsx)(t.strong,{children:"stay as close to the original library as possible"}),". I'm talking about\nusing the same variable names, same access patterns, the whole shebang. It's way too easy to make a\ncouple of typos, and all of a sudden your code blows up in new and exciting ways. Having a reference\nmanual for verbatim what your code should be means that you don't spend that long debugging\ncomplicated logic, you're more looking for typos."]}),"\n",(0,a.jsxs)(t.p,{children:["Also, ",(0,a.jsx)(t.strong,{children:"don't use nice Rust things like enums"}),". While\n",(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L88-L94",children:"one time it worked out OK for me"}),",\nI also managed to shoot myself in the foot a couple times because ",(0,a.jsx)(t.code,{children:"dateutil"})," stores AM/PM as a\nboolean and I mixed up which was true, and which was false (side note: AM is false, PM is true). In\ngeneral, writing nice code ",(0,a.jsx)(t.em,{children:"should not be a first-pass priority"})," when you're just trying to recreate\nthe same functionality."]}),"\n",(0,a.jsxs)(t.p,{children:[(0,a.jsx)(t.strong,{children:"Exceptions are a pain."}),' Make peace with it. Python code is just allowed to skip stack frames. So\nwhen a co-worker told me "Rust is getting try-catch syntax" I properly freaked out. Turns out\n',(0,a.jsx)(t.a,{href:"https://github.com/rust-lang/rfcs/pull/243",children:"he's not quite right"}),", and I'm OK with that. 
And while\n",(0,a.jsx)(t.code,{children:"dateutil"})," is pretty well-behaved about not skipping multiple stack frames,\n",(0,a.jsx)(t.a,{href:"https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L730-L865",children:"130-line try-catch blocks"}),"\ntake a while to verify."]}),"\n",(0,a.jsxs)(t.p,{children:["As another Python quirk, ",(0,a.jsxs)(t.strong,{children:["be very careful about\n",(0,a.jsx)(t.a,{href:"https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L494-L568",children:"long nested if-elif-else blocks"})]}),".\nI used to think that Python's whitespace was just there to get you to format your code correctly. I\nthink that no longer. It's way too easy to close a block too early and have incredibly weird issues\nin the logic. Make sure you use an editor that displays indentation levels so you can keep things\nstraight."]}),"\n",(0,a.jsxs)(t.p,{children:[(0,a.jsx)(t.strong,{children:"Rust macros are not free."})," I originally had the\n",(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/b0e737f088eca8e83ab4244c6621a2797d247697/tests/compat.rs#L63-L217",children:"main test body"}),"\nwrapped up in a macro using ",(0,a.jsx)(t.a,{href:"https://github.com/PyO3/PyO3",children:"pyo3"}),". It took two minutes to compile.\nAfter\n",(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/e017018295c670e4b6c6ee1cfff00dbb233db47d/tests/compat.rs#L76-L205",children:"moving things to a function"}),"\ncompile times dropped down to ~5 seconds. Turns out 150 lines * 100 tests = a lot of redundant code\nto be compiled. 
My new rule of thumb is that any macros longer than 10-15 lines are actually\nfunctions that need to be liberated, man."]}),"\n",(0,a.jsxs)(t.p,{children:["Finally, ",(0,a.jsx)(t.strong,{children:"I really miss list comprehensions and dictionary comprehensions."})," As a quick comparison,\nsee\n",(0,a.jsx)(t.a,{href:"https://github.com/dateutil/dateutil/blob/16561fc99361979e88cccbd135393b06b1af7e90/dateutil/parser/_parser.py#L476",children:"this dateutil code"}),"\nand\n",(0,a.jsx)(t.a,{href:"https://github.com/bspeice/dtparse/blob/7d565d3a78876dbebd9711c9720364fe9eba7915/src/lib.rs#L619-L629",children:"the implementation in Rust"}),".\nI probably wrote it wrong, and I'm sorry. Ultimately though, I hope that these comprehensions can be\nadded through macros or syntax extensions. Either way, they're expressive, save typing, and are\nsuper-readable. Let's get more of that."]}),"\n",(0,a.jsx)(t.h2,{id:"using-a-young-language",children:"Using a young language"}),"\n",(0,a.jsx)(t.p,{children:"Now, Rust is exciting and new, which means that there's opportunity to make a substantive impact. On\nmore than one occasion though, I've had issues navigating the Rust ecosystem."}),"\n",(0,a.jsxs)(t.p,{children:['What I\'ll call the "canonical library" is still being built. In Python, if you need datetime\nparsing, you use ',(0,a.jsx)(t.code,{children:"dateutil"}),". If you want ",(0,a.jsx)(t.code,{children:"decimal"})," types, it's already in the\n",(0,a.jsx)(t.a,{href:"https://docs.python.org/3.6/library/decimal.html",children:"standard library"}),". While I might've gotten away\nwith ",(0,a.jsx)(t.code,{children:"f64"}),", ",(0,a.jsx)(t.code,{children:"dateutil"})," uses decimals, and I wanted to follow the principle of ",(0,a.jsx)(t.strong,{children:"staying as close to\nthe original library as possible"}),". Thus began my quest to find a decimal library in Rust. 
What I\nquickly found was summarized in a comment:"]}),"\n",(0,a.jsxs)(t.blockquote,{children:["\n",(0,a.jsxs)(t.p,{children:["Writing a BigDecimal is easy. Writing a ",(0,a.jsx)(t.em,{children:"good"})," BigDecimal is hard."]}),"\n",(0,a.jsx)(t.p,{children:(0,a.jsx)(t.a,{href:"https://github.com/rust-lang/rust/issues/8937#issuecomment-34582794",children:"-cmr"})}),"\n"]}),"\n",(0,a.jsxs)(t.p,{children:["In practice, this means that there are at least ",(0,a.jsx)(t.a,{href:"https://crates.io/crates/bigdecimal",children:"4"}),"\n",(0,a.jsx)(t.a,{href:"https://crates.io/crates/rust_decimal",children:"different"}),"\n",(0,a.jsx)(t.a,{href:"https://crates.io/crates/decimal",children:"implementations"})," ",(0,a.jsx)(t.a,{href:"https://crates.io/crates/decimate",children:"available"}),".\nAnd that's a lot of decisions to worry about when all I'm thinking is \"why can't\n",(0,a.jsx)(t.a,{href:"https://en.wikipedia.org/wiki/Calendar_reform",children:"calendar reform"})," be a thing\" and I'm forced to dig\nthrough a ",(0,a.jsx)(t.a,{href:"https://github.com/rust-lang/rust/issues/8937#issuecomment-31661916",children:"couple"}),"\n",(0,a.jsx)(t.a,{href:"https://github.com/rust-lang/rfcs/issues/334",children:"different"}),"\n",(0,a.jsx)(t.a,{href:"https://github.com/rust-num/num/issues/8",children:"threads"})," to figure out if the library I'm look at is dead\nor just stable."]}),"\n",(0,a.jsxs)(t.p,{children:['And even when the "canonical library" exists, there\'s no guarantees that it will be well-maintained.\n',(0,a.jsx)(t.a,{href:"https://github.com/chronotope/chrono",children:"Chrono"})," is the ",(0,a.jsx)(t.em,{children:"de facto"})," date/time library in Rust, and just\nreleased version 0.4.4 like two days ago. 
Meanwhile,\n",(0,a.jsx)(t.a,{href:"https://github.com/chronotope/chrono-tz",children:"chrono-tz"})," appears to be dead in the water even though\n",(0,a.jsx)(t.a,{href:"https://github.com/chronotope/chrono-tz/issues/19",children:"there are people happy to help maintain it"}),". I\nknow relatively little about it, but it appears that most of the release process is automated;\nkeeping that up to date should be a no-brainer."]}),"\n",(0,a.jsx)(t.h2,{id:"trial-maintenance-policy",children:"Trial Maintenance Policy"}),"\n",(0,a.jsxs)(t.p,{children:['Specifically given "maintenance" being an\n',(0,a.jsx)(t.a,{href:"https://www.reddit.com/r/rust/comments/48540g/thoughts_on_initiators_vs_maintainers/",children:"oft-discussed"}),"\nissue, I'm going to try out the following policy to keep things moving on ",(0,a.jsx)(t.code,{children:"dtparse"}),":"]}),"\n",(0,a.jsxs)(t.ol,{children:["\n",(0,a.jsxs)(t.li,{children:["\n",(0,a.jsxs)(t.p,{children:["Issues/PRs needing ",(0,a.jsx)(t.em,{children:"maintainer"})," feedback will be updated at least weekly. I want to make sure\nnobody's blocking on me."]}),"\n"]}),"\n",(0,a.jsxs)(t.li,{children:["\n",(0,a.jsxs)(t.p,{children:["To keep issues/PRs needing ",(0,a.jsx)(t.em,{children:"contributor"})," feedback moving, I'm going to (kindly) ask the\ncontributor to check in after two weeks, and close the issue without resolution if I hear nothing\nback after a month."]}),"\n"]}),"\n"]}),"\n",(0,a.jsx)(t.p,{children:"The second point I think has the potential to be a bit controversial, so I'm happy to receive\nfeedback on that. And if a contributor responds with \"hey, still working on it, had a kid and I'm\nrunning on 30 seconds of sleep a night,\" then first: congratulations on sustaining human life. And\nsecond: I don't mind keeping those requests going indefinitely. 
I just want to try and balance\nkeeping things moving with giving people the necessary time they need."}),"\n",(0,a.jsx)(t.p,{children:"I should also note that I'm still getting some best practices in place - CONTRIBUTING and\nCONTRIBUTORS files need to be added, as well as issue/PR templates. In progress. None of us are\nperfect."}),"\n",(0,a.jsx)(t.h2,{id:"roadmap-and-conclusion",children:"Roadmap and Conclusion"}),"\n",(0,a.jsxs)(t.p,{children:["So if I've now built a ",(0,a.jsx)(t.code,{children:"dateutil"}),"-compatible parser, we're done, right? Of course not! That's not\nnearly ambitious enough."]}),"\n",(0,a.jsxs)(t.p,{children:["Ultimately, I'd love to have a library that's capable of parsing everything the Linux ",(0,a.jsx)(t.code,{children:"date"})," command\ncan do (and not ",(0,a.jsx)(t.code,{children:"date"})," on OSX, because seriously, BSD coreutils are the worst). I know Rust has a\ncoreutils rewrite going on, and ",(0,a.jsx)(t.code,{children:"dtparse"})," would potentially be an interesting candidate since it\ndoesn't bring in a lot of extra dependencies. ",(0,a.jsx)(t.a,{href:"https://crates.io/crates/humantime",children:(0,a.jsx)(t.code,{children:"humantime"})}),"\ncould help pick up some of the (current) slack in dtparse, so maybe we can share and care with each\nother?"]}),"\n",(0,a.jsx)(t.p,{children:"All in all, I'm mostly hoping that nobody's already done this and I haven't spent a bit over a month\non redundant code. So if it exists, tell me. I need to know, but be nice about it, because I'm going\nto take it hard."}),"\n",(0,a.jsx)(t.p,{children:"And in the mean time, I'm looking forward to building more. 
Onwards."})]})}function d(e={}){let{wrapper:t}={...(0,s.a)(),...e.components};return t?(0,a.jsx)(t,{...e,children:(0,a.jsx)(c,{...e})}):c(e)}},49394:function(e,t,n){n.d(t,{Z:function(){return i}});let i=n.p+"assets/images/gravel-mound-4afad8bdb1cd6b0e40dd2fd41adca36f.jpg"},50065:function(e,t,n){n.d(t,{Z:function(){return r},a:function(){return o}});var i=n(67294);let a={},s=i.createContext(a);function o(e){let t=i.useContext(s);return i.useMemo(function(){return"function"==typeof e?e(t):{...t,...e}},[t,e])}function r(e){let t;return t=e.disableParentContext?"function"==typeof e.components?e.components(a):e.components||a:o(e.components),i.createElement(s.Provider,{value:t},e.children)}},50745:function(e){e.exports=JSON.parse('{"permalink":"/2018/06/dateutil-parser-to-rust","source":"@site/blog/2018-06-25-dateutil-parser-to-rust/index.mdx","title":"What I learned porting dateutil to Rust","description":"I\'ve mostly been a lurker in Rust for a while, making a couple small contributions here and there.","date":"2018-06-25T12:00:00.000Z","tags":[],"readingTime":6.99,"hasTruncateMarker":true,"authors":[{"name":"Bradlee Speice","socials":{"github":"https://github.com/bspeice"},"key":"bspeice","page":null}],"frontMatter":{"slug":"2018/06/dateutil-parser-to-rust","title":"What I learned porting dateutil to Rust","date":"2018-06-25T12:00:00.000Z","authors":["bspeice"],"tags":[]},"unlisted":false,"lastUpdatedAt":1731201811000,"prevItem":{"title":"Primitives in Rust are weird (and cool)","permalink":"/2018/09/primitives-in-rust-are-weird"},"nextItem":{"title":"Hello!","permalink":"/2018/05/hello"}}')}}]);