mirror of
https://github.com/speice-io/marketdata-shootout
synced 2024-11-23 00:18:22 -05:00
Add the SBE runner
Works for the test dataset, now time to make sure everyone agrees.
This commit is contained in:
parent
df5198993b
commit
e58861458e
@ -59,23 +59,26 @@
|
||||
-->
|
||||
<field name="timestamp" id="3" type="int64"/>
|
||||
<!--
|
||||
SBE specifically doesn't have "union" types, so we include both a `trade` and `quote`
|
||||
here and the tag to identify.
|
||||
SBE specifically doesn't have "union" types, so we include a type tag, and both
|
||||
`trade` and `quote` as "optional". This style was chosen to approximate how
|
||||
Cap'n Proto and Flatbuffers do it:
|
||||
https://github.com/real-logic/simple-binary-encoding/issues/232
|
||||
|
||||
In the future, there are a couple options:
|
||||
1. Create a "payload header" type and promote "trade" and "quote" to <sbe:message>,
|
||||
since the SBE message header is already able to distinguish between message types
|
||||
2. Create a "group" for each message type, and just include no elements of that type.
|
||||
Uses a `u16` for each message type, instead of padding bytes for the message size.
|
||||
3. Split up the message components and use "session types" to chain things together,
|
||||
as in https://polysync.io/blog/session-types-for-hearty-codecs
|
||||
|
||||
For now, this message format was chosen to approximate the schemas of Cap'n Proto and Flatbuffers
|
||||
However, space is actually reserved for *both* `trade` and `quote` in the message;
|
||||
that is, the payload size is the same if neither, one, or both are filled.
|
||||
Other ways you can try to emulate unions:
|
||||
1. Use a "payload header" composite type and promote "trade" and "quote" to <sbe:message>;
|
||||
SBE can distinguish message types based on the SBE header.
|
||||
2. Create a "group" for each message type; adds an extra `u16` per type, but overall payload
|
||||
size goes down because we no longer reserve space that is potentially unused.
|
||||
3. Split up the message components into individual <composite/> blocks, and write
|
||||
a state machine (by hand) to chain the blocks. For a better explanation,
|
||||
see "session types" in:
|
||||
https://polysync.io/blog/session-types-for-hearty-codecs
|
||||
-->
|
||||
<field name="msg_type" id="4" type="MsgType"/>
|
||||
<field name="trade" id="5" type="Trade"/>
|
||||
<field name="quote" id="6" type="Quote"/>
|
||||
<field name="trade" id="5" type="Trade" presence="optional"/>
|
||||
<field name="quote" id="6" type="Quote" presence="optional"/>
|
||||
<data name="symbol" id="100" type="varAsciiEncoding"/>
|
||||
</group>
|
||||
</sbe:message>
|
||||
|
@ -1,2 +0,0 @@
|
||||
// This file is needed for tests outside the main source tree to find the project files
|
||||
pub mod marketdata_capnp;
|
17
src/main.rs
17
src/main.rs
@ -17,9 +17,11 @@ use crate::iex::IexParser;
|
||||
pub mod marketdata_capnp;
|
||||
#[allow(unused_imports)]
|
||||
pub mod marketdata_generated; // Flatbuffers
|
||||
pub mod marketdata_sbe;
|
||||
|
||||
mod capnp_runner;
|
||||
mod flatbuffers_runner;
|
||||
mod sbe_runner;
|
||||
mod iex;
|
||||
mod parsers;
|
||||
|
||||
@ -81,6 +83,7 @@ fn main() {
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
let mut capnp_writer = capnp_runner::CapnpWriter::new();
|
||||
for iex_payload in parser {
|
||||
//let iex_payload = parser.next().unwrap();
|
||||
@ -93,6 +96,20 @@ fn main() {
|
||||
while let Ok(_) = capnp_reader.deserialize_unpacked(&mut read_buf, &mut summarizer) {
|
||||
parsed_msgs += 1;
|
||||
}
|
||||
*/
|
||||
|
||||
let mut sbe_writer = sbe_runner::SBEWriter::new();
|
||||
for iex_payload in parser {
|
||||
//let iex_payload = parser.next().unwrap();
|
||||
sbe_writer.serialize(&iex_payload, &mut output_buf);
|
||||
}
|
||||
|
||||
let sbe_reader = sbe_runner::SBEReader::new();
|
||||
let mut read_buf = StreamVec::new(output_buf);
|
||||
let mut parsed_msgs: u64 = 0;
|
||||
while let Ok(_) = sbe_reader.deserialize(&mut read_buf, &mut summarizer) {
|
||||
parsed_msgs += 1;
|
||||
}
|
||||
|
||||
dbg!(parsed_msgs);
|
||||
dbg!(summarizer);
|
||||
|
@ -188,7 +188,7 @@ impl<'d> ScratchEncoderData<'d> {
|
||||
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
|
||||
pub enum Either<L, R> {
|
||||
Left(L),
|
||||
Right(R),
|
||||
Right(R)
|
||||
}
|
||||
|
||||
/// Enum Side
|
||||
@ -199,7 +199,6 @@ pub enum Side {
|
||||
Sell = 1u8,
|
||||
NullVal = 255u8,
|
||||
}
|
||||
|
||||
impl Default for Side {
|
||||
fn default() -> Self { Side::NullVal }
|
||||
}
|
||||
@ -212,7 +211,6 @@ pub enum MsgType {
|
||||
Quote = 1u8,
|
||||
NullVal = 255u8,
|
||||
}
|
||||
|
||||
impl Default for MsgType {
|
||||
fn default() -> Self { MsgType::NullVal }
|
||||
}
|
||||
@ -293,14 +291,12 @@ impl MultiMessageFields {}
|
||||
pub struct MultiMessageMessageHeader {
|
||||
pub message_header: MessageHeader
|
||||
}
|
||||
|
||||
impl MultiMessageMessageHeader {
|
||||
pub const BLOCK_LENGTH: u16 = 8;
|
||||
pub const TEMPLATE_ID: u16 = 1;
|
||||
pub const SCHEMA_ID: u16 = 1;
|
||||
pub const VERSION: u16 = 0;
|
||||
}
|
||||
|
||||
impl Default for MultiMessageMessageHeader {
|
||||
fn default() -> MultiMessageMessageHeader {
|
||||
MultiMessageMessageHeader {
|
||||
@ -330,7 +326,6 @@ impl MultiMessageMessagesMember {}
|
||||
pub struct MultiMessageDecoderDone<'d> {
|
||||
scratch: ScratchDecoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageDecoderDone<'d> {
|
||||
/// Returns the number of bytes decoded
|
||||
pub fn unwrap(self) -> usize {
|
||||
@ -346,7 +341,6 @@ impl<'d> MultiMessageDecoderDone<'d> {
|
||||
pub struct MultiMessageMessagesSymbolDecoder<'d> {
|
||||
parent: MultiMessageMessagesMemberDecoder<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessagesSymbolDecoder<'d> {
|
||||
fn wrap(parent: MultiMessageMessagesMemberDecoder<'d>) -> Self {
|
||||
MultiMessageMessagesSymbolDecoder { parent: parent }
|
||||
@ -388,11 +382,9 @@ impl<'d> MultiMessageMessagesMemberDecoder<'d> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MultiMessageMessagesHeaderDecoder<'d> {
|
||||
scratch: ScratchDecoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessagesHeaderDecoder<'d> {
|
||||
fn wrap(scratch: ScratchDecoderData<'d>) -> Self {
|
||||
MultiMessageMessagesHeaderDecoder { scratch: scratch }
|
||||
@ -411,7 +403,6 @@ impl<'d> MultiMessageMessagesHeaderDecoder<'d> {
|
||||
pub struct MultiMessageFieldsDecoder<'d> {
|
||||
scratch: ScratchDecoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageFieldsDecoder<'d> {
|
||||
pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageFieldsDecoder<'d> {
|
||||
MultiMessageFieldsDecoder { scratch: scratch }
|
||||
@ -426,7 +417,6 @@ impl<'d> MultiMessageFieldsDecoder<'d> {
|
||||
pub struct MultiMessageMessageHeaderDecoder<'d> {
|
||||
scratch: ScratchDecoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessageHeaderDecoder<'d> {
|
||||
pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageMessageHeaderDecoder<'d> {
|
||||
MultiMessageMessageHeaderDecoder { scratch: scratch }
|
||||
@ -446,7 +436,6 @@ pub fn start_decoding_multi_message<'d>(data: &'d [u8]) -> MultiMessageMessageHe
|
||||
pub struct MultiMessageEncoderDone<'d> {
|
||||
scratch: ScratchEncoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageEncoderDone<'d> {
|
||||
/// Returns the number of bytes encoded
|
||||
pub fn unwrap(self) -> usize {
|
||||
@ -462,7 +451,6 @@ impl<'d> MultiMessageEncoderDone<'d> {
|
||||
pub struct MultiMessageMessagesSymbolEncoder<'d> {
|
||||
parent: MultiMessageMessagesMemberEncoder<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessagesSymbolEncoder<'d> {
|
||||
fn wrap(parent: MultiMessageMessagesMemberEncoder<'d>) -> Self {
|
||||
MultiMessageMessagesSymbolEncoder { parent: parent }
|
||||
@ -470,7 +458,7 @@ impl<'d> MultiMessageMessagesSymbolEncoder<'d> {
|
||||
pub fn symbol(mut self, s: &'d [u8]) -> CodecResult<MultiMessageMessagesMemberEncoder> {
|
||||
let l = s.len();
|
||||
if l > 4294967294 {
|
||||
return Err(CodecErr::SliceIsLongerThanAllowedBySchema);
|
||||
return Err(CodecErr::SliceIsLongerThanAllowedBySchema)
|
||||
}
|
||||
// Write data length
|
||||
self.parent.scratch.write_type::<u32>(&(l as u32), 4)?; // group length
|
||||
@ -508,11 +496,9 @@ impl<'d> MultiMessageMessagesMemberEncoder<'d> {
|
||||
Ok(MultiMessageEncoderDone::wrap(self.scratch))
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MultiMessageMessagesHeaderEncoder<'d> {
|
||||
scratch: ScratchEncoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessagesHeaderEncoder<'d> {
|
||||
#[inline]
|
||||
fn wrap(scratch: ScratchEncoderData<'d>) -> Self {
|
||||
@ -531,7 +517,6 @@ impl<'d> MultiMessageMessagesHeaderEncoder<'d> {
|
||||
pub struct MultiMessageFieldsEncoder<'d> {
|
||||
scratch: ScratchEncoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageFieldsEncoder<'d> {
|
||||
pub fn wrap(scratch: ScratchEncoderData<'d>) -> MultiMessageFieldsEncoder<'d> {
|
||||
MultiMessageFieldsEncoder { scratch: scratch }
|
||||
@ -556,7 +541,6 @@ impl<'d> MultiMessageFieldsEncoder<'d> {
|
||||
pub struct MultiMessageMessageHeaderEncoder<'d> {
|
||||
scratch: ScratchEncoderData<'d>,
|
||||
}
|
||||
|
||||
impl<'d> MultiMessageMessageHeaderEncoder<'d> {
|
||||
pub fn wrap(scratch: ScratchEncoderData<'d>) -> MultiMessageMessageHeaderEncoder<'d> {
|
||||
MultiMessageMessageHeaderEncoder { scratch: scratch }
|
||||
|
144
src/sbe_runner.rs
Normal file
144
src/sbe_runner.rs
Normal file
@ -0,0 +1,144 @@
|
||||
use std::io::{BufRead, Write};
|
||||
use std::str::from_utf8_unchecked;
|
||||
|
||||
use nom::bytes::complete::take_until;
|
||||
use nom::IResult;
|
||||
|
||||
use crate::{marketdata_sbe, StreamVec, Summarizer};
|
||||
use crate::iex::{IexMessage, IexPayload};
|
||||
use crate::marketdata_sbe::{Either, MultiMessageFields, MultiMessageMessageHeader, MultiMessageMessagesMember, MultiMessageMessagesMemberEncoder, MultiMessageMessagesSymbolEncoder, Side, start_decoding_multi_message, start_encoding_multi_message};
|
||||
|
||||
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||
take_until(tag)(input)
|
||||
}
|
||||
|
||||
fn parse_symbol(sym: &[u8; 8]) -> &str {
|
||||
// TODO: Use the `jetscii` library for all that SIMD goodness
|
||||
// IEX guarantees ASCII, so we're fine using an unsafe conversion
|
||||
let (_, sym_bytes) = __take_until(" ", &sym[..]).unwrap();
|
||||
unsafe { from_utf8_unchecked(sym_bytes) }
|
||||
}
|
||||
|
||||
pub struct SBEWriter {
|
||||
/// Buffer to construct messages before copying. While SBE benefits
|
||||
/// from easily being able to create messages directly in output buffer,
|
||||
/// we'll construct in a scratch buffer and then copy to more fairly
|
||||
/// benchmark against Cap'n Proto and Flatbuffers.
|
||||
scratch_buffer: Vec<u8>,
|
||||
default_header: MultiMessageMessageHeader,
|
||||
}
|
||||
|
||||
impl SBEWriter {
|
||||
pub fn new() -> SBEWriter {
|
||||
SBEWriter {
|
||||
// 8K scratch buffer is *way* more than necessary,
|
||||
// but we don't want to run into issues with not enough
|
||||
// data to encode messages
|
||||
scratch_buffer: vec![0; 1024 * 8],
|
||||
default_header: MultiMessageMessageHeader::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>) {
|
||||
let (fields, encoder) = start_encoding_multi_message(&mut self.scratch_buffer[..])
|
||||
.header_copy(&self.default_header.message_header).unwrap()
|
||||
.multi_message_fields().unwrap();
|
||||
fields.sequence_number = payload.first_seq_no;
|
||||
|
||||
let mut encoder = encoder.messages_individually().unwrap();
|
||||
let mut encoder: MultiMessageMessagesMemberEncoder = payload.messages.iter().fold(encoder, |enc, m| {
|
||||
match m {
|
||||
IexMessage::TradeReport(tr) => {
|
||||
let fields = MultiMessageMessagesMember {
|
||||
msg_type: marketdata_sbe::MsgType::Trade,
|
||||
timestamp: tr.timestamp,
|
||||
trade: marketdata_sbe::Trade {
|
||||
size: tr.size,
|
||||
price: tr.price,
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
|
||||
sym_enc.symbol(parse_symbol(&tr.symbol).as_bytes()).unwrap()
|
||||
}
|
||||
IexMessage::PriceLevelUpdate(plu) => {
|
||||
let fields = MultiMessageMessagesMember {
|
||||
msg_type: marketdata_sbe::MsgType::Quote,
|
||||
timestamp: plu.timestamp,
|
||||
quote: marketdata_sbe::Quote {
|
||||
price: plu.price,
|
||||
size: plu.size,
|
||||
flags: plu.event_flags,
|
||||
side: if plu.msg_type == 0x38 { Side::Buy } else { Side::Sell },
|
||||
},
|
||||
..Default::default()
|
||||
};
|
||||
let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
|
||||
sym_enc.symbol(parse_symbol(&plu.symbol).as_bytes()).unwrap()
|
||||
}
|
||||
_ => enc
|
||||
}
|
||||
});
|
||||
|
||||
let finished = encoder.done_with_messages().unwrap();
|
||||
let data_len = finished.unwrap();
|
||||
|
||||
output.write(&self.scratch_buffer[..data_len]).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
pub struct SBEReader;
|
||||
|
||||
impl SBEReader {
|
||||
pub fn new() -> SBEReader {
|
||||
SBEReader {}
|
||||
}
|
||||
|
||||
pub fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
|
||||
let data = buf.fill_buf().unwrap();
|
||||
if data.len() == 0 {
|
||||
return Err(());
|
||||
}
|
||||
|
||||
let (header, decoder) = start_decoding_multi_message(data)
|
||||
.header().unwrap();
|
||||
|
||||
let (fields, decoder) = decoder.multi_message_fields().unwrap();
|
||||
let mut msg_decoder = decoder.messages_individually().unwrap();
|
||||
while let Either::Left(msg) = msg_decoder {
|
||||
let (member, sym_dec) = msg.next_messages_member().unwrap();
|
||||
let (sym, next_msg_dec) = sym_dec.symbol().unwrap();
|
||||
match member.msg_type {
|
||||
marketdata_sbe::MsgType::Trade => stats.append_trade_volume(
|
||||
unsafe { from_utf8_unchecked(sym) },
|
||||
member.trade.size as u64,
|
||||
),
|
||||
marketdata_sbe::MsgType::Quote => stats.update_quote_prices(
|
||||
unsafe { from_utf8_unchecked(sym) },
|
||||
member.quote.price,
|
||||
match member.quote.side {
|
||||
Side::Buy => true,
|
||||
_ => false
|
||||
},
|
||||
),
|
||||
_ => ()
|
||||
}
|
||||
msg_decoder = next_msg_dec;
|
||||
}
|
||||
|
||||
// We now have a `Right`, which is a finished messages block
|
||||
let msg_decoder = match msg_decoder {
|
||||
Either::Right(r) => r,
|
||||
_ => panic!("Didn't parse all messages")
|
||||
};
|
||||
|
||||
// Interestingly enough, `buf.consume(msg_decoder.unwrap())` isn't OK,
|
||||
// presumably something to do with when *precisely* the drop of `self`
|
||||
// happens for `msg_decoder`. Leave it as two statments so that
|
||||
// Rust is able to prove our immutable borrow of `data` ends in time
|
||||
// to consume the buffer
|
||||
let msg_len = msg_decoder.unwrap();
|
||||
buf.consume(msg_len);
|
||||
Ok(())
|
||||
}
|
||||
}
|
@ -1,50 +0,0 @@
|
||||
use alloc_counter::{AllocCounterSystem, count_alloc, deny_alloc};
|
||||
|
||||
use md_shootout::marketdata_capnp::multi_message;
|
||||
|
||||
#[global_allocator]
|
||||
static A: AllocCounterSystem = AllocCounterSystem;
|
||||
|
||||
#[test]
|
||||
fn reinit_memory_check() {
|
||||
// Setting up the builder doesn't reserve any heap memory
|
||||
let mut msg_block = deny_alloc(|| {
|
||||
capnp::message::Builder::new_default()
|
||||
});
|
||||
|
||||
// Setting up the root object, however, does reserve a first segment
|
||||
let (stats, result) = count_alloc(|| {
|
||||
let multimsg = msg_block.init_root::<multi_message::Builder>();
|
||||
multimsg.init_messages(32);
|
||||
});
|
||||
|
||||
assert_eq!(stats.0, 4);
|
||||
assert_eq!(stats.1, 0);
|
||||
assert_eq!(stats.2, 0);
|
||||
|
||||
// If we reinitialize an object on that original builder, we re-use memory
|
||||
deny_alloc(|| {
|
||||
let multimsg = msg_block.init_root::<multi_message::Builder>();
|
||||
multimsg.init_messages(32);
|
||||
|
||||
// Even if we down-size and up-size the message list size, we don't need
|
||||
// to re-allocate
|
||||
let multimsg = msg_block.init_root::<multi_message::Builder>();
|
||||
multimsg.init_messages(16);
|
||||
|
||||
let multimsg = msg_block.init_root::<multi_message::Builder>();
|
||||
multimsg.init_messages(32);
|
||||
});
|
||||
|
||||
// It's only when we init a larger message count that a fresh allocation occurs
|
||||
let (stats, _) = count_alloc(|| {
|
||||
let multimsg = msg_block.init_root::<multi_message::Builder>();
|
||||
// Note: calling `init_messages(33)` doesn't force allocation because
|
||||
// the Capnproto builder reserved extra memory the first time around
|
||||
multimsg.init_messages(256);
|
||||
});
|
||||
|
||||
assert_eq!(stats.0, 1);
|
||||
assert_eq!(stats.1, 3);
|
||||
assert_eq!(stats.2, 0);
|
||||
}
|
Loading…
Reference in New Issue
Block a user