mirror of
				https://github.com/speice-io/marketdata-shootout
				synced 2025-11-03 18:10:38 -05:00 
			
		
		
		
	Use traits for reader/writer
Should make it easier to write analysis code
This commit is contained in:
		@ -10,22 +10,11 @@ use capnp::serialize_packed::{read_message as read_message_packed, write_message
 | 
				
			|||||||
use nom::bytes::complete::take_until;
 | 
					use nom::bytes::complete::take_until;
 | 
				
			||||||
use nom::IResult;
 | 
					use nom::IResult;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::{StreamVec, Summarizer};
 | 
					use crate::{RunnerDeserialize, RunnerSerialize, StreamVec, Summarizer};
 | 
				
			||||||
use crate::iex::{IexMessage, IexPayload};
 | 
					use crate::iex::{IexMessage, IexPayload};
 | 
				
			||||||
use crate::marketdata_capnp::{multi_message, Side};
 | 
					use crate::marketdata_capnp::{multi_message, Side};
 | 
				
			||||||
use crate::marketdata_capnp::message;
 | 
					use crate::marketdata_capnp::message;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
 | 
					 | 
				
			||||||
    take_until(tag)(input)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn parse_symbol(sym: &[u8; 8]) -> &str {
 | 
					 | 
				
			||||||
    // TODO: Use the `jetscii` library for all that SIMD goodness
 | 
					 | 
				
			||||||
    // IEX guarantees ASCII, so we're fine using an unsafe conversion
 | 
					 | 
				
			||||||
    let (_, sym_bytes) = __take_until(" ", &sym[..]).unwrap();
 | 
					 | 
				
			||||||
    unsafe { from_utf8_unchecked(sym_bytes) }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub struct CapnpWriter<'a> {
 | 
					pub struct CapnpWriter<'a> {
 | 
				
			||||||
    // We have to be very careful with how messages are built, as running
 | 
					    // We have to be very careful with how messages are built, as running
 | 
				
			||||||
    // `init_root` and rebuilding will still accumulate garbage if using
 | 
					    // `init_root` and rebuilding will still accumulate garbage if using
 | 
				
			||||||
@ -33,10 +22,11 @@ pub struct CapnpWriter<'a> {
 | 
				
			|||||||
    // https://github.com/capnproto/capnproto-rust/issues/111
 | 
					    // https://github.com/capnproto/capnproto-rust/issues/111
 | 
				
			||||||
    words: Vec<capnp::Word>,
 | 
					    words: Vec<capnp::Word>,
 | 
				
			||||||
    scratch: ScratchSpace<'a>,
 | 
					    scratch: ScratchSpace<'a>,
 | 
				
			||||||
 | 
					    packed: bool,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl<'a> CapnpWriter<'a> {
 | 
					impl<'a> CapnpWriter<'a> {
 | 
				
			||||||
    pub fn new() -> CapnpWriter<'a> {
 | 
					    pub fn new(packed: bool) -> CapnpWriter<'a> {
 | 
				
			||||||
        // Cap'n'Proto words are 8 bytes, MTU is 1500 bytes, theoretically need only 188 words.
 | 
					        // Cap'n'Proto words are 8 bytes, MTU is 1500 bytes, theoretically need only 188 words.
 | 
				
			||||||
        // In practice, let's just make sure everything fits.
 | 
					        // In practice, let's just make sure everything fits.
 | 
				
			||||||
        let mut words = capnp::Word::allocate_zeroed_vec(1024);
 | 
					        let mut words = capnp::Word::allocate_zeroed_vec(1024);
 | 
				
			||||||
@ -48,6 +38,7 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
        CapnpWriter {
 | 
					        CapnpWriter {
 | 
				
			||||||
            words,
 | 
					            words,
 | 
				
			||||||
            scratch,
 | 
					            scratch,
 | 
				
			||||||
 | 
					            packed
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -60,8 +51,10 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
            std::mem::transmute(&mut self.scratch)
 | 
					            std::mem::transmute(&mut self.scratch)
 | 
				
			||||||
        }))
 | 
					        }))
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn serialize(&mut self, payload: &IexPayload, mut output: &mut Vec<u8>, packed: bool) {
 | 
					impl<'a> RunnerSerialize for CapnpWriter<'a> {
 | 
				
			||||||
 | 
					    fn serialize(&mut self, payload: &IexPayload, mut output: &mut Vec<u8>) {
 | 
				
			||||||
        // First, count the messages we actually care about.
 | 
					        // First, count the messages we actually care about.
 | 
				
			||||||
        let num_msgs = payload.messages.iter().map(|m| {
 | 
					        let num_msgs = payload.messages.iter().map(|m| {
 | 
				
			||||||
            match m {
 | 
					            match m {
 | 
				
			||||||
@ -76,14 +69,14 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // And actually serialize the IEX payload to CapNProto format
 | 
					        // And actually serialize the IEX payload to CapNProto format
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // This is the safe builder used for testing
 | 
				
			||||||
 | 
					        //let mut builder = capnp::message::Builder::new_default();
 | 
				
			||||||
 | 
					        //let mut multimsg = builder.init_root::<multi_message::Builder>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // This is the unsafe (but faster) version
 | 
					        // This is the unsafe (but faster) version
 | 
				
			||||||
        let mut builder = self.builder();
 | 
					        let mut builder = self.builder();
 | 
				
			||||||
        let mut multimsg = builder.init_root::<multi_message::Builder>();
 | 
					        let mut multimsg = builder.init_root::<multi_message::Builder>();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // And the safe version used for testing
 | 
					 | 
				
			||||||
        //let mut builder = capnp::message::Builder::new_default();
 | 
					 | 
				
			||||||
        //let mut multimsg = builder.init_root::<multi_message::Builder>();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        multimsg.set_seq_no(payload.first_seq_no);
 | 
					        multimsg.set_seq_no(payload.first_seq_no);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let mut messages = multimsg.init_messages(num_msgs as u32);
 | 
					        let mut messages = multimsg.init_messages(num_msgs as u32);
 | 
				
			||||||
@ -95,7 +88,7 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
                    current_msg_no += 1;
 | 
					                    current_msg_no += 1;
 | 
				
			||||||
                    message.set_ts(tr.timestamp);
 | 
					                    message.set_ts(tr.timestamp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    let sym = parse_symbol(&tr.symbol);
 | 
					                    let sym = crate::parse_symbol(&tr.symbol);
 | 
				
			||||||
                    message.reborrow().init_symbol(sym.len() as u32);
 | 
					                    message.reborrow().init_symbol(sym.len() as u32);
 | 
				
			||||||
                    message.set_symbol(sym);
 | 
					                    message.set_symbol(sym);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -108,7 +101,7 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
                    current_msg_no += 1;
 | 
					                    current_msg_no += 1;
 | 
				
			||||||
                    message.set_ts(plu.timestamp);
 | 
					                    message.set_ts(plu.timestamp);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    let sym = parse_symbol(&plu.symbol);
 | 
					                    let sym = crate::parse_symbol(&plu.symbol);
 | 
				
			||||||
                    message.reborrow().init_symbol(sym.len() as u32);
 | 
					                    message.reborrow().init_symbol(sym.len() as u32);
 | 
				
			||||||
                    message.set_symbol(sym);
 | 
					                    message.set_symbol(sym);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -122,28 +115,33 @@ impl<'a> CapnpWriter<'a> {
 | 
				
			|||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let write_fn = if packed { write_message_packed } else { write_message };
 | 
					        let write_fn = if self.packed { write_message_packed } else { write_message };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        write_fn(&mut output, &builder).unwrap();
 | 
					        write_fn(&mut output, &builder).unwrap();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub struct CapnpReader {
 | 
					pub struct CapnpReader {
 | 
				
			||||||
    read_opts: ReaderOptions
 | 
					    read_opts: ReaderOptions,
 | 
				
			||||||
 | 
					    packed: bool
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl CapnpReader {
 | 
					impl CapnpReader {
 | 
				
			||||||
    pub fn new() -> CapnpReader {
 | 
					    pub fn new(packed: bool) -> CapnpReader {
 | 
				
			||||||
        CapnpReader {
 | 
					        CapnpReader {
 | 
				
			||||||
            read_opts: ReaderOptions::new()
 | 
					            read_opts: ReaderOptions::new(),
 | 
				
			||||||
 | 
					            packed
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn deserialize_packed(&self, buf: &mut StreamVec, stats: &mut Summarizer) -> Result<(), Error> {
 | 
					impl CapnpReader {
 | 
				
			||||||
 | 
					    fn deserialize_packed<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
				
			||||||
        // Because `capnp::serialize_packed::PackedRead` is hidden from us, packed reads
 | 
					        // Because `capnp::serialize_packed::PackedRead` is hidden from us, packed reads
 | 
				
			||||||
        // *have* to both allocate new segments every read, and copy the buffer into
 | 
					        // *have* to both allocate new segments every read, and copy the buffer into
 | 
				
			||||||
        // those same segments, no ability to re-use allocated memory.
 | 
					        // those same segments, no ability to re-use allocated memory.
 | 
				
			||||||
        let reader = read_message_packed(buf, self.read_opts)?;
 | 
					        let reader = read_message_packed(buf, self.read_opts)
 | 
				
			||||||
 | 
					            .map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let multimsg = reader.get_root::<multi_message::Reader>().unwrap();
 | 
					        let multimsg = reader.get_root::<multi_message::Reader>().unwrap();
 | 
				
			||||||
        for msg in multimsg.get_messages().unwrap().iter() {
 | 
					        for msg in multimsg.get_messages().unwrap().iter() {
 | 
				
			||||||
@ -166,10 +164,10 @@ impl CapnpReader {
 | 
				
			|||||||
        Ok(())
 | 
					        Ok(())
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn deserialize_unpacked(&self, buf: &mut StreamVec, stats: &mut Summarizer) -> Result<(), Error> {
 | 
					    fn deserialize_unpacked(&self, buf: &mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
				
			||||||
        let mut data = buf.fill_buf()?;
 | 
					        let mut data = buf.fill_buf().map_err(|_| ())?;
 | 
				
			||||||
        if data.len() == 0 {
 | 
					        if data.len() == 0 {
 | 
				
			||||||
            return Err(capnp::Error::failed(String::new()));
 | 
					            return Err(());
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let orig_data = data;
 | 
					        let orig_data = data;
 | 
				
			||||||
@ -179,7 +177,7 @@ impl CapnpReader {
 | 
				
			|||||||
        Read into `OwnedSegments`, which means we copy the entire message into a new Vec. Note that
 | 
					        Read into `OwnedSegments`, which means we copy the entire message into a new Vec. Note that
 | 
				
			||||||
        the `data` pointer is modified underneath us, can figure out the message length by
 | 
					        the `data` pointer is modified underneath us, can figure out the message length by
 | 
				
			||||||
        checking the difference between where we started and what `data` is afterward.
 | 
					        checking the difference between where we started and what `data` is afterward.
 | 
				
			||||||
        This is a trick you learn only by checking the fuzzing test cases.
 | 
					        This is a trick you learn only by looking at the fuzzing test cases.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let reader = capnp::serialize::read_message(&mut data, reader_opts)?;
 | 
					        let reader = capnp::serialize::read_message(&mut data, reader_opts)?;
 | 
				
			||||||
        let bytes_consumed = orig_data.len() - data.len();
 | 
					        let bytes_consumed = orig_data.len() - data.len();
 | 
				
			||||||
@ -190,11 +188,12 @@ impl CapnpReader {
 | 
				
			|||||||
        but still forces a Vec allocation for `offsets`. Also requires us to copy code from
 | 
					        but still forces a Vec allocation for `offsets`. Also requires us to copy code from
 | 
				
			||||||
        Cap'n'Proto because `SliceSegments` has private fields, and `read_segment_table`
 | 
					        Cap'n'Proto because `SliceSegments` has private fields, and `read_segment_table`
 | 
				
			||||||
        is private. And all this because `read_segment_from_words` has a length check
 | 
					        is private. And all this because `read_segment_from_words` has a length check
 | 
				
			||||||
        that triggers an error if our buffer is too large.
 | 
					        that triggers an error if our buffer is too large. What the hell?
 | 
				
			||||||
        There is no other documentation on how to calculate `bytes_consumed` in this case
 | 
					        There is no documentation on how to calculate `bytes_consumed` when parsing by hand
 | 
				
			||||||
        that I could find, you just have to guess and check until you figure this one out.
 | 
					        that I could find, you just have to guess and check until you figure this one out.
 | 
				
			||||||
        */
 | 
					        */
 | 
				
			||||||
        let (num_words, offsets) = read_segment_table(&mut data, reader_opts)?;
 | 
					        let (num_words, offsets) = read_segment_table(&mut data, reader_opts)
 | 
				
			||||||
 | 
					            .map_err(|_| ())?;
 | 
				
			||||||
        let words = unsafe { capnp::Word::bytes_to_words(data) };
 | 
					        let words = unsafe { capnp::Word::bytes_to_words(data) };
 | 
				
			||||||
        let reader = capnp::message::Reader::new(
 | 
					        let reader = capnp::message::Reader::new(
 | 
				
			||||||
            SliceSegments {
 | 
					            SliceSegments {
 | 
				
			||||||
@ -207,18 +206,19 @@ impl CapnpReader {
 | 
				
			|||||||
        let msg_bytes = num_words * size_of::<capnp::Word>();
 | 
					        let msg_bytes = num_words * size_of::<capnp::Word>();
 | 
				
			||||||
        let bytes_consumed = segment_table_bytes + msg_bytes;
 | 
					        let bytes_consumed = segment_table_bytes + msg_bytes;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        let multimsg = reader.get_root::<multi_message::Reader>()?;
 | 
					        let multimsg = reader.get_root::<multi_message::Reader>()
 | 
				
			||||||
        for msg in multimsg.get_messages()?.iter() {
 | 
					            .map_err(|_| ())?;
 | 
				
			||||||
            let sym = msg.get_symbol()?;
 | 
					        for msg in multimsg.get_messages().map_err(|_| ())?.iter() {
 | 
				
			||||||
 | 
					            let sym = msg.get_symbol().map_err(|_| ())?;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            match msg.which()? {
 | 
					            match msg.which().map_err(|_| ())? {
 | 
				
			||||||
                message::Trade(trade) => {
 | 
					                message::Trade(trade) => {
 | 
				
			||||||
                    let trade = trade.unwrap();
 | 
					                    let trade = trade.unwrap();
 | 
				
			||||||
                    stats.append_trade_volume(sym, trade.get_size().into());
 | 
					                    stats.append_trade_volume(sym, trade.get_size().into());
 | 
				
			||||||
                },
 | 
					                },
 | 
				
			||||||
                message::Quote(quote) => {
 | 
					                message::Quote(quote) => {
 | 
				
			||||||
                    let quote = quote.unwrap();
 | 
					                    let quote = quote.unwrap();
 | 
				
			||||||
                    let is_buy = match quote.get_side()? {
 | 
					                    let is_buy = match quote.get_side().unwrap() {
 | 
				
			||||||
                        Side::Buy => true,
 | 
					                        Side::Buy => true,
 | 
				
			||||||
                        _ => false
 | 
					                        _ => false
 | 
				
			||||||
                    };
 | 
					                    };
 | 
				
			||||||
@ -232,6 +232,18 @@ impl CapnpReader {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl RunnerDeserialize for CapnpReader {
 | 
				
			||||||
 | 
					    fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
				
			||||||
 | 
					        // While this is an extra branch per call, we're going to assume that the overhead
 | 
				
			||||||
 | 
					        // is essentially nil in practice
 | 
				
			||||||
 | 
					        if self.packed {
 | 
				
			||||||
 | 
					            self.deserialize_packed(buf, stats)
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            self.deserialize_unpacked(buf, stats)
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub struct SliceSegments<'a> {
 | 
					pub struct SliceSegments<'a> {
 | 
				
			||||||
    words: &'a [capnp::Word],
 | 
					    words: &'a [capnp::Word],
 | 
				
			||||||
 | 
				
			|||||||
@ -7,21 +7,10 @@ use capnp::data::new_builder;
 | 
				
			|||||||
use flatbuffers::buffer_has_identifier;
 | 
					use flatbuffers::buffer_has_identifier;
 | 
				
			||||||
use nom::{bytes::complete::take_until, IResult};
 | 
					use nom::{bytes::complete::take_until, IResult};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::{StreamVec, Summarizer};
 | 
					use crate::{RunnerDeserialize, RunnerSerialize, StreamVec, Summarizer};
 | 
				
			||||||
use crate::iex::{IexMessage, IexPayload};
 | 
					use crate::iex::{IexMessage, IexPayload};
 | 
				
			||||||
use crate::marketdata_generated::md_shootout;
 | 
					use crate::marketdata_generated::md_shootout;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
 | 
					 | 
				
			||||||
    take_until(tag)(input)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn parse_symbol(sym: &[u8; 8]) -> &str {
 | 
					 | 
				
			||||||
    // TODO: Use the `jetscii` library for all that SIMD goodness
 | 
					 | 
				
			||||||
    // IEX guarantees ASCII, so we're fine using an unsafe conversion
 | 
					 | 
				
			||||||
    let (_, sym_bytes) = __take_until(" ", &sym[..]).unwrap();
 | 
					 | 
				
			||||||
    unsafe { from_utf8_unchecked(sym_bytes) }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub struct FlatbuffersWriter<'a> {
 | 
					pub struct FlatbuffersWriter<'a> {
 | 
				
			||||||
    builder: flatbuffers::FlatBufferBuilder<'a>,
 | 
					    builder: flatbuffers::FlatBufferBuilder<'a>,
 | 
				
			||||||
    message_buffer: Vec<flatbuffers::WIPOffset<md_shootout::Message<'a>>>,
 | 
					    message_buffer: Vec<flatbuffers::WIPOffset<md_shootout::Message<'a>>>,
 | 
				
			||||||
@ -34,8 +23,10 @@ impl<'a> FlatbuffersWriter<'a> {
 | 
				
			|||||||
            message_buffer: Vec::new(),
 | 
					            message_buffer: Vec::new(),
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>) {
 | 
					impl<'a> RunnerSerialize for FlatbuffersWriter<'a> {
 | 
				
			||||||
 | 
					    fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>) {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        // Because FlatBuffers can't handle nested vectors (specifically, we can't track
 | 
					        // Because FlatBuffers can't handle nested vectors (specifically, we can't track
 | 
				
			||||||
        // both the variable-length vector of messages, and the variable-length strings
 | 
					        // both the variable-length vector of messages, and the variable-length strings
 | 
				
			||||||
@ -46,8 +37,7 @@ impl<'a> FlatbuffersWriter<'a> {
 | 
				
			|||||||
            let msg_args = match iex_msg {
 | 
					            let msg_args = match iex_msg {
 | 
				
			||||||
                IexMessage::TradeReport(tr) => {
 | 
					                IexMessage::TradeReport(tr) => {
 | 
				
			||||||
                    // The `Args` objects used are wrappers over an underlying `Builder`.
 | 
					                    // The `Args` objects used are wrappers over an underlying `Builder`.
 | 
				
			||||||
                    // We trust release builds to optimize out the wrapper, but would be
 | 
					                    // We trust release builds to optimize out the wrapper.
 | 
				
			||||||
                    // interesting to know whether that's actually the case.
 | 
					 | 
				
			||||||
                    let trade = md_shootout::Trade::create(
 | 
					                    let trade = md_shootout::Trade::create(
 | 
				
			||||||
                        &mut self.builder,
 | 
					                        &mut self.builder,
 | 
				
			||||||
                        &md_shootout::TradeArgs {
 | 
					                        &md_shootout::TradeArgs {
 | 
				
			||||||
@ -56,18 +46,11 @@ impl<'a> FlatbuffersWriter<'a> {
 | 
				
			|||||||
                        },
 | 
					                        },
 | 
				
			||||||
                    );
 | 
					                    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    /*
 | 
					                    let sym_str = self.builder.create_string(crate::parse_symbol(&tr.symbol));
 | 
				
			||||||
                    let mut trade_builder = md_shootout::TradeBuilder::new(self.builder);
 | 
					 | 
				
			||||||
                    trade_builder.add_price(tr.price);
 | 
					 | 
				
			||||||
                    trade_builder.add_size_(tr.size);
 | 
					 | 
				
			||||||
                    let trade = trade_builder.finish();
 | 
					 | 
				
			||||||
                    */
 | 
					 | 
				
			||||||
                    let sym_str = self.builder.create_string(parse_symbol(&tr.symbol));
 | 
					 | 
				
			||||||
                    Some(md_shootout::MessageArgs {
 | 
					                    Some(md_shootout::MessageArgs {
 | 
				
			||||||
                        ts_nanos: tr.timestamp,
 | 
					                        ts_nanos: tr.timestamp,
 | 
				
			||||||
                        symbol: Some(sym_str),
 | 
					                        symbol: Some(sym_str),
 | 
				
			||||||
                        body_type: md_shootout::MessageBody::Trade,
 | 
					                        body_type: md_shootout::MessageBody::Trade,
 | 
				
			||||||
                        // Why the hell do I need the `as_union_value` function to convert to UnionWIPOffset???
 | 
					 | 
				
			||||||
                        body: Some(trade.as_union_value()),
 | 
					                        body: Some(trade.as_union_value()),
 | 
				
			||||||
                    })
 | 
					                    })
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
@ -82,7 +65,7 @@ impl<'a> FlatbuffersWriter<'a> {
 | 
				
			|||||||
                        },
 | 
					                        },
 | 
				
			||||||
                    );
 | 
					                    );
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    let sym_str = self.builder.create_string(parse_symbol(&plu.symbol));
 | 
					                    let sym_str = self.builder.create_string(crate::parse_symbol(&plu.symbol));
 | 
				
			||||||
                    Some(md_shootout::MessageArgs {
 | 
					                    Some(md_shootout::MessageArgs {
 | 
				
			||||||
                        ts_nanos: plu.timestamp,
 | 
					                        ts_nanos: plu.timestamp,
 | 
				
			||||||
                        symbol: Some(sym_str),
 | 
					                        symbol: Some(sym_str),
 | 
				
			||||||
@ -124,8 +107,10 @@ impl FlatbuffersReader {
 | 
				
			|||||||
    pub fn new() -> FlatbuffersReader {
 | 
					    pub fn new() -> FlatbuffersReader {
 | 
				
			||||||
        FlatbuffersReader {}
 | 
					        FlatbuffersReader {}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
					impl RunnerDeserialize for FlatbuffersReader {
 | 
				
			||||||
 | 
					    fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
				
			||||||
        // Flatbuffers has kinda ad-hoc support for streaming: https://github.com/google/flatbuffers/issues/3898
 | 
					        // Flatbuffers has kinda ad-hoc support for streaming: https://github.com/google/flatbuffers/issues/3898
 | 
				
			||||||
        // Essentially, you can write an optional `u32` value to the front of each message
 | 
					        // Essentially, you can write an optional `u32` value to the front of each message
 | 
				
			||||||
        // (`finish_size_prefixed` above) to figure out how long that message actually is.
 | 
					        // (`finish_size_prefixed` above) to figure out how long that message actually is.
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										35
									
								
								src/main.rs
									
									
									
									
									
								
							
							
						
						
									
										35
									
								
								src/main.rs
									
									
									
									
									
								
							@ -5,11 +5,13 @@ use std::hash::Hasher;
 | 
				
			|||||||
use std::io::{BufRead, Read};
 | 
					use std::io::{BufRead, Read};
 | 
				
			||||||
use std::io::Error;
 | 
					use std::io::Error;
 | 
				
			||||||
use std::path::Path;
 | 
					use std::path::Path;
 | 
				
			||||||
use std::time::SystemTime;
 | 
					use std::str::from_utf8_unchecked;
 | 
				
			||||||
 | 
					use std::time::{Instant, SystemTime};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use clap::{App, Arg};
 | 
					use clap::{App, Arg};
 | 
				
			||||||
 | 
					use nom::{bytes::complete::take_until, IResult};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::iex::IexParser;
 | 
					use crate::iex::{IexParser, IexPayload};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
 | 
					// Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
 | 
				
			||||||
// the schemas. For purposes of auto-complete and easy browsing in the repository,
 | 
					// the schemas. For purposes of auto-complete and easy browsing in the repository,
 | 
				
			||||||
@ -68,10 +70,14 @@ fn main() {
 | 
				
			|||||||
    while let Ok(_) = capnp_reader.deserialize_packed(&mut read_buf, &mut summarizer) {
 | 
					    while let Ok(_) = capnp_reader.deserialize_packed(&mut read_buf, &mut summarizer) {
 | 
				
			||||||
        parsed_msgs += 1;
 | 
					        parsed_msgs += 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut fb_writer = flatbuffers_runner::FlatbuffersWriter::new();
 | 
					    let mut fb_writer = flatbuffers_runner::FlatbuffersWriter::new();
 | 
				
			||||||
    for iex_payload in parser {
 | 
					    for iex_payload in parser {
 | 
				
			||||||
 | 
					        let now = Instant::now();
 | 
				
			||||||
        fb_writer.serialize(&iex_payload, &mut output_buf);
 | 
					        fb_writer.serialize(&iex_payload, &mut output_buf);
 | 
				
			||||||
 | 
					        let serialize_nanos = Instant::now().duration_since(now).as_nanos();
 | 
				
			||||||
 | 
					        dbg!(serialize_nanos);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let mut read_buf = StreamVec::new(output_buf);
 | 
					    let mut read_buf = StreamVec::new(output_buf);
 | 
				
			||||||
@ -81,13 +87,15 @@ fn main() {
 | 
				
			|||||||
    while let Ok(_) = fb_reader.deserialize(&mut read_buf, &mut summarizer) {
 | 
					    while let Ok(_) = fb_reader.deserialize(&mut read_buf, &mut summarizer) {
 | 
				
			||||||
        parsed_msgs += 1;
 | 
					        parsed_msgs += 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    */
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /*
 | 
					    /*
 | 
				
			||||||
    let mut capnp_writer = capnp_runner::CapnpWriter::new();
 | 
					    let mut capnp_writer = capnp_runner::CapnpWriter::new();
 | 
				
			||||||
    for iex_payload in parser {
 | 
					    for iex_payload in parser {
 | 
				
			||||||
        //let iex_payload = parser.next().unwrap();
 | 
					        //let iex_payload = parser.next().unwrap();
 | 
				
			||||||
 | 
					        let now = Instant::now();
 | 
				
			||||||
        capnp_writer.serialize(&iex_payload, &mut output_buf, false);
 | 
					        capnp_writer.serialize(&iex_payload, &mut output_buf, false);
 | 
				
			||||||
 | 
					        let serialize_nanos = Instant::now().duration_since(now).as_nanos();
 | 
				
			||||||
 | 
					        dbg!(serialize_nanos);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let capnp_reader = capnp_runner::CapnpReader::new();
 | 
					    let capnp_reader = capnp_runner::CapnpReader::new();
 | 
				
			||||||
@ -98,6 +106,7 @@ fn main() {
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
    */
 | 
					    */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /*
 | 
				
			||||||
    let mut sbe_writer = sbe_runner::SBEWriter::new();
 | 
					    let mut sbe_writer = sbe_runner::SBEWriter::new();
 | 
				
			||||||
    for iex_payload in parser {
 | 
					    for iex_payload in parser {
 | 
				
			||||||
        //let iex_payload = parser.next().unwrap();
 | 
					        //let iex_payload = parser.next().unwrap();
 | 
				
			||||||
@ -110,6 +119,7 @@ fn main() {
 | 
				
			|||||||
    while let Ok(_) = sbe_reader.deserialize(&mut read_buf, &mut summarizer) {
 | 
					    while let Ok(_) = sbe_reader.deserialize(&mut read_buf, &mut summarizer) {
 | 
				
			||||||
        parsed_msgs += 1;
 | 
					        parsed_msgs += 1;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					    */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    dbg!(parsed_msgs);
 | 
					    dbg!(parsed_msgs);
 | 
				
			||||||
    dbg!(summarizer);
 | 
					    dbg!(summarizer);
 | 
				
			||||||
@ -197,3 +207,22 @@ impl BufRead for StreamVec {
 | 
				
			|||||||
        self.pos += amt;
 | 
					        self.pos += amt;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					trait RunnerSerialize {
 | 
				
			||||||
 | 
					    fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					trait RunnerDeserialize {
 | 
				
			||||||
 | 
					    fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()>;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
 | 
				
			||||||
 | 
					    take_until(tag)(input)
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn parse_symbol(sym: &[u8; 8]) -> &str {
 | 
				
			||||||
 | 
					    // TODO: Use the `jetscii` library for all that SIMD goodness
 | 
				
			||||||
 | 
					    // IEX guarantees ASCII, so we're fine using an unsafe conversion
 | 
				
			||||||
 | 
					    let (_, sym_bytes) = __take_until(" ", &sym[..]).unwrap();
 | 
				
			||||||
 | 
					    unsafe { from_utf8_unchecked(sym_bytes) }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
				
			|||||||
@ -404,6 +404,7 @@ pub struct MultiMessageFieldsDecoder<'d> {
 | 
				
			|||||||
  scratch: ScratchDecoderData<'d>,
 | 
					  scratch: ScratchDecoderData<'d>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
impl<'d> MultiMessageFieldsDecoder<'d> {
 | 
					impl<'d> MultiMessageFieldsDecoder<'d> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageFieldsDecoder<'d> {
 | 
					  pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageFieldsDecoder<'d> {
 | 
				
			||||||
    MultiMessageFieldsDecoder { scratch: scratch }
 | 
					    MultiMessageFieldsDecoder { scratch: scratch }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@ -418,6 +419,7 @@ pub struct MultiMessageMessageHeaderDecoder<'d> {
 | 
				
			|||||||
  scratch: ScratchDecoderData<'d>,
 | 
					  scratch: ScratchDecoderData<'d>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
impl<'d> MultiMessageMessageHeaderDecoder<'d> {
 | 
					impl<'d> MultiMessageMessageHeaderDecoder<'d> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageMessageHeaderDecoder<'d> {
 | 
					  pub fn wrap(scratch: ScratchDecoderData<'d>) -> MultiMessageMessageHeaderDecoder<'d> {
 | 
				
			||||||
    MultiMessageMessageHeaderDecoder { scratch: scratch }
 | 
					    MultiMessageMessageHeaderDecoder { scratch: scratch }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
@ -518,6 +520,7 @@ pub struct MultiMessageFieldsEncoder<'d> {
 | 
				
			|||||||
  scratch: ScratchEncoderData<'d>,
 | 
					  scratch: ScratchEncoderData<'d>,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
impl<'d> MultiMessageFieldsEncoder<'d> {
 | 
					impl<'d> MultiMessageFieldsEncoder<'d> {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  pub fn wrap(scratch: ScratchEncoderData<'d>) -> MultiMessageFieldsEncoder<'d> {
 | 
					  pub fn wrap(scratch: ScratchEncoderData<'d>) -> MultiMessageFieldsEncoder<'d> {
 | 
				
			||||||
    MultiMessageFieldsEncoder { scratch: scratch }
 | 
					    MultiMessageFieldsEncoder { scratch: scratch }
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
				
			|||||||
@ -4,21 +4,10 @@ use std::str::from_utf8_unchecked;
 | 
				
			|||||||
use nom::bytes::complete::take_until;
 | 
					use nom::bytes::complete::take_until;
 | 
				
			||||||
use nom::IResult;
 | 
					use nom::IResult;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
use crate::{marketdata_sbe, StreamVec, Summarizer};
 | 
					use crate::{marketdata_sbe, RunnerDeserialize, RunnerSerialize, StreamVec, Summarizer};
 | 
				
			||||||
use crate::iex::{IexMessage, IexPayload};
 | 
					use crate::iex::{IexMessage, IexPayload};
 | 
				
			||||||
use crate::marketdata_sbe::{Either, MultiMessageFields, MultiMessageMessageHeader, MultiMessageMessagesMember, MultiMessageMessagesMemberEncoder, MultiMessageMessagesSymbolEncoder, Side, start_decoding_multi_message, start_encoding_multi_message};
 | 
					use crate::marketdata_sbe::{Either, MultiMessageFields, MultiMessageMessageHeader, MultiMessageMessagesMember, MultiMessageMessagesMemberEncoder, MultiMessageMessagesSymbolEncoder, Side, start_decoding_multi_message, start_encoding_multi_message};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
 | 
					 | 
				
			||||||
    take_until(tag)(input)
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
fn parse_symbol(sym: &[u8; 8]) -> &str {
 | 
					 | 
				
			||||||
    // TODO: Use the `jetscii` library for all that SIMD goodness
 | 
					 | 
				
			||||||
    // IEX guarantees ASCII, so we're fine using an unsafe conversion
 | 
					 | 
				
			||||||
    let (_, sym_bytes) = __take_until(" ", &sym[..]).unwrap();
 | 
					 | 
				
			||||||
    unsafe { from_utf8_unchecked(sym_bytes) }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub struct SBEWriter {
 | 
					pub struct SBEWriter {
 | 
				
			||||||
    /// Buffer to construct messages before copying. While SBE benefits
 | 
					    /// Buffer to construct messages before copying. While SBE benefits
 | 
				
			||||||
    /// from easily being able to create messages directly in output buffer,
 | 
					    /// from easily being able to create messages directly in output buffer,
 | 
				
			||||||
@ -38,8 +27,10 @@ impl SBEWriter {
 | 
				
			|||||||
            default_header: MultiMessageMessageHeader::default(),
 | 
					            default_header: MultiMessageMessageHeader::default(),
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>) {
 | 
					impl RunnerSerialize for SBEWriter {
 | 
				
			||||||
 | 
					    fn serialize(&mut self, payload: &IexPayload, output: &mut Vec<u8>) {
 | 
				
			||||||
        let (fields, encoder) = start_encoding_multi_message(&mut self.scratch_buffer[..])
 | 
					        let (fields, encoder) = start_encoding_multi_message(&mut self.scratch_buffer[..])
 | 
				
			||||||
            .header_copy(&self.default_header.message_header).unwrap()
 | 
					            .header_copy(&self.default_header.message_header).unwrap()
 | 
				
			||||||
            .multi_message_fields().unwrap();
 | 
					            .multi_message_fields().unwrap();
 | 
				
			||||||
@ -59,7 +50,7 @@ impl SBEWriter {
 | 
				
			|||||||
                        ..Default::default()
 | 
					                        ..Default::default()
 | 
				
			||||||
                    };
 | 
					                    };
 | 
				
			||||||
                    let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
 | 
					                    let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
 | 
				
			||||||
                    sym_enc.symbol(parse_symbol(&tr.symbol).as_bytes()).unwrap()
 | 
					                    sym_enc.symbol(crate::parse_symbol(&tr.symbol).as_bytes()).unwrap()
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                IexMessage::PriceLevelUpdate(plu) => {
 | 
					                IexMessage::PriceLevelUpdate(plu) => {
 | 
				
			||||||
                    let fields = MultiMessageMessagesMember {
 | 
					                    let fields = MultiMessageMessagesMember {
 | 
				
			||||||
@ -74,7 +65,7 @@ impl SBEWriter {
 | 
				
			|||||||
                        ..Default::default()
 | 
					                        ..Default::default()
 | 
				
			||||||
                    };
 | 
					                    };
 | 
				
			||||||
                    let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
 | 
					                    let sym_enc: MultiMessageMessagesSymbolEncoder = enc.next_messages_member(&fields).unwrap();
 | 
				
			||||||
                    sym_enc.symbol(parse_symbol(&plu.symbol).as_bytes()).unwrap()
 | 
					                    sym_enc.symbol(crate::parse_symbol(&plu.symbol).as_bytes()).unwrap()
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                _ => enc
 | 
					                _ => enc
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@ -93,8 +84,10 @@ impl SBEReader {
 | 
				
			|||||||
    pub fn new() -> SBEReader {
 | 
					    pub fn new() -> SBEReader {
 | 
				
			||||||
        SBEReader {}
 | 
					        SBEReader {}
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
					impl RunnerDeserialize for SBEReader {
 | 
				
			||||||
 | 
					    fn deserialize<'a>(&self, buf: &'a mut StreamVec, stats: &mut Summarizer) -> Result<(), ()> {
 | 
				
			||||||
        let data = buf.fill_buf().unwrap();
 | 
					        let data = buf.fill_buf().unwrap();
 | 
				
			||||||
        if data.len() == 0 {
 | 
					        if data.len() == 0 {
 | 
				
			||||||
            return Err(());
 | 
					            return Err(());
 | 
				
			||||||
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user