mirror of
https://github.com/speice-io/marketdata-shootout
synced 2025-07-02 06:16:14 -04:00
Capnproto serialize/deserialize loop running!
This commit is contained in:
166
src/capnp_runner.rs
Normal file
166
src/capnp_runner.rs
Normal file
@ -0,0 +1,166 @@
|
||||
use std::cmp::{max, min};
|
||||
use std::collections::hash_map::{DefaultHasher, HashMap};
|
||||
use std::hash::Hasher;
|
||||
use std::io::{BufReader, Error, Read};
|
||||
use std::str::from_utf8_unchecked;
|
||||
|
||||
use capnp::message::ReaderOptions;
|
||||
use capnp::serialize::{read_message, write_message};
|
||||
use capnp::serialize_packed::{read_message as read_message_packed, write_message as write_message_packed};
|
||||
use nom::bytes::complete::take_until;
|
||||
use nom::IResult;
|
||||
|
||||
use crate::iex::{IexMessage, IexParser};
|
||||
use crate::marketdata_capnp::{multi_message, Side};
|
||||
use crate::marketdata_capnp::message;
|
||||
use crate::SummaryStats;
|
||||
|
||||
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
|
||||
take_until(tag)(input)
|
||||
}
|
||||
|
||||
/// Returns the ticker stored in a fixed-width IEX symbol field, without its
/// trailing space padding.
///
/// The field is cut at the first space. A symbol that fills all eight bytes
/// has no padding at all, so in that case the whole field is returned instead
/// of panicking on the missing delimiter.
fn parse_symbol(sym: &[u8; 8]) -> &str {
    let len = sym.iter().position(|&b| b == b' ').unwrap_or(sym.len());
    let sym_bytes = &sym[..len];

    debug_assert!(sym_bytes.is_ascii(), "IEX symbols are expected to be ASCII");
    // SAFETY: IEX guarantees symbols are ASCII, and every ASCII byte sequence
    // is valid UTF-8, so skipping validation is sound for well-formed feeds.
    unsafe { from_utf8_unchecked(sym_bytes) }
}
|
||||
|
||||
pub fn serialize_capnp(parser: IexParser, size_hint: usize, packed: bool) -> Vec<u8> {
|
||||
let write_fn = if packed { write_message_packed } else { write_message };
|
||||
|
||||
// Because CapNProto builds messages in heap before serialization,
|
||||
// we'll reserve memory up front and should avoid alloc calls later
|
||||
let mut capnp_message = capnp::message::Builder::new_default();
|
||||
let multimsg = capnp_message.init_root::<multi_message::Builder>();
|
||||
multimsg.init_messages(256);
|
||||
|
||||
// Allocate our output buffer
|
||||
let mut output: Vec<u8> = Vec::with_capacity(size_hint);
|
||||
|
||||
// Now to the actual work
|
||||
for iex_msg in parser {
|
||||
// Find the messages we actually care about in this context
|
||||
let num_msgs = iex_msg.messages.iter().map(|m| {
|
||||
match m {
|
||||
IexMessage::TradeReport(_) | IexMessage::PriceLevelUpdate(_) => 1,
|
||||
_ => 0
|
||||
}
|
||||
}).fold(0, |sum, i| sum + i);
|
||||
|
||||
if num_msgs == 0 {
|
||||
continue;
|
||||
}
|
||||
|
||||
// And actually serialize the IEX payload to CapNProto format
|
||||
let mut multimsg = capnp_message.init_root::<multi_message::Builder>();
|
||||
multimsg.set_seq_no(iex_msg.first_seq_no);
|
||||
|
||||
let mut messages = multimsg.init_messages(num_msgs as u32);
|
||||
let mut current_msg_no = 0;
|
||||
for iex_msg in iex_msg.messages {
|
||||
match iex_msg {
|
||||
IexMessage::TradeReport(tr) => {
|
||||
let mut message = messages.reborrow().get(current_msg_no);
|
||||
current_msg_no += 1;
|
||||
message.set_ts(tr.timestamp);
|
||||
|
||||
let sym = parse_symbol(&tr.symbol);
|
||||
message.reborrow().init_symbol(sym.len() as u32);
|
||||
message.set_symbol(sym);
|
||||
|
||||
let mut msg_tr = message.init_trade();
|
||||
msg_tr.set_size(tr.size);
|
||||
msg_tr.set_price(tr.price);
|
||||
}
|
||||
IexMessage::PriceLevelUpdate(plu) => {
|
||||
let mut message = messages.reborrow().get(current_msg_no);
|
||||
current_msg_no += 1;
|
||||
message.set_ts(plu.timestamp);
|
||||
|
||||
let sym = parse_symbol(&plu.symbol);
|
||||
message.reborrow().init_symbol(sym.len() as u32);
|
||||
message.set_symbol(sym);
|
||||
|
||||
let mut msg_plu = message.init_quote();
|
||||
msg_plu.set_price(plu.price);
|
||||
msg_plu.set_size(plu.size);
|
||||
msg_plu.set_flags(plu.event_flags);
|
||||
msg_plu.set_side(if plu.msg_type == 0x38 { Side::Buy } else { Side::Sell });
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
}
|
||||
|
||||
write_fn(&mut output, &capnp_message).unwrap();
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
|
||||
/// A `Read` adapter over a borrowed byte vector that remembers how far it has
/// been consumed.
struct AdvancingVec<'a> {
    /// Offset into `inner` of the next byte to hand out.
    pos: usize,
    /// The bytes being read; never modified.
    inner: &'a Vec<u8>,
}

impl<'a> Read for AdvancingVec<'a> {
    /// Copies as many unread bytes as fit into `buf`, advances the cursor past
    /// them and returns how many were copied. Returns `Ok(0)` once the vector
    /// is exhausted; this implementation never returns an error.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
        let unread = &self.inner[self.pos..];
        let count = unread.len().min(buf.len());

        buf[..count].copy_from_slice(&unread[..count]);
        self.pos += count;

        Ok(count)
    }
}
|
||||
|
||||
pub fn read_capnp(buffer: &Vec<u8>, packed: bool) -> HashMap<u64, SummaryStats> {
|
||||
let read_fn = if packed { read_message_packed } else { read_message };
|
||||
let unbuffered = AdvancingVec {
|
||||
pos: 0,
|
||||
inner: buffer,
|
||||
};
|
||||
let mut buffered = BufReader::new(unbuffered);
|
||||
let read_opts = ReaderOptions::new();
|
||||
|
||||
let mut stats = HashMap::new();
|
||||
|
||||
while let Ok(msg) = read_fn(&mut buffered, read_opts) {
|
||||
let multimsg = msg.get_root::<multi_message::Reader>().unwrap();
|
||||
|
||||
for msg in multimsg.get_messages().unwrap().iter() {
|
||||
// Hash the symbol name since we can't return a HashMap containing
|
||||
// string pointers as the keys
|
||||
let sym = msg.get_symbol().unwrap();
|
||||
let mut h = DefaultHasher::new();
|
||||
h.write(sym.as_bytes());
|
||||
let key = h.finish();
|
||||
|
||||
let mut sym_stats = stats.entry(key)
|
||||
.or_insert(SummaryStats::new(sym));
|
||||
|
||||
match msg.which() {
|
||||
Ok(message::Trade(tr)) => {
|
||||
let tr = tr.unwrap();
|
||||
sym_stats.trade_volume += tr.get_size() as u64;
|
||||
}
|
||||
Ok(message::Quote(q)) => {
|
||||
let q = q.unwrap();
|
||||
if q.get_side().unwrap() == Side::Buy {
|
||||
sym_stats.bid_high = max(sym_stats.bid_high, q.get_price());
|
||||
sym_stats.bid_low = min(sym_stats.bid_low, q.get_price());
|
||||
} else {
|
||||
sym_stats.ask_high = max(sym_stats.ask_high, q.get_price());
|
||||
sym_stats.ask_low = min(sym_stats.ask_low, q.get_price());
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
panic!("Unrecognized message type")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stats
|
||||
}
|
57
src/iex.rs
57
src/iex.rs
@ -5,7 +5,7 @@ use nom::{bytes::complete::take, IResult, number::complete::*, sequence::tuple};
|
||||
use crate::parsers::{Block, extract_iex_data, read_block};
|
||||
|
||||
pub struct IexParser<'a> {
|
||||
pcap_buffer: &'a [u8]
|
||||
pcap_buffer: &'a [u8],
|
||||
}
|
||||
|
||||
impl<'a> IexParser<'a> {
|
||||
@ -28,7 +28,7 @@ impl<'a> Iterator for IexParser<'a> {
|
||||
let (_, payload) = IexPayload::parse(iex_data).unwrap();
|
||||
return Some(payload);
|
||||
}
|
||||
_ => ()
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
@ -44,17 +44,32 @@ pub struct IexPayload {
|
||||
channel_id: u32,
|
||||
session_id: u32,
|
||||
payload_len: u16,
|
||||
msg_count: u16,
|
||||
pub msg_count: u16,
|
||||
stream_offset: u64,
|
||||
first_seq_no: u64,
|
||||
pub first_seq_no: u64,
|
||||
send_time: i64,
|
||||
messages: smallvec::SmallVec<[IexMessage; 8]>,
|
||||
pub messages: smallvec::SmallVec<[IexMessage; 256]>,
|
||||
}
|
||||
|
||||
impl IexPayload {
|
||||
pub fn parse(payload: &[u8]) -> IResult<&[u8], IexPayload> {
|
||||
let (mut rem, (version, _reserved, proto_id, channel_id, session_id, payload_len, msg_count, stream_offset, first_seq_no, send_time)) =
|
||||
tuple((le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64))(payload)?;
|
||||
let (
|
||||
mut rem,
|
||||
(
|
||||
version,
|
||||
_reserved,
|
||||
proto_id,
|
||||
channel_id,
|
||||
session_id,
|
||||
payload_len,
|
||||
msg_count,
|
||||
stream_offset,
|
||||
first_seq_no,
|
||||
send_time,
|
||||
),
|
||||
) = tuple((
|
||||
le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64,
|
||||
))(payload)?;
|
||||
|
||||
let mut messages = smallvec::SmallVec::new();
|
||||
for _i in 0..msg_count {
|
||||
@ -78,7 +93,7 @@ impl IexPayload {
|
||||
first_seq_no,
|
||||
send_time,
|
||||
messages,
|
||||
}
|
||||
},
|
||||
))
|
||||
}
|
||||
}
|
||||
@ -293,12 +308,12 @@ impl SecurityEvent {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct PriceLevelUpdate {
|
||||
msg_type: u8,
|
||||
event_flags: u8,
|
||||
timestamp: i64,
|
||||
symbol: [u8; 8],
|
||||
size: u32,
|
||||
price: u64,
|
||||
pub msg_type: u8,
|
||||
pub event_flags: u8,
|
||||
pub timestamp: i64,
|
||||
pub symbol: [u8; 8],
|
||||
pub size: u32,
|
||||
pub price: u64,
|
||||
}
|
||||
|
||||
impl PriceLevelUpdate {
|
||||
@ -322,13 +337,13 @@ impl PriceLevelUpdate {
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct TradeReport {
|
||||
msg_type: u8,
|
||||
sale_condition: u8,
|
||||
timestamp: i64,
|
||||
symbol: [u8; 8],
|
||||
size: u32,
|
||||
price: u64,
|
||||
trade_id: u64,
|
||||
pub msg_type: u8,
|
||||
pub sale_condition: u8,
|
||||
pub timestamp: i64,
|
||||
pub symbol: [u8; 8],
|
||||
pub size: u32,
|
||||
pub price: u64,
|
||||
pub trade_id: u64,
|
||||
}
|
||||
|
||||
impl TradeReport {
|
||||
|
2
src/lib.rs
Normal file
2
src/lib.rs
Normal file
@ -0,0 +1,2 @@
|
||||
// This file is needed for tests outside the main source tree to find the project files
|
||||
pub mod marketdata_capnp;
|
61
src/main.rs
61
src/main.rs
@ -5,7 +5,7 @@ use std::time::SystemTime;
|
||||
|
||||
use clap::{App, Arg};
|
||||
|
||||
use crate::iex::IexParser;
|
||||
use crate::iex::{IexMessage, IexParser};
|
||||
|
||||
// Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
|
||||
// the schemas. For purposes of auto-complete and easy browsing in the repository,
|
||||
@ -14,18 +14,21 @@ pub mod marketdata_capnp;
|
||||
#[allow(unused_imports)]
|
||||
pub mod marketdata_generated; // Flatbuffers
|
||||
|
||||
mod capnp_runner;
|
||||
mod iex;
|
||||
mod parsers;
|
||||
|
||||
fn main() {
|
||||
let matches = App::new("Marketdata Shootout")
|
||||
.arg(Arg::with_name("file")
|
||||
.short("f")
|
||||
.long("file")
|
||||
.value_name("FILE")
|
||||
.help("IEX DEEP file to process")
|
||||
.required(true)
|
||||
.takes_value(true))
|
||||
.arg(
|
||||
Arg::with_name("file")
|
||||
.short("f")
|
||||
.long("file")
|
||||
.value_name("FILE")
|
||||
.help("IEX DEEP file to process")
|
||||
.required(true)
|
||||
.takes_value(true),
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
let deep = matches.value_of("file").unwrap();
|
||||
@ -33,11 +36,43 @@ fn main() {
|
||||
let mut file = File::open(path).expect(&format!("Unable to open file={}", path.display()));
|
||||
|
||||
let mut buf = Vec::new();
|
||||
file.read_to_end(&mut buf).expect(&format!("Unable to read file={}", path.display()));
|
||||
file.read_to_end(&mut buf)
|
||||
.expect(&format!("Unable to read file={}", path.display()));
|
||||
|
||||
let start = SystemTime::now();
|
||||
for _payload in IexParser::new(&buf[..]) {
|
||||
//dbg!(payload);
|
||||
}
|
||||
println!("Parse time seconds={}", SystemTime::now().duration_since(start).unwrap().as_secs())
|
||||
|
||||
// Try with Capnproto for now
|
||||
let parser = IexParser::new(&buf[..]);
|
||||
let capnp_buf = capnp_runner::serialize_capnp(parser, buf.len(), true);
|
||||
let stats = capnp_runner::read_capnp(&capnp_buf, true);
|
||||
|
||||
dbg!(stats);
|
||||
|
||||
println!(
|
||||
"Parse time seconds={}",
|
||||
SystemTime::now().duration_since(start).unwrap().as_secs()
|
||||
)
|
||||
}
|
||||
|
||||
/// Running aggregates for a single symbol.
#[derive(Debug)]
pub struct SummaryStats {
    /// Symbol these statistics belong to.
    symbol: String,
    /// Accumulated trade size.
    trade_volume: u64,
    /// Highest and lowest bid prices seen so far.
    bid_high: u64,
    bid_low: u64,
    /// Highest and lowest ask prices seen so far.
    ask_high: u64,
    ask_low: u64,
}

impl SummaryStats {
    /// Creates empty statistics for `sym`.
    ///
    /// Volume and the highs start at zero while the lows start at the largest
    /// `u64`, so the first value folded in with `max`/`min` always replaces
    /// the initial one.
    fn new(sym: &str) -> SummaryStats {
        let symbol = String::from(sym);
        let lowest = 0;
        let highest = u64::max_value();

        Self {
            symbol,
            trade_volume: lowest,
            bid_high: lowest,
            bid_low: highest,
            ask_high: lowest,
            ask_low: highest,
        }
    }
}
|
||||
|
@ -145,13 +145,14 @@ pub mod multi_message {
|
||||
}
|
||||
mod _private {
|
||||
use capnp::private::layout;
|
||||
|
||||
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 1, pointers: 1 };
|
||||
pub const TYPE_ID: u64 = 0xd13b_1bd4_36e1_ca9f;
|
||||
}
|
||||
}
|
||||
|
||||
pub mod message {
|
||||
pub use self::Which::{Trade,Quote};
|
||||
pub use self::Which::{Quote, Trade};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct Owned;
|
||||
@ -199,8 +200,8 @@ pub mod message {
|
||||
self.reader.total_size()
|
||||
}
|
||||
#[inline]
|
||||
pub fn get_ts(self) -> u64 {
|
||||
self.reader.get_data_field::<u64>(0)
|
||||
pub fn get_ts(self) -> i64 {
|
||||
self.reader.get_data_field::<i64>(0)
|
||||
}
|
||||
#[inline]
|
||||
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Reader<'a>> {
|
||||
@ -284,12 +285,12 @@ pub mod message {
|
||||
self.builder.into_reader().total_size()
|
||||
}
|
||||
#[inline]
|
||||
pub fn get_ts(self) -> u64 {
|
||||
self.builder.get_data_field::<u64>(0)
|
||||
pub fn get_ts(self) -> i64 {
|
||||
self.builder.get_data_field::<i64>(0)
|
||||
}
|
||||
#[inline]
|
||||
pub fn set_ts(&mut self, value: u64) {
|
||||
self.builder.set_data_field::<u64>(0, value);
|
||||
pub fn set_ts(&mut self, value: i64) {
|
||||
self.builder.set_data_field::<i64>(0, value);
|
||||
}
|
||||
#[inline]
|
||||
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Builder<'a>> {
|
||||
@ -362,6 +363,7 @@ pub mod message {
|
||||
}
|
||||
mod _private {
|
||||
use capnp::private::layout;
|
||||
|
||||
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 2 };
|
||||
pub const TYPE_ID: u64 = 0x91d7_2965_3a3d_4be4;
|
||||
}
|
||||
@ -505,6 +507,7 @@ pub mod trade {
|
||||
}
|
||||
mod _private {
|
||||
use capnp::private::layout;
|
||||
|
||||
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
|
||||
pub const TYPE_ID: u64 = 0xd29e_10bd_4e5f_c241;
|
||||
}
|
||||
@ -666,6 +669,7 @@ pub mod level_update {
|
||||
}
|
||||
mod _private {
|
||||
use capnp::private::layout;
|
||||
|
||||
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
|
||||
pub const TYPE_ID: u64 = 0xe664_c3b5_6628_c453;
|
||||
}
|
||||
|
Reference in New Issue
Block a user