Capnproto serialize/deserialize loop running!

This commit is contained in:
2019-08-25 21:21:10 -04:00
parent 3bbaced086
commit bf71146152
9 changed files with 373 additions and 42 deletions

166
src/capnp_runner.rs Normal file
View File

@ -0,0 +1,166 @@
use std::cmp::{max, min};
use std::collections::hash_map::{DefaultHasher, HashMap};
use std::hash::Hasher;
use std::io::{BufReader, Error, Read};
use std::str::from_utf8_unchecked;
use capnp::message::ReaderOptions;
use capnp::serialize::{read_message, write_message};
use capnp::serialize_packed::{read_message as read_message_packed, write_message as write_message_packed};
use nom::bytes::complete::take_until;
use nom::IResult;
use crate::iex::{IexMessage, IexParser};
use crate::marketdata_capnp::{multi_message, Side};
use crate::marketdata_capnp::message;
use crate::SummaryStats;
/// Pins nom's generic `take_until` to byte-slice input and `IResult`'s
/// default error type, so callers don't have to annotate either.
///
/// On success the result is `(remaining, taken)`, where `taken` is the part of
/// `input` that precedes the first occurrence of `tag`.
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    let until_tag = take_until(tag);
    until_tag(input)
}
/// Extracts the ticker text from a fixed-width 8-byte IEX symbol field.
///
/// Returns everything before the first ASCII space. When the field contains no
/// space at all (a symbol that occupies all eight bytes), the whole field is
/// returned instead of panicking.
fn parse_symbol(sym: &[u8; 8]) -> &str {
    // `take_until` reports an error when the tag is absent; that is not a
    // malformed symbol, just one without padding, so use the full field.
    let sym_bytes = match __take_until(" ", &sym[..]) {
        Ok((_, ticker)) => ticker,
        Err(_) => &sym[..],
    };
    debug_assert!(sym_bytes.is_ascii(), "IEX symbol contained non-ASCII bytes");
    // SAFETY: IEX guarantees symbols are ASCII, and ASCII is always valid
    // UTF-8. The debug assertion above catches violations in test builds.
    unsafe { from_utf8_unchecked(sym_bytes) }
}
/// Re-encodes an IEX feed as a stream of Cap'n Proto `multi_message` records.
///
/// Every payload yielded by `parser` that contains at least one trade report
/// or price level update is written as one Cap'n Proto message appended to the
/// returned buffer. Payloads containing neither are skipped, and all other IEX
/// message types inside a payload are dropped.
///
/// * `parser` - source of IEX payloads; consumed by this call.
/// * `size_hint` - initial capacity reserved for the output buffer.
/// * `packed` - use the packed Cap'n Proto stream encoding when `true`, the
///   plain encoding otherwise.
///
/// # Panics
///
/// Panics if writing a message into the output buffer fails.
pub fn serialize_capnp(parser: IexParser, size_hint: usize, packed: bool) -> Vec<u8> {
    let write_fn = if packed { write_message_packed } else { write_message };

    // Allocate our output buffer
    let mut output: Vec<u8> = Vec::with_capacity(size_hint);

    for payload in parser {
        // Find the messages we actually care about in this context
        let num_msgs = payload
            .messages
            .iter()
            .filter(|&m| match m {
                IexMessage::TradeReport(_) | IexMessage::PriceLevelUpdate(_) => true,
                _ => false,
            })
            .count();
        if num_msgs == 0 {
            continue;
        }

        // Use a fresh builder for every payload. A Cap'n Proto builder is an
        // arena that never reclaims space: re-rooting one shared builder would
        // leave every earlier payload's (zeroed) words in the segments, and
        // those segments are written out in full each time.
        let mut capnp_message = capnp::message::Builder::new_default();
        let mut multimsg = capnp_message.init_root::<multi_message::Builder>();
        multimsg.set_seq_no(payload.first_seq_no);
        // The list length is a u32 in Cap'n Proto; a single payload's message
        // count is expected to be far below that limit.
        let mut messages = multimsg.init_messages(num_msgs as u32);

        // Index of the next free slot in `messages`; only advanced for the
        // message kinds that were counted above.
        let mut next_slot = 0;
        for iex_msg in payload.messages {
            match iex_msg {
                IexMessage::TradeReport(tr) => {
                    let mut message = messages.reborrow().get(next_slot);
                    next_slot += 1;

                    message.set_ts(tr.timestamp);
                    // `set_symbol` allocates and copies the text on its own, so
                    // a preceding `init_symbol` would only waste arena space.
                    message.set_symbol(parse_symbol(&tr.symbol));

                    let mut msg_tr = message.init_trade();
                    msg_tr.set_size(tr.size);
                    msg_tr.set_price(tr.price);
                }
                IexMessage::PriceLevelUpdate(plu) => {
                    let mut message = messages.reborrow().get(next_slot);
                    next_slot += 1;

                    message.set_ts(plu.timestamp);
                    message.set_symbol(parse_symbol(&plu.symbol));

                    let mut msg_plu = message.init_quote();
                    msg_plu.set_price(plu.price);
                    msg_plu.set_size(plu.size);
                    msg_plu.set_flags(plu.event_flags);
                    // Message type 0x38 is treated as the buy side; every other
                    // price level update is recorded as a sell.
                    msg_plu.set_side(if plu.msg_type == 0x38 { Side::Buy } else { Side::Sell });
                }
                _ => (),
            }
        }

        write_fn(&mut output, &capnp_message)
            .expect("failed to serialize Cap'n Proto message into the output buffer");
    }

    output
}
/// A `Read` adapter over a borrowed byte buffer that remembers how far it has
/// been consumed.
///
/// Note: the standard library already implements `Read` for `&[u8]` and for
/// `std::io::Cursor`, either of which could replace this type.
struct AdvancingVec<'a> {
    /// Offset of the next unread byte in `inner`; never exceeds `inner.len()`.
    pos: usize,
    /// The bytes being read. A slice, so any contiguous buffer can be used.
    inner: &'a [u8],
}

impl<'a> Read for AdvancingVec<'a> {
    /// Copies as many unread bytes as fit into `buf` and advances past them.
    ///
    /// Returns the number of bytes copied, which is `0` once the buffer is
    /// exhausted (or when `buf` is empty). This implementation never fails.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
        let remaining = &self.inner[self.pos..];
        let read_size = remaining.len().min(buf.len());
        buf[..read_size].copy_from_slice(&remaining[..read_size]);
        self.pos += read_size;
        Ok(read_size)
    }
}
/// Reads a stream of Cap'n Proto `multi_message` records and aggregates
/// per-symbol summary statistics.
///
/// * `buffer` - concatenated Cap'n Proto messages, as produced by
///   `serialize_capnp` with the same `packed` setting.
/// * `packed` - decode the packed stream encoding when `true`, the plain
///   encoding otherwise.
///
/// Returns a map from the `DefaultHasher` hash of each symbol's text to that
/// symbol's accumulated trade volume and bid/ask price extremes.
///
/// # Panics
///
/// Panics if a message's root, message list, symbol, union payload or quote
/// side cannot be decoded, or if a message is neither a trade nor a quote.
pub fn read_capnp(buffer: &Vec<u8>, packed: bool) -> HashMap<u64, SummaryStats> {
    let read_fn = if packed { read_message_packed } else { read_message };

    let unbuffered = AdvancingVec {
        pos: 0,
        inner: buffer,
    };
    let mut buffered = BufReader::new(unbuffered);
    let read_opts = ReaderOptions::new();

    let mut stats = HashMap::new();
    // NOTE: the loop stops on the first read error of any kind, so a corrupt
    // message is indistinguishable from reaching the end of the buffer.
    while let Ok(msg) = read_fn(&mut buffered, read_opts) {
        let multimsg = msg.get_root::<multi_message::Reader>().unwrap();

        for msg in multimsg.get_messages().unwrap().iter() {
            // Hash the symbol name since we can't return a HashMap containing
            // string pointers as the keys
            let sym = msg.get_symbol().unwrap();
            let mut h = DefaultHasher::new();
            h.write(sym.as_bytes());
            let key = h.finish();

            // Build the entry lazily: constructing `SummaryStats` copies the
            // symbol into a new `String`, which is only needed the first time
            // a symbol is seen.
            let sym_stats = stats
                .entry(key)
                .or_insert_with(|| SummaryStats::new(sym));

            match msg.which() {
                Ok(message::Trade(tr)) => {
                    let tr = tr.unwrap();
                    sym_stats.trade_volume += tr.get_size() as u64;
                }
                Ok(message::Quote(q)) => {
                    let q = q.unwrap();
                    let price = q.get_price();
                    if q.get_side().unwrap() == Side::Buy {
                        sym_stats.bid_high = max(sym_stats.bid_high, price);
                        sym_stats.bid_low = min(sym_stats.bid_low, price);
                    } else {
                        sym_stats.ask_high = max(sym_stats.ask_high, price);
                        sym_stats.ask_low = min(sym_stats.ask_low, price);
                    }
                }
                _ => {
                    panic!("Unrecognized message type")
                }
            }
        }
    }

    stats
}

View File

@ -5,7 +5,7 @@ use nom::{bytes::complete::take, IResult, number::complete::*, sequence::tuple};
use crate::parsers::{Block, extract_iex_data, read_block};
pub struct IexParser<'a> {
pcap_buffer: &'a [u8]
pcap_buffer: &'a [u8],
}
impl<'a> IexParser<'a> {
@ -28,7 +28,7 @@ impl<'a> Iterator for IexParser<'a> {
let (_, payload) = IexPayload::parse(iex_data).unwrap();
return Some(payload);
}
_ => ()
_ => (),
}
}
@ -44,17 +44,32 @@ pub struct IexPayload {
channel_id: u32,
session_id: u32,
payload_len: u16,
msg_count: u16,
pub msg_count: u16,
stream_offset: u64,
first_seq_no: u64,
pub first_seq_no: u64,
send_time: i64,
messages: smallvec::SmallVec<[IexMessage; 8]>,
pub messages: smallvec::SmallVec<[IexMessage; 256]>,
}
impl IexPayload {
pub fn parse(payload: &[u8]) -> IResult<&[u8], IexPayload> {
let (mut rem, (version, _reserved, proto_id, channel_id, session_id, payload_len, msg_count, stream_offset, first_seq_no, send_time)) =
tuple((le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64))(payload)?;
let (
mut rem,
(
version,
_reserved,
proto_id,
channel_id,
session_id,
payload_len,
msg_count,
stream_offset,
first_seq_no,
send_time,
),
) = tuple((
le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64,
))(payload)?;
let mut messages = smallvec::SmallVec::new();
for _i in 0..msg_count {
@ -78,7 +93,7 @@ impl IexPayload {
first_seq_no,
send_time,
messages,
}
},
))
}
}
@ -293,12 +308,12 @@ impl SecurityEvent {
#[derive(Debug)]
pub struct PriceLevelUpdate {
msg_type: u8,
event_flags: u8,
timestamp: i64,
symbol: [u8; 8],
size: u32,
price: u64,
pub msg_type: u8,
pub event_flags: u8,
pub timestamp: i64,
pub symbol: [u8; 8],
pub size: u32,
pub price: u64,
}
impl PriceLevelUpdate {
@ -322,13 +337,13 @@ impl PriceLevelUpdate {
#[derive(Debug)]
pub struct TradeReport {
msg_type: u8,
sale_condition: u8,
timestamp: i64,
symbol: [u8; 8],
size: u32,
price: u64,
trade_id: u64,
pub msg_type: u8,
pub sale_condition: u8,
pub timestamp: i64,
pub symbol: [u8; 8],
pub size: u32,
pub price: u64,
pub trade_id: u64,
}
impl TradeReport {

2
src/lib.rs Normal file
View File

@ -0,0 +1,2 @@
// This file is needed for tests outside the main source tree to find the project files
pub mod marketdata_capnp;

View File

@ -5,7 +5,7 @@ use std::time::SystemTime;
use clap::{App, Arg};
use crate::iex::IexParser;
use crate::iex::{IexMessage, IexParser};
// Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
// the schemas. For purposes of auto-complete and easy browsing in the repository,
@ -14,18 +14,21 @@ pub mod marketdata_capnp;
#[allow(unused_imports)]
pub mod marketdata_generated; // Flatbuffers
mod capnp_runner;
mod iex;
mod parsers;
fn main() {
let matches = App::new("Marketdata Shootout")
.arg(Arg::with_name("file")
.short("f")
.long("file")
.value_name("FILE")
.help("IEX DEEP file to process")
.required(true)
.takes_value(true))
.arg(
Arg::with_name("file")
.short("f")
.long("file")
.value_name("FILE")
.help("IEX DEEP file to process")
.required(true)
.takes_value(true),
)
.get_matches();
let deep = matches.value_of("file").unwrap();
@ -33,11 +36,43 @@ fn main() {
let mut file = File::open(path).expect(&format!("Unable to open file={}", path.display()));
let mut buf = Vec::new();
file.read_to_end(&mut buf).expect(&format!("Unable to read file={}", path.display()));
file.read_to_end(&mut buf)
.expect(&format!("Unable to read file={}", path.display()));
let start = SystemTime::now();
for _payload in IexParser::new(&buf[..]) {
//dbg!(payload);
}
println!("Parse time seconds={}", SystemTime::now().duration_since(start).unwrap().as_secs())
// Try with Capnproto for now
let parser = IexParser::new(&buf[..]);
let capnp_buf = capnp_runner::serialize_capnp(parser, buf.len(), true);
let stats = capnp_runner::read_capnp(&capnp_buf, true);
dbg!(stats);
println!(
"Parse time seconds={}",
SystemTime::now().duration_since(start).unwrap().as_secs()
)
}
/// Running per-symbol aggregates collected while reading market data back.
#[derive(Debug)]
pub struct SummaryStats {
    /// Owned copy of the symbol text these statistics belong to.
    symbol: String,
    /// Sum of the sizes of all trades seen so far.
    trade_volume: u64,
    /// Highest bid price seen so far; starts at zero.
    bid_high: u64,
    /// Lowest bid price seen so far; starts at the maximum `u64`.
    bid_low: u64,
    /// Highest ask price seen so far; starts at zero.
    ask_high: u64,
    /// Lowest ask price seen so far; starts at the maximum `u64`.
    ask_low: u64,
}

impl SummaryStats {
    /// Creates empty statistics for `sym`.
    ///
    /// Highs start at the smallest possible value and lows at the largest, so
    /// the first observed price always replaces them.
    fn new(sym: &str) -> SummaryStats {
        let lowest = 0;
        let highest = u64::max_value();

        SummaryStats {
            symbol: String::from(sym),
            trade_volume: 0,
            bid_high: lowest,
            bid_low: highest,
            ask_high: lowest,
            ask_low: highest,
        }
    }
}

View File

@ -145,13 +145,14 @@ pub mod multi_message {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 1, pointers: 1 };
pub const TYPE_ID: u64 = 0xd13b_1bd4_36e1_ca9f;
}
}
pub mod message {
pub use self::Which::{Trade,Quote};
pub use self::Which::{Quote, Trade};
#[derive(Copy, Clone)]
pub struct Owned;
@ -199,8 +200,8 @@ pub mod message {
self.reader.total_size()
}
#[inline]
pub fn get_ts(self) -> u64 {
self.reader.get_data_field::<u64>(0)
pub fn get_ts(self) -> i64 {
self.reader.get_data_field::<i64>(0)
}
#[inline]
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Reader<'a>> {
@ -284,12 +285,12 @@ pub mod message {
self.builder.into_reader().total_size()
}
#[inline]
pub fn get_ts(self) -> u64 {
self.builder.get_data_field::<u64>(0)
pub fn get_ts(self) -> i64 {
self.builder.get_data_field::<i64>(0)
}
#[inline]
pub fn set_ts(&mut self, value: u64) {
self.builder.set_data_field::<u64>(0, value);
pub fn set_ts(&mut self, value: i64) {
self.builder.set_data_field::<i64>(0, value);
}
#[inline]
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Builder<'a>> {
@ -362,6 +363,7 @@ pub mod message {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 2 };
pub const TYPE_ID: u64 = 0x91d7_2965_3a3d_4be4;
}
@ -505,6 +507,7 @@ pub mod trade {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
pub const TYPE_ID: u64 = 0xd29e_10bd_4e5f_c241;
}
@ -666,6 +669,7 @@ pub mod level_update {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
pub const TYPE_ID: u64 = 0xe664_c3b5_6628_c453;
}