Capnproto serialize/deserialize loop running!

master
Bradlee Speice 2019-08-25 21:21:10 -04:00
parent 3bbaced086
commit bf71146152
9 changed files with 373 additions and 42 deletions

56
Cargo.lock generated
View File

@ -1,5 +1,23 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
[[package]]
name = "alloc_counter"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"alloc_counter_macro 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "alloc_counter_macro"
version = "0.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
@ -99,6 +117,7 @@ dependencies = [
name = "md_shootout"
version = "0.1.0"
dependencies = [
"alloc_counter 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"capnp 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"capnpc 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -123,6 +142,22 @@ dependencies = [
"version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro2"
version = "0.4.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quote"
version = "0.6.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rustc_version"
version = "0.2.3"
@ -173,6 +208,16 @@ name = "strsim"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "syn"
version = "0.15.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)",
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "textwrap"
version = "0.11.0"
@ -186,6 +231,11 @@ name = "unicode-width"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicode-xid"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unreachable"
version = "1.0.0"
@ -229,6 +279,8 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum alloc_counter 0.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a169230586814a38a47b9764bb5e5310120df93952df01ee3ea9d832aef10e2f"
"checksum alloc_counter_macro 0.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c37d9ddd812e5223e8de74a2152fa79dce52ca9f6af38a54c6bcd1ae8b26a05d"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
@ -243,6 +295,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
"checksum nom 5.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e9761d859320e381010a4f7f8ed425f2c924de33ad121ace447367c713ad561b"
"checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759"
"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1"
"checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a"
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
"checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
@ -251,8 +305,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum stackvector 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "1c4725650978235083241fab0fdc8e694c3de37821524e7534a1a9061d1068af"
"checksum static_assertions 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7f3eb36b47e512f8f1c9e3d10c2c1965bc992bd9cdb024fa581e2194501c83d3"
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
"checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"
"checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd"

View File

@ -11,6 +11,9 @@ flatbuffers = "0.6.0"
nom = "5.0.0"
smallvec = "0.6.10"
[dev-dependencies]
alloc_counter = "0.0.2"
[build-dependencies]
capnpc = "0.10"
flatc-rust = "0.1.2"

View File

@ -6,7 +6,7 @@ struct MultiMessage {
}
struct Message {
ts @0 :UInt64;
ts @0 :Int64;
symbol @1 :Text;
union {

166
src/capnp_runner.rs Normal file
View File

@ -0,0 +1,166 @@
use std::cmp::{max, min};
use std::collections::hash_map::{DefaultHasher, HashMap};
use std::hash::Hasher;
use std::io::{BufReader, Error, Read};
use std::str::from_utf8_unchecked;
use capnp::message::ReaderOptions;
use capnp::serialize::{read_message, write_message};
use capnp::serialize_packed::{read_message as read_message_packed, write_message as write_message_packed};
use nom::bytes::complete::take_until;
use nom::IResult;
use crate::iex::{IexMessage, IexParser};
use crate::marketdata_capnp::{multi_message, Side};
use crate::marketdata_capnp::message;
use crate::SummaryStats;
/// Runs nom's `take_until` parser for `tag` against `input`.
///
/// Exists so that the parser's generic input and error types are pinned down by
/// this function's signature instead of at every call site.
fn __take_until<'a>(tag: &'static str, input: &'a [u8]) -> IResult<&'a [u8], &'a [u8]> {
    let until_tag = take_until(tag);
    until_tag(input)
}
/// Extracts the ticker from a fixed-width IEX symbol field.
///
/// Returns the bytes that precede the first ASCII space. When the field holds no
/// space at all (a symbol that uses all eight bytes) the whole field is returned
/// instead of failing.
///
/// # Panics
///
/// Panics if the selected bytes are not valid UTF-8. IEX guarantees ASCII, so this
/// only triggers on corrupted input; a checked conversion is used so that such
/// input cannot produce an invalid `&str`.
fn parse_symbol(sym: &[u8; 8]) -> &str {
    // No padding byte means the symbol occupies the full width of the field.
    let len = sym.iter().position(|&b| b == b' ').unwrap_or(sym.len());
    std::str::from_utf8(&sym[..len]).expect("IEX symbol is not valid UTF-8")
}
/// Serializes the trade reports and price level updates produced by `parser`
/// into one buffer of back-to-back Cap'n Proto `MultiMessage` frames.
///
/// * `parser` - source of IEX payloads. Payloads that contain neither a trade
///   report nor a price level update are skipped entirely.
/// * `size_hint` - initial capacity reserved for the output buffer.
/// * `packed` - when `true` frames are written with the packed encoding,
///   otherwise with the plain encoding.
///
/// Returns the buffer holding every written frame.
///
/// # Panics
///
/// Panics if writing a frame to the output buffer fails.
pub fn serialize_capnp(parser: IexParser, size_hint: usize, packed: bool) -> Vec<u8> {
    let write_fn = if packed { write_message_packed } else { write_message };

    // Allocate our output buffer
    let mut output: Vec<u8> = Vec::with_capacity(size_hint);

    for iex_msg in parser {
        // Find the messages we actually care about in this context
        let num_msgs = iex_msg
            .messages
            .iter()
            .filter(|m| match *m {
                IexMessage::TradeReport(_) | IexMessage::PriceLevelUpdate(_) => true,
                _ => false,
            })
            .count();
        if num_msgs == 0 {
            continue;
        }

        // A Cap'n Proto message builder does not reclaim the space of an object
        // that gets re-initialized: calling `init_root` again on a used builder
        // leaves the previous root behind as dead space that is still written
        // out. Reusing one builder for the whole run would therefore make every
        // frame carry all earlier frames, so each payload gets a fresh builder.
        let mut capnp_message = capnp::message::Builder::new_default();
        let mut multimsg = capnp_message.init_root::<multi_message::Builder>();
        multimsg.set_seq_no(iex_msg.first_seq_no);

        let mut messages = multimsg.init_messages(num_msgs as u32);
        let mut current_msg_no = 0;
        for msg in iex_msg.messages {
            match msg {
                IexMessage::TradeReport(tr) => {
                    let mut message = messages.reborrow().get(current_msg_no);
                    current_msg_no += 1;
                    message.set_ts(tr.timestamp);

                    // `set_symbol` sizes and fills the text field on its own; a
                    // preceding `init_symbol` would only allocate the text twice.
                    message.set_symbol(parse_symbol(&tr.symbol));

                    let mut msg_tr = message.init_trade();
                    msg_tr.set_size(tr.size);
                    msg_tr.set_price(tr.price);
                }
                IexMessage::PriceLevelUpdate(plu) => {
                    let mut message = messages.reborrow().get(current_msg_no);
                    current_msg_no += 1;
                    message.set_ts(plu.timestamp);

                    message.set_symbol(parse_symbol(&plu.symbol));

                    let mut msg_plu = message.init_quote();
                    msg_plu.set_price(plu.price);
                    msg_plu.set_size(plu.size);
                    msg_plu.set_flags(plu.event_flags);
                    // Message type 0x38 is treated as the buy side, anything else as sell.
                    msg_plu.set_side(if plu.msg_type == 0x38 { Side::Buy } else { Side::Sell });
                }
                _ => (),
            }
        }

        write_fn(&mut output, &capnp_message).unwrap();
    }

    output
}
/// Forward-only reader over a borrowed byte vector.
///
/// `pos` is the offset of the next unread byte in `inner`.
struct AdvancingVec<'a> {
    pos: usize,
    inner: &'a Vec<u8>,
}

impl Read for AdvancingVec<'_> {
    /// Copies as many unread bytes as fit into `buf`, advances past them and
    /// returns how many were copied. Returns `Ok(0)` once everything was consumed.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, Error> {
        // TODO: `&[u8]` and `std::io::Cursor` already implement `Read`; this type
        // could probably be replaced by one of them.
        let unread = &self.inner[self.pos..];
        let count = buf.len().min(unread.len());

        buf[..count].copy_from_slice(&unread[..count]);
        self.pos += count;

        Ok(count)
    }
}
/// Reads back a buffer of concatenated Cap'n Proto `MultiMessage` frames and
/// aggregates per-symbol statistics from the trades and quotes they contain.
///
/// * `buffer` - bytes holding the serialized frames.
/// * `packed` - must be `true` when the frames were written with the packed
///   encoding, `false` for the plain encoding.
///
/// Returns a map from the hash of each symbol name to its statistics. Symbols
/// are keyed by hash, so two symbols whose hashes collide share one entry.
///
/// Reading stops at the first frame that cannot be read; that is also how the
/// end of the buffer is detected, so a damaged frame ends processing silently.
///
/// # Panics
///
/// Panics if a frame's root, message list, symbol, trade, quote or side cannot
/// be decoded, or if a message is neither a trade nor a quote.
pub fn read_capnp(buffer: &Vec<u8>, packed: bool) -> HashMap<u64, SummaryStats> {
    let read_fn = if packed { read_message_packed } else { read_message };
    let unbuffered = AdvancingVec {
        pos: 0,
        inner: buffer,
    };
    let mut buffered = BufReader::new(unbuffered);
    let read_opts = ReaderOptions::new();

    let mut stats = HashMap::new();
    while let Ok(msg) = read_fn(&mut buffered, read_opts) {
        let multimsg = msg.get_root::<multi_message::Reader>().unwrap();

        for msg in multimsg.get_messages().unwrap().iter() {
            // Hash the symbol name since we can't return a HashMap containing
            // string pointers as the keys
            let sym = msg.get_symbol().unwrap();
            let mut h = DefaultHasher::new();
            h.write(sym.as_bytes());
            let key = h.finish();

            // Build the entry lazily: constructing `SummaryStats` copies the
            // symbol into a new `String`, which is only needed the first time a
            // symbol is seen.
            let sym_stats = stats
                .entry(key)
                .or_insert_with(|| SummaryStats::new(sym));

            match msg.which() {
                Ok(message::Trade(tr)) => {
                    let tr = tr.unwrap();
                    sym_stats.trade_volume += tr.get_size() as u64;
                }
                Ok(message::Quote(q)) => {
                    let q = q.unwrap();
                    if q.get_side().unwrap() == Side::Buy {
                        sym_stats.bid_high = max(sym_stats.bid_high, q.get_price());
                        sym_stats.bid_low = min(sym_stats.bid_low, q.get_price());
                    } else {
                        sym_stats.ask_high = max(sym_stats.ask_high, q.get_price());
                        sym_stats.ask_low = min(sym_stats.ask_low, q.get_price());
                    }
                }
                _ => {
                    panic!("Unrecognized message type")
                }
            }
        }
    }

    stats
}

View File

@ -5,7 +5,7 @@ use nom::{bytes::complete::take, IResult, number::complete::*, sequence::tuple};
use crate::parsers::{Block, extract_iex_data, read_block};
pub struct IexParser<'a> {
pcap_buffer: &'a [u8]
pcap_buffer: &'a [u8],
}
impl<'a> IexParser<'a> {
@ -28,7 +28,7 @@ impl<'a> Iterator for IexParser<'a> {
let (_, payload) = IexPayload::parse(iex_data).unwrap();
return Some(payload);
}
_ => ()
_ => (),
}
}
@ -44,17 +44,32 @@ pub struct IexPayload {
channel_id: u32,
session_id: u32,
payload_len: u16,
msg_count: u16,
pub msg_count: u16,
stream_offset: u64,
first_seq_no: u64,
pub first_seq_no: u64,
send_time: i64,
messages: smallvec::SmallVec<[IexMessage; 8]>,
pub messages: smallvec::SmallVec<[IexMessage; 256]>,
}
impl IexPayload {
pub fn parse(payload: &[u8]) -> IResult<&[u8], IexPayload> {
let (mut rem, (version, _reserved, proto_id, channel_id, session_id, payload_len, msg_count, stream_offset, first_seq_no, send_time)) =
tuple((le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64))(payload)?;
let (
mut rem,
(
version,
_reserved,
proto_id,
channel_id,
session_id,
payload_len,
msg_count,
stream_offset,
first_seq_no,
send_time,
),
) = tuple((
le_u8, le_u8, le_u16, le_u32, le_u32, le_u16, le_u16, le_u64, le_u64, le_i64,
))(payload)?;
let mut messages = smallvec::SmallVec::new();
for _i in 0..msg_count {
@ -78,7 +93,7 @@ impl IexPayload {
first_seq_no,
send_time,
messages,
}
},
))
}
}
@ -293,12 +308,12 @@ impl SecurityEvent {
#[derive(Debug)]
pub struct PriceLevelUpdate {
msg_type: u8,
event_flags: u8,
timestamp: i64,
symbol: [u8; 8],
size: u32,
price: u64,
pub msg_type: u8,
pub event_flags: u8,
pub timestamp: i64,
pub symbol: [u8; 8],
pub size: u32,
pub price: u64,
}
impl PriceLevelUpdate {
@ -322,13 +337,13 @@ impl PriceLevelUpdate {
#[derive(Debug)]
pub struct TradeReport {
msg_type: u8,
sale_condition: u8,
timestamp: i64,
symbol: [u8; 8],
size: u32,
price: u64,
trade_id: u64,
pub msg_type: u8,
pub sale_condition: u8,
pub timestamp: i64,
pub symbol: [u8; 8],
pub size: u32,
pub price: u64,
pub trade_id: u64,
}
impl TradeReport {

2
src/lib.rs Normal file
View File

@ -0,0 +1,2 @@
// This file is needed for tests outside the main source tree to find the project files
pub mod marketdata_capnp;

View File

@ -5,7 +5,7 @@ use std::time::SystemTime;
use clap::{App, Arg};
use crate::iex::IexParser;
use crate::iex::{IexMessage, IexParser};
// Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
// the schemas. For purposes of auto-complete and easy browsing in the repository,
@ -14,18 +14,21 @@ pub mod marketdata_capnp;
#[allow(unused_imports)]
pub mod marketdata_generated; // Flatbuffers
mod capnp_runner;
mod iex;
mod parsers;
fn main() {
let matches = App::new("Marketdata Shootout")
.arg(Arg::with_name("file")
.short("f")
.long("file")
.value_name("FILE")
.help("IEX DEEP file to process")
.required(true)
.takes_value(true))
.arg(
Arg::with_name("file")
.short("f")
.long("file")
.value_name("FILE")
.help("IEX DEEP file to process")
.required(true)
.takes_value(true),
)
.get_matches();
let deep = matches.value_of("file").unwrap();
@ -33,11 +36,43 @@ fn main() {
let mut file = File::open(path).expect(&format!("Unable to open file={}", path.display()));
let mut buf = Vec::new();
file.read_to_end(&mut buf).expect(&format!("Unable to read file={}", path.display()));
file.read_to_end(&mut buf)
.expect(&format!("Unable to read file={}", path.display()));
let start = SystemTime::now();
for _payload in IexParser::new(&buf[..]) {
//dbg!(payload);
}
println!("Parse time seconds={}", SystemTime::now().duration_since(start).unwrap().as_secs())
// Try with Capnproto for now
let parser = IexParser::new(&buf[..]);
let capnp_buf = capnp_runner::serialize_capnp(parser, buf.len(), true);
let stats = capnp_runner::read_capnp(&capnp_buf, true);
dbg!(stats);
println!(
"Parse time seconds={}",
SystemTime::now().duration_since(start).unwrap().as_secs()
)
}
/// Running aggregates collected for a single symbol.
#[derive(Debug)]
pub struct SummaryStats {
    // Name of the symbol these numbers belong to.
    symbol: String,
    // Sum of the sizes of all trades seen so far.
    trade_volume: u64,
    // Highest and lowest price seen on the bid side.
    bid_high: u64,
    bid_low: u64,
    // Highest and lowest price seen on the ask side.
    ask_high: u64,
    ask_low: u64,
}

impl SummaryStats {
    /// Creates empty statistics for `sym`.
    ///
    /// Highs start at zero and lows at the largest `u64`, so the first observed
    /// price replaces both when they are folded with `max` / `min`.
    fn new(sym: &str) -> SummaryStats {
        let no_low_yet = u64::max_value();
        let no_high_yet = 0;

        Self {
            symbol: String::from(sym),
            trade_volume: 0,
            bid_high: no_high_yet,
            bid_low: no_low_yet,
            ask_high: no_high_yet,
            ask_low: no_low_yet,
        }
    }
}

View File

@ -145,13 +145,14 @@ pub mod multi_message {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 1, pointers: 1 };
pub const TYPE_ID: u64 = 0xd13b_1bd4_36e1_ca9f;
}
}
pub mod message {
pub use self::Which::{Trade,Quote};
pub use self::Which::{Quote, Trade};
#[derive(Copy, Clone)]
pub struct Owned;
@ -199,8 +200,8 @@ pub mod message {
self.reader.total_size()
}
#[inline]
pub fn get_ts(self) -> u64 {
self.reader.get_data_field::<u64>(0)
pub fn get_ts(self) -> i64 {
self.reader.get_data_field::<i64>(0)
}
#[inline]
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Reader<'a>> {
@ -284,12 +285,12 @@ pub mod message {
self.builder.into_reader().total_size()
}
#[inline]
pub fn get_ts(self) -> u64 {
self.builder.get_data_field::<u64>(0)
pub fn get_ts(self) -> i64 {
self.builder.get_data_field::<i64>(0)
}
#[inline]
pub fn set_ts(&mut self, value: u64) {
self.builder.set_data_field::<u64>(0, value);
pub fn set_ts(&mut self, value: i64) {
self.builder.set_data_field::<i64>(0, value);
}
#[inline]
pub fn get_symbol(self) -> ::capnp::Result<::capnp::text::Builder<'a>> {
@ -362,6 +363,7 @@ pub mod message {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 2 };
pub const TYPE_ID: u64 = 0x91d7_2965_3a3d_4be4;
}
@ -505,6 +507,7 @@ pub mod trade {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
pub const TYPE_ID: u64 = 0xd29e_10bd_4e5f_c241;
}
@ -666,6 +669,7 @@ pub mod level_update {
}
mod _private {
use capnp::private::layout;
pub const STRUCT_SIZE: layout::StructSize = layout::StructSize { data: 2, pointers: 0 };
pub const TYPE_ID: u64 = 0xe664_c3b5_6628_c453;
}

50
tests/capnp.rs Normal file
View File

@ -0,0 +1,50 @@
use alloc_counter::{AllocCounterSystem, count_alloc, deny_alloc};
use md_shootout::marketdata_capnp::multi_message;
// Route every heap operation of this test binary through `AllocCounterSystem`
// so that the `count_alloc` / `deny_alloc` helpers used below can observe them.
#[global_allocator]
static A: AllocCounterSystem = AllocCounterSystem;
// Documents how many heap operations a Cap'n Proto message builder performs
// when its root object is initialized repeatedly with different list sizes.
//
// NOTE(review): `stats.0` / `stats.1` / `stats.2` are presumably the numbers of
// allocations, reallocations and deallocations seen inside the closure, and
// `deny_alloc` presumably fails when its closure touches the heap — confirm
// both against the alloc_counter documentation.
#[test]
fn reinit_memory_check() {
// Setting up the builder doesn't reserve any heap memory
let mut msg_block = deny_alloc(|| {
capnp::message::Builder::new_default()
});
// Setting up the root object, however, does reserve a first segment
// NOTE(review): `result` is never read; it could be bound to `_` like in the
// second `count_alloc` call below.
let (stats, result) = count_alloc(|| {
let multimsg = msg_block.init_root::<multi_message::Builder>();
multimsg.init_messages(32);
});
assert_eq!(stats.0, 4);
assert_eq!(stats.1, 0);
assert_eq!(stats.2, 0);
// If we reinitialize an object on that original builder, no further heap
// allocation is requested. This only shows that the builder still had room;
// it does not show that the space of the previous root object was recycled.
deny_alloc(|| {
let multimsg = msg_block.init_root::<multi_message::Builder>();
multimsg.init_messages(32);
// Even if we down-size and up-size the message list size, we don't need
// to re-allocate
let multimsg = msg_block.init_root::<multi_message::Builder>();
multimsg.init_messages(16);
let multimsg = msg_block.init_root::<multi_message::Builder>();
multimsg.init_messages(32);
});
// It's only when we init a larger message count that a fresh allocation occurs
let (stats, _) = count_alloc(|| {
let multimsg = msg_block.init_root::<multi_message::Builder>();
// Note: calling `init_messages(33)` doesn't force allocation because
// the Capnproto builder reserved extra memory the first time around
multimsg.init_messages(256);
});
assert_eq!(stats.0, 1);
assert_eq!(stats.1, 3);
assert_eq!(stats.2, 0);
}