diff --git a/src/main.rs b/src/main.rs
index f7498e6..d9c4aed 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,9 +1,62 @@
+use std::fs::File;
+use std::io::Read;
+use std::path::Path;
+
+use clap::{App, Arg};
+
+use pcap_ng::Block;
+
 // Cap'n'Proto and Flatbuffers typically ask that you generate code on the fly to match
 // the schemas. For purposes of auto-complete and easy browsing in the repository,
 // we generate the code and just copy it into the src/ tree.
 pub mod marketdata_capnp;
+#[allow(unused_imports)] // generated code (see note above)
 pub mod marketdata_generated; // Flatbuffers
 
+mod pcap_ng;
+
+/// Reads the pcapng capture named by `--file` and prints a one-line summary of
+/// every block that parses, followed by the number of bytes left unparsed.
 fn main() {
-    println!("Hello, world!");
+    let matches = App::new("Marketdata Shootout")
+        .arg(Arg::with_name("file")
+            .short("f")
+            .long("file")
+            .value_name("FILE")
+            .help("IEX DEEP file to process")
+            .required(true)
+            .takes_value(true))
+        .get_matches();
+
+    // `file` is declared `required(true)`, so a value is always present here.
+    let deep = matches.value_of("file").unwrap();
+    let path = Path::new(deep);
+    // Build the panic message lazily, i.e. only when the call actually fails.
+    let mut file = File::open(path)
+        .unwrap_or_else(|err| panic!("Unable to open file={}: {:?}", path.display(), err));
+
+    let mut buf = Vec::new();
+    file.read_to_end(&mut buf)
+        .unwrap_or_else(|err| panic!("Unable to read file={}: {:?}", path.display(), err));
+
+    let mut rem = &buf[..];
+    // Stops at the first block that fails to parse, which includes end of input.
+    while let Ok((unparsed, block)) = pcap_ng::read_block(rem) {
+        // `unparsed` is the tail of `buf` left after this block, so the offset of
+        // the next block is simply the difference in lengths.
+        let offset = buf.len() - unparsed.len();
+        rem = unparsed;
+        match block {
+            Block::SectionHeader(sh) => println!("{:?}, next offset={}", sh, offset),
+            Block::InterfaceDescription(id) => println!("{:?}, next offset={}", id, offset),
+            Block::EnhancedPacket(epb) => println!(
+                "EnhancedPacketBlock {{ block_len: {}, packet_len: {} }}, next offset={}",
+                epb.block_len,
+                epb.packet_data.len(),
+                offset
+            ),
+        }
+    }
+
+    println!("Remaining unparsed len={}", rem.len());
 }
diff --git a/src/pcap_ng.rs b/src/pcap_ng.rs
new file mode 100644
index 0000000..5d75520
--- /dev/null
+++ b/src/pcap_ng.rs
@@ -0,0 +1,137 @@
+//! Minimal pcapng block reader: just enough to walk the blocks of an IEX DEEP
+//! capture and expose the raw payload of each Enhanced Packet Block.
+
+use std::mem::size_of;
+
+use nom::{
+    branch::alt,
+    bytes::complete::tag,
+    bytes::complete::take,
+    error::{make_error, ErrorKind},
+    IResult,
+    number::complete::*,
+    sequence::tuple,
+};
+
+use crate::pcap_ng::Block::EnhancedPacket;
+
+/// One parsed pcapng block.
+pub enum Block<'a> {
+    SectionHeader(SectionHeaderBlock),
+    InterfaceDescription(InterfaceDescriptionBlock),
+    EnhancedPacket(EnhancedPacketBlock<'a>),
+}
+
+/// Parses the single block at the start of `input`.
+///
+/// Returns the bytes following the block together with the block itself, or a
+/// nom error if `input` does not start with a supported, well-formed block.
+pub fn read_block(input: &[u8]) -> IResult<&[u8], Block> {
+    alt((
+        section_header_block,
+        interface_description_block,
+        enhanced_packet_block,
+    ))(input)
+}
+
+/// Skips whatever is left of a block once part of it has been parsed.
+///
+/// `block_len` is the total length the block declares for itself and `consumed`
+/// lists the byte counts already taken from it. If those exceed `block_len` the
+/// block is malformed: a recoverable parse error is returned rather than
+/// letting the `u32` subtraction overflow.
+fn skip_rest_of_block<'a>(
+    rem: &'a [u8],
+    block_len: u32,
+    consumed: &[u32],
+) -> IResult<&'a [u8], ()> {
+    let remaining = consumed
+        .iter()
+        .try_fold(block_len, |left, &used| left.checked_sub(used));
+
+    match remaining {
+        Some(len) => take(len)(rem).map(|(rest, _)| (rest, ())),
+        None => Err(nom::Err::Error(make_error(rem, ErrorKind::Verify))),
+    }
+}
+
+/// Section Header Block; only its declared total length is retained.
+#[derive(Debug)]
+pub struct SectionHeaderBlock {
+    block_len: u32,
+}
+
+const SECTION_HEADER: [u8; 4] = [0x0a, 0x0d, 0x0d, 0x0a];
+/// The value 0x1A2B3C4D laid out in little-endian byte order.
+const BYTE_ORDER_MAGIC: [u8; 4] = [0x4d, 0x3c, 0x2b, 0x1a];
+
+/// Parses a Section Header Block written in little-endian order and skips its body.
+pub fn section_header_block(input: &[u8]) -> IResult<&[u8], Block> {
+    // Block type (4) + total length (4) + byte-order magic (4).
+    let header_len = 12;
+    let (rem, (_, block_len, _)) =
+        tuple((tag(SECTION_HEADER), le_u32, tag(BYTE_ORDER_MAGIC)))(input)?;
+
+    let (rem, ()) = skip_rest_of_block(rem, block_len, &[header_len])?;
+    Ok((rem, Block::SectionHeader(SectionHeaderBlock { block_len })))
+}
+
+/// Interface Description Block; only its declared total length is retained.
+#[derive(Debug)]
+pub struct InterfaceDescriptionBlock {
+    block_len: u32,
+}
+
+const INTERFACE_DESCRIPTION: [u8; 4] = [0x01, 0x00, 0x00, 0x00];
+
+/// Parses an Interface Description Block and skips its body.
+pub fn interface_description_block(input: &[u8]) -> IResult<&[u8], Block> {
+    // Block type (4) + total length (4).
+    let header_len = 8;
+    let (rem, (_, block_len)) = tuple((tag(INTERFACE_DESCRIPTION), le_u32))(input)?;
+
+    let (rem, ()) = skip_rest_of_block(rem, block_len, &[header_len])?;
+    Ok((
+        rem,
+        Block::InterfaceDescription(InterfaceDescriptionBlock { block_len }),
+    ))
+}
+
+/// Enhanced Packet Block: its declared total length plus the captured bytes,
+/// borrowed from the input buffer.
+pub struct EnhancedPacketBlock<'a> {
+    pub block_len: u32,
+    pub packet_data: &'a [u8],
+}
+
+const ENHANCED_PACKET: [u8; 4] = [0x06, 0x00, 0x00, 0x00];
+
+/// Parses an Enhanced Packet Block, keeping a borrowed view of its packet data.
+pub fn enhanced_packet_block(input: &[u8]) -> IResult<&[u8], Block> {
+    // Seven 32-bit words precede the packet data; besides the block type only
+    // the total length (second word) and captured length (sixth) are used.
+    let header_len = 28;
+    let (rem, (_, block_len, _, _, _, captured_len, _)) = tuple((
+        tag(ENHANCED_PACKET),
+        le_u32,
+        le_u32,
+        le_u32,
+        le_u32,
+        le_u32,
+        le_u32,
+    ))(input)?;
+
+    let (rem, packet_data) = take(captured_len)(rem)?;
+
+    // Packets are supposed to be padded to 32 bits, but IEX DEEP doesn't seem
+    // to respect this, so the captured length is not rounded up here. Whatever
+    // follows the packet data is skipped using the declared block length.
+    let (rem, ()) = skip_rest_of_block(rem, block_len, &[header_len, captured_len])?;
+    Ok((
+        rem,
+        Block::EnhancedPacket(EnhancedPacketBlock {
+            block_len,
+            packet_data,
+        }),
+    ))
+}