From 5a00ff789c15ed27009d3a84341e17c293623112 Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Sat, 6 Jun 2020 14:08:27 +0200 Subject: pkgs/profpatsch/netencode: add shallow parser MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The “shallow” parser uses the fact that every netencode value is length-encoded (or a scalar with a fixed length). It does not need to parse the inner values in order to get the structure of the thing. That means that we can implement very fast structure-based operations, like “take the first 5 elements of a list” or “get the record value with the key name `foo`”. We can even do things like intersperse elements into a list of values and write the resulting netencode structure to a socket, without ever needing to copy the data (it’s all length-indexed pointers to bytes). --- pkgs/profpatsch/netencode/netencode.rs | 213 +++++++++++++++++++++++++-------- 1 file changed, 160 insertions(+), 53 deletions(-) (limited to 'pkgs/profpatsch/netencode') diff --git a/pkgs/profpatsch/netencode/netencode.rs b/pkgs/profpatsch/netencode/netencode.rs index b83aca7b..481cbb49 100644 --- a/pkgs/profpatsch/netencode/netencode.rs +++ b/pkgs/profpatsch/netencode/netencode.rs @@ -19,17 +19,47 @@ pub enum T { // TODO: make into &str Text(String), // Tags - Sum(Tag), + // TODO: make into &str + Sum(Tag>), // TODO: make into &str Record(HashMap>), List(Box>), } #[derive(Debug, PartialEq, Eq, Clone)] -pub struct Tag { +pub enum U<'a> { + Unit, + // Naturals + N3(u8), + N6(u64), + N7(u128), + // Integers + I3(i8), + I6(i64), + I7(i128), + // Text + Text(&'a [u8]), + // Tags + Sum(Tag<&'a str, Box>>), + Record(HashMap<&'a str, Box>>), + List(&'a [u8]), +} + +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Tag { // TODO: make into &str - pub tag: String, - pub val: Box + pub tag: S, + pub val: A +} + +impl Tag { + fn map(self, f: F) -> Tag + where F: Fn(A) -> B { + Tag { + tag: self.tag, + val: f(self.val) + } + } } fn encode_tag(w: &mut W, tag: String, val: T) -> std::io::Result<()> { @@ -84,7 +114,7 @@ pub fn text(s: String) -> T { } mod parse { - use super::{T, Tag}; + use super::{T, Tag, U}; use std::str::FromStr; use std::ops::Neg; @@ -95,7 +125,7 @@ mod parse { use nom::branch::{alt}; use nom::character::complete::{digit1, char}; use nom::sequence::{tuple}; - use nom::combinator::{map, map_res, flat_map, opt}; + use nom::combinator::{map, map_res, flat_map, map_parser, opt}; use nom::error::{context, ErrorKind, ParseError}; fn unit_t(s: &[u8]) -> IResult<&[u8], ()> { @@ -112,6 +142,21 @@ mod parse { )(s) } + fn sized(begin: char, end: char) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + move |s: &[u8]| { + let (s, (_, len, _)) = tuple(( + char(begin), + usize_t, + char(':') + ))(s)?; + let (s, (res, _)) = tuple(( + take(len), + char(end) + ))(s)?; + Ok((s, res)) + } + } + fn uint_t<'a, I: FromStr + 'a>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], I> { move |s: &'a [u8]| { @@ -151,37 +196,35 @@ mod parse { ) } - fn tag_t(s: &[u8]) -> IResult<&[u8], Tag> { - let (s, (_, len, _)) = tuple(( - char('<'), - usize_t, - char(':'), - ))(s)?; - let (s, (tag, _, recurse)) = tuple(( - take(len), - char('|'), - // recurses into the main parser - t_t - ))(s)?; - Ok((s, Tag { - tag: std::str::from_utf8(tag) - .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) - .map(|s| s.to_string())?, - val: Box::new(recurse) - })) + fn tag_t(s: &[u8]) -> IResult<&[u8], Tag>> { + // recurses into the main parser + map(tag_g(t_t), + |Tag {tag, val}| + Tag { + tag: tag.to_string(), + val: Box::new(val) + })(s) + } + + fn tag_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Tag<&'a str, O>> + where + P: Fn(&'a [u8]) -> IResult<&'a [u8], O> + { + move |s: &[u8]| { + let (s, tag) = sized('<', '|')(s)?; + let (s, val) = inner(s)?; + Ok((s, Tag { + tag: std::str::from_utf8(tag) + .map_err(|_| nom::Err::Failure((s, ErrorKind::Char)))?, + val + })) + + } } /// parse text scalar (`t5:hello,`) fn text(s: &[u8]) -> IResult<&[u8], T> { - let (s, (_, len, _)) = tuple(( - char('t'), - usize_t, - char(':') - ))(s)?; - let (s, (res, _)) = tuple(( - take(len), - char(',') - ))(s)?; + let (s, res) = text_g()(s)?; Ok((s, T::Text( std::str::from_utf8(res) .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) @@ -189,36 +232,93 @@ mod parse { ))) } + fn text_g() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + sized('t', ',') + } + fn list_t(s: &[u8]) -> IResult<&[u8], Vec> { - let (s, (_, _, _, vec, _)) = tuple(( - char('['), - usize_t, - char(':'), - nom::multi::many0(t_t), - char(']') - ))(s)?; - Ok((s, vec)) + map_parser(list_g(), nom::multi::many0(t_t))(s) + } + + fn list_g() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + sized('[', ']') + } + + fn skip() -> impl Fn(&[u8]) -> IResult<&[u8], ()> { + move |s: &[u8]| { + let (s, ()) = alt(( + // TODO: only use the sized parsers here + map(text, |_| ()), + map(unit_t, |_| ()), + map(list_g(), |_| ()), + map(t_t, |_| ()), + // TODO: add rest of parsers + ))(s)?; + Ok((s, ())) + } + } + + fn list_take<'a>(n: usize) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], Vec>> { + map_parser(list_g(), nom::multi::many_m_n(n, n, u_u)) + } + + // fn record_get<'a>(key: usize + + fn record_t<'a>(s: &'a [u8]) -> IResult<&'a [u8], HashMap>> { + let (s, hm) = record_g(t_t)(s)?; + Ok((s, + hm.into_iter().map( + |(k, v)| (k.to_string(), v) + ).collect::>())) } - fn record_t(s: &[u8]) -> IResult<&[u8], HashMap>> { - let (s, (_, _, _, map, _)) = tuple(( - char('{'), - usize_t, - char(':'), + fn record_g<'a, P, O>(inner: P) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], HashMap<&'a str, Box>> + where + O: Clone, + P: Fn(&'a [u8]) -> IResult<&'a [u8], O> + { + map_parser( + sized('{', '}'), nom::multi::fold_many1( - tag_t, + tag_g(inner), HashMap::new(), - |mut acc: HashMap<_, _>, Tag { tag, val }| { + |mut acc: HashMap<_, _>, Tag { tag, mut val }| { // ignore duplicated tag names that appear later if !acc.contains_key(&tag) { - acc.insert(tag, val); + acc.insert(tag, Box::new(val)); } acc } - ), - char('}') - ))(s)?; - Ok((s, map)) + ) + ) + } + + fn u_u(s: &[u8]) -> IResult<&[u8], U> { + alt(( + map(text_g(), U::Text), + map(unit_t, |()| U::Unit), + map(tag_g(u_u), |t| U::Sum(t.map(Box::new))), + map(list_g(), U::List), + map(record_g(u_u), U::Record), + + map(uint_t("n3"), |u| U::N3(u)), + map(uint_t("n6"), |u| U::N6(u)), + map(uint_t("n7"), |u| U::N7(u)), + map(int_t("i3"), |u| U::I3(u)), + map(int_t("i6"), |u| U::I6(u)), + map(int_t("i7"), |u| U::I7(u)), + + // less common + map(uint_t("n1"), |u| U::N3(u)), + map(uint_t("n2"), |u| U::N3(u)), + map(uint_t("n4"), |u| U::N6(u)), + map(uint_t("n5"), |u| U::N6(u)), + map(int_t("i1"), |u| U::I3(u)), + map(int_t("i2"), |u| U::I3(u)), + map(int_t("i4"), |u| U::I6(u)), + map(int_t("i5"), |u| U::I6(u)), + // TODO: 8, 9 not supported + ))(s) } fn t_t(s: &[u8]) -> IResult<&[u8], T> { @@ -333,6 +433,13 @@ mod parse { T::Unit, ])) ); + assert_eq!( + list_take(2)("[6:u,u,u,]".as_bytes()), + Ok(("".as_bytes(), vec![ + U::Unit, + U::Unit, + ])) + ); assert_eq!( list_t("[15:u,[7:t3:foo,]u,]".as_bytes()), Ok(("".as_bytes(), vec![ -- cgit 1.4.1