From d9ebfe5495801ad231afec6b9c0fff899332d4db Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Tue, 2 Jun 2020 01:54:50 +0200 Subject: pkgs/profpatsch/encode: put parser in submodule --- pkgs/profpatsch/encode/encode.rs | 564 +++++++++++++++++++-------------------- pkgs/profpatsch/encode/spec.md | 2 +- 2 files changed, 282 insertions(+), 284 deletions(-) (limited to 'pkgs') diff --git a/pkgs/profpatsch/encode/encode.rs b/pkgs/profpatsch/encode/encode.rs index 9b3f73c9..ab09a372 100644 --- a/pkgs/profpatsch/encode/encode.rs +++ b/pkgs/profpatsch/encode/encode.rs @@ -1,27 +1,9 @@ extern crate nom; -use nom::{ - IResult -}; - -use std::str::FromStr; -use std::ops::Neg; use std::collections::HashMap; -use nom::bytes::complete::{tag, take}; -use nom::branch::{alt}; -use nom::character::complete::{digit1, char}; -use nom::sequence::{tuple}; -use nom::combinator::{map, map_res, flat_map, opt}; -use nom::error::{context, ErrorKind, ParseError}; - -#[derive(Debug, PartialEq, Eq, Clone)] -struct Tag { - tag: String, - val: Box -} #[derive(Debug, PartialEq, Eq, Clone)] -enum T { +pub enum T { // Unit Unit, // Naturals @@ -40,303 +22,319 @@ enum T { List(Box>), } -#[derive(Debug, PartialEq, Eq)] -enum Err { - Empty, - ParseLen, +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct Tag { + pub tag: String, + pub val: Box } -fn unit_t(s: &[u8]) -> IResult<&[u8], ()> { - let (s, _) = context("unit", tag("u,"))(s)?; - Ok((s, ())) -} +mod parsing { + use super::{T, Tag}; -fn usize_t(s: &[u8]) -> IResult<&[u8], usize> { - context( - "usize", - map_res( - map_res(digit1, |n| std::str::from_utf8(n)), - |s| s.parse::()) - )(s) -} + use std::str::FromStr; + use std::ops::Neg; + use std::collections::HashMap; + use nom::{IResult}; + use nom::bytes::complete::{tag, take}; + use nom::branch::{alt}; + use nom::character::complete::{digit1, char}; + use nom::sequence::{tuple}; + use nom::combinator::{map, map_res, flat_map, opt}; + use nom::error::{context, ErrorKind, ParseError}; -fn uint_t<'a, I: FromStr + 'a>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], I> { - move |s: &'a [u8]| { - let (s, (_, _, int, _)) = tuple(( - tag(t.as_bytes()), - char(':'), + fn unit_t(s: &[u8]) -> IResult<&[u8], ()> { + let (s, _) = context("unit", tag("u,"))(s)?; + Ok((s, ())) + } + + fn usize_t(s: &[u8]) -> IResult<&[u8], usize> { + context( + "usize", map_res( - map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), - |s| s.parse::() - ), - char(',') - ))(s)?; - Ok((s, int)) + map_res(digit1, |n| std::str::from_utf8(n)), + |s| s.parse::()) + )(s) } -} -fn int_t<'a, I: FromStr + Neg>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&[u8], I> { - context( - t, + + fn uint_t<'a, I: FromStr + 'a>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&'a [u8], I> { move |s: &'a [u8]| { - let (s, (_, _, neg, int, _)) = tuple(( + let (s, (_, _, int, _)) = tuple(( tag(t.as_bytes()), char(':'), - opt(char('-')), map_res( map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), |s| s.parse::() ), char(',') ))(s)?; - let res = match neg { - Some(_) => -int, - None => int, - }; - Ok((s, res)) + Ok((s, int)) } - ) -} + } -fn tag_t(s: &[u8]) -> IResult<&[u8], Tag> { - let (s, (_, len, _)) = tuple(( - char('<'), - usize_t, - char(':'), - ))(s)?; - let (s, (tag, _, recurse)) = tuple(( - take(len), - char('|'), - // recurses into the main parser - t_t - ))(s)?; - Ok((s, Tag { - tag: std::str::from_utf8(tag) - .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) - .map(|s| s.to_string())?, - val: Box::new(recurse) - })) -} + fn int_t<'a, I: FromStr + Neg>(t: &'static str) -> impl Fn(&'a [u8]) -> IResult<&[u8], I> { + context( + t, + move |s: &'a [u8]| { + let (s, (_, _, neg, int, _)) = tuple(( + tag(t.as_bytes()), + char(':'), + opt(char('-')), + map_res( + map_res(digit1, |n: &[u8]| std::str::from_utf8(n)), + |s| s.parse::() + ), + char(',') + ))(s)?; + let res = match neg { + Some(_) => -int, + None => int, + }; + Ok((s, res)) + } + ) + } -/// parse text scalar (`t5:hello,`) -fn text(s: &[u8]) -> IResult<&[u8], T> { - let (s, (_, len, _)) = tuple(( - char('t'), - usize_t, - char(':') - ))(s)?; - let (s, (res, _)) = tuple(( - take(len), - char(',') - ))(s)?; - Ok((s, T::Text( - std::str::from_utf8(res) - .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) - .map(|s| s.to_string())?, - ))) -} + fn tag_t(s: &[u8]) -> IResult<&[u8], Tag> { + let (s, (_, len, _)) = tuple(( + char('<'), + usize_t, + char(':'), + ))(s)?; + let (s, (tag, _, recurse)) = tuple(( + take(len), + char('|'), + // recurses into the main parser + t_t + ))(s)?; + Ok((s, Tag { + tag: std::str::from_utf8(tag) + .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) + .map(|s| s.to_string())?, + val: Box::new(recurse) + })) + } -fn list_t(s: &[u8]) -> IResult<&[u8], Vec> { - let (s, (_, vec, _)) = tuple(( - char('['), - nom::multi::many0(t_t), - char(']') - ))(s)?; - Ok((s, vec)) -} + /// parse text scalar (`t5:hello,`) + fn text(s: &[u8]) -> IResult<&[u8], T> { + let (s, (_, len, _)) = tuple(( + char('t'), + usize_t, + char(':') + ))(s)?; + let (s, (res, _)) = tuple(( + take(len), + char(',') + ))(s)?; + Ok((s, T::Text( + std::str::from_utf8(res) + .map_err(|_| nom::Err::Failure((s, ErrorKind::Char))) + .map(|s| s.to_string())?, + ))) + } -fn record_t(s: &[u8]) -> IResult<&[u8], HashMap>> { - let (s, (_, map, _)) = tuple(( - char('{'), - nom::multi::fold_many1( - tag_t, - HashMap::new(), - |mut acc: HashMap<_, _>, Tag { tag, val }| { - // ignore duplicated tag names that appear later - if !acc.contains_key(&tag) { - acc.insert(tag, val); + fn list_t(s: &[u8]) -> IResult<&[u8], Vec> { + let (s, (_, vec, _)) = tuple(( + char('['), + nom::multi::many0(t_t), + char(']') + ))(s)?; + Ok((s, vec)) + } + + fn record_t(s: &[u8]) -> IResult<&[u8], HashMap>> { + let (s, (_, map, _)) = tuple(( + char('{'), + nom::multi::fold_many1( + tag_t, + HashMap::new(), + |mut acc: HashMap<_, _>, Tag { tag, val }| { + // ignore duplicated tag names that appear later + if !acc.contains_key(&tag) { + acc.insert(tag, val); + } + acc } - acc - } - ), - char('}') - ))(s)?; - Ok((s, map)) -} + ), + char('}') + ))(s)?; + Ok((s, map)) + } -fn t_t(s: &[u8]) -> IResult<&[u8], T> { - alt(( - text, - map(unit_t, |_| T::Unit), - map(tag_t, |t| T::Sum(t)), - map(list_t, |l| T::List(Box::new(l))), - map(record_t, |p| T::Record(p)), + fn t_t(s: &[u8]) -> IResult<&[u8], T> { + alt(( + text, + map(unit_t, |_| T::Unit), + map(tag_t, |t| T::Sum(t)), + map(list_t, |l| T::List(Box::new(l))), + map(record_t, |p| T::Record(p)), - // 8, 64 and 128 bit - map(uint_t("n3"), |u| T::N3(u)), - map(uint_t("n6"), |u| T::N6(u)), - map(uint_t("n7"), |u| T::N7(u)), - map(int_t("i3"), |u| T::I3(u)), - map(int_t("i6"), |u| T::I6(u)), - map(int_t("i7"), |u| T::I7(u)), + // 8, 64 and 128 bit + map(uint_t("n3"), |u| T::N3(u)), + map(uint_t("n6"), |u| T::N6(u)), + map(uint_t("n7"), |u| T::N7(u)), + map(int_t("i3"), |u| T::I3(u)), + map(int_t("i6"), |u| T::I6(u)), + map(int_t("i7"), |u| T::I7(u)), - // less common - map(uint_t("n1"), |u| T::N3(u)), - map(uint_t("n2"), |u| T::N3(u)), - map(uint_t("n4"), |u| T::N6(u)), - map(uint_t("n5"), |u| T::N6(u)), - map(int_t("i1"), |u| T::I3(u)), - map(int_t("i2"), |u| T::I3(u)), - map(int_t("i4"), |u| T::I6(u)), - map(int_t("i5"), |u| T::I6(u)), - // TODO: 8, 9 not supported - ))(s) -} + // less common + map(uint_t("n1"), |u| T::N3(u)), + map(uint_t("n2"), |u| T::N3(u)), + map(uint_t("n4"), |u| T::N6(u)), + map(uint_t("n5"), |u| T::N6(u)), + map(int_t("i1"), |u| T::I3(u)), + map(int_t("i2"), |u| T::I3(u)), + map(int_t("i4"), |u| T::I6(u)), + map(int_t("i5"), |u| T::I6(u)), + // TODO: 8, 9 not supported + ))(s) + } -#[cfg(test)] -mod tests { - use super::*; + #[cfg(test)] + mod tests { + use super::*; - #[test] - fn test_parse_unit_t() { - assert_eq!( - unit_t("u,".as_bytes()), - Ok(("".as_bytes(), ())) - ); - } + #[test] + fn test_parse_unit_t() { + assert_eq!( + unit_t("u,".as_bytes()), + Ok(("".as_bytes(), ())) + ); + } - #[test] - fn test_parse_usize_t() { - assert_eq!( - usize_t("32foo".as_bytes()), - Ok(("foo".as_bytes(), 32)) - ); - } + #[test] + fn test_parse_usize_t() { + assert_eq!( + usize_t("32foo".as_bytes()), + Ok(("foo".as_bytes(), 32)) + ); + } - #[test] - fn test_parse_int_t() { - assert_eq!( - uint_t::("n3")("n3:42,abc".as_bytes()), - Ok(("abc".as_bytes(), 42)) - ); - assert_eq!( - uint_t::("n3")("n3:1024,abc".as_bytes()), - Err(nom::Err::Error(("1024,abc".as_bytes(), nom::error::ErrorKind::MapRes))) - ); - assert_eq!( - int_t::("i6")("i6:-23,abc".as_bytes()), - Ok(("abc".as_bytes(), -23)) - ); - assert_eq!( - int_t::("i3")("i3:0,:abc".as_bytes()), - Ok((":abc".as_bytes(), 0)) - ); - assert_eq!( - uint_t::("n7")("n7:09,".as_bytes()), - Ok(("".as_bytes(), 9)) - ); - // assert_eq!( - // length("c"), - // Err(nom::Err::Error(("c", nom::error::ErrorKind::Digit))) - // ); - // assert_eq!( - // length(":"), - // Err(nom::Err::Error((":", nom::error::ErrorKind::Digit))) - // ); - } + #[test] + fn test_parse_int_t() { + assert_eq!( + uint_t::("n3")("n3:42,abc".as_bytes()), + Ok(("abc".as_bytes(), 42)) + ); + assert_eq!( + uint_t::("n3")("n3:1024,abc".as_bytes()), + Err(nom::Err::Error(("1024,abc".as_bytes(), nom::error::ErrorKind::MapRes))) + ); + assert_eq!( + int_t::("i6")("i6:-23,abc".as_bytes()), + Ok(("abc".as_bytes(), -23)) + ); + assert_eq!( + int_t::("i3")("i3:0,:abc".as_bytes()), + Ok((":abc".as_bytes(), 0)) + ); + assert_eq!( + uint_t::("n7")("n7:09,".as_bytes()), + Ok(("".as_bytes(), 9)) + ); + // assert_eq!( + // length("c"), + // Err(nom::Err::Error(("c", nom::error::ErrorKind::Digit))) + // ); + // assert_eq!( + // length(":"), + // Err(nom::Err::Error((":", nom::error::ErrorKind::Digit))) + // ); + } - #[test] - fn test_parse_text() { - assert_eq!( - text("t5:hello,".as_bytes()), - Ok(("".as_bytes(), T::Text("hello".to_owned()))) - ); - assert_eq!( - text("t4:fo,".as_bytes()), - // TODO: way better parse error messages - Err(nom::Err::Error(("fo,".as_bytes(), nom::error::ErrorKind::Eof))) - ); - assert_eq!( - text("t9:今日は,".as_bytes()), - Ok(("".as_bytes(), T::Text("今日は".to_owned()))) - ); - } + #[test] + fn test_parse_text() { + assert_eq!( + text("t5:hello,".as_bytes()), + Ok(("".as_bytes(), T::Text("hello".to_owned()))) + ); + assert_eq!( + text("t4:fo,".as_bytes()), + // TODO: way better parse error messages + Err(nom::Err::Error(("fo,".as_bytes(), nom::error::ErrorKind::Eof))) + ); + assert_eq!( + text("t9:今日は,".as_bytes()), + Ok(("".as_bytes(), T::Text("今日は".to_owned()))) + ); + } - #[test] - fn test_list() { - assert_eq!( - list_t("[]".as_bytes()), - Ok(("".as_bytes(), vec![])) - ); - assert_eq!( - list_t("[u,u,u,]".as_bytes()), - Ok(("".as_bytes(), vec![ - T::Unit, - T::Unit, - T::Unit, - ])) - ); - assert_eq!( - list_t("[u,[t3:foo,]u,]".as_bytes()), - Ok(("".as_bytes(), vec![ - T::Unit, - T::List(Box::new(vec![T::Text("foo".to_owned())])), - T::Unit, - ])) - ); - } + #[test] + fn test_list() { + assert_eq!( + list_t("[]".as_bytes()), + Ok(("".as_bytes(), vec![])) + ); + assert_eq!( + list_t("[u,u,u,]".as_bytes()), + Ok(("".as_bytes(), vec![ + T::Unit, + T::Unit, + T::Unit, + ])) + ); + assert_eq!( + list_t("[u,[t3:foo,]u,]".as_bytes()), + Ok(("".as_bytes(), vec![ + T::Unit, + T::List(Box::new(vec![T::Text("foo".to_owned())])), + T::Unit, + ])) + ); + } - #[test] - fn test_record() { - assert_eq!( - record_t("{<1:a|u,<1:b|u,<1:c|u,}".as_bytes()), - Ok(("".as_bytes(), vec![ - ("a".to_owned(), Box::new(T::Unit)), - ("b".to_owned(), Box::new(T::Unit)), - ("c".to_owned(), Box::new(T::Unit)), - ].into_iter().collect::>>())) - ); - // duplicated keys are ignored (first is taken) - assert_eq!( - record_t("{<1:a|u,<1:b|u,<1:a|i1:-1,}".as_bytes()), - Ok(("".as_bytes(), vec![ - ("a".to_owned(), Box::new(T::Unit)), - ("b".to_owned(), Box::new(T::Unit)), - ].into_iter().collect::>())) - ); - } + #[test] + fn test_record() { + assert_eq!( + record_t("{<1:a|u,<1:b|u,<1:c|u,}".as_bytes()), + Ok(("".as_bytes(), vec![ + ("a".to_owned(), Box::new(T::Unit)), + ("b".to_owned(), Box::new(T::Unit)), + ("c".to_owned(), Box::new(T::Unit)), + ].into_iter().collect::>>())) + ); + // duplicated keys are ignored (first is taken) + assert_eq!( + record_t("{<1:a|u,<1:b|u,<1:a|i1:-1,}".as_bytes()), + Ok(("".as_bytes(), vec![ + ("a".to_owned(), Box::new(T::Unit)), + ("b".to_owned(), Box::new(T::Unit)), + ].into_iter().collect::>())) + ); + } - #[test] - fn test_parse() { - assert_eq!( - t_t("n3:255,".as_bytes()), - Ok(("".as_bytes(), T::N3(255))) - ); - assert_eq!( - t_t("t6:halloo,".as_bytes()), - Ok(("".as_bytes(), T::Text("halloo".to_owned()))) - ); - assert_eq!( - t_t("<3:foo|t6:halloo,".as_bytes()), - Ok(("".as_bytes(), T::Sum (Tag { - tag: "foo".to_owned(), - val: Box::new(T::Text("halloo".to_owned())) - }))) - ); - // { a: Unit - // , foo: List } - assert_eq!( - t_t("{<1:a|u,<3:foo|[<1:A|u,<1:A|u,<1:B|[i3:127,]]}".as_bytes()), - Ok(("".as_bytes(), T::Record(vec![ - ("a".to_owned(), Box::new(T::Unit)), - ("foo".to_owned(), Box::new(T::List(Box::new(vec![ - T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }), - T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }), - T::Sum(Tag { tag: "B".to_owned(), val: Box::new(T::List(Box::new(vec![T::I3(127)]))) }), - ])))) - ].into_iter().collect::>>()))) - ); - } + #[test] + fn test_parse() { + assert_eq!( + t_t("n3:255,".as_bytes()), + Ok(("".as_bytes(), T::N3(255))) + ); + assert_eq!( + t_t("t6:halloo,".as_bytes()), + Ok(("".as_bytes(), T::Text("halloo".to_owned()))) + ); + assert_eq!( + t_t("<3:foo|t6:halloo,".as_bytes()), + Ok(("".as_bytes(), T::Sum (Tag { + tag: "foo".to_owned(), + val: Box::new(T::Text("halloo".to_owned())) + }))) + ); + // { a: Unit + // , foo: List } + assert_eq!( + t_t("{<1:a|u,<3:foo|[<1:A|u,<1:A|u,<1:B|[i3:127,]]}".as_bytes()), + Ok(("".as_bytes(), T::Record(vec![ + ("a".to_owned(), Box::new(T::Unit)), + ("foo".to_owned(), Box::new(T::List(Box::new(vec![ + T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }), + T::Sum(Tag { tag: "A".to_owned(), val: Box::new(T::Unit) }), + T::Sum(Tag { tag: "B".to_owned(), val: Box::new(T::List(Box::new(vec![T::I3(127)]))) }), + ])))) + ].into_iter().collect::>>()))) + ); + } + } } diff --git a/pkgs/profpatsch/encode/spec.md b/pkgs/profpatsch/encode/spec.md index d3624c84..6588c654 100644 --- a/pkgs/profpatsch/encode/spec.md +++ b/pkgs/profpatsch/encode/spec.md @@ -23,7 +23,7 @@ Bit sizes are specified in 2^n increments, 1 to 9 (`n1`..`n9`, `i1`..`n9`). * Natural `1234` that fits in 32 bits (2^5): `n5:1234,` * Integer `-42` that fits in 8 bits (2^3): `i3:-42,` -* Integer `23` that fits in 64 bits (2^6): `i64:23,` +* Integer `23` that fits in 64 bits (2^6): `i6:23,` * Integer `-1` that fits in 512 bits (2^9): `i9:-1,` * Natural `0` that fits in 1 bit (2^1): `n1:0,` -- cgit 1.4.1