From b21008f5d7b62fabc3d6fddec29f39dd80a89d60 Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Fri, 26 Jun 2020 23:29:59 +0200 Subject: pkgs/profpatsch/netencode: add binary type --- pkgs/profpatsch/netencode/netencode.rs | 32 ++++++++++++++++++++++++++++++++ pkgs/profpatsch/netencode/spec.md | 18 +++++++++++++++--- 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'pkgs/profpatsch/netencode') diff --git a/pkgs/profpatsch/netencode/netencode.rs b/pkgs/profpatsch/netencode/netencode.rs index a3744946..0f7e1aa2 100644 --- a/pkgs/profpatsch/netencode/netencode.rs +++ b/pkgs/profpatsch/netencode/netencode.rs @@ -20,6 +20,7 @@ pub enum T { // Text // TODO: make into &str Text(String), + Binary(Vec), // Tags // TODO: make into &str Sum(Tag>), @@ -43,6 +44,7 @@ pub enum U<'a> { I7(i128), // Text Text(&'a [u8]), + Binary(&'a [u8]), // Tags Sum(Tag<&'a str, Box>>), Record(HashMap<&'a str, Box>>), @@ -83,6 +85,11 @@ pub fn encode(w: &mut W, t: T) -> std::io::Result<()> { T::I6(i) => write!(w, "i6:{},", i), T::I7(i) => write!(w, "i7:{},", i), T::Text(s) => write!(w, "t{}:{},", s.len(), s), + T::Binary(s) => { + write!(w, "b{}:", s.len()); + w.write(&s); + write!(w, ",") + }, T::Sum(Tag{tag, val}) => encode_tag(w, tag, *val), T::Record(m) => { let mut c = std::io::Cursor::new(vec![]); @@ -248,6 +255,14 @@ pub mod parse { sized('t', ',') } + fn binary<'a>() -> impl Fn(&'a [u8]) -> IResult<&'a [u8], T> { + map(binary_g(), |b| T::Binary(b.to_owned())) + } + + fn binary_g() -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> { + sized('b', ',') + } + fn list_t(s: &[u8]) -> IResult<&[u8], Vec> { map_parser(list_g(), nom::multi::many0(t_t))(s) } @@ -443,6 +458,23 @@ pub mod parse { ); } + #[test] + fn test_parse_binary() { + assert_eq!( + binary()("b5:hello,".as_bytes()), + Ok(("".as_bytes(), T::Binary(Vec::from("hello".to_owned())))) + ); + assert_eq!( + binary()("b4:fo,".as_bytes()), + // TODO: way better parse error messages + Err(nom::Err::Error(("fo,".as_bytes(), 
nom::error::ErrorKind::Eof))) + ); + assert_eq!( + binary()("b9:今日は,".as_bytes()), + Ok(("".as_bytes(), T::Binary(Vec::from("今日は".as_bytes())))) + ); + } + #[test] fn test_list() { assert_eq!( diff --git a/pkgs/profpatsch/netencode/spec.md b/pkgs/profpatsch/netencode/spec.md index e680034f..d1cc15c1 100644 --- a/pkgs/profpatsch/netencode/spec.md +++ b/pkgs/profpatsch/netencode/spec.md @@ -1,4 +1,4 @@ -# encode 0.1-unreleased +# netencode 0.1-unreleased [bencode][] and [netstring][]-inspired pipe format that should be trivial to parse (100 lines of code or less), mostly human-decipherable for easy debugging, and support nested record and sum types. @@ -42,7 +42,6 @@ TODO: should we add `f,` and `t,`? ### text - Text (`t`) that *must* be encoded as UTF-8, starting with its length in bytes: * The string `hello world` (11 bytes): `t11:hello world,` @@ -50,7 +49,20 @@ Text (`t`) that *must* be encoded as UTF-8, starting with its length in bytes: * The string `:,` (2 bytes): `t2::,,` * The empty sting `` (0 bytes): `t0:,` -TODO: add `b` for binary content. Even filesystem paths are not utf-8 encodable sometimes, yet the distinction of text with an encoding is useful, so we should keep `t` as is. +### binary +:LOGBOOK: +CLOCK: [2020-06-26 Fr 23:21] +:END: + +Arbitrary binary strings (`b`) that can contain any data, starting with its length in bytes. + +* The ASCII string `hello world` as binary data (11 bytes): `b11:hello world,` +* The empty binary string (0 bytes): `b0:,` +* The bytestring with `^D` (1 byte): `b1:,` + +Since the binary strings are length-prefixed, they can contain `\0` and no escaping is required. Care has to be taken in languages with `\0`-terminated bytestrings. + +Use text (`t`) if you have utf-8 encoded data. ## tagged values -- cgit 1.4.1