From 54e19e0aeac16abd02eac2623e1eeefbb05ca2a3 Mon Sep 17 00:00:00 2001 From: Profpatsch Date: Mon, 29 Jun 2020 00:39:57 +0200 Subject: pkgs/profpatsch/execline: change el_semicolon to only read one arg The C implementation of el_semicolon in execline only reads one argument at a time and returns an index into the rest of argv. This makes sense for the usual application of programs using it, which is just reading a few arguments and a block or two, and then executing into `prog`. `prog` could be anything really, including additional blocks. The new `el_semicolon_full_argv` function exports the previous behaviour of parsing the whole thing. As a nice side-effect, we return the rest of argv in-place. --- pkgs/profpatsch/execline/el_semicolon.rs | 117 +++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 37 deletions(-) (limited to 'pkgs/profpatsch/execline') diff --git a/pkgs/profpatsch/execline/el_semicolon.rs b/pkgs/profpatsch/execline/el_semicolon.rs index 46bf0aed..1aba214e 100644 --- a/pkgs/profpatsch/execline/el_semicolon.rs +++ b/pkgs/profpatsch/execline/el_semicolon.rs @@ -18,59 +18,102 @@ pub enum Arg<'a> { /// An empty block was just an empty string on its own. /// You will have to decide whether you want to treat /// it as a block or an empty string. - Block(Vec<&'a [u8]>) + Block(Vec<&'a [u8]>), + /// The given argv list is empty + EndOfArgv } #[derive(Debug, PartialEq, Eq)] -pub enum Error { +pub enum Error<'a> { /// The argument was not quoted, at index. - UnquotedArgument(usize), + UnquotedArgument(&'a [u8]), /// The last block was not terminated - UnterminatedBlock + UnterminatedBlock, } -/// Parse a command line into a list of `Arg`s. +enum ArgKind<'a> { + // Reference to the argument without the block prefix + BlockArg(&'a [u8]), + // Matches BLOCK_END + BlockEnd(&'a [u8]), + // Same argument + JustArg(&'a [u8]) +} + + +/// Finds out whether an argument belongs to a block +/// or is just a normal argument. 
+fn what<'a>(arg: &'a [u8]) -> ArgKind<'a> { + let arg = arg.as_ref(); + if arg == BLOCK_END { + ArgKind::BlockEnd(arg) + } else { + match arg[0] { + BLOCK_QUOTE_CHAR => ArgKind::BlockArg(&arg[1..]), + _ => ArgKind::JustArg(arg) + } + } +} + +/// Fetch one Arg from the given argv, +/// which is either a full block or one plain argument. +/// +/// Returns the Arg and the unparsed rest of argv. /// /// Blocks can be nested by adding more spaces, /// but `el_semicolon` will only parse one level. /// Usually that is intended, because nested blocks /// are intended to be parsed by nested programs. -pub fn el_semicolon<'a, S: AsRef<[u8]>>(args: &'a [S]) -> Result<Vec<Arg<'a>>, Error> { +pub fn el_semicolon<'a>(args: &'a [&'a [u8]]) -> Result<(Arg<'a>, &'a [&'a [u8]]), Error<'a>> { + let args = args.as_ref(); let mut cur_block : Option<Vec<&'a [u8]>> = None; let mut res : Vec<Arg<'a>> = vec![]; - for (i, arg) in args.iter().enumerate() { - let arg = arg.as_ref(); - if arg == BLOCK_END { - let bl = cur_block.take(); - match bl { - None => res.push(Arg::Block(vec![])), - Some(bl) => res.push(Arg::Block(bl)) - } - } else { - match arg[0] { - BLOCK_QUOTE_CHAR => { - let new = &arg[1..]; - cur_block = Some(cur_block.map_or_else( - || vec![new], - |mut bl| { bl.push(new); bl } - )) - }, - _ => { - if cur_block != None { - return Err(Error::UnquotedArgument(i)); + match args.first() { + None => Ok((Arg::EndOfArgv, args)), + Some(arg) => match what(arg) { + ArgKind::BlockEnd(arg) => Ok((Arg::Arg(arg), &args[1..])), + ArgKind::JustArg(arg) => Ok((Arg::Arg(arg), &args[1..])), + ArgKind::BlockArg(arg) => { + // if it’s a block, we have to repeatedly + // fetch more args + let mut block: Vec<&'a [u8]> = vec![arg]; + // we already looked at the 0th element + let mut args = &args[1..]; + loop { + match args.first() { + None => break Err(Error::UnterminatedBlock), + Some(arg) => match what(arg) { + ArgKind::BlockEnd(_) => break Ok((Arg::Block(block), &args[1..])), + ArgKind::JustArg(arg) => break 
Err(Error::UnquotedArgument(arg)), + ArgKind::BlockArg(arg) => block.push(arg), + } } - res.push(Arg::Arg(arg)) + args = &args[1..]; } } } } - if cur_block != None { - Err(Error::UnterminatedBlock) - } else { - Ok(res) +} + +pub fn el_semicolon_full_argv<'a>(args: &'a [&'a [u8]]) -> Result<Vec<Arg<'a>>, Error<'a>> { + let mut res = vec![]; + let mut args = args.as_ref(); + loop { + let (arg, rest) = match el_semicolon(args) { + Ok((res, rest)) => (res, rest), + Err(err) => break Err(err) + }; + match arg { + Arg::Arg(_) => res.push(arg), + Arg::Block(_) => res.push(arg), + Arg::EndOfArgv => break Ok(res), + } + args = &rest; } } + + #[cfg(test)] mod tests { use super::*; @@ -78,7 +121,7 @@ mod tests { #[test] fn success() { assert_eq!( - el_semicolon(&vec![ + el_semicolon_full_argv(&vec![ "-b".as_bytes(), " echo".as_bytes(), " hi".as_bytes(), @@ -101,7 +144,7 @@ mod tests { #[test] fn unquoted_argument() { assert_eq!( - el_semicolon(&vec![ + el_semicolon_full_argv(&vec![ "-b".as_bytes(), " echo".as_bytes(), "hi".as_bytes(), @@ -109,24 +152,24 @@ mod tests { "test".as_bytes(), "".as_bytes(), ]), - Err(Error::UnquotedArgument(2)) + Err(Error::UnquotedArgument("hi".as_bytes())) ); assert_eq!( - el_semicolon(&vec![ + el_semicolon_full_argv(&vec![ " -b".as_bytes(), " echo".as_bytes(), "".as_bytes(), " test".as_bytes(), "a".as_bytes(), ]), - Err(Error::UnquotedArgument(4)) + Err(Error::UnquotedArgument("a".as_bytes())) ) } #[test] fn unterminated_block() { assert_eq!( - el_semicolon(&vec![ + el_semicolon_full_argv(&vec![ "-b".as_bytes(), " echo".as_bytes(), ]), -- cgit 1.4.1