pkgs/profpatsch/execline: change el_semicolon to only read one arg

The C implementation of `el_semicolon` in execline only reads one argument at a time and returns an index into the rest of argv. This fits the way programs usually use it: they read a few arguments and a block or two, and then exec into `prog`. `prog` can be anything, including additional blocks, so the remainder of argv is left unparsed for `prog` to handle. The new `el_semicolon_full_argv` function provides the previous behaviour of parsing the whole argv. As a nice side effect, the rest of argv is now returned as a borrowed sub-slice of the input, without copying.
author: Profpatsch <mail@profpatsch.de> 2020-06-29 00:39:57 +0200
committer: Profpatsch <mail@profpatsch.de> 2020-06-29 00:39:57 +0200
commit: 54e19e0aeac16abd02eac2623e1eeefbb05ca2a3 (patch)
tree: 5610fd3bef7b8d56ce07e1eca4c04fc9eea6770b /pkgs/profpatsch/execline
parent: 3f79ddf06375c3b34569c8b9ce91a607bbd0f052 (diff)
1 files changed, 80 insertions, 37 deletions
diff --git a/pkgs/profpatsch/execline/el_semicolon.rs b/pkgs/profpatsch/execline/el_semicolon.rs
index 46bf0aed..1aba214e 100644
--- a/pkgs/profpatsch/execline/el_semicolon.rs
+++ b/pkgs/profpatsch/execline/el_semicolon.rs
@@ -18,59 +18,102 @@ pub enum Arg<'a> {
     /// An empty block was just an empty string on its own.
     /// You will have to decide whether you want to treat
     /// it as a block or an empty string.
-    Block(Vec<&'a [u8]>)
+    Block(Vec<&'a [u8]>),
+    /// The given argv list is empty
+    EndOfArgv
 }
 
 #[derive(Debug, PartialEq, Eq)]
-pub enum Error {
+pub enum Error<'a> {
     /// The argument was not quoted, at index.
-    UnquotedArgument(usize),
+    UnquotedArgument(&'a [u8]),
     /// The last block was not terminated
-    UnterminatedBlock
+    UnterminatedBlock,
 }
 
-/// Parse a command line into a list of `Arg`s.
+enum ArgKind<'a> {
+    // Reference to the argument without the block prefix
+    BlockArg(&'a [u8]),
+    // Matches BLOCK_END
+    BlockEnd(&'a [u8]),
+    // Same argument
+    JustArg(&'a [u8])
+}
+
+
+/// Finds out whether an argument belongs to a block
+/// or is just a normal argument.
+fn what<'a>(arg: &'a [u8]) -> ArgKind<'a> {
+    let arg = arg.as_ref();
+    if arg == BLOCK_END {
+        ArgKind::BlockEnd(arg)
+    } else {
+        match arg[0] {
+            BLOCK_QUOTE_CHAR => ArgKind::BlockArg(&arg[1..]),
+            _ => ArgKind::JustArg(arg)
+        }
+    }
+}
+
+/// Fetch one Arg from the given argv,
+/// which is either a full block or one plain argument.
+///
+/// Returns the Arg and the unparsed rest of argv.
 ///
 /// Blocks can be nested by adding more spaces,
 /// but `el_semicolon` will only parse one level.
 /// Usually that is intended, because nested blocks
 /// are intended to be parsed by nested programs.
-pub fn el_semicolon<'a, S: AsRef<[u8]>>(args: &'a [S]) -> Result<Vec<Arg<'a>>, Error> {
+pub fn el_semicolon<'a>(args: &'a [&'a [u8]]) -> Result<(Arg<'a>, &'a [&'a [u8]]), Error<'a>> {
+    let args = args.as_ref();
     let mut cur_block : Option<Vec<&'a [u8]>> = None;
     let mut res : Vec<Arg<'a>> = vec![];
-    for (i, arg) in args.iter().enumerate() {
-        let arg = arg.as_ref();
-        if arg == BLOCK_END {
-            let bl = cur_block.take();
-            match bl {
-                None => res.push(Arg::Block(vec![])),
-                Some(bl) => res.push(Arg::Block(bl))
-            }
-        } else {
-            match arg[0] {
-                BLOCK_QUOTE_CHAR => {
-                    let new = &arg[1..];
-                    cur_block = Some(cur_block.map_or_else(
-                        || vec![new],
-                        |mut bl| { bl.push(new); bl }
-                    ))
-                },
-                _ => {
-                    if cur_block != None {
-                        return Err(Error::UnquotedArgument(i));
+    match args.first() {
+        None => Ok((Arg::EndOfArgv, args)),
+        Some(arg) => match what(arg) {
+            ArgKind::BlockEnd(arg) => Ok((Arg::Arg(arg), &args[1..])),
+            ArgKind::JustArg(arg) => Ok((Arg::Arg(arg), &args[1..])),
+            ArgKind::BlockArg(arg) => {
+                // if it’s a block, we have to repeatedly
+                // fetch more args
+                let mut block: Vec<&'a [u8]> = vec![arg];
+                // we already looked at the 0th element
+                let mut args = &args[1..];
+                loop {
+                    match args.first() {
+                        None => break Err(Error::UnterminatedBlock),
+                        Some(arg) => match what(arg) {
+                            ArgKind::BlockEnd(_) => break Ok((Arg::Block(block), &args[1..])),
+                            ArgKind::JustArg(arg) => break Err(Error::UnquotedArgument(arg)),
+                            ArgKind::BlockArg(arg) => block.push(arg),
+                        }
                     }
-                    res.push(Arg::Arg(arg))
+                    args = &args[1..];
                 }
             }
         }
     }
-    if cur_block != None {
-        Err(Error::UnterminatedBlock)
-    } else {
-        Ok(res)
+}
+
+pub fn el_semicolon_full_argv<'a>(args: &'a [&'a [u8]]) -> Result<Vec<Arg<'a>>, Error<'a>> {
+    let mut res = vec![];
+    let mut args = args.as_ref();
+    loop {
+        let (arg, rest) = match el_semicolon(args) {
+            Ok((res, rest)) => (res, rest),
+            Err(err) => break Err(err)
+        };
+        match arg {
+            Arg::Arg(_) => res.push(arg),
+            Arg::Block(_) => res.push(arg),
+            Arg::EndOfArgv => break Ok(res),
+        }
+        args = &rest;
     }
 }
 
+
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -78,7 +121,7 @@ mod tests {
     #[test]
     fn success() {
         assert_eq!(
-            el_semicolon(&vec![
+            el_semicolon_full_argv(&vec![
                 "-b".as_bytes(),
                 " echo".as_bytes(),
                 " hi".as_bytes(),
@@ -101,7 +144,7 @@ mod tests {
     #[test]
     fn unquoted_argument() {
         assert_eq!(
-            el_semicolon(&vec![
+            el_semicolon_full_argv(&vec![
                 "-b".as_bytes(),
                 " echo".as_bytes(),
                 "hi".as_bytes(),
@@ -109,24 +152,24 @@ mod tests {
                 "test".as_bytes(),
                 "".as_bytes(),
             ]),
-            Err(Error::UnquotedArgument(2))
+            Err(Error::UnquotedArgument("hi".as_bytes()))
         );
         assert_eq!(
-            el_semicolon(&vec![
+            el_semicolon_full_argv(&vec![
                 " -b".as_bytes(),
                 " echo".as_bytes(),
                 "".as_bytes(),
                 " test".as_bytes(),
                 "a".as_bytes(),
             ]),
-            Err(Error::UnquotedArgument(4))
+            Err(Error::UnquotedArgument("a".as_bytes()))
         )
     }
 
     #[test]
     fn unterminated_block() {
         assert_eq!(
-            el_semicolon(&vec![
+            el_semicolon_full_argv(&vec![
                 "-b".as_bytes(),
                 " echo".as_bytes(),
             ]),
author	Profpatsch <mail@profpatsch.de>	2020-06-29 00:39:57 +0200
committer	Profpatsch <mail@profpatsch.de>	2020-06-29 00:39:57 +0200
commit	54e19e0aeac16abd02eac2623e1eeefbb05ca2a3 (patch)
tree	5610fd3bef7b8d56ce07e1eca4c04fc9eea6770b /pkgs/profpatsch/execline
parent	3f79ddf06375c3b34569c8b9ce91a607bbd0f052 (diff)