diff options
Diffstat (limited to 'vendor/shell-words/src/lib.rs')
-rw-r--r-- | vendor/shell-words/src/lib.rs | 507 |
1 files changed, 507 insertions, 0 deletions
diff --git a/vendor/shell-words/src/lib.rs b/vendor/shell-words/src/lib.rs new file mode 100644 index 0000000..b93001f --- /dev/null +++ b/vendor/shell-words/src/lib.rs @@ -0,0 +1,507 @@ +// Copyright 2018 Tomasz Miąsko +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE> +// or the MIT license <LICENSE-MIT>, at your option. +// +//! Process command line according to parsing rules of Unix shell as specified +//! in [Shell Command Language in POSIX.1-2008][posix-shell]. +//! +//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html + +#![cfg_attr(not(feature = "std"), no_std)] +#![forbid(unsafe_code)] + +#[cfg(feature = "std")] +extern crate core; + +use core::fmt; +use core::mem; + +#[cfg(not(feature = "std"))] +#[macro_use] +extern crate alloc; + +#[cfg(not(feature = "std"))] +use alloc::string::String; +#[cfg(not(feature = "std"))] +use alloc::vec::Vec; + +#[cfg(not(feature = "std"))] +use alloc::borrow::Cow; +#[cfg(feature = "std")] +use std::borrow::Cow; + +/// An error returned when shell parsing fails. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct ParseError; + +impl fmt::Display for ParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("missing closing quote") + } +} + +#[cfg(feature = "std")] +impl std::error::Error for ParseError {} + +enum State { + /// Within a delimiter. + Delimiter, + /// After backslash, but before starting word. + Backslash, + /// Within an unquoted word. + Unquoted, + /// After backslash in an unquoted word. + UnquotedBackslash, + /// Within a single quoted word. + SingleQuoted, + /// Within a double quoted word. + DoubleQuoted, + /// After backslash inside a double quoted word. + DoubleQuotedBackslash, + /// Inside a comment. + Comment, +} + +/// Splits command line into separate arguments, in much the same way Unix shell +/// would, but without many of expansion the shell would perform. +/// +/// The split functionality is compatible with behaviour of Unix shell, but with +/// word expansions limited to quote removal, and without special token +/// recognition rules for operators. +/// +/// The result is exactly the same as one obtained from Unix shell as long as +/// those unsupported features are not present in input: no operators, no +/// variable assignments, no tilde expansion, no parameter expansion, no command +/// substitution, no arithmetic expansion, no pathname expansion. +/// +/// In case those unsupported shell features are present, the syntax that +/// introduce them is interpreted literally. +/// +/// # Errors +/// +/// When input contains unmatched quote, an error is returned. +/// +/// # Compatibility with other implementations +/// +/// It should be fully compatible with g_shell_parse_argv from GLib, except that +/// in GLib it is an error not to have any words after tokenization. +/// +/// It is also very close to shlex.split available in Python standard library, +/// when used in POSIX mode with support for comments. Though, shlex +/// implementation diverges from POSIX, and from implementation contained herein +/// in three aspects. First, it doesn't support line continuations. +/// Second, inside double quotes, the backslash characters retains its special +/// meaning as an escape character only when followed by \\ or \", whereas POSIX +/// specifies that it should retain its special meaning when followed by: $, \`, +/// \", \\, or a newline. Third, it treats carriage return as one of delimiters. +/// +/// # Examples +/// +/// Building an executable using compiler obtained from CC environment variable +/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build +/// rule for C used in GNU Make: +/// +/// ```rust,no_run +/// use std::env::var; +/// use std::process::Command; +/// +/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned()); +/// +/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new()); +/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS"); +/// +/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new()); +/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS"); +/// +/// Command::new(cc) +/// .args(cflags) +/// .args(cppflags) +/// .args(&["-c", "a.c", "-o", "a.out"]) +/// .spawn() +/// .expect("failed to start subprocess") +/// .wait() +/// .expect("failed to wait for subprocess"); +/// ``` +pub fn split(s: &str) -> Result<Vec<String>, ParseError> { + use State::*; + + let mut words = Vec::new(); + let mut word = String::new(); + let mut chars = s.chars(); + let mut state = Delimiter; + + loop { + let c = chars.next(); + state = match state { + Delimiter => match c { + None => break, + Some('\'') => SingleQuoted, + Some('\"') => DoubleQuoted, + Some('\\') => Backslash, + Some('\t') | Some(' ') | Some('\n') => Delimiter, + Some('#') => Comment, + Some(c) => { + word.push(c); + Unquoted + } + }, + Backslash => match c { + None => { + word.push('\\'); + words.push(mem::replace(&mut word, String::new())); + break; + } + Some('\n') => Delimiter, + Some(c) => { + word.push(c); + Unquoted + } + }, + Unquoted => match c { + None => { + words.push(mem::replace(&mut word, String::new())); + break; + } + Some('\'') => SingleQuoted, + Some('\"') => DoubleQuoted, + Some('\\') => UnquotedBackslash, + Some('\t') | Some(' ') | Some('\n') => { + words.push(mem::replace(&mut word, String::new())); + Delimiter + } + Some(c) => { + word.push(c); + Unquoted + } + }, + UnquotedBackslash => match c { + None => { + word.push('\\'); + words.push(mem::replace(&mut word, String::new())); + break; + } + Some('\n') => Unquoted, + Some(c) => { + word.push(c); + Unquoted + } + }, + SingleQuoted => match c { + None => return Err(ParseError), + Some('\'') => Unquoted, + Some(c) => { + word.push(c); + SingleQuoted + } + }, + DoubleQuoted => match c { + None => return Err(ParseError), + Some('\"') => Unquoted, + Some('\\') => DoubleQuotedBackslash, + Some(c) => { + word.push(c); + DoubleQuoted + } + }, + DoubleQuotedBackslash => match c { + None => return Err(ParseError), + Some('\n') => DoubleQuoted, + Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => { + word.push(c); + DoubleQuoted + } + Some(c) => { + word.push('\\'); + word.push(c); + DoubleQuoted + } + }, + Comment => match c { + None => break, + Some('\n') => Delimiter, + Some(_) => Comment, + }, + } + } + + Ok(words) +} + +enum EscapeStyle { + /// No escaping. + None, + /// Wrap in single quotes. + SingleQuoted, + /// Single quotes combined with backslash. + Mixed, +} + +/// Determines escaping style to use. +fn escape_style(s: &str) -> EscapeStyle { + if s.is_empty() { + return EscapeStyle::SingleQuoted; + } + + let mut special = false; + let mut newline = false; + let mut single_quote = false; + + for c in s.chars() { + match c { + '\n' => { + newline = true; + special = true; + } + '\'' => { + single_quote = true; + special = true; + } + '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*' + | '?' | '[' | '#' | '˜' | '=' | '%' => { + special = true; + } + _ => continue, + } + } + + if !special { + EscapeStyle::None + } else if newline && !single_quote { + EscapeStyle::SingleQuoted + } else { + EscapeStyle::Mixed + } +} + +/// Escapes special characters in a string, so that it will retain its literal +/// meaning when used as a part of command in Unix shell. +/// +/// It tries to avoid introducing any unnecessary quotes or escape characters, +/// but specifics regarding quoting style are left unspecified. +pub fn quote(s: &str) -> Cow<str> { + // We are going somewhat out of the way to provide + // minimal amount of quoting in typical cases. + match escape_style(s) { + EscapeStyle::None => s.into(), + EscapeStyle::SingleQuoted => format!("'{}'", s).into(), + EscapeStyle::Mixed => { + let mut quoted = String::new(); + quoted.push('\''); + for c in s.chars() { + if c == '\'' { + quoted.push_str("'\\''"); + } else { + quoted.push(c); + } + } + quoted.push('\''); + quoted.into() + } + } +} + +/// Joins arguments into a single command line suitable for execution in Unix +/// shell. +/// +/// Each argument is quoted using [`quote`] to preserve its literal meaning when +/// parsed by Unix shell. +/// +/// Note: This function is essentially an inverse of [`split`]. +/// +/// # Examples +/// +/// Logging executed commands in format that can be easily copied and pasted +/// into an actual shell: +/// +/// ```rust,no_run +/// fn execute(args: &[&str]) { +/// use std::process::Command; +/// println!("Executing: {}", shell_words::join(args)); +/// Command::new(&args[0]) +/// .args(&args[1..]) +/// .spawn() +/// .expect("failed to start subprocess") +/// .wait() +/// .expect("failed to wait for subprocess"); +/// } +/// +/// execute(&["python", "-c", "print('Hello world!')"]); +/// ``` +/// +/// [`quote`]: fn.quote.html +/// [`split`]: fn.split.html +pub fn join<I, S>(words: I) -> String +where + I: IntoIterator<Item = S>, + S: AsRef<str>, +{ + let mut line = words.into_iter().fold(String::new(), |mut line, word| { + let quoted = quote(word.as_ref()); + line.push_str(quoted.as_ref()); + line.push(' '); + line + }); + line.pop(); + line +} + +#[cfg(test)] +mod tests { + use super::*; + + fn split_ok(cases: &[(&str, &[&str])]) { + for &(input, expected) in cases { + match split(input) { + Err(actual) => { + panic!( + "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n", + input, expected, actual + ); + } + Ok(actual) => { + assert!( + expected == actual.as_slice(), + "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n", + input, + expected, + actual + ); + } + } + } + } + + #[test] + fn split_empty() { + split_ok(&[("", &[])]); + } + + #[test] + fn split_initial_whitespace_is_removed() { + split_ok(&[ + (" a", &["a"]), + ("\t\t\t\tbar", &["bar"]), + ("\t \nc", &["c"]), + ]); + } + + #[test] + fn split_trailing_whitespace_is_removed() { + split_ok(&[ + ("a ", &["a"]), + ("b\t", &["b"]), + ("c\t \n \n \n", &["c"]), + ("d\n\n", &["d"]), + ]); + } + + #[test] + fn split_carriage_return_is_not_special() { + split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]); + } + + #[test] + fn split_single_quotes() { + split_ok(&[ + (r#"''"#, &[r#""#]), + (r#"'a'"#, &[r#"a"#]), + (r#"'\'"#, &[r#"\"#]), + (r#"' \ '"#, &[r#" \ "#]), + (r#"'#'"#, &[r#"#"#]), + ]); + } + + #[test] + fn split_double_quotes() { + split_ok(&[ + (r#""""#, &[""]), + (r#""""""#, &[""]), + (r#""a b c' d""#, &["a b c' d"]), + (r#""\a""#, &["\\a"]), + (r#""$""#, &["$"]), + (r#""\$""#, &["$"]), + (r#""`""#, &["`"]), + (r#""\`""#, &["`"]), + (r#""\"""#, &["\""]), + (r#""\\""#, &["\\"]), + ("\"\n\"", &["\n"]), + ("\"\\\n\"", &[""]), + ]); + } + + #[test] + fn split_unquoted() { + split_ok(&[ + (r#"\|\&\;"#, &[r#"|&;"#]), + (r#"\<\>"#, &[r#"<>"#]), + (r#"\(\)"#, &[r#"()"#]), + (r#"\$"#, &[r#"$"#]), + (r#"\`"#, &[r#"`"#]), + (r#"\""#, &[r#"""#]), + (r#"\'"#, &[r#"'"#]), + ("\\\n", &[]), + (" \\\n \n", &[]), + ("a\nb\nc", &["a", "b", "c"]), + ("a\\\nb\\\nc", &["abc"]), + ("foo bar baz", &["foo", "bar", "baz"]), + (r#"\🦉"#, &[r"🦉"]), + ]); + } + + #[test] + fn split_trailing_backslash() { + split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]); + } + + #[test] + fn split_errors() { + assert_eq!(split("'abc"), Err(ParseError)); + assert_eq!(split("\""), Err(ParseError)); + assert_eq!(split("'\\"), Err(ParseError)); + assert_eq!(split("'\\"), Err(ParseError)); + } + + #[test] + fn split_comments() { + split_ok(&[ + (r#" x # comment "#, &["x"]), + (r#" w1#w2 "#, &["w1#w2"]), + (r#"'not really a # comment'"#, &["not really a # comment"]), + (" a # very long comment \n b # another comment", &["a", "b"]), + ]); + } + + #[test] + fn test_quote() { + assert_eq!(quote(""), "''"); + assert_eq!(quote("'"), "''\\'''"); + assert_eq!(quote("abc"), "abc"); + assert_eq!(quote("a \n b"), "'a \n b'"); + assert_eq!(quote("X'\nY"), "'X'\\''\nY'"); + } + + #[test] + fn test_join() { + assert_eq!(join(&["a", "b", "c"]), "a b c"); + assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'"); + } + + #[test] + fn join_followed_by_split_is_identity() { + let cases: Vec<&[&str]> = vec![ + &["a"], + &["python", "-c", "print('Hello world!')"], + &["echo", " arg with spaces ", "arg \' with \" quotes"], + &["even newlines are quoted correctly\n", "\n", "\n\n\t "], + &["$", "`test`"], + &["cat", "~user/log*"], + &["test", "'a \"b", "\"X'"], + &["empty", "", "", ""], + ]; + for argv in cases { + let args = join(argv); + assert_eq!(split(&args).unwrap(), argv); + } + } +} |