summaryrefslogtreecommitdiff
path: root/vendor/shell-words/src/lib.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/shell-words/src/lib.rs')
-rw-r--r--vendor/shell-words/src/lib.rs507
1 files changed, 507 insertions, 0 deletions
diff --git a/vendor/shell-words/src/lib.rs b/vendor/shell-words/src/lib.rs
new file mode 100644
index 0000000..b93001f
--- /dev/null
+++ b/vendor/shell-words/src/lib.rs
@@ -0,0 +1,507 @@
+// Copyright 2018 Tomasz Miąsko
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
+// or the MIT license <LICENSE-MIT>, at your option.
+//
+//! Process command line according to parsing rules of Unix shell as specified
+//! in [Shell Command Language in POSIX.1-2008][posix-shell].
+//!
+//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![forbid(unsafe_code)]
+
+#[cfg(feature = "std")]
+extern crate core;
+
+use core::fmt;
+use core::mem;
+
+#[cfg(not(feature = "std"))]
+#[macro_use]
+extern crate alloc;
+
+#[cfg(not(feature = "std"))]
+use alloc::string::String;
+#[cfg(not(feature = "std"))]
+use alloc::vec::Vec;
+
+#[cfg(not(feature = "std"))]
+use alloc::borrow::Cow;
+#[cfg(feature = "std")]
+use std::borrow::Cow;
+
/// An error returned when shell parsing fails.
///
/// The only failure `split` reports is a quote that is opened but never
/// closed, so the error carries no additional data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human-readable description of the error.
    fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
        write!(formatter, "missing closing quote")
    }
}

// The `Error` trait is only implemented when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
+
+enum State {
+ /// Within a delimiter.
+ Delimiter,
+ /// After backslash, but before starting word.
+ Backslash,
+ /// Within an unquoted word.
+ Unquoted,
+ /// After backslash in an unquoted word.
+ UnquotedBackslash,
+ /// Within a single quoted word.
+ SingleQuoted,
+ /// Within a double quoted word.
+ DoubleQuoted,
+ /// After backslash inside a double quoted word.
+ DoubleQuotedBackslash,
+ /// Inside a comment.
+ Comment,
+}
+
+/// Splits command line into separate arguments, in much the same way Unix shell
+/// would, but without many of expansion the shell would perform.
+///
+/// The split functionality is compatible with behaviour of Unix shell, but with
+/// word expansions limited to quote removal, and without special token
+/// recognition rules for operators.
+///
+/// The result is exactly the same as one obtained from Unix shell as long as
+/// those unsupported features are not present in input: no operators, no
+/// variable assignments, no tilde expansion, no parameter expansion, no command
+/// substitution, no arithmetic expansion, no pathname expansion.
+///
+/// In case those unsupported shell features are present, the syntax that
+/// introduce them is interpreted literally.
+///
+/// # Errors
+///
+/// When input contains unmatched quote, an error is returned.
+///
+/// # Compatibility with other implementations
+///
+/// It should be fully compatible with g_shell_parse_argv from GLib, except that
+/// in GLib it is an error not to have any words after tokenization.
+///
+/// It is also very close to shlex.split available in Python standard library,
+/// when used in POSIX mode with support for comments. Though, shlex
+/// implementation diverges from POSIX, and from implementation contained herein
+/// in three aspects. First, it doesn't support line continuations.
+/// Second, inside double quotes, the backslash characters retains its special
+/// meaning as an escape character only when followed by \\ or \", whereas POSIX
+/// specifies that it should retain its special meaning when followed by: $, \`,
+/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
+///
+/// # Examples
+///
+/// Building an executable using compiler obtained from CC environment variable
+/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
+/// rule for C used in GNU Make:
+///
+/// ```rust,no_run
+/// use std::env::var;
+/// use std::process::Command;
+///
+/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
+///
+/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
+/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
+///
+/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
+/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
+///
+/// Command::new(cc)
+/// .args(cflags)
+/// .args(cppflags)
+/// .args(&["-c", "a.c", "-o", "a.out"])
+/// .spawn()
+/// .expect("failed to start subprocess")
+/// .wait()
+/// .expect("failed to wait for subprocess");
+/// ```
+pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
+ use State::*;
+
+ let mut words = Vec::new();
+ let mut word = String::new();
+ let mut chars = s.chars();
+ let mut state = Delimiter;
+
+ loop {
+ let c = chars.next();
+ state = match state {
+ Delimiter => match c {
+ None => break,
+ Some('\'') => SingleQuoted,
+ Some('\"') => DoubleQuoted,
+ Some('\\') => Backslash,
+ Some('\t') | Some(' ') | Some('\n') => Delimiter,
+ Some('#') => Comment,
+ Some(c) => {
+ word.push(c);
+ Unquoted
+ }
+ },
+ Backslash => match c {
+ None => {
+ word.push('\\');
+ words.push(mem::replace(&mut word, String::new()));
+ break;
+ }
+ Some('\n') => Delimiter,
+ Some(c) => {
+ word.push(c);
+ Unquoted
+ }
+ },
+ Unquoted => match c {
+ None => {
+ words.push(mem::replace(&mut word, String::new()));
+ break;
+ }
+ Some('\'') => SingleQuoted,
+ Some('\"') => DoubleQuoted,
+ Some('\\') => UnquotedBackslash,
+ Some('\t') | Some(' ') | Some('\n') => {
+ words.push(mem::replace(&mut word, String::new()));
+ Delimiter
+ }
+ Some(c) => {
+ word.push(c);
+ Unquoted
+ }
+ },
+ UnquotedBackslash => match c {
+ None => {
+ word.push('\\');
+ words.push(mem::replace(&mut word, String::new()));
+ break;
+ }
+ Some('\n') => Unquoted,
+ Some(c) => {
+ word.push(c);
+ Unquoted
+ }
+ },
+ SingleQuoted => match c {
+ None => return Err(ParseError),
+ Some('\'') => Unquoted,
+ Some(c) => {
+ word.push(c);
+ SingleQuoted
+ }
+ },
+ DoubleQuoted => match c {
+ None => return Err(ParseError),
+ Some('\"') => Unquoted,
+ Some('\\') => DoubleQuotedBackslash,
+ Some(c) => {
+ word.push(c);
+ DoubleQuoted
+ }
+ },
+ DoubleQuotedBackslash => match c {
+ None => return Err(ParseError),
+ Some('\n') => DoubleQuoted,
+ Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
+ word.push(c);
+ DoubleQuoted
+ }
+ Some(c) => {
+ word.push('\\');
+ word.push(c);
+ DoubleQuoted
+ }
+ },
+ Comment => match c {
+ None => break,
+ Some('\n') => Delimiter,
+ Some(_) => Comment,
+ },
+ }
+ }
+
+ Ok(words)
+}
+
/// Quoting strategy chosen by `escape_style` for a single argument.
enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}

/// Determines escaping style to use.
///
/// * The empty string must be written as `''`, so it is `SingleQuoted`.
/// * A string containing a single quote cannot be wrapped in single quotes
///   as is, so it is `Mixed`.
/// * A string containing any other character from the list below is
///   `SingleQuoted`.
/// * Everything else is `None`.
fn escape_style(s: &str) -> EscapeStyle {
    if s.is_empty() {
        return EscapeStyle::SingleQuoted;
    }

    let mut special = false;

    for c in s.chars() {
        match c {
            // A single quote always forces the mixed style, regardless of
            // what else the string contains.
            '\'' => return EscapeStyle::Mixed,
            // Characters listed in the POSIX shell quoting rules. The list
            // has to contain the ASCII tilde `~` (U+007E), otherwise words
            // such as `~user` are emitted unquoted and get tilde-expanded by
            // the shell. The look-alike `˜` (U+02DC) that used to stand in its
            // place is kept so that strings containing it are still quoted.
            '\n' | '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' '
            | '\t' | '*' | '?' | '[' | '#' | '~' | '˜' | '=' | '%' => {
                special = true;
            }
            _ => {}
        }
    }

    if special {
        EscapeStyle::SingleQuoted
    } else {
        EscapeStyle::None
    }
}
+
+/// Escapes special characters in a string, so that it will retain its literal
+/// meaning when used as a part of command in Unix shell.
+///
+/// It tries to avoid introducing any unnecessary quotes or escape characters,
+/// but specifics regarding quoting style are left unspecified.
+pub fn quote(s: &str) -> Cow<str> {
+ // We are going somewhat out of the way to provide
+ // minimal amount of quoting in typical cases.
+ match escape_style(s) {
+ EscapeStyle::None => s.into(),
+ EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
+ EscapeStyle::Mixed => {
+ let mut quoted = String::new();
+ quoted.push('\'');
+ for c in s.chars() {
+ if c == '\'' {
+ quoted.push_str("'\\''");
+ } else {
+ quoted.push(c);
+ }
+ }
+ quoted.push('\'');
+ quoted.into()
+ }
+ }
+}
+
+/// Joins arguments into a single command line suitable for execution in Unix
+/// shell.
+///
+/// Each argument is quoted using [`quote`] to preserve its literal meaning when
+/// parsed by Unix shell.
+///
+/// Note: This function is essentially an inverse of [`split`].
+///
+/// # Examples
+///
+/// Logging executed commands in format that can be easily copied and pasted
+/// into an actual shell:
+///
+/// ```rust,no_run
+/// fn execute(args: &[&str]) {
+/// use std::process::Command;
+/// println!("Executing: {}", shell_words::join(args));
+/// Command::new(&args[0])
+/// .args(&args[1..])
+/// .spawn()
+/// .expect("failed to start subprocess")
+/// .wait()
+/// .expect("failed to wait for subprocess");
+/// }
+///
+/// execute(&["python", "-c", "print('Hello world!')"]);
+/// ```
+///
+/// [`quote`]: fn.quote.html
+/// [`split`]: fn.split.html
+pub fn join<I, S>(words: I) -> String
+where
+ I: IntoIterator<Item = S>,
+ S: AsRef<str>,
+{
+ let mut line = words.into_iter().fold(String::new(), |mut line, word| {
+ let quoted = quote(word.as_ref());
+ line.push_str(quoted.as_ref());
+ line.push(' ');
+ line
+ });
+ line.pop();
+ line
+}
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            (" a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    /// Every way of reaching the end of input inside a quoted section must be
    /// reported as an error.
    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // A backslash does not close (or escape anything in) single quotes.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ending right after a backslash inside double quotes. This
        // used to be a verbatim repeat of the single quote case above.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n b"), "'a \n b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    /// Whatever `join` produces must be split back into the original words.
    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}