diff options
Diffstat (limited to 'vendor/anstream/src/adapter/strip.rs')
-rw-r--r-- | vendor/anstream/src/adapter/strip.rs | 513 |
1 files changed, 0 insertions, 513 deletions
diff --git a/vendor/anstream/src/adapter/strip.rs b/vendor/anstream/src/adapter/strip.rs deleted file mode 100644 index 5078c51..0000000 --- a/vendor/anstream/src/adapter/strip.rs +++ /dev/null @@ -1,513 +0,0 @@ -use anstyle_parse::state::state_change; -use anstyle_parse::state::Action; -use anstyle_parse::state::State; - -/// Strip ANSI escapes from a `&str`, returning the printable content -/// -/// This can be used to take output from a program that includes escape sequences and write it -/// somewhere that does not easily support them, such as a log file. -/// -/// For non-contiguous data, see [`StripStr`]. -/// -/// # Example -/// -/// ```rust -/// use std::io::Write as _; -/// -/// let styled_text = "\x1b[32mfoo\x1b[m bar"; -/// let plain_str = anstream::adapter::strip_str(&styled_text).to_string(); -/// assert_eq!(plain_str, "foo bar"); -/// ``` -#[inline] -pub fn strip_str(data: &str) -> StrippedStr<'_> { - StrippedStr::new(data) -} - -/// See [`strip_str`] -#[derive(Default, Clone, Debug, PartialEq, Eq)] -pub struct StrippedStr<'s> { - bytes: &'s [u8], - state: State, -} - -impl<'s> StrippedStr<'s> { - #[inline] - fn new(data: &'s str) -> Self { - Self { - bytes: data.as_bytes(), - state: State::Ground, - } - } - - /// Create a [`String`] of the printable content - #[inline] - #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation - pub fn to_string(&self) -> String { - use std::fmt::Write as _; - let mut stripped = String::with_capacity(self.bytes.len()); - let _ = write!(&mut stripped, "{}", self); - stripped - } -} - -impl<'s> std::fmt::Display for StrippedStr<'s> { - /// **Note:** this does *not* exhaust the [`Iterator`] - #[inline] - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let iter = Self { - bytes: self.bytes, - state: self.state, - }; - for printable in iter { - printable.fmt(f)?; - } - Ok(()) - } -} - -impl<'s> Iterator for StrippedStr<'s> { - type Item = &'s str; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - next_str(&mut self.bytes, &mut self.state) - } -} - -/// Incrementally strip non-contiguous data -#[derive(Default, Clone, Debug, PartialEq, Eq)] -pub struct StripStr { - state: State, -} - -impl StripStr { - /// Initial state - pub fn new() -> Self { - Default::default() - } - - /// Strip the next segment of data - pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> { - StripStrIter { - bytes: data.as_bytes(), - state: &mut self.state, - } - } -} - -/// See [`StripStr`] -#[derive(Debug, PartialEq, Eq)] -pub struct StripStrIter<'s> { - bytes: &'s [u8], - state: &'s mut State, -} - -impl<'s> Iterator for StripStrIter<'s> { - type Item = &'s str; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - next_str(&mut self.bytes, self.state) - } -} - -#[inline] -fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> { - let offset = bytes.iter().copied().position(|b| { - let (next_state, action) = state_change(*state, b); - if next_state != State::Anywhere { - *state = next_state; - } - is_printable_str(action, b) - }); - let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); - *bytes = next; - *state = State::Ground; - - let offset = bytes.iter().copied().position(|b| { - let (_next_state, action) = state_change(State::Ground, b); - !is_printable_str(action, b) - }); - let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); - *bytes = next; - if printable.is_empty() { - None - } else { - let printable = unsafe { - from_utf8_unchecked( - printable, - "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations", - ) - }; - Some(printable) - } -} - -#[inline] -unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str { - if cfg!(debug_assertions) { - // Catch problems more quickly when testing - std::str::from_utf8(bytes).expect(safety_justification) - } else { - std::str::from_utf8_unchecked(bytes) - } -} - -#[inline] -fn is_printable_str(action: Action, byte: u8) -> bool { - // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not - // ISO Latin-1, making it DEL and non-printable - const DEL: u8 = 0x7f; - (action == Action::Print && byte != DEL) - || action == Action::BeginUtf8 - // since we know the input is valid UTF-8, the only thing we can do with - // continuations is to print them - || is_utf8_continuation(byte) - || (action == Action::Execute && byte.is_ascii_whitespace()) -} - -#[inline] -fn is_utf8_continuation(b: u8) -> bool { - matches!(b, 0x80..=0xbf) -} - -/// Strip ANSI escapes from bytes, returning the printable content -/// -/// This can be used to take output from a program that includes escape sequences and write it -/// somewhere that does not easily support them, such as a log file. -/// -/// # Example -/// -/// ```rust -/// use std::io::Write as _; -/// -/// let styled_text = "\x1b[32mfoo\x1b[m bar"; -/// let plain_str = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec(); -/// assert_eq!(plain_str.as_slice(), &b"foo bar"[..]); -/// ``` -#[inline] -pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> { - StrippedBytes::new(data) -} - -/// See [`strip_bytes`] -#[derive(Default, Clone, Debug, PartialEq, Eq)] -pub struct StrippedBytes<'s> { - bytes: &'s [u8], - state: State, - utf8parser: Utf8Parser, -} - -impl<'s> StrippedBytes<'s> { - /// See [`strip_bytes`] - #[inline] - pub fn new(bytes: &'s [u8]) -> Self { - Self { - bytes, - state: State::Ground, - utf8parser: Default::default(), - } - } - - /// Strip the next slice of bytes - /// - /// Used when the content is in several non-contiguous slices - /// - /// # Panic - /// - /// May panic if it is not exhausted / empty - #[inline] - pub fn extend(&mut self, bytes: &'s [u8]) { - debug_assert!( - self.is_empty(), - "current bytes must be processed to ensure we end at the right state" - ); - self.bytes = bytes; - } - - /// Report the bytes has been exhausted - #[inline] - pub fn is_empty(&self) -> bool { - self.bytes.is_empty() - } - - /// Create a [`Vec`] of the printable content - #[inline] - pub fn into_vec(self) -> Vec<u8> { - let mut stripped = Vec::with_capacity(self.bytes.len()); - for printable in self { - stripped.extend(printable); - } - stripped - } -} - -impl<'s> Iterator for StrippedBytes<'s> { - type Item = &'s [u8]; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - next_bytes(&mut self.bytes, &mut self.state, &mut self.utf8parser) - } -} - -/// Incrementally strip non-contiguous data -#[derive(Default, Clone, Debug, PartialEq, Eq)] -pub struct StripBytes { - state: State, - utf8parser: Utf8Parser, -} - -impl StripBytes { - /// Initial state - pub fn new() -> Self { - Default::default() - } - - /// Strip the next segment of data - pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> { - StripBytesIter { - bytes, - state: &mut self.state, - utf8parser: &mut self.utf8parser, - } - } -} - -/// See [`StripBytes`] -#[derive(Debug, PartialEq, Eq)] -pub struct StripBytesIter<'s> { - bytes: &'s [u8], - state: &'s mut State, - utf8parser: &'s mut Utf8Parser, -} - -impl<'s> Iterator for StripBytesIter<'s> { - type Item = &'s [u8]; - - #[inline] - fn next(&mut self) -> Option<Self::Item> { - next_bytes(&mut self.bytes, self.state, self.utf8parser) - } -} - -#[inline] -fn next_bytes<'s>( - bytes: &mut &'s [u8], - state: &mut State, - utf8parser: &mut Utf8Parser, -) -> Option<&'s [u8]> { - let offset = bytes.iter().copied().position(|b| { - if *state == State::Utf8 { - true - } else { - let (next_state, action) = state_change(*state, b); - if next_state != State::Anywhere { - *state = next_state; - } - is_printable_bytes(action, b) - } - }); - let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); - *bytes = next; - - let offset = bytes.iter().copied().position(|b| { - if *state == State::Utf8 { - if utf8parser.add(b) { - *state = State::Ground; - } - false - } else { - let (next_state, action) = state_change(State::Ground, b); - if next_state != State::Anywhere { - *state = next_state; - } - if *state == State::Utf8 { - utf8parser.add(b); - false - } else { - !is_printable_bytes(action, b) - } - } - }); - let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); - *bytes = next; - if printable.is_empty() { - None - } else { - Some(printable) - } -} - -#[derive(Default, Clone, Debug, PartialEq, Eq)] -pub struct Utf8Parser { - utf8_parser: utf8parse::Parser, -} - -impl Utf8Parser { - fn add(&mut self, byte: u8) -> bool { - let mut b = false; - let mut receiver = VtUtf8Receiver(&mut b); - self.utf8_parser.advance(&mut receiver, byte); - b - } -} - -struct VtUtf8Receiver<'a>(&'a mut bool); - -impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> { - fn codepoint(&mut self, _: char) { - *self.0 = true; - } - - fn invalid_sequence(&mut self) { - *self.0 = true; - } -} - -#[inline] -fn is_printable_bytes(action: Action, byte: u8) -> bool { - // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not - // ISO Latin-1, making it DEL and non-printable - const DEL: u8 = 0x7f; - - // Continuations aren't included as they may also be control codes, requiring more context - (action == Action::Print && byte != DEL) - || action == Action::BeginUtf8 - || (action == Action::Execute && byte.is_ascii_whitespace()) -} - -#[cfg(test)] -mod test { - use super::*; - use proptest::prelude::*; - - /// Model based off full parser - fn parser_strip(bytes: &[u8]) -> String { - #[derive(Default)] - struct Strip(String); - impl Strip { - fn with_capacity(capacity: usize) -> Self { - Self(String::with_capacity(capacity)) - } - } - impl anstyle_parse::Perform for Strip { - fn print(&mut self, c: char) { - self.0.push(c); - } - - fn execute(&mut self, byte: u8) { - if byte.is_ascii_whitespace() { - self.0.push(byte as char); - } - } - } - - let mut stripped = Strip::with_capacity(bytes.len()); - let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new(); - for byte in bytes { - parser.advance(&mut stripped, *byte); - } - stripped.0 - } - - /// Model verifying incremental parsing - fn strip_char(mut s: &str) -> String { - let mut result = String::new(); - let mut state = StripStr::new(); - while !s.is_empty() { - let mut indices = s.char_indices(); - indices.next(); // current - let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len()); - let (current, remainder) = s.split_at(offset); - for printable in state.strip_next(current) { - result.push_str(printable); - } - s = remainder; - } - result - } - - /// Model verifying incremental parsing - fn strip_byte(s: &[u8]) -> Vec<u8> { - let mut result = Vec::new(); - let mut state = StripBytes::default(); - for start in 0..s.len() { - let current = &s[start..=start]; - for printable in state.strip_next(current) { - result.extend(printable); - } - } - result - } - - #[test] - fn test_strip_bytes_multibyte() { - let bytes = [240, 145, 141, 139]; - let expected = parser_strip(&bytes); - let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap(); - assert_eq!(expected, actual); - } - - #[test] - fn test_strip_byte_multibyte() { - let bytes = [240, 145, 141, 139]; - let expected = parser_strip(&bytes); - let actual = String::from_utf8(strip_byte(&bytes).to_vec()).unwrap(); - assert_eq!(expected, actual); - } - - #[test] - fn test_strip_str_del() { - let input = std::str::from_utf8(&[0x7f]).unwrap(); - let expected = ""; - let actual = strip_str(input).to_string(); - assert_eq!(expected, actual); - } - - #[test] - fn test_strip_byte_del() { - let bytes = [0x7f]; - let expected = ""; - let actual = String::from_utf8(strip_byte(&bytes).to_vec()).unwrap(); - assert_eq!(expected, actual); - } - - proptest! { - #[test] - #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 - fn strip_str_no_escapes(s in "\\PC*") { - let expected = parser_strip(s.as_bytes()); - let actual = strip_str(&s).to_string(); - assert_eq!(expected, actual); - } - - #[test] - #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 - fn strip_char_no_escapes(s in "\\PC*") { - let expected = parser_strip(s.as_bytes()); - let actual = strip_char(&s); - assert_eq!(expected, actual); - } - - #[test] - #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 - fn strip_bytes_no_escapes(s in "\\PC*") { - dbg!(&s); - dbg!(s.as_bytes()); - let expected = parser_strip(s.as_bytes()); - let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap(); - assert_eq!(expected, actual); - } - - #[test] - #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253 - fn strip_byte_no_escapes(s in "\\PC*") { - dbg!(&s); - dbg!(s.as_bytes()); - let expected = parser_strip(s.as_bytes()); - let actual = String::from_utf8(strip_byte(s.as_bytes()).to_vec()).unwrap(); - assert_eq!(expected, actual); - } - } -} |