summary refs log tree commit diff
path: root/vendor/anstream/src/adapter/strip.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/anstream/src/adapter/strip.rs')
-rw-r--r-- vendor/anstream/src/adapter/strip.rs 513
1 files changed, 513 insertions, 0 deletions
diff --git a/vendor/anstream/src/adapter/strip.rs b/vendor/anstream/src/adapter/strip.rs
new file mode 100644
index 0000000..5078c51
--- /dev/null
+++ b/vendor/anstream/src/adapter/strip.rs
@@ -0,0 +1,513 @@
+use anstyle_parse::state::state_change;
+use anstyle_parse::state::Action;
+use anstyle_parse::state::State;
+
+/// Remove ANSI escape sequences from a `&str`, yielding only its printable content
+///
+/// Handy for forwarding styled program output to a destination that cannot render escape
+/// sequences, such as a log file.
+///
+/// The whole input has to be available as one contiguous `&str`; for data that arrives in
+/// separate pieces, see [`StripStr`].
+///
+/// # Example
+///
+/// ```rust
+/// let styled_text = "\x1b[32mfoo\x1b[m bar";
+/// let plain_str = anstream::adapter::strip_str(styled_text).to_string();
+/// assert_eq!(plain_str, "foo bar");
+/// ```
+#[inline]
+pub fn strip_str(data: &str) -> StrippedStr<'_> {
+    StrippedStr {
+        bytes: data.as_bytes(),
+        state: State::Ground,
+    }
+}
+
+/// Iterator over the printable `&str` segments of a string, as created by [`strip_str`]
+///
+/// Also implements [`Display`][std::fmt::Display], which writes every remaining printable
+/// segment.
+#[derive(Default, Clone, Debug, PartialEq, Eq)]
+pub struct StrippedStr<'s> {
+    /// Bytes of the input that have not been consumed yet
+    bytes: &'s [u8],
+    /// Escape-sequence parser state carried between calls to `next`
+    state: State,
+}
+
+impl<'s> StrippedStr<'s> {
+    /// Start stripping `data` with the parser in its `Ground` state
+    #[inline]
+    fn new(data: &'s str) -> Self {
+        let bytes = data.as_bytes();
+        let state = State::Ground;
+        Self { bytes, state }
+    }
+
+    /// Collect the printable content into a newly allocated [`String`]
+    ///
+    /// The buffer is reserved up front using the length of the unprocessed input, so the
+    /// stripped text (a subset of those bytes) fits without reallocating.
+    #[inline]
+    #[allow(clippy::inherent_to_string_shadow_display)] // Single-allocation implementation
+    pub fn to_string(&self) -> String {
+        use std::fmt::Write as _;
+        let mut stripped = String::with_capacity(self.bytes.len());
+        // The write result is intentionally discarded
+        let _ = write!(stripped, "{}", self);
+        stripped
+    }
+}
+
+impl<'s> std::fmt::Display for StrippedStr<'s> {
+    /// Write every remaining printable segment to `f`
+    ///
+    /// **Note:** this iterates over a copy, so it does *not* exhaust the [`Iterator`]
+    #[inline]
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let mut remaining = Self {
+            bytes: self.bytes,
+            state: self.state,
+        };
+        // Stops at, and returns, the first formatting error
+        remaining.try_for_each(|printable| printable.fmt(f))
+    }
+}
+
+impl<'s> Iterator for StrippedStr<'s> {
+    type Item = &'s str;
+
+    /// Yield the next run of printable text, or `None` once no printable text is left
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let Self { bytes, state } = self;
+        next_str(bytes, state)
+    }
+}
+
+/// Stateful stripper for text that arrives as a series of separate `&str` segments
+///
+/// Feed each segment to [`StripStr::strip_next`]; the parser state lives in this value
+/// between calls.
+#[derive(Default, Clone, Debug, PartialEq, Eq)]
+pub struct StripStr {
+    /// Escape-sequence parser state shared by successive segments
+    state: State,
+}
+
+impl StripStr {
+    /// Create a stripper in its initial (default) state
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Strip the next segment of data
+    ///
+    /// The returned iterator borrows this stripper mutably and updates its parser state as it
+    /// is consumed.
+    pub fn strip_next<'s>(&'s mut self, data: &'s str) -> StripStrIter<'s> {
+        let bytes = data.as_bytes();
+        let state = &mut self.state;
+        StripStrIter { bytes, state }
+    }
+}
+
+/// Iterator over the printable segments of one chunk, as created by [`StripStr::strip_next`]
+#[derive(Debug, PartialEq, Eq)]
+pub struct StripStrIter<'s> {
+    /// Bytes of the current segment that have not been consumed yet
+    bytes: &'s [u8],
+    /// Parser state borrowed from the originating [`StripStr`]
+    state: &'s mut State,
+}
+
+impl<'s> Iterator for StripStrIter<'s> {
+    type Item = &'s str;
+
+    /// Yield the next run of printable text from the current segment
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let remaining = &mut self.bytes;
+        next_str(remaining, &mut *self.state)
+    }
+}
+
+#[inline]
+fn next_str<'s>(bytes: &mut &'s [u8], state: &mut State) -> Option<&'s str> { // returns the next printable run and advances `bytes` past everything consumed
+ let offset = bytes.iter().copied().position(|b| { // phase 1: locate the first printable byte, feeding skipped bytes to the parser
+ let (next_state, action) = state_change(*state, b);
+ if next_state != State::Anywhere { // keep the current state when the transition reports `Anywhere`
+ *state = next_state;
+ }
+ is_printable_str(action, b)
+ });
+ let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); // drop the non-printable prefix (the whole slice when nothing is printable)
+ *bytes = next;
+ *state = State::Ground; // NOTE(review): this reset also happens when the slice ended in the middle of an escape sequence - confirm that is intended for incremental use via `StripStr`
+
+ let offset = bytes.iter().copied().position(|b| { // phase 2: find where the printable run ends
+ let (_next_state, action) = state_change(State::Ground, b); // each byte is judged from `Ground`; the resulting state is not stored
+ !is_printable_str(action, b)
+ });
+ let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); // with no terminator the run extends to the end of the slice
+ *bytes = next;
+ if printable.is_empty() { // nothing printable remained in `bytes`
+ None
+ } else {
+ let printable = unsafe { // SAFETY rationale is the message passed below
+ from_utf8_unchecked(
+ printable,
+ "`bytes` was validated as UTF-8, the parser preserves UTF-8 continuations",
+ )
+ };
+ Some(printable)
+ }
+}
+
+/// Reinterpret `bytes` as a `&str`, skipping validation unless debug assertions are enabled
+///
+/// With debug assertions on, the bytes are validated and a failure panics with
+/// `safety_justification` as the message.
+///
+/// # Safety
+///
+/// `bytes` must be valid UTF-8.
+#[inline]
+unsafe fn from_utf8_unchecked<'b>(bytes: &'b [u8], safety_justification: &'static str) -> &'b str {
+    if !cfg!(debug_assertions) {
+        return std::str::from_utf8_unchecked(bytes);
+    }
+    // Catch problems more quickly when testing
+    std::str::from_utf8(bytes).expect(safety_justification)
+}
+
+/// Decide whether `byte`, which triggered `action`, belongs to the printable output of a `&str`
+#[inline]
+fn is_printable_str(action: Action, byte: u8) -> bool {
+    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
+    // ISO Latin-1, making it DEL and non-printable
+    const DEL: u8 = 0x7f;
+
+    // since we know the input is valid UTF-8, the only thing we can do with
+    // continuations is to print them
+    if is_utf8_continuation(byte) || action == Action::BeginUtf8 {
+        return true;
+    }
+    if action == Action::Print {
+        return byte != DEL;
+    }
+    // Of the executed control bytes, only ASCII whitespace is kept
+    action == Action::Execute && byte.is_ascii_whitespace()
+}
+
+/// Report whether `b` has the `10xxxxxx` bit pattern, i.e. lies in `0x80..=0xbf`
+#[inline]
+fn is_utf8_continuation(b: u8) -> bool {
+    b & 0b1100_0000 == 0b1000_0000
+}
+
+/// Remove ANSI escape sequences from a byte slice, yielding only its printable content
+///
+/// Handy for forwarding styled program output to a destination that cannot render escape
+/// sequences, such as a log file.
+///
+/// # Example
+///
+/// ```rust
+/// let styled_text = "\x1b[32mfoo\x1b[m bar";
+/// let plain_bytes = anstream::adapter::strip_bytes(styled_text.as_bytes()).into_vec();
+/// assert_eq!(plain_bytes.as_slice(), &b"foo bar"[..]);
+/// ```
+#[inline]
+pub fn strip_bytes(data: &[u8]) -> StrippedBytes<'_> {
+    StrippedBytes::new(data)
+}
+
+/// Iterator over the printable byte runs of a slice, as created by [`strip_bytes`]
+#[derive(Default, Clone, Debug, PartialEq, Eq)]
+pub struct StrippedBytes<'s> {
+    /// Input bytes that have not been consumed yet
+    bytes: &'s [u8],
+    /// Escape-sequence parser state carried between calls to `next`
+    state: State,
+    /// Tracks progress through a multi-byte UTF-8 sequence
+    utf8parser: Utf8Parser,
+}
+
+impl<'s> StrippedBytes<'s> {
+    /// See [`strip_bytes`]
+    #[inline]
+    pub fn new(bytes: &'s [u8]) -> Self {
+        let state = State::Ground;
+        let utf8parser = Utf8Parser::default();
+        Self {
+            bytes,
+            state,
+            utf8parser,
+        }
+    }
+
+    /// Continue stripping with the next slice of bytes
+    ///
+    /// Used when the content is in several non-contiguous slices. Only the input is replaced;
+    /// the parser state is kept.
+    ///
+    /// # Panics
+    ///
+    /// With debug assertions enabled, panics if the current input is not exhausted / empty
+    #[inline]
+    pub fn extend(&mut self, bytes: &'s [u8]) {
+        debug_assert!(
+            self.is_empty(),
+            "current bytes must be processed to ensure we end at the right state"
+        );
+        self.bytes = bytes;
+    }
+
+    /// Report whether the current input has been exhausted
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.bytes.is_empty()
+    }
+
+    /// Consume the iterator, gathering the printable content into a [`Vec`]
+    #[inline]
+    pub fn into_vec(self) -> Vec<u8> {
+        // Read the length first: `fold` takes ownership of `self`
+        let capacity = self.bytes.len();
+        self.fold(Vec::with_capacity(capacity), |mut stripped, printable| {
+            stripped.extend_from_slice(printable);
+            stripped
+        })
+    }
+}
+
+impl<'s> Iterator for StrippedBytes<'s> {
+    type Item = &'s [u8];
+
+    /// Yield the next run of printable bytes, or `None` when the input holds no more
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let Self {
+            bytes,
+            state,
+            utf8parser,
+        } = self;
+        next_bytes(bytes, state, utf8parser)
+    }
+}
+
+/// Stateful stripper for bytes that arrive as a series of separate slices
+///
+/// Feed each slice to [`StripBytes::strip_next`]; both the escape parser state and the UTF-8
+/// decoder live in this value between calls.
+#[derive(Default, Clone, Debug, PartialEq, Eq)]
+pub struct StripBytes {
+    /// Escape-sequence parser state shared by successive slices
+    state: State,
+    /// Tracks progress through a multi-byte UTF-8 sequence across slices
+    utf8parser: Utf8Parser,
+}
+
+impl StripBytes {
+    /// Create a stripper in its initial (default) state
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Strip the next segment of data
+    ///
+    /// The returned iterator borrows this stripper mutably and updates its state as it is
+    /// consumed.
+    pub fn strip_next<'s>(&'s mut self, bytes: &'s [u8]) -> StripBytesIter<'s> {
+        let Self { state, utf8parser } = self;
+        StripBytesIter {
+            bytes,
+            state,
+            utf8parser,
+        }
+    }
+}
+
+/// Iterator over the printable runs of one slice, as created by [`StripBytes::strip_next`]
+#[derive(Debug, PartialEq, Eq)]
+pub struct StripBytesIter<'s> {
+    /// Bytes of the current slice that have not been consumed yet
+    bytes: &'s [u8],
+    /// Parser state borrowed from the originating [`StripBytes`]
+    state: &'s mut State,
+    /// UTF-8 decoder borrowed from the originating [`StripBytes`]
+    utf8parser: &'s mut Utf8Parser,
+}
+
+impl<'s> Iterator for StripBytesIter<'s> {
+    type Item = &'s [u8];
+
+    /// Yield the next run of printable bytes from the current slice
+    #[inline]
+    fn next(&mut self) -> Option<Self::Item> {
+        let remaining = &mut self.bytes;
+        next_bytes(remaining, &mut *self.state, &mut *self.utf8parser)
+    }
+}
+
+#[inline]
+fn next_bytes<'s>(
+ bytes: &mut &'s [u8], // remaining input; advanced past everything this call consumes
+ state: &mut State, // escape parser state, updated in place
+ utf8parser: &mut Utf8Parser, // tracks progress through a multi-byte UTF-8 sequence
+) -> Option<&'s [u8]> { // the next printable run, or `None` when nothing printable is left
+ let offset = bytes.iter().copied().position(|b| { // phase 1: skip to the first printable byte
+ if *state == State::Utf8 {
+ true // a UTF-8 sequence is still in progress, so this byte starts the printable run
+ } else {
+ let (next_state, action) = state_change(*state, b);
+ if next_state != State::Anywhere { // keep the current state when the transition reports `Anywhere`
+ *state = next_state;
+ }
+ is_printable_bytes(action, b)
+ }
+ });
+ let (_, next) = bytes.split_at(offset.unwrap_or(bytes.len())); // drop the non-printable prefix (the whole slice when nothing is printable)
+ *bytes = next;
+
+ let offset = bytes.iter().copied().position(|b| { // phase 2: find where the printable run ends
+ if *state == State::Utf8 {
+ if utf8parser.add(b) { // `add` reports true once a code point or an invalid sequence was seen
+ *state = State::Ground;
+ }
+ false // bytes consumed while in `Utf8` always stay in the run
+ } else {
+ let (next_state, action) = state_change(State::Ground, b); // the transition is evaluated from `Ground`, not from `*state`
+ if next_state != State::Anywhere {
+ *state = next_state;
+ }
+ if *state == State::Utf8 {
+ utf8parser.add(b); // presumably the lead byte of a UTF-8 sequence; it is kept in the run
+ false
+ } else {
+ !is_printable_bytes(action, b)
+ }
+ }
+ });
+ let (printable, next) = bytes.split_at(offset.unwrap_or(bytes.len())); // with no terminator the run extends to the end of the slice
+ *bytes = next;
+ if printable.is_empty() {
+ None
+ } else {
+ Some(printable)
+ }
+}
+
+/// Wrapper around [`utf8parse::Parser`] used to detect where a UTF-8 sequence ends
+#[derive(Default, Clone, Debug, PartialEq, Eq)]
+pub struct Utf8Parser {
+    /// The wrapped byte-at-a-time UTF-8 decoder
+    utf8_parser: utf8parse::Parser,
+}
+
+impl Utf8Parser {
+    /// Feed one byte to the decoder
+    ///
+    /// Returns `true` when this byte made the decoder report either a complete code point or
+    /// an invalid sequence, and `false` otherwise.
+    fn add(&mut self, byte: u8) -> bool {
+        let mut finished = false;
+        self.utf8_parser
+            .advance(&mut VtUtf8Receiver(&mut finished), byte);
+        finished
+    }
+}
+
+/// Receiver that raises the borrowed flag whenever the UTF-8 decoder reports a result
+struct VtUtf8Receiver<'a>(&'a mut bool);
+
+impl<'a> utf8parse::Receiver for VtUtf8Receiver<'a> {
+    /// A code point was decoded; the character itself is not needed, only the event
+    fn codepoint(&mut self, _: char) {
+        let Self(finished) = self;
+        **finished = true;
+    }
+
+    /// An invalid sequence was reported; it raises the flag just like a code point does
+    fn invalid_sequence(&mut self) {
+        let Self(finished) = self;
+        **finished = true;
+    }
+}
+
+/// Decide whether `byte`, which triggered `action`, belongs to the printable output of a byte
+/// stream
+#[inline]
+fn is_printable_bytes(action: Action, byte: u8) -> bool {
+    // VT320 considered 0x7f to be `Print`able but we expect to be working in UTF-8 systems and not
+    // ISO Latin-1, making it DEL and non-printable
+    const DEL: u8 = 0x7f;
+
+    // Continuations aren't included as they may also be control codes, requiring more context
+    if action == Action::Print {
+        return byte != DEL;
+    }
+    // Of the executed control bytes, only ASCII whitespace is kept
+    action == Action::BeginUtf8 || (action == Action::Execute && byte.is_ascii_whitespace())
+}
+
+#[cfg(test)]
+mod test {
+ use super::*;
+ use proptest::prelude::*;
+
+ /// Reference model based off the full `anstyle_parse::Parser`: keeps printed chars and executed ASCII whitespace
+ fn parser_strip(bytes: &[u8]) -> String {
+ #[derive(Default)]
+ struct Strip(String);
+ impl Strip {
+ fn with_capacity(capacity: usize) -> Self {
+ Self(String::with_capacity(capacity))
+ }
+ }
+ impl anstyle_parse::Perform for Strip {
+ fn print(&mut self, c: char) {
+ self.0.push(c);
+ }
+
+ fn execute(&mut self, byte: u8) {
+ if byte.is_ascii_whitespace() { // every other executed control byte is dropped
+ self.0.push(byte as char);
+ }
+ }
+ }
+
+ let mut stripped = Strip::with_capacity(bytes.len());
+ let mut parser = anstyle_parse::Parser::<anstyle_parse::DefaultCharAccumulator>::new();
+ for byte in bytes {
+ parser.advance(&mut stripped, *byte);
+ }
+ stripped.0
+ }
+
+ /// Model verifying incremental parsing: feeds the input to one `StripStr` a single `char` at a time
+ fn strip_char(mut s: &str) -> String {
+ let mut result = String::new();
+ let mut state = StripStr::new();
+ while !s.is_empty() {
+ let mut indices = s.char_indices();
+ indices.next(); // skip the current char; the index of the following one marks where it ends
+ let offset = indices.next().map(|(i, _)| i).unwrap_or_else(|| s.len()); // last char: take the rest of the string
+ let (current, remainder) = s.split_at(offset);
+ for printable in state.strip_next(current) {
+ result.push_str(printable);
+ }
+ s = remainder;
+ }
+ result
+ }
+
+ /// Model verifying incremental parsing: feeds the input to one `StripBytes` a single byte at a time
+ fn strip_byte(s: &[u8]) -> Vec<u8> {
+ let mut result = Vec::new();
+ let mut state = StripBytes::default();
+ for start in 0..s.len() {
+ let current = &s[start..=start]; // one-byte slice
+ for printable in state.strip_next(current) {
+ result.extend(printable);
+ }
+ }
+ result
+ }
+
+ #[test]
+ fn test_strip_bytes_multibyte() {
+ let bytes = [240, 145, 141, 139]; // 0xF0 lead byte followed by three continuation bytes
+ let expected = parser_strip(&bytes);
+ let actual = String::from_utf8(strip_bytes(&bytes).into_vec()).unwrap();
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ fn test_strip_byte_multibyte() {
+ let bytes = [240, 145, 141, 139]; // same sequence, fed one byte at a time
+ let expected = parser_strip(&bytes);
+ let actual = String::from_utf8(strip_byte(&bytes).to_vec()).unwrap();
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ fn test_strip_str_del() {
+ let input = std::str::from_utf8(&[0x7f]).unwrap(); // DEL on its own
+ let expected = ""; // DEL is treated as non-printable
+ let actual = strip_str(input).to_string();
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ fn test_strip_byte_del() {
+ let bytes = [0x7f]; // DEL on its own
+ let expected = "";
+ let actual = String::from_utf8(strip_byte(&bytes).to_vec()).unwrap();
+ assert_eq!(expected, actual);
+ }
+
+ proptest! {
+ #[test]
+ #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
+ fn strip_str_no_escapes(s in "\\PC*") { // contiguous `&str` API compared against the reference model
+ let expected = parser_strip(s.as_bytes());
+ let actual = strip_str(&s).to_string();
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
+ fn strip_char_no_escapes(s in "\\PC*") { // char-at-a-time `StripStr` compared against the reference model
+ let expected = parser_strip(s.as_bytes());
+ let actual = strip_char(&s);
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
+ fn strip_bytes_no_escapes(s in "\\PC*") { // contiguous byte API compared against the reference model
+ dbg!(&s);
+ dbg!(s.as_bytes());
+ let expected = parser_strip(s.as_bytes());
+ let actual = String::from_utf8(strip_bytes(s.as_bytes()).into_vec()).unwrap();
+ assert_eq!(expected, actual);
+ }
+
+ #[test]
+ #[cfg_attr(miri, ignore)] // See https://github.com/AltSysrq/proptest/issues/253
+ fn strip_byte_no_escapes(s in "\\PC*") { // byte-at-a-time `StripBytes` compared against the reference model
+ dbg!(&s);
+ dbg!(s.as_bytes());
+ let expected = parser_strip(s.as_bytes());
+ let actual = String::from_utf8(strip_byte(s.as_bytes()).to_vec()).unwrap();
+ assert_eq!(expected, actual);
+ }
+ }
+}