diff options
Diffstat (limited to 'vendor/console/src/ansi.rs')
-rw-r--r-- | vendor/console/src/ansi.rs | 438 |
1 files changed, 438 insertions, 0 deletions
// Contents of vendor/console/src/ansi.rs, recovered from a flattened "new file" diff.

use std::{
    borrow::Cow,
    iter::{FusedIterator, Peekable},
    str::CharIndices,
};

/// States of the hand-written state machine that recognises ansi codes.
///
/// `Start` is the initial state, `Trap` is entered as soon as a character has
/// no valid transition from the current state, and the accepting states are
/// the ones listed in [`State::is_final`].
#[derive(Debug, Clone, Copy)]
enum State {
    Start,
    S1,
    S2,
    S3,
    S4,
    S5,
    S6,
    S7,
    S8,
    S9,
    S10,
    S11,
    Trap,
}

impl Default for State {
    /// The machine always begins in [`State::Start`].
    fn default() -> Self {
        Self::Start
    }
}

impl State {
    /// Returns `true` if the characters consumed so far form a complete match.
    fn is_final(&self) -> bool {
        // NOTE(review): the lint is silenced instead of switching to `matches!`;
        // presumably to stay compatible with older toolchains - confirm before changing.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::S3 | Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S9 | Self::S11 => true,
            _ => false,
        }
    }

    /// Returns `true` once the machine has seen a character it could not accept.
    fn is_trapped(&self) -> bool {
        // NOTE(review): see `is_final` regarding the silenced lint.
        #[allow(clippy::match_like_matches_macro)]
        match self {
            Self::Trap => true,
            _ => false,
        }
    }

    /// Advances the machine by one character.
    ///
    /// Every character/state combination that is not listed explicitly moves
    /// the machine to [`State::Trap`].
    fn transition(&mut self, c: char) {
        *self = match c {
            // An introducer is only valid as the very first character.
            '\u{1b}' | '\u{9b}' => match self {
                Self::Start => Self::S1,
                _ => Self::Trap,
            },
            '(' | ')' => match self {
                Self::S1 => Self::S2,
                Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            ';' => match self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                Self::S5 | Self::S6 | Self::S7 | Self::S8 | Self::S10 => Self::S10,
                _ => Self::Trap,
            },

            '[' | '#' | '?' => match self {
                Self::S1 | Self::S2 | Self::S4 => Self::S4,
                _ => Self::Trap,
            },
            // Identical to the `'3'..='9'` arm except for the transition out of `S2`.
            '0'..='2' => match self {
                Self::S1 | Self::S4 => Self::S5,
                Self::S2 => Self::S3,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                Self::S10 => Self::S5,
                _ => Self::Trap,
            },
            '3'..='9' => match self {
                Self::S1 | Self::S4 => Self::S5,
                Self::S2 => Self::S5,
                Self::S5 => Self::S6,
                Self::S6 => Self::S7,
                Self::S7 => Self::S8,
                Self::S8 => Self::S9,
                Self::S10 => Self::S5,
                _ => Self::Trap,
            },
            'A'..='P' | 'R' | 'Z' | 'c' | 'f'..='n' | 'q' | 'r' | 'y' | '=' | '>' | '<' => {
                match self {
                    Self::S1
                    | Self::S2
                    | Self::S4
                    | Self::S5
                    | Self::S6
                    | Self::S7
                    | Self::S8
                    | Self::S10 => Self::S11,
                    _ => Self::Trap,
                }
            }
            _ => Self::Trap,
        };
    }
}

/// Iterator over the ansi codes found in `s`, in order of appearance.
#[derive(Debug)]
struct Matches<'a> {
    /// The string being scanned; every yielded [`Match`] refers back to it.
    s: &'a str,
    /// Scan position inside `s`.
    it: Peekable<CharIndices<'a>>,
}

impl<'a> Matches<'a> {
    /// Creates an iterator positioned at the start of `s`.
    fn new(s: &'a str) -> Self {
        let it = s.char_indices().peekable();
        Self { s, it }
    }
}

/// A single ansi code: the byte range `start..end` (end exclusive) of `text`.
#[derive(Debug)]
struct Match<'a> {
    text: &'a str,
    start: usize,
    end: usize,
}

impl<'a> Match<'a> {
    /// Returns the matched part of the underlying string.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        &self.text[self.start..self.end]
    }
}

impl<'a> Iterator for Matches<'a> {
    type Item = Match<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        find_ansi_code_exclusive(&mut self.it).map(|(start, end)| Match {
            text: self.s,
            start,
            end,
        })
    }
}

impl<'a> FusedIterator for Matches<'a> {}

/// Finds the next ansi code at or after the current position of `it`.
///
/// Returns the byte range of the code as `(start, end)` with `end` exclusive,
/// or `None` once the input is exhausted without a further match. Characters
/// are consumed from `it` while scanning; a character that ends an attempt is
/// only peeked, so it is still available to the next call.
fn find_ansi_code_exclusive(it: &mut Peekable<CharIndices<'_>>) -> Option<(usize, usize)> {
    'outer: loop {
        if let (start, '\u{1b}') | (start, '\u{9b}') = it.peek()? {
            let start = *start;
            let mut state = State::default();
            let mut maybe_end = None;

            loop {
                let item = it.peek();

                if let Some((idx, c)) = item {
                    state.transition(*c);

                    if state.is_final() {
                        maybe_end = Some(*idx);
                    }
                }

                // The match is greedy so run till we hit the trap state no matter what. A valid
                // match is just one that was final at some point
                if state.is_trapped() || item.is_none() {
                    match maybe_end {
                        Some(end) => {
                            // All possible final characters are a single byte so it's safe to make
                            // the end exclusive by just adding one
                            return Some((start, end + 1));
                        }
                        // The character we are peeking right now might be the start of a match so
                        // we want to continue the loop without popping off that char
                        None => continue 'outer,
                    }
                }

                it.next();
            }
        }

        it.next();
    }
}

/// Helper function to strip ansi codes.
///
/// Returns the input borrowed and unchanged when it contains no ansi code;
/// otherwise returns a newly allocated string with every code removed.
pub fn strip_ansi_codes(s: &str) -> Cow<'_, str> {
    // Probe for a first match so that the common "nothing to strip" case does not allocate.
    let mut char_it = s.char_indices().peekable();
    match find_ansi_code_exclusive(&mut char_it) {
        Some(_) => {
            let stripped: String = AnsiCodeIterator::new(s)
                .filter_map(|(text, is_ansi)| if is_ansi { None } else { Some(text) })
                .collect();
            Cow::Owned(stripped)
        }
        None => Cow::Borrowed(s),
    }
}

/// An iterator over ansi codes in a string.
///
/// This type can be used to scan over ansi codes in a string.
/// It yields tuples in the form `(s, is_ansi)` where `s` is a slice of
/// the original string and `is_ansi` indicates if the slice contains
/// ansi codes or string values.
pub struct AnsiCodeIterator<'a> {
    /// The string being iterated.
    s: &'a str,
    /// An ansi code that was already found but is held back until the plain
    /// text preceding it has been yielded.
    pending_item: Option<(&'a str, bool)>,
    /// Byte offset just past the last ansi code taken from `iter`.
    last_idx: usize,
    /// Byte offset just past the most recently yielded item.
    cur_idx: usize,
    /// Source of ansi code matches.
    iter: Matches<'a>,
}

impl<'a> AnsiCodeIterator<'a> {
    /// Creates a new ansi code iterator.
    pub fn new(s: &'a str) -> AnsiCodeIterator<'a> {
        AnsiCodeIterator {
            s,
            pending_item: None,
            last_idx: 0,
            cur_idx: 0,
            iter: Matches::new(s),
        }
    }

    /// Returns the string slice up to the current match.
    ///
    /// This is everything up to the end of the most recently yielded item.
    pub fn current_slice(&self) -> &str {
        &self.s[..self.cur_idx]
    }

    /// Returns the string slice from the current match to the end.
    ///
    /// This is everything after the most recently yielded item.
    pub fn rest_slice(&self) -> &str {
        &self.s[self.cur_idx..]
    }
}

impl<'a> Iterator for AnsiCodeIterator<'a> {
    type Item = (&'a str, bool);

    fn next(&mut self) -> Option<(&'a str, bool)> {
        if let Some(pending_item) = self.pending_item.take() {
            // The plain text in front of this code went out on the previous call.
            self.cur_idx += pending_item.0.len();
            Some(pending_item)
        } else if let Some(m) = self.iter.next() {
            // Plain text between the previous code and this one (possibly empty).
            let s = &self.s[self.last_idx..m.start];
            self.last_idx = m.end;
            if s.is_empty() {
                self.cur_idx = m.end;
                Some((m.as_str(), true))
            } else {
                // Yield the text first and keep the code for the next call.
                self.cur_idx = m.start;
                self.pending_item = Some((m.as_str(), true));
                Some((s, false))
            }
        } else if self.last_idx < self.s.len() {
            // No more codes: everything that is left is plain text.
            let rv = &self.s[self.last_idx..];
            self.cur_idx = self.s.len();
            self.last_idx = self.s.len();
            Some((rv, false))
        } else {
            None
        }
    }
}

impl<'a> FusedIterator for AnsiCodeIterator<'a> {}

#[cfg(test)]
mod tests {
    use super::*;

    use lazy_static::lazy_static;
    use proptest::prelude::*;
    use regex::Regex;

    // The manual dfa `State` is a handwritten translation from the previously used regex. That
    // regex is kept here and used to ensure that the new matches are the same as the old
    lazy_static! {
        static ref STRIP_ANSI_RE: Regex = Regex::new(
            r"[\x1b\x9b]([()][012AB]|[\[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-PRZcf-nqry=><])",
        )
        .unwrap();
    }

    impl<'a, 'b> PartialEq<Match<'a>> for regex::Match<'b> {
        fn eq(&self, other: &Match<'a>) -> bool {
            self.start() == other.start && self.end() == other.end
        }
    }

    proptest! {
        #[test]
        fn dfa_matches_old_regex(s in r"([\x1b\x9b]?.*){0,5}") {
            let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
            let new_matches: Vec<_> = Matches::new(&s).collect();
            assert_eq!(old_matches, new_matches);
        }
    }

    #[test]
    fn dfa_matches_regex_on_small_strings() {
        // To make sure the test runs in a reasonable time this is a slimmed down list of
        // characters to reduce the groups that are only used with each other along with one
        // arbitrarily chosen character not used in the regex (' ')
        const POSSIBLE_BYTES: &[u8] = &[b' ', 0x1b, 0x9b, b'(', b'0', b'[', b';', b'3', b'C'];

        fn check_all_strings_of_len(len: usize) {
            _check_all_strings_of_len(len, &mut Vec::with_capacity(len));
        }

        fn _check_all_strings_of_len(len: usize, chunk: &mut Vec<u8>) {
            if len == 0 {
                if let Ok(s) = std::str::from_utf8(chunk) {
                    let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(s).collect();
                    let new_matches: Vec<_> = Matches::new(s).collect();
                    assert_eq!(old_matches, new_matches);
                }

                return;
            }

            for b in POSSIBLE_BYTES {
                chunk.push(*b);
                _check_all_strings_of_len(len - 1, chunk);
                chunk.pop();
            }
        }

        for str_len in 0..=6 {
            check_all_strings_of_len(str_len);
        }
    }

    #[test]
    fn complex_data() {
        let s = std::fs::read_to_string(
            std::path::Path::new("tests")
                .join("data")
                .join("sample_zellij_session.log"),
        )
        .unwrap();

        let old_matches: Vec<_> = STRIP_ANSI_RE.find_iter(&s).collect();
        let new_matches: Vec<_> = Matches::new(&s).collect();
        assert_eq!(old_matches, new_matches);
    }

    #[test]
    fn state_machine() {
        let ansi_code = "\x1b)B";
        let mut state = State::default();
        assert!(!state.is_final());

        for c in ansi_code.chars() {
            state.transition(c);
        }
        assert!(state.is_final());

        state.transition('A');
        assert!(state.is_trapped());
    }

    #[test]
    fn back_to_back_entry_char() {
        let s = "\x1b\x1bf";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&["\x1bf"], matches.as_slice());
    }

    #[test]
    fn early_paren_can_use_many_chars() {
        let s = "\x1b(C";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn long_run_of_digits() {
        let s = "\u{1b}00000";
        let matches: Vec<_> = Matches::new(s).map(|m| m.as_str()).collect();
        assert_eq!(&[s], matches.as_slice());
    }

    #[test]
    fn test_ansi_iter_re_vt100() {
        let s = "\x1b(0lpq\x1b)Benglish";
        let mut iter = AnsiCodeIterator::new(s);
        assert_eq!(iter.next(), Some(("\x1b(0", true)));
        assert_eq!(iter.next(), Some(("lpq", false)));
        assert_eq!(iter.next(), Some(("\x1b)B", true)));
        assert_eq!(iter.next(), Some(("english", false)));
    }

    #[test]
    fn test_ansi_iter_re() {
        use crate::style;
        let s = format!("Hello {}!", style("World").red().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("Hello ", false)));
        assert_eq!(iter.current_slice(), "Hello ");
        assert_eq!(iter.rest_slice(), "\x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31m");
        assert_eq!(iter.rest_slice(), "World\x1b[0m!");
        assert_eq!(iter.next(), Some(("World", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld");
        assert_eq!(iter.rest_slice(), "\x1b[0m!");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m");
        assert_eq!(iter.rest_slice(), "!");
        assert_eq!(iter.next(), Some(("!", false)));
        assert_eq!(iter.current_slice(), "Hello \x1b[31mWorld\x1b[0m!");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn test_ansi_iter_re_on_multi() {
        use crate::style;
        let s = format!("{}", style("a").red().bold().force_styling(true));
        let mut iter = AnsiCodeIterator::new(&s);
        assert_eq!(iter.next(), Some(("\x1b[31m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m");
        assert_eq!(iter.rest_slice(), "\x1b[1ma\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[1m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1m");
        assert_eq!(iter.rest_slice(), "a\x1b[0m");
        assert_eq!(iter.next(), Some(("a", false)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma");
        assert_eq!(iter.rest_slice(), "\x1b[0m");
        assert_eq!(iter.next(), Some(("\x1b[0m", true)));
        assert_eq!(iter.current_slice(), "\x1b[31m\x1b[1ma\x1b[0m");
        assert_eq!(iter.rest_slice(), "");
        assert_eq!(iter.next(), None);
    }
}