diff options
author | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 |
---|---|---|
committer | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 |
commit | 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch) | |
tree | 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/anstyle-parse/src/lib.rs | |
parent | 5ecd8cf2cba827454317368b68571df0d13d7842 (diff) | |
download | fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip |
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/anstyle-parse/src/lib.rs')
-rw-r--r-- | vendor/anstyle-parse/src/lib.rs | 431 |
1 files changed, 431 insertions, 0 deletions
diff --git a/vendor/anstyle-parse/src/lib.rs b/vendor/anstyle-parse/src/lib.rs new file mode 100644 index 0000000..e3d04c9 --- /dev/null +++ b/vendor/anstyle-parse/src/lib.rs @@ -0,0 +1,431 @@ +//! Parser for implementing virtual terminal emulators +//! +//! [`Parser`] is implemented according to [Paul Williams' ANSI parser +//! state machine]. The state machine doesn't assign meaning to the parsed data +//! and is thus not itself sufficient for writing a terminal emulator. Instead, +//! it is expected that an implementation of [`Perform`] is provided which does +//! something useful with the parsed data. The [`Parser`] handles the book +//! keeping, and the [`Perform`] gets to simply handle actions. +//! +//! # Examples +//! +//! For an example of using the [`Parser`] please see the examples folder. The example included +//! there simply logs all the actions [`Perform`] does. One quick thing to see it in action is to +//! pipe `vim` into it +//! +//! ```sh +//! cargo build --release --example parselog +//! vim | target/release/examples/parselog +//! ``` +//! +//! Just type `:q` to exit. +//! +//! # Differences from original state machine description +//! +//! * UTF-8 Support for Input +//! * OSC Strings can be terminated by 0x07 +//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in +//! all states. +//! +//! [Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser +#![cfg_attr(not(test), no_std)] + +#[cfg(not(feature = "core"))] +extern crate alloc; + +use core::mem::MaybeUninit; + +#[cfg(feature = "core")] +use arrayvec::ArrayVec; +#[cfg(feature = "utf8")] +use utf8parse as utf8; + +mod params; +pub mod state; + +pub use params::{Params, ParamsIter}; + +use state::{state_change, Action, State}; + +const MAX_INTERMEDIATES: usize = 2; +const MAX_OSC_PARAMS: usize = 16; +#[cfg(feature = "core")] +const MAX_OSC_RAW: usize = 1024; + +/// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] +#[derive(Default, Clone, Debug, PartialEq, Eq)] +pub struct Parser<C = DefaultCharAccumulator> { + state: State, + intermediates: [u8; MAX_INTERMEDIATES], + intermediate_idx: usize, + params: Params, + param: u16, + #[cfg(feature = "core")] + osc_raw: ArrayVec<u8, MAX_OSC_RAW>, + #[cfg(not(feature = "core"))] + osc_raw: alloc::vec::Vec<u8>, + osc_params: [(usize, usize); MAX_OSC_PARAMS], + osc_num_params: usize, + ignoring: bool, + utf8_parser: C, +} + +impl<C> Parser<C> +where + C: CharAccumulator, +{ + /// Create a new Parser + pub fn new() -> Parser { + Parser::default() + } + + #[inline] + fn params(&self) -> &Params { + &self.params + } + + #[inline] + fn intermediates(&self) -> &[u8] { + &self.intermediates[..self.intermediate_idx] + } + + /// Advance the parser state + /// + /// Requires a [`Perform`] in case `byte` triggers an action + #[inline] + pub fn advance<P: Perform>(&mut self, performer: &mut P, byte: u8) { + // Utf8 characters are handled out-of-band. + if let State::Utf8 = self.state { + self.process_utf8(performer, byte); + return; + } + + let (state, action) = state_change(self.state, byte); + self.perform_state_change(performer, state, action, byte); + } + + #[inline] + fn process_utf8<P>(&mut self, performer: &mut P, byte: u8) + where + P: Perform, + { + if let Some(c) = self.utf8_parser.add(byte) { + performer.print(c); + self.state = State::Ground; + } + } + + #[inline] + fn perform_state_change<P>(&mut self, performer: &mut P, state: State, action: Action, byte: u8) + where + P: Perform, + { + match state { + State::Anywhere => { + // Just run the action + self.perform_action(performer, action, byte); + } + state => { + match self.state { + State::DcsPassthrough => { + self.perform_action(performer, Action::Unhook, byte); + } + State::OscString => { + self.perform_action(performer, Action::OscEnd, byte); + } + _ => (), + } + + match action { + Action::Nop => (), + action => { + self.perform_action(performer, action, byte); + } + } + + match state { + State::CsiEntry | State::DcsEntry | State::Escape => { + self.perform_action(performer, Action::Clear, byte); + } + State::DcsPassthrough => { + self.perform_action(performer, Action::Hook, byte); + } + State::OscString => { + self.perform_action(performer, Action::OscStart, byte); + } + _ => (), + } + + // Assume the new state + self.state = state; + } + } + } + + /// Separate method for osc_dispatch that borrows self as read-only + /// + /// The aliasing is needed here for multiple slices into self.osc_raw + #[inline] + fn osc_dispatch<P: Perform>(&self, performer: &mut P, byte: u8) { + let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = + unsafe { MaybeUninit::uninit().assume_init() }; + + for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { + let indices = self.osc_params[i]; + *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); + } + + unsafe { + let num_params = self.osc_num_params; + let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; + performer.osc_dispatch(&*params, byte == 0x07); + } + } + + #[inline] + fn perform_action<P: Perform>(&mut self, performer: &mut P, action: Action, byte: u8) { + match action { + Action::Print => performer.print(byte as char), + Action::Execute => performer.execute(byte), + Action::Hook => { + if self.params.is_full() { + self.ignoring = true; + } else { + self.params.push(self.param); + } + + performer.hook(self.params(), self.intermediates(), self.ignoring, byte); + } + Action::Put => performer.put(byte), + Action::OscStart => { + self.osc_raw.clear(); + self.osc_num_params = 0; + } + Action::OscPut => { + #[cfg(feature = "core")] + { + if self.osc_raw.is_full() { + return; + } + } + + let idx = self.osc_raw.len(); + + // Param separator + if byte == b';' { + let param_idx = self.osc_num_params; + match param_idx { + // Only process up to MAX_OSC_PARAMS + MAX_OSC_PARAMS => return, + + // First param is special - 0 to current byte index + 0 => { + self.osc_params[param_idx] = (0, idx); + } + + // All other params depend on previous indexing + _ => { + let prev = self.osc_params[param_idx - 1]; + let begin = prev.1; + self.osc_params[param_idx] = (begin, idx); + } + } + + self.osc_num_params += 1; + } else { + self.osc_raw.push(byte); + } + } + Action::OscEnd => { + let param_idx = self.osc_num_params; + let idx = self.osc_raw.len(); + + match param_idx { + // Finish last parameter if not already maxed + MAX_OSC_PARAMS => (), + + // First param is special - 0 to current byte index + 0 => { + self.osc_params[param_idx] = (0, idx); + self.osc_num_params += 1; + } + + // All other params depend on previous indexing + _ => { + let prev = self.osc_params[param_idx - 1]; + let begin = prev.1; + self.osc_params[param_idx] = (begin, idx); + self.osc_num_params += 1; + } + } + self.osc_dispatch(performer, byte); + } + Action::Unhook => performer.unhook(), + Action::CsiDispatch => { + if self.params.is_full() { + self.ignoring = true; + } else { + self.params.push(self.param); + } + + performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); + } + Action::EscDispatch => { + performer.esc_dispatch(self.intermediates(), self.ignoring, byte); + } + Action::Collect => { + if self.intermediate_idx == MAX_INTERMEDIATES { + self.ignoring = true; + } else { + self.intermediates[self.intermediate_idx] = byte; + self.intermediate_idx += 1; + } + } + Action::Param => { + if self.params.is_full() { + self.ignoring = true; + return; + } + + if byte == b';' { + self.params.push(self.param); + self.param = 0; + } else if byte == b':' { + self.params.extend(self.param); + self.param = 0; + } else { + // Continue collecting bytes into param + self.param = self.param.saturating_mul(10); + self.param = self.param.saturating_add((byte - b'0') as u16); + } + } + Action::Clear => { + // Reset everything on ESC/CSI/DCS entry + self.intermediate_idx = 0; + self.ignoring = false; + self.param = 0; + + self.params.clear(); + } + Action::BeginUtf8 => self.process_utf8(performer, byte), + Action::Ignore => (), + Action::Nop => (), + } + } +} + +/// Build a `char` out of bytes +pub trait CharAccumulator: Default { + /// Build a `char` out of bytes + /// + /// Return `None` when more data is needed + fn add(&mut self, byte: u8) -> Option<char>; +} + +#[cfg(feature = "utf8")] +pub type DefaultCharAccumulator = Utf8Parser; +#[cfg(not(feature = "utf8"))] +pub type DefaultCharAccumulator = AsciiParser; + +/// Only allow parsing 7-bit ASCII +#[derive(Default, Clone, Debug, PartialEq, Eq)] +pub struct AsciiParser; + +impl CharAccumulator for AsciiParser { + fn add(&mut self, _byte: u8) -> Option<char> { + unreachable!("multi-byte UTF8 characters are unsupported") + } +} + +/// Allow parsing UTF-8 +#[cfg(feature = "utf8")] +#[derive(Default, Clone, Debug, PartialEq, Eq)] +pub struct Utf8Parser { + utf8_parser: utf8::Parser, +} + +#[cfg(feature = "utf8")] +impl CharAccumulator for Utf8Parser { + fn add(&mut self, byte: u8) -> Option<char> { + let mut c = None; + let mut receiver = VtUtf8Receiver(&mut c); + self.utf8_parser.advance(&mut receiver, byte); + c + } +} + +#[cfg(feature = "utf8")] +struct VtUtf8Receiver<'a>(&'a mut Option<char>); + +#[cfg(feature = "utf8")] +impl<'a> utf8::Receiver for VtUtf8Receiver<'a> { + fn codepoint(&mut self, c: char) { + *self.0 = Some(c); + } + + fn invalid_sequence(&mut self) { + *self.0 = Some('�'); + } +} + +/// Performs actions requested by the [`Parser`] +/// +/// Actions in this case mean, for example, handling a CSI escape sequence describing cursor +/// movement, or simply printing characters to the screen. +/// +/// The methods on this type correspond to actions described in +/// <http://vt100.net/emu/dec_ansi_parser>. I've done my best to describe them in +/// a useful way in my own words for completeness, but the site should be +/// referenced if something isn't clear. If the site disappears at some point in +/// the future, consider checking archive.org. +pub trait Perform { + /// Draw a character to the screen and update states. + fn print(&mut self, _c: char) {} + + /// Execute a C0 or C1 control function. + fn execute(&mut self, _byte: u8) {} + + /// Invoked when a final character arrives in first part of device control string. + /// + /// The control function should be determined from the private marker, final character, and + /// execute with a parameter list. A handler should be selected for remaining characters in the + /// string; the handler function should subsequently be called by `put` for every character in + /// the control string. + /// + /// The `ignore` flag indicates that more than two intermediates arrived and + /// subsequent characters were ignored. + fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {} + + /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls + /// will also be passed to the handler. + fn put(&mut self, _byte: u8) {} + + /// Called when a device control string is terminated. + /// + /// The previously selected handler should be notified that the DCS has + /// terminated. + fn unhook(&mut self) {} + + /// Dispatch an operating system command. + fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {} + + /// A final character has arrived for a CSI sequence + /// + /// The `ignore` flag indicates that either more than two intermediates arrived + /// or the number of parameters exceeded the maximum supported length, + /// and subsequent characters were ignored. + fn csi_dispatch( + &mut self, + _params: &Params, + _intermediates: &[u8], + _ignore: bool, + _action: u8, + ) { + } + + /// The final character of an escape sequence has arrived. + /// + /// The `ignore` flag indicates that more than two intermediates arrived and + /// subsequent characters were ignored. + fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} +} |