//! Parser for implementing virtual terminal emulators
//!
//! [`Parser`] is implemented according to [Paul Williams' ANSI parser
//! state machine]. The state machine doesn't assign meaning to the parsed data
//! and is thus not itself sufficient for writing a terminal emulator. Instead,
//! it is expected that an implementation of [`Perform`] is provided which does
//! something useful with the parsed data. The [`Parser`] handles the
//! bookkeeping, and the [`Perform`] gets to simply handle actions.
//!
//! # Examples
//!
//! For an example of using the [`Parser`] please see the examples folder. The example included
//! there simply logs all the actions [`Perform`] does. One quick way to see it in action is to
//! pipe `vim` into it:
//!
//! ```sh
//! cargo build --release --example parselog
//! vim | target/release/examples/parselog
//! ```
//!
//! Just type `:q` to exit.
//!
//! # Differences from original state machine description
//!
//! * UTF-8 Support for Input
//! * OSC Strings can be terminated by 0x07
//! * Only supports 7-bit codes. Some 8-bit codes are still supported, but they no longer work in
//!   all states.
//!
[Paul Williams' ANSI parser state machine]: https://vt100.net/emu/dec_ansi_parser #![cfg_attr(not(test), no_std)] #[cfg(not(feature = "core"))] extern crate alloc; use core::mem::MaybeUninit; #[cfg(feature = "core")] use arrayvec::ArrayVec; #[cfg(feature = "utf8")] use utf8parse as utf8; mod params; pub mod state; pub use params::{Params, ParamsIter}; use state::{state_change, Action, State}; const MAX_INTERMEDIATES: usize = 2; const MAX_OSC_PARAMS: usize = 16; #[cfg(feature = "core")] const MAX_OSC_RAW: usize = 1024; /// Parser for raw _VTE_ protocol which delegates actions to a [`Perform`] #[derive(Default, Clone, Debug, PartialEq, Eq)] pub struct Parser { state: State, intermediates: [u8; MAX_INTERMEDIATES], intermediate_idx: usize, params: Params, param: u16, #[cfg(feature = "core")] osc_raw: ArrayVec, #[cfg(not(feature = "core"))] osc_raw: alloc::vec::Vec, osc_params: [(usize, usize); MAX_OSC_PARAMS], osc_num_params: usize, ignoring: bool, utf8_parser: C, } impl Parser where C: CharAccumulator, { /// Create a new Parser pub fn new() -> Parser { Parser::default() } #[inline] fn params(&self) -> &Params { &self.params } #[inline] fn intermediates(&self) -> &[u8] { &self.intermediates[..self.intermediate_idx] } /// Advance the parser state /// /// Requires a [`Perform`] in case `byte` triggers an action #[inline] pub fn advance(&mut self, performer: &mut P, byte: u8) { // Utf8 characters are handled out-of-band. if let State::Utf8 = self.state { self.process_utf8(performer, byte); return; } let (state, action) = state_change(self.state, byte); self.perform_state_change(performer, state, action, byte); } #[inline] fn process_utf8

(&mut self, performer: &mut P, byte: u8) where P: Perform, { if let Some(c) = self.utf8_parser.add(byte) { performer.print(c); self.state = State::Ground; } } #[inline] fn perform_state_change

(&mut self, performer: &mut P, state: State, action: Action, byte: u8) where P: Perform, { match state { State::Anywhere => { // Just run the action self.perform_action(performer, action, byte); } state => { match self.state { State::DcsPassthrough => { self.perform_action(performer, Action::Unhook, byte); } State::OscString => { self.perform_action(performer, Action::OscEnd, byte); } _ => (), } match action { Action::Nop => (), action => { self.perform_action(performer, action, byte); } } match state { State::CsiEntry | State::DcsEntry | State::Escape => { self.perform_action(performer, Action::Clear, byte); } State::DcsPassthrough => { self.perform_action(performer, Action::Hook, byte); } State::OscString => { self.perform_action(performer, Action::OscStart, byte); } _ => (), } // Assume the new state self.state = state; } } } /// Separate method for osc_dispatch that borrows self as read-only /// /// The aliasing is needed here for multiple slices into self.osc_raw #[inline] fn osc_dispatch(&self, performer: &mut P, byte: u8) { let mut slices: [MaybeUninit<&[u8]>; MAX_OSC_PARAMS] = unsafe { MaybeUninit::uninit().assume_init() }; for (i, slice) in slices.iter_mut().enumerate().take(self.osc_num_params) { let indices = self.osc_params[i]; *slice = MaybeUninit::new(&self.osc_raw[indices.0..indices.1]); } unsafe { let num_params = self.osc_num_params; let params = &slices[..num_params] as *const [MaybeUninit<&[u8]>] as *const [&[u8]]; performer.osc_dispatch(&*params, byte == 0x07); } } #[inline] fn perform_action(&mut self, performer: &mut P, action: Action, byte: u8) { match action { Action::Print => performer.print(byte as char), Action::Execute => performer.execute(byte), Action::Hook => { if self.params.is_full() { self.ignoring = true; } else { self.params.push(self.param); } performer.hook(self.params(), self.intermediates(), self.ignoring, byte); } Action::Put => performer.put(byte), Action::OscStart => { self.osc_raw.clear(); self.osc_num_params = 0; } 
Action::OscPut => { #[cfg(feature = "core")] { if self.osc_raw.is_full() { return; } } let idx = self.osc_raw.len(); // Param separator if byte == b';' { let param_idx = self.osc_num_params; match param_idx { // Only process up to MAX_OSC_PARAMS MAX_OSC_PARAMS => return, // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); } // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); } } self.osc_num_params += 1; } else { self.osc_raw.push(byte); } } Action::OscEnd => { let param_idx = self.osc_num_params; let idx = self.osc_raw.len(); match param_idx { // Finish last parameter if not already maxed MAX_OSC_PARAMS => (), // First param is special - 0 to current byte index 0 => { self.osc_params[param_idx] = (0, idx); self.osc_num_params += 1; } // All other params depend on previous indexing _ => { let prev = self.osc_params[param_idx - 1]; let begin = prev.1; self.osc_params[param_idx] = (begin, idx); self.osc_num_params += 1; } } self.osc_dispatch(performer, byte); } Action::Unhook => performer.unhook(), Action::CsiDispatch => { if self.params.is_full() { self.ignoring = true; } else { self.params.push(self.param); } performer.csi_dispatch(self.params(), self.intermediates(), self.ignoring, byte); } Action::EscDispatch => { performer.esc_dispatch(self.intermediates(), self.ignoring, byte); } Action::Collect => { if self.intermediate_idx == MAX_INTERMEDIATES { self.ignoring = true; } else { self.intermediates[self.intermediate_idx] = byte; self.intermediate_idx += 1; } } Action::Param => { if self.params.is_full() { self.ignoring = true; return; } if byte == b';' { self.params.push(self.param); self.param = 0; } else if byte == b':' { self.params.extend(self.param); self.param = 0; } else { // Continue collecting bytes into param self.param = self.param.saturating_mul(10); self.param = self.param.saturating_add((byte - 
b'0') as u16); } } Action::Clear => { // Reset everything on ESC/CSI/DCS entry self.intermediate_idx = 0; self.ignoring = false; self.param = 0; self.params.clear(); } Action::BeginUtf8 => self.process_utf8(performer, byte), Action::Ignore => (), Action::Nop => (), } } } /// Build a `char` out of bytes pub trait CharAccumulator: Default { /// Build a `char` out of bytes /// /// Return `None` when more data is needed fn add(&mut self, byte: u8) -> Option; } #[cfg(feature = "utf8")] pub type DefaultCharAccumulator = Utf8Parser; #[cfg(not(feature = "utf8"))] pub type DefaultCharAccumulator = AsciiParser; /// Only allow parsing 7-bit ASCII #[derive(Default, Clone, Debug, PartialEq, Eq)] pub struct AsciiParser; impl CharAccumulator for AsciiParser { fn add(&mut self, _byte: u8) -> Option { unreachable!("multi-byte UTF8 characters are unsupported") } } /// Allow parsing UTF-8 #[cfg(feature = "utf8")] #[derive(Default, Clone, Debug, PartialEq, Eq)] pub struct Utf8Parser { utf8_parser: utf8::Parser, } #[cfg(feature = "utf8")] impl CharAccumulator for Utf8Parser { fn add(&mut self, byte: u8) -> Option { let mut c = None; let mut receiver = VtUtf8Receiver(&mut c); self.utf8_parser.advance(&mut receiver, byte); c } } #[cfg(feature = "utf8")] struct VtUtf8Receiver<'a>(&'a mut Option); #[cfg(feature = "utf8")] impl<'a> utf8::Receiver for VtUtf8Receiver<'a> { fn codepoint(&mut self, c: char) { *self.0 = Some(c); } fn invalid_sequence(&mut self) { *self.0 = Some('�'); } } /// Performs actions requested by the [`Parser`] /// /// Actions in this case mean, for example, handling a CSI escape sequence describing cursor /// movement, or simply printing characters to the screen. /// /// The methods on this type correspond to actions described in /// . I've done my best to describe them in /// a useful way in my own words for completeness, but the site should be /// referenced if something isn't clear. 
If the site disappears at some point in /// the future, consider checking archive.org. pub trait Perform { /// Draw a character to the screen and update states. fn print(&mut self, _c: char) {} /// Execute a C0 or C1 control function. fn execute(&mut self, _byte: u8) {} /// Invoked when a final character arrives in first part of device control string. /// /// The control function should be determined from the private marker, final character, and /// execute with a parameter list. A handler should be selected for remaining characters in the /// string; the handler function should subsequently be called by `put` for every character in /// the control string. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn hook(&mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8) {} /// Pass bytes as part of a device control string to the handle chosen in `hook`. C0 controls /// will also be passed to the handler. fn put(&mut self, _byte: u8) {} /// Called when a device control string is terminated. /// /// The previously selected handler should be notified that the DCS has /// terminated. fn unhook(&mut self) {} /// Dispatch an operating system command. fn osc_dispatch(&mut self, _params: &[&[u8]], _bell_terminated: bool) {} /// A final character has arrived for a CSI sequence /// /// The `ignore` flag indicates that either more than two intermediates arrived /// or the number of parameters exceeded the maximum supported length, /// and subsequent characters were ignored. fn csi_dispatch( &mut self, _params: &Params, _intermediates: &[u8], _ignore: bool, _action: u8, ) { } /// The final character of an escape sequence has arrived. /// /// The `ignore` flag indicates that more than two intermediates arrived and /// subsequent characters were ignored. fn esc_dispatch(&mut self, _intermediates: &[u8], _ignore: bool, _byte: u8) {} }