From 1b6a04ca5504955c571d1c97504fb45ea0befee4 Mon Sep 17 00:00:00 2001
From: Valentin Popov <valentin@popov.link>
Date: Mon, 8 Jan 2024 01:21:28 +0400
Subject: Initial vendor packages

Signed-off-by: Valentin Popov <valentin@popov.link>
---
 vendor/encode_unicode/src/utf16_iterators.rs | 270 +++++++++++++++++++++++++++
 1 file changed, 270 insertions(+)
 create mode 100644 vendor/encode_unicode/src/utf16_iterators.rs

(limited to 'vendor/encode_unicode/src/utf16_iterators.rs')
diff --git a/vendor/encode_unicode/src/utf16_iterators.rs b/vendor/encode_unicode/src/utf16_iterators.rs
new file mode 100644
index 0000000..7adb5ac
--- /dev/null
+++ b/vendor/encode_unicode/src/utf16_iterators.rs
@@ -0,0 +1,270 @@
+/* Copyright 2016 The encode_unicode Developers
+ *
+ * Licensed under the Apache License, Version 2.0, <LICENSE-APACHE or
+ * http://apache.org/licenses/LICENSE-2.0> or the MIT license <LICENSE-MIT or
+ * http://opensource.org/licenses/MIT>, at your option. This file may not be
+ * copied, modified, or distributed except according to those terms.
+ */
+
+use traits::CharExt;
+use utf16_char::Utf16Char;
+use errors::EmptyStrError;
+extern crate core;
+use self::core::fmt;
+use self::core::borrow::Borrow;
+
+// Invalid values that says the field is consumed or empty.
+const FIRST_USED: u16 = 0x_dc_00;
+const SECOND_USED: u16 = 0;
+
+/// Iterate over the units of the UTF-16 representation of a codepoint.
+#[derive(Clone)]
+pub struct Utf16Iterator {
+    first: u16,
+    second: u16,
+}
+impl From<char> for Utf16Iterator {
+    fn from(c: char) -> Self {
+        let (first, second) = c.to_utf16_tuple();
+        Utf16Iterator{ first: first,  second: second.unwrap_or(SECOND_USED) }
+    }
+}
+impl From<Utf16Char> for Utf16Iterator {
+    fn from(uc: Utf16Char) -> Self {
+        let (first, second) = uc.to_tuple();
+        Utf16Iterator{ first: first,  second: second.unwrap_or(SECOND_USED) }
+    }
+}
+impl Iterator for Utf16Iterator {
+    type Item=u16;
+    fn next(&mut self) -> Option<u16> {
+        match (self.first, self.second) {
+            (FIRST_USED, SECOND_USED)  =>  {                            None        },
+            (FIRST_USED, second     )  =>  {self.second = SECOND_USED;  Some(second)},
+            (first     ,      _     )  =>  {self.first = FIRST_USED;    Some(first )},
+        }
+    }
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        (self.len(), Some(self.len()))
+    }
+}
+impl ExactSizeIterator for Utf16Iterator {
+    fn len(&self) -> usize {
+        (if self.first == FIRST_USED {0} else {1}) +
+        (if self.second == SECOND_USED {0} else {1})
+    }
+}
+impl fmt::Debug for Utf16Iterator {
+    fn fmt(&self,  fmtr: &mut fmt::Formatter) -> fmt::Result {
+        let mut clone = self.clone();
+        match (clone.next(), clone.next()) {
+            (Some(one), None)  => write!(fmtr, "[{}]", one),
+            (Some(a), Some(b)) => write!(fmtr, "[{}, {}]", a, b),
+            (None,  _)         => write!(fmtr, "[]"),
+        }
+    }
+}
+
+
+
+/// Converts an iterator of `Utf16Char` (or `&Utf16Char`)
+/// to an iterator of `u16`s.  
+/// Is equivalent to calling `.flat_map()` on the original iterator,
+/// but the returned iterator is about twice as fast.
+///
+/// The exact number of units cannot be known in advance, but `size_hint()`
+/// gives the possible range.
+///
+/// # Examples
+///
+/// From iterator of values:
+///
+/// ```
+/// use encode_unicode::{iter_units, CharExt};
+///
+/// let iterator = "foo".chars().map(|c| c.to_utf16() );
+/// let mut units = [0; 4];
+/// for (u,dst) in iter_units(iterator).zip(&mut units) {*dst=u;}
+/// assert_eq!(units, ['f' as u16, 'o' as u16, 'o' as u16, 0]);
+/// ```
+///
+/// From iterator of references:
+///
+#[cfg_attr(feature="std", doc=" ```")]
+#[cfg_attr(not(feature="std"), doc=" ```no_compile")]
+/// use encode_unicode::{iter_units, CharExt, Utf16Char};
+///
+/// // (💣 takes two units)
+/// let chars: Vec<Utf16Char> = "💣 bomb 💣".chars().map(|c| c.to_utf16() ).collect();
+/// let units: Vec<u16> = iter_units(&chars).collect();
+/// let flat_map: Vec<u16> = chars.iter().flat_map(|u16c| *u16c ).collect();
+/// assert_eq!(units, flat_map);
+/// ```
+pub fn iter_units<U:Borrow<Utf16Char>, I:IntoIterator<Item=U>>
+(iterable: I) -> Utf16CharSplitter<U, I::IntoIter> {
+    Utf16CharSplitter{ inner: iterable.into_iter(),  prev_second: 0 }
+}
+
+/// The iterator type returned by `iter_units()`
+#[derive(Clone)]
+pub struct Utf16CharSplitter<U:Borrow<Utf16Char>, I:Iterator<Item=U>> {
+    inner: I,
+    prev_second: u16,
+}
+impl<I:Iterator<Item=Utf16Char>> From<I> for Utf16CharSplitter<Utf16Char,I> {
+    /// A less generic constructor than `iter_units()`
+    fn from(iter: I) -> Self {
+        iter_units(iter)
+    }
+}
+impl<U:Borrow<Utf16Char>, I:Iterator<Item=U>> Utf16CharSplitter<U,I> {
+    /// Extracts the source iterator.
+    ///
+    /// Note that `iter_units(iter.into_inner())` is not a no-op:  
+    /// If the last returned unit from `next()` was a leading surrogate,
+    /// the trailing surrogate is lost.
+    pub fn into_inner(self) -> I {
+        self.inner
+    }
+}
+impl<U:Borrow<Utf16Char>, I:Iterator<Item=U>> Iterator for Utf16CharSplitter<U,I> {
+    type Item = u16;
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.prev_second == 0 {
+            self.inner.next().map(|u16c| {
+                let units = u16c.borrow().to_array();
+                self.prev_second = units[1];
+                units[0]
+            })
+        } else {
+            let prev_second = self.prev_second;
+            self.prev_second = 0;
+            Some(prev_second)
+        }
+    }
+    fn size_hint(&self) -> (usize,Option<usize>) {
+        // Doesn't need to handle unlikely overflows correctly because
+        // size_hint() cannot be relied upon anyway. (the trait isn't unsafe)
+        let (min, max) = self.inner.size_hint();
+        let add = if self.prev_second == 0 {0} else {1};
+        (min.wrapping_add(add), max.map(|max| max.wrapping_mul(2).wrapping_add(add) ))
+    }
+}
+
+
+
+/// An iterator over the codepoints in a `str` represented as `Utf16Char`.
+#[derive(Clone)]
+pub struct Utf16CharIndices<'a>{
+    str: &'a str,
+    index: usize,
+}
+impl<'a> From<&'a str> for Utf16CharIndices<'a> {
+    fn from(s: &str) -> Utf16CharIndices {
+        Utf16CharIndices{str: s, index: 0}
+    }
+}
+impl<'a> Utf16CharIndices<'a> {
+    /// Extract the remainder of the source `str`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use encode_unicode::{StrExt, Utf16Char};
+    /// let mut iter = "abc".utf16char_indices();
+    /// assert_eq!(iter.next_back(), Some((2, Utf16Char::from('c'))));
+    /// assert_eq!(iter.next(), Some((0, Utf16Char::from('a'))));
+    /// assert_eq!(iter.as_str(), "b");
+    /// ```
+    pub fn as_str(&self) -> &'a str {
+        &self.str[self.index..]
+    }
+}
+impl<'a> Iterator for Utf16CharIndices<'a> {
+    type Item = (usize,Utf16Char);
+    fn next(&mut self) -> Option<(usize,Utf16Char)> {
+        match Utf16Char::from_str_start(&self.str[self.index..]) {
+            Ok((u16c, bytes)) => {
+                let item = (self.index, u16c);
+                self.index += bytes;
+                Some(item)
+            },
+            Err(EmptyStrError) => None
+        }
+    }
+    fn size_hint(&self) -> (usize,Option<usize>) {
+        let len = self.str.len() - self.index;
+        // For len+3 to overflow, the slice must fill all but two bytes of
+        // addressable memory, and size_hint() doesn't need to be correct.
+        (len.wrapping_add(3)/4, Some(len))
+    }
+}
+impl<'a> DoubleEndedIterator for Utf16CharIndices<'a> {
+    fn next_back(&mut self) -> Option<(usize,Utf16Char)> {
+        if self.index < self.str.len() {
+            let rev = self.str.bytes().rev();
+            let len = 1 + rev.take_while(|b| b & 0b1100_0000 == 0b1000_0000 ).count();
+            let starts = self.str.len() - len;
+            let (u16c,_) = Utf16Char::from_str_start(&self.str[starts..]).unwrap();
+            self.str = &self.str[..starts];
+            Some((starts, u16c))
+        } else {
+            None
+        }
+    }
+}
+impl<'a> fmt::Debug for Utf16CharIndices<'a> {
+    fn fmt(&self,  fmtr: &mut fmt::Formatter) -> fmt::Result {
+        fmtr.debug_tuple("Utf16CharIndices")
+            .field(&self.index)
+            .field(&self.as_str())
+            .finish()
+    }
+}
+
+
+/// An iterator over the codepoints in a `str` represented as `Utf16Char`.
+#[derive(Clone)]
+pub struct Utf16Chars<'a>(Utf16CharIndices<'a>);
+impl<'a> From<&'a str> for Utf16Chars<'a> {
+    fn from(s: &str) -> Utf16Chars {
+        Utf16Chars(Utf16CharIndices::from(s))
+    }
+}
+impl<'a> Utf16Chars<'a> {
+    /// Extract the remainder of the source `str`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use encode_unicode::{StrExt, Utf16Char};
+    /// let mut iter = "abc".utf16chars();
+    /// assert_eq!(iter.next(), Some(Utf16Char::from('a')));
+    /// assert_eq!(iter.next_back(), Some(Utf16Char::from('c')));
+    /// assert_eq!(iter.as_str(), "b");
+    /// ```
+    pub fn as_str(&self) -> &'a str {
+        self.0.as_str()
+    }
+}
+impl<'a> Iterator for Utf16Chars<'a> {
+    type Item = Utf16Char;
+    fn next(&mut self) -> Option<Utf16Char> {
+        self.0.next().map(|(_,u16c)| u16c )
+    }
+    fn size_hint(&self) -> (usize,Option<usize>) {
+        self.0.size_hint()
+    }
+}
+impl<'a> DoubleEndedIterator for Utf16Chars<'a> {
+    fn next_back(&mut self) -> Option<Utf16Char> {
+        self.0.next_back().map(|(_,u16c)| u16c )
+    }
+}
+impl<'a> fmt::Debug for Utf16Chars<'a> {
+    fn fmt(&self,  fmtr: &mut fmt::Formatter) -> fmt::Result {
+        fmtr.debug_tuple("Utf16Chars")
+            .field(&self.as_str())
+            .finish()
+    }
+}
-- 
cgit v1.2.3