diff options
| author | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 | 
|---|---|---|
| committer | Valentin Popov <valentin@popov.link> | 2024-01-08 00:21:28 +0300 | 
| commit | 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch) | |
| tree | 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/half/src | |
| parent | 5ecd8cf2cba827454317368b68571df0d13d7842 (diff) | |
| download | fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip | |
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/half/src')
| -rw-r--r-- | vendor/half/src/bfloat.rs | 1841 | ||||
| -rw-r--r-- | vendor/half/src/bfloat/convert.rs | 148 | ||||
| -rw-r--r-- | vendor/half/src/binary16.rs | 1912 | ||||
| -rw-r--r-- | vendor/half/src/binary16/convert.rs | 752 | ||||
| -rw-r--r-- | vendor/half/src/leading_zeros.rs | 62 | ||||
| -rw-r--r-- | vendor/half/src/lib.rs | 233 | ||||
| -rw-r--r-- | vendor/half/src/num_traits.rs | 1483 | ||||
| -rw-r--r-- | vendor/half/src/slice.rs | 854 | ||||
| -rw-r--r-- | vendor/half/src/vec.rs | 274 | 
9 files changed, 7559 insertions, 0 deletions
| diff --git a/vendor/half/src/bfloat.rs b/vendor/half/src/bfloat.rs new file mode 100644 index 0000000..8b23863 --- /dev/null +++ b/vendor/half/src/bfloat.rs @@ -0,0 +1,1841 @@ +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, Zeroable}; +use core::{ +    cmp::Ordering, +    iter::{Product, Sum}, +    num::FpCategory, +    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, +}; +#[cfg(not(target_arch = "spirv"))] +use core::{ +    fmt::{ +        Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, +    }, +    num::ParseFloatError, +    str::FromStr, +}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "zerocopy")] +use zerocopy::{AsBytes, FromBytes}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the [`bfloat16`] format. +/// +/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard +/// `binary32`, a.k.a [`f32`]. [`bf16`] has approximately the same dynamic range as [`f32`] by +/// having a lower precision than [`f16`][crate::f16]. While [`f16`][crate::f16] has a precision of +/// 11 bits, [`bf16`] has a precision of only 8 bits. +/// +/// Like [`f16`][crate::f16], [`bf16`] does not offer arithmetic operations as it is intended for +/// compact storage rather than calculations. Operations should be performed with [`f32`] or +/// higher-precision types and converted to/from [`bf16`] as necessary. +/// +/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] +#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] +pub struct bf16(u16); + +impl bf16 { +    /// Constructs a [`bf16`] value from the raw bits. +    #[inline] +    #[must_use] +    pub const fn from_bits(bits: u16) -> bf16 { +        bf16(bits) +    } + +    /// Constructs a [`bf16`] value from a 32-bit floating point value. +    /// +    /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. +    /// Subnormal values that are too tiny to be represented will result in ±0. All other values +    /// are truncated and rounded to the nearest representable value. +    #[inline] +    #[must_use] +    pub fn from_f32(value: f32) -> bf16 { +        Self::from_f32_const(value) +    } + +    /// Constructs a [`bf16`] value from a 32-bit floating point value. +    /// +    /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred +    /// in any non-`const` context. +    /// +    /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. +    /// Subnormal values that are too tiny to be represented will result in ±0. All other values +    /// are truncated and rounded to the nearest representable value. +    #[inline] +    #[must_use] +    pub const fn from_f32_const(value: f32) -> bf16 { +        bf16(convert::f32_to_bf16(value)) +    } + +    /// Constructs a [`bf16`] value from a 64-bit floating point value. +    /// +    /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. +    /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that +    /// underflow the minimum exponent will result in subnormals or ±0. All other values are +    /// truncated and rounded to the nearest representable value. +    #[inline] +    #[must_use] +    pub fn from_f64(value: f64) -> bf16 { +        Self::from_f64_const(value) +    } + +    /// Constructs a [`bf16`] value from a 64-bit floating point value. +    /// +    /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred +    /// in any non-`const` context. +    /// +    /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. +    /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that +    /// underflow the minimum exponent will result in subnormals or ±0. All other values are +    /// truncated and rounded to the nearest representable value. +    #[inline] +    #[must_use] +    pub const fn from_f64_const(value: f64) -> bf16 { +        bf16(convert::f64_to_bf16(value)) +    } + +    /// Converts a [`bf16`] into the underlying bit representation. +    #[inline] +    #[must_use] +    pub const fn to_bits(self) -> u16 { +        self.0 +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// little-endian byte order. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = bf16::from_f32(12.5).to_le_bytes(); +    /// assert_eq!(bytes, [0x48, 0x41]); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_le_bytes(self) -> [u8; 2] { +        self.0.to_le_bytes() +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// big-endian (network) byte order. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = bf16::from_f32(12.5).to_be_bytes(); +    /// assert_eq!(bytes, [0x41, 0x48]); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_be_bytes(self) -> [u8; 2] { +        self.0.to_be_bytes() +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// native byte order. +    /// +    /// As the target platform's native endianness is used, portable code should use +    /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate, +    /// instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = bf16::from_f32(12.5).to_ne_bytes(); +    /// assert_eq!(bytes, if cfg!(target_endian = "big") { +    ///     [0x41, 0x48] +    /// } else { +    ///     [0x48, 0x41] +    /// }); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_ne_bytes(self) -> [u8; 2] { +        self.0.to_ne_bytes() +    } + +    /// Creates a floating point value from its representation as a byte array in little endian. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = bf16::from_le_bytes([0x48, 0x41]); +    /// assert_eq!(value, bf16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 { +        bf16::from_bits(u16::from_le_bytes(bytes)) +    } + +    /// Creates a floating point value from its representation as a byte array in big endian. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = bf16::from_be_bytes([0x41, 0x48]); +    /// assert_eq!(value, bf16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 { +        bf16::from_bits(u16::from_be_bytes(bytes)) +    } + +    /// Creates a floating point value from its representation as a byte array in native endian. +    /// +    /// As the target platform's native endianness is used, portable code likely wants to use +    /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as +    /// appropriate instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") { +    ///     [0x41, 0x48] +    /// } else { +    ///     [0x48, 0x41] +    /// }); +    /// assert_eq!(value, bf16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 { +        bf16::from_bits(u16::from_ne_bytes(bytes)) +    } + +    /// Converts a [`bf16`] value into an [`f32`] value. +    /// +    /// This conversion is lossless as all values can be represented exactly in [`f32`]. +    #[inline] +    #[must_use] +    pub fn to_f32(self) -> f32 { +        self.to_f32_const() +    } + +    /// Converts a [`bf16`] value into an [`f32`] value. +    /// +    /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred +    /// in any non-`const` context. +    /// +    /// This conversion is lossless as all values can be represented exactly in [`f32`]. +    #[inline] +    #[must_use] +    pub const fn to_f32_const(self) -> f32 { +        convert::bf16_to_f32(self.0) +    } + +    /// Converts a [`bf16`] value into an [`f64`] value. +    /// +    /// This conversion is lossless as all values can be represented exactly in [`f64`]. +    #[inline] +    #[must_use] +    pub fn to_f64(self) -> f64 { +        self.to_f64_const() +    } + +    /// Converts a [`bf16`] value into an [`f64`] value. +    /// +    /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred +    /// in any non-`const` context. +    /// +    /// This conversion is lossless as all values can be represented exactly in [`f64`]. +    #[inline] +    #[must_use] +    pub const fn to_f64_const(self) -> f64 { +        convert::bf16_to_f64(self.0) +    } + +    /// Returns `true` if this value is NaN and `false` otherwise. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = bf16::NAN; +    /// let f = bf16::from_f32(7.0_f32); +    /// +    /// assert!(nan.is_nan()); +    /// assert!(!f.is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_nan(self) -> bool { +        self.0 & 0x7FFFu16 > 0x7F80u16 +    } + +    /// Returns `true` if this value is ±∞ and `false` otherwise. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = bf16::from_f32(7.0f32); +    /// let inf = bf16::INFINITY; +    /// let neg_inf = bf16::NEG_INFINITY; +    /// let nan = bf16::NAN; +    /// +    /// assert!(!f.is_infinite()); +    /// assert!(!nan.is_infinite()); +    /// +    /// assert!(inf.is_infinite()); +    /// assert!(neg_inf.is_infinite()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_infinite(self) -> bool { +        self.0 & 0x7FFFu16 == 0x7F80u16 +    } + +    /// Returns `true` if this number is neither infinite nor NaN. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = bf16::from_f32(7.0f32); +    /// let inf = bf16::INFINITY; +    /// let neg_inf = bf16::NEG_INFINITY; +    /// let nan = bf16::NAN; +    /// +    /// assert!(f.is_finite()); +    /// +    /// assert!(!nan.is_finite()); +    /// assert!(!inf.is_finite()); +    /// assert!(!neg_inf.is_finite()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_finite(self) -> bool { +        self.0 & 0x7F80u16 != 0x7F80u16 +    } + +    /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let min = bf16::MIN_POSITIVE; +    /// let max = bf16::MAX; +    /// let lower_than_min = bf16::from_f32(1.0e-39_f32); +    /// let zero = bf16::from_f32(0.0_f32); +    /// +    /// assert!(min.is_normal()); +    /// assert!(max.is_normal()); +    /// +    /// assert!(!zero.is_normal()); +    /// assert!(!bf16::NAN.is_normal()); +    /// assert!(!bf16::INFINITY.is_normal()); +    /// // Values between 0 and `min` are subnormal. +    /// assert!(!lower_than_min.is_normal()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_normal(self) -> bool { +        let exp = self.0 & 0x7F80u16; +        exp != 0x7F80u16 && exp != 0 +    } + +    /// Returns the floating point category of the number. +    /// +    /// If only one property is going to be tested, it is generally faster to use the specific +    /// predicate instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// use std::num::FpCategory; +    /// # use half::prelude::*; +    /// +    /// let num = bf16::from_f32(12.4_f32); +    /// let inf = bf16::INFINITY; +    /// +    /// assert_eq!(num.classify(), FpCategory::Normal); +    /// assert_eq!(inf.classify(), FpCategory::Infinite); +    /// ``` +    #[must_use] +    pub const fn classify(self) -> FpCategory { +        let exp = self.0 & 0x7F80u16; +        let man = self.0 & 0x007Fu16; +        match (exp, man) { +            (0, 0) => FpCategory::Zero, +            (0, _) => FpCategory::Subnormal, +            (0x7F80u16, 0) => FpCategory::Infinite, +            (0x7F80u16, _) => FpCategory::Nan, +            _ => FpCategory::Normal, +        } +    } + +    /// Returns a number that represents the sign of `self`. +    /// +    /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY] +    /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY] +    /// * [`NAN`][bf16::NAN] if the number is NaN +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = bf16::from_f32(3.5_f32); +    /// +    /// assert_eq!(f.signum(), bf16::from_f32(1.0)); +    /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0)); +    /// +    /// assert!(bf16::NAN.signum().is_nan()); +    /// ``` +    #[must_use] +    pub const fn signum(self) -> bf16 { +        if self.is_nan() { +            self +        } else if self.0 & 0x8000u16 != 0 { +            Self::NEG_ONE +        } else { +            Self::ONE +        } +    } + +    /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a +    /// positive sign bit and +∞. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = bf16::NAN; +    /// let f = bf16::from_f32(7.0_f32); +    /// let g = bf16::from_f32(-7.0_f32); +    /// +    /// assert!(f.is_sign_positive()); +    /// assert!(!g.is_sign_positive()); +    /// // NaN can be either positive or negative +    /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_sign_positive(self) -> bool { +        self.0 & 0x8000u16 == 0 +    } + +    /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a +    /// negative sign bit and −∞. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = bf16::NAN; +    /// let f = bf16::from_f32(7.0f32); +    /// let g = bf16::from_f32(-7.0f32); +    /// +    /// assert!(!f.is_sign_negative()); +    /// assert!(g.is_sign_negative()); +    /// // NaN can be either positive or negative +    /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_sign_negative(self) -> bool { +        self.0 & 0x8000u16 != 0 +    } + +    /// Returns a number composed of the magnitude of `self` and the sign of `sign`. +    /// +    /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. +    /// If `self` is NaN, then NaN with the sign of `sign` is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let f = bf16::from_f32(3.5); +    /// +    /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); +    /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); +    /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); +    /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); +    /// +    /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn copysign(self, sign: bf16) -> bf16 { +        bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) +    } + +    /// Returns the maximum of the two numbers. +    /// +    /// If one of the arguments is NaN, then the other argument is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let x = bf16::from_f32(1.0); +    /// let y = bf16::from_f32(2.0); +    /// +    /// assert_eq!(x.max(y), y); +    /// ``` +    #[inline] +    #[must_use] +    pub fn max(self, other: bf16) -> bf16 { +        if other > self && !other.is_nan() { +            other +        } else { +            self +        } +    } + +    /// Returns the minimum of the two numbers. +    /// +    /// If one of the arguments is NaN, then the other argument is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let x = bf16::from_f32(1.0); +    /// let y = bf16::from_f32(2.0); +    /// +    /// assert_eq!(x.min(y), x); +    /// ``` +    #[inline] +    #[must_use] +    pub fn min(self, other: bf16) -> bf16 { +        if other < self && !other.is_nan() { +            other +        } else { +            self +        } +    } + +    /// Restrict a value to a certain interval unless it is NaN. +    /// +    /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. +    /// Otherwise this returns `self`. +    /// +    /// Note that this function returns NaN if the initial value was NaN as well. +    /// +    /// # Panics +    /// Panics if `min > max`, `min` is NaN, or `max` is NaN. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0)); +    /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0)); +    /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0)); +    /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub fn clamp(self, min: bf16, max: bf16) -> bf16 { +        assert!(min <= max); +        let mut x = self; +        if x < min { +            x = min; +        } +        if x > max { +            x = max; +        } +        x +    } + +    /// Returns the ordering between `self` and `other`. +    /// +    /// Unlike the standard partial comparison between floating point numbers, +    /// this comparison always produces an ordering in accordance to +    /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) +    /// floating point standard. The values are ordered in the following sequence: +    /// +    /// - negative quiet NaN +    /// - negative signaling NaN +    /// - negative infinity +    /// - negative numbers +    /// - negative subnormal numbers +    /// - negative zero +    /// - positive zero +    /// - positive subnormal numbers +    /// - positive numbers +    /// - positive infinity +    /// - positive signaling NaN +    /// - positive quiet NaN. +    /// +    /// The ordering established by this function does not always agree with the +    /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example, +    /// they consider negative and positive zero equal, while `total_cmp` +    /// doesn't. +    /// +    /// The interpretation of the signaling NaN bit follows the definition in +    /// the IEEE 754 standard, which may not match the interpretation by some of +    /// the older, non-conformant (e.g. MIPS) hardware implementations. +    /// +    /// # Examples +    /// ``` +    /// # use half::bf16; +    /// let mut v: Vec<bf16> = vec![]; +    /// v.push(bf16::ONE); +    /// v.push(bf16::INFINITY); +    /// v.push(bf16::NEG_INFINITY); +    /// v.push(bf16::NAN); +    /// v.push(bf16::MAX_SUBNORMAL); +    /// v.push(-bf16::MAX_SUBNORMAL); +    /// v.push(bf16::ZERO); +    /// v.push(bf16::NEG_ZERO); +    /// v.push(bf16::NEG_ONE); +    /// v.push(bf16::MIN_POSITIVE); +    /// +    /// v.sort_by(|a, b| a.total_cmp(&b)); +    /// +    /// assert!(v +    ///     .into_iter() +    ///     .zip( +    ///         [ +    ///             bf16::NEG_INFINITY, +    ///             bf16::NEG_ONE, +    ///             -bf16::MAX_SUBNORMAL, +    ///             bf16::NEG_ZERO, +    ///             bf16::ZERO, +    ///             bf16::MAX_SUBNORMAL, +    ///             bf16::MIN_POSITIVE, +    ///             bf16::ONE, +    ///             bf16::INFINITY, +    ///             bf16::NAN +    ///         ] +    ///         .iter() +    ///     ) +    ///     .all(|(a, b)| a.to_bits() == b.to_bits())); +    /// ``` +    // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp +    #[inline] +    #[must_use] +    pub fn total_cmp(&self, other: &Self) -> Ordering { +        let mut left = self.to_bits() as i16; +        let mut right = other.to_bits() as i16; +        left ^= (((left >> 15) as u16) >> 1) as i16; +        right ^= (((right >> 15) as u16) >> 1) as i16; +        left.cmp(&right) +    } + +    /// Alternate serialize adapter for serializing as a float. +    /// +    /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize +    /// implementation that serializes as an [`f32`] value. It is designed for use with +    /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by +    /// the default deserialize implementation. +    /// +    /// # Examples +    /// +    /// A demonstration on how to use this adapater: +    /// +    /// ``` +    /// use serde::{Serialize, Deserialize}; +    /// use half::bf16; +    /// +    /// #[derive(Serialize, Deserialize)] +    /// struct MyStruct { +    ///     #[serde(serialize_with = "bf16::serialize_as_f32")] +    ///     value: bf16 // Will be serialized as f32 instead of u16 +    /// } +    /// ``` +    #[cfg(feature = "serde")] +    pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { +        serializer.serialize_f32(self.to_f32()) +    } + +    /// Alternate serialize adapter for serializing as a string. +    /// +    /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize +    /// implementation that serializes as a string value. It is designed for use with +    /// `serialize_with` serde attributes. Deserialization from string values is already supported +    /// by the default deserialize implementation. +    /// +    /// # Examples +    /// +    /// A demonstration on how to use this adapater: +    /// +    /// ``` +    /// use serde::{Serialize, Deserialize}; +    /// use half::bf16; +    /// +    /// #[derive(Serialize, Deserialize)] +    /// struct MyStruct { +    ///     #[serde(serialize_with = "bf16::serialize_as_string")] +    ///     value: bf16 // Will be serialized as a string instead of u16 +    /// } +    /// ``` +    #[cfg(feature = "serde")] +    pub fn serialize_as_string<S: serde::Serializer>( +        &self, +        serializer: S, +    ) -> Result<S::Ok, S::Error> { +        serializer.serialize_str(&self.to_string()) +    } + +    /// Approximate number of [`bf16`] significant digits in base 10 +    pub const DIGITS: u32 = 2; +    /// [`bf16`] +    /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value +    /// +    /// This is the difference between 1.0 and the next largest representable number. +    pub const EPSILON: bf16 = bf16(0x3C00u16); +    /// [`bf16`] positive Infinity (+∞) +    pub const INFINITY: bf16 = bf16(0x7F80u16); +    /// Number of [`bf16`] significant digits in base 2 +    pub const MANTISSA_DIGITS: u32 = 8; +    /// Largest finite [`bf16`] value +    pub const MAX: bf16 = bf16(0x7F7F); +    /// Maximum possible [`bf16`] power of 10 exponent +    pub const MAX_10_EXP: i32 = 38; +    /// Maximum possible [`bf16`] power of 2 exponent +    pub const MAX_EXP: i32 = 128; +    /// Smallest finite [`bf16`] value +    pub const MIN: bf16 = bf16(0xFF7F); +    /// Minimum possible normal [`bf16`] power of 10 exponent +    pub const MIN_10_EXP: i32 = -37; +    /// One greater than the minimum possible normal [`bf16`] power of 2 exponent +    pub const MIN_EXP: i32 = -125; +    /// Smallest positive normal [`bf16`] value +    pub const MIN_POSITIVE: bf16 = bf16(0x0080u16); +    /// [`bf16`] Not a Number (NaN) +    pub const NAN: bf16 = bf16(0x7FC0u16); +    /// [`bf16`] negative infinity (-∞). +    pub const NEG_INFINITY: bf16 = bf16(0xFF80u16); +    /// The radix or base of the internal representation of [`bf16`] +    pub const RADIX: u32 = 2; + +    /// Minimum positive subnormal [`bf16`] value +    pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16); +    /// Maximum subnormal [`bf16`] value +    pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16); + +    /// [`bf16`] 1 +    pub const ONE: bf16 = bf16(0x3F80u16); +    /// [`bf16`] 0 +    pub const ZERO: bf16 = bf16(0x0000u16); +    /// [`bf16`] -0 +    pub const NEG_ZERO: bf16 = bf16(0x8000u16); +    /// [`bf16`] -1 +    pub const NEG_ONE: bf16 = bf16(0xBF80u16); + +    /// [`bf16`] Euler's number (ℯ) +    pub const E: bf16 = bf16(0x402Eu16); +    /// [`bf16`] Archimedes' constant (π) +    pub const PI: bf16 = bf16(0x4049u16); +    /// [`bf16`] 1/π +    pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16); +    /// [`bf16`] 1/√2 +    pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16); +    /// [`bf16`] 2/π +    pub const FRAC_2_PI: bf16 = bf16(0x3F23u16); +    /// [`bf16`] 2/√π +    pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16); +    /// [`bf16`] π/2 +    pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16); +    /// [`bf16`] π/3 +    pub const FRAC_PI_3: bf16 = bf16(0x3F86u16); +    /// [`bf16`] π/4 +    pub const FRAC_PI_4: bf16 = bf16(0x3F49u16); +    /// [`bf16`] π/6 +    pub const FRAC_PI_6: bf16 = bf16(0x3F06u16); +    /// [`bf16`] π/8 +    pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16); +    /// [`bf16`] 𝗅𝗇 10 +    pub const LN_10: bf16 = bf16(0x4013u16); +    /// [`bf16`] 𝗅𝗇 2 +    pub const LN_2: bf16 = bf16(0x3F31u16); +    /// [`bf16`] 𝗅𝗈𝗀₁₀ℯ +    pub const LOG10_E: bf16 = bf16(0x3EDEu16); +    /// [`bf16`] 𝗅𝗈𝗀₁₀2 +    pub const LOG10_2: bf16 = bf16(0x3E9Au16); +    /// [`bf16`] 𝗅𝗈𝗀₂ℯ +    pub const LOG2_E: bf16 = bf16(0x3FB9u16); +    /// [`bf16`] 𝗅𝗈𝗀₂10 +    pub const LOG2_10: bf16 = bf16(0x4055u16); +    /// [`bf16`] √2 +    pub const SQRT_2: bf16 = bf16(0x3FB5u16); +} + +impl From<bf16> for f32 { +    #[inline] +    fn from(x: bf16) -> f32 { +        x.to_f32() +    } +} + +impl From<bf16> for f64 { +    #[inline] +    fn from(x: bf16) -> f64 { +        x.to_f64() +    } +} + +impl From<i8> for bf16 { +    #[inline] +    fn from(x: i8) -> bf16 { +        // Convert to f32, then to bf16 +        bf16::from_f32(f32::from(x)) +    } +} + +impl From<u8> for bf16 { +    #[inline] +    fn from(x: u8) -> bf16 { +        // Convert to f32, then to f16 +        bf16::from_f32(f32::from(x)) +    } +} + +impl PartialEq for bf16 { +    fn eq(&self, other: &bf16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) +        } +    } +} + +impl PartialOrd for bf16 { +    fn partial_cmp(&self, other: &bf16) -> Option<Ordering> { +        if self.is_nan() || other.is_nan() { +            None +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => Some(self.0.cmp(&other.0)), +                (false, true) => { +                    if (self.0 | other.0) & 0x7FFFu16 == 0 { +                        Some(Ordering::Equal) +                    } else { +                        Some(Ordering::Greater) +                    } +                } +                (true, false) => { +                    if (self.0 | other.0) & 0x7FFFu16 == 0 { +                        Some(Ordering::Equal) +                    } else { +                        Some(Ordering::Less) +                    } +                } +                (true, true) => Some(other.0.cmp(&self.0)), +            } +        } +    } + +    fn lt(&self, other: &bf16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 < other.0, +                (false, true) => false, +                (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, +                (true, true) => self.0 > other.0, +            } +        } +    } + +    fn le(&self, other: &bf16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 <= other.0, +                (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, +                (true, false) => true, +                (true, true) => self.0 >= other.0, +            } +        } +    } + +    fn gt(&self, other: &bf16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 > other.0, +                (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, +                (true, false) => false, +                (true, true) => self.0 < other.0, +            } +        } +    } + +    fn ge(&self, other: &bf16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 >= other.0, +                (false, true) => true, +                (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, +                (true, true) => self.0 <= other.0, +            } +        } +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl FromStr for bf16 { +    type Err = ParseFloatError; +    fn from_str(src: &str) -> Result<bf16, ParseFloatError> { +        f32::from_str(src).map(bf16::from_f32) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Debug for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:?}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Display for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerExp for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:e}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperExp for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:E}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Binary for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:b}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Octal for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:o}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerHex for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:x}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperHex for bf16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:X}", self.0) +    } +} + +impl Neg for bf16 { +    type Output = Self; + +    fn neg(self) -> Self::Output { +        Self(self.0 ^ 0x8000) +    } +} + +impl Neg for &bf16 { +    type Output = <bf16 as Neg>::Output; + +    #[inline] +    fn neg(self) -> Self::Output { +        Neg::neg(*self) +    } +} + +impl Add for bf16 { +    type Output = Self; + +    fn add(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) +    } +} + +impl Add<&bf16> for bf16 { +    type Output = <bf16 as Add<bf16>>::Output; + +    #[inline] +    fn add(self, rhs: &bf16) -> Self::Output { +        self.add(*rhs) +    } +} + +impl Add<&bf16> for &bf16 { +    type Output = <bf16 as Add<bf16>>::Output; + +    #[inline] +    fn add(self, rhs: &bf16) -> Self::Output { +        (*self).add(*rhs) +    } +} + +impl Add<bf16> for &bf16 { +    type Output = <bf16 as Add<bf16>>::Output; + +    #[inline] +    fn add(self, rhs: bf16) -> Self::Output { +        (*self).add(rhs) +    } +} + +impl AddAssign for bf16 { +    #[inline] +    fn add_assign(&mut self, rhs: Self) { +        *self = (*self).add(rhs); +    } +} + +impl AddAssign<&bf16> for bf16 { +    #[inline] +    fn add_assign(&mut self, rhs: &bf16) { +        *self = (*self).add(rhs); +    } +} + +impl Sub for bf16 { +    type Output = Self; + +    fn sub(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) +    } +} + +impl Sub<&bf16> for bf16 { +    type Output = <bf16 as Sub<bf16>>::Output; + +    #[inline] +    fn sub(self, rhs: &bf16) -> Self::Output { +        self.sub(*rhs) +    } +} + +impl Sub<&bf16> for &bf16 { +    type Output = <bf16 as Sub<bf16>>::Output; + +    #[inline] +    fn sub(self, rhs: &bf16) -> Self::Output { +        (*self).sub(*rhs) +    } +} + +impl Sub<bf16> for &bf16 { +    type Output = <bf16 as Sub<bf16>>::Output; + +    #[inline] +    fn sub(self, rhs: bf16) -> Self::Output { +        (*self).sub(rhs) +    } +} + +impl SubAssign for bf16 { +    #[inline] +    fn sub_assign(&mut self, rhs: Self) { +        *self = (*self).sub(rhs); +    } +} + +impl SubAssign<&bf16> for bf16 { +    #[inline] +    fn sub_assign(&mut self, rhs: &bf16) { +        *self = (*self).sub(rhs); +    } +} + +impl Mul for bf16 { +    type Output = Self; + +    fn mul(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) +    } +} + +impl Mul<&bf16> for bf16 { +    type Output = <bf16 as Mul<bf16>>::Output; + +    #[inline] +    fn mul(self, rhs: &bf16) -> Self::Output { +        self.mul(*rhs) +    } +} + +impl Mul<&bf16> for &bf16 { +    type Output = <bf16 as Mul<bf16>>::Output; + +    #[inline] +    fn mul(self, rhs: &bf16) -> Self::Output { +        (*self).mul(*rhs) +    } +} + +impl Mul<bf16> for &bf16 { +    type Output = <bf16 as Mul<bf16>>::Output; + +    #[inline] +    fn mul(self, rhs: bf16) -> Self::Output { +        (*self).mul(rhs) +    } +} + +impl MulAssign for bf16 { +    #[inline] +    fn mul_assign(&mut self, rhs: Self) { +        *self = (*self).mul(rhs); +    } +} + +impl MulAssign<&bf16> for bf16 { +    #[inline] +    fn mul_assign(&mut self, rhs: &bf16) { +        *self = (*self).mul(rhs); +    } +} + +impl Div for bf16 { +    type Output = Self; + +    fn div(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) +    } +} + +impl Div<&bf16> for bf16 { +    type Output = <bf16 as Div<bf16>>::Output; + +    #[inline] +    fn div(self, rhs: &bf16) -> Self::Output { +        self.div(*rhs) +    } +} + +impl Div<&bf16> for &bf16 { +    type Output = <bf16 as Div<bf16>>::Output; + +    #[inline] +    fn div(self, rhs: &bf16) -> Self::Output { +        (*self).div(*rhs) +    } +} + +impl Div<bf16> for &bf16 { +    type Output = <bf16 as Div<bf16>>::Output; + +    #[inline] +    fn div(self, rhs: bf16) -> Self::Output { +        (*self).div(rhs) +    } +} + +impl DivAssign for bf16 { +    #[inline] +    fn div_assign(&mut self, rhs: Self) { +        *self = (*self).div(rhs); +    } +} + +impl DivAssign<&bf16> for bf16 { +    #[inline] +    fn div_assign(&mut self, rhs: &bf16) { +        *self = (*self).div(rhs); +    } +} + +impl Rem for bf16 { +    type Output = Self; + +    fn rem(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) +    } +} + +impl Rem<&bf16> for bf16 { +    type Output = <bf16 as Rem<bf16>>::Output; + +    #[inline] +    fn rem(self, rhs: &bf16) -> Self::Output { +        self.rem(*rhs) +    } +} + +impl Rem<&bf16> for &bf16 { +    type Output = <bf16 as Rem<bf16>>::Output; + +    #[inline] +    fn rem(self, rhs: &bf16) -> Self::Output { +        (*self).rem(*rhs) +    } +} + +impl Rem<bf16> for &bf16 { +    type Output = <bf16 as Rem<bf16>>::Output; + +    #[inline] +    fn rem(self, rhs: bf16) -> Self::Output { +        (*self).rem(rhs) +    } +} + +impl RemAssign for bf16 { +    #[inline] +    fn rem_assign(&mut self, rhs: Self) { +        *self = (*self).rem(rhs); +    } +} + +impl RemAssign<&bf16> for bf16 { +    #[inline] +    fn rem_assign(&mut self, rhs: &bf16) { +        *self = (*self).rem(rhs); +    } +} + +impl Product for bf16 { +    #[inline] +    fn product<I: Iterator<Item = Self>>(iter: I) -> Self { +        bf16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +impl<'a> Product<&'a bf16> for bf16 { +    #[inline] +    fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { +        bf16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +impl Sum for bf16 { +    #[inline] +    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { +        bf16::from_f32(iter.map(|f| f.to_f32()).sum()) +    } +} + +impl<'a> Sum<&'a bf16> for bf16 { +    #[inline] +    fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { +        bf16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +#[cfg(feature = "serde")] +struct Visitor; + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for bf16 { +    fn deserialize<D>(deserializer: D) -> Result<bf16, D::Error> +    where +        D: serde::de::Deserializer<'de>, +    { +        deserializer.deserialize_newtype_struct("bf16", Visitor) +    } +} + +#[cfg(feature = "serde")] +impl<'de> serde::de::Visitor<'de> for Visitor { +    type Value = bf16; + +    fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { +        write!(formatter, "tuple struct bf16") +    } + +    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> +    where +        D: serde::Deserializer<'de>, +    { +        Ok(bf16(<u16 as Deserialize>::deserialize(deserializer)?)) +    } + +    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        v.parse().map_err(|_| { +            serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") +        }) +    } + +    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        Ok(bf16::from_f32(v)) +    } + +    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        Ok(bf16::from_f64(v)) +    } +} + +#[allow( +    clippy::cognitive_complexity, +    clippy::float_cmp, +    clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { +    use super::*; +    use core::cmp::Ordering; +    #[cfg(feature = "num-traits")] +    use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; +    use quickcheck_macros::quickcheck; + +    #[cfg(feature = "num-traits")] +    #[test] +    fn as_primitive() { +        let two = bf16::from_f32(2.0); +        assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two); +        assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2); + +        assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two); +        assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0); + +        assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two); +        assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0); +    } + +    #[cfg(feature = "num-traits")] +    #[test] +    fn to_primitive() { +        let two = bf16::from_f32(2.0); +        assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); +        assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); +        assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); +    } + +    #[cfg(feature = "num-traits")] +    #[test] +    fn from_primitive() { +        let two = bf16::from_f32(2.0); +        assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two); +        assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two); +        assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two); +    } + +    #[test] +    fn test_bf16_consts_from_f32() { +        let one = bf16::from_f32(1.0); +        let zero = bf16::from_f32(0.0); +        let neg_zero = bf16::from_f32(-0.0); +        let neg_one = bf16::from_f32(-1.0); +        let inf = bf16::from_f32(core::f32::INFINITY); +        let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY); +        let nan = bf16::from_f32(core::f32::NAN); + +        assert_eq!(bf16::ONE, one); +        assert_eq!(bf16::ZERO, zero); +        assert!(zero.is_sign_positive()); +        assert_eq!(bf16::NEG_ZERO, neg_zero); +        assert!(neg_zero.is_sign_negative()); +        assert_eq!(bf16::NEG_ONE, neg_one); +        assert!(neg_one.is_sign_negative()); +        assert_eq!(bf16::INFINITY, inf); +        assert_eq!(bf16::NEG_INFINITY, neg_inf); +        assert!(nan.is_nan()); +        assert!(bf16::NAN.is_nan()); + +        let e = bf16::from_f32(core::f32::consts::E); +        let pi = bf16::from_f32(core::f32::consts::PI); +        let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI); +        let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2); +        let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI); +        let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); +        let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2); +        let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3); +        let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4); +        let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6); +        let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8); +        let ln_10 = bf16::from_f32(core::f32::consts::LN_10); +        let ln_2 = bf16::from_f32(core::f32::consts::LN_2); +        let log10_e = bf16::from_f32(core::f32::consts::LOG10_E); +        // core::f32::consts::LOG10_2 requires rustc 1.43.0 +        let log10_2 = bf16::from_f32(2f32.log10()); +        let log2_e = bf16::from_f32(core::f32::consts::LOG2_E); +        // core::f32::consts::LOG2_10 requires rustc 1.43.0 +        let log2_10 = bf16::from_f32(10f32.log2()); +        let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2); + +        assert_eq!(bf16::E, e); +        assert_eq!(bf16::PI, pi); +        assert_eq!(bf16::FRAC_1_PI, frac_1_pi); +        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); +        assert_eq!(bf16::FRAC_2_PI, frac_2_pi); +        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); +        assert_eq!(bf16::FRAC_PI_2, frac_pi_2); +        assert_eq!(bf16::FRAC_PI_3, frac_pi_3); +        assert_eq!(bf16::FRAC_PI_4, frac_pi_4); +        assert_eq!(bf16::FRAC_PI_6, frac_pi_6); +        assert_eq!(bf16::FRAC_PI_8, frac_pi_8); +        assert_eq!(bf16::LN_10, ln_10); +        assert_eq!(bf16::LN_2, ln_2); +        assert_eq!(bf16::LOG10_E, log10_e); +        assert_eq!(bf16::LOG10_2, log10_2); +        assert_eq!(bf16::LOG2_E, log2_e); +        assert_eq!(bf16::LOG2_10, log2_10); +        assert_eq!(bf16::SQRT_2, sqrt_2); +    } + +    #[test] +    fn test_bf16_consts_from_f64() { +        let one = bf16::from_f64(1.0); +        let zero = bf16::from_f64(0.0); +        let neg_zero = bf16::from_f64(-0.0); +        let inf = bf16::from_f64(core::f64::INFINITY); +        let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY); +        let nan = bf16::from_f64(core::f64::NAN); + +        assert_eq!(bf16::ONE, one); +        assert_eq!(bf16::ZERO, zero); +        assert_eq!(bf16::NEG_ZERO, neg_zero); +        assert_eq!(bf16::INFINITY, inf); +        assert_eq!(bf16::NEG_INFINITY, neg_inf); +        assert!(nan.is_nan()); +        assert!(bf16::NAN.is_nan()); + +        let e = bf16::from_f64(core::f64::consts::E); +        let pi = bf16::from_f64(core::f64::consts::PI); +        let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI); +        let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2); +        let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI); +        let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); +        let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2); +        let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3); +        let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4); +        let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6); +        let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8); +        let ln_10 = bf16::from_f64(core::f64::consts::LN_10); +        let ln_2 = bf16::from_f64(core::f64::consts::LN_2); +        let log10_e = bf16::from_f64(core::f64::consts::LOG10_E); +        // core::f64::consts::LOG10_2 requires rustc 1.43.0 +        let log10_2 = bf16::from_f64(2f64.log10()); +        let log2_e = bf16::from_f64(core::f64::consts::LOG2_E); +        // core::f64::consts::LOG2_10 requires rustc 1.43.0 +        let log2_10 = bf16::from_f64(10f64.log2()); +        let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2); + +        assert_eq!(bf16::E, e); +        assert_eq!(bf16::PI, pi); +        assert_eq!(bf16::FRAC_1_PI, frac_1_pi); +        assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); +        assert_eq!(bf16::FRAC_2_PI, frac_2_pi); +        assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); +        assert_eq!(bf16::FRAC_PI_2, frac_pi_2); +        assert_eq!(bf16::FRAC_PI_3, frac_pi_3); +        assert_eq!(bf16::FRAC_PI_4, frac_pi_4); +        assert_eq!(bf16::FRAC_PI_6, frac_pi_6); +        assert_eq!(bf16::FRAC_PI_8, frac_pi_8); +        assert_eq!(bf16::LN_10, ln_10); +        assert_eq!(bf16::LN_2, ln_2); +        assert_eq!(bf16::LOG10_E, log10_e); +        assert_eq!(bf16::LOG10_2, log10_2); +        assert_eq!(bf16::LOG2_E, log2_e); +        assert_eq!(bf16::LOG2_10, log2_10); +        assert_eq!(bf16::SQRT_2, sqrt_2); +    } + +    #[test] +    fn test_nan_conversion_to_smaller() { +        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); +        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); +        let nan32 = f32::from_bits(0x7F80_0001u32); +        let neg_nan32 = f32::from_bits(0xFF80_0001u32); +        let nan32_from_64 = nan64 as f32; +        let neg_nan32_from_64 = neg_nan64 as f32; +        let nan16_from_64 = bf16::from_f64(nan64); +        let neg_nan16_from_64 = bf16::from_f64(neg_nan64); +        let nan16_from_32 = bf16::from_f32(nan32); +        let neg_nan16_from_32 = bf16::from_f32(neg_nan32); + +        assert!(nan64.is_nan() && nan64.is_sign_positive()); +        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); +        assert!(nan32.is_nan() && nan32.is_sign_positive()); +        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); +        assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); +        assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); +        assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); +        assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); +        assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); +        assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); +    } + +    #[test] +    fn test_nan_conversion_to_larger() { +        let nan16 = bf16::from_bits(0x7F81u16); +        let neg_nan16 = bf16::from_bits(0xFF81u16); +        let nan32 = f32::from_bits(0x7F80_0001u32); +        let neg_nan32 = f32::from_bits(0xFF80_0001u32); +        let nan32_from_16 = f32::from(nan16); +        let neg_nan32_from_16 = f32::from(neg_nan16); +        let nan64_from_16 = f64::from(nan16); +        let neg_nan64_from_16 = f64::from(neg_nan16); +        let nan64_from_32 = f64::from(nan32); +        let neg_nan64_from_32 = f64::from(neg_nan32); + +        assert!(nan16.is_nan() && nan16.is_sign_positive()); +        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); +        assert!(nan32.is_nan() && nan32.is_sign_positive()); +        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); +        assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); +        assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); +        assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); +        assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); +        assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); +        assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); +    } + +    #[test] +    fn test_bf16_to_f32() { +        let f = bf16::from_f32(7.0); +        assert_eq!(f.to_f32(), 7.0f32); + +        // 7.1 is NOT exactly representable in 16-bit, it's rounded +        let f = bf16::from_f32(7.1); +        let diff = (f.to_f32() - 7.1f32).abs(); +        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 +        assert!(diff <= 4.0 * bf16::EPSILON.to_f32()); + +        let tiny32 = f32::from_bits(0x0001_0000u32); +        assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32); +        assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32); + +        assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32)); +        assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32)); +    } + +    #[test] +    fn test_bf16_to_f64() { +        let f = bf16::from_f64(7.0); +        assert_eq!(f.to_f64(), 7.0f64); + +        // 7.1 is NOT exactly representable in 16-bit, it's rounded +        let f = bf16::from_f64(7.1); +        let diff = (f.to_f64() - 7.1f64).abs(); +        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 +        assert!(diff <= 4.0 * bf16::EPSILON.to_f64()); + +        let tiny64 = 2.0f64.powi(-133); +        assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64); +        assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64); + +        assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64)); +        assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64)); +    } + +    #[test] +    fn test_comparisons() { +        let zero = bf16::from_f64(0.0); +        let one = bf16::from_f64(1.0); +        let neg_zero = bf16::from_f64(-0.0); +        let neg_one = bf16::from_f64(-1.0); + +        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); +        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); +        assert!(zero == neg_zero); +        assert!(neg_zero == zero); +        assert!(!(zero != neg_zero)); +        assert!(!(neg_zero != zero)); +        assert!(!(zero < neg_zero)); +        assert!(!(neg_zero < zero)); +        assert!(zero <= neg_zero); +        assert!(neg_zero <= zero); +        assert!(!(zero > neg_zero)); +        assert!(!(neg_zero > zero)); +        assert!(zero >= neg_zero); +        assert!(neg_zero >= zero); + +        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); +        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); +        assert!(!(one == neg_zero)); +        assert!(!(neg_zero == one)); +        assert!(one != neg_zero); +        assert!(neg_zero != one); +        assert!(!(one < neg_zero)); +        assert!(neg_zero < one); +        assert!(!(one <= neg_zero)); +        assert!(neg_zero <= one); +        assert!(one > neg_zero); +        assert!(!(neg_zero > one)); +        assert!(one >= neg_zero); +        assert!(!(neg_zero >= one)); + +        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); +        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); +        assert!(!(one == neg_one)); +        assert!(!(neg_one == one)); +        assert!(one != neg_one); +        assert!(neg_one != one); +        assert!(!(one < neg_one)); +        assert!(neg_one < one); +        assert!(!(one <= neg_one)); +        assert!(neg_one <= one); +        assert!(one > neg_one); +        assert!(!(neg_one > one)); +        assert!(one >= neg_one); +        assert!(!(neg_one >= one)); +    } + +    #[test] +    #[allow(clippy::erasing_op, clippy::identity_op)] +    fn round_to_even_f32() { +        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 +        let min_sub = bf16::from_bits(1); +        let min_sub_f = (-133f32).exp2(); +        assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); +        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + +        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) +        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) +        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) +        assert_eq!( +            bf16::from_f32(min_sub_f * 0.49).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 0.50).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 0.51).to_bits(), +            min_sub.to_bits() * 1 +        ); + +        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) +        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) +        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) +        assert_eq!( +            bf16::from_f32(min_sub_f * 1.49).to_bits(), +            min_sub.to_bits() * 1 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 1.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 1.51).to_bits(), +            min_sub.to_bits() * 2 +        ); + +        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) +        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) +        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) +        assert_eq!( +            bf16::from_f32(min_sub_f * 2.49).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 2.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f32(min_sub_f * 2.51).to_bits(), +            min_sub.to_bits() * 3 +        ); + +        assert_eq!( +            bf16::from_f32(250.49f32).to_bits(), +            bf16::from_f32(250.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(250.50f32).to_bits(), +            bf16::from_f32(250.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(250.51f32).to_bits(), +            bf16::from_f32(251.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(251.49f32).to_bits(), +            bf16::from_f32(251.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(251.50f32).to_bits(), +            bf16::from_f32(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(251.51f32).to_bits(), +            bf16::from_f32(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(252.49f32).to_bits(), +            bf16::from_f32(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(252.50f32).to_bits(), +            bf16::from_f32(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f32(252.51f32).to_bits(), +            bf16::from_f32(253.0).to_bits() +        ); +    } + +    #[test] +    #[allow(clippy::erasing_op, clippy::identity_op)] +    fn round_to_even_f64() { +        // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 +        let min_sub = bf16::from_bits(1); +        let min_sub_f = (-133f64).exp2(); +        assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); +        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + +        // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) +        // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) +        // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) +        assert_eq!( +            bf16::from_f64(min_sub_f * 0.49).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 0.50).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 0.51).to_bits(), +            min_sub.to_bits() * 1 +        ); + +        // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) +        // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) +        // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) +        assert_eq!( +            bf16::from_f64(min_sub_f * 1.49).to_bits(), +            min_sub.to_bits() * 1 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 1.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 1.51).to_bits(), +            min_sub.to_bits() * 2 +        ); + +        // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) +        // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) +        // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) +        assert_eq!( +            bf16::from_f64(min_sub_f * 2.49).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 2.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            bf16::from_f64(min_sub_f * 2.51).to_bits(), +            min_sub.to_bits() * 3 +        ); + +        assert_eq!( +            bf16::from_f64(250.49f64).to_bits(), +            bf16::from_f64(250.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(250.50f64).to_bits(), +            bf16::from_f64(250.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(250.51f64).to_bits(), +            bf16::from_f64(251.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(251.49f64).to_bits(), +            bf16::from_f64(251.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(251.50f64).to_bits(), +            bf16::from_f64(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(251.51f64).to_bits(), +            bf16::from_f64(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(252.49f64).to_bits(), +            bf16::from_f64(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(252.50f64).to_bits(), +            bf16::from_f64(252.0).to_bits() +        ); +        assert_eq!( +            bf16::from_f64(252.51f64).to_bits(), +            bf16::from_f64(253.0).to_bits() +        ); +    } + +    impl quickcheck::Arbitrary for bf16 { +        fn arbitrary(g: &mut quickcheck::Gen) -> Self { +            bf16(u16::arbitrary(g)) +        } +    } + +    #[quickcheck] +    fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool { +        let roundtrip = bf16::from_f32(f.to_f32()); +        if f.is_nan() { +            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() +        } else { +            f.0 == roundtrip.0 +        } +    } + +    #[quickcheck] +    fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool { +        let roundtrip = bf16::from_f64(f.to_f64()); +        if f.is_nan() { +            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() +        } else { +            f.0 == roundtrip.0 +        } +    } +} diff --git a/vendor/half/src/bfloat/convert.rs b/vendor/half/src/bfloat/convert.rs new file mode 100644 index 0000000..8f258f5 --- /dev/null +++ b/vendor/half/src/bfloat/convert.rs @@ -0,0 +1,148 @@ +use crate::leading_zeros::leading_zeros_u16; +use core::mem; + +#[inline] +pub(crate) const fn f32_to_bf16(value: f32) -> u16 { +    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized +    // Convert to raw bytes +    let x: u32 = unsafe { mem::transmute(value) }; + +    // check for NaN +    if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 { +        // Keep high part of current mantissa but also set most significiant mantissa bit +        return ((x >> 16) | 0x0040u32) as u16; +    } + +    // round and shift +    let round_bit = 0x0000_8000u32; +    if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { +        (x >> 16) as u16 + 1 +    } else { +        (x >> 16) as u16 +    } +} + +#[inline] +pub(crate) const fn f64_to_bf16(value: f64) -> u16 { +    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized +    // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always +    // be lost on half-precision. +    let val: u64 = unsafe { mem::transmute(value) }; +    let x = (val >> 32) as u32; + +    // Extract IEEE754 components +    let sign = x & 0x8000_0000u32; +    let exp = x & 0x7FF0_0000u32; +    let man = x & 0x000F_FFFFu32; + +    // Check for all exponent bits being set, which is Infinity or NaN +    if exp == 0x7FF0_0000u32 { +        // Set mantissa MSB for NaN (and also keep shifted mantissa bits). +        // We also have to check the last 32 bits. +        let nan_bit = if man == 0 && (val as u32 == 0) { +            0 +        } else { +            0x0040u32 +        }; +        return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16; +    } + +    // The number is normalized, start assembling half precision version +    let half_sign = sign >> 16; +    // Unbias the exponent, then bias for bfloat16 precision +    let unbiased_exp = ((exp >> 20) as i64) - 1023; +    let half_exp = unbiased_exp + 127; + +    // Check for exponent overflow, return +infinity +    if half_exp >= 0xFF { +        return (half_sign | 0x7F80u32) as u16; +    } + +    // Check for underflow +    if half_exp <= 0 { +        // Check mantissa for what we can do +        if 7 - half_exp > 21 { +            // No rounding possibility, so this is a full underflow, return signed zero +            return half_sign as u16; +        } +        // Don't forget about hidden leading mantissa bit when assembling mantissa +        let man = man | 0x0010_0000u32; +        let mut half_man = man >> (14 - half_exp); +        // Check for rounding +        let round_bit = 1 << (13 - half_exp); +        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +            half_man += 1; +        } +        // No exponent for subnormals +        return (half_sign | half_man) as u16; +    } + +    // Rebias the exponent +    let half_exp = (half_exp as u32) << 7; +    let half_man = man >> 13; +    // Check for rounding +    let round_bit = 0x0000_1000u32; +    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +        // Round it +        ((half_sign | half_exp | half_man) + 1) as u16 +    } else { +        (half_sign | half_exp | half_man) as u16 +    } +} + +#[inline] +pub(crate) const fn bf16_to_f32(i: u16) -> f32 { +    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized +    // If NaN, keep current mantissa but also set most significiant mantissa bit +    if i & 0x7FFFu16 > 0x7F80u16 { +        unsafe { mem::transmute((i as u32 | 0x0040u32) << 16) } +    } else { +        unsafe { mem::transmute((i as u32) << 16) } +    } +} + +#[inline] +pub(crate) const fn bf16_to_f64(i: u16) -> f64 { +    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized +    // Check for signed zero +    if i & 0x7FFFu16 == 0 { +        return unsafe { mem::transmute((i as u64) << 48) }; +    } + +    let half_sign = (i & 0x8000u16) as u64; +    let half_exp = (i & 0x7F80u16) as u64; +    let half_man = (i & 0x007Fu16) as u64; + +    // Check for an infinity or NaN when all exponent bits set +    if half_exp == 0x7F80u64 { +        // Check for signed infinity if mantissa is zero +        if half_man == 0 { +            return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; +        } else { +            // NaN, keep current mantissa but also set most significiant mantissa bit +            return unsafe { +                mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45)) +            }; +        } +    } + +    // Calculate double-precision components with adjusted exponent +    let sign = half_sign << 48; +    // Unbias exponent +    let unbiased_exp = ((half_exp as i64) >> 7) - 127; + +    // Check for subnormals, which will be normalized by adjusting exponent +    if half_exp == 0 { +        // Calculate how much to adjust the exponent by +        let e = leading_zeros_u16(half_man as u16) - 9; + +        // Rebias and adjust exponent +        let exp = ((1023 - 127 - e) as u64) << 52; +        let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64; +        return unsafe { mem::transmute(sign | exp | man) }; +    } +    // Rebias exponent for a normalized normal +    let exp = ((unbiased_exp + 1023) as u64) << 52; +    let man = (half_man & 0x007Fu64) << 45; +    unsafe { mem::transmute(sign | exp | man) } +} diff --git a/vendor/half/src/binary16.rs b/vendor/half/src/binary16.rs new file mode 100644 index 0000000..b622f01 --- /dev/null +++ b/vendor/half/src/binary16.rs @@ -0,0 +1,1912 @@ +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, Zeroable}; +use core::{ +    cmp::Ordering, +    iter::{Product, Sum}, +    num::FpCategory, +    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, +}; +#[cfg(not(target_arch = "spirv"))] +use core::{ +    fmt::{ +        Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, +    }, +    num::ParseFloatError, +    str::FromStr, +}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "zerocopy")] +use zerocopy::{AsBytes, FromBytes}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`] a.k.a `half` +/// format. +/// +/// This 16-bit floating point type is intended for efficient storage where the full range and +/// precision of a larger floating point value is not required. Because [`f16`] is primarily for +/// efficient storage, floating point operations such as addition, multiplication, etc. are not +/// implemented. Operations should be performed with [`f32`] or higher-precision types and converted +/// to/from [`f16`] as necessary. +/// +/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] +#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] +pub struct f16(u16); + +impl f16 { +    /// Constructs a 16-bit floating point value from the raw bits. +    #[inline] +    #[must_use] +    pub const fn from_bits(bits: u16) -> f16 { +        f16(bits) +    } + +    /// Constructs a 16-bit floating point value from a 32-bit floating point value. +    /// +    /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are +    /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in +    /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals +    /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit +    /// value. +    #[inline] +    #[must_use] +    pub fn from_f32(value: f32) -> f16 { +        f16(convert::f32_to_f16(value)) +    } + +    /// Constructs a 16-bit floating point value from a 32-bit floating point value. +    /// +    /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred +    /// in any non-`const` context. +    /// +    /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are +    /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in +    /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals +    /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit +    /// value. +    #[inline] +    #[must_use] +    pub const fn from_f32_const(value: f32) -> f16 { +        f16(convert::f32_to_f16_fallback(value)) +    } + +    /// Constructs a 16-bit floating point value from a 64-bit floating point value. +    /// +    /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are +    /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in +    /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals +    /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit +    /// value. +    #[inline] +    #[must_use] +    pub fn from_f64(value: f64) -> f16 { +        f16(convert::f64_to_f16(value)) +    } + +    /// Constructs a 16-bit floating point value from a 64-bit floating point value. +    /// +    /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred +    /// in any non-`const` context. +    /// +    /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are +    /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in +    /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals +    /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit +    /// value. +    #[inline] +    #[must_use] +    pub const fn from_f64_const(value: f64) -> f16 { +        f16(convert::f64_to_f16_fallback(value)) +    } + +    /// Converts a [`f16`] into the underlying bit representation. +    #[inline] +    #[must_use] +    pub const fn to_bits(self) -> u16 { +        self.0 +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// little-endian byte order. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = f16::from_f32(12.5).to_le_bytes(); +    /// assert_eq!(bytes, [0x40, 0x4A]); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_le_bytes(self) -> [u8; 2] { +        self.0.to_le_bytes() +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// big-endian (network) byte order. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = f16::from_f32(12.5).to_be_bytes(); +    /// assert_eq!(bytes, [0x4A, 0x40]); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_be_bytes(self) -> [u8; 2] { +        self.0.to_be_bytes() +    } + +    /// Returns the memory representation of the underlying bit representation as a byte array in +    /// native byte order. +    /// +    /// As the target platform's native endianness is used, portable code should use +    /// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate, +    /// instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let bytes = f16::from_f32(12.5).to_ne_bytes(); +    /// assert_eq!(bytes, if cfg!(target_endian = "big") { +    ///     [0x4A, 0x40] +    /// } else { +    ///     [0x40, 0x4A] +    /// }); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn to_ne_bytes(self) -> [u8; 2] { +        self.0.to_ne_bytes() +    } + +    /// Creates a floating point value from its representation as a byte array in little endian. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = f16::from_le_bytes([0x40, 0x4A]); +    /// assert_eq!(value, f16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_le_bytes(bytes: [u8; 2]) -> f16 { +        f16::from_bits(u16::from_le_bytes(bytes)) +    } + +    /// Creates a floating point value from its representation as a byte array in big endian. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = f16::from_be_bytes([0x4A, 0x40]); +    /// assert_eq!(value, f16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_be_bytes(bytes: [u8; 2]) -> f16 { +        f16::from_bits(u16::from_be_bytes(bytes)) +    } + +    /// Creates a floating point value from its representation as a byte array in native endian. +    /// +    /// As the target platform's native endianness is used, portable code likely wants to use +    /// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as +    /// appropriate instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") { +    ///     [0x4A, 0x40] +    /// } else { +    ///     [0x40, 0x4A] +    /// }); +    /// assert_eq!(value, f16::from_f32(12.5)); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn from_ne_bytes(bytes: [u8; 2]) -> f16 { +        f16::from_bits(u16::from_ne_bytes(bytes)) +    } + +    /// Converts a [`f16`] value into a `f32` value. +    /// +    /// This conversion is lossless as all 16-bit floating point values can be represented exactly +    /// in 32-bit floating point. +    #[inline] +    #[must_use] +    pub fn to_f32(self) -> f32 { +        convert::f16_to_f32(self.0) +    } + +    /// Converts a [`f16`] value into a `f32` value. +    /// +    /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred +    /// in any non-`const` context. +    /// +    /// This conversion is lossless as all 16-bit floating point values can be represented exactly +    /// in 32-bit floating point. +    #[inline] +    #[must_use] +    pub const fn to_f32_const(self) -> f32 { +        convert::f16_to_f32_fallback(self.0) +    } + +    /// Converts a [`f16`] value into a `f64` value. +    /// +    /// This conversion is lossless as all 16-bit floating point values can be represented exactly +    /// in 64-bit floating point. +    #[inline] +    #[must_use] +    pub fn to_f64(self) -> f64 { +        convert::f16_to_f64(self.0) +    } + +    /// Converts a [`f16`] value into a `f64` value. +    /// +    /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware +    /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred +    /// in any non-`const` context. +    /// +    /// This conversion is lossless as all 16-bit floating point values can be represented exactly +    /// in 64-bit floating point. +    #[inline] +    #[must_use] +    pub const fn to_f64_const(self) -> f64 { +        convert::f16_to_f64_fallback(self.0) +    } + +    /// Returns `true` if this value is `NaN` and `false` otherwise. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = f16::NAN; +    /// let f = f16::from_f32(7.0_f32); +    /// +    /// assert!(nan.is_nan()); +    /// assert!(!f.is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_nan(self) -> bool { +        self.0 & 0x7FFFu16 > 0x7C00u16 +    } + +    /// Returns `true` if this value is ±∞ and `false`. +    /// otherwise. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = f16::from_f32(7.0f32); +    /// let inf = f16::INFINITY; +    /// let neg_inf = f16::NEG_INFINITY; +    /// let nan = f16::NAN; +    /// +    /// assert!(!f.is_infinite()); +    /// assert!(!nan.is_infinite()); +    /// +    /// assert!(inf.is_infinite()); +    /// assert!(neg_inf.is_infinite()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_infinite(self) -> bool { +        self.0 & 0x7FFFu16 == 0x7C00u16 +    } + +    /// Returns `true` if this number is neither infinite nor `NaN`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = f16::from_f32(7.0f32); +    /// let inf = f16::INFINITY; +    /// let neg_inf = f16::NEG_INFINITY; +    /// let nan = f16::NAN; +    /// +    /// assert!(f.is_finite()); +    /// +    /// assert!(!nan.is_finite()); +    /// assert!(!inf.is_finite()); +    /// assert!(!neg_inf.is_finite()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_finite(self) -> bool { +        self.0 & 0x7C00u16 != 0x7C00u16 +    } + +    /// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let min = f16::MIN_POSITIVE; +    /// let max = f16::MAX; +    /// let lower_than_min = f16::from_f32(1.0e-10_f32); +    /// let zero = f16::from_f32(0.0_f32); +    /// +    /// assert!(min.is_normal()); +    /// assert!(max.is_normal()); +    /// +    /// assert!(!zero.is_normal()); +    /// assert!(!f16::NAN.is_normal()); +    /// assert!(!f16::INFINITY.is_normal()); +    /// // Values between `0` and `min` are Subnormal. +    /// assert!(!lower_than_min.is_normal()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_normal(self) -> bool { +        let exp = self.0 & 0x7C00u16; +        exp != 0x7C00u16 && exp != 0 +    } + +    /// Returns the floating point category of the number. +    /// +    /// If only one property is going to be tested, it is generally faster to use the specific +    /// predicate instead. +    /// +    /// # Examples +    /// +    /// ```rust +    /// use std::num::FpCategory; +    /// # use half::prelude::*; +    /// +    /// let num = f16::from_f32(12.4_f32); +    /// let inf = f16::INFINITY; +    /// +    /// assert_eq!(num.classify(), FpCategory::Normal); +    /// assert_eq!(inf.classify(), FpCategory::Infinite); +    /// ``` +    #[must_use] +    pub const fn classify(self) -> FpCategory { +        let exp = self.0 & 0x7C00u16; +        let man = self.0 & 0x03FFu16; +        match (exp, man) { +            (0, 0) => FpCategory::Zero, +            (0, _) => FpCategory::Subnormal, +            (0x7C00u16, 0) => FpCategory::Infinite, +            (0x7C00u16, _) => FpCategory::Nan, +            _ => FpCategory::Normal, +        } +    } + +    /// Returns a number that represents the sign of `self`. +    /// +    /// * `1.0` if the number is positive, `+0.0` or [`INFINITY`][f16::INFINITY] +    /// * `-1.0` if the number is negative, `-0.0` or [`NEG_INFINITY`][f16::NEG_INFINITY] +    /// * [`NAN`][f16::NAN] if the number is `NaN` +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let f = f16::from_f32(3.5_f32); +    /// +    /// assert_eq!(f.signum(), f16::from_f32(1.0)); +    /// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-1.0)); +    /// +    /// assert!(f16::NAN.signum().is_nan()); +    /// ``` +    #[must_use] +    pub const fn signum(self) -> f16 { +        if self.is_nan() { +            self +        } else if self.0 & 0x8000u16 != 0 { +            Self::NEG_ONE +        } else { +            Self::ONE +        } +    } + +    /// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a +    /// positive sign bit and +∞. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = f16::NAN; +    /// let f = f16::from_f32(7.0_f32); +    /// let g = f16::from_f32(-7.0_f32); +    /// +    /// assert!(f.is_sign_positive()); +    /// assert!(!g.is_sign_positive()); +    /// // `NaN` can be either positive or negative +    /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_sign_positive(self) -> bool { +        self.0 & 0x8000u16 == 0 +    } + +    /// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a +    /// negative sign bit and −∞. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// +    /// let nan = f16::NAN; +    /// let f = f16::from_f32(7.0f32); +    /// let g = f16::from_f32(-7.0f32); +    /// +    /// assert!(!f.is_sign_negative()); +    /// assert!(g.is_sign_negative()); +    /// // `NaN` can be either positive or negative +    /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn is_sign_negative(self) -> bool { +        self.0 & 0x8000u16 != 0 +    } + +    /// Returns a number composed of the magnitude of `self` and the sign of `sign`. +    /// +    /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. +    /// If `self` is NaN, then NaN with the sign of `sign` is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let f = f16::from_f32(3.5); +    /// +    /// assert_eq!(f.copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); +    /// assert_eq!(f.copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); +    /// assert_eq!((-f).copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); +    /// assert_eq!((-f).copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); +    /// +    /// assert!(f16::NAN.copysign(f16::from_f32(1.0)).is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub const fn copysign(self, sign: f16) -> f16 { +        f16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) +    } + +    /// Returns the maximum of the two numbers. +    /// +    /// If one of the arguments is NaN, then the other argument is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let x = f16::from_f32(1.0); +    /// let y = f16::from_f32(2.0); +    /// +    /// assert_eq!(x.max(y), y); +    /// ``` +    #[inline] +    #[must_use] +    pub fn max(self, other: f16) -> f16 { +        if other > self && !other.is_nan() { +            other +        } else { +            self +        } +    } + +    /// Returns the minimum of the two numbers. +    /// +    /// If one of the arguments is NaN, then the other argument is returned. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// let x = f16::from_f32(1.0); +    /// let y = f16::from_f32(2.0); +    /// +    /// assert_eq!(x.min(y), x); +    /// ``` +    #[inline] +    #[must_use] +    pub fn min(self, other: f16) -> f16 { +        if other < self && !other.is_nan() { +            other +        } else { +            self +        } +    } + +    /// Restrict a value to a certain interval unless it is NaN. +    /// +    /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. +    /// Otherwise this returns `self`. +    /// +    /// Note that this function returns NaN if the initial value was NaN as well. +    /// +    /// # Panics +    /// Panics if `min > max`, `min` is NaN, or `max` is NaN. +    /// +    /// # Examples +    /// +    /// ``` +    /// # use half::prelude::*; +    /// assert!(f16::from_f32(-3.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(-2.0)); +    /// assert!(f16::from_f32(0.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(0.0)); +    /// assert!(f16::from_f32(2.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(1.0)); +    /// assert!(f16::NAN.clamp(f16::from_f32(-2.0), f16::from_f32(1.0)).is_nan()); +    /// ``` +    #[inline] +    #[must_use] +    pub fn clamp(self, min: f16, max: f16) -> f16 { +        assert!(min <= max); +        let mut x = self; +        if x < min { +            x = min; +        } +        if x > max { +            x = max; +        } +        x +    } + +    /// Returns the ordering between `self` and `other`. +    /// +    /// Unlike the standard partial comparison between floating point numbers, +    /// this comparison always produces an ordering in accordance to +    /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) +    /// floating point standard. The values are ordered in the following sequence: +    /// +    /// - negative quiet NaN +    /// - negative signaling NaN +    /// - negative infinity +    /// - negative numbers +    /// - negative subnormal numbers +    /// - negative zero +    /// - positive zero +    /// - positive subnormal numbers +    /// - positive numbers +    /// - positive infinity +    /// - positive signaling NaN +    /// - positive quiet NaN. +    /// +    /// The ordering established by this function does not always agree with the +    /// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example, +    /// they consider negative and positive zero equal, while `total_cmp` +    /// doesn't. +    /// +    /// The interpretation of the signaling NaN bit follows the definition in +    /// the IEEE 754 standard, which may not match the interpretation by some of +    /// the older, non-conformant (e.g. MIPS) hardware implementations. +    /// +    /// # Examples +    /// ``` +    /// # use half::f16; +    /// let mut v: Vec<f16> = vec![]; +    /// v.push(f16::ONE); +    /// v.push(f16::INFINITY); +    /// v.push(f16::NEG_INFINITY); +    /// v.push(f16::NAN); +    /// v.push(f16::MAX_SUBNORMAL); +    /// v.push(-f16::MAX_SUBNORMAL); +    /// v.push(f16::ZERO); +    /// v.push(f16::NEG_ZERO); +    /// v.push(f16::NEG_ONE); +    /// v.push(f16::MIN_POSITIVE); +    /// +    /// v.sort_by(|a, b| a.total_cmp(&b)); +    /// +    /// assert!(v +    ///     .into_iter() +    ///     .zip( +    ///         [ +    ///             f16::NEG_INFINITY, +    ///             f16::NEG_ONE, +    ///             -f16::MAX_SUBNORMAL, +    ///             f16::NEG_ZERO, +    ///             f16::ZERO, +    ///             f16::MAX_SUBNORMAL, +    ///             f16::MIN_POSITIVE, +    ///             f16::ONE, +    ///             f16::INFINITY, +    ///             f16::NAN +    ///         ] +    ///         .iter() +    ///     ) +    ///     .all(|(a, b)| a.to_bits() == b.to_bits())); +    /// ``` +    // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp +    #[inline] +    #[must_use] +    pub fn total_cmp(&self, other: &Self) -> Ordering { +        let mut left = self.to_bits() as i16; +        let mut right = other.to_bits() as i16; +        left ^= (((left >> 15) as u16) >> 1) as i16; +        right ^= (((right >> 15) as u16) >> 1) as i16; +        left.cmp(&right) +    } + +    /// Alternate serialize adapter for serializing as a float. +    /// +    /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize +    /// implementation that serializes as an [`f32`] value. It is designed for use with +    /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by +    /// the default deserialize implementation. +    /// +    /// # Examples +    /// +    /// A demonstration on how to use this adapater: +    /// +    /// ``` +    /// use serde::{Serialize, Deserialize}; +    /// use half::f16; +    /// +    /// #[derive(Serialize, Deserialize)] +    /// struct MyStruct { +    ///     #[serde(serialize_with = "f16::serialize_as_f32")] +    ///     value: f16 // Will be serialized as f32 instead of u16 +    /// } +    /// ``` +    #[cfg(feature = "serde")] +    pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { +        serializer.serialize_f32(self.to_f32()) +    } + +    /// Alternate serialize adapter for serializing as a string. +    /// +    /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize +    /// implementation that serializes as a string value. It is designed for use with +    /// `serialize_with` serde attributes. Deserialization from string values is already supported +    /// by the default deserialize implementation. +    /// +    /// # Examples +    /// +    /// A demonstration on how to use this adapater: +    /// +    /// ``` +    /// use serde::{Serialize, Deserialize}; +    /// use half::f16; +    /// +    /// #[derive(Serialize, Deserialize)] +    /// struct MyStruct { +    ///     #[serde(serialize_with = "f16::serialize_as_string")] +    ///     value: f16 // Will be serialized as a string instead of u16 +    /// } +    /// ``` +    #[cfg(feature = "serde")] +    pub fn serialize_as_string<S: serde::Serializer>( +        &self, +        serializer: S, +    ) -> Result<S::Ok, S::Error> { +        serializer.serialize_str(&self.to_string()) +    } + +    /// Approximate number of [`f16`] significant digits in base 10 +    pub const DIGITS: u32 = 3; +    /// [`f16`] +    /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value +    /// +    /// This is the difference between 1.0 and the next largest representable number. +    pub const EPSILON: f16 = f16(0x1400u16); +    /// [`f16`] positive Infinity (+∞) +    pub const INFINITY: f16 = f16(0x7C00u16); +    /// Number of [`f16`] significant digits in base 2 +    pub const MANTISSA_DIGITS: u32 = 11; +    /// Largest finite [`f16`] value +    pub const MAX: f16 = f16(0x7BFF); +    /// Maximum possible [`f16`] power of 10 exponent +    pub const MAX_10_EXP: i32 = 4; +    /// Maximum possible [`f16`] power of 2 exponent +    pub const MAX_EXP: i32 = 16; +    /// Smallest finite [`f16`] value +    pub const MIN: f16 = f16(0xFBFF); +    /// Minimum possible normal [`f16`] power of 10 exponent +    pub const MIN_10_EXP: i32 = -4; +    /// One greater than the minimum possible normal [`f16`] power of 2 exponent +    pub const MIN_EXP: i32 = -13; +    /// Smallest positive normal [`f16`] value +    pub const MIN_POSITIVE: f16 = f16(0x0400u16); +    /// [`f16`] Not a Number (NaN) +    pub const NAN: f16 = f16(0x7E00u16); +    /// [`f16`] negative infinity (-∞) +    pub const NEG_INFINITY: f16 = f16(0xFC00u16); +    /// The radix or base of the internal representation of [`f16`] +    pub const RADIX: u32 = 2; + +    /// Minimum positive subnormal [`f16`] value +    pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(0x0001u16); +    /// Maximum subnormal [`f16`] value +    pub const MAX_SUBNORMAL: f16 = f16(0x03FFu16); + +    /// [`f16`] 1 +    pub const ONE: f16 = f16(0x3C00u16); +    /// [`f16`] 0 +    pub const ZERO: f16 = f16(0x0000u16); +    /// [`f16`] -0 +    pub const NEG_ZERO: f16 = f16(0x8000u16); +    /// [`f16`] -1 +    pub const NEG_ONE: f16 = f16(0xBC00u16); + +    /// [`f16`] Euler's number (ℯ) +    pub const E: f16 = f16(0x4170u16); +    /// [`f16`] Archimedes' constant (π) +    pub const PI: f16 = f16(0x4248u16); +    /// [`f16`] 1/π +    pub const FRAC_1_PI: f16 = f16(0x3518u16); +    /// [`f16`] 1/√2 +    pub const FRAC_1_SQRT_2: f16 = f16(0x39A8u16); +    /// [`f16`] 2/π +    pub const FRAC_2_PI: f16 = f16(0x3918u16); +    /// [`f16`] 2/√π +    pub const FRAC_2_SQRT_PI: f16 = f16(0x3C83u16); +    /// [`f16`] π/2 +    pub const FRAC_PI_2: f16 = f16(0x3E48u16); +    /// [`f16`] π/3 +    pub const FRAC_PI_3: f16 = f16(0x3C30u16); +    /// [`f16`] π/4 +    pub const FRAC_PI_4: f16 = f16(0x3A48u16); +    /// [`f16`] π/6 +    pub const FRAC_PI_6: f16 = f16(0x3830u16); +    /// [`f16`] π/8 +    pub const FRAC_PI_8: f16 = f16(0x3648u16); +    /// [`f16`] 𝗅𝗇 10 +    pub const LN_10: f16 = f16(0x409Bu16); +    /// [`f16`] 𝗅𝗇 2 +    pub const LN_2: f16 = f16(0x398Cu16); +    /// [`f16`] 𝗅𝗈𝗀₁₀ℯ +    pub const LOG10_E: f16 = f16(0x36F3u16); +    /// [`f16`] 𝗅𝗈𝗀₁₀2 +    pub const LOG10_2: f16 = f16(0x34D1u16); +    /// [`f16`] 𝗅𝗈𝗀₂ℯ +    pub const LOG2_E: f16 = f16(0x3DC5u16); +    /// [`f16`] 𝗅𝗈𝗀₂10 +    pub const LOG2_10: f16 = f16(0x42A5u16); +    /// [`f16`] √2 +    pub const SQRT_2: f16 = f16(0x3DA8u16); +} + +impl From<f16> for f32 { +    #[inline] +    fn from(x: f16) -> f32 { +        x.to_f32() +    } +} + +impl From<f16> for f64 { +    #[inline] +    fn from(x: f16) -> f64 { +        x.to_f64() +    } +} + +impl From<i8> for f16 { +    #[inline] +    fn from(x: i8) -> f16 { +        // Convert to f32, then to f16 +        f16::from_f32(f32::from(x)) +    } +} + +impl From<u8> for f16 { +    #[inline] +    fn from(x: u8) -> f16 { +        // Convert to f32, then to f16 +        f16::from_f32(f32::from(x)) +    } +} + +impl PartialEq for f16 { +    fn eq(&self, other: &f16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) +        } +    } +} + +impl PartialOrd for f16 { +    fn partial_cmp(&self, other: &f16) -> Option<Ordering> { +        if self.is_nan() || other.is_nan() { +            None +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => Some(self.0.cmp(&other.0)), +                (false, true) => { +                    if (self.0 | other.0) & 0x7FFFu16 == 0 { +                        Some(Ordering::Equal) +                    } else { +                        Some(Ordering::Greater) +                    } +                } +                (true, false) => { +                    if (self.0 | other.0) & 0x7FFFu16 == 0 { +                        Some(Ordering::Equal) +                    } else { +                        Some(Ordering::Less) +                    } +                } +                (true, true) => Some(other.0.cmp(&self.0)), +            } +        } +    } + +    fn lt(&self, other: &f16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 < other.0, +                (false, true) => false, +                (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, +                (true, true) => self.0 > other.0, +            } +        } +    } + +    fn le(&self, other: &f16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 <= other.0, +                (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, +                (true, false) => true, +                (true, true) => self.0 >= other.0, +            } +        } +    } + +    fn gt(&self, other: &f16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 > other.0, +                (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, +                (true, false) => false, +                (true, true) => self.0 < other.0, +            } +        } +    } + +    fn ge(&self, other: &f16) -> bool { +        if self.is_nan() || other.is_nan() { +            false +        } else { +            let neg = self.0 & 0x8000u16 != 0; +            let other_neg = other.0 & 0x8000u16 != 0; +            match (neg, other_neg) { +                (false, false) => self.0 >= other.0, +                (false, true) => true, +                (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, +                (true, true) => self.0 <= other.0, +            } +        } +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl FromStr for f16 { +    type Err = ParseFloatError; +    fn from_str(src: &str) -> Result<f16, ParseFloatError> { +        f32::from_str(src).map(f16::from_f32) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Debug for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:?}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Display for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerExp for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:e}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperExp for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:E}", self.to_f32()) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Binary for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:b}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl Octal for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:o}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerHex for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:x}", self.0) +    } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperHex for f16 { +    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { +        write!(f, "{:X}", self.0) +    } +} + +impl Neg for f16 { +    type Output = Self; + +    #[inline] +    fn neg(self) -> Self::Output { +        Self(self.0 ^ 0x8000) +    } +} + +impl Neg for &f16 { +    type Output = <f16 as Neg>::Output; + +    #[inline] +    fn neg(self) -> Self::Output { +        Neg::neg(*self) +    } +} + +impl Add for f16 { +    type Output = Self; + +    #[inline] +    fn add(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) +    } +} + +impl Add<&f16> for f16 { +    type Output = <f16 as Add<f16>>::Output; + +    #[inline] +    fn add(self, rhs: &f16) -> Self::Output { +        self.add(*rhs) +    } +} + +impl Add<&f16> for &f16 { +    type Output = <f16 as Add<f16>>::Output; + +    #[inline] +    fn add(self, rhs: &f16) -> Self::Output { +        (*self).add(*rhs) +    } +} + +impl Add<f16> for &f16 { +    type Output = <f16 as Add<f16>>::Output; + +    #[inline] +    fn add(self, rhs: f16) -> Self::Output { +        (*self).add(rhs) +    } +} + +impl AddAssign for f16 { +    #[inline] +    fn add_assign(&mut self, rhs: Self) { +        *self = (*self).add(rhs); +    } +} + +impl AddAssign<&f16> for f16 { +    #[inline] +    fn add_assign(&mut self, rhs: &f16) { +        *self = (*self).add(rhs); +    } +} + +impl Sub for f16 { +    type Output = Self; + +    #[inline] +    fn sub(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) +    } +} + +impl Sub<&f16> for f16 { +    type Output = <f16 as Sub<f16>>::Output; + +    #[inline] +    fn sub(self, rhs: &f16) -> Self::Output { +        self.sub(*rhs) +    } +} + +impl Sub<&f16> for &f16 { +    type Output = <f16 as Sub<f16>>::Output; + +    #[inline] +    fn sub(self, rhs: &f16) -> Self::Output { +        (*self).sub(*rhs) +    } +} + +impl Sub<f16> for &f16 { +    type Output = <f16 as Sub<f16>>::Output; + +    #[inline] +    fn sub(self, rhs: f16) -> Self::Output { +        (*self).sub(rhs) +    } +} + +impl SubAssign for f16 { +    #[inline] +    fn sub_assign(&mut self, rhs: Self) { +        *self = (*self).sub(rhs); +    } +} + +impl SubAssign<&f16> for f16 { +    #[inline] +    fn sub_assign(&mut self, rhs: &f16) { +        *self = (*self).sub(rhs); +    } +} + +impl Mul for f16 { +    type Output = Self; + +    #[inline] +    fn mul(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) +    } +} + +impl Mul<&f16> for f16 { +    type Output = <f16 as Mul<f16>>::Output; + +    #[inline] +    fn mul(self, rhs: &f16) -> Self::Output { +        self.mul(*rhs) +    } +} + +impl Mul<&f16> for &f16 { +    type Output = <f16 as Mul<f16>>::Output; + +    #[inline] +    fn mul(self, rhs: &f16) -> Self::Output { +        (*self).mul(*rhs) +    } +} + +impl Mul<f16> for &f16 { +    type Output = <f16 as Mul<f16>>::Output; + +    #[inline] +    fn mul(self, rhs: f16) -> Self::Output { +        (*self).mul(rhs) +    } +} + +impl MulAssign for f16 { +    #[inline] +    fn mul_assign(&mut self, rhs: Self) { +        *self = (*self).mul(rhs); +    } +} + +impl MulAssign<&f16> for f16 { +    #[inline] +    fn mul_assign(&mut self, rhs: &f16) { +        *self = (*self).mul(rhs); +    } +} + +impl Div for f16 { +    type Output = Self; + +    #[inline] +    fn div(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) +    } +} + +impl Div<&f16> for f16 { +    type Output = <f16 as Div<f16>>::Output; + +    #[inline] +    fn div(self, rhs: &f16) -> Self::Output { +        self.div(*rhs) +    } +} + +impl Div<&f16> for &f16 { +    type Output = <f16 as Div<f16>>::Output; + +    #[inline] +    fn div(self, rhs: &f16) -> Self::Output { +        (*self).div(*rhs) +    } +} + +impl Div<f16> for &f16 { +    type Output = <f16 as Div<f16>>::Output; + +    #[inline] +    fn div(self, rhs: f16) -> Self::Output { +        (*self).div(rhs) +    } +} + +impl DivAssign for f16 { +    #[inline] +    fn div_assign(&mut self, rhs: Self) { +        *self = (*self).div(rhs); +    } +} + +impl DivAssign<&f16> for f16 { +    #[inline] +    fn div_assign(&mut self, rhs: &f16) { +        *self = (*self).div(rhs); +    } +} + +impl Rem for f16 { +    type Output = Self; + +    #[inline] +    fn rem(self, rhs: Self) -> Self::Output { +        Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) +    } +} + +impl Rem<&f16> for f16 { +    type Output = <f16 as Rem<f16>>::Output; + +    #[inline] +    fn rem(self, rhs: &f16) -> Self::Output { +        self.rem(*rhs) +    } +} + +impl Rem<&f16> for &f16 { +    type Output = <f16 as Rem<f16>>::Output; + +    #[inline] +    fn rem(self, rhs: &f16) -> Self::Output { +        (*self).rem(*rhs) +    } +} + +impl Rem<f16> for &f16 { +    type Output = <f16 as Rem<f16>>::Output; + +    #[inline] +    fn rem(self, rhs: f16) -> Self::Output { +        (*self).rem(rhs) +    } +} + +impl RemAssign for f16 { +    #[inline] +    fn rem_assign(&mut self, rhs: Self) { +        *self = (*self).rem(rhs); +    } +} + +impl RemAssign<&f16> for f16 { +    #[inline] +    fn rem_assign(&mut self, rhs: &f16) { +        *self = (*self).rem(rhs); +    } +} + +impl Product for f16 { +    #[inline] +    fn product<I: Iterator<Item = Self>>(iter: I) -> Self { +        f16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +impl<'a> Product<&'a f16> for f16 { +    #[inline] +    fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self { +        f16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +impl Sum for f16 { +    #[inline] +    fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { +        f16::from_f32(iter.map(|f| f.to_f32()).sum()) +    } +} + +impl<'a> Sum<&'a f16> for f16 { +    #[inline] +    fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self { +        f16::from_f32(iter.map(|f| f.to_f32()).product()) +    } +} + +#[cfg(feature = "serde")] +struct Visitor; + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for f16 { +    fn deserialize<D>(deserializer: D) -> Result<f16, D::Error> +    where +        D: serde::de::Deserializer<'de>, +    { +        deserializer.deserialize_newtype_struct("f16", Visitor) +    } +} + +#[cfg(feature = "serde")] +impl<'de> serde::de::Visitor<'de> for Visitor { +    type Value = f16; + +    fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { +        write!(formatter, "tuple struct f16") +    } + +    fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> +    where +        D: serde::Deserializer<'de>, +    { +        Ok(f16(<u16 as Deserialize>::deserialize(deserializer)?)) +    } + +    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        v.parse().map_err(|_| { +            serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") +        }) +    } + +    fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        Ok(f16::from_f32(v)) +    } + +    fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> +    where +        E: serde::de::Error, +    { +        Ok(f16::from_f64(v)) +    } +} + +#[allow( +    clippy::cognitive_complexity, +    clippy::float_cmp, +    clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { +    use super::*; +    use core::cmp::Ordering; +    #[cfg(feature = "num-traits")] +    use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; +    use quickcheck_macros::quickcheck; + +    #[cfg(feature = "num-traits")] +    #[test] +    fn as_primitive() { +        let two = f16::from_f32(2.0); +        assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two); +        assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2); + +        assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two); +        assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0); + +        assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two); +        assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0); +    } + +    #[cfg(feature = "num-traits")] +    #[test] +    fn to_primitive() { +        let two = f16::from_f32(2.0); +        assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); +        assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); +        assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); +    } + +    #[cfg(feature = "num-traits")] +    #[test] +    fn from_primitive() { +        let two = f16::from_f32(2.0); +        assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two); +        assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two); +        assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two); +    } + +    #[test] +    fn test_f16_consts() { +        // DIGITS +        let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; +        assert_eq!(f16::DIGITS, digits); +        // sanity check to show test is good +        let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; +        assert_eq!(core::f32::DIGITS, digits32); + +        // EPSILON +        let one = f16::from_f32(1.0); +        let one_plus_epsilon = f16::from_bits(one.to_bits() + 1); +        let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0); +        assert_eq!(f16::EPSILON, epsilon); +        // sanity check to show test is good +        let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1); +        let epsilon32 = one_plus_epsilon32 - 1f32; +        assert_eq!(core::f32::EPSILON, epsilon32); + +        // MAX, MIN and MIN_POSITIVE +        let max = f16::from_bits(f16::INFINITY.to_bits() - 1); +        let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1); +        let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1)); +        assert_eq!(f16::MAX, max); +        assert_eq!(f16::MIN, min); +        assert_eq!(f16::MIN_POSITIVE, min_pos); +        // sanity check to show test is good +        let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1); +        let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1); +        let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1); +        assert_eq!(core::f32::MAX, max32); +        assert_eq!(core::f32::MIN, min32); +        assert_eq!(core::f32::MIN_POSITIVE, min_pos32); + +        // MIN_10_EXP and MAX_10_EXP +        let ten_to_min = 10f32.powi(f16::MIN_10_EXP); +        assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32()); +        assert!(ten_to_min > f16::MIN_POSITIVE.to_f32()); +        let ten_to_max = 10f32.powi(f16::MAX_10_EXP); +        assert!(ten_to_max < f16::MAX.to_f32()); +        assert!(ten_to_max * 10.0 > f16::MAX.to_f32()); +        // sanity check to show test is good +        let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP); +        assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE)); +        assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE)); +        let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP); +        assert!(ten_to_max32 < f64::from(core::f32::MAX)); +        assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX)); +    } + +    #[test] +    fn test_f16_consts_from_f32() { +        let one = f16::from_f32(1.0); +        let zero = f16::from_f32(0.0); +        let neg_zero = f16::from_f32(-0.0); +        let neg_one = f16::from_f32(-1.0); +        let inf = f16::from_f32(core::f32::INFINITY); +        let neg_inf = f16::from_f32(core::f32::NEG_INFINITY); +        let nan = f16::from_f32(core::f32::NAN); + +        assert_eq!(f16::ONE, one); +        assert_eq!(f16::ZERO, zero); +        assert!(zero.is_sign_positive()); +        assert_eq!(f16::NEG_ZERO, neg_zero); +        assert!(neg_zero.is_sign_negative()); +        assert_eq!(f16::NEG_ONE, neg_one); +        assert!(neg_one.is_sign_negative()); +        assert_eq!(f16::INFINITY, inf); +        assert_eq!(f16::NEG_INFINITY, neg_inf); +        assert!(nan.is_nan()); +        assert!(f16::NAN.is_nan()); + +        let e = f16::from_f32(core::f32::consts::E); +        let pi = f16::from_f32(core::f32::consts::PI); +        let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI); +        let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2); +        let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI); +        let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); +        let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2); +        let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3); +        let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4); +        let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6); +        let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8); +        let ln_10 = f16::from_f32(core::f32::consts::LN_10); +        let ln_2 = f16::from_f32(core::f32::consts::LN_2); +        let log10_e = f16::from_f32(core::f32::consts::LOG10_E); +        // core::f32::consts::LOG10_2 requires rustc 1.43.0 +        let log10_2 = f16::from_f32(2f32.log10()); +        let log2_e = f16::from_f32(core::f32::consts::LOG2_E); +        // core::f32::consts::LOG2_10 requires rustc 1.43.0 +        let log2_10 = f16::from_f32(10f32.log2()); +        let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2); + +        assert_eq!(f16::E, e); +        assert_eq!(f16::PI, pi); +        assert_eq!(f16::FRAC_1_PI, frac_1_pi); +        assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); +        assert_eq!(f16::FRAC_2_PI, frac_2_pi); +        assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); +        assert_eq!(f16::FRAC_PI_2, frac_pi_2); +        assert_eq!(f16::FRAC_PI_3, frac_pi_3); +        assert_eq!(f16::FRAC_PI_4, frac_pi_4); +        assert_eq!(f16::FRAC_PI_6, frac_pi_6); +        assert_eq!(f16::FRAC_PI_8, frac_pi_8); +        assert_eq!(f16::LN_10, ln_10); +        assert_eq!(f16::LN_2, ln_2); +        assert_eq!(f16::LOG10_E, log10_e); +        assert_eq!(f16::LOG10_2, log10_2); +        assert_eq!(f16::LOG2_E, log2_e); +        assert_eq!(f16::LOG2_10, log2_10); +        assert_eq!(f16::SQRT_2, sqrt_2); +    } + +    #[test] +    fn test_f16_consts_from_f64() { +        let one = f16::from_f64(1.0); +        let zero = f16::from_f64(0.0); +        let neg_zero = f16::from_f64(-0.0); +        let inf = f16::from_f64(core::f64::INFINITY); +        let neg_inf = f16::from_f64(core::f64::NEG_INFINITY); +        let nan = f16::from_f64(core::f64::NAN); + +        assert_eq!(f16::ONE, one); +        assert_eq!(f16::ZERO, zero); +        assert!(zero.is_sign_positive()); +        assert_eq!(f16::NEG_ZERO, neg_zero); +        assert!(neg_zero.is_sign_negative()); +        assert_eq!(f16::INFINITY, inf); +        assert_eq!(f16::NEG_INFINITY, neg_inf); +        assert!(nan.is_nan()); +        assert!(f16::NAN.is_nan()); + +        let e = f16::from_f64(core::f64::consts::E); +        let pi = f16::from_f64(core::f64::consts::PI); +        let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI); +        let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2); +        let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI); +        let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); +        let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2); +        let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3); +        let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4); +        let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6); +        let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8); +        let ln_10 = f16::from_f64(core::f64::consts::LN_10); +        let ln_2 = f16::from_f64(core::f64::consts::LN_2); +        let log10_e = f16::from_f64(core::f64::consts::LOG10_E); +        // core::f64::consts::LOG10_2 requires rustc 1.43.0 +        let log10_2 = f16::from_f64(2f64.log10()); +        let log2_e = f16::from_f64(core::f64::consts::LOG2_E); +        // core::f64::consts::LOG2_10 requires rustc 1.43.0 +        let log2_10 = f16::from_f64(10f64.log2()); +        let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2); + +        assert_eq!(f16::E, e); +        assert_eq!(f16::PI, pi); +        assert_eq!(f16::FRAC_1_PI, frac_1_pi); +        assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); +        assert_eq!(f16::FRAC_2_PI, frac_2_pi); +        assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); +        assert_eq!(f16::FRAC_PI_2, frac_pi_2); +        assert_eq!(f16::FRAC_PI_3, frac_pi_3); +        assert_eq!(f16::FRAC_PI_4, frac_pi_4); +        assert_eq!(f16::FRAC_PI_6, frac_pi_6); +        assert_eq!(f16::FRAC_PI_8, frac_pi_8); +        assert_eq!(f16::LN_10, ln_10); +        assert_eq!(f16::LN_2, ln_2); +        assert_eq!(f16::LOG10_E, log10_e); +        assert_eq!(f16::LOG10_2, log10_2); +        assert_eq!(f16::LOG2_E, log2_e); +        assert_eq!(f16::LOG2_10, log2_10); +        assert_eq!(f16::SQRT_2, sqrt_2); +    } + +    #[test] +    fn test_nan_conversion_to_smaller() { +        let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); +        let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); +        let nan32 = f32::from_bits(0x7F80_0001u32); +        let neg_nan32 = f32::from_bits(0xFF80_0001u32); +        let nan32_from_64 = nan64 as f32; +        let neg_nan32_from_64 = neg_nan64 as f32; +        let nan16_from_64 = f16::from_f64(nan64); +        let neg_nan16_from_64 = f16::from_f64(neg_nan64); +        let nan16_from_32 = f16::from_f32(nan32); +        let neg_nan16_from_32 = f16::from_f32(neg_nan32); + +        assert!(nan64.is_nan() && nan64.is_sign_positive()); +        assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); +        assert!(nan32.is_nan() && nan32.is_sign_positive()); +        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); +        assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); +        assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); +        assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); +        assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); +        assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); +        assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); +    } + +    #[test] +    fn test_nan_conversion_to_larger() { +        let nan16 = f16::from_bits(0x7C01u16); +        let neg_nan16 = f16::from_bits(0xFC01u16); +        let nan32 = f32::from_bits(0x7F80_0001u32); +        let neg_nan32 = f32::from_bits(0xFF80_0001u32); +        let nan32_from_16 = f32::from(nan16); +        let neg_nan32_from_16 = f32::from(neg_nan16); +        let nan64_from_16 = f64::from(nan16); +        let neg_nan64_from_16 = f64::from(neg_nan16); +        let nan64_from_32 = f64::from(nan32); +        let neg_nan64_from_32 = f64::from(neg_nan32); + +        assert!(nan16.is_nan() && nan16.is_sign_positive()); +        assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); +        assert!(nan32.is_nan() && nan32.is_sign_positive()); +        assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); +        assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); +        assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); +        assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); +        assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); +        assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); +        assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); +    } + +    #[test] +    fn test_f16_to_f32() { +        let f = f16::from_f32(7.0); +        assert_eq!(f.to_f32(), 7.0f32); + +        // 7.1 is NOT exactly representable in 16-bit, it's rounded +        let f = f16::from_f32(7.1); +        let diff = (f.to_f32() - 7.1f32).abs(); +        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 +        assert!(diff <= 4.0 * f16::EPSILON.to_f32()); + +        assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24)); +        assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24)); + +        assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24))); +        assert_eq!( +            f16::from_bits(0x0000_0005), +            f16::from_f32(5.0 * 2.0f32.powi(-24)) +        ); +    } + +    #[test] +    fn test_f16_to_f64() { +        let f = f16::from_f64(7.0); +        assert_eq!(f.to_f64(), 7.0f64); + +        // 7.1 is NOT exactly representable in 16-bit, it's rounded +        let f = f16::from_f64(7.1); +        let diff = (f.to_f64() - 7.1f64).abs(); +        // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 +        assert!(diff <= 4.0 * f16::EPSILON.to_f64()); + +        assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24)); +        assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24)); + +        assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24))); +        assert_eq!( +            f16::from_bits(0x0000_0005), +            f16::from_f64(5.0 * 2.0f64.powi(-24)) +        ); +    } + +    #[test] +    fn test_comparisons() { +        let zero = f16::from_f64(0.0); +        let one = f16::from_f64(1.0); +        let neg_zero = f16::from_f64(-0.0); +        let neg_one = f16::from_f64(-1.0); + +        assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); +        assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); +        assert!(zero == neg_zero); +        assert!(neg_zero == zero); +        assert!(!(zero != neg_zero)); +        assert!(!(neg_zero != zero)); +        assert!(!(zero < neg_zero)); +        assert!(!(neg_zero < zero)); +        assert!(zero <= neg_zero); +        assert!(neg_zero <= zero); +        assert!(!(zero > neg_zero)); +        assert!(!(neg_zero > zero)); +        assert!(zero >= neg_zero); +        assert!(neg_zero >= zero); + +        assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); +        assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); +        assert!(!(one == neg_zero)); +        assert!(!(neg_zero == one)); +        assert!(one != neg_zero); +        assert!(neg_zero != one); +        assert!(!(one < neg_zero)); +        assert!(neg_zero < one); +        assert!(!(one <= neg_zero)); +        assert!(neg_zero <= one); +        assert!(one > neg_zero); +        assert!(!(neg_zero > one)); +        assert!(one >= neg_zero); +        assert!(!(neg_zero >= one)); + +        assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); +        assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); +        assert!(!(one == neg_one)); +        assert!(!(neg_one == one)); +        assert!(one != neg_one); +        assert!(neg_one != one); +        assert!(!(one < neg_one)); +        assert!(neg_one < one); +        assert!(!(one <= neg_one)); +        assert!(neg_one <= one); +        assert!(one > neg_one); +        assert!(!(neg_one > one)); +        assert!(one >= neg_one); +        assert!(!(neg_one >= one)); +    } + +    #[test] +    #[allow(clippy::erasing_op, clippy::identity_op)] +    fn round_to_even_f32() { +        // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 +        let min_sub = f16::from_bits(1); +        let min_sub_f = (-24f32).exp2(); +        assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); +        assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + +        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) +        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) +        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) +        assert_eq!( +            f16::from_f32(min_sub_f * 0.49).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 0.50).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 0.51).to_bits(), +            min_sub.to_bits() * 1 +        ); + +        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) +        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) +        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) +        assert_eq!( +            f16::from_f32(min_sub_f * 1.49).to_bits(), +            min_sub.to_bits() * 1 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 1.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 1.51).to_bits(), +            min_sub.to_bits() * 2 +        ); + +        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) +        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) +        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) +        assert_eq!( +            f16::from_f32(min_sub_f * 2.49).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 2.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f32(min_sub_f * 2.51).to_bits(), +            min_sub.to_bits() * 3 +        ); + +        assert_eq!( +            f16::from_f32(2000.49f32).to_bits(), +            f16::from_f32(2000.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2000.50f32).to_bits(), +            f16::from_f32(2000.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2000.51f32).to_bits(), +            f16::from_f32(2001.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2001.49f32).to_bits(), +            f16::from_f32(2001.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2001.50f32).to_bits(), +            f16::from_f32(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2001.51f32).to_bits(), +            f16::from_f32(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2002.49f32).to_bits(), +            f16::from_f32(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2002.50f32).to_bits(), +            f16::from_f32(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f32(2002.51f32).to_bits(), +            f16::from_f32(2003.0).to_bits() +        ); +    } + +    #[test] +    #[allow(clippy::erasing_op, clippy::identity_op)] +    fn round_to_even_f64() { +        // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 +        let min_sub = f16::from_bits(1); +        let min_sub_f = (-24f64).exp2(); +        assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); +        assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + +        // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) +        // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) +        // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) +        assert_eq!( +            f16::from_f64(min_sub_f * 0.49).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 0.50).to_bits(), +            min_sub.to_bits() * 0 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 0.51).to_bits(), +            min_sub.to_bits() * 1 +        ); + +        // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) +        // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) +        // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) +        assert_eq!( +            f16::from_f64(min_sub_f * 1.49).to_bits(), +            min_sub.to_bits() * 1 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 1.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 1.51).to_bits(), +            min_sub.to_bits() * 2 +        ); + +        // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) +        // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) +        // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) +        assert_eq!( +            f16::from_f64(min_sub_f * 2.49).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 2.50).to_bits(), +            min_sub.to_bits() * 2 +        ); +        assert_eq!( +            f16::from_f64(min_sub_f * 2.51).to_bits(), +            min_sub.to_bits() * 3 +        ); + +        assert_eq!( +            f16::from_f64(2000.49f64).to_bits(), +            f16::from_f64(2000.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2000.50f64).to_bits(), +            f16::from_f64(2000.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2000.51f64).to_bits(), +            f16::from_f64(2001.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2001.49f64).to_bits(), +            f16::from_f64(2001.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2001.50f64).to_bits(), +            f16::from_f64(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2001.51f64).to_bits(), +            f16::from_f64(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2002.49f64).to_bits(), +            f16::from_f64(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2002.50f64).to_bits(), +            f16::from_f64(2002.0).to_bits() +        ); +        assert_eq!( +            f16::from_f64(2002.51f64).to_bits(), +            f16::from_f64(2003.0).to_bits() +        ); +    } + +    impl quickcheck::Arbitrary for f16 { +        fn arbitrary(g: &mut quickcheck::Gen) -> Self { +            f16(u16::arbitrary(g)) +        } +    } + +    #[quickcheck] +    fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool { +        let roundtrip = f16::from_f32(f.to_f32()); +        if f.is_nan() { +            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() +        } else { +            f.0 == roundtrip.0 +        } +    } + +    #[quickcheck] +    fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool { +        let roundtrip = f16::from_f64(f.to_f64()); +        if f.is_nan() { +            roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() +        } else { +            f.0 == roundtrip.0 +        } +    } +} diff --git a/vendor/half/src/binary16/convert.rs b/vendor/half/src/binary16/convert.rs new file mode 100644 index 0000000..b96910f --- /dev/null +++ b/vendor/half/src/binary16/convert.rs @@ -0,0 +1,752 @@ +#![allow(dead_code, unused_imports)] +use crate::leading_zeros::leading_zeros_u16; +use core::mem; + +macro_rules! convert_fn { +    (fn $name:ident($($var:ident : $vartype:ty),+) -> $restype:ty { +            if feature("f16c") { $f16c:expr } +            else { $fallback:expr }}) => { +        #[inline] +        pub(crate) fn $name($($var: $vartype),+) -> $restype { +            // Use CPU feature detection if using std +            #[cfg(all( +                feature = "use-intrinsics", +                feature = "std", +                any(target_arch = "x86", target_arch = "x86_64"), +                not(target_feature = "f16c") +            ))] +            { +                if is_x86_feature_detected!("f16c") { +                    $f16c +                } else { +                    $fallback +                } +            } +            // Use intrinsics directly when a compile target or using no_std +            #[cfg(all( +                feature = "use-intrinsics", +                any(target_arch = "x86", target_arch = "x86_64"), +                target_feature = "f16c" +            ))] +            { +                $f16c +            } +            // Fallback to software +            #[cfg(any( +                not(feature = "use-intrinsics"), +                not(any(target_arch = "x86", target_arch = "x86_64")), +                all(not(feature = "std"), not(target_feature = "f16c")) +            ))] +            { +                $fallback +            } +        } +    }; +} + +convert_fn! { +    fn f32_to_f16(f: f32) -> u16 { +        if feature("f16c") { +            unsafe { x86::f32_to_f16_x86_f16c(f) } +        } else { +            f32_to_f16_fallback(f) +        } +    } +} + +convert_fn! { +    fn f64_to_f16(f: f64) -> u16 { +        if feature("f16c") { +            unsafe { x86::f32_to_f16_x86_f16c(f as f32) } +        } else { +            f64_to_f16_fallback(f) +        } +    } +} + +convert_fn! { +    fn f16_to_f32(i: u16) -> f32 { +        if feature("f16c") { +            unsafe { x86::f16_to_f32_x86_f16c(i) } +        } else { +            f16_to_f32_fallback(i) +        } +    } +} + +convert_fn! { +    fn f16_to_f64(i: u16) -> f64 { +        if feature("f16c") { +            unsafe { x86::f16_to_f32_x86_f16c(i) as f64 } +        } else { +            f16_to_f64_fallback(i) +        } +    } +} + +convert_fn! { +    fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] { +        if feature("f16c") { +            unsafe { x86::f32x4_to_f16x4_x86_f16c(f) } +        } else { +            f32x4_to_f16x4_fallback(f) +        } +    } +} + +convert_fn! { +    fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] { +        if feature("f16c") { +            unsafe { x86::f16x4_to_f32x4_x86_f16c(i) } +        } else { +            f16x4_to_f32x4_fallback(i) +        } +    } +} + +convert_fn! { +    fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] { +        if feature("f16c") { +            unsafe { x86::f64x4_to_f16x4_x86_f16c(f) } +        } else { +            f64x4_to_f16x4_fallback(f) +        } +    } +} + +convert_fn! { +    fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] { +        if feature("f16c") { +            unsafe { x86::f16x4_to_f64x4_x86_f16c(i) } +        } else { +            f16x4_to_f64x4_fallback(i) +        } +    } +} + +convert_fn! { +    fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] { +        if feature("f16c") { +            unsafe { x86::f32x8_to_f16x8_x86_f16c(f) } +        } else { +            f32x8_to_f16x8_fallback(f) +        } +    } +} + +convert_fn! { +    fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] { +        if feature("f16c") { +            unsafe { x86::f16x8_to_f32x8_x86_f16c(i) } +        } else { +            f16x8_to_f32x8_fallback(i) +        } +    } +} + +convert_fn! { +    fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] { +        if feature("f16c") { +            unsafe { x86::f64x8_to_f16x8_x86_f16c(f) } +        } else { +            f64x8_to_f16x8_fallback(f) +        } +    } +} + +convert_fn! { +    fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] { +        if feature("f16c") { +            unsafe { x86::f16x8_to_f64x8_x86_f16c(i) } +        } else { +            f16x8_to_f64x8_fallback(i) +        } +    } +} + +convert_fn! { +    fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) -> () { +        if feature("f16c") { +            convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c, +                x86::f32x4_to_f16x4_x86_f16c) +        } else { +            slice_fallback(src, dst, f32_to_f16_fallback) +        } +    } +} + +convert_fn! { +    fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) -> () { +        if feature("f16c") { +            convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c, +                x86::f16x4_to_f32x4_x86_f16c) +        } else { +            slice_fallback(src, dst, f16_to_f32_fallback) +        } +    } +} + +convert_fn! { +    fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) -> () { +        if feature("f16c") { +            convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c, +                x86::f64x4_to_f16x4_x86_f16c) +        } else { +            slice_fallback(src, dst, f64_to_f16_fallback) +        } +    } +} + +convert_fn! { +    fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) -> () { +        if feature("f16c") { +            convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c, +                x86::f16x4_to_f64x4_x86_f16c) +        } else { +            slice_fallback(src, dst, f16_to_f64_fallback) +        } +    } +} + +/// Chunks sliced into x8 or x4 arrays +#[inline] +fn convert_chunked_slice_8<S: Copy + Default, D: Copy>( +    src: &[S], +    dst: &mut [D], +    fn8: unsafe fn(&[S; 8]) -> [D; 8], +    fn4: unsafe fn(&[S; 4]) -> [D; 4], +) { +    assert_eq!(src.len(), dst.len()); + +    // TODO: Can be further optimized with array_chunks when it becomes stabilized + +    let src_chunks = src.chunks_exact(8); +    let mut dst_chunks = dst.chunks_exact_mut(8); +    let src_remainder = src_chunks.remainder(); +    for (s, d) in src_chunks.zip(&mut dst_chunks) { +        let chunk: &[S; 8] = s.try_into().unwrap(); +        d.copy_from_slice(unsafe { &fn8(chunk) }); +    } + +    // Process remainder +    if src_remainder.len() > 4 { +        let mut buf: [S; 8] = Default::default(); +        buf[..src_remainder.len()].copy_from_slice(src_remainder); +        let vec = unsafe { fn8(&buf) }; +        let dst_remainder = dst_chunks.into_remainder(); +        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); +    } else if !src_remainder.is_empty() { +        let mut buf: [S; 4] = Default::default(); +        buf[..src_remainder.len()].copy_from_slice(src_remainder); +        let vec = unsafe { fn4(&buf) }; +        let dst_remainder = dst_chunks.into_remainder(); +        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); +    } +} + +/// Chunks sliced into x4 arrays +#[inline] +fn convert_chunked_slice_4<S: Copy + Default, D: Copy>( +    src: &[S], +    dst: &mut [D], +    f: unsafe fn(&[S; 4]) -> [D; 4], +) { +    assert_eq!(src.len(), dst.len()); + +    // TODO: Can be further optimized with array_chunks when it becomes stabilized + +    let src_chunks = src.chunks_exact(4); +    let mut dst_chunks = dst.chunks_exact_mut(4); +    let src_remainder = src_chunks.remainder(); +    for (s, d) in src_chunks.zip(&mut dst_chunks) { +        let chunk: &[S; 4] = s.try_into().unwrap(); +        d.copy_from_slice(unsafe { &f(chunk) }); +    } + +    // Process remainder +    if !src_remainder.is_empty() { +        let mut buf: [S; 4] = Default::default(); +        buf[..src_remainder.len()].copy_from_slice(src_remainder); +        let vec = unsafe { f(&buf) }; +        let dst_remainder = dst_chunks.into_remainder(); +        dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); +    } +} + +/////////////// Fallbacks //////////////// + +// In the below functions, round to nearest, with ties to even. +// Let us call the most significant bit that will be shifted out the round_bit. +// +// Round up if either +//  a) Removed part > tie. +//     (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0 +//  b) Removed part == tie, and retained part is odd. +//     (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0 +// (If removed part == tie and retained part is even, do not round up.) +// These two conditions can be combined into one: +//     (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0 +// which can be simplified into +//     (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0 + +#[inline] +pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 { +    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized +    // Convert to raw bytes +    let x: u32 = unsafe { mem::transmute(value) }; + +    // Extract IEEE754 components +    let sign = x & 0x8000_0000u32; +    let exp = x & 0x7F80_0000u32; +    let man = x & 0x007F_FFFFu32; + +    // Check for all exponent bits being set, which is Infinity or NaN +    if exp == 0x7F80_0000u32 { +        // Set mantissa MSB for NaN (and also keep shifted mantissa bits) +        let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; +        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16; +    } + +    // The number is normalized, start assembling half precision version +    let half_sign = sign >> 16; +    // Unbias the exponent, then bias for half precision +    let unbiased_exp = ((exp >> 23) as i32) - 127; +    let half_exp = unbiased_exp + 15; + +    // Check for exponent overflow, return +infinity +    if half_exp >= 0x1F { +        return (half_sign | 0x7C00u32) as u16; +    } + +    // Check for underflow +    if half_exp <= 0 { +        // Check mantissa for what we can do +        if 14 - half_exp > 24 { +            // No rounding possibility, so this is a full underflow, return signed zero +            return half_sign as u16; +        } +        // Don't forget about hidden leading mantissa bit when assembling mantissa +        let man = man | 0x0080_0000u32; +        let mut half_man = man >> (14 - half_exp); +        // Check for rounding (see comment above functions) +        let round_bit = 1 << (13 - half_exp); +        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +            half_man += 1; +        } +        // No exponent for subnormals +        return (half_sign | half_man) as u16; +    } + +    // Rebias the exponent +    let half_exp = (half_exp as u32) << 10; +    let half_man = man >> 13; +    // Check for rounding (see comment above functions) +    let round_bit = 0x0000_1000u32; +    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +        // Round it +        ((half_sign | half_exp | half_man) + 1) as u16 +    } else { +        (half_sign | half_exp | half_man) as u16 +    } +} + +#[inline] +pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 { +    // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always +    // be lost on half-precision. +    // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized +    let val: u64 = unsafe { mem::transmute(value) }; +    let x = (val >> 32) as u32; + +    // Extract IEEE754 components +    let sign = x & 0x8000_0000u32; +    let exp = x & 0x7FF0_0000u32; +    let man = x & 0x000F_FFFFu32; + +    // Check for all exponent bits being set, which is Infinity or NaN +    if exp == 0x7FF0_0000u32 { +        // Set mantissa MSB for NaN (and also keep shifted mantissa bits). +        // We also have to check the last 32 bits. +        let nan_bit = if man == 0 && (val as u32 == 0) { +            0 +        } else { +            0x0200u32 +        }; +        return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16; +    } + +    // The number is normalized, start assembling half precision version +    let half_sign = sign >> 16; +    // Unbias the exponent, then bias for half precision +    let unbiased_exp = ((exp >> 20) as i64) - 1023; +    let half_exp = unbiased_exp + 15; + +    // Check for exponent overflow, return +infinity +    if half_exp >= 0x1F { +        return (half_sign | 0x7C00u32) as u16; +    } + +    // Check for underflow +    if half_exp <= 0 { +        // Check mantissa for what we can do +        if 10 - half_exp > 21 { +            // No rounding possibility, so this is a full underflow, return signed zero +            return half_sign as u16; +        } +        // Don't forget about hidden leading mantissa bit when assembling mantissa +        let man = man | 0x0010_0000u32; +        let mut half_man = man >> (11 - half_exp); +        // Check for rounding (see comment above functions) +        let round_bit = 1 << (10 - half_exp); +        if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +            half_man += 1; +        } +        // No exponent for subnormals +        return (half_sign | half_man) as u16; +    } + +    // Rebias the exponent +    let half_exp = (half_exp as u32) << 10; +    let half_man = man >> 10; +    // Check for rounding (see comment above functions) +    let round_bit = 0x0000_0200u32; +    if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { +        // Round it +        ((half_sign | half_exp | half_man) + 1) as u16 +    } else { +        (half_sign | half_exp | half_man) as u16 +    } +} + +#[inline] +pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 { +    // Check for signed zero +    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized +    if i & 0x7FFFu16 == 0 { +        return unsafe { mem::transmute((i as u32) << 16) }; +    } + +    let half_sign = (i & 0x8000u16) as u32; +    let half_exp = (i & 0x7C00u16) as u32; +    let half_man = (i & 0x03FFu16) as u32; + +    // Check for an infinity or NaN when all exponent bits set +    if half_exp == 0x7C00u32 { +        // Check for signed infinity if mantissa is zero +        if half_man == 0 { +            return unsafe { mem::transmute((half_sign << 16) | 0x7F80_0000u32) }; +        } else { +            // NaN, keep current mantissa but also set most significiant mantissa bit +            return unsafe { +                mem::transmute((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13)) +            }; +        } +    } + +    // Calculate single-precision components with adjusted exponent +    let sign = half_sign << 16; +    // Unbias exponent +    let unbiased_exp = ((half_exp as i32) >> 10) - 15; + +    // Check for subnormals, which will be normalized by adjusting exponent +    if half_exp == 0 { +        // Calculate how much to adjust the exponent by +        let e = leading_zeros_u16(half_man as u16) - 6; + +        // Rebias and adjust exponent +        let exp = (127 - 15 - e) << 23; +        let man = (half_man << (14 + e)) & 0x7F_FF_FFu32; +        return unsafe { mem::transmute(sign | exp | man) }; +    } + +    // Rebias exponent for a normalized normal +    let exp = ((unbiased_exp + 127) as u32) << 23; +    let man = (half_man & 0x03FFu32) << 13; +    unsafe { mem::transmute(sign | exp | man) } +} + +#[inline] +pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 { +    // Check for signed zero +    // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized +    if i & 0x7FFFu16 == 0 { +        return unsafe { mem::transmute((i as u64) << 48) }; +    } + +    let half_sign = (i & 0x8000u16) as u64; +    let half_exp = (i & 0x7C00u16) as u64; +    let half_man = (i & 0x03FFu16) as u64; + +    // Check for an infinity or NaN when all exponent bits set +    if half_exp == 0x7C00u64 { +        // Check for signed infinity if mantissa is zero +        if half_man == 0 { +            return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; +        } else { +            // NaN, keep current mantissa but also set most significiant mantissa bit +            return unsafe { +                mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42)) +            }; +        } +    } + +    // Calculate double-precision components with adjusted exponent +    let sign = half_sign << 48; +    // Unbias exponent +    let unbiased_exp = ((half_exp as i64) >> 10) - 15; + +    // Check for subnormals, which will be normalized by adjusting exponent +    if half_exp == 0 { +        // Calculate how much to adjust the exponent by +        let e = leading_zeros_u16(half_man as u16) - 6; + +        // Rebias and adjust exponent +        let exp = ((1023 - 15 - e) as u64) << 52; +        let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64; +        return unsafe { mem::transmute(sign | exp | man) }; +    } + +    // Rebias exponent for a normalized normal +    let exp = ((unbiased_exp + 1023) as u64) << 52; +    let man = (half_man & 0x03FFu64) << 42; +    unsafe { mem::transmute(sign | exp | man) } +} + +#[inline] +fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] { +    [ +        f16_to_f32_fallback(v[0]), +        f16_to_f32_fallback(v[1]), +        f16_to_f32_fallback(v[2]), +        f16_to_f32_fallback(v[3]), +    ] +} + +#[inline] +fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] { +    [ +        f32_to_f16_fallback(v[0]), +        f32_to_f16_fallback(v[1]), +        f32_to_f16_fallback(v[2]), +        f32_to_f16_fallback(v[3]), +    ] +} + +#[inline] +fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] { +    [ +        f16_to_f64_fallback(v[0]), +        f16_to_f64_fallback(v[1]), +        f16_to_f64_fallback(v[2]), +        f16_to_f64_fallback(v[3]), +    ] +} + +#[inline] +fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] { +    [ +        f64_to_f16_fallback(v[0]), +        f64_to_f16_fallback(v[1]), +        f64_to_f16_fallback(v[2]), +        f64_to_f16_fallback(v[3]), +    ] +} + +#[inline] +fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] { +    [ +        f16_to_f32_fallback(v[0]), +        f16_to_f32_fallback(v[1]), +        f16_to_f32_fallback(v[2]), +        f16_to_f32_fallback(v[3]), +        f16_to_f32_fallback(v[4]), +        f16_to_f32_fallback(v[5]), +        f16_to_f32_fallback(v[6]), +        f16_to_f32_fallback(v[7]), +    ] +} + +#[inline] +fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] { +    [ +        f32_to_f16_fallback(v[0]), +        f32_to_f16_fallback(v[1]), +        f32_to_f16_fallback(v[2]), +        f32_to_f16_fallback(v[3]), +        f32_to_f16_fallback(v[4]), +        f32_to_f16_fallback(v[5]), +        f32_to_f16_fallback(v[6]), +        f32_to_f16_fallback(v[7]), +    ] +} + +#[inline] +fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] { +    [ +        f16_to_f64_fallback(v[0]), +        f16_to_f64_fallback(v[1]), +        f16_to_f64_fallback(v[2]), +        f16_to_f64_fallback(v[3]), +        f16_to_f64_fallback(v[4]), +        f16_to_f64_fallback(v[5]), +        f16_to_f64_fallback(v[6]), +        f16_to_f64_fallback(v[7]), +    ] +} + +#[inline] +fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] { +    [ +        f64_to_f16_fallback(v[0]), +        f64_to_f16_fallback(v[1]), +        f64_to_f16_fallback(v[2]), +        f64_to_f16_fallback(v[3]), +        f64_to_f16_fallback(v[4]), +        f64_to_f16_fallback(v[5]), +        f64_to_f16_fallback(v[6]), +        f64_to_f16_fallback(v[7]), +    ] +} + +#[inline] +fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) { +    assert_eq!(src.len(), dst.len()); +    for (s, d) in src.iter().copied().zip(dst.iter_mut()) { +        *d = f(s); +    } +} + +/////////////// x86/x86_64 f16c //////////////// +#[cfg(all( +    feature = "use-intrinsics", +    any(target_arch = "x86", target_arch = "x86_64") +))] +mod x86 { +    use core::{mem::MaybeUninit, ptr}; + +    #[cfg(target_arch = "x86")] +    use core::arch::x86::{ +        __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, +        _MM_FROUND_TO_NEAREST_INT, +    }; +    #[cfg(target_arch = "x86_64")] +    use core::arch::x86_64::{ +        __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph, +        _MM_FROUND_TO_NEAREST_INT, +    }; + +    use super::convert_chunked_slice_8; + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 { +        let mut vec = MaybeUninit::<__m128i>::zeroed(); +        vec.as_mut_ptr().cast::<u16>().write(i); +        let retval = _mm_cvtph_ps(vec.assume_init()); +        *(&retval as *const __m128).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 { +        let mut vec = MaybeUninit::<__m128>::zeroed(); +        vec.as_mut_ptr().cast::<f32>().write(f); +        let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); +        *(&retval as *const __m128i).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] { +        let mut vec = MaybeUninit::<__m128i>::zeroed(); +        ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); +        let retval = _mm_cvtph_ps(vec.assume_init()); +        *(&retval as *const __m128).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] { +        let mut vec = MaybeUninit::<__m128>::uninit(); +        ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); +        let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); +        *(&retval as *const __m128i).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] { +        let array = f16x4_to_f32x4_x86_f16c(v); +        // Let compiler vectorize this regular cast for now. +        // TODO: investigate auto-detecting sse2/avx convert features +        [ +            array[0] as f64, +            array[1] as f64, +            array[2] as f64, +            array[3] as f64, +        ] +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] { +        // Let compiler vectorize this regular cast for now. +        // TODO: investigate auto-detecting sse2/avx convert features +        let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; +        f32x4_to_f16x4_x86_f16c(&v) +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] { +        let mut vec = MaybeUninit::<__m128i>::zeroed(); +        ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); +        let retval = _mm256_cvtph_ps(vec.assume_init()); +        *(&retval as *const __m256).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] { +        let mut vec = MaybeUninit::<__m256>::uninit(); +        ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); +        let retval = _mm256_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); +        *(&retval as *const __m128i).cast() +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] { +        let array = f16x8_to_f32x8_x86_f16c(v); +        // Let compiler vectorize this regular cast for now. +        // TODO: investigate auto-detecting sse2/avx convert features +        [ +            array[0] as f64, +            array[1] as f64, +            array[2] as f64, +            array[3] as f64, +            array[4] as f64, +            array[5] as f64, +            array[6] as f64, +            array[7] as f64, +        ] +    } + +    #[target_feature(enable = "f16c")] +    #[inline] +    pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] { +        // Let compiler vectorize this regular cast for now. +        // TODO: investigate auto-detecting sse2/avx convert features +        let v = [ +            v[0] as f32, +            v[1] as f32, +            v[2] as f32, +            v[3] as f32, +            v[4] as f32, +            v[5] as f32, +            v[6] as f32, +            v[7] as f32, +        ]; +        f32x8_to_f16x8_x86_f16c(&v) +    } +} diff --git a/vendor/half/src/leading_zeros.rs b/vendor/half/src/leading_zeros.rs new file mode 100644 index 0000000..6c73148 --- /dev/null +++ b/vendor/half/src/leading_zeros.rs @@ -0,0 +1,62 @@ +// https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros + +#[cfg(not(any(all( +    target_arch = "spirv", +    not(all( +        target_feature = "IntegerFunctions2INTEL", +        target_feature = "SPV_INTEL_shader_integer_functions2" +    )) +))))] +pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { +    x.leading_zeros() +} + +#[cfg(all( +    target_arch = "spirv", +    not(all( +        target_feature = "IntegerFunctions2INTEL", +        target_feature = "SPV_INTEL_shader_integer_functions2" +    )) +))] +pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { +    leading_zeros_u16_fallback(x) +} + +#[cfg(any( +    test, +    all( +        target_arch = "spirv", +        not(all( +            target_feature = "IntegerFunctions2INTEL", +            target_feature = "SPV_INTEL_shader_integer_functions2" +        )) +    ) +))] +const fn leading_zeros_u16_fallback(mut x: u16) -> u32 { +    use crunchy::unroll; +    let mut c = 0; +    let msb = 1 << 15; +    unroll! { for i in 0 .. 16 { +        if x & msb == 0 { +            c += 1; +        } else { +            return c; +        } +        #[allow(unused_assignments)] +        if i < 15 { +            x <<= 1; +        } +    }} +    c +} + +#[cfg(test)] +mod test { + +    #[test] +    fn leading_zeros_u16_fallback() { +        for x in [44, 97, 304, 1179, 23571] { +            assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros()); +        } +    } +} diff --git a/vendor/half/src/lib.rs b/vendor/half/src/lib.rs new file mode 100644 index 0000000..f821945 --- /dev/null +++ b/vendor/half/src/lib.rs @@ -0,0 +1,233 @@ +//! A crate that provides support for half-precision 16-bit floating point types. +//! +//! This crate provides the [`f16`] type, which is an implementation of the IEEE 754-2008 standard +//! [`binary16`] a.k.a `half` floating point type. This 16-bit floating point type is intended for +//! efficient storage where the full range and precision of a larger floating point value is not +//! required. This is especially useful for image storage formats. +//! +//! This crate also provides a [`bf16`] type, an alternative 16-bit floating point format. The +//! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the +//! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11 +//! bits for [`f16`]). See the [`bf16`] type for details. +//! +//! Because [`f16`] and [`bf16`] are primarily for efficient storage, floating point operations such +//! as addition, multiplication, etc. are not implemented by hardware. While this crate does provide +//! the appropriate trait implementations for basic operations, they each convert the value to +//! [`f32`] before performing the operation and then back afterward. When performing complex +//! arithmetic, manually convert to and from [`f32`] before and after to reduce repeated conversions +//! for each operation. +//! +//! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of +//! [`u16`] slices to both [`f16`] and [`bf16`], as well as efficient vectorized conversions of +//! larger buffers of floating point values to and from these half formats. +//! +//! The crate uses `#[no_std]` by default, so can be used in embedded environments without using the +//! Rust [`std`] library. A `std` feature to enable support for the standard library is available, +//! see the [Cargo Features](#cargo-features) section below. +//! +//! A [`prelude`] module is provided for easy importing of available utility traits. +//! +//! # Serialization +//! +//! When the `serde` feature is enabled, [`f16`] and [`bf16`] will be serialized as a newtype of +//! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for +//! storage. For string formats like JSON, however, this isn't as useful, and due to design +//! limitations of serde, it's not possible for the default `Serialize` implementation to support +//! different serialization for different formats. +//! +//! Instead, it's up to the containter type of the floats to control how it is serialized. This can +//! easily be controlled when using the derive macros using `#[serde(serialize_with="")]` +//! attributes. For both [`f16`] and [`bf16`] a `serialize_as_f32` and `serialize_as_string` are +//! provided for use with this attribute. +//! +//! Deserialization of both float types supports deserializing from the default serialization, +//! strings, and `f32`/`f64` values, so no additional work is required. +//! +//! # Cargo Features +//! +//! This crate supports a number of optional cargo features. None of these features are enabled by +//! default, even `std`. +//! +//! - **`use-intrinsics`** -- Use [`core::arch`] hardware intrinsics for `f16` and `bf16` conversions +//!   if available on the compiler target. This feature currently only works on nightly Rust +//!   until the corresponding intrinsics are stabilized. +//! +//!   When this feature is enabled and the hardware supports it, the functions and traits in the +//!   [`slice`][mod@slice] module will use vectorized SIMD intructions for increased efficiency. +//! +//!   By default, without this feature, conversions are done only in software, which will also be +//!   the fallback if the target does not have hardware support. Note that without the `std` +//!   feature enabled, no runtime CPU feature detection is used, so the hardware support is only +//!   compiled if the compiler target supports the CPU feature. +//! +//! - **`alloc`** -- Enable use of the [`alloc`] crate when not using the `std` library. +//! +//!   Among other functions, this enables the [`vec`] module, which contains zero-copy +//!   conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and +//!   `Vec<f16>` or `Vec<bf16>` arrays, and vice versa. +//! +//! - **`std`** -- Enable features that depend on the Rust [`std`] library. This also enables the +//!   `alloc` feature automatically. +//! +//!   Enabling the `std` feature also enables runtime CPU feature detection when the +//!   `use-intrsincis` feature is also enabled. Without this feature detection, intrinsics are only +//!   used when compiler target supports the target feature. +//! +//! - **`serde`** -- Adds support for the [`serde`] crate by implementing [`Serialize`] and +//!   [`Deserialize`] traits for both [`f16`] and [`bf16`]. +//! +//! - **`num-traits`** -- Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`], +//!   [`FromPrimitive`], [`AsPrimitive`], [`Num`], [`Float`], [`FloatCore`], and [`Bounded`] traits +//!   for both [`f16`] and [`bf16`]. +//! +//! - **`bytemuck`** -- Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and +//!   [`Pod`] traits for both [`f16`] and [`bf16`]. +//! +//! - **`zerocopy`** -- Adds support for the [`zerocopy`] crate by implementing [`AsBytes`] and +//!   [`FromBytes`] traits for both [`f16`] and [`bf16`]. +//! +//! [`alloc`]: https://doc.rust-lang.org/alloc/ +//! [`std`]: https://doc.rust-lang.org/std/ +//! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +//! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +//! [`serde`]: https://crates.io/crates/serde +//! [`bytemuck`]: https://crates.io/crates/bytemuck +//! [`num-traits`]: https://crates.io/crates/num-traits +//! [`zerocopy`]: https://crates.io/crates/zerocopy +#![cfg_attr( +    feature = "alloc", +    doc = " +[`vec`]: mod@vec" +)] +#![cfg_attr( +    not(feature = "alloc"), +    doc = " +[`vec`]: # +[`Vec`]: https://docs.rust-lang.org/stable/alloc/vec/struct.Vec.html" +)] +#![cfg_attr( +    feature = "serde", +    doc = " +[`Serialize`]: serde::Serialize +[`Deserialize`]: serde::Deserialize" +)] +#![cfg_attr( +    not(feature = "serde"), +    doc = " +[`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html +[`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html" +)] +#![cfg_attr( +    feature = "num-traits", +    doc = " +[`ToPrimitive`]: ::num_traits::ToPrimitive +[`FromPrimitive`]: ::num_traits::FromPrimitive +[`AsPrimitive`]: ::num_traits::AsPrimitive +[`Num`]: ::num_traits::Num +[`Float`]: ::num_traits::Float +[`FloatCore`]: ::num_traits::float::FloatCore +[`Bounded`]: ::num_traits::Bounded" +)] +#![cfg_attr( +    not(feature = "num-traits"), +    doc = " +[`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html +[`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html +[`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html +[`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html +[`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html +[`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html +[`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html" +)] +#![cfg_attr( +    feature = "bytemuck", +    doc = " +[`Zeroable`]: bytemuck::Zeroable +[`Pod`]: bytemuck::Pod" +)] +#![cfg_attr( +    not(feature = "bytemuck"), +    doc = " +[`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html +[`Pod`]: https://docs.rs/bytemuck/*bytemuck/trait.Pod.html" +)] +#![cfg_attr( +    feature = "zerocopy", +    doc = " +[`AsBytes`]: zerocopy::AsBytes +[`FromBytes`]: zerocopy::FromBytes" +)] +#![cfg_attr( +    not(feature = "zerocopy"), +    doc = " +[`AsBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.AsBytes.html +[`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html" +)] +#![warn( +    missing_docs, +    missing_copy_implementations, +    trivial_numeric_casts, +    future_incompatible +)] +#![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))] +#![allow(clippy::verbose_bit_mask, clippy::cast_lossless)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr( +    all( +        feature = "use-intrinsics", +        any(target_arch = "x86", target_arch = "x86_64") +    ), +    feature(stdsimd, f16c_target_feature) +)] +#![doc(html_root_url = "https://docs.rs/half/2.2.1")] +#![doc(test(attr(deny(warnings), allow(unused))))] +#![cfg_attr(docsrs, feature(doc_cfg))] + +#[cfg(feature = "alloc")] +extern crate alloc; + +mod bfloat; +mod binary16; +mod leading_zeros; +#[cfg(feature = "num-traits")] +mod num_traits; + +#[cfg(not(target_arch = "spirv"))] +pub mod slice; +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub mod vec; + +pub use bfloat::bf16; +pub use binary16::f16; + +/// A collection of the most used items and traits in this crate for easy importing. +/// +/// # Examples +/// +/// ```rust +/// use half::prelude::*; +/// ``` +pub mod prelude { +    #[doc(no_inline)] +    pub use crate::{bf16, f16}; + +    #[cfg(not(target_arch = "spirv"))] +    #[doc(no_inline)] +    pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt}; + +    #[cfg(feature = "alloc")] +    #[doc(no_inline)] +    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +    pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt}; +} + +// Keep this module private to crate +mod private { +    use crate::{bf16, f16}; + +    pub trait SealedHalf {} + +    impl SealedHalf for f16 {} +    impl SealedHalf for bf16 {} +} diff --git a/vendor/half/src/num_traits.rs b/vendor/half/src/num_traits.rs new file mode 100644 index 0000000..4318699 --- /dev/null +++ b/vendor/half/src/num_traits.rs @@ -0,0 +1,1483 @@ +use crate::{bf16, f16}; +use core::cmp::Ordering; +use core::{num::FpCategory, ops::Div}; +use num_traits::{ +    AsPrimitive, Bounded, FloatConst, FromPrimitive, Num, NumCast, One, ToPrimitive, Zero, +}; + +impl ToPrimitive for f16 { +    #[inline] +    fn to_i64(&self) -> Option<i64> { +        Self::to_f32(*self).to_i64() +    } +    #[inline] +    fn to_u64(&self) -> Option<u64> { +        Self::to_f32(*self).to_u64() +    } +    #[inline] +    fn to_i8(&self) -> Option<i8> { +        Self::to_f32(*self).to_i8() +    } +    #[inline] +    fn to_u8(&self) -> Option<u8> { +        Self::to_f32(*self).to_u8() +    } +    #[inline] +    fn to_i16(&self) -> Option<i16> { +        Self::to_f32(*self).to_i16() +    } +    #[inline] +    fn to_u16(&self) -> Option<u16> { +        Self::to_f32(*self).to_u16() +    } +    #[inline] +    fn to_i32(&self) -> Option<i32> { +        Self::to_f32(*self).to_i32() +    } +    #[inline] +    fn to_u32(&self) -> Option<u32> { +        Self::to_f32(*self).to_u32() +    } +    #[inline] +    fn to_f32(&self) -> Option<f32> { +        Some(Self::to_f32(*self)) +    } +    #[inline] +    fn to_f64(&self) -> Option<f64> { +        Some(Self::to_f64(*self)) +    } +} + +impl FromPrimitive for f16 { +    #[inline] +    fn from_i64(n: i64) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u64(n: u64) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i8(n: i8) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u8(n: u8) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i16(n: i16) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u16(n: u16) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i32(n: i32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u32(n: u32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_f32(n: f32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_f64(n: f64) -> Option<Self> { +        n.to_f64().map(Self::from_f64) +    } +} + +impl Num for f16 { +    type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; + +    #[inline] +    fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> { +        Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) +    } +} + +impl One for f16 { +    #[inline] +    fn one() -> Self { +        Self::ONE +    } +} + +impl Zero for f16 { +    #[inline] +    fn zero() -> Self { +        Self::ZERO +    } + +    #[inline] +    fn is_zero(&self) -> bool { +        *self == Self::ZERO +    } +} + +impl NumCast for f16 { +    #[inline] +    fn from<T: ToPrimitive>(n: T) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +} + +impl num_traits::float::FloatCore for f16 { +    #[inline] +    fn infinity() -> Self { +        Self::INFINITY +    } + +    #[inline] +    fn neg_infinity() -> Self { +        Self::NEG_INFINITY +    } + +    #[inline] +    fn nan() -> Self { +        Self::NAN +    } + +    #[inline] +    fn neg_zero() -> Self { +        Self::NEG_ZERO +    } + +    #[inline] +    fn min_value() -> Self { +        Self::MIN +    } + +    #[inline] +    fn min_positive_value() -> Self { +        Self::MIN_POSITIVE +    } + +    #[inline] +    fn epsilon() -> Self { +        Self::EPSILON +    } + +    #[inline] +    fn max_value() -> Self { +        Self::MAX +    } + +    #[inline] +    fn is_nan(self) -> bool { +        self.is_nan() +    } + +    #[inline] +    fn is_infinite(self) -> bool { +        self.is_infinite() +    } + +    #[inline] +    fn is_finite(self) -> bool { +        self.is_finite() +    } + +    #[inline] +    fn is_normal(self) -> bool { +        self.is_normal() +    } + +    #[inline] +    fn classify(self) -> FpCategory { +        self.classify() +    } + +    #[inline] +    fn floor(self) -> Self { +        Self::from_f32(self.to_f32().floor()) +    } + +    #[inline] +    fn ceil(self) -> Self { +        Self::from_f32(self.to_f32().ceil()) +    } + +    #[inline] +    fn round(self) -> Self { +        Self::from_f32(self.to_f32().round()) +    } + +    #[inline] +    fn trunc(self) -> Self { +        Self::from_f32(self.to_f32().trunc()) +    } + +    #[inline] +    fn fract(self) -> Self { +        Self::from_f32(self.to_f32().fract()) +    } + +    #[inline] +    fn abs(self) -> Self { +        Self::from_bits(self.to_bits() & 0x7FFF) +    } + +    #[inline] +    fn signum(self) -> Self { +        self.signum() +    } + +    #[inline] +    fn is_sign_positive(self) -> bool { +        self.is_sign_positive() +    } + +    #[inline] +    fn is_sign_negative(self) -> bool { +        self.is_sign_negative() +    } + +    fn min(self, other: Self) -> Self { +        match self.partial_cmp(&other) { +            None => { +                if self.is_nan() { +                    other +                } else { +                    self +                } +            } +            Some(Ordering::Greater) | Some(Ordering::Equal) => other, +            Some(Ordering::Less) => self, +        } +    } + +    fn max(self, other: Self) -> Self { +        match self.partial_cmp(&other) { +            None => { +                if self.is_nan() { +                    other +                } else { +                    self +                } +            } +            Some(Ordering::Greater) | Some(Ordering::Equal) => self, +            Some(Ordering::Less) => other, +        } +    } + +    #[inline] +    fn recip(self) -> Self { +        Self::from_f32(self.to_f32().recip()) +    } + +    #[inline] +    fn powi(self, exp: i32) -> Self { +        Self::from_f32(self.to_f32().powi(exp)) +    } + +    #[inline] +    fn to_degrees(self) -> Self { +        Self::from_f32(self.to_f32().to_degrees()) +    } + +    #[inline] +    fn to_radians(self) -> Self { +        Self::from_f32(self.to_f32().to_radians()) +    } + +    #[inline] +    fn integer_decode(self) -> (u64, i16, i8) { +        num_traits::float::FloatCore::integer_decode(self.to_f32()) +    } +} + +impl num_traits::float::Float for f16 { +    #[inline] +    fn nan() -> Self { +        Self::NAN +    } + +    #[inline] +    fn infinity() -> Self { +        Self::INFINITY +    } + +    #[inline] +    fn neg_infinity() -> Self { +        Self::NEG_INFINITY +    } + +    #[inline] +    fn neg_zero() -> Self { +        Self::NEG_ZERO +    } + +    #[inline] +    fn min_value() -> Self { +        Self::MIN +    } + +    #[inline] +    fn min_positive_value() -> Self { +        Self::MIN_POSITIVE +    } + +    #[inline] +    fn epsilon() -> Self { +        Self::EPSILON +    } + +    #[inline] +    fn max_value() -> Self { +        Self::MAX +    } + +    #[inline] +    fn is_nan(self) -> bool { +        self.is_nan() +    } + +    #[inline] +    fn is_infinite(self) -> bool { +        self.is_infinite() +    } + +    #[inline] +    fn is_finite(self) -> bool { +        self.is_finite() +    } + +    #[inline] +    fn is_normal(self) -> bool { +        self.is_normal() +    } + +    #[inline] +    fn classify(self) -> FpCategory { +        self.classify() +    } + +    #[inline] +    fn floor(self) -> Self { +        Self::from_f32(self.to_f32().floor()) +    } + +    #[inline] +    fn ceil(self) -> Self { +        Self::from_f32(self.to_f32().ceil()) +    } + +    #[inline] +    fn round(self) -> Self { +        Self::from_f32(self.to_f32().round()) +    } + +    #[inline] +    fn trunc(self) -> Self { +        Self::from_f32(self.to_f32().trunc()) +    } + +    #[inline] +    fn fract(self) -> Self { +        Self::from_f32(self.to_f32().fract()) +    } + +    #[inline] +    fn abs(self) -> Self { +        Self::from_f32(self.to_f32().abs()) +    } + +    #[inline] +    fn signum(self) -> Self { +        Self::from_f32(self.to_f32().signum()) +    } + +    #[inline] +    fn is_sign_positive(self) -> bool { +        self.is_sign_positive() +    } + +    #[inline] +    fn is_sign_negative(self) -> bool { +        self.is_sign_negative() +    } + +    #[inline] +    fn mul_add(self, a: Self, b: Self) -> Self { +        Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) +    } + +    #[inline] +    fn recip(self) -> Self { +        Self::from_f32(self.to_f32().recip()) +    } + +    #[inline] +    fn powi(self, n: i32) -> Self { +        Self::from_f32(self.to_f32().powi(n)) +    } + +    #[inline] +    fn powf(self, n: Self) -> Self { +        Self::from_f32(self.to_f32().powf(n.to_f32())) +    } + +    #[inline] +    fn sqrt(self) -> Self { +        Self::from_f32(self.to_f32().sqrt()) +    } + +    #[inline] +    fn exp(self) -> Self { +        Self::from_f32(self.to_f32().exp()) +    } + +    #[inline] +    fn exp2(self) -> Self { +        Self::from_f32(self.to_f32().exp2()) +    } + +    #[inline] +    fn ln(self) -> Self { +        Self::from_f32(self.to_f32().ln()) +    } + +    #[inline] +    fn log(self, base: Self) -> Self { +        Self::from_f32(self.to_f32().log(base.to_f32())) +    } + +    #[inline] +    fn log2(self) -> Self { +        Self::from_f32(self.to_f32().log2()) +    } + +    #[inline] +    fn log10(self) -> Self { +        Self::from_f32(self.to_f32().log10()) +    } + +    #[inline] +    fn to_degrees(self) -> Self { +        Self::from_f32(self.to_f32().to_degrees()) +    } + +    #[inline] +    fn to_radians(self) -> Self { +        Self::from_f32(self.to_f32().to_radians()) +    } + +    #[inline] +    fn max(self, other: Self) -> Self { +        self.max(other) +    } + +    #[inline] +    fn min(self, other: Self) -> Self { +        self.min(other) +    } + +    #[inline] +    fn abs_sub(self, other: Self) -> Self { +        Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) +    } + +    #[inline] +    fn cbrt(self) -> Self { +        Self::from_f32(self.to_f32().cbrt()) +    } + +    #[inline] +    fn hypot(self, other: Self) -> Self { +        Self::from_f32(self.to_f32().hypot(other.to_f32())) +    } + +    #[inline] +    fn sin(self) -> Self { +        Self::from_f32(self.to_f32().sin()) +    } + +    #[inline] +    fn cos(self) -> Self { +        Self::from_f32(self.to_f32().cos()) +    } + +    #[inline] +    fn tan(self) -> Self { +        Self::from_f32(self.to_f32().tan()) +    } + +    #[inline] +    fn asin(self) -> Self { +        Self::from_f32(self.to_f32().asin()) +    } + +    #[inline] +    fn acos(self) -> Self { +        Self::from_f32(self.to_f32().acos()) +    } + +    #[inline] +    fn atan(self) -> Self { +        Self::from_f32(self.to_f32().atan()) +    } + +    #[inline] +    fn atan2(self, other: Self) -> Self { +        Self::from_f32(self.to_f32().atan2(other.to_f32())) +    } + +    #[inline] +    fn sin_cos(self) -> (Self, Self) { +        let (sin, cos) = self.to_f32().sin_cos(); +        (Self::from_f32(sin), Self::from_f32(cos)) +    } + +    #[inline] +    fn exp_m1(self) -> Self { +        Self::from_f32(self.to_f32().exp_m1()) +    } + +    #[inline] +    fn ln_1p(self) -> Self { +        Self::from_f32(self.to_f32().ln_1p()) +    } + +    #[inline] +    fn sinh(self) -> Self { +        Self::from_f32(self.to_f32().sinh()) +    } + +    #[inline] +    fn cosh(self) -> Self { +        Self::from_f32(self.to_f32().cosh()) +    } + +    #[inline] +    fn tanh(self) -> Self { +        Self::from_f32(self.to_f32().tanh()) +    } + +    #[inline] +    fn asinh(self) -> Self { +        Self::from_f32(self.to_f32().asinh()) +    } + +    #[inline] +    fn acosh(self) -> Self { +        Self::from_f32(self.to_f32().acosh()) +    } + +    #[inline] +    fn atanh(self) -> Self { +        Self::from_f32(self.to_f32().atanh()) +    } + +    #[inline] +    fn integer_decode(self) -> (u64, i16, i8) { +        num_traits::float::Float::integer_decode(self.to_f32()) +    } +} + +impl FloatConst for f16 { +    #[inline] +    fn E() -> Self { +        Self::E +    } + +    #[inline] +    fn FRAC_1_PI() -> Self { +        Self::FRAC_1_PI +    } + +    #[inline] +    fn FRAC_1_SQRT_2() -> Self { +        Self::FRAC_1_SQRT_2 +    } + +    #[inline] +    fn FRAC_2_PI() -> Self { +        Self::FRAC_2_PI +    } + +    #[inline] +    fn FRAC_2_SQRT_PI() -> Self { +        Self::FRAC_2_SQRT_PI +    } + +    #[inline] +    fn FRAC_PI_2() -> Self { +        Self::FRAC_PI_2 +    } + +    #[inline] +    fn FRAC_PI_3() -> Self { +        Self::FRAC_PI_3 +    } + +    #[inline] +    fn FRAC_PI_4() -> Self { +        Self::FRAC_PI_4 +    } + +    #[inline] +    fn FRAC_PI_6() -> Self { +        Self::FRAC_PI_6 +    } + +    #[inline] +    fn FRAC_PI_8() -> Self { +        Self::FRAC_PI_8 +    } + +    #[inline] +    fn LN_10() -> Self { +        Self::LN_10 +    } + +    #[inline] +    fn LN_2() -> Self { +        Self::LN_2 +    } + +    #[inline] +    fn LOG10_E() -> Self { +        Self::LOG10_E +    } + +    #[inline] +    fn LOG2_E() -> Self { +        Self::LOG2_E +    } + +    #[inline] +    fn PI() -> Self { +        Self::PI +    } + +    fn SQRT_2() -> Self { +        Self::SQRT_2 +    } + +    #[inline] +    fn LOG10_2() -> Self +    where +        Self: Sized + Div<Self, Output = Self>, +    { +        Self::LOG10_2 +    } + +    #[inline] +    fn LOG2_10() -> Self +    where +        Self: Sized + Div<Self, Output = Self>, +    { +        Self::LOG2_10 +    } +} + +impl Bounded for f16 { +    #[inline] +    fn min_value() -> Self { +        f16::MIN +    } + +    #[inline] +    fn max_value() -> Self { +        f16::MAX +    } +} + +macro_rules! impl_as_primitive_to_f16 { +    ($ty:ty, $meth:ident) => { +        impl AsPrimitive<$ty> for f16 { +            #[inline] +            fn as_(self) -> $ty { +                self.$meth().as_() +            } +        } +    }; +} + +impl AsPrimitive<f16> for f16 { +    #[inline] +    fn as_(self) -> f16 { +        self +    } +} + +impl_as_primitive_to_f16!(i64, to_f32); +impl_as_primitive_to_f16!(u64, to_f32); +impl_as_primitive_to_f16!(i8, to_f32); +impl_as_primitive_to_f16!(u8, to_f32); +impl_as_primitive_to_f16!(i16, to_f32); +impl_as_primitive_to_f16!(u16, to_f32); +impl_as_primitive_to_f16!(i32, to_f32); +impl_as_primitive_to_f16!(u32, to_f32); +impl_as_primitive_to_f16!(isize, to_f32); +impl_as_primitive_to_f16!(usize, to_f32); +impl_as_primitive_to_f16!(f32, to_f32); +impl_as_primitive_to_f16!(f64, to_f64); + +macro_rules! impl_as_primitive_f16_from { +    ($ty:ty, $meth:ident) => { +        impl AsPrimitive<f16> for $ty { +            #[inline] +            fn as_(self) -> f16 { +                f16::$meth(self.as_()) +            } +        } +    }; +} + +impl_as_primitive_f16_from!(i64, from_f32); +impl_as_primitive_f16_from!(u64, from_f32); +impl_as_primitive_f16_from!(i8, from_f32); +impl_as_primitive_f16_from!(u8, from_f32); +impl_as_primitive_f16_from!(i16, from_f32); +impl_as_primitive_f16_from!(u16, from_f32); +impl_as_primitive_f16_from!(i32, from_f32); +impl_as_primitive_f16_from!(u32, from_f32); +impl_as_primitive_f16_from!(isize, from_f32); +impl_as_primitive_f16_from!(usize, from_f32); +impl_as_primitive_f16_from!(f32, from_f32); +impl_as_primitive_f16_from!(f64, from_f64); + +impl ToPrimitive for bf16 { +    #[inline] +    fn to_i64(&self) -> Option<i64> { +        Self::to_f32(*self).to_i64() +    } +    #[inline] +    fn to_u64(&self) -> Option<u64> { +        Self::to_f32(*self).to_u64() +    } +    #[inline] +    fn to_i8(&self) -> Option<i8> { +        Self::to_f32(*self).to_i8() +    } +    #[inline] +    fn to_u8(&self) -> Option<u8> { +        Self::to_f32(*self).to_u8() +    } +    #[inline] +    fn to_i16(&self) -> Option<i16> { +        Self::to_f32(*self).to_i16() +    } +    #[inline] +    fn to_u16(&self) -> Option<u16> { +        Self::to_f32(*self).to_u16() +    } +    #[inline] +    fn to_i32(&self) -> Option<i32> { +        Self::to_f32(*self).to_i32() +    } +    #[inline] +    fn to_u32(&self) -> Option<u32> { +        Self::to_f32(*self).to_u32() +    } +    #[inline] +    fn to_f32(&self) -> Option<f32> { +        Some(Self::to_f32(*self)) +    } +    #[inline] +    fn to_f64(&self) -> Option<f64> { +        Some(Self::to_f64(*self)) +    } +} + +impl FromPrimitive for bf16 { +    #[inline] +    fn from_i64(n: i64) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u64(n: u64) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i8(n: i8) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u8(n: u8) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i16(n: i16) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u16(n: u16) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_i32(n: i32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_u32(n: u32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_f32(n: f32) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +    #[inline] +    fn from_f64(n: f64) -> Option<Self> { +        n.to_f64().map(Self::from_f64) +    } +} + +impl Num for bf16 { +    type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; + +    #[inline] +    fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> { +        Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) +    } +} + +impl One for bf16 { +    #[inline] +    fn one() -> Self { +        Self::ONE +    } +} + +impl Zero for bf16 { +    #[inline] +    fn zero() -> Self { +        Self::ZERO +    } + +    #[inline] +    fn is_zero(&self) -> bool { +        *self == Self::ZERO +    } +} + +impl NumCast for bf16 { +    #[inline] +    fn from<T: ToPrimitive>(n: T) -> Option<Self> { +        n.to_f32().map(Self::from_f32) +    } +} + +impl num_traits::float::FloatCore for bf16 { +    #[inline] +    fn infinity() -> Self { +        Self::INFINITY +    } + +    #[inline] +    fn neg_infinity() -> Self { +        Self::NEG_INFINITY +    } + +    #[inline] +    fn nan() -> Self { +        Self::NAN +    } + +    #[inline] +    fn neg_zero() -> Self { +        Self::NEG_ZERO +    } + +    #[inline] +    fn min_value() -> Self { +        Self::MIN +    } + +    #[inline] +    fn min_positive_value() -> Self { +        Self::MIN_POSITIVE +    } + +    #[inline] +    fn epsilon() -> Self { +        Self::EPSILON +    } + +    #[inline] +    fn max_value() -> Self { +        Self::MAX +    } + +    #[inline] +    fn is_nan(self) -> bool { +        self.is_nan() +    } + +    #[inline] +    fn is_infinite(self) -> bool { +        self.is_infinite() +    } + +    #[inline] +    fn is_finite(self) -> bool { +        self.is_finite() +    } + +    #[inline] +    fn is_normal(self) -> bool { +        self.is_normal() +    } + +    #[inline] +    fn classify(self) -> FpCategory { +        self.classify() +    } + +    #[inline] +    fn floor(self) -> Self { +        Self::from_f32(self.to_f32().floor()) +    } + +    #[inline] +    fn ceil(self) -> Self { +        Self::from_f32(self.to_f32().ceil()) +    } + +    #[inline] +    fn round(self) -> Self { +        Self::from_f32(self.to_f32().round()) +    } + +    #[inline] +    fn trunc(self) -> Self { +        Self::from_f32(self.to_f32().trunc()) +    } + +    #[inline] +    fn fract(self) -> Self { +        Self::from_f32(self.to_f32().fract()) +    } + +    #[inline] +    fn abs(self) -> Self { +        Self::from_bits(self.to_bits() & 0x7FFF) +    } + +    #[inline] +    fn signum(self) -> Self { +        self.signum() +    } + +    #[inline] +    fn is_sign_positive(self) -> bool { +        self.is_sign_positive() +    } + +    #[inline] +    fn is_sign_negative(self) -> bool { +        self.is_sign_negative() +    } + +    fn min(self, other: Self) -> Self { +        match self.partial_cmp(&other) { +            None => { +                if self.is_nan() { +                    other +                } else { +                    self +                } +            } +            Some(Ordering::Greater) | Some(Ordering::Equal) => other, +            Some(Ordering::Less) => self, +        } +    } + +    fn max(self, other: Self) -> Self { +        match self.partial_cmp(&other) { +            None => { +                if self.is_nan() { +                    other +                } else { +                    self +                } +            } +            Some(Ordering::Greater) | Some(Ordering::Equal) => self, +            Some(Ordering::Less) => other, +        } +    } + +    #[inline] +    fn recip(self) -> Self { +        Self::from_f32(self.to_f32().recip()) +    } + +    #[inline] +    fn powi(self, exp: i32) -> Self { +        Self::from_f32(self.to_f32().powi(exp)) +    } + +    #[inline] +    fn to_degrees(self) -> Self { +        Self::from_f32(self.to_f32().to_degrees()) +    } + +    #[inline] +    fn to_radians(self) -> Self { +        Self::from_f32(self.to_f32().to_radians()) +    } + +    #[inline] +    fn integer_decode(self) -> (u64, i16, i8) { +        num_traits::float::FloatCore::integer_decode(self.to_f32()) +    } +} + +impl num_traits::float::Float for bf16 { +    #[inline] +    fn nan() -> Self { +        Self::NAN +    } + +    #[inline] +    fn infinity() -> Self { +        Self::INFINITY +    } + +    #[inline] +    fn neg_infinity() -> Self { +        Self::NEG_INFINITY +    } + +    #[inline] +    fn neg_zero() -> Self { +        Self::NEG_ZERO +    } + +    #[inline] +    fn min_value() -> Self { +        Self::MIN +    } + +    #[inline] +    fn min_positive_value() -> Self { +        Self::MIN_POSITIVE +    } + +    #[inline] +    fn epsilon() -> Self { +        Self::EPSILON +    } + +    #[inline] +    fn max_value() -> Self { +        Self::MAX +    } + +    #[inline] +    fn is_nan(self) -> bool { +        self.is_nan() +    } + +    #[inline] +    fn is_infinite(self) -> bool { +        self.is_infinite() +    } + +    #[inline] +    fn is_finite(self) -> bool { +        self.is_finite() +    } + +    #[inline] +    fn is_normal(self) -> bool { +        self.is_normal() +    } + +    #[inline] +    fn classify(self) -> FpCategory { +        self.classify() +    } + +    #[inline] +    fn floor(self) -> Self { +        Self::from_f32(self.to_f32().floor()) +    } + +    #[inline] +    fn ceil(self) -> Self { +        Self::from_f32(self.to_f32().ceil()) +    } + +    #[inline] +    fn round(self) -> Self { +        Self::from_f32(self.to_f32().round()) +    } + +    #[inline] +    fn trunc(self) -> Self { +        Self::from_f32(self.to_f32().trunc()) +    } + +    #[inline] +    fn fract(self) -> Self { +        Self::from_f32(self.to_f32().fract()) +    } + +    #[inline] +    fn abs(self) -> Self { +        Self::from_f32(self.to_f32().abs()) +    } + +    #[inline] +    fn signum(self) -> Self { +        Self::from_f32(self.to_f32().signum()) +    } + +    #[inline] +    fn is_sign_positive(self) -> bool { +        self.is_sign_positive() +    } + +    #[inline] +    fn is_sign_negative(self) -> bool { +        self.is_sign_negative() +    } + +    #[inline] +    fn mul_add(self, a: Self, b: Self) -> Self { +        Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) +    } + +    #[inline] +    fn recip(self) -> Self { +        Self::from_f32(self.to_f32().recip()) +    } + +    #[inline] +    fn powi(self, n: i32) -> Self { +        Self::from_f32(self.to_f32().powi(n)) +    } + +    #[inline] +    fn powf(self, n: Self) -> Self { +        Self::from_f32(self.to_f32().powf(n.to_f32())) +    } + +    #[inline] +    fn sqrt(self) -> Self { +        Self::from_f32(self.to_f32().sqrt()) +    } + +    #[inline] +    fn exp(self) -> Self { +        Self::from_f32(self.to_f32().exp()) +    } + +    #[inline] +    fn exp2(self) -> Self { +        Self::from_f32(self.to_f32().exp2()) +    } + +    #[inline] +    fn ln(self) -> Self { +        Self::from_f32(self.to_f32().ln()) +    } + +    #[inline] +    fn log(self, base: Self) -> Self { +        Self::from_f32(self.to_f32().log(base.to_f32())) +    } + +    #[inline] +    fn log2(self) -> Self { +        Self::from_f32(self.to_f32().log2()) +    } + +    #[inline] +    fn log10(self) -> Self { +        Self::from_f32(self.to_f32().log10()) +    } + +    #[inline] +    fn to_degrees(self) -> Self { +        Self::from_f32(self.to_f32().to_degrees()) +    } + +    #[inline] +    fn to_radians(self) -> Self { +        Self::from_f32(self.to_f32().to_radians()) +    } + +    #[inline] +    fn max(self, other: Self) -> Self { +        self.max(other) +    } + +    #[inline] +    fn min(self, other: Self) -> Self { +        self.min(other) +    } + +    #[inline] +    fn abs_sub(self, other: Self) -> Self { +        Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) +    } + +    #[inline] +    fn cbrt(self) -> Self { +        Self::from_f32(self.to_f32().cbrt()) +    } + +    #[inline] +    fn hypot(self, other: Self) -> Self { +        Self::from_f32(self.to_f32().hypot(other.to_f32())) +    } + +    #[inline] +    fn sin(self) -> Self { +        Self::from_f32(self.to_f32().sin()) +    } + +    #[inline] +    fn cos(self) -> Self { +        Self::from_f32(self.to_f32().cos()) +    } + +    #[inline] +    fn tan(self) -> Self { +        Self::from_f32(self.to_f32().tan()) +    } + +    #[inline] +    fn asin(self) -> Self { +        Self::from_f32(self.to_f32().asin()) +    } + +    #[inline] +    fn acos(self) -> Self { +        Self::from_f32(self.to_f32().acos()) +    } + +    #[inline] +    fn atan(self) -> Self { +        Self::from_f32(self.to_f32().atan()) +    } + +    #[inline] +    fn atan2(self, other: Self) -> Self { +        Self::from_f32(self.to_f32().atan2(other.to_f32())) +    } + +    #[inline] +    fn sin_cos(self) -> (Self, Self) { +        let (sin, cos) = self.to_f32().sin_cos(); +        (Self::from_f32(sin), Self::from_f32(cos)) +    } + +    #[inline] +    fn exp_m1(self) -> Self { +        Self::from_f32(self.to_f32().exp_m1()) +    } + +    #[inline] +    fn ln_1p(self) -> Self { +        Self::from_f32(self.to_f32().ln_1p()) +    } + +    #[inline] +    fn sinh(self) -> Self { +        Self::from_f32(self.to_f32().sinh()) +    } + +    #[inline] +    fn cosh(self) -> Self { +        Self::from_f32(self.to_f32().cosh()) +    } + +    #[inline] +    fn tanh(self) -> Self { +        Self::from_f32(self.to_f32().tanh()) +    } + +    #[inline] +    fn asinh(self) -> Self { +        Self::from_f32(self.to_f32().asinh()) +    } + +    #[inline] +    fn acosh(self) -> Self { +        Self::from_f32(self.to_f32().acosh()) +    } + +    #[inline] +    fn atanh(self) -> Self { +        Self::from_f32(self.to_f32().atanh()) +    } + +    #[inline] +    fn integer_decode(self) -> (u64, i16, i8) { +        num_traits::float::Float::integer_decode(self.to_f32()) +    } +} + +impl FloatConst for bf16 { +    #[inline] +    fn E() -> Self { +        Self::E +    } + +    #[inline] +    fn FRAC_1_PI() -> Self { +        Self::FRAC_1_PI +    } + +    #[inline] +    fn FRAC_1_SQRT_2() -> Self { +        Self::FRAC_1_SQRT_2 +    } + +    #[inline] +    fn FRAC_2_PI() -> Self { +        Self::FRAC_2_PI +    } + +    #[inline] +    fn FRAC_2_SQRT_PI() -> Self { +        Self::FRAC_2_SQRT_PI +    } + +    #[inline] +    fn FRAC_PI_2() -> Self { +        Self::FRAC_PI_2 +    } + +    #[inline] +    fn FRAC_PI_3() -> Self { +        Self::FRAC_PI_3 +    } + +    #[inline] +    fn FRAC_PI_4() -> Self { +        Self::FRAC_PI_4 +    } + +    #[inline] +    fn FRAC_PI_6() -> Self { +        Self::FRAC_PI_6 +    } + +    #[inline] +    fn FRAC_PI_8() -> Self { +        Self::FRAC_PI_8 +    } + +    #[inline] +    fn LN_10() -> Self { +        Self::LN_10 +    } + +    #[inline] +    fn LN_2() -> Self { +        Self::LN_2 +    } + +    #[inline] +    fn LOG10_E() -> Self { +        Self::LOG10_E +    } + +    #[inline] +    fn LOG2_E() -> Self { +        Self::LOG2_E +    } + +    #[inline] +    fn PI() -> Self { +        Self::PI +    } + +    #[inline] +    fn SQRT_2() -> Self { +        Self::SQRT_2 +    } + +    #[inline] +    fn LOG10_2() -> Self +    where +        Self: Sized + Div<Self, Output = Self>, +    { +        Self::LOG10_2 +    } + +    #[inline] +    fn LOG2_10() -> Self +    where +        Self: Sized + Div<Self, Output = Self>, +    { +        Self::LOG2_10 +    } +} + +impl Bounded for bf16 { +    #[inline] +    fn min_value() -> Self { +        bf16::MIN +    } + +    #[inline] +    fn max_value() -> Self { +        bf16::MAX +    } +} + +impl AsPrimitive<bf16> for bf16 { +    #[inline] +    fn as_(self) -> bf16 { +        self +    } +} + +macro_rules! impl_as_primitive_to_bf16 { +    ($ty:ty, $meth:ident) => { +        impl AsPrimitive<$ty> for bf16 { +            #[inline] +            fn as_(self) -> $ty { +                self.$meth().as_() +            } +        } +    }; +} + +impl_as_primitive_to_bf16!(i64, to_f32); +impl_as_primitive_to_bf16!(u64, to_f32); +impl_as_primitive_to_bf16!(i8, to_f32); +impl_as_primitive_to_bf16!(u8, to_f32); +impl_as_primitive_to_bf16!(i16, to_f32); +impl_as_primitive_to_bf16!(u16, to_f32); +impl_as_primitive_to_bf16!(i32, to_f32); +impl_as_primitive_to_bf16!(u32, to_f32); +impl_as_primitive_to_bf16!(isize, to_f32); +impl_as_primitive_to_bf16!(usize, to_f32); +impl_as_primitive_to_bf16!(f32, to_f32); +impl_as_primitive_to_bf16!(f64, to_f64); + +macro_rules! impl_as_primitive_bf16_from { +    ($ty:ty, $meth:ident) => { +        impl AsPrimitive<bf16> for $ty { +            #[inline] +            fn as_(self) -> bf16 { +                bf16::$meth(self.as_()) +            } +        } +    }; +} + +impl_as_primitive_bf16_from!(i64, from_f32); +impl_as_primitive_bf16_from!(u64, from_f32); +impl_as_primitive_bf16_from!(i8, from_f32); +impl_as_primitive_bf16_from!(u8, from_f32); +impl_as_primitive_bf16_from!(i16, from_f32); +impl_as_primitive_bf16_from!(u16, from_f32); +impl_as_primitive_bf16_from!(i32, from_f32); +impl_as_primitive_bf16_from!(u32, from_f32); +impl_as_primitive_bf16_from!(isize, from_f32); +impl_as_primitive_bf16_from!(usize, from_f32); +impl_as_primitive_bf16_from!(f32, from_f32); +impl_as_primitive_bf16_from!(f64, from_f64); diff --git a/vendor/half/src/slice.rs b/vendor/half/src/slice.rs new file mode 100644 index 0000000..f1e9feb --- /dev/null +++ b/vendor/half/src/slice.rs @@ -0,0 +1,854 @@ +//! Contains utility functions and traits to convert between slices of [`u16`] bits and [`f16`] or +//! [`bf16`] numbers. +//! +//! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, +//! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` +//! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of +//! larger buffers of floating point values, and are automatically included in the +//! [`prelude`][crate::prelude] module. + +use crate::{bf16, binary16::convert, f16}; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +use core::slice; + +/// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { +    /// Reinterprets a slice of [`f16`] or [`bf16`] numbers as a slice of [`u16`] bits. +    /// +    /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory +    /// location as `self`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; +    /// let int_buffer = float_buffer.reinterpret_cast(); +    /// +    /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); +    /// ``` +    #[must_use] +    fn reinterpret_cast(&self) -> &[u16]; + +    /// Reinterprets a mutable slice of [`f16`] or [`bf16`] numbers as a mutable slice of [`u16`]. +    /// bits +    /// +    /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, +    /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; +    /// +    /// { +    ///     let int_buffer = float_buffer.reinterpret_cast_mut(); +    /// +    ///     assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); +    /// +    ///     // Mutating the u16 slice will mutating the original +    ///     int_buffer[0] = 0; +    /// } +    /// +    /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. +    /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); +    /// ``` +    #[must_use] +    fn reinterpret_cast_mut(&mut self) -> &mut [u16]; + +    /// Converts all of the elements of a `[f32]` slice into [`f16`] or [`bf16`] values in `self`. +    /// +    /// The length of `src` must be the same as `self`. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// # Panics +    /// +    /// This function will panic if the two slices have different lengths. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// // Initialize an empty buffer +    /// let mut buffer = [0u16; 4]; +    /// let buffer = buffer.reinterpret_cast_mut::<f16>(); +    /// +    /// let float_values = [1., 2., 3., 4.]; +    /// +    /// // Now convert +    /// buffer.convert_from_f32_slice(&float_values); +    /// +    /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); +    /// ``` +    fn convert_from_f32_slice(&mut self, src: &[f32]); + +    /// Converts all of the elements of a `[f64]` slice into [`f16`] or [`bf16`] values in `self`. +    /// +    /// The length of `src` must be the same as `self`. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// # Panics +    /// +    /// This function will panic if the two slices have different lengths. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// // Initialize an empty buffer +    /// let mut buffer = [0u16; 4]; +    /// let buffer = buffer.reinterpret_cast_mut::<f16>(); +    /// +    /// let float_values = [1., 2., 3., 4.]; +    /// +    /// // Now convert +    /// buffer.convert_from_f64_slice(&float_values); +    /// +    /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); +    /// ``` +    fn convert_from_f64_slice(&mut self, src: &[f64]); + +    /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in `dst`. +    /// +    /// The length of `src` must be the same as `self`. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// # Panics +    /// +    /// This function will panic if the two slices have different lengths. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// // Initialize an empty buffer +    /// let mut buffer = [0f32; 4]; +    /// +    /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; +    /// +    /// // Now convert +    /// half_values.convert_to_f32_slice(&mut buffer); +    /// +    /// assert_eq!(buffer, [1., 2., 3., 4.]); +    /// ``` +    fn convert_to_f32_slice(&self, dst: &mut [f32]); + +    /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in `dst`. +    /// +    /// The length of `src` must be the same as `self`. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// # Panics +    /// +    /// This function will panic if the two slices have different lengths. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// // Initialize an empty buffer +    /// let mut buffer = [0f64; 4]; +    /// +    /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; +    /// +    /// // Now convert +    /// half_values.convert_to_f64_slice(&mut buffer); +    /// +    /// assert_eq!(buffer, [1., 2., 3., 4.]); +    /// ``` +    fn convert_to_f64_slice(&self, dst: &mut [f64]); + +    // Because trait is sealed, we can get away with different interfaces between features. + +    /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in a new +    /// vector +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// This method is only available with the `std` or `alloc` feature. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; +    /// let vec = half_values.to_f32_vec(); +    /// +    /// assert_eq!(vec, vec![1., 2., 3., 4.]); +    /// ``` +    #[cfg(any(feature = "alloc", feature = "std"))] +    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +    #[must_use] +    fn to_f32_vec(&self) -> Vec<f32>; + +    /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in a new +    /// vector. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation](crate) for more information on hardware conversion +    /// support. +    /// +    /// This method is only available with the `std` or `alloc` feature. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; +    /// let vec = half_values.to_f64_vec(); +    /// +    /// assert_eq!(vec, vec![1., 2., 3., 4.]); +    /// ``` +    #[cfg(feature = "alloc")] +    #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +    #[must_use] +    fn to_f64_vec(&self) -> Vec<f64>; +} + +/// Extensions to `[u16]` slices to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice { +    /// Reinterprets a slice of [`u16`] bits as a slice of [`f16`] or [`bf16`] numbers. +    /// +    /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. +    /// +    /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory +    /// location as `self`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; +    /// let float_buffer: &[f16] = int_buffer.reinterpret_cast(); +    /// +    /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); +    /// +    /// // You may have to specify the cast type directly if the compiler can't infer the type. +    /// // The following is also valid in Rust. +    /// let typed_buffer = int_buffer.reinterpret_cast::<f16>(); +    /// ``` +    #[must_use] +    fn reinterpret_cast<H>(&self) -> &[H] +    where +        H: crate::private::SealedHalf; + +    /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`f16`] or [`bf16`] +    /// numbers. +    /// +    /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. +    /// +    /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, +    /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; +    /// +    /// { +    ///     let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut(); +    /// +    ///     assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); +    /// +    ///     // Mutating the f16 slice will mutating the original +    ///     float_buffer[0] = f16::from_f32(0.); +    /// } +    /// +    /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error. +    /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); +    /// +    /// // You may have to specify the cast type directly if the compiler can't infer the type. +    /// // The following is also valid in Rust. +    /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>(); +    /// ``` +    #[must_use] +    fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] +    where +        H: crate::private::SealedHalf; +} + +mod private { +    use crate::{bf16, f16}; + +    pub trait SealedHalfFloatSlice {} +    impl SealedHalfFloatSlice for [f16] {} +    impl SealedHalfFloatSlice for [bf16] {} + +    pub trait SealedHalfBitsSlice {} +    impl SealedHalfBitsSlice for [u16] {} +} + +impl HalfFloatSliceExt for [f16] { +    #[inline] +    fn reinterpret_cast(&self) -> &[u16] { +        let pointer = self.as_ptr() as *const u16; +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts(pointer, length) } +    } + +    #[inline] +    fn reinterpret_cast_mut(&mut self) -> &mut [u16] { +        let pointer = self.as_mut_ptr().cast::<u16>(); +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts_mut(pointer, length) } +    } + +    fn convert_from_f32_slice(&mut self, src: &[f32]) { +        assert_eq!( +            self.len(), +            src.len(), +            "destination and source slices have different lengths" +        ); + +        convert::f32_to_f16_slice(src, self.reinterpret_cast_mut()) +    } + +    fn convert_from_f64_slice(&mut self, src: &[f64]) { +        assert_eq!( +            self.len(), +            src.len(), +            "destination and source slices have different lengths" +        ); + +        convert::f64_to_f16_slice(src, self.reinterpret_cast_mut()) +    } + +    fn convert_to_f32_slice(&self, dst: &mut [f32]) { +        assert_eq!( +            self.len(), +            dst.len(), +            "destination and source slices have different lengths" +        ); + +        convert::f16_to_f32_slice(self.reinterpret_cast(), dst) +    } + +    fn convert_to_f64_slice(&self, dst: &mut [f64]) { +        assert_eq!( +            self.len(), +            dst.len(), +            "destination and source slices have different lengths" +        ); + +        convert::f16_to_f64_slice(self.reinterpret_cast(), dst) +    } + +    #[cfg(any(feature = "alloc", feature = "std"))] +    #[inline] +    #[allow(clippy::uninit_vec)] +    fn to_f32_vec(&self) -> Vec<f32> { +        let mut vec = Vec::with_capacity(self.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(self.len()) }; +        self.convert_to_f32_slice(&mut vec); +        vec +    } + +    #[cfg(any(feature = "alloc", feature = "std"))] +    #[inline] +    #[allow(clippy::uninit_vec)] +    fn to_f64_vec(&self) -> Vec<f64> { +        let mut vec = Vec::with_capacity(self.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(self.len()) }; +        self.convert_to_f64_slice(&mut vec); +        vec +    } +} + +impl HalfFloatSliceExt for [bf16] { +    #[inline] +    fn reinterpret_cast(&self) -> &[u16] { +        let pointer = self.as_ptr() as *const u16; +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts(pointer, length) } +    } + +    #[inline] +    fn reinterpret_cast_mut(&mut self) -> &mut [u16] { +        let pointer = self.as_mut_ptr().cast::<u16>(); +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts_mut(pointer, length) } +    } + +    fn convert_from_f32_slice(&mut self, src: &[f32]) { +        assert_eq!( +            self.len(), +            src.len(), +            "destination and source slices have different lengths" +        ); + +        // Just use regular loop here until there's any bf16 SIMD support. +        for (i, f) in src.iter().enumerate() { +            self[i] = bf16::from_f32(*f); +        } +    } + +    fn convert_from_f64_slice(&mut self, src: &[f64]) { +        assert_eq!( +            self.len(), +            src.len(), +            "destination and source slices have different lengths" +        ); + +        // Just use regular loop here until there's any bf16 SIMD support. +        for (i, f) in src.iter().enumerate() { +            self[i] = bf16::from_f64(*f); +        } +    } + +    fn convert_to_f32_slice(&self, dst: &mut [f32]) { +        assert_eq!( +            self.len(), +            dst.len(), +            "destination and source slices have different lengths" +        ); + +        // Just use regular loop here until there's any bf16 SIMD support. +        for (i, f) in self.iter().enumerate() { +            dst[i] = f.to_f32(); +        } +    } + +    fn convert_to_f64_slice(&self, dst: &mut [f64]) { +        assert_eq!( +            self.len(), +            dst.len(), +            "destination and source slices have different lengths" +        ); + +        // Just use regular loop here until there's any bf16 SIMD support. +        for (i, f) in self.iter().enumerate() { +            dst[i] = f.to_f64(); +        } +    } + +    #[cfg(any(feature = "alloc", feature = "std"))] +    #[inline] +    #[allow(clippy::uninit_vec)] +    fn to_f32_vec(&self) -> Vec<f32> { +        let mut vec = Vec::with_capacity(self.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(self.len()) }; +        self.convert_to_f32_slice(&mut vec); +        vec +    } + +    #[cfg(any(feature = "alloc", feature = "std"))] +    #[inline] +    #[allow(clippy::uninit_vec)] +    fn to_f64_vec(&self) -> Vec<f64> { +        let mut vec = Vec::with_capacity(self.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(self.len()) }; +        self.convert_to_f64_slice(&mut vec); +        vec +    } +} + +impl HalfBitsSliceExt for [u16] { +    // Since we sealed all the traits involved, these are safe. +    #[inline] +    fn reinterpret_cast<H>(&self) -> &[H] +    where +        H: crate::private::SealedHalf, +    { +        let pointer = self.as_ptr() as *const H; +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts(pointer, length) } +    } + +    #[inline] +    fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] +    where +        H: crate::private::SealedHalf, +    { +        let pointer = self.as_mut_ptr() as *mut H; +        let length = self.len(); +        // SAFETY: We are reconstructing full length of original slice, using its same lifetime, +        // and the size of elements are identical +        unsafe { slice::from_raw_parts_mut(pointer, length) } +    } +} + +#[allow(clippy::float_cmp)] +#[cfg(test)] +mod test { +    use super::{HalfBitsSliceExt, HalfFloatSliceExt}; +    use crate::{bf16, f16}; + +    #[test] +    fn test_slice_conversions_f16() { +        let bits = &[ +            f16::E.to_bits(), +            f16::PI.to_bits(), +            f16::EPSILON.to_bits(), +            f16::FRAC_1_SQRT_2.to_bits(), +        ]; +        let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; + +        // Convert from bits to numbers +        let from_bits = bits.reinterpret_cast::<f16>(); +        assert_eq!(from_bits, numbers); + +        // Convert from numbers back to bits +        let to_bits = from_bits.reinterpret_cast(); +        assert_eq!(to_bits, bits); +    } + +    #[test] +    fn test_mutablility_f16() { +        let mut bits_array = [f16::PI.to_bits()]; +        let bits = &mut bits_array[..]; + +        { +            // would not compile without these braces +            let numbers = bits.reinterpret_cast_mut(); +            numbers[0] = f16::E; +        } + +        assert_eq!(bits, &[f16::E.to_bits()]); + +        bits[0] = f16::LN_2.to_bits(); +        assert_eq!(bits, &[f16::LN_2.to_bits()]); +    } + +    #[test] +    fn test_slice_conversions_bf16() { +        let bits = &[ +            bf16::E.to_bits(), +            bf16::PI.to_bits(), +            bf16::EPSILON.to_bits(), +            bf16::FRAC_1_SQRT_2.to_bits(), +        ]; +        let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; + +        // Convert from bits to numbers +        let from_bits = bits.reinterpret_cast::<bf16>(); +        assert_eq!(from_bits, numbers); + +        // Convert from numbers back to bits +        let to_bits = from_bits.reinterpret_cast(); +        assert_eq!(to_bits, bits); +    } + +    #[test] +    fn test_mutablility_bf16() { +        let mut bits_array = [bf16::PI.to_bits()]; +        let bits = &mut bits_array[..]; + +        { +            // would not compile without these braces +            let numbers = bits.reinterpret_cast_mut(); +            numbers[0] = bf16::E; +        } + +        assert_eq!(bits, &[bf16::E.to_bits()]); + +        bits[0] = bf16::LN_2.to_bits(); +        assert_eq!(bits, &[bf16::LN_2.to_bits()]); +    } + +    #[test] +    fn slice_convert_f16_f32() { +        // Exact chunks +        let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; +        let vf16 = [ +            f16::from_f32(1.), +            f16::from_f32(2.), +            f16::from_f32(3.), +            f16::from_f32(4.), +            f16::from_f32(5.), +            f16::from_f32(6.), +            f16::from_f32(7.), +            f16::from_f32(8.), +        ]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; +        let vf16 = [ +            f16::from_f32(1.), +            f16::from_f32(2.), +            f16::from_f32(3.), +            f16::from_f32(4.), +            f16::from_f32(5.), +            f16::from_f32(6.), +            f16::from_f32(7.), +            f16::from_f32(8.), +            f16::from_f32(9.), +        ]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf32 = [1., 2.]; +        let vf16 = [f16::from_f32(1.), f16::from_f32(2.)]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); +    } + +    #[test] +    fn slice_convert_bf16_f32() { +        // Exact chunks +        let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; +        let vf16 = [ +            bf16::from_f32(1.), +            bf16::from_f32(2.), +            bf16::from_f32(3.), +            bf16::from_f32(4.), +            bf16::from_f32(5.), +            bf16::from_f32(6.), +            bf16::from_f32(7.), +            bf16::from_f32(8.), +        ]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; +        let vf16 = [ +            bf16::from_f32(1.), +            bf16::from_f32(2.), +            bf16::from_f32(3.), +            bf16::from_f32(4.), +            bf16::from_f32(5.), +            bf16::from_f32(6.), +            bf16::from_f32(7.), +            bf16::from_f32(8.), +            bf16::from_f32(9.), +        ]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf32 = [1., 2.]; +        let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)]; +        let mut buf32 = vf32; +        let mut buf16 = vf16; + +        vf16.convert_to_f32_slice(&mut buf32); +        assert_eq!(&vf32, &buf32); + +        buf16.convert_from_f32_slice(&vf32); +        assert_eq!(&vf16, &buf16); +    } + +    #[test] +    fn slice_convert_f16_f64() { +        // Exact chunks +        let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; +        let vf16 = [ +            f16::from_f64(1.), +            f16::from_f64(2.), +            f16::from_f64(3.), +            f16::from_f64(4.), +            f16::from_f64(5.), +            f16::from_f64(6.), +            f16::from_f64(7.), +            f16::from_f64(8.), +        ]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; +        let vf16 = [ +            f16::from_f64(1.), +            f16::from_f64(2.), +            f16::from_f64(3.), +            f16::from_f64(4.), +            f16::from_f64(5.), +            f16::from_f64(6.), +            f16::from_f64(7.), +            f16::from_f64(8.), +            f16::from_f64(9.), +        ]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf64 = [1., 2.]; +        let vf16 = [f16::from_f64(1.), f16::from_f64(2.)]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); +    } + +    #[test] +    fn slice_convert_bf16_f64() { +        // Exact chunks +        let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; +        let vf16 = [ +            bf16::from_f64(1.), +            bf16::from_f64(2.), +            bf16::from_f64(3.), +            bf16::from_f64(4.), +            bf16::from_f64(5.), +            bf16::from_f64(6.), +            bf16::from_f64(7.), +            bf16::from_f64(8.), +        ]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; +        let vf16 = [ +            bf16::from_f64(1.), +            bf16::from_f64(2.), +            bf16::from_f64(3.), +            bf16::from_f64(4.), +            bf16::from_f64(5.), +            bf16::from_f64(6.), +            bf16::from_f64(7.), +            bf16::from_f64(8.), +            bf16::from_f64(9.), +        ]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); + +        // Partial with chunks +        let vf64 = [1., 2.]; +        let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)]; +        let mut buf64 = vf64; +        let mut buf16 = vf16; + +        vf16.convert_to_f64_slice(&mut buf64); +        assert_eq!(&vf64, &buf64); + +        buf16.convert_from_f64_slice(&vf64); +        assert_eq!(&vf16, &buf16); +    } + +    #[test] +    #[should_panic] +    fn convert_from_f32_slice_len_mismatch_panics() { +        let mut slice1 = [f16::ZERO; 3]; +        let slice2 = [0f32; 4]; +        slice1.convert_from_f32_slice(&slice2); +    } + +    #[test] +    #[should_panic] +    fn convert_from_f64_slice_len_mismatch_panics() { +        let mut slice1 = [f16::ZERO; 3]; +        let slice2 = [0f64; 4]; +        slice1.convert_from_f64_slice(&slice2); +    } + +    #[test] +    #[should_panic] +    fn convert_to_f32_slice_len_mismatch_panics() { +        let slice1 = [f16::ZERO; 3]; +        let mut slice2 = [0f32; 4]; +        slice1.convert_to_f32_slice(&mut slice2); +    } + +    #[test] +    #[should_panic] +    fn convert_to_f64_slice_len_mismatch_panics() { +        let slice1 = [f16::ZERO; 3]; +        let mut slice2 = [0f64; 4]; +        slice1.convert_to_f64_slice(&mut slice2); +    } +} diff --git a/vendor/half/src/vec.rs b/vendor/half/src/vec.rs new file mode 100644 index 0000000..27ad3e7 --- /dev/null +++ b/vendor/half/src/vec.rs @@ -0,0 +1,274 @@ +//! Contains utility functions and traits to convert between vectors of [`u16`] bits and [`f16`] or +//! [`bf16`] vectors. +//! +//! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors, +//! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both +//! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and +//! reinterpret casting of larger buffers of floating point values, and are automatically included +//! in the [`prelude`][crate::prelude] module. +//! +//! This module is only available with the `std` or `alloc` feature. + +use super::{bf16, f16, slice::HalfFloatSliceExt}; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +use core::mem; + +/// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatVecExt: private::SealedHalfFloatVec { +    /// Reinterprets a vector of [`f16`]or [`bf16`] numbers as a vector of [`u16`] bits. +    /// +    /// This is a zero-copy operation. The reinterpreted vector has the same memory location as +    /// `self`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; +    /// let int_buffer = float_buffer.reinterpret_into(); +    /// +    /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); +    /// ``` +    #[must_use] +    fn reinterpret_into(self) -> Vec<u16>; + +    /// Converts all of the elements of a `[f32]` slice into a new [`f16`] or [`bf16`] vector. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation][crate] for more information on hardware conversion +    /// support. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// let float_values = [1., 2., 3., 4.]; +    /// let vec: Vec<f16> = Vec::from_f32_slice(&float_values); +    /// +    /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); +    /// ``` +    #[must_use] +    fn from_f32_slice(slice: &[f32]) -> Self; + +    /// Converts all of the elements of a `[f64]` slice into a new [`f16`] or [`bf16`] vector. +    /// +    /// The conversion operation is vectorized over the slice, meaning the conversion may be more +    /// efficient than converting individual elements on some hardware that supports SIMD +    /// conversions. See [crate documentation][crate] for more information on hardware conversion +    /// support. +    /// +    /// # Examples +    /// ```rust +    /// # use half::prelude::*; +    /// let float_values = [1., 2., 3., 4.]; +    /// let vec: Vec<f16> = Vec::from_f64_slice(&float_values); +    /// +    /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); +    /// ``` +    #[must_use] +    fn from_f64_slice(slice: &[f64]) -> Self; +} + +/// Extensions to [`Vec<u16>`] to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsVecExt: private::SealedHalfBitsVec { +    /// Reinterprets a vector of [`u16`] bits as a vector of [`f16`] or [`bf16`] numbers. +    /// +    /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. +    /// +    /// This is a zero-copy operation. The reinterpreted vector has the same memory location as +    /// `self`. +    /// +    /// # Examples +    /// +    /// ```rust +    /// # use half::prelude::*; +    /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; +    /// let float_buffer = int_buffer.reinterpret_into::<f16>(); +    /// +    /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); +    /// ``` +    #[must_use] +    fn reinterpret_into<H>(self) -> Vec<H> +    where +        H: crate::private::SealedHalf; +} + +mod private { +    use crate::{bf16, f16}; +    #[cfg(feature = "alloc")] +    use alloc::vec::Vec; + +    pub trait SealedHalfFloatVec {} +    impl SealedHalfFloatVec for Vec<f16> {} +    impl SealedHalfFloatVec for Vec<bf16> {} + +    pub trait SealedHalfBitsVec {} +    impl SealedHalfBitsVec for Vec<u16> {} +} + +impl HalfFloatVecExt for Vec<f16> { +    #[inline] +    fn reinterpret_into(mut self) -> Vec<u16> { +        // An f16 array has same length and capacity as u16 array +        let length = self.len(); +        let capacity = self.capacity(); + +        // Actually reinterpret the contents of the Vec<f16> as u16, +        // knowing that structs are represented as only their members in memory, +        // which is the u16 part of `f16(u16)` +        let pointer = self.as_mut_ptr() as *mut u16; + +        // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted +        mem::forget(self); + +        // Finally construct a new Vec<f16> from the raw pointer +        // SAFETY: We are reconstructing full length and capacity of original vector, +        // using its original pointer, and the size of elements are identical. +        unsafe { Vec::from_raw_parts(pointer, length, capacity) } +    } + +    #[allow(clippy::uninit_vec)] +    fn from_f32_slice(slice: &[f32]) -> Self { +        let mut vec = Vec::with_capacity(slice.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(slice.len()) }; +        vec.convert_from_f32_slice(slice); +        vec +    } + +    #[allow(clippy::uninit_vec)] +    fn from_f64_slice(slice: &[f64]) -> Self { +        let mut vec = Vec::with_capacity(slice.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(slice.len()) }; +        vec.convert_from_f64_slice(slice); +        vec +    } +} + +impl HalfFloatVecExt for Vec<bf16> { +    #[inline] +    fn reinterpret_into(mut self) -> Vec<u16> { +        // An f16 array has same length and capacity as u16 array +        let length = self.len(); +        let capacity = self.capacity(); + +        // Actually reinterpret the contents of the Vec<f16> as u16, +        // knowing that structs are represented as only their members in memory, +        // which is the u16 part of `f16(u16)` +        let pointer = self.as_mut_ptr() as *mut u16; + +        // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted +        mem::forget(self); + +        // Finally construct a new Vec<f16> from the raw pointer +        // SAFETY: We are reconstructing full length and capacity of original vector, +        // using its original pointer, and the size of elements are identical. +        unsafe { Vec::from_raw_parts(pointer, length, capacity) } +    } + +    #[allow(clippy::uninit_vec)] +    fn from_f32_slice(slice: &[f32]) -> Self { +        let mut vec = Vec::with_capacity(slice.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(slice.len()) }; +        vec.convert_from_f32_slice(slice); +        vec +    } + +    #[allow(clippy::uninit_vec)] +    fn from_f64_slice(slice: &[f64]) -> Self { +        let mut vec = Vec::with_capacity(slice.len()); +        // SAFETY: convert will initialize every value in the vector without reading them, +        // so this is safe to do instead of double initialize from resize, and we're setting it to +        // same value as capacity. +        unsafe { vec.set_len(slice.len()) }; +        vec.convert_from_f64_slice(slice); +        vec +    } +} + +impl HalfBitsVecExt for Vec<u16> { +    // This is safe because all traits are sealed +    #[inline] +    fn reinterpret_into<H>(mut self) -> Vec<H> +    where +        H: crate::private::SealedHalf, +    { +        // An f16 array has same length and capacity as u16 array +        let length = self.len(); +        let capacity = self.capacity(); + +        // Actually reinterpret the contents of the Vec<u16> as f16, +        // knowing that structs are represented as only their members in memory, +        // which is the u16 part of `f16(u16)` +        let pointer = self.as_mut_ptr() as *mut H; + +        // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted +        mem::forget(self); + +        // Finally construct a new Vec<f16> from the raw pointer +        // SAFETY: We are reconstructing full length and capacity of original vector, +        // using its original pointer, and the size of elements are identical. +        unsafe { Vec::from_raw_parts(pointer, length, capacity) } +    } +} + +#[cfg(test)] +mod test { +    use super::{HalfBitsVecExt, HalfFloatVecExt}; +    use crate::{bf16, f16}; +    #[cfg(all(feature = "alloc", not(feature = "std")))] +    use alloc::vec; + +    #[test] +    fn test_vec_conversions_f16() { +        let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; +        let bits = vec![ +            f16::E.to_bits(), +            f16::PI.to_bits(), +            f16::EPSILON.to_bits(), +            f16::FRAC_1_SQRT_2.to_bits(), +        ]; +        let bits_cloned = bits.clone(); + +        // Convert from bits to numbers +        let from_bits = bits.reinterpret_into::<f16>(); +        assert_eq!(&from_bits[..], &numbers[..]); + +        // Convert from numbers back to bits +        let to_bits = from_bits.reinterpret_into(); +        assert_eq!(&to_bits[..], &bits_cloned[..]); +    } + +    #[test] +    fn test_vec_conversions_bf16() { +        let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; +        let bits = vec![ +            bf16::E.to_bits(), +            bf16::PI.to_bits(), +            bf16::EPSILON.to_bits(), +            bf16::FRAC_1_SQRT_2.to_bits(), +        ]; +        let bits_cloned = bits.clone(); + +        // Convert from bits to numbers +        let from_bits = bits.reinterpret_into::<bf16>(); +        assert_eq!(&from_bits[..], &numbers[..]); + +        // Convert from numbers back to bits +        let to_bits = from_bits.reinterpret_into(); +        assert_eq!(&to_bits[..], &bits_cloned[..]); +    } +} | 
