diff options
Diffstat (limited to 'vendor/half/src')
-rw-r--r-- | vendor/half/src/bfloat.rs | 1841 | ||||
-rw-r--r-- | vendor/half/src/bfloat/convert.rs | 148 | ||||
-rw-r--r-- | vendor/half/src/binary16.rs | 1912 | ||||
-rw-r--r-- | vendor/half/src/binary16/convert.rs | 752 | ||||
-rw-r--r-- | vendor/half/src/leading_zeros.rs | 62 | ||||
-rw-r--r-- | vendor/half/src/lib.rs | 233 | ||||
-rw-r--r-- | vendor/half/src/num_traits.rs | 1483 | ||||
-rw-r--r-- | vendor/half/src/slice.rs | 854 | ||||
-rw-r--r-- | vendor/half/src/vec.rs | 274 |
9 files changed, 7559 insertions, 0 deletions
diff --git a/vendor/half/src/bfloat.rs b/vendor/half/src/bfloat.rs new file mode 100644 index 0000000..8b23863 --- /dev/null +++ b/vendor/half/src/bfloat.rs @@ -0,0 +1,1841 @@ +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, Zeroable}; +use core::{ + cmp::Ordering, + iter::{Product, Sum}, + num::FpCategory, + ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, +}; +#[cfg(not(target_arch = "spirv"))] +use core::{ + fmt::{ + Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, + }, + num::ParseFloatError, + str::FromStr, +}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "zerocopy")] +use zerocopy::{AsBytes, FromBytes}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the [`bfloat16`] format. +/// +/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard +/// `binary32`, a.k.a [`f32`]. [`bf16`] has approximately the same dynamic range as [`f32`] by +/// having a lower precision than [`f16`][crate::f16]. While [`f16`][crate::f16] has a precision of +/// 11 bits, [`bf16`] has a precision of only 8 bits. +/// +/// Like [`f16`][crate::f16], [`bf16`] does not offer arithmetic operations as it is intended for +/// compact storage rather than calculations. Operations should be performed with [`f32`] or +/// higher-precision types and converted to/from [`bf16`] as necessary. +/// +/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] +#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] +pub struct bf16(u16); + +impl bf16 { + /// Constructs a [`bf16`] value from the raw bits. + #[inline] + #[must_use] + pub const fn from_bits(bits: u16) -> bf16 { + bf16(bits) + } + + /// Constructs a [`bf16`] value from a 32-bit floating point value. + /// + /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. + /// Subnormal values that are too tiny to be represented will result in ±0. All other values + /// are truncated and rounded to the nearest representable value. + #[inline] + #[must_use] + pub fn from_f32(value: f32) -> bf16 { + Self::from_f32_const(value) + } + + /// Constructs a [`bf16`] value from a 32-bit floating point value. + /// + /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred + /// in any non-`const` context. + /// + /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. + /// Subnormal values that are too tiny to be represented will result in ±0. All other values + /// are truncated and rounded to the nearest representable value. + #[inline] + #[must_use] + pub const fn from_f32_const(value: f32) -> bf16 { + bf16(convert::f32_to_bf16(value)) + } + + /// Constructs a [`bf16`] value from a 64-bit floating point value. + /// + /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. + /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that + /// underflow the minimum exponent will result in subnormals or ±0. All other values are + /// truncated and rounded to the nearest representable value. + #[inline] + #[must_use] + pub fn from_f64(value: f64) -> bf16 { + Self::from_f64_const(value) + } + + /// Constructs a [`bf16`] value from a 64-bit floating point value. + /// + /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred + /// in any non-`const` context. + /// + /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. + /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that + /// underflow the minimum exponent will result in subnormals or ±0. All other values are + /// truncated and rounded to the nearest representable value. + #[inline] + #[must_use] + pub const fn from_f64_const(value: f64) -> bf16 { + bf16(convert::f64_to_bf16(value)) + } + + /// Converts a [`bf16`] into the underlying bit representation. + #[inline] + #[must_use] + pub const fn to_bits(self) -> u16 { + self.0 + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// little-endian byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_le_bytes(); + /// assert_eq!(bytes, [0x48, 0x41]); + /// ``` + #[inline] + #[must_use] + pub const fn to_le_bytes(self) -> [u8; 2] { + self.0.to_le_bytes() + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// big-endian (network) byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_be_bytes(); + /// assert_eq!(bytes, [0x41, 0x48]); + /// ``` + #[inline] + #[must_use] + pub const fn to_be_bytes(self) -> [u8; 2] { + self.0.to_be_bytes() + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code should use + /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate, + /// instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = bf16::from_f32(12.5).to_ne_bytes(); + /// assert_eq!(bytes, if cfg!(target_endian = "big") { + /// [0x41, 0x48] + /// } else { + /// [0x48, 0x41] + /// }); + /// ``` + #[inline] + #[must_use] + pub const fn to_ne_bytes(self) -> [u8; 2] { + self.0.to_ne_bytes() + } + + /// Creates a floating point value from its representation as a byte array in little endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_le_bytes([0x48, 0x41]); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_le_bytes(bytes)) + } + + /// Creates a floating point value from its representation as a byte array in big endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_be_bytes([0x41, 0x48]); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_be_bytes(bytes)) + } + + /// Creates a floating point value from its representation as a byte array in native endian. + /// + /// As the target platform's native endianness is used, portable code likely wants to use + /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as + /// appropriate instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") { + /// [0x41, 0x48] + /// } else { + /// [0x48, 0x41] + /// }); + /// assert_eq!(value, bf16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 { + bf16::from_bits(u16::from_ne_bytes(bytes)) + } + + /// Converts a [`bf16`] value into an [`f32`] value. + /// + /// This conversion is lossless as all values can be represented exactly in [`f32`]. + #[inline] + #[must_use] + pub fn to_f32(self) -> f32 { + self.to_f32_const() + } + + /// Converts a [`bf16`] value into an [`f32`] value. + /// + /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred + /// in any non-`const` context. + /// + /// This conversion is lossless as all values can be represented exactly in [`f32`]. + #[inline] + #[must_use] + pub const fn to_f32_const(self) -> f32 { + convert::bf16_to_f32(self.0) + } + + /// Converts a [`bf16`] value into an [`f64`] value. + /// + /// This conversion is lossless as all values can be represented exactly in [`f64`]. + #[inline] + #[must_use] + pub fn to_f64(self) -> f64 { + self.to_f64_const() + } + + /// Converts a [`bf16`] value into an [`f64`] value. + /// + /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred + /// in any non-`const` context. + /// + /// This conversion is lossless as all values can be represented exactly in [`f64`]. + #[inline] + #[must_use] + pub const fn to_f64_const(self) -> f64 { + convert::bf16_to_f64(self.0) + } + + /// Returns `true` if this value is NaN and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0_f32); + /// + /// assert!(nan.is_nan()); + /// assert!(!f.is_nan()); + /// ``` + #[inline] + #[must_use] + pub const fn is_nan(self) -> bool { + self.0 & 0x7FFFu16 > 0x7F80u16 + } + + /// Returns `true` if this value is ±∞ and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(7.0f32); + /// let inf = bf16::INFINITY; + /// let neg_inf = bf16::NEG_INFINITY; + /// let nan = bf16::NAN; + /// + /// assert!(!f.is_infinite()); + /// assert!(!nan.is_infinite()); + /// + /// assert!(inf.is_infinite()); + /// assert!(neg_inf.is_infinite()); + /// ``` + #[inline] + #[must_use] + pub const fn is_infinite(self) -> bool { + self.0 & 0x7FFFu16 == 0x7F80u16 + } + + /// Returns `true` if this number is neither infinite nor NaN. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(7.0f32); + /// let inf = bf16::INFINITY; + /// let neg_inf = bf16::NEG_INFINITY; + /// let nan = bf16::NAN; + /// + /// assert!(f.is_finite()); + /// + /// assert!(!nan.is_finite()); + /// assert!(!inf.is_finite()); + /// assert!(!neg_inf.is_finite()); + /// ``` + #[inline] + #[must_use] + pub const fn is_finite(self) -> bool { + self.0 & 0x7F80u16 != 0x7F80u16 + } + + /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let min = bf16::MIN_POSITIVE; + /// let max = bf16::MAX; + /// let lower_than_min = bf16::from_f32(1.0e-39_f32); + /// let zero = bf16::from_f32(0.0_f32); + /// + /// assert!(min.is_normal()); + /// assert!(max.is_normal()); + /// + /// assert!(!zero.is_normal()); + /// assert!(!bf16::NAN.is_normal()); + /// assert!(!bf16::INFINITY.is_normal()); + /// // Values between 0 and `min` are subnormal. + /// assert!(!lower_than_min.is_normal()); + /// ``` + #[inline] + #[must_use] + pub const fn is_normal(self) -> bool { + let exp = self.0 & 0x7F80u16; + exp != 0x7F80u16 && exp != 0 + } + + /// Returns the floating point category of the number. + /// + /// If only one property is going to be tested, it is generally faster to use the specific + /// predicate instead. + /// + /// # Examples + /// + /// ```rust + /// use std::num::FpCategory; + /// # use half::prelude::*; + /// + /// let num = bf16::from_f32(12.4_f32); + /// let inf = bf16::INFINITY; + /// + /// assert_eq!(num.classify(), FpCategory::Normal); + /// assert_eq!(inf.classify(), FpCategory::Infinite); + /// ``` + #[must_use] + pub const fn classify(self) -> FpCategory { + let exp = self.0 & 0x7F80u16; + let man = self.0 & 0x007Fu16; + match (exp, man) { + (0, 0) => FpCategory::Zero, + (0, _) => FpCategory::Subnormal, + (0x7F80u16, 0) => FpCategory::Infinite, + (0x7F80u16, _) => FpCategory::Nan, + _ => FpCategory::Normal, + } + } + + /// Returns a number that represents the sign of `self`. + /// + /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY] + /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY] + /// * [`NAN`][bf16::NAN] if the number is NaN + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = bf16::from_f32(3.5_f32); + /// + /// assert_eq!(f.signum(), bf16::from_f32(1.0)); + /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0)); + /// + /// assert!(bf16::NAN.signum().is_nan()); + /// ``` + #[must_use] + pub const fn signum(self) -> bf16 { + if self.is_nan() { + self + } else if self.0 & 0x8000u16 != 0 { + Self::NEG_ONE + } else { + Self::ONE + } + } + + /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a + /// positive sign bit and +∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0_f32); + /// let g = bf16::from_f32(-7.0_f32); + /// + /// assert!(f.is_sign_positive()); + /// assert!(!g.is_sign_positive()); + /// // NaN can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + #[must_use] + pub const fn is_sign_positive(self) -> bool { + self.0 & 0x8000u16 == 0 + } + + /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a + /// negative sign bit and −∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = bf16::NAN; + /// let f = bf16::from_f32(7.0f32); + /// let g = bf16::from_f32(-7.0f32); + /// + /// assert!(!f.is_sign_negative()); + /// assert!(g.is_sign_negative()); + /// // NaN can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + #[must_use] + pub const fn is_sign_negative(self) -> bool { + self.0 & 0x8000u16 != 0 + } + + /// Returns a number composed of the magnitude of `self` and the sign of `sign`. + /// + /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. + /// If `self` is NaN, then NaN with the sign of `sign` is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let f = bf16::from_f32(3.5); + /// + /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); + /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); + /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); + /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); + /// + /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan()); + /// ``` + #[inline] + #[must_use] + pub const fn copysign(self, sign: bf16) -> bf16 { + bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) + } + + /// Returns the maximum of the two numbers. + /// + /// If one of the arguments is NaN, then the other argument is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let x = bf16::from_f32(1.0); + /// let y = bf16::from_f32(2.0); + /// + /// assert_eq!(x.max(y), y); + /// ``` + #[inline] + #[must_use] + pub fn max(self, other: bf16) -> bf16 { + if other > self && !other.is_nan() { + other + } else { + self + } + } + + /// Returns the minimum of the two numbers. + /// + /// If one of the arguments is NaN, then the other argument is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let x = bf16::from_f32(1.0); + /// let y = bf16::from_f32(2.0); + /// + /// assert_eq!(x.min(y), x); + /// ``` + #[inline] + #[must_use] + pub fn min(self, other: bf16) -> bf16 { + if other < self && !other.is_nan() { + other + } else { + self + } + } + + /// Restrict a value to a certain interval unless it is NaN. + /// + /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. + /// Otherwise this returns `self`. + /// + /// Note that this function returns NaN if the initial value was NaN as well. + /// + /// # Panics + /// Panics if `min > max`, `min` is NaN, or `max` is NaN. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0)); + /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0)); + /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0)); + /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan()); + /// ``` + #[inline] + #[must_use] + pub fn clamp(self, min: bf16, max: bf16) -> bf16 { + assert!(min <= max); + let mut x = self; + if x < min { + x = min; + } + if x > max { + x = max; + } + x + } + + /// Returns the ordering between `self` and `other`. + /// + /// Unlike the standard partial comparison between floating point numbers, + /// this comparison always produces an ordering in accordance to + /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) + /// floating point standard. The values are ordered in the following sequence: + /// + /// - negative quiet NaN + /// - negative signaling NaN + /// - negative infinity + /// - negative numbers + /// - negative subnormal numbers + /// - negative zero + /// - positive zero + /// - positive subnormal numbers + /// - positive numbers + /// - positive infinity + /// - positive signaling NaN + /// - positive quiet NaN. + /// + /// The ordering established by this function does not always agree with the + /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example, + /// they consider negative and positive zero equal, while `total_cmp` + /// doesn't. + /// + /// The interpretation of the signaling NaN bit follows the definition in + /// the IEEE 754 standard, which may not match the interpretation by some of + /// the older, non-conformant (e.g. MIPS) hardware implementations. + /// + /// # Examples + /// ``` + /// # use half::bf16; + /// let mut v: Vec<bf16> = vec![]; + /// v.push(bf16::ONE); + /// v.push(bf16::INFINITY); + /// v.push(bf16::NEG_INFINITY); + /// v.push(bf16::NAN); + /// v.push(bf16::MAX_SUBNORMAL); + /// v.push(-bf16::MAX_SUBNORMAL); + /// v.push(bf16::ZERO); + /// v.push(bf16::NEG_ZERO); + /// v.push(bf16::NEG_ONE); + /// v.push(bf16::MIN_POSITIVE); + /// + /// v.sort_by(|a, b| a.total_cmp(&b)); + /// + /// assert!(v + /// .into_iter() + /// .zip( + /// [ + /// bf16::NEG_INFINITY, + /// bf16::NEG_ONE, + /// -bf16::MAX_SUBNORMAL, + /// bf16::NEG_ZERO, + /// bf16::ZERO, + /// bf16::MAX_SUBNORMAL, + /// bf16::MIN_POSITIVE, + /// bf16::ONE, + /// bf16::INFINITY, + /// bf16::NAN + /// ] + /// .iter() + /// ) + /// .all(|(a, b)| a.to_bits() == b.to_bits())); + /// ``` + // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp + #[inline] + #[must_use] + pub fn total_cmp(&self, other: &Self) -> Ordering { + let mut left = self.to_bits() as i16; + let mut right = other.to_bits() as i16; + left ^= (((left >> 15) as u16) >> 1) as i16; + right ^= (((right >> 15) as u16) >> 1) as i16; + left.cmp(&right) + } + + /// Alternate serialize adapter for serializing as a float. + /// + /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize + /// implementation that serializes as an [`f32`] value. It is designed for use with + /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by + /// the default deserialize implementation. + /// + /// # Examples + /// + /// A demonstration on how to use this adapater: + /// + /// ``` + /// use serde::{Serialize, Deserialize}; + /// use half::bf16; + /// + /// #[derive(Serialize, Deserialize)] + /// struct MyStruct { + /// #[serde(serialize_with = "bf16::serialize_as_f32")] + /// value: bf16 // Will be serialized as f32 instead of u16 + /// } + /// ``` + #[cfg(feature = "serde")] + pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + serializer.serialize_f32(self.to_f32()) + } + + /// Alternate serialize adapter for serializing as a string. + /// + /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize + /// implementation that serializes as a string value. It is designed for use with + /// `serialize_with` serde attributes. Deserialization from string values is already supported + /// by the default deserialize implementation. + /// + /// # Examples + /// + /// A demonstration on how to use this adapater: + /// + /// ``` + /// use serde::{Serialize, Deserialize}; + /// use half::bf16; + /// + /// #[derive(Serialize, Deserialize)] + /// struct MyStruct { + /// #[serde(serialize_with = "bf16::serialize_as_string")] + /// value: bf16 // Will be serialized as a string instead of u16 + /// } + /// ``` + #[cfg(feature = "serde")] + pub fn serialize_as_string<S: serde::Serializer>( + &self, + serializer: S, + ) -> Result<S::Ok, S::Error> { + serializer.serialize_str(&self.to_string()) + } + + /// Approximate number of [`bf16`] significant digits in base 10 + pub const DIGITS: u32 = 2; + /// [`bf16`] + /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value + /// + /// This is the difference between 1.0 and the next largest representable number. + pub const EPSILON: bf16 = bf16(0x3C00u16); + /// [`bf16`] positive Infinity (+∞) + pub const INFINITY: bf16 = bf16(0x7F80u16); + /// Number of [`bf16`] significant digits in base 2 + pub const MANTISSA_DIGITS: u32 = 8; + /// Largest finite [`bf16`] value + pub const MAX: bf16 = bf16(0x7F7F); + /// Maximum possible [`bf16`] power of 10 exponent + pub const MAX_10_EXP: i32 = 38; + /// Maximum possible [`bf16`] power of 2 exponent + pub const MAX_EXP: i32 = 128; + /// Smallest finite [`bf16`] value + pub const MIN: bf16 = bf16(0xFF7F); + /// Minimum possible normal [`bf16`] power of 10 exponent + pub const MIN_10_EXP: i32 = -37; + /// One greater than the minimum possible normal [`bf16`] power of 2 exponent + pub const MIN_EXP: i32 = -125; + /// Smallest positive normal [`bf16`] value + pub const MIN_POSITIVE: bf16 = bf16(0x0080u16); + /// [`bf16`] Not a Number (NaN) + pub const NAN: bf16 = bf16(0x7FC0u16); + /// [`bf16`] negative infinity (-∞). + pub const NEG_INFINITY: bf16 = bf16(0xFF80u16); + /// The radix or base of the internal representation of [`bf16`] + pub const RADIX: u32 = 2; + + /// Minimum positive subnormal [`bf16`] value + pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16); + /// Maximum subnormal [`bf16`] value + pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16); + + /// [`bf16`] 1 + pub const ONE: bf16 = bf16(0x3F80u16); + /// [`bf16`] 0 + pub const ZERO: bf16 = bf16(0x0000u16); + /// [`bf16`] -0 + pub const NEG_ZERO: bf16 = bf16(0x8000u16); + /// [`bf16`] -1 + pub const NEG_ONE: bf16 = bf16(0xBF80u16); + + /// [`bf16`] Euler's number (ℯ) + pub const E: bf16 = bf16(0x402Eu16); + /// [`bf16`] Archimedes' constant (π) + pub const PI: bf16 = bf16(0x4049u16); + /// [`bf16`] 1/π + pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16); + /// [`bf16`] 1/√2 + pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16); + /// [`bf16`] 2/π + pub const FRAC_2_PI: bf16 = bf16(0x3F23u16); + /// [`bf16`] 2/√π + pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16); + /// [`bf16`] π/2 + pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16); + /// [`bf16`] π/3 + pub const FRAC_PI_3: bf16 = bf16(0x3F86u16); + /// [`bf16`] π/4 + pub const FRAC_PI_4: bf16 = bf16(0x3F49u16); + /// [`bf16`] π/6 + pub const FRAC_PI_6: bf16 = bf16(0x3F06u16); + /// [`bf16`] π/8 + pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16); + /// [`bf16`] 𝗅𝗇 10 + pub const LN_10: bf16 = bf16(0x4013u16); + /// [`bf16`] 𝗅𝗇 2 + pub const LN_2: bf16 = bf16(0x3F31u16); + /// [`bf16`] 𝗅𝗈𝗀₁₀ℯ + pub const LOG10_E: bf16 = bf16(0x3EDEu16); + /// [`bf16`] 𝗅𝗈𝗀₁₀2 + pub const LOG10_2: bf16 = bf16(0x3E9Au16); + /// [`bf16`] 𝗅𝗈𝗀₂ℯ + pub const LOG2_E: bf16 = bf16(0x3FB9u16); + /// [`bf16`] 𝗅𝗈𝗀₂10 + pub const LOG2_10: bf16 = bf16(0x4055u16); + /// [`bf16`] √2 + pub const SQRT_2: bf16 = bf16(0x3FB5u16); +} + +impl From<bf16> for f32 { + #[inline] + fn from(x: bf16) -> f32 { + x.to_f32() + } +} + +impl From<bf16> for f64 { + #[inline] + fn from(x: bf16) -> f64 { + x.to_f64() + } +} + +impl From<i8> for bf16 { + #[inline] + fn from(x: i8) -> bf16 { + // Convert to f32, then to bf16 + bf16::from_f32(f32::from(x)) + } +} + +impl From<u8> for bf16 { + #[inline] + fn from(x: u8) -> bf16 { + // Convert to f32, then to f16 + bf16::from_f32(f32::from(x)) + } +} + +impl PartialEq for bf16 { + fn eq(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) + } + } +} + +impl PartialOrd for bf16 { + fn partial_cmp(&self, other: &bf16) -> Option<Ordering> { + if self.is_nan() || other.is_nan() { + None + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => Some(self.0.cmp(&other.0)), + (false, true) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Greater) + } + } + (true, false) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Less) + } + } + (true, true) => Some(other.0.cmp(&self.0)), + } + } + } + + fn lt(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 < other.0, + (false, true) => false, + (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, true) => self.0 > other.0, + } + } + } + + fn le(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 <= other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, false) => true, + (true, true) => self.0 >= other.0, + } + } + } + + fn gt(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 > other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, false) => false, + (true, true) => self.0 < other.0, + } + } + } + + fn ge(&self, other: &bf16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 >= other.0, + (false, true) => true, + (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, true) => self.0 <= other.0, + } + } + } +} + +#[cfg(not(target_arch = "spirv"))] +impl FromStr for bf16 { + type Err = ParseFloatError; + fn from_str(src: &str) -> Result<bf16, ParseFloatError> { + f32::from_str(src).map(bf16::from_f32) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Debug for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:?}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Display for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerExp for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:e}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperExp for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:E}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Binary for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:b}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Octal for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:o}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerHex for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:x}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperHex for bf16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:X}", self.0) + } +} + +impl Neg for bf16 { + type Output = Self; + + fn neg(self) -> Self::Output { + Self(self.0 ^ 0x8000) + } +} + +impl Neg for &bf16 { + type Output = <bf16 as Neg>::Output; + + #[inline] + fn neg(self) -> Self::Output { + Neg::neg(*self) + } +} + +impl Add for bf16 { + type Output = Self; + + fn add(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) + } +} + +impl Add<&bf16> for bf16 { + type Output = <bf16 as Add<bf16>>::Output; + + #[inline] + fn add(self, rhs: &bf16) -> Self::Output { + self.add(*rhs) + } +} + +impl Add<&bf16> for &bf16 { + type Output = <bf16 as Add<bf16>>::Output; + + #[inline] + fn add(self, rhs: &bf16) -> Self::Output { + (*self).add(*rhs) + } +} + +impl Add<bf16> for &bf16 { + type Output = <bf16 as Add<bf16>>::Output; + + #[inline] + fn add(self, rhs: bf16) -> Self::Output { + (*self).add(rhs) + } +} + +impl AddAssign for bf16 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + *self = (*self).add(rhs); + } +} + +impl AddAssign<&bf16> for bf16 { + #[inline] + fn add_assign(&mut self, rhs: &bf16) { + *self = (*self).add(rhs); + } +} + +impl Sub for bf16 { + type Output = Self; + + fn sub(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) + } +} + +impl Sub<&bf16> for bf16 { + type Output = <bf16 as Sub<bf16>>::Output; + + #[inline] + fn sub(self, rhs: &bf16) -> Self::Output { + self.sub(*rhs) + } +} + +impl Sub<&bf16> for &bf16 { + type Output = <bf16 as Sub<bf16>>::Output; + + #[inline] + fn sub(self, rhs: &bf16) -> Self::Output { + (*self).sub(*rhs) + } +} + +impl Sub<bf16> for &bf16 { + type Output = <bf16 as Sub<bf16>>::Output; + + #[inline] + fn sub(self, rhs: bf16) -> Self::Output { + (*self).sub(rhs) + } +} + +impl SubAssign for bf16 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + *self = (*self).sub(rhs); + } +} + +impl SubAssign<&bf16> for bf16 { + #[inline] + fn sub_assign(&mut self, rhs: &bf16) { + *self = (*self).sub(rhs); + } +} + +impl Mul for bf16 { + type Output = Self; + + fn mul(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) + } +} + +impl Mul<&bf16> for bf16 { + type Output = <bf16 as Mul<bf16>>::Output; + + #[inline] + fn mul(self, rhs: &bf16) -> Self::Output { + self.mul(*rhs) + } +} + +impl Mul<&bf16> for &bf16 { + type Output = <bf16 as Mul<bf16>>::Output; + + #[inline] + fn mul(self, rhs: &bf16) -> Self::Output { + (*self).mul(*rhs) + } +} + +impl Mul<bf16> for &bf16 { + type Output = <bf16 as Mul<bf16>>::Output; + + #[inline] + fn mul(self, rhs: bf16) -> Self::Output { + (*self).mul(rhs) + } +} + +impl MulAssign for bf16 { + #[inline] + fn mul_assign(&mut self, rhs: Self) { + *self = (*self).mul(rhs); + } +} + +impl MulAssign<&bf16> for bf16 { + #[inline] + fn mul_assign(&mut self, rhs: &bf16) { + *self = (*self).mul(rhs); + } +} + +impl Div for bf16 { + type Output = Self; + + fn div(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) + } +} + +impl Div<&bf16> for bf16 { + type Output = <bf16 as Div<bf16>>::Output; + + #[inline] + fn div(self, rhs: &bf16) -> Self::Output { + self.div(*rhs) + } +} + +impl Div<&bf16> for &bf16 { + type Output = <bf16 as Div<bf16>>::Output; + + #[inline] + fn div(self, rhs: &bf16) -> Self::Output { + (*self).div(*rhs) + } +} + +impl Div<bf16> for &bf16 { + type Output = <bf16 as Div<bf16>>::Output; + + #[inline] + fn div(self, rhs: bf16) -> Self::Output { + (*self).div(rhs) + } +} + +impl DivAssign for bf16 { + #[inline] + fn div_assign(&mut self, rhs: Self) { + *self = (*self).div(rhs); + } +} + +impl DivAssign<&bf16> for bf16 { + #[inline] + fn div_assign(&mut self, rhs: &bf16) { + *self = (*self).div(rhs); + } +} + +impl Rem for bf16 { + type Output = Self; + + fn rem(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) + } +} + +impl Rem<&bf16> for bf16 { + type Output = <bf16 as Rem<bf16>>::Output; + + #[inline] + fn rem(self, rhs: &bf16) -> Self::Output { + self.rem(*rhs) + } +} + +impl Rem<&bf16> for &bf16 { + type Output = <bf16 as Rem<bf16>>::Output; + + #[inline] + fn rem(self, rhs: &bf16) -> Self::Output { + (*self).rem(*rhs) + } +} + +impl Rem<bf16> for &bf16 { + type Output = <bf16 as Rem<bf16>>::Output; + + #[inline] + fn rem(self, rhs: bf16) -> Self::Output { + (*self).rem(rhs) + } +} + +impl RemAssign for bf16 { + #[inline] + fn rem_assign(&mut self, rhs: Self) { + *self = (*self).rem(rhs); + } +} + +impl RemAssign<&bf16> for bf16 { + #[inline] + fn rem_assign(&mut self, rhs: &bf16) { + *self = (*self).rem(rhs); + } +} + +impl Product for bf16 { + #[inline] + fn product<I: Iterator<Item = Self>>(iter: I) -> Self { + bf16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +impl<'a> Product<&'a bf16> for bf16 { + #[inline] + fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { + bf16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +impl Sum for bf16 { + #[inline] + fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { + bf16::from_f32(iter.map(|f| f.to_f32()).sum()) + } +} + +impl<'a> Sum<&'a bf16> for bf16 { + #[inline] + fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { + bf16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +#[cfg(feature = "serde")] +struct Visitor; + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for bf16 { + fn deserialize<D>(deserializer: D) -> Result<bf16, D::Error> + where + D: serde::de::Deserializer<'de>, + { + deserializer.deserialize_newtype_struct("bf16", Visitor) + } +} + +#[cfg(feature = "serde")] +impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = bf16; + + fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { + write!(formatter, "tuple struct bf16") + } + + fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> + where + D: serde::Deserializer<'de>, + { + Ok(bf16(<u16 as Deserialize>::deserialize(deserializer)?)) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + v.parse().map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") + }) + } + + fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(bf16::from_f32(v)) + } + + fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(bf16::from_f64(v)) + } +} + +#[allow( + clippy::cognitive_complexity, + clippy::float_cmp, + clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { + use super::*; + use core::cmp::Ordering; + #[cfg(feature = "num-traits")] + use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; + use quickcheck_macros::quickcheck; + + #[cfg(feature = "num-traits")] + #[test] + fn as_primitive() { + let two = bf16::from_f32(2.0); + assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two); + assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2); + + assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two); + assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0); + + assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two); + assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0); + } + + #[cfg(feature = "num-traits")] + #[test] + fn to_primitive() { + let two = bf16::from_f32(2.0); + assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); + assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); + assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); + } + + #[cfg(feature = "num-traits")] + #[test] + fn from_primitive() { + let two = bf16::from_f32(2.0); + assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two); + assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two); + assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two); + } + + #[test] + fn test_bf16_consts_from_f32() { + let one = bf16::from_f32(1.0); + let zero = bf16::from_f32(0.0); + let neg_zero = bf16::from_f32(-0.0); + let neg_one = bf16::from_f32(-1.0); + let inf = bf16::from_f32(core::f32::INFINITY); + let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY); + let nan = bf16::from_f32(core::f32::NAN); + + assert_eq!(bf16::ONE, one); + assert_eq!(bf16::ZERO, zero); + assert!(zero.is_sign_positive()); + assert_eq!(bf16::NEG_ZERO, neg_zero); + assert!(neg_zero.is_sign_negative()); + assert_eq!(bf16::NEG_ONE, neg_one); + assert!(neg_one.is_sign_negative()); + assert_eq!(bf16::INFINITY, inf); + assert_eq!(bf16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(bf16::NAN.is_nan()); + + let e = bf16::from_f32(core::f32::consts::E); + let pi = bf16::from_f32(core::f32::consts::PI); + let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI); + let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2); + let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI); + let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2); + let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3); + let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4); + let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6); + let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8); + let ln_10 = bf16::from_f32(core::f32::consts::LN_10); + let ln_2 = bf16::from_f32(core::f32::consts::LN_2); + let log10_e = bf16::from_f32(core::f32::consts::LOG10_E); + // core::f32::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = bf16::from_f32(2f32.log10()); + let log2_e = bf16::from_f32(core::f32::consts::LOG2_E); + // core::f32::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = bf16::from_f32(10f32.log2()); + let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2); + + assert_eq!(bf16::E, e); + assert_eq!(bf16::PI, pi); + assert_eq!(bf16::FRAC_1_PI, frac_1_pi); + assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(bf16::FRAC_2_PI, frac_2_pi); + assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(bf16::FRAC_PI_2, frac_pi_2); + assert_eq!(bf16::FRAC_PI_3, frac_pi_3); + assert_eq!(bf16::FRAC_PI_4, frac_pi_4); + assert_eq!(bf16::FRAC_PI_6, frac_pi_6); + assert_eq!(bf16::FRAC_PI_8, frac_pi_8); + assert_eq!(bf16::LN_10, ln_10); + assert_eq!(bf16::LN_2, ln_2); + assert_eq!(bf16::LOG10_E, log10_e); + assert_eq!(bf16::LOG10_2, log10_2); + assert_eq!(bf16::LOG2_E, log2_e); + assert_eq!(bf16::LOG2_10, log2_10); + assert_eq!(bf16::SQRT_2, sqrt_2); + } + + #[test] + fn test_bf16_consts_from_f64() { + let one = bf16::from_f64(1.0); + let zero = bf16::from_f64(0.0); + let neg_zero = bf16::from_f64(-0.0); + let inf = bf16::from_f64(core::f64::INFINITY); + let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY); + let nan = bf16::from_f64(core::f64::NAN); + + assert_eq!(bf16::ONE, one); + assert_eq!(bf16::ZERO, zero); + assert_eq!(bf16::NEG_ZERO, neg_zero); + assert_eq!(bf16::INFINITY, inf); + assert_eq!(bf16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(bf16::NAN.is_nan()); + + let e = bf16::from_f64(core::f64::consts::E); + let pi = bf16::from_f64(core::f64::consts::PI); + let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI); + let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2); + let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI); + let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2); + let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3); + let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4); + let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6); + let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8); + let ln_10 = bf16::from_f64(core::f64::consts::LN_10); + let ln_2 = bf16::from_f64(core::f64::consts::LN_2); + let log10_e = bf16::from_f64(core::f64::consts::LOG10_E); + // core::f64::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = bf16::from_f64(2f64.log10()); + let log2_e = bf16::from_f64(core::f64::consts::LOG2_E); + // core::f64::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = bf16::from_f64(10f64.log2()); + let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2); + + assert_eq!(bf16::E, e); + assert_eq!(bf16::PI, pi); + assert_eq!(bf16::FRAC_1_PI, frac_1_pi); + assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(bf16::FRAC_2_PI, frac_2_pi); + assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(bf16::FRAC_PI_2, frac_pi_2); + assert_eq!(bf16::FRAC_PI_3, frac_pi_3); + assert_eq!(bf16::FRAC_PI_4, frac_pi_4); + assert_eq!(bf16::FRAC_PI_6, frac_pi_6); + assert_eq!(bf16::FRAC_PI_8, frac_pi_8); + assert_eq!(bf16::LN_10, ln_10); + assert_eq!(bf16::LN_2, ln_2); + assert_eq!(bf16::LOG10_E, log10_e); + assert_eq!(bf16::LOG10_2, log10_2); + assert_eq!(bf16::LOG2_E, log2_e); + assert_eq!(bf16::LOG2_10, log2_10); + assert_eq!(bf16::SQRT_2, sqrt_2); + } + + #[test] + fn test_nan_conversion_to_smaller() { + let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); + let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_64 = nan64 as f32; + let neg_nan32_from_64 = neg_nan64 as f32; + let nan16_from_64 = bf16::from_f64(nan64); + let neg_nan16_from_64 = bf16::from_f64(neg_nan64); + let nan16_from_32 = bf16::from_f32(nan32); + let neg_nan16_from_32 = bf16::from_f32(neg_nan32); + + assert!(nan64.is_nan() && nan64.is_sign_positive()); + assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); + assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); + assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); + assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); + assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); + assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); + } + + #[test] + fn test_nan_conversion_to_larger() { + let nan16 = bf16::from_bits(0x7F81u16); + let neg_nan16 = bf16::from_bits(0xFF81u16); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_16 = f32::from(nan16); + let neg_nan32_from_16 = f32::from(neg_nan16); + let nan64_from_16 = f64::from(nan16); + let neg_nan64_from_16 = f64::from(neg_nan16); + let nan64_from_32 = f64::from(nan32); + let neg_nan64_from_32 = f64::from(neg_nan32); + + assert!(nan16.is_nan() && nan16.is_sign_positive()); + assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); + assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); + assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); + assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); + assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); + assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); + } + + #[test] + fn test_bf16_to_f32() { + let f = bf16::from_f32(7.0); + assert_eq!(f.to_f32(), 7.0f32); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = bf16::from_f32(7.1); + let diff = (f.to_f32() - 7.1f32).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * bf16::EPSILON.to_f32()); + + let tiny32 = f32::from_bits(0x0001_0000u32); + assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32); + assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32); + + assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32)); + assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32)); + } + + #[test] + fn test_bf16_to_f64() { + let f = bf16::from_f64(7.0); + assert_eq!(f.to_f64(), 7.0f64); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = bf16::from_f64(7.1); + let diff = (f.to_f64() - 7.1f64).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * bf16::EPSILON.to_f64()); + + let tiny64 = 2.0f64.powi(-133); + assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64); + assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64); + + assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64)); + assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64)); + } + + #[test] + fn test_comparisons() { + let zero = bf16::from_f64(0.0); + let one = bf16::from_f64(1.0); + let neg_zero = bf16::from_f64(-0.0); + let neg_one = bf16::from_f64(-1.0); + + assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); + assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); + assert!(zero == neg_zero); + assert!(neg_zero == zero); + assert!(!(zero != neg_zero)); + assert!(!(neg_zero != zero)); + assert!(!(zero < neg_zero)); + assert!(!(neg_zero < zero)); + assert!(zero <= neg_zero); + assert!(neg_zero <= zero); + assert!(!(zero > neg_zero)); + assert!(!(neg_zero > zero)); + assert!(zero >= neg_zero); + assert!(neg_zero >= zero); + + assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); + assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_zero)); + assert!(!(neg_zero == one)); + assert!(one != neg_zero); + assert!(neg_zero != one); + assert!(!(one < neg_zero)); + assert!(neg_zero < one); + assert!(!(one <= neg_zero)); + assert!(neg_zero <= one); + assert!(one > neg_zero); + assert!(!(neg_zero > one)); + assert!(one >= neg_zero); + assert!(!(neg_zero >= one)); + + assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); + assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_one)); + assert!(!(neg_one == one)); + assert!(one != neg_one); + assert!(neg_one != one); + assert!(!(one < neg_one)); + assert!(neg_one < one); + assert!(!(one <= neg_one)); + assert!(neg_one <= one); + assert!(one > neg_one); + assert!(!(neg_one > one)); + assert!(one >= neg_one); + assert!(!(neg_one >= one)); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f32() { + // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 + let min_sub = bf16::from_bits(1); + let min_sub_f = (-133f32).exp2(); + assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) + // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) + // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) + // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) + // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) + // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) + // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) + assert_eq!( + bf16::from_f32(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f32(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + bf16::from_f32(250.49f32).to_bits(), + bf16::from_f32(250.0).to_bits() + ); + assert_eq!( + bf16::from_f32(250.50f32).to_bits(), + bf16::from_f32(250.0).to_bits() + ); + assert_eq!( + bf16::from_f32(250.51f32).to_bits(), + bf16::from_f32(251.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.49f32).to_bits(), + bf16::from_f32(251.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.50f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(251.51f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.49f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.50f32).to_bits(), + bf16::from_f32(252.0).to_bits() + ); + assert_eq!( + bf16::from_f32(252.51f32).to_bits(), + bf16::from_f32(253.0).to_bits() + ); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f64() { + // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 + let min_sub = bf16::from_bits(1); + let min_sub_f = (-133f64).exp2(); + assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) + // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) + // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) + // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) + // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) + // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) + // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) + assert_eq!( + bf16::from_f64(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + bf16::from_f64(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + bf16::from_f64(250.49f64).to_bits(), + bf16::from_f64(250.0).to_bits() + ); + assert_eq!( + bf16::from_f64(250.50f64).to_bits(), + bf16::from_f64(250.0).to_bits() + ); + assert_eq!( + bf16::from_f64(250.51f64).to_bits(), + bf16::from_f64(251.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.49f64).to_bits(), + bf16::from_f64(251.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.50f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(251.51f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.49f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.50f64).to_bits(), + bf16::from_f64(252.0).to_bits() + ); + assert_eq!( + bf16::from_f64(252.51f64).to_bits(), + bf16::from_f64(253.0).to_bits() + ); + } + + impl quickcheck::Arbitrary for bf16 { + fn arbitrary(g: &mut quickcheck::Gen) -> Self { + bf16(u16::arbitrary(g)) + } + } + + #[quickcheck] + fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool { + let roundtrip = bf16::from_f32(f.to_f32()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } + + #[quickcheck] + fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool { + let roundtrip = bf16::from_f64(f.to_f64()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } +} diff --git a/vendor/half/src/bfloat/convert.rs b/vendor/half/src/bfloat/convert.rs new file mode 100644 index 0000000..8f258f5 --- /dev/null +++ b/vendor/half/src/bfloat/convert.rs @@ -0,0 +1,148 @@ +use crate::leading_zeros::leading_zeros_u16; +use core::mem; + +#[inline] +pub(crate) const fn f32_to_bf16(value: f32) -> u16 { + // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized + // Convert to raw bytes + let x: u32 = unsafe { mem::transmute(value) }; + + // check for NaN + if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 { + // Keep high part of current mantissa but also set most significiant mantissa bit + return ((x >> 16) | 0x0040u32) as u16; + } + + // round and shift + let round_bit = 0x0000_8000u32; + if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { + (x >> 16) as u16 + 1 + } else { + (x >> 16) as u16 + } +} + +#[inline] +pub(crate) const fn f64_to_bf16(value: f64) -> u16 { + // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized + // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always + // be lost on half-precision. + let val: u64 = unsafe { mem::transmute(value) }; + let x = (val >> 32) as u32; + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7FF0_0000u32; + let man = x & 0x000F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7FF0_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits). + // We also have to check the last 32 bits. + let nan_bit = if man == 0 && (val as u32 == 0) { + 0 + } else { + 0x0040u32 + }; + return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for bfloat16 precision + let unbiased_exp = ((exp >> 20) as i64) - 1023; + let half_exp = unbiased_exp + 127; + + // Check for exponent overflow, return +infinity + if half_exp >= 0xFF { + return (half_sign | 0x7F80u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 7 - half_exp > 21 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0010_0000u32; + let mut half_man = man >> (14 - half_exp); + // Check for rounding + let round_bit = 1 << (13 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 7; + let half_man = man >> 13; + // Check for rounding + let round_bit = 0x0000_1000u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +#[inline] +pub(crate) const fn bf16_to_f32(i: u16) -> f32 { + // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized + // If NaN, keep current mantissa but also set most significiant mantissa bit + if i & 0x7FFFu16 > 0x7F80u16 { + unsafe { mem::transmute((i as u32 | 0x0040u32) << 16) } + } else { + unsafe { mem::transmute((i as u32) << 16) } + } +} + +#[inline] +pub(crate) const fn bf16_to_f64(i: u16) -> f64 { + // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized + // Check for signed zero + if i & 0x7FFFu16 == 0 { + return unsafe { mem::transmute((i as u64) << 48) }; + } + + let half_sign = (i & 0x8000u16) as u64; + let half_exp = (i & 0x7F80u16) as u64; + let half_man = (i & 0x007Fu16) as u64; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7F80u64 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return unsafe { + mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45)) + }; + } + } + + // Calculate double-precision components with adjusted exponent + let sign = half_sign << 48; + // Unbias exponent + let unbiased_exp = ((half_exp as i64) >> 7) - 127; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = leading_zeros_u16(half_man as u16) - 9; + + // Rebias and adjust exponent + let exp = ((1023 - 127 - e) as u64) << 52; + let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64; + return unsafe { mem::transmute(sign | exp | man) }; + } + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 1023) as u64) << 52; + let man = (half_man & 0x007Fu64) << 45; + unsafe { mem::transmute(sign | exp | man) } +} diff --git a/vendor/half/src/binary16.rs b/vendor/half/src/binary16.rs new file mode 100644 index 0000000..b622f01 --- /dev/null +++ b/vendor/half/src/binary16.rs @@ -0,0 +1,1912 @@ +#[cfg(feature = "bytemuck")] +use bytemuck::{Pod, Zeroable}; +use core::{ + cmp::Ordering, + iter::{Product, Sum}, + num::FpCategory, + ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, +}; +#[cfg(not(target_arch = "spirv"))] +use core::{ + fmt::{ + Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, + }, + num::ParseFloatError, + str::FromStr, +}; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; +#[cfg(feature = "zerocopy")] +use zerocopy::{AsBytes, FromBytes}; + +pub(crate) mod convert; + +/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`] a.k.a `half` +/// format. +/// +/// This 16-bit floating point type is intended for efficient storage where the full range and +/// precision of a larger floating point value is not required. Because [`f16`] is primarily for +/// efficient storage, floating point operations such as addition, multiplication, etc. are not +/// implemented. Operations should be performed with [`f32`] or higher-precision types and converted +/// to/from [`f16`] as necessary. +/// +/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +#[allow(non_camel_case_types)] +#[derive(Clone, Copy, Default)] +#[repr(transparent)] +#[cfg_attr(feature = "serde", derive(Serialize))] +#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] +#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] +pub struct f16(u16); + +impl f16 { + /// Constructs a 16-bit floating point value from the raw bits. + #[inline] + #[must_use] + pub const fn from_bits(bits: u16) -> f16 { + f16(bits) + } + + /// Constructs a 16-bit floating point value from a 32-bit floating point value. + /// + /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + #[must_use] + pub fn from_f32(value: f32) -> f16 { + f16(convert::f32_to_f16(value)) + } + + /// Constructs a 16-bit floating point value from a 32-bit floating point value. + /// + /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred + /// in any non-`const` context. + /// + /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + #[must_use] + pub const fn from_f32_const(value: f32) -> f16 { + f16(convert::f32_to_f16_fallback(value)) + } + + /// Constructs a 16-bit floating point value from a 64-bit floating point value. + /// + /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + #[must_use] + pub fn from_f64(value: f64) -> f16 { + f16(convert::f64_to_f16(value)) + } + + /// Constructs a 16-bit floating point value from a 64-bit floating point value. + /// + /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred + /// in any non-`const` context. + /// + /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are + /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in + /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals + /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit + /// value. + #[inline] + #[must_use] + pub const fn from_f64_const(value: f64) -> f16 { + f16(convert::f64_to_f16_fallback(value)) + } + + /// Converts a [`f16`] into the underlying bit representation. + #[inline] + #[must_use] + pub const fn to_bits(self) -> u16 { + self.0 + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// little-endian byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_le_bytes(); + /// assert_eq!(bytes, [0x40, 0x4A]); + /// ``` + #[inline] + #[must_use] + pub const fn to_le_bytes(self) -> [u8; 2] { + self.0.to_le_bytes() + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// big-endian (network) byte order. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_be_bytes(); + /// assert_eq!(bytes, [0x4A, 0x40]); + /// ``` + #[inline] + #[must_use] + pub const fn to_be_bytes(self) -> [u8; 2] { + self.0.to_be_bytes() + } + + /// Returns the memory representation of the underlying bit representation as a byte array in + /// native byte order. + /// + /// As the target platform's native endianness is used, portable code should use + /// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate, + /// instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let bytes = f16::from_f32(12.5).to_ne_bytes(); + /// assert_eq!(bytes, if cfg!(target_endian = "big") { + /// [0x4A, 0x40] + /// } else { + /// [0x40, 0x4A] + /// }); + /// ``` + #[inline] + #[must_use] + pub const fn to_ne_bytes(self) -> [u8; 2] { + self.0.to_ne_bytes() + } + + /// Creates a floating point value from its representation as a byte array in little endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_le_bytes([0x40, 0x4A]); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_le_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_le_bytes(bytes)) + } + + /// Creates a floating point value from its representation as a byte array in big endian. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_be_bytes([0x4A, 0x40]); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_be_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_be_bytes(bytes)) + } + + /// Creates a floating point value from its representation as a byte array in native endian. + /// + /// As the target platform's native endianness is used, portable code likely wants to use + /// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as + /// appropriate instead. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") { + /// [0x4A, 0x40] + /// } else { + /// [0x40, 0x4A] + /// }); + /// assert_eq!(value, f16::from_f32(12.5)); + /// ``` + #[inline] + #[must_use] + pub const fn from_ne_bytes(bytes: [u8; 2]) -> f16 { + f16::from_bits(u16::from_ne_bytes(bytes)) + } + + /// Converts a [`f16`] value into a `f32` value. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 32-bit floating point. + #[inline] + #[must_use] + pub fn to_f32(self) -> f32 { + convert::f16_to_f32(self.0) + } + + /// Converts a [`f16`] value into a `f32` value. + /// + /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred + /// in any non-`const` context. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 32-bit floating point. + #[inline] + #[must_use] + pub const fn to_f32_const(self) -> f32 { + convert::f16_to_f32_fallback(self.0) + } + + /// Converts a [`f16`] value into a `f64` value. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 64-bit floating point. + #[inline] + #[must_use] + pub fn to_f64(self) -> f64 { + convert::f16_to_f64(self.0) + } + + /// Converts a [`f16`] value into a `f64` value. + /// + /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware + /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred + /// in any non-`const` context. + /// + /// This conversion is lossless as all 16-bit floating point values can be represented exactly + /// in 64-bit floating point. + #[inline] + #[must_use] + pub const fn to_f64_const(self) -> f64 { + convert::f16_to_f64_fallback(self.0) + } + + /// Returns `true` if this value is `NaN` and `false` otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0_f32); + /// + /// assert!(nan.is_nan()); + /// assert!(!f.is_nan()); + /// ``` + #[inline] + #[must_use] + pub const fn is_nan(self) -> bool { + self.0 & 0x7FFFu16 > 0x7C00u16 + } + + /// Returns `true` if this value is ±∞ and `false`. + /// otherwise. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(7.0f32); + /// let inf = f16::INFINITY; + /// let neg_inf = f16::NEG_INFINITY; + /// let nan = f16::NAN; + /// + /// assert!(!f.is_infinite()); + /// assert!(!nan.is_infinite()); + /// + /// assert!(inf.is_infinite()); + /// assert!(neg_inf.is_infinite()); + /// ``` + #[inline] + #[must_use] + pub const fn is_infinite(self) -> bool { + self.0 & 0x7FFFu16 == 0x7C00u16 + } + + /// Returns `true` if this number is neither infinite nor `NaN`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(7.0f32); + /// let inf = f16::INFINITY; + /// let neg_inf = f16::NEG_INFINITY; + /// let nan = f16::NAN; + /// + /// assert!(f.is_finite()); + /// + /// assert!(!nan.is_finite()); + /// assert!(!inf.is_finite()); + /// assert!(!neg_inf.is_finite()); + /// ``` + #[inline] + #[must_use] + pub const fn is_finite(self) -> bool { + self.0 & 0x7C00u16 != 0x7C00u16 + } + + /// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let min = f16::MIN_POSITIVE; + /// let max = f16::MAX; + /// let lower_than_min = f16::from_f32(1.0e-10_f32); + /// let zero = f16::from_f32(0.0_f32); + /// + /// assert!(min.is_normal()); + /// assert!(max.is_normal()); + /// + /// assert!(!zero.is_normal()); + /// assert!(!f16::NAN.is_normal()); + /// assert!(!f16::INFINITY.is_normal()); + /// // Values between `0` and `min` are Subnormal. + /// assert!(!lower_than_min.is_normal()); + /// ``` + #[inline] + #[must_use] + pub const fn is_normal(self) -> bool { + let exp = self.0 & 0x7C00u16; + exp != 0x7C00u16 && exp != 0 + } + + /// Returns the floating point category of the number. + /// + /// If only one property is going to be tested, it is generally faster to use the specific + /// predicate instead. + /// + /// # Examples + /// + /// ```rust + /// use std::num::FpCategory; + /// # use half::prelude::*; + /// + /// let num = f16::from_f32(12.4_f32); + /// let inf = f16::INFINITY; + /// + /// assert_eq!(num.classify(), FpCategory::Normal); + /// assert_eq!(inf.classify(), FpCategory::Infinite); + /// ``` + #[must_use] + pub const fn classify(self) -> FpCategory { + let exp = self.0 & 0x7C00u16; + let man = self.0 & 0x03FFu16; + match (exp, man) { + (0, 0) => FpCategory::Zero, + (0, _) => FpCategory::Subnormal, + (0x7C00u16, 0) => FpCategory::Infinite, + (0x7C00u16, _) => FpCategory::Nan, + _ => FpCategory::Normal, + } + } + + /// Returns a number that represents the sign of `self`. + /// + /// * `1.0` if the number is positive, `+0.0` or [`INFINITY`][f16::INFINITY] + /// * `-1.0` if the number is negative, `-0.0` or [`NEG_INFINITY`][f16::NEG_INFINITY] + /// * [`NAN`][f16::NAN] if the number is `NaN` + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let f = f16::from_f32(3.5_f32); + /// + /// assert_eq!(f.signum(), f16::from_f32(1.0)); + /// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-1.0)); + /// + /// assert!(f16::NAN.signum().is_nan()); + /// ``` + #[must_use] + pub const fn signum(self) -> f16 { + if self.is_nan() { + self + } else if self.0 & 0x8000u16 != 0 { + Self::NEG_ONE + } else { + Self::ONE + } + } + + /// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a + /// positive sign bit and +∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0_f32); + /// let g = f16::from_f32(-7.0_f32); + /// + /// assert!(f.is_sign_positive()); + /// assert!(!g.is_sign_positive()); + /// // `NaN` can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + #[must_use] + pub const fn is_sign_positive(self) -> bool { + self.0 & 0x8000u16 == 0 + } + + /// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a + /// negative sign bit and −∞. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// + /// let nan = f16::NAN; + /// let f = f16::from_f32(7.0f32); + /// let g = f16::from_f32(-7.0f32); + /// + /// assert!(!f.is_sign_negative()); + /// assert!(g.is_sign_negative()); + /// // `NaN` can be either positive or negative + /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); + /// ``` + #[inline] + #[must_use] + pub const fn is_sign_negative(self) -> bool { + self.0 & 0x8000u16 != 0 + } + + /// Returns a number composed of the magnitude of `self` and the sign of `sign`. + /// + /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. + /// If `self` is NaN, then NaN with the sign of `sign` is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let f = f16::from_f32(3.5); + /// + /// assert_eq!(f.copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); + /// assert_eq!(f.copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); + /// assert_eq!((-f).copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); + /// assert_eq!((-f).copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); + /// + /// assert!(f16::NAN.copysign(f16::from_f32(1.0)).is_nan()); + /// ``` + #[inline] + #[must_use] + pub const fn copysign(self, sign: f16) -> f16 { + f16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) + } + + /// Returns the maximum of the two numbers. + /// + /// If one of the arguments is NaN, then the other argument is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let x = f16::from_f32(1.0); + /// let y = f16::from_f32(2.0); + /// + /// assert_eq!(x.max(y), y); + /// ``` + #[inline] + #[must_use] + pub fn max(self, other: f16) -> f16 { + if other > self && !other.is_nan() { + other + } else { + self + } + } + + /// Returns the minimum of the two numbers. + /// + /// If one of the arguments is NaN, then the other argument is returned. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// let x = f16::from_f32(1.0); + /// let y = f16::from_f32(2.0); + /// + /// assert_eq!(x.min(y), x); + /// ``` + #[inline] + #[must_use] + pub fn min(self, other: f16) -> f16 { + if other < self && !other.is_nan() { + other + } else { + self + } + } + + /// Restrict a value to a certain interval unless it is NaN. + /// + /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. + /// Otherwise this returns `self`. + /// + /// Note that this function returns NaN if the initial value was NaN as well. + /// + /// # Panics + /// Panics if `min > max`, `min` is NaN, or `max` is NaN. + /// + /// # Examples + /// + /// ``` + /// # use half::prelude::*; + /// assert!(f16::from_f32(-3.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(-2.0)); + /// assert!(f16::from_f32(0.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(0.0)); + /// assert!(f16::from_f32(2.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(1.0)); + /// assert!(f16::NAN.clamp(f16::from_f32(-2.0), f16::from_f32(1.0)).is_nan()); + /// ``` + #[inline] + #[must_use] + pub fn clamp(self, min: f16, max: f16) -> f16 { + assert!(min <= max); + let mut x = self; + if x < min { + x = min; + } + if x > max { + x = max; + } + x + } + + /// Returns the ordering between `self` and `other`. + /// + /// Unlike the standard partial comparison between floating point numbers, + /// this comparison always produces an ordering in accordance to + /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) + /// floating point standard. The values are ordered in the following sequence: + /// + /// - negative quiet NaN + /// - negative signaling NaN + /// - negative infinity + /// - negative numbers + /// - negative subnormal numbers + /// - negative zero + /// - positive zero + /// - positive subnormal numbers + /// - positive numbers + /// - positive infinity + /// - positive signaling NaN + /// - positive quiet NaN. + /// + /// The ordering established by this function does not always agree with the + /// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example, + /// they consider negative and positive zero equal, while `total_cmp` + /// doesn't. + /// + /// The interpretation of the signaling NaN bit follows the definition in + /// the IEEE 754 standard, which may not match the interpretation by some of + /// the older, non-conformant (e.g. MIPS) hardware implementations. + /// + /// # Examples + /// ``` + /// # use half::f16; + /// let mut v: Vec<f16> = vec![]; + /// v.push(f16::ONE); + /// v.push(f16::INFINITY); + /// v.push(f16::NEG_INFINITY); + /// v.push(f16::NAN); + /// v.push(f16::MAX_SUBNORMAL); + /// v.push(-f16::MAX_SUBNORMAL); + /// v.push(f16::ZERO); + /// v.push(f16::NEG_ZERO); + /// v.push(f16::NEG_ONE); + /// v.push(f16::MIN_POSITIVE); + /// + /// v.sort_by(|a, b| a.total_cmp(&b)); + /// + /// assert!(v + /// .into_iter() + /// .zip( + /// [ + /// f16::NEG_INFINITY, + /// f16::NEG_ONE, + /// -f16::MAX_SUBNORMAL, + /// f16::NEG_ZERO, + /// f16::ZERO, + /// f16::MAX_SUBNORMAL, + /// f16::MIN_POSITIVE, + /// f16::ONE, + /// f16::INFINITY, + /// f16::NAN + /// ] + /// .iter() + /// ) + /// .all(|(a, b)| a.to_bits() == b.to_bits())); + /// ``` + // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp + #[inline] + #[must_use] + pub fn total_cmp(&self, other: &Self) -> Ordering { + let mut left = self.to_bits() as i16; + let mut right = other.to_bits() as i16; + left ^= (((left >> 15) as u16) >> 1) as i16; + right ^= (((right >> 15) as u16) >> 1) as i16; + left.cmp(&right) + } + + /// Alternate serialize adapter for serializing as a float. + /// + /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize + /// implementation that serializes as an [`f32`] value. It is designed for use with + /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by + /// the default deserialize implementation. + /// + /// # Examples + /// + /// A demonstration on how to use this adapater: + /// + /// ``` + /// use serde::{Serialize, Deserialize}; + /// use half::f16; + /// + /// #[derive(Serialize, Deserialize)] + /// struct MyStruct { + /// #[serde(serialize_with = "f16::serialize_as_f32")] + /// value: f16 // Will be serialized as f32 instead of u16 + /// } + /// ``` + #[cfg(feature = "serde")] + pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { + serializer.serialize_f32(self.to_f32()) + } + + /// Alternate serialize adapter for serializing as a string. + /// + /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize + /// implementation that serializes as a string value. It is designed for use with + /// `serialize_with` serde attributes. Deserialization from string values is already supported + /// by the default deserialize implementation. + /// + /// # Examples + /// + /// A demonstration on how to use this adapater: + /// + /// ``` + /// use serde::{Serialize, Deserialize}; + /// use half::f16; + /// + /// #[derive(Serialize, Deserialize)] + /// struct MyStruct { + /// #[serde(serialize_with = "f16::serialize_as_string")] + /// value: f16 // Will be serialized as a string instead of u16 + /// } + /// ``` + #[cfg(feature = "serde")] + pub fn serialize_as_string<S: serde::Serializer>( + &self, + serializer: S, + ) -> Result<S::Ok, S::Error> { + serializer.serialize_str(&self.to_string()) + } + + /// Approximate number of [`f16`] significant digits in base 10 + pub const DIGITS: u32 = 3; + /// [`f16`] + /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value + /// + /// This is the difference between 1.0 and the next largest representable number. + pub const EPSILON: f16 = f16(0x1400u16); + /// [`f16`] positive Infinity (+∞) + pub const INFINITY: f16 = f16(0x7C00u16); + /// Number of [`f16`] significant digits in base 2 + pub const MANTISSA_DIGITS: u32 = 11; + /// Largest finite [`f16`] value + pub const MAX: f16 = f16(0x7BFF); + /// Maximum possible [`f16`] power of 10 exponent + pub const MAX_10_EXP: i32 = 4; + /// Maximum possible [`f16`] power of 2 exponent + pub const MAX_EXP: i32 = 16; + /// Smallest finite [`f16`] value + pub const MIN: f16 = f16(0xFBFF); + /// Minimum possible normal [`f16`] power of 10 exponent + pub const MIN_10_EXP: i32 = -4; + /// One greater than the minimum possible normal [`f16`] power of 2 exponent + pub const MIN_EXP: i32 = -13; + /// Smallest positive normal [`f16`] value + pub const MIN_POSITIVE: f16 = f16(0x0400u16); + /// [`f16`] Not a Number (NaN) + pub const NAN: f16 = f16(0x7E00u16); + /// [`f16`] negative infinity (-∞) + pub const NEG_INFINITY: f16 = f16(0xFC00u16); + /// The radix or base of the internal representation of [`f16`] + pub const RADIX: u32 = 2; + + /// Minimum positive subnormal [`f16`] value + pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(0x0001u16); + /// Maximum subnormal [`f16`] value + pub const MAX_SUBNORMAL: f16 = f16(0x03FFu16); + + /// [`f16`] 1 + pub const ONE: f16 = f16(0x3C00u16); + /// [`f16`] 0 + pub const ZERO: f16 = f16(0x0000u16); + /// [`f16`] -0 + pub const NEG_ZERO: f16 = f16(0x8000u16); + /// [`f16`] -1 + pub const NEG_ONE: f16 = f16(0xBC00u16); + + /// [`f16`] Euler's number (ℯ) + pub const E: f16 = f16(0x4170u16); + /// [`f16`] Archimedes' constant (π) + pub const PI: f16 = f16(0x4248u16); + /// [`f16`] 1/π + pub const FRAC_1_PI: f16 = f16(0x3518u16); + /// [`f16`] 1/√2 + pub const FRAC_1_SQRT_2: f16 = f16(0x39A8u16); + /// [`f16`] 2/π + pub const FRAC_2_PI: f16 = f16(0x3918u16); + /// [`f16`] 2/√π + pub const FRAC_2_SQRT_PI: f16 = f16(0x3C83u16); + /// [`f16`] π/2 + pub const FRAC_PI_2: f16 = f16(0x3E48u16); + /// [`f16`] π/3 + pub const FRAC_PI_3: f16 = f16(0x3C30u16); + /// [`f16`] π/4 + pub const FRAC_PI_4: f16 = f16(0x3A48u16); + /// [`f16`] π/6 + pub const FRAC_PI_6: f16 = f16(0x3830u16); + /// [`f16`] π/8 + pub const FRAC_PI_8: f16 = f16(0x3648u16); + /// [`f16`] 𝗅𝗇 10 + pub const LN_10: f16 = f16(0x409Bu16); + /// [`f16`] 𝗅𝗇 2 + pub const LN_2: f16 = f16(0x398Cu16); + /// [`f16`] 𝗅𝗈𝗀₁₀ℯ + pub const LOG10_E: f16 = f16(0x36F3u16); + /// [`f16`] 𝗅𝗈𝗀₁₀2 + pub const LOG10_2: f16 = f16(0x34D1u16); + /// [`f16`] 𝗅𝗈𝗀₂ℯ + pub const LOG2_E: f16 = f16(0x3DC5u16); + /// [`f16`] 𝗅𝗈𝗀₂10 + pub const LOG2_10: f16 = f16(0x42A5u16); + /// [`f16`] √2 + pub const SQRT_2: f16 = f16(0x3DA8u16); +} + +impl From<f16> for f32 { + #[inline] + fn from(x: f16) -> f32 { + x.to_f32() + } +} + +impl From<f16> for f64 { + #[inline] + fn from(x: f16) -> f64 { + x.to_f64() + } +} + +impl From<i8> for f16 { + #[inline] + fn from(x: i8) -> f16 { + // Convert to f32, then to f16 + f16::from_f32(f32::from(x)) + } +} + +impl From<u8> for f16 { + #[inline] + fn from(x: u8) -> f16 { + // Convert to f32, then to f16 + f16::from_f32(f32::from(x)) + } +} + +impl PartialEq for f16 { + fn eq(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) + } + } +} + +impl PartialOrd for f16 { + fn partial_cmp(&self, other: &f16) -> Option<Ordering> { + if self.is_nan() || other.is_nan() { + None + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => Some(self.0.cmp(&other.0)), + (false, true) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Greater) + } + } + (true, false) => { + if (self.0 | other.0) & 0x7FFFu16 == 0 { + Some(Ordering::Equal) + } else { + Some(Ordering::Less) + } + } + (true, true) => Some(other.0.cmp(&self.0)), + } + } + } + + fn lt(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 < other.0, + (false, true) => false, + (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, true) => self.0 > other.0, + } + } + } + + fn le(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 <= other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, false) => true, + (true, true) => self.0 >= other.0, + } + } + } + + fn gt(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 > other.0, + (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, + (true, false) => false, + (true, true) => self.0 < other.0, + } + } + } + + fn ge(&self, other: &f16) -> bool { + if self.is_nan() || other.is_nan() { + false + } else { + let neg = self.0 & 0x8000u16 != 0; + let other_neg = other.0 & 0x8000u16 != 0; + match (neg, other_neg) { + (false, false) => self.0 >= other.0, + (false, true) => true, + (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, + (true, true) => self.0 <= other.0, + } + } + } +} + +#[cfg(not(target_arch = "spirv"))] +impl FromStr for f16 { + type Err = ParseFloatError; + fn from_str(src: &str) -> Result<f16, ParseFloatError> { + f32::from_str(src).map(f16::from_f32) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Debug for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:?}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Display for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerExp for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:e}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperExp for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:E}", self.to_f32()) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Binary for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:b}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl Octal for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:o}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl LowerHex for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:x}", self.0) + } +} + +#[cfg(not(target_arch = "spirv"))] +impl UpperHex for f16 { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + write!(f, "{:X}", self.0) + } +} + +impl Neg for f16 { + type Output = Self; + + #[inline] + fn neg(self) -> Self::Output { + Self(self.0 ^ 0x8000) + } +} + +impl Neg for &f16 { + type Output = <f16 as Neg>::Output; + + #[inline] + fn neg(self) -> Self::Output { + Neg::neg(*self) + } +} + +impl Add for f16 { + type Output = Self; + + #[inline] + fn add(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) + } +} + +impl Add<&f16> for f16 { + type Output = <f16 as Add<f16>>::Output; + + #[inline] + fn add(self, rhs: &f16) -> Self::Output { + self.add(*rhs) + } +} + +impl Add<&f16> for &f16 { + type Output = <f16 as Add<f16>>::Output; + + #[inline] + fn add(self, rhs: &f16) -> Self::Output { + (*self).add(*rhs) + } +} + +impl Add<f16> for &f16 { + type Output = <f16 as Add<f16>>::Output; + + #[inline] + fn add(self, rhs: f16) -> Self::Output { + (*self).add(rhs) + } +} + +impl AddAssign for f16 { + #[inline] + fn add_assign(&mut self, rhs: Self) { + *self = (*self).add(rhs); + } +} + +impl AddAssign<&f16> for f16 { + #[inline] + fn add_assign(&mut self, rhs: &f16) { + *self = (*self).add(rhs); + } +} + +impl Sub for f16 { + type Output = Self; + + #[inline] + fn sub(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) + } +} + +impl Sub<&f16> for f16 { + type Output = <f16 as Sub<f16>>::Output; + + #[inline] + fn sub(self, rhs: &f16) -> Self::Output { + self.sub(*rhs) + } +} + +impl Sub<&f16> for &f16 { + type Output = <f16 as Sub<f16>>::Output; + + #[inline] + fn sub(self, rhs: &f16) -> Self::Output { + (*self).sub(*rhs) + } +} + +impl Sub<f16> for &f16 { + type Output = <f16 as Sub<f16>>::Output; + + #[inline] + fn sub(self, rhs: f16) -> Self::Output { + (*self).sub(rhs) + } +} + +impl SubAssign for f16 { + #[inline] + fn sub_assign(&mut self, rhs: Self) { + *self = (*self).sub(rhs); + } +} + +impl SubAssign<&f16> for f16 { + #[inline] + fn sub_assign(&mut self, rhs: &f16) { + *self = (*self).sub(rhs); + } +} + +impl Mul for f16 { + type Output = Self; + + #[inline] + fn mul(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) + } +} + +impl Mul<&f16> for f16 { + type Output = <f16 as Mul<f16>>::Output; + + #[inline] + fn mul(self, rhs: &f16) -> Self::Output { + self.mul(*rhs) + } +} + +impl Mul<&f16> for &f16 { + type Output = <f16 as Mul<f16>>::Output; + + #[inline] + fn mul(self, rhs: &f16) -> Self::Output { + (*self).mul(*rhs) + } +} + +impl Mul<f16> for &f16 { + type Output = <f16 as Mul<f16>>::Output; + + #[inline] + fn mul(self, rhs: f16) -> Self::Output { + (*self).mul(rhs) + } +} + +impl MulAssign for f16 { + #[inline] + fn mul_assign(&mut self, rhs: Self) { + *self = (*self).mul(rhs); + } +} + +impl MulAssign<&f16> for f16 { + #[inline] + fn mul_assign(&mut self, rhs: &f16) { + *self = (*self).mul(rhs); + } +} + +impl Div for f16 { + type Output = Self; + + #[inline] + fn div(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) + } +} + +impl Div<&f16> for f16 { + type Output = <f16 as Div<f16>>::Output; + + #[inline] + fn div(self, rhs: &f16) -> Self::Output { + self.div(*rhs) + } +} + +impl Div<&f16> for &f16 { + type Output = <f16 as Div<f16>>::Output; + + #[inline] + fn div(self, rhs: &f16) -> Self::Output { + (*self).div(*rhs) + } +} + +impl Div<f16> for &f16 { + type Output = <f16 as Div<f16>>::Output; + + #[inline] + fn div(self, rhs: f16) -> Self::Output { + (*self).div(rhs) + } +} + +impl DivAssign for f16 { + #[inline] + fn div_assign(&mut self, rhs: Self) { + *self = (*self).div(rhs); + } +} + +impl DivAssign<&f16> for f16 { + #[inline] + fn div_assign(&mut self, rhs: &f16) { + *self = (*self).div(rhs); + } +} + +impl Rem for f16 { + type Output = Self; + + #[inline] + fn rem(self, rhs: Self) -> Self::Output { + Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) + } +} + +impl Rem<&f16> for f16 { + type Output = <f16 as Rem<f16>>::Output; + + #[inline] + fn rem(self, rhs: &f16) -> Self::Output { + self.rem(*rhs) + } +} + +impl Rem<&f16> for &f16 { + type Output = <f16 as Rem<f16>>::Output; + + #[inline] + fn rem(self, rhs: &f16) -> Self::Output { + (*self).rem(*rhs) + } +} + +impl Rem<f16> for &f16 { + type Output = <f16 as Rem<f16>>::Output; + + #[inline] + fn rem(self, rhs: f16) -> Self::Output { + (*self).rem(rhs) + } +} + +impl RemAssign for f16 { + #[inline] + fn rem_assign(&mut self, rhs: Self) { + *self = (*self).rem(rhs); + } +} + +impl RemAssign<&f16> for f16 { + #[inline] + fn rem_assign(&mut self, rhs: &f16) { + *self = (*self).rem(rhs); + } +} + +impl Product for f16 { + #[inline] + fn product<I: Iterator<Item = Self>>(iter: I) -> Self { + f16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +impl<'a> Product<&'a f16> for f16 { + #[inline] + fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self { + f16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +impl Sum for f16 { + #[inline] + fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { + f16::from_f32(iter.map(|f| f.to_f32()).sum()) + } +} + +impl<'a> Sum<&'a f16> for f16 { + #[inline] + fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self { + f16::from_f32(iter.map(|f| f.to_f32()).product()) + } +} + +#[cfg(feature = "serde")] +struct Visitor; + +#[cfg(feature = "serde")] +impl<'de> Deserialize<'de> for f16 { + fn deserialize<D>(deserializer: D) -> Result<f16, D::Error> + where + D: serde::de::Deserializer<'de>, + { + deserializer.deserialize_newtype_struct("f16", Visitor) + } +} + +#[cfg(feature = "serde")] +impl<'de> serde::de::Visitor<'de> for Visitor { + type Value = f16; + + fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { + write!(formatter, "tuple struct f16") + } + + fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> + where + D: serde::Deserializer<'de>, + { + Ok(f16(<u16 as Deserialize>::deserialize(deserializer)?)) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + v.parse().map_err(|_| { + serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") + }) + } + + fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(f16::from_f32(v)) + } + + fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(f16::from_f64(v)) + } +} + +#[allow( + clippy::cognitive_complexity, + clippy::float_cmp, + clippy::neg_cmp_op_on_partial_ord +)] +#[cfg(test)] +mod test { + use super::*; + use core::cmp::Ordering; + #[cfg(feature = "num-traits")] + use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; + use quickcheck_macros::quickcheck; + + #[cfg(feature = "num-traits")] + #[test] + fn as_primitive() { + let two = f16::from_f32(2.0); + assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two); + assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2); + + assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two); + assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0); + + assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two); + assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0); + } + + #[cfg(feature = "num-traits")] + #[test] + fn to_primitive() { + let two = f16::from_f32(2.0); + assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); + assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); + assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); + } + + #[cfg(feature = "num-traits")] + #[test] + fn from_primitive() { + let two = f16::from_f32(2.0); + assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two); + assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two); + assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two); + } + + #[test] + fn test_f16_consts() { + // DIGITS + let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; + assert_eq!(f16::DIGITS, digits); + // sanity check to show test is good + let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; + assert_eq!(core::f32::DIGITS, digits32); + + // EPSILON + let one = f16::from_f32(1.0); + let one_plus_epsilon = f16::from_bits(one.to_bits() + 1); + let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0); + assert_eq!(f16::EPSILON, epsilon); + // sanity check to show test is good + let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1); + let epsilon32 = one_plus_epsilon32 - 1f32; + assert_eq!(core::f32::EPSILON, epsilon32); + + // MAX, MIN and MIN_POSITIVE + let max = f16::from_bits(f16::INFINITY.to_bits() - 1); + let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1); + let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1)); + assert_eq!(f16::MAX, max); + assert_eq!(f16::MIN, min); + assert_eq!(f16::MIN_POSITIVE, min_pos); + // sanity check to show test is good + let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1); + let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1); + let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1); + assert_eq!(core::f32::MAX, max32); + assert_eq!(core::f32::MIN, min32); + assert_eq!(core::f32::MIN_POSITIVE, min_pos32); + + // MIN_10_EXP and MAX_10_EXP + let ten_to_min = 10f32.powi(f16::MIN_10_EXP); + assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32()); + assert!(ten_to_min > f16::MIN_POSITIVE.to_f32()); + let ten_to_max = 10f32.powi(f16::MAX_10_EXP); + assert!(ten_to_max < f16::MAX.to_f32()); + assert!(ten_to_max * 10.0 > f16::MAX.to_f32()); + // sanity check to show test is good + let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP); + assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE)); + assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE)); + let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP); + assert!(ten_to_max32 < f64::from(core::f32::MAX)); + assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX)); + } + + #[test] + fn test_f16_consts_from_f32() { + let one = f16::from_f32(1.0); + let zero = f16::from_f32(0.0); + let neg_zero = f16::from_f32(-0.0); + let neg_one = f16::from_f32(-1.0); + let inf = f16::from_f32(core::f32::INFINITY); + let neg_inf = f16::from_f32(core::f32::NEG_INFINITY); + let nan = f16::from_f32(core::f32::NAN); + + assert_eq!(f16::ONE, one); + assert_eq!(f16::ZERO, zero); + assert!(zero.is_sign_positive()); + assert_eq!(f16::NEG_ZERO, neg_zero); + assert!(neg_zero.is_sign_negative()); + assert_eq!(f16::NEG_ONE, neg_one); + assert!(neg_one.is_sign_negative()); + assert_eq!(f16::INFINITY, inf); + assert_eq!(f16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(f16::NAN.is_nan()); + + let e = f16::from_f32(core::f32::consts::E); + let pi = f16::from_f32(core::f32::consts::PI); + let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI); + let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2); + let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI); + let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2); + let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3); + let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4); + let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6); + let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8); + let ln_10 = f16::from_f32(core::f32::consts::LN_10); + let ln_2 = f16::from_f32(core::f32::consts::LN_2); + let log10_e = f16::from_f32(core::f32::consts::LOG10_E); + // core::f32::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = f16::from_f32(2f32.log10()); + let log2_e = f16::from_f32(core::f32::consts::LOG2_E); + // core::f32::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = f16::from_f32(10f32.log2()); + let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2); + + assert_eq!(f16::E, e); + assert_eq!(f16::PI, pi); + assert_eq!(f16::FRAC_1_PI, frac_1_pi); + assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(f16::FRAC_2_PI, frac_2_pi); + assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(f16::FRAC_PI_2, frac_pi_2); + assert_eq!(f16::FRAC_PI_3, frac_pi_3); + assert_eq!(f16::FRAC_PI_4, frac_pi_4); + assert_eq!(f16::FRAC_PI_6, frac_pi_6); + assert_eq!(f16::FRAC_PI_8, frac_pi_8); + assert_eq!(f16::LN_10, ln_10); + assert_eq!(f16::LN_2, ln_2); + assert_eq!(f16::LOG10_E, log10_e); + assert_eq!(f16::LOG10_2, log10_2); + assert_eq!(f16::LOG2_E, log2_e); + assert_eq!(f16::LOG2_10, log2_10); + assert_eq!(f16::SQRT_2, sqrt_2); + } + + #[test] + fn test_f16_consts_from_f64() { + let one = f16::from_f64(1.0); + let zero = f16::from_f64(0.0); + let neg_zero = f16::from_f64(-0.0); + let inf = f16::from_f64(core::f64::INFINITY); + let neg_inf = f16::from_f64(core::f64::NEG_INFINITY); + let nan = f16::from_f64(core::f64::NAN); + + assert_eq!(f16::ONE, one); + assert_eq!(f16::ZERO, zero); + assert!(zero.is_sign_positive()); + assert_eq!(f16::NEG_ZERO, neg_zero); + assert!(neg_zero.is_sign_negative()); + assert_eq!(f16::INFINITY, inf); + assert_eq!(f16::NEG_INFINITY, neg_inf); + assert!(nan.is_nan()); + assert!(f16::NAN.is_nan()); + + let e = f16::from_f64(core::f64::consts::E); + let pi = f16::from_f64(core::f64::consts::PI); + let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI); + let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2); + let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI); + let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); + let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2); + let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3); + let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4); + let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6); + let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8); + let ln_10 = f16::from_f64(core::f64::consts::LN_10); + let ln_2 = f16::from_f64(core::f64::consts::LN_2); + let log10_e = f16::from_f64(core::f64::consts::LOG10_E); + // core::f64::consts::LOG10_2 requires rustc 1.43.0 + let log10_2 = f16::from_f64(2f64.log10()); + let log2_e = f16::from_f64(core::f64::consts::LOG2_E); + // core::f64::consts::LOG2_10 requires rustc 1.43.0 + let log2_10 = f16::from_f64(10f64.log2()); + let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2); + + assert_eq!(f16::E, e); + assert_eq!(f16::PI, pi); + assert_eq!(f16::FRAC_1_PI, frac_1_pi); + assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); + assert_eq!(f16::FRAC_2_PI, frac_2_pi); + assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); + assert_eq!(f16::FRAC_PI_2, frac_pi_2); + assert_eq!(f16::FRAC_PI_3, frac_pi_3); + assert_eq!(f16::FRAC_PI_4, frac_pi_4); + assert_eq!(f16::FRAC_PI_6, frac_pi_6); + assert_eq!(f16::FRAC_PI_8, frac_pi_8); + assert_eq!(f16::LN_10, ln_10); + assert_eq!(f16::LN_2, ln_2); + assert_eq!(f16::LOG10_E, log10_e); + assert_eq!(f16::LOG10_2, log10_2); + assert_eq!(f16::LOG2_E, log2_e); + assert_eq!(f16::LOG2_10, log2_10); + assert_eq!(f16::SQRT_2, sqrt_2); + } + + #[test] + fn test_nan_conversion_to_smaller() { + let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); + let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_64 = nan64 as f32; + let neg_nan32_from_64 = neg_nan64 as f32; + let nan16_from_64 = f16::from_f64(nan64); + let neg_nan16_from_64 = f16::from_f64(neg_nan64); + let nan16_from_32 = f16::from_f32(nan32); + let neg_nan16_from_32 = f16::from_f32(neg_nan32); + + assert!(nan64.is_nan() && nan64.is_sign_positive()); + assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); + assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); + assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); + assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); + assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); + assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); + } + + #[test] + fn test_nan_conversion_to_larger() { + let nan16 = f16::from_bits(0x7C01u16); + let neg_nan16 = f16::from_bits(0xFC01u16); + let nan32 = f32::from_bits(0x7F80_0001u32); + let neg_nan32 = f32::from_bits(0xFF80_0001u32); + let nan32_from_16 = f32::from(nan16); + let neg_nan32_from_16 = f32::from(neg_nan16); + let nan64_from_16 = f64::from(nan16); + let neg_nan64_from_16 = f64::from(neg_nan16); + let nan64_from_32 = f64::from(nan32); + let neg_nan64_from_32 = f64::from(neg_nan32); + + assert!(nan16.is_nan() && nan16.is_sign_positive()); + assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); + assert!(nan32.is_nan() && nan32.is_sign_positive()); + assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); + assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); + assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); + assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); + assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); + assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); + assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); + } + + #[test] + fn test_f16_to_f32() { + let f = f16::from_f32(7.0); + assert_eq!(f.to_f32(), 7.0f32); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = f16::from_f32(7.1); + let diff = (f.to_f32() - 7.1f32).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * f16::EPSILON.to_f32()); + + assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24)); + assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24)); + + assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24))); + assert_eq!( + f16::from_bits(0x0000_0005), + f16::from_f32(5.0 * 2.0f32.powi(-24)) + ); + } + + #[test] + fn test_f16_to_f64() { + let f = f16::from_f64(7.0); + assert_eq!(f.to_f64(), 7.0f64); + + // 7.1 is NOT exactly representable in 16-bit, it's rounded + let f = f16::from_f64(7.1); + let diff = (f.to_f64() - 7.1f64).abs(); + // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 + assert!(diff <= 4.0 * f16::EPSILON.to_f64()); + + assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24)); + assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24)); + + assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24))); + assert_eq!( + f16::from_bits(0x0000_0005), + f16::from_f64(5.0 * 2.0f64.powi(-24)) + ); + } + + #[test] + fn test_comparisons() { + let zero = f16::from_f64(0.0); + let one = f16::from_f64(1.0); + let neg_zero = f16::from_f64(-0.0); + let neg_one = f16::from_f64(-1.0); + + assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); + assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); + assert!(zero == neg_zero); + assert!(neg_zero == zero); + assert!(!(zero != neg_zero)); + assert!(!(neg_zero != zero)); + assert!(!(zero < neg_zero)); + assert!(!(neg_zero < zero)); + assert!(zero <= neg_zero); + assert!(neg_zero <= zero); + assert!(!(zero > neg_zero)); + assert!(!(neg_zero > zero)); + assert!(zero >= neg_zero); + assert!(neg_zero >= zero); + + assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); + assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_zero)); + assert!(!(neg_zero == one)); + assert!(one != neg_zero); + assert!(neg_zero != one); + assert!(!(one < neg_zero)); + assert!(neg_zero < one); + assert!(!(one <= neg_zero)); + assert!(neg_zero <= one); + assert!(one > neg_zero); + assert!(!(neg_zero > one)); + assert!(one >= neg_zero); + assert!(!(neg_zero >= one)); + + assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); + assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); + assert!(!(one == neg_one)); + assert!(!(neg_one == one)); + assert!(one != neg_one); + assert!(neg_one != one); + assert!(!(one < neg_one)); + assert!(neg_one < one); + assert!(!(one <= neg_one)); + assert!(neg_one <= one); + assert!(one > neg_one); + assert!(!(neg_one > one)); + assert!(one >= neg_one); + assert!(!(neg_one >= one)); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f32() { + // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 + let min_sub = f16::from_bits(1); + let min_sub_f = (-24f32).exp2(); + assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) + // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) + // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f32(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f32(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) + // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) + // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + f16::from_f32(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) + // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) + // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) + assert_eq!( + f16::from_f32(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f32(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + f16::from_f32(2000.49f32).to_bits(), + f16::from_f32(2000.0).to_bits() + ); + assert_eq!( + f16::from_f32(2000.50f32).to_bits(), + f16::from_f32(2000.0).to_bits() + ); + assert_eq!( + f16::from_f32(2000.51f32).to_bits(), + f16::from_f32(2001.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.49f32).to_bits(), + f16::from_f32(2001.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.50f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2001.51f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.49f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.50f32).to_bits(), + f16::from_f32(2002.0).to_bits() + ); + assert_eq!( + f16::from_f32(2002.51f32).to_bits(), + f16::from_f32(2003.0).to_bits() + ); + } + + #[test] + #[allow(clippy::erasing_op, clippy::identity_op)] + fn round_to_even_f64() { + // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 + let min_sub = f16::from_bits(1); + let min_sub_f = (-24f64).exp2(); + assert_eq!(f16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); + assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); + + // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) + // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) + // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 0.49).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f64(min_sub_f * 0.50).to_bits(), + min_sub.to_bits() * 0 + ); + assert_eq!( + f16::from_f64(min_sub_f * 0.51).to_bits(), + min_sub.to_bits() * 1 + ); + + // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) + // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) + // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 1.49).to_bits(), + min_sub.to_bits() * 1 + ); + assert_eq!( + f16::from_f64(min_sub_f * 1.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 1.51).to_bits(), + min_sub.to_bits() * 2 + ); + + // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) + // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) + // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) + assert_eq!( + f16::from_f64(min_sub_f * 2.49).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 2.50).to_bits(), + min_sub.to_bits() * 2 + ); + assert_eq!( + f16::from_f64(min_sub_f * 2.51).to_bits(), + min_sub.to_bits() * 3 + ); + + assert_eq!( + f16::from_f64(2000.49f64).to_bits(), + f16::from_f64(2000.0).to_bits() + ); + assert_eq!( + f16::from_f64(2000.50f64).to_bits(), + f16::from_f64(2000.0).to_bits() + ); + assert_eq!( + f16::from_f64(2000.51f64).to_bits(), + f16::from_f64(2001.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.49f64).to_bits(), + f16::from_f64(2001.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.50f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2001.51f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.49f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.50f64).to_bits(), + f16::from_f64(2002.0).to_bits() + ); + assert_eq!( + f16::from_f64(2002.51f64).to_bits(), + f16::from_f64(2003.0).to_bits() + ); + } + + impl quickcheck::Arbitrary for f16 { + fn arbitrary(g: &mut quickcheck::Gen) -> Self { + f16(u16::arbitrary(g)) + } + } + + #[quickcheck] + fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool { + let roundtrip = f16::from_f32(f.to_f32()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } + + #[quickcheck] + fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool { + let roundtrip = f16::from_f64(f.to_f64()); + if f.is_nan() { + roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() + } else { + f.0 == roundtrip.0 + } + } +} diff --git a/vendor/half/src/binary16/convert.rs b/vendor/half/src/binary16/convert.rs new file mode 100644 index 0000000..b96910f --- /dev/null +++ b/vendor/half/src/binary16/convert.rs @@ -0,0 +1,752 @@ +#![allow(dead_code, unused_imports)] +use crate::leading_zeros::leading_zeros_u16; +use core::mem; + +macro_rules! convert_fn { + (fn $name:ident($($var:ident : $vartype:ty),+) -> $restype:ty { + if feature("f16c") { $f16c:expr } + else { $fallback:expr }}) => { + #[inline] + pub(crate) fn $name($($var: $vartype),+) -> $restype { + // Use CPU feature detection if using std + #[cfg(all( + feature = "use-intrinsics", + feature = "std", + any(target_arch = "x86", target_arch = "x86_64"), + not(target_feature = "f16c") + ))] + { + if is_x86_feature_detected!("f16c") { + $f16c + } else { + $fallback + } + } + // Use intrinsics directly when a compile target or using no_std + #[cfg(all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64"), + target_feature = "f16c" + ))] + { + $f16c + } + // Fallback to software + #[cfg(any( + not(feature = "use-intrinsics"), + not(any(target_arch = "x86", target_arch = "x86_64")), + all(not(feature = "std"), not(target_feature = "f16c")) + ))] + { + $fallback + } + } + }; +} + +convert_fn! { + fn f32_to_f16(f: f32) -> u16 { + if feature("f16c") { + unsafe { x86::f32_to_f16_x86_f16c(f) } + } else { + f32_to_f16_fallback(f) + } + } +} + +convert_fn! { + fn f64_to_f16(f: f64) -> u16 { + if feature("f16c") { + unsafe { x86::f32_to_f16_x86_f16c(f as f32) } + } else { + f64_to_f16_fallback(f) + } + } +} + +convert_fn! { + fn f16_to_f32(i: u16) -> f32 { + if feature("f16c") { + unsafe { x86::f16_to_f32_x86_f16c(i) } + } else { + f16_to_f32_fallback(i) + } + } +} + +convert_fn! { + fn f16_to_f64(i: u16) -> f64 { + if feature("f16c") { + unsafe { x86::f16_to_f32_x86_f16c(i) as f64 } + } else { + f16_to_f64_fallback(i) + } + } +} + +convert_fn! { + fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] { + if feature("f16c") { + unsafe { x86::f32x4_to_f16x4_x86_f16c(f) } + } else { + f32x4_to_f16x4_fallback(f) + } + } +} + +convert_fn! { + fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] { + if feature("f16c") { + unsafe { x86::f16x4_to_f32x4_x86_f16c(i) } + } else { + f16x4_to_f32x4_fallback(i) + } + } +} + +convert_fn! { + fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] { + if feature("f16c") { + unsafe { x86::f64x4_to_f16x4_x86_f16c(f) } + } else { + f64x4_to_f16x4_fallback(f) + } + } +} + +convert_fn! { + fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] { + if feature("f16c") { + unsafe { x86::f16x4_to_f64x4_x86_f16c(i) } + } else { + f16x4_to_f64x4_fallback(i) + } + } +} + +convert_fn! { + fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] { + if feature("f16c") { + unsafe { x86::f32x8_to_f16x8_x86_f16c(f) } + } else { + f32x8_to_f16x8_fallback(f) + } + } +} + +convert_fn! { + fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] { + if feature("f16c") { + unsafe { x86::f16x8_to_f32x8_x86_f16c(i) } + } else { + f16x8_to_f32x8_fallback(i) + } + } +} + +convert_fn! { + fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] { + if feature("f16c") { + unsafe { x86::f64x8_to_f16x8_x86_f16c(f) } + } else { + f64x8_to_f16x8_fallback(f) + } + } +} + +convert_fn! { + fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] { + if feature("f16c") { + unsafe { x86::f16x8_to_f64x8_x86_f16c(i) } + } else { + f16x8_to_f64x8_fallback(i) + } + } +} + +convert_fn! { + fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) -> () { + if feature("f16c") { + convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c, + x86::f32x4_to_f16x4_x86_f16c) + } else { + slice_fallback(src, dst, f32_to_f16_fallback) + } + } +} + +convert_fn! { + fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) -> () { + if feature("f16c") { + convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c, + x86::f16x4_to_f32x4_x86_f16c) + } else { + slice_fallback(src, dst, f16_to_f32_fallback) + } + } +} + +convert_fn! { + fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) -> () { + if feature("f16c") { + convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c, + x86::f64x4_to_f16x4_x86_f16c) + } else { + slice_fallback(src, dst, f64_to_f16_fallback) + } + } +} + +convert_fn! { + fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) -> () { + if feature("f16c") { + convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c, + x86::f16x4_to_f64x4_x86_f16c) + } else { + slice_fallback(src, dst, f16_to_f64_fallback) + } + } +} + +/// Chunks sliced into x8 or x4 arrays +#[inline] +fn convert_chunked_slice_8<S: Copy + Default, D: Copy>( + src: &[S], + dst: &mut [D], + fn8: unsafe fn(&[S; 8]) -> [D; 8], + fn4: unsafe fn(&[S; 4]) -> [D; 4], +) { + assert_eq!(src.len(), dst.len()); + + // TODO: Can be further optimized with array_chunks when it becomes stabilized + + let src_chunks = src.chunks_exact(8); + let mut dst_chunks = dst.chunks_exact_mut(8); + let src_remainder = src_chunks.remainder(); + for (s, d) in src_chunks.zip(&mut dst_chunks) { + let chunk: &[S; 8] = s.try_into().unwrap(); + d.copy_from_slice(unsafe { &fn8(chunk) }); + } + + // Process remainder + if src_remainder.len() > 4 { + let mut buf: [S; 8] = Default::default(); + buf[..src_remainder.len()].copy_from_slice(src_remainder); + let vec = unsafe { fn8(&buf) }; + let dst_remainder = dst_chunks.into_remainder(); + dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); + } else if !src_remainder.is_empty() { + let mut buf: [S; 4] = Default::default(); + buf[..src_remainder.len()].copy_from_slice(src_remainder); + let vec = unsafe { fn4(&buf) }; + let dst_remainder = dst_chunks.into_remainder(); + dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); + } +} + +/// Chunks sliced into x4 arrays +#[inline] +fn convert_chunked_slice_4<S: Copy + Default, D: Copy>( + src: &[S], + dst: &mut [D], + f: unsafe fn(&[S; 4]) -> [D; 4], +) { + assert_eq!(src.len(), dst.len()); + + // TODO: Can be further optimized with array_chunks when it becomes stabilized + + let src_chunks = src.chunks_exact(4); + let mut dst_chunks = dst.chunks_exact_mut(4); + let src_remainder = src_chunks.remainder(); + for (s, d) in src_chunks.zip(&mut dst_chunks) { + let chunk: &[S; 4] = s.try_into().unwrap(); + d.copy_from_slice(unsafe { &f(chunk) }); + } + + // Process remainder + if !src_remainder.is_empty() { + let mut buf: [S; 4] = Default::default(); + buf[..src_remainder.len()].copy_from_slice(src_remainder); + let vec = unsafe { f(&buf) }; + let dst_remainder = dst_chunks.into_remainder(); + dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); + } +} + +/////////////// Fallbacks //////////////// + +// In the below functions, round to nearest, with ties to even. +// Let us call the most significant bit that will be shifted out the round_bit. +// +// Round up if either +// a) Removed part > tie. +// (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0 +// b) Removed part == tie, and retained part is odd. +// (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0 +// (If removed part == tie and retained part is even, do not round up.) +// These two conditions can be combined into one: +// (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0 +// which can be simplified into +// (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0 + +#[inline] +pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 { + // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized + // Convert to raw bytes + let x: u32 = unsafe { mem::transmute(value) }; + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7F80_0000u32; + let man = x & 0x007F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7F80_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits) + let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; + return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for half precision + let unbiased_exp = ((exp >> 23) as i32) - 127; + let half_exp = unbiased_exp + 15; + + // Check for exponent overflow, return +infinity + if half_exp >= 0x1F { + return (half_sign | 0x7C00u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 14 - half_exp > 24 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0080_0000u32; + let mut half_man = man >> (14 - half_exp); + // Check for rounding (see comment above functions) + let round_bit = 1 << (13 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 10; + let half_man = man >> 13; + // Check for rounding (see comment above functions) + let round_bit = 0x0000_1000u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +#[inline] +pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 { + // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always + // be lost on half-precision. + // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized + let val: u64 = unsafe { mem::transmute(value) }; + let x = (val >> 32) as u32; + + // Extract IEEE754 components + let sign = x & 0x8000_0000u32; + let exp = x & 0x7FF0_0000u32; + let man = x & 0x000F_FFFFu32; + + // Check for all exponent bits being set, which is Infinity or NaN + if exp == 0x7FF0_0000u32 { + // Set mantissa MSB for NaN (and also keep shifted mantissa bits). + // We also have to check the last 32 bits. + let nan_bit = if man == 0 && (val as u32 == 0) { + 0 + } else { + 0x0200u32 + }; + return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16; + } + + // The number is normalized, start assembling half precision version + let half_sign = sign >> 16; + // Unbias the exponent, then bias for half precision + let unbiased_exp = ((exp >> 20) as i64) - 1023; + let half_exp = unbiased_exp + 15; + + // Check for exponent overflow, return +infinity + if half_exp >= 0x1F { + return (half_sign | 0x7C00u32) as u16; + } + + // Check for underflow + if half_exp <= 0 { + // Check mantissa for what we can do + if 10 - half_exp > 21 { + // No rounding possibility, so this is a full underflow, return signed zero + return half_sign as u16; + } + // Don't forget about hidden leading mantissa bit when assembling mantissa + let man = man | 0x0010_0000u32; + let mut half_man = man >> (11 - half_exp); + // Check for rounding (see comment above functions) + let round_bit = 1 << (10 - half_exp); + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + half_man += 1; + } + // No exponent for subnormals + return (half_sign | half_man) as u16; + } + + // Rebias the exponent + let half_exp = (half_exp as u32) << 10; + let half_man = man >> 10; + // Check for rounding (see comment above functions) + let round_bit = 0x0000_0200u32; + if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { + // Round it + ((half_sign | half_exp | half_man) + 1) as u16 + } else { + (half_sign | half_exp | half_man) as u16 + } +} + +#[inline] +pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 { + // Check for signed zero + // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized + if i & 0x7FFFu16 == 0 { + return unsafe { mem::transmute((i as u32) << 16) }; + } + + let half_sign = (i & 0x8000u16) as u32; + let half_exp = (i & 0x7C00u16) as u32; + let half_man = (i & 0x03FFu16) as u32; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7C00u32 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return unsafe { mem::transmute((half_sign << 16) | 0x7F80_0000u32) }; + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return unsafe { + mem::transmute((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13)) + }; + } + } + + // Calculate single-precision components with adjusted exponent + let sign = half_sign << 16; + // Unbias exponent + let unbiased_exp = ((half_exp as i32) >> 10) - 15; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = leading_zeros_u16(half_man as u16) - 6; + + // Rebias and adjust exponent + let exp = (127 - 15 - e) << 23; + let man = (half_man << (14 + e)) & 0x7F_FF_FFu32; + return unsafe { mem::transmute(sign | exp | man) }; + } + + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 127) as u32) << 23; + let man = (half_man & 0x03FFu32) << 13; + unsafe { mem::transmute(sign | exp | man) } +} + +#[inline] +pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 { + // Check for signed zero + // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized + if i & 0x7FFFu16 == 0 { + return unsafe { mem::transmute((i as u64) << 48) }; + } + + let half_sign = (i & 0x8000u16) as u64; + let half_exp = (i & 0x7C00u16) as u64; + let half_man = (i & 0x03FFu16) as u64; + + // Check for an infinity or NaN when all exponent bits set + if half_exp == 0x7C00u64 { + // Check for signed infinity if mantissa is zero + if half_man == 0 { + return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; + } else { + // NaN, keep current mantissa but also set most significiant mantissa bit + return unsafe { + mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42)) + }; + } + } + + // Calculate double-precision components with adjusted exponent + let sign = half_sign << 48; + // Unbias exponent + let unbiased_exp = ((half_exp as i64) >> 10) - 15; + + // Check for subnormals, which will be normalized by adjusting exponent + if half_exp == 0 { + // Calculate how much to adjust the exponent by + let e = leading_zeros_u16(half_man as u16) - 6; + + // Rebias and adjust exponent + let exp = ((1023 - 15 - e) as u64) << 52; + let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64; + return unsafe { mem::transmute(sign | exp | man) }; + } + + // Rebias exponent for a normalized normal + let exp = ((unbiased_exp + 1023) as u64) << 52; + let man = (half_man & 0x03FFu64) << 42; + unsafe { mem::transmute(sign | exp | man) } +} + +#[inline] +fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] { + [ + f16_to_f32_fallback(v[0]), + f16_to_f32_fallback(v[1]), + f16_to_f32_fallback(v[2]), + f16_to_f32_fallback(v[3]), + ] +} + +#[inline] +fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] { + [ + f32_to_f16_fallback(v[0]), + f32_to_f16_fallback(v[1]), + f32_to_f16_fallback(v[2]), + f32_to_f16_fallback(v[3]), + ] +} + +#[inline] +fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] { + [ + f16_to_f64_fallback(v[0]), + f16_to_f64_fallback(v[1]), + f16_to_f64_fallback(v[2]), + f16_to_f64_fallback(v[3]), + ] +} + +#[inline] +fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] { + [ + f64_to_f16_fallback(v[0]), + f64_to_f16_fallback(v[1]), + f64_to_f16_fallback(v[2]), + f64_to_f16_fallback(v[3]), + ] +} + +#[inline] +fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] { + [ + f16_to_f32_fallback(v[0]), + f16_to_f32_fallback(v[1]), + f16_to_f32_fallback(v[2]), + f16_to_f32_fallback(v[3]), + f16_to_f32_fallback(v[4]), + f16_to_f32_fallback(v[5]), + f16_to_f32_fallback(v[6]), + f16_to_f32_fallback(v[7]), + ] +} + +#[inline] +fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] { + [ + f32_to_f16_fallback(v[0]), + f32_to_f16_fallback(v[1]), + f32_to_f16_fallback(v[2]), + f32_to_f16_fallback(v[3]), + f32_to_f16_fallback(v[4]), + f32_to_f16_fallback(v[5]), + f32_to_f16_fallback(v[6]), + f32_to_f16_fallback(v[7]), + ] +} + +#[inline] +fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] { + [ + f16_to_f64_fallback(v[0]), + f16_to_f64_fallback(v[1]), + f16_to_f64_fallback(v[2]), + f16_to_f64_fallback(v[3]), + f16_to_f64_fallback(v[4]), + f16_to_f64_fallback(v[5]), + f16_to_f64_fallback(v[6]), + f16_to_f64_fallback(v[7]), + ] +} + +#[inline] +fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] { + [ + f64_to_f16_fallback(v[0]), + f64_to_f16_fallback(v[1]), + f64_to_f16_fallback(v[2]), + f64_to_f16_fallback(v[3]), + f64_to_f16_fallback(v[4]), + f64_to_f16_fallback(v[5]), + f64_to_f16_fallback(v[6]), + f64_to_f16_fallback(v[7]), + ] +} + +#[inline] +fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) { + assert_eq!(src.len(), dst.len()); + for (s, d) in src.iter().copied().zip(dst.iter_mut()) { + *d = f(s); + } +} + +/////////////// x86/x86_64 f16c //////////////// +#[cfg(all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64") +))] +mod x86 { + use core::{mem::MaybeUninit, ptr}; + + #[cfg(target_arch = "x86")] + use core::arch::x86::{ + __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, + _MM_FROUND_TO_NEAREST_INT, + }; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64::{ + __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph, + _MM_FROUND_TO_NEAREST_INT, + }; + + use super::convert_chunked_slice_8; + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 { + let mut vec = MaybeUninit::<__m128i>::zeroed(); + vec.as_mut_ptr().cast::<u16>().write(i); + let retval = _mm_cvtph_ps(vec.assume_init()); + *(&retval as *const __m128).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 { + let mut vec = MaybeUninit::<__m128>::zeroed(); + vec.as_mut_ptr().cast::<f32>().write(f); + let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] { + let mut vec = MaybeUninit::<__m128i>::zeroed(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtph_ps(vec.assume_init()); + *(&retval as *const __m128).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] { + let mut vec = MaybeUninit::<__m128>::uninit(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); + let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] { + let array = f16x4_to_f32x4_x86_f16c(v); + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + [ + array[0] as f64, + array[1] as f64, + array[2] as f64, + array[3] as f64, + ] + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] { + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; + f32x4_to_f16x4_x86_f16c(&v) + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] { + let mut vec = MaybeUninit::<__m128i>::zeroed(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); + let retval = _mm256_cvtph_ps(vec.assume_init()); + *(&retval as *const __m256).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] { + let mut vec = MaybeUninit::<__m256>::uninit(); + ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); + let retval = _mm256_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); + *(&retval as *const __m128i).cast() + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] { + let array = f16x8_to_f32x8_x86_f16c(v); + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + [ + array[0] as f64, + array[1] as f64, + array[2] as f64, + array[3] as f64, + array[4] as f64, + array[5] as f64, + array[6] as f64, + array[7] as f64, + ] + } + + #[target_feature(enable = "f16c")] + #[inline] + pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] { + // Let compiler vectorize this regular cast for now. + // TODO: investigate auto-detecting sse2/avx convert features + let v = [ + v[0] as f32, + v[1] as f32, + v[2] as f32, + v[3] as f32, + v[4] as f32, + v[5] as f32, + v[6] as f32, + v[7] as f32, + ]; + f32x8_to_f16x8_x86_f16c(&v) + } +} diff --git a/vendor/half/src/leading_zeros.rs b/vendor/half/src/leading_zeros.rs new file mode 100644 index 0000000..6c73148 --- /dev/null +++ b/vendor/half/src/leading_zeros.rs @@ -0,0 +1,62 @@ +// https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros + +#[cfg(not(any(all( + target_arch = "spirv", + not(all( + target_feature = "IntegerFunctions2INTEL", + target_feature = "SPV_INTEL_shader_integer_functions2" + )) +))))] +pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { + x.leading_zeros() +} + +#[cfg(all( + target_arch = "spirv", + not(all( + target_feature = "IntegerFunctions2INTEL", + target_feature = "SPV_INTEL_shader_integer_functions2" + )) +))] +pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { + leading_zeros_u16_fallback(x) +} + +#[cfg(any( + test, + all( + target_arch = "spirv", + not(all( + target_feature = "IntegerFunctions2INTEL", + target_feature = "SPV_INTEL_shader_integer_functions2" + )) + ) +))] +const fn leading_zeros_u16_fallback(mut x: u16) -> u32 { + use crunchy::unroll; + let mut c = 0; + let msb = 1 << 15; + unroll! { for i in 0 .. 16 { + if x & msb == 0 { + c += 1; + } else { + return c; + } + #[allow(unused_assignments)] + if i < 15 { + x <<= 1; + } + }} + c +} + +#[cfg(test)] +mod test { + + #[test] + fn leading_zeros_u16_fallback() { + for x in [44, 97, 304, 1179, 23571] { + assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros()); + } + } +} diff --git a/vendor/half/src/lib.rs b/vendor/half/src/lib.rs new file mode 100644 index 0000000..f821945 --- /dev/null +++ b/vendor/half/src/lib.rs @@ -0,0 +1,233 @@ +//! A crate that provides support for half-precision 16-bit floating point types. +//! +//! This crate provides the [`f16`] type, which is an implementation of the IEEE 754-2008 standard +//! [`binary16`] a.k.a `half` floating point type. This 16-bit floating point type is intended for +//! efficient storage where the full range and precision of a larger floating point value is not +//! required. This is especially useful for image storage formats. +//! +//! This crate also provides a [`bf16`] type, an alternative 16-bit floating point format. The +//! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the +//! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11 +//! bits for [`f16`]). See the [`bf16`] type for details. +//! +//! Because [`f16`] and [`bf16`] are primarily for efficient storage, floating point operations such +//! as addition, multiplication, etc. are not implemented by hardware. While this crate does provide +//! the appropriate trait implementations for basic operations, they each convert the value to +//! [`f32`] before performing the operation and then back afterward. When performing complex +//! arithmetic, manually convert to and from [`f32`] before and after to reduce repeated conversions +//! for each operation. +//! +//! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of +//! [`u16`] slices to both [`f16`] and [`bf16`], as well as efficient vectorized conversions of +//! larger buffers of floating point values to and from these half formats. +//! +//! The crate uses `#[no_std]` by default, so can be used in embedded environments without using the +//! Rust [`std`] library. A `std` feature to enable support for the standard library is available, +//! see the [Cargo Features](#cargo-features) section below. +//! +//! A [`prelude`] module is provided for easy importing of available utility traits. +//! +//! # Serialization +//! +//! When the `serde` feature is enabled, [`f16`] and [`bf16`] will be serialized as a newtype of +//! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for +//! storage. For string formats like JSON, however, this isn't as useful, and due to design +//! limitations of serde, it's not possible for the default `Serialize` implementation to support +//! different serialization for different formats. +//! +//! Instead, it's up to the containter type of the floats to control how it is serialized. This can +//! easily be controlled when using the derive macros using `#[serde(serialize_with="")]` +//! attributes. For both [`f16`] and [`bf16`] a `serialize_as_f32` and `serialize_as_string` are +//! provided for use with this attribute. +//! +//! Deserialization of both float types supports deserializing from the default serialization, +//! strings, and `f32`/`f64` values, so no additional work is required. +//! +//! # Cargo Features +//! +//! This crate supports a number of optional cargo features. None of these features are enabled by +//! default, even `std`. +//! +//! - **`use-intrinsics`** -- Use [`core::arch`] hardware intrinsics for `f16` and `bf16` conversions +//! if available on the compiler target. This feature currently only works on nightly Rust +//! until the corresponding intrinsics are stabilized. +//! +//! When this feature is enabled and the hardware supports it, the functions and traits in the +//! [`slice`][mod@slice] module will use vectorized SIMD intructions for increased efficiency. +//! +//! By default, without this feature, conversions are done only in software, which will also be +//! the fallback if the target does not have hardware support. Note that without the `std` +//! feature enabled, no runtime CPU feature detection is used, so the hardware support is only +//! compiled if the compiler target supports the CPU feature. +//! +//! - **`alloc`** -- Enable use of the [`alloc`] crate when not using the `std` library. +//! +//! Among other functions, this enables the [`vec`] module, which contains zero-copy +//! conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and +//! `Vec<f16>` or `Vec<bf16>` arrays, and vice versa. +//! +//! - **`std`** -- Enable features that depend on the Rust [`std`] library. This also enables the +//! `alloc` feature automatically. +//! +//! Enabling the `std` feature also enables runtime CPU feature detection when the +//! `use-intrsincis` feature is also enabled. Without this feature detection, intrinsics are only +//! used when compiler target supports the target feature. +//! +//! - **`serde`** -- Adds support for the [`serde`] crate by implementing [`Serialize`] and +//! [`Deserialize`] traits for both [`f16`] and [`bf16`]. +//! +//! - **`num-traits`** -- Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`], +//! [`FromPrimitive`], [`AsPrimitive`], [`Num`], [`Float`], [`FloatCore`], and [`Bounded`] traits +//! for both [`f16`] and [`bf16`]. +//! +//! - **`bytemuck`** -- Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and +//! [`Pod`] traits for both [`f16`] and [`bf16`]. +//! +//! - **`zerocopy`** -- Adds support for the [`zerocopy`] crate by implementing [`AsBytes`] and +//! [`FromBytes`] traits for both [`f16`] and [`bf16`]. +//! +//! [`alloc`]: https://doc.rust-lang.org/alloc/ +//! [`std`]: https://doc.rust-lang.org/std/ +//! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format +//! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +//! [`serde`]: https://crates.io/crates/serde +//! [`bytemuck`]: https://crates.io/crates/bytemuck +//! [`num-traits`]: https://crates.io/crates/num-traits +//! [`zerocopy`]: https://crates.io/crates/zerocopy +#![cfg_attr( + feature = "alloc", + doc = " +[`vec`]: mod@vec" +)] +#![cfg_attr( + not(feature = "alloc"), + doc = " +[`vec`]: # +[`Vec`]: https://docs.rust-lang.org/stable/alloc/vec/struct.Vec.html" +)] +#![cfg_attr( + feature = "serde", + doc = " +[`Serialize`]: serde::Serialize +[`Deserialize`]: serde::Deserialize" +)] +#![cfg_attr( + not(feature = "serde"), + doc = " +[`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html +[`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html" +)] +#![cfg_attr( + feature = "num-traits", + doc = " +[`ToPrimitive`]: ::num_traits::ToPrimitive +[`FromPrimitive`]: ::num_traits::FromPrimitive +[`AsPrimitive`]: ::num_traits::AsPrimitive +[`Num`]: ::num_traits::Num +[`Float`]: ::num_traits::Float +[`FloatCore`]: ::num_traits::float::FloatCore +[`Bounded`]: ::num_traits::Bounded" +)] +#![cfg_attr( + not(feature = "num-traits"), + doc = " +[`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html +[`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html +[`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html +[`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html +[`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html +[`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html +[`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html" +)] +#![cfg_attr( + feature = "bytemuck", + doc = " +[`Zeroable`]: bytemuck::Zeroable +[`Pod`]: bytemuck::Pod" +)] +#![cfg_attr( + not(feature = "bytemuck"), + doc = " +[`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html +[`Pod`]: https://docs.rs/bytemuck/*bytemuck/trait.Pod.html" +)] +#![cfg_attr( + feature = "zerocopy", + doc = " +[`AsBytes`]: zerocopy::AsBytes +[`FromBytes`]: zerocopy::FromBytes" +)] +#![cfg_attr( + not(feature = "zerocopy"), + doc = " +[`AsBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.AsBytes.html +[`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html" +)] +#![warn( + missing_docs, + missing_copy_implementations, + trivial_numeric_casts, + future_incompatible +)] +#![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))] +#![allow(clippy::verbose_bit_mask, clippy::cast_lossless)] +#![cfg_attr(not(feature = "std"), no_std)] +#![cfg_attr( + all( + feature = "use-intrinsics", + any(target_arch = "x86", target_arch = "x86_64") + ), + feature(stdsimd, f16c_target_feature) +)] +#![doc(html_root_url = "https://docs.rs/half/2.2.1")] +#![doc(test(attr(deny(warnings), allow(unused))))] +#![cfg_attr(docsrs, feature(doc_cfg))] + +#[cfg(feature = "alloc")] +extern crate alloc; + +mod bfloat; +mod binary16; +mod leading_zeros; +#[cfg(feature = "num-traits")] +mod num_traits; + +#[cfg(not(target_arch = "spirv"))] +pub mod slice; +#[cfg(feature = "alloc")] +#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] +pub mod vec; + +pub use bfloat::bf16; +pub use binary16::f16; + +/// A collection of the most used items and traits in this crate for easy importing. +/// +/// # Examples +/// +/// ```rust +/// use half::prelude::*; +/// ``` +pub mod prelude { + #[doc(no_inline)] + pub use crate::{bf16, f16}; + + #[cfg(not(target_arch = "spirv"))] + #[doc(no_inline)] + pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt}; + + #[cfg(feature = "alloc")] + #[doc(no_inline)] + #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] + pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt}; +} + +// Keep this module private to crate +mod private { + use crate::{bf16, f16}; + + pub trait SealedHalf {} + + impl SealedHalf for f16 {} + impl SealedHalf for bf16 {} +} diff --git a/vendor/half/src/num_traits.rs b/vendor/half/src/num_traits.rs new file mode 100644 index 0000000..4318699 --- /dev/null +++ b/vendor/half/src/num_traits.rs @@ -0,0 +1,1483 @@ +use crate::{bf16, f16}; +use core::cmp::Ordering; +use core::{num::FpCategory, ops::Div}; +use num_traits::{ + AsPrimitive, Bounded, FloatConst, FromPrimitive, Num, NumCast, One, ToPrimitive, Zero, +}; + +impl ToPrimitive for f16 { + #[inline] + fn to_i64(&self) -> Option<i64> { + Self::to_f32(*self).to_i64() + } + #[inline] + fn to_u64(&self) -> Option<u64> { + Self::to_f32(*self).to_u64() + } + #[inline] + fn to_i8(&self) -> Option<i8> { + Self::to_f32(*self).to_i8() + } + #[inline] + fn to_u8(&self) -> Option<u8> { + Self::to_f32(*self).to_u8() + } + #[inline] + fn to_i16(&self) -> Option<i16> { + Self::to_f32(*self).to_i16() + } + #[inline] + fn to_u16(&self) -> Option<u16> { + Self::to_f32(*self).to_u16() + } + #[inline] + fn to_i32(&self) -> Option<i32> { + Self::to_f32(*self).to_i32() + } + #[inline] + fn to_u32(&self) -> Option<u32> { + Self::to_f32(*self).to_u32() + } + #[inline] + fn to_f32(&self) -> Option<f32> { + Some(Self::to_f32(*self)) + } + #[inline] + fn to_f64(&self) -> Option<f64> { + Some(Self::to_f64(*self)) + } +} + +impl FromPrimitive for f16 { + #[inline] + fn from_i64(n: i64) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u64(n: u64) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i8(n: i8) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u8(n: u8) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i16(n: i16) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u16(n: u16) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i32(n: i32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u32(n: u32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_f32(n: f32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_f64(n: f64) -> Option<Self> { + n.to_f64().map(Self::from_f64) + } +} + +impl Num for f16 { + type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; + + #[inline] + fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> { + Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) + } +} + +impl One for f16 { + #[inline] + fn one() -> Self { + Self::ONE + } +} + +impl Zero for f16 { + #[inline] + fn zero() -> Self { + Self::ZERO + } + + #[inline] + fn is_zero(&self) -> bool { + *self == Self::ZERO + } +} + +impl NumCast for f16 { + #[inline] + fn from<T: ToPrimitive>(n: T) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } +} + +impl num_traits::float::FloatCore for f16 { + #[inline] + fn infinity() -> Self { + Self::INFINITY + } + + #[inline] + fn neg_infinity() -> Self { + Self::NEG_INFINITY + } + + #[inline] + fn nan() -> Self { + Self::NAN + } + + #[inline] + fn neg_zero() -> Self { + Self::NEG_ZERO + } + + #[inline] + fn min_value() -> Self { + Self::MIN + } + + #[inline] + fn min_positive_value() -> Self { + Self::MIN_POSITIVE + } + + #[inline] + fn epsilon() -> Self { + Self::EPSILON + } + + #[inline] + fn max_value() -> Self { + Self::MAX + } + + #[inline] + fn is_nan(self) -> bool { + self.is_nan() + } + + #[inline] + fn is_infinite(self) -> bool { + self.is_infinite() + } + + #[inline] + fn is_finite(self) -> bool { + self.is_finite() + } + + #[inline] + fn is_normal(self) -> bool { + self.is_normal() + } + + #[inline] + fn classify(self) -> FpCategory { + self.classify() + } + + #[inline] + fn floor(self) -> Self { + Self::from_f32(self.to_f32().floor()) + } + + #[inline] + fn ceil(self) -> Self { + Self::from_f32(self.to_f32().ceil()) + } + + #[inline] + fn round(self) -> Self { + Self::from_f32(self.to_f32().round()) + } + + #[inline] + fn trunc(self) -> Self { + Self::from_f32(self.to_f32().trunc()) + } + + #[inline] + fn fract(self) -> Self { + Self::from_f32(self.to_f32().fract()) + } + + #[inline] + fn abs(self) -> Self { + Self::from_bits(self.to_bits() & 0x7FFF) + } + + #[inline] + fn signum(self) -> Self { + self.signum() + } + + #[inline] + fn is_sign_positive(self) -> bool { + self.is_sign_positive() + } + + #[inline] + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + + fn min(self, other: Self) -> Self { + match self.partial_cmp(&other) { + None => { + if self.is_nan() { + other + } else { + self + } + } + Some(Ordering::Greater) | Some(Ordering::Equal) => other, + Some(Ordering::Less) => self, + } + } + + fn max(self, other: Self) -> Self { + match self.partial_cmp(&other) { + None => { + if self.is_nan() { + other + } else { + self + } + } + Some(Ordering::Greater) | Some(Ordering::Equal) => self, + Some(Ordering::Less) => other, + } + } + + #[inline] + fn recip(self) -> Self { + Self::from_f32(self.to_f32().recip()) + } + + #[inline] + fn powi(self, exp: i32) -> Self { + Self::from_f32(self.to_f32().powi(exp)) + } + + #[inline] + fn to_degrees(self) -> Self { + Self::from_f32(self.to_f32().to_degrees()) + } + + #[inline] + fn to_radians(self) -> Self { + Self::from_f32(self.to_f32().to_radians()) + } + + #[inline] + fn integer_decode(self) -> (u64, i16, i8) { + num_traits::float::FloatCore::integer_decode(self.to_f32()) + } +} + +impl num_traits::float::Float for f16 { + #[inline] + fn nan() -> Self { + Self::NAN + } + + #[inline] + fn infinity() -> Self { + Self::INFINITY + } + + #[inline] + fn neg_infinity() -> Self { + Self::NEG_INFINITY + } + + #[inline] + fn neg_zero() -> Self { + Self::NEG_ZERO + } + + #[inline] + fn min_value() -> Self { + Self::MIN + } + + #[inline] + fn min_positive_value() -> Self { + Self::MIN_POSITIVE + } + + #[inline] + fn epsilon() -> Self { + Self::EPSILON + } + + #[inline] + fn max_value() -> Self { + Self::MAX + } + + #[inline] + fn is_nan(self) -> bool { + self.is_nan() + } + + #[inline] + fn is_infinite(self) -> bool { + self.is_infinite() + } + + #[inline] + fn is_finite(self) -> bool { + self.is_finite() + } + + #[inline] + fn is_normal(self) -> bool { + self.is_normal() + } + + #[inline] + fn classify(self) -> FpCategory { + self.classify() + } + + #[inline] + fn floor(self) -> Self { + Self::from_f32(self.to_f32().floor()) + } + + #[inline] + fn ceil(self) -> Self { + Self::from_f32(self.to_f32().ceil()) + } + + #[inline] + fn round(self) -> Self { + Self::from_f32(self.to_f32().round()) + } + + #[inline] + fn trunc(self) -> Self { + Self::from_f32(self.to_f32().trunc()) + } + + #[inline] + fn fract(self) -> Self { + Self::from_f32(self.to_f32().fract()) + } + + #[inline] + fn abs(self) -> Self { + Self::from_f32(self.to_f32().abs()) + } + + #[inline] + fn signum(self) -> Self { + Self::from_f32(self.to_f32().signum()) + } + + #[inline] + fn is_sign_positive(self) -> bool { + self.is_sign_positive() + } + + #[inline] + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + + #[inline] + fn mul_add(self, a: Self, b: Self) -> Self { + Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) + } + + #[inline] + fn recip(self) -> Self { + Self::from_f32(self.to_f32().recip()) + } + + #[inline] + fn powi(self, n: i32) -> Self { + Self::from_f32(self.to_f32().powi(n)) + } + + #[inline] + fn powf(self, n: Self) -> Self { + Self::from_f32(self.to_f32().powf(n.to_f32())) + } + + #[inline] + fn sqrt(self) -> Self { + Self::from_f32(self.to_f32().sqrt()) + } + + #[inline] + fn exp(self) -> Self { + Self::from_f32(self.to_f32().exp()) + } + + #[inline] + fn exp2(self) -> Self { + Self::from_f32(self.to_f32().exp2()) + } + + #[inline] + fn ln(self) -> Self { + Self::from_f32(self.to_f32().ln()) + } + + #[inline] + fn log(self, base: Self) -> Self { + Self::from_f32(self.to_f32().log(base.to_f32())) + } + + #[inline] + fn log2(self) -> Self { + Self::from_f32(self.to_f32().log2()) + } + + #[inline] + fn log10(self) -> Self { + Self::from_f32(self.to_f32().log10()) + } + + #[inline] + fn to_degrees(self) -> Self { + Self::from_f32(self.to_f32().to_degrees()) + } + + #[inline] + fn to_radians(self) -> Self { + Self::from_f32(self.to_f32().to_radians()) + } + + #[inline] + fn max(self, other: Self) -> Self { + self.max(other) + } + + #[inline] + fn min(self, other: Self) -> Self { + self.min(other) + } + + #[inline] + fn abs_sub(self, other: Self) -> Self { + Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) + } + + #[inline] + fn cbrt(self) -> Self { + Self::from_f32(self.to_f32().cbrt()) + } + + #[inline] + fn hypot(self, other: Self) -> Self { + Self::from_f32(self.to_f32().hypot(other.to_f32())) + } + + #[inline] + fn sin(self) -> Self { + Self::from_f32(self.to_f32().sin()) + } + + #[inline] + fn cos(self) -> Self { + Self::from_f32(self.to_f32().cos()) + } + + #[inline] + fn tan(self) -> Self { + Self::from_f32(self.to_f32().tan()) + } + + #[inline] + fn asin(self) -> Self { + Self::from_f32(self.to_f32().asin()) + } + + #[inline] + fn acos(self) -> Self { + Self::from_f32(self.to_f32().acos()) + } + + #[inline] + fn atan(self) -> Self { + Self::from_f32(self.to_f32().atan()) + } + + #[inline] + fn atan2(self, other: Self) -> Self { + Self::from_f32(self.to_f32().atan2(other.to_f32())) + } + + #[inline] + fn sin_cos(self) -> (Self, Self) { + let (sin, cos) = self.to_f32().sin_cos(); + (Self::from_f32(sin), Self::from_f32(cos)) + } + + #[inline] + fn exp_m1(self) -> Self { + Self::from_f32(self.to_f32().exp_m1()) + } + + #[inline] + fn ln_1p(self) -> Self { + Self::from_f32(self.to_f32().ln_1p()) + } + + #[inline] + fn sinh(self) -> Self { + Self::from_f32(self.to_f32().sinh()) + } + + #[inline] + fn cosh(self) -> Self { + Self::from_f32(self.to_f32().cosh()) + } + + #[inline] + fn tanh(self) -> Self { + Self::from_f32(self.to_f32().tanh()) + } + + #[inline] + fn asinh(self) -> Self { + Self::from_f32(self.to_f32().asinh()) + } + + #[inline] + fn acosh(self) -> Self { + Self::from_f32(self.to_f32().acosh()) + } + + #[inline] + fn atanh(self) -> Self { + Self::from_f32(self.to_f32().atanh()) + } + + #[inline] + fn integer_decode(self) -> (u64, i16, i8) { + num_traits::float::Float::integer_decode(self.to_f32()) + } +} + +impl FloatConst for f16 { + #[inline] + fn E() -> Self { + Self::E + } + + #[inline] + fn FRAC_1_PI() -> Self { + Self::FRAC_1_PI + } + + #[inline] + fn FRAC_1_SQRT_2() -> Self { + Self::FRAC_1_SQRT_2 + } + + #[inline] + fn FRAC_2_PI() -> Self { + Self::FRAC_2_PI + } + + #[inline] + fn FRAC_2_SQRT_PI() -> Self { + Self::FRAC_2_SQRT_PI + } + + #[inline] + fn FRAC_PI_2() -> Self { + Self::FRAC_PI_2 + } + + #[inline] + fn FRAC_PI_3() -> Self { + Self::FRAC_PI_3 + } + + #[inline] + fn FRAC_PI_4() -> Self { + Self::FRAC_PI_4 + } + + #[inline] + fn FRAC_PI_6() -> Self { + Self::FRAC_PI_6 + } + + #[inline] + fn FRAC_PI_8() -> Self { + Self::FRAC_PI_8 + } + + #[inline] + fn LN_10() -> Self { + Self::LN_10 + } + + #[inline] + fn LN_2() -> Self { + Self::LN_2 + } + + #[inline] + fn LOG10_E() -> Self { + Self::LOG10_E + } + + #[inline] + fn LOG2_E() -> Self { + Self::LOG2_E + } + + #[inline] + fn PI() -> Self { + Self::PI + } + + fn SQRT_2() -> Self { + Self::SQRT_2 + } + + #[inline] + fn LOG10_2() -> Self + where + Self: Sized + Div<Self, Output = Self>, + { + Self::LOG10_2 + } + + #[inline] + fn LOG2_10() -> Self + where + Self: Sized + Div<Self, Output = Self>, + { + Self::LOG2_10 + } +} + +impl Bounded for f16 { + #[inline] + fn min_value() -> Self { + f16::MIN + } + + #[inline] + fn max_value() -> Self { + f16::MAX + } +} + +macro_rules! impl_as_primitive_to_f16 { + ($ty:ty, $meth:ident) => { + impl AsPrimitive<$ty> for f16 { + #[inline] + fn as_(self) -> $ty { + self.$meth().as_() + } + } + }; +} + +impl AsPrimitive<f16> for f16 { + #[inline] + fn as_(self) -> f16 { + self + } +} + +impl_as_primitive_to_f16!(i64, to_f32); +impl_as_primitive_to_f16!(u64, to_f32); +impl_as_primitive_to_f16!(i8, to_f32); +impl_as_primitive_to_f16!(u8, to_f32); +impl_as_primitive_to_f16!(i16, to_f32); +impl_as_primitive_to_f16!(u16, to_f32); +impl_as_primitive_to_f16!(i32, to_f32); +impl_as_primitive_to_f16!(u32, to_f32); +impl_as_primitive_to_f16!(isize, to_f32); +impl_as_primitive_to_f16!(usize, to_f32); +impl_as_primitive_to_f16!(f32, to_f32); +impl_as_primitive_to_f16!(f64, to_f64); + +macro_rules! impl_as_primitive_f16_from { + ($ty:ty, $meth:ident) => { + impl AsPrimitive<f16> for $ty { + #[inline] + fn as_(self) -> f16 { + f16::$meth(self.as_()) + } + } + }; +} + +impl_as_primitive_f16_from!(i64, from_f32); +impl_as_primitive_f16_from!(u64, from_f32); +impl_as_primitive_f16_from!(i8, from_f32); +impl_as_primitive_f16_from!(u8, from_f32); +impl_as_primitive_f16_from!(i16, from_f32); +impl_as_primitive_f16_from!(u16, from_f32); +impl_as_primitive_f16_from!(i32, from_f32); +impl_as_primitive_f16_from!(u32, from_f32); +impl_as_primitive_f16_from!(isize, from_f32); +impl_as_primitive_f16_from!(usize, from_f32); +impl_as_primitive_f16_from!(f32, from_f32); +impl_as_primitive_f16_from!(f64, from_f64); + +impl ToPrimitive for bf16 { + #[inline] + fn to_i64(&self) -> Option<i64> { + Self::to_f32(*self).to_i64() + } + #[inline] + fn to_u64(&self) -> Option<u64> { + Self::to_f32(*self).to_u64() + } + #[inline] + fn to_i8(&self) -> Option<i8> { + Self::to_f32(*self).to_i8() + } + #[inline] + fn to_u8(&self) -> Option<u8> { + Self::to_f32(*self).to_u8() + } + #[inline] + fn to_i16(&self) -> Option<i16> { + Self::to_f32(*self).to_i16() + } + #[inline] + fn to_u16(&self) -> Option<u16> { + Self::to_f32(*self).to_u16() + } + #[inline] + fn to_i32(&self) -> Option<i32> { + Self::to_f32(*self).to_i32() + } + #[inline] + fn to_u32(&self) -> Option<u32> { + Self::to_f32(*self).to_u32() + } + #[inline] + fn to_f32(&self) -> Option<f32> { + Some(Self::to_f32(*self)) + } + #[inline] + fn to_f64(&self) -> Option<f64> { + Some(Self::to_f64(*self)) + } +} + +impl FromPrimitive for bf16 { + #[inline] + fn from_i64(n: i64) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u64(n: u64) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i8(n: i8) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u8(n: u8) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i16(n: i16) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u16(n: u16) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_i32(n: i32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_u32(n: u32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_f32(n: f32) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } + #[inline] + fn from_f64(n: f64) -> Option<Self> { + n.to_f64().map(Self::from_f64) + } +} + +impl Num for bf16 { + type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; + + #[inline] + fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> { + Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) + } +} + +impl One for bf16 { + #[inline] + fn one() -> Self { + Self::ONE + } +} + +impl Zero for bf16 { + #[inline] + fn zero() -> Self { + Self::ZERO + } + + #[inline] + fn is_zero(&self) -> bool { + *self == Self::ZERO + } +} + +impl NumCast for bf16 { + #[inline] + fn from<T: ToPrimitive>(n: T) -> Option<Self> { + n.to_f32().map(Self::from_f32) + } +} + +impl num_traits::float::FloatCore for bf16 { + #[inline] + fn infinity() -> Self { + Self::INFINITY + } + + #[inline] + fn neg_infinity() -> Self { + Self::NEG_INFINITY + } + + #[inline] + fn nan() -> Self { + Self::NAN + } + + #[inline] + fn neg_zero() -> Self { + Self::NEG_ZERO + } + + #[inline] + fn min_value() -> Self { + Self::MIN + } + + #[inline] + fn min_positive_value() -> Self { + Self::MIN_POSITIVE + } + + #[inline] + fn epsilon() -> Self { + Self::EPSILON + } + + #[inline] + fn max_value() -> Self { + Self::MAX + } + + #[inline] + fn is_nan(self) -> bool { + self.is_nan() + } + + #[inline] + fn is_infinite(self) -> bool { + self.is_infinite() + } + + #[inline] + fn is_finite(self) -> bool { + self.is_finite() + } + + #[inline] + fn is_normal(self) -> bool { + self.is_normal() + } + + #[inline] + fn classify(self) -> FpCategory { + self.classify() + } + + #[inline] + fn floor(self) -> Self { + Self::from_f32(self.to_f32().floor()) + } + + #[inline] + fn ceil(self) -> Self { + Self::from_f32(self.to_f32().ceil()) + } + + #[inline] + fn round(self) -> Self { + Self::from_f32(self.to_f32().round()) + } + + #[inline] + fn trunc(self) -> Self { + Self::from_f32(self.to_f32().trunc()) + } + + #[inline] + fn fract(self) -> Self { + Self::from_f32(self.to_f32().fract()) + } + + #[inline] + fn abs(self) -> Self { + Self::from_bits(self.to_bits() & 0x7FFF) + } + + #[inline] + fn signum(self) -> Self { + self.signum() + } + + #[inline] + fn is_sign_positive(self) -> bool { + self.is_sign_positive() + } + + #[inline] + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + + fn min(self, other: Self) -> Self { + match self.partial_cmp(&other) { + None => { + if self.is_nan() { + other + } else { + self + } + } + Some(Ordering::Greater) | Some(Ordering::Equal) => other, + Some(Ordering::Less) => self, + } + } + + fn max(self, other: Self) -> Self { + match self.partial_cmp(&other) { + None => { + if self.is_nan() { + other + } else { + self + } + } + Some(Ordering::Greater) | Some(Ordering::Equal) => self, + Some(Ordering::Less) => other, + } + } + + #[inline] + fn recip(self) -> Self { + Self::from_f32(self.to_f32().recip()) + } + + #[inline] + fn powi(self, exp: i32) -> Self { + Self::from_f32(self.to_f32().powi(exp)) + } + + #[inline] + fn to_degrees(self) -> Self { + Self::from_f32(self.to_f32().to_degrees()) + } + + #[inline] + fn to_radians(self) -> Self { + Self::from_f32(self.to_f32().to_radians()) + } + + #[inline] + fn integer_decode(self) -> (u64, i16, i8) { + num_traits::float::FloatCore::integer_decode(self.to_f32()) + } +} + +impl num_traits::float::Float for bf16 { + #[inline] + fn nan() -> Self { + Self::NAN + } + + #[inline] + fn infinity() -> Self { + Self::INFINITY + } + + #[inline] + fn neg_infinity() -> Self { + Self::NEG_INFINITY + } + + #[inline] + fn neg_zero() -> Self { + Self::NEG_ZERO + } + + #[inline] + fn min_value() -> Self { + Self::MIN + } + + #[inline] + fn min_positive_value() -> Self { + Self::MIN_POSITIVE + } + + #[inline] + fn epsilon() -> Self { + Self::EPSILON + } + + #[inline] + fn max_value() -> Self { + Self::MAX + } + + #[inline] + fn is_nan(self) -> bool { + self.is_nan() + } + + #[inline] + fn is_infinite(self) -> bool { + self.is_infinite() + } + + #[inline] + fn is_finite(self) -> bool { + self.is_finite() + } + + #[inline] + fn is_normal(self) -> bool { + self.is_normal() + } + + #[inline] + fn classify(self) -> FpCategory { + self.classify() + } + + #[inline] + fn floor(self) -> Self { + Self::from_f32(self.to_f32().floor()) + } + + #[inline] + fn ceil(self) -> Self { + Self::from_f32(self.to_f32().ceil()) + } + + #[inline] + fn round(self) -> Self { + Self::from_f32(self.to_f32().round()) + } + + #[inline] + fn trunc(self) -> Self { + Self::from_f32(self.to_f32().trunc()) + } + + #[inline] + fn fract(self) -> Self { + Self::from_f32(self.to_f32().fract()) + } + + #[inline] + fn abs(self) -> Self { + Self::from_f32(self.to_f32().abs()) + } + + #[inline] + fn signum(self) -> Self { + Self::from_f32(self.to_f32().signum()) + } + + #[inline] + fn is_sign_positive(self) -> bool { + self.is_sign_positive() + } + + #[inline] + fn is_sign_negative(self) -> bool { + self.is_sign_negative() + } + + #[inline] + fn mul_add(self, a: Self, b: Self) -> Self { + Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) + } + + #[inline] + fn recip(self) -> Self { + Self::from_f32(self.to_f32().recip()) + } + + #[inline] + fn powi(self, n: i32) -> Self { + Self::from_f32(self.to_f32().powi(n)) + } + + #[inline] + fn powf(self, n: Self) -> Self { + Self::from_f32(self.to_f32().powf(n.to_f32())) + } + + #[inline] + fn sqrt(self) -> Self { + Self::from_f32(self.to_f32().sqrt()) + } + + #[inline] + fn exp(self) -> Self { + Self::from_f32(self.to_f32().exp()) + } + + #[inline] + fn exp2(self) -> Self { + Self::from_f32(self.to_f32().exp2()) + } + + #[inline] + fn ln(self) -> Self { + Self::from_f32(self.to_f32().ln()) + } + + #[inline] + fn log(self, base: Self) -> Self { + Self::from_f32(self.to_f32().log(base.to_f32())) + } + + #[inline] + fn log2(self) -> Self { + Self::from_f32(self.to_f32().log2()) + } + + #[inline] + fn log10(self) -> Self { + Self::from_f32(self.to_f32().log10()) + } + + #[inline] + fn to_degrees(self) -> Self { + Self::from_f32(self.to_f32().to_degrees()) + } + + #[inline] + fn to_radians(self) -> Self { + Self::from_f32(self.to_f32().to_radians()) + } + + #[inline] + fn max(self, other: Self) -> Self { + self.max(other) + } + + #[inline] + fn min(self, other: Self) -> Self { + self.min(other) + } + + #[inline] + fn abs_sub(self, other: Self) -> Self { + Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) + } + + #[inline] + fn cbrt(self) -> Self { + Self::from_f32(self.to_f32().cbrt()) + } + + #[inline] + fn hypot(self, other: Self) -> Self { + Self::from_f32(self.to_f32().hypot(other.to_f32())) + } + + #[inline] + fn sin(self) -> Self { + Self::from_f32(self.to_f32().sin()) + } + + #[inline] + fn cos(self) -> Self { + Self::from_f32(self.to_f32().cos()) + } + + #[inline] + fn tan(self) -> Self { + Self::from_f32(self.to_f32().tan()) + } + + #[inline] + fn asin(self) -> Self { + Self::from_f32(self.to_f32().asin()) + } + + #[inline] + fn acos(self) -> Self { + Self::from_f32(self.to_f32().acos()) + } + + #[inline] + fn atan(self) -> Self { + Self::from_f32(self.to_f32().atan()) + } + + #[inline] + fn atan2(self, other: Self) -> Self { + Self::from_f32(self.to_f32().atan2(other.to_f32())) + } + + #[inline] + fn sin_cos(self) -> (Self, Self) { + let (sin, cos) = self.to_f32().sin_cos(); + (Self::from_f32(sin), Self::from_f32(cos)) + } + + #[inline] + fn exp_m1(self) -> Self { + Self::from_f32(self.to_f32().exp_m1()) + } + + #[inline] + fn ln_1p(self) -> Self { + Self::from_f32(self.to_f32().ln_1p()) + } + + #[inline] + fn sinh(self) -> Self { + Self::from_f32(self.to_f32().sinh()) + } + + #[inline] + fn cosh(self) -> Self { + Self::from_f32(self.to_f32().cosh()) + } + + #[inline] + fn tanh(self) -> Self { + Self::from_f32(self.to_f32().tanh()) + } + + #[inline] + fn asinh(self) -> Self { + Self::from_f32(self.to_f32().asinh()) + } + + #[inline] + fn acosh(self) -> Self { + Self::from_f32(self.to_f32().acosh()) + } + + #[inline] + fn atanh(self) -> Self { + Self::from_f32(self.to_f32().atanh()) + } + + #[inline] + fn integer_decode(self) -> (u64, i16, i8) { + num_traits::float::Float::integer_decode(self.to_f32()) + } +} + +impl FloatConst for bf16 { + #[inline] + fn E() -> Self { + Self::E + } + + #[inline] + fn FRAC_1_PI() -> Self { + Self::FRAC_1_PI + } + + #[inline] + fn FRAC_1_SQRT_2() -> Self { + Self::FRAC_1_SQRT_2 + } + + #[inline] + fn FRAC_2_PI() -> Self { + Self::FRAC_2_PI + } + + #[inline] + fn FRAC_2_SQRT_PI() -> Self { + Self::FRAC_2_SQRT_PI + } + + #[inline] + fn FRAC_PI_2() -> Self { + Self::FRAC_PI_2 + } + + #[inline] + fn FRAC_PI_3() -> Self { + Self::FRAC_PI_3 + } + + #[inline] + fn FRAC_PI_4() -> Self { + Self::FRAC_PI_4 + } + + #[inline] + fn FRAC_PI_6() -> Self { + Self::FRAC_PI_6 + } + + #[inline] + fn FRAC_PI_8() -> Self { + Self::FRAC_PI_8 + } + + #[inline] + fn LN_10() -> Self { + Self::LN_10 + } + + #[inline] + fn LN_2() -> Self { + Self::LN_2 + } + + #[inline] + fn LOG10_E() -> Self { + Self::LOG10_E + } + + #[inline] + fn LOG2_E() -> Self { + Self::LOG2_E + } + + #[inline] + fn PI() -> Self { + Self::PI + } + + #[inline] + fn SQRT_2() -> Self { + Self::SQRT_2 + } + + #[inline] + fn LOG10_2() -> Self + where + Self: Sized + Div<Self, Output = Self>, + { + Self::LOG10_2 + } + + #[inline] + fn LOG2_10() -> Self + where + Self: Sized + Div<Self, Output = Self>, + { + Self::LOG2_10 + } +} + +impl Bounded for bf16 { + #[inline] + fn min_value() -> Self { + bf16::MIN + } + + #[inline] + fn max_value() -> Self { + bf16::MAX + } +} + +impl AsPrimitive<bf16> for bf16 { + #[inline] + fn as_(self) -> bf16 { + self + } +} + +macro_rules! impl_as_primitive_to_bf16 { + ($ty:ty, $meth:ident) => { + impl AsPrimitive<$ty> for bf16 { + #[inline] + fn as_(self) -> $ty { + self.$meth().as_() + } + } + }; +} + +impl_as_primitive_to_bf16!(i64, to_f32); +impl_as_primitive_to_bf16!(u64, to_f32); +impl_as_primitive_to_bf16!(i8, to_f32); +impl_as_primitive_to_bf16!(u8, to_f32); +impl_as_primitive_to_bf16!(i16, to_f32); +impl_as_primitive_to_bf16!(u16, to_f32); +impl_as_primitive_to_bf16!(i32, to_f32); +impl_as_primitive_to_bf16!(u32, to_f32); +impl_as_primitive_to_bf16!(isize, to_f32); +impl_as_primitive_to_bf16!(usize, to_f32); +impl_as_primitive_to_bf16!(f32, to_f32); +impl_as_primitive_to_bf16!(f64, to_f64); + +macro_rules! impl_as_primitive_bf16_from { + ($ty:ty, $meth:ident) => { + impl AsPrimitive<bf16> for $ty { + #[inline] + fn as_(self) -> bf16 { + bf16::$meth(self.as_()) + } + } + }; +} + +impl_as_primitive_bf16_from!(i64, from_f32); +impl_as_primitive_bf16_from!(u64, from_f32); +impl_as_primitive_bf16_from!(i8, from_f32); +impl_as_primitive_bf16_from!(u8, from_f32); +impl_as_primitive_bf16_from!(i16, from_f32); +impl_as_primitive_bf16_from!(u16, from_f32); +impl_as_primitive_bf16_from!(i32, from_f32); +impl_as_primitive_bf16_from!(u32, from_f32); +impl_as_primitive_bf16_from!(isize, from_f32); +impl_as_primitive_bf16_from!(usize, from_f32); +impl_as_primitive_bf16_from!(f32, from_f32); +impl_as_primitive_bf16_from!(f64, from_f64); diff --git a/vendor/half/src/slice.rs b/vendor/half/src/slice.rs new file mode 100644 index 0000000..f1e9feb --- /dev/null +++ b/vendor/half/src/slice.rs @@ -0,0 +1,854 @@ +//! Contains utility functions and traits to convert between slices of [`u16`] bits and [`f16`] or +//! [`bf16`] numbers. +//! +//! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, +//! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` +//! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of +//! larger buffers of floating point values, and are automatically included in the +//! [`prelude`][crate::prelude] module. + +use crate::{bf16, binary16::convert, f16}; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +use core::slice; + +/// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { + /// Reinterprets a slice of [`f16`] or [`bf16`] numbers as a slice of [`u16`] bits. + /// + /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory + /// location as `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// let int_buffer = float_buffer.reinterpret_cast(); + /// + /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); + /// ``` + #[must_use] + fn reinterpret_cast(&self) -> &[u16]; + + /// Reinterprets a mutable slice of [`f16`] or [`bf16`] numbers as a mutable slice of [`u16`]. + /// bits + /// + /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, + /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// + /// { + /// let int_buffer = float_buffer.reinterpret_cast_mut(); + /// + /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// + /// // Mutating the u16 slice will mutating the original + /// int_buffer[0] = 0; + /// } + /// + /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. + /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); + /// ``` + #[must_use] + fn reinterpret_cast_mut(&mut self) -> &mut [u16]; + + /// Converts all of the elements of a `[f32]` slice into [`f16`] or [`bf16`] values in `self`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0u16; 4]; + /// let buffer = buffer.reinterpret_cast_mut::<f16>(); + /// + /// let float_values = [1., 2., 3., 4.]; + /// + /// // Now convert + /// buffer.convert_from_f32_slice(&float_values); + /// + /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); + /// ``` + fn convert_from_f32_slice(&mut self, src: &[f32]); + + /// Converts all of the elements of a `[f64]` slice into [`f16`] or [`bf16`] values in `self`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0u16; 4]; + /// let buffer = buffer.reinterpret_cast_mut::<f16>(); + /// + /// let float_values = [1., 2., 3., 4.]; + /// + /// // Now convert + /// buffer.convert_from_f64_slice(&float_values); + /// + /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); + /// ``` + fn convert_from_f64_slice(&mut self, src: &[f64]); + + /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in `dst`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0f32; 4]; + /// + /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; + /// + /// // Now convert + /// half_values.convert_to_f32_slice(&mut buffer); + /// + /// assert_eq!(buffer, [1., 2., 3., 4.]); + /// ``` + fn convert_to_f32_slice(&self, dst: &mut [f32]); + + /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in `dst`. + /// + /// The length of `src` must be the same as `self`. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// # Panics + /// + /// This function will panic if the two slices have different lengths. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// // Initialize an empty buffer + /// let mut buffer = [0f64; 4]; + /// + /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; + /// + /// // Now convert + /// half_values.convert_to_f64_slice(&mut buffer); + /// + /// assert_eq!(buffer, [1., 2., 3., 4.]); + /// ``` + fn convert_to_f64_slice(&self, dst: &mut [f64]); + + // Because trait is sealed, we can get away with different interfaces between features. + + /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in a new + /// vector + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// This method is only available with the `std` or `alloc` feature. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]; + /// let vec = half_values.to_f32_vec(); + /// + /// assert_eq!(vec, vec![1., 2., 3., 4.]); + /// ``` + #[cfg(any(feature = "alloc", feature = "std"))] + #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] + #[must_use] + fn to_f32_vec(&self) -> Vec<f32>; + + /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in a new + /// vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation](crate) for more information on hardware conversion + /// support. + /// + /// This method is only available with the `std` or `alloc` feature. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]; + /// let vec = half_values.to_f64_vec(); + /// + /// assert_eq!(vec, vec![1., 2., 3., 4.]); + /// ``` + #[cfg(feature = "alloc")] + #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] + #[must_use] + fn to_f64_vec(&self) -> Vec<f64>; +} + +/// Extensions to `[u16]` slices to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice { + /// Reinterprets a slice of [`u16`] bits as a slice of [`f16`] or [`bf16`] numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. + /// + /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory + /// location as `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// let float_buffer: &[f16] = int_buffer.reinterpret_cast(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// + /// // You may have to specify the cast type directly if the compiler can't infer the type. + /// // The following is also valid in Rust. + /// let typed_buffer = int_buffer.reinterpret_cast::<f16>(); + /// ``` + #[must_use] + fn reinterpret_cast<H>(&self) -> &[H] + where + H: crate::private::SealedHalf; + + /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`f16`] or [`bf16`] + /// numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. + /// + /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, + /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// + /// { + /// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// + /// // Mutating the f16 slice will mutating the original + /// float_buffer[0] = f16::from_f32(0.); + /// } + /// + /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error. + /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// + /// // You may have to specify the cast type directly if the compiler can't infer the type. + /// // The following is also valid in Rust. + /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>(); + /// ``` + #[must_use] + fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] + where + H: crate::private::SealedHalf; +} + +mod private { + use crate::{bf16, f16}; + + pub trait SealedHalfFloatSlice {} + impl SealedHalfFloatSlice for [f16] {} + impl SealedHalfFloatSlice for [bf16] {} + + pub trait SealedHalfBitsSlice {} + impl SealedHalfBitsSlice for [u16] {} +} + +impl HalfFloatSliceExt for [f16] { + #[inline] + fn reinterpret_cast(&self) -> &[u16] { + let pointer = self.as_ptr() as *const u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut(&mut self) -> &mut [u16] { + let pointer = self.as_mut_ptr().cast::<u16>(); + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } + + fn convert_from_f32_slice(&mut self, src: &[f32]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + convert::f32_to_f16_slice(src, self.reinterpret_cast_mut()) + } + + fn convert_from_f64_slice(&mut self, src: &[f64]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + convert::f64_to_f16_slice(src, self.reinterpret_cast_mut()) + } + + fn convert_to_f32_slice(&self, dst: &mut [f32]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + convert::f16_to_f32_slice(self.reinterpret_cast(), dst) + } + + fn convert_to_f64_slice(&self, dst: &mut [f64]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + convert::f16_to_f64_slice(self.reinterpret_cast(), dst) + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + #[allow(clippy::uninit_vec)] + fn to_f32_vec(&self) -> Vec<f32> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f32_slice(&mut vec); + vec + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + #[allow(clippy::uninit_vec)] + fn to_f64_vec(&self) -> Vec<f64> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f64_slice(&mut vec); + vec + } +} + +impl HalfFloatSliceExt for [bf16] { + #[inline] + fn reinterpret_cast(&self) -> &[u16] { + let pointer = self.as_ptr() as *const u16; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut(&mut self) -> &mut [u16] { + let pointer = self.as_mut_ptr().cast::<u16>(); + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } + + fn convert_from_f32_slice(&mut self, src: &[f32]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in src.iter().enumerate() { + self[i] = bf16::from_f32(*f); + } + } + + fn convert_from_f64_slice(&mut self, src: &[f64]) { + assert_eq!( + self.len(), + src.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in src.iter().enumerate() { + self[i] = bf16::from_f64(*f); + } + } + + fn convert_to_f32_slice(&self, dst: &mut [f32]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in self.iter().enumerate() { + dst[i] = f.to_f32(); + } + } + + fn convert_to_f64_slice(&self, dst: &mut [f64]) { + assert_eq!( + self.len(), + dst.len(), + "destination and source slices have different lengths" + ); + + // Just use regular loop here until there's any bf16 SIMD support. + for (i, f) in self.iter().enumerate() { + dst[i] = f.to_f64(); + } + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + #[allow(clippy::uninit_vec)] + fn to_f32_vec(&self) -> Vec<f32> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f32_slice(&mut vec); + vec + } + + #[cfg(any(feature = "alloc", feature = "std"))] + #[inline] + #[allow(clippy::uninit_vec)] + fn to_f64_vec(&self) -> Vec<f64> { + let mut vec = Vec::with_capacity(self.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(self.len()) }; + self.convert_to_f64_slice(&mut vec); + vec + } +} + +impl HalfBitsSliceExt for [u16] { + // Since we sealed all the traits involved, these are safe. + #[inline] + fn reinterpret_cast<H>(&self) -> &[H] + where + H: crate::private::SealedHalf, + { + let pointer = self.as_ptr() as *const H; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts(pointer, length) } + } + + #[inline] + fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] + where + H: crate::private::SealedHalf, + { + let pointer = self.as_mut_ptr() as *mut H; + let length = self.len(); + // SAFETY: We are reconstructing full length of original slice, using its same lifetime, + // and the size of elements are identical + unsafe { slice::from_raw_parts_mut(pointer, length) } + } +} + +#[allow(clippy::float_cmp)] +#[cfg(test)] +mod test { + use super::{HalfBitsSliceExt, HalfFloatSliceExt}; + use crate::{bf16, f16}; + + #[test] + fn test_slice_conversions_f16() { + let bits = &[ + f16::E.to_bits(), + f16::PI.to_bits(), + f16::EPSILON.to_bits(), + f16::FRAC_1_SQRT_2.to_bits(), + ]; + let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; + + // Convert from bits to numbers + let from_bits = bits.reinterpret_cast::<f16>(); + assert_eq!(from_bits, numbers); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_cast(); + assert_eq!(to_bits, bits); + } + + #[test] + fn test_mutablility_f16() { + let mut bits_array = [f16::PI.to_bits()]; + let bits = &mut bits_array[..]; + + { + // would not compile without these braces + let numbers = bits.reinterpret_cast_mut(); + numbers[0] = f16::E; + } + + assert_eq!(bits, &[f16::E.to_bits()]); + + bits[0] = f16::LN_2.to_bits(); + assert_eq!(bits, &[f16::LN_2.to_bits()]); + } + + #[test] + fn test_slice_conversions_bf16() { + let bits = &[ + bf16::E.to_bits(), + bf16::PI.to_bits(), + bf16::EPSILON.to_bits(), + bf16::FRAC_1_SQRT_2.to_bits(), + ]; + let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; + + // Convert from bits to numbers + let from_bits = bits.reinterpret_cast::<bf16>(); + assert_eq!(from_bits, numbers); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_cast(); + assert_eq!(to_bits, bits); + } + + #[test] + fn test_mutablility_bf16() { + let mut bits_array = [bf16::PI.to_bits()]; + let bits = &mut bits_array[..]; + + { + // would not compile without these braces + let numbers = bits.reinterpret_cast_mut(); + numbers[0] = bf16::E; + } + + assert_eq!(bits, &[bf16::E.to_bits()]); + + bits[0] = bf16::LN_2.to_bits(); + assert_eq!(bits, &[bf16::LN_2.to_bits()]); + } + + #[test] + fn slice_convert_f16_f32() { + // Exact chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + f16::from_f32(1.), + f16::from_f32(2.), + f16::from_f32(3.), + f16::from_f32(4.), + f16::from_f32(5.), + f16::from_f32(6.), + f16::from_f32(7.), + f16::from_f32(8.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + f16::from_f32(1.), + f16::from_f32(2.), + f16::from_f32(3.), + f16::from_f32(4.), + f16::from_f32(5.), + f16::from_f32(6.), + f16::from_f32(7.), + f16::from_f32(8.), + f16::from_f32(9.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2.]; + let vf16 = [f16::from_f32(1.), f16::from_f32(2.)]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_bf16_f32() { + // Exact chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + bf16::from_f32(1.), + bf16::from_f32(2.), + bf16::from_f32(3.), + bf16::from_f32(4.), + bf16::from_f32(5.), + bf16::from_f32(6.), + bf16::from_f32(7.), + bf16::from_f32(8.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + bf16::from_f32(1.), + bf16::from_f32(2.), + bf16::from_f32(3.), + bf16::from_f32(4.), + bf16::from_f32(5.), + bf16::from_f32(6.), + bf16::from_f32(7.), + bf16::from_f32(8.), + bf16::from_f32(9.), + ]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf32 = [1., 2.]; + let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)]; + let mut buf32 = vf32; + let mut buf16 = vf16; + + vf16.convert_to_f32_slice(&mut buf32); + assert_eq!(&vf32, &buf32); + + buf16.convert_from_f32_slice(&vf32); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_f16_f64() { + // Exact chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + f16::from_f64(1.), + f16::from_f64(2.), + f16::from_f64(3.), + f16::from_f64(4.), + f16::from_f64(5.), + f16::from_f64(6.), + f16::from_f64(7.), + f16::from_f64(8.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + f16::from_f64(1.), + f16::from_f64(2.), + f16::from_f64(3.), + f16::from_f64(4.), + f16::from_f64(5.), + f16::from_f64(6.), + f16::from_f64(7.), + f16::from_f64(8.), + f16::from_f64(9.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2.]; + let vf16 = [f16::from_f64(1.), f16::from_f64(2.)]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + } + + #[test] + fn slice_convert_bf16_f64() { + // Exact chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; + let vf16 = [ + bf16::from_f64(1.), + bf16::from_f64(2.), + bf16::from_f64(3.), + bf16::from_f64(4.), + bf16::from_f64(5.), + bf16::from_f64(6.), + bf16::from_f64(7.), + bf16::from_f64(8.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; + let vf16 = [ + bf16::from_f64(1.), + bf16::from_f64(2.), + bf16::from_f64(3.), + bf16::from_f64(4.), + bf16::from_f64(5.), + bf16::from_f64(6.), + bf16::from_f64(7.), + bf16::from_f64(8.), + bf16::from_f64(9.), + ]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + + // Partial with chunks + let vf64 = [1., 2.]; + let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)]; + let mut buf64 = vf64; + let mut buf16 = vf16; + + vf16.convert_to_f64_slice(&mut buf64); + assert_eq!(&vf64, &buf64); + + buf16.convert_from_f64_slice(&vf64); + assert_eq!(&vf16, &buf16); + } + + #[test] + #[should_panic] + fn convert_from_f32_slice_len_mismatch_panics() { + let mut slice1 = [f16::ZERO; 3]; + let slice2 = [0f32; 4]; + slice1.convert_from_f32_slice(&slice2); + } + + #[test] + #[should_panic] + fn convert_from_f64_slice_len_mismatch_panics() { + let mut slice1 = [f16::ZERO; 3]; + let slice2 = [0f64; 4]; + slice1.convert_from_f64_slice(&slice2); + } + + #[test] + #[should_panic] + fn convert_to_f32_slice_len_mismatch_panics() { + let slice1 = [f16::ZERO; 3]; + let mut slice2 = [0f32; 4]; + slice1.convert_to_f32_slice(&mut slice2); + } + + #[test] + #[should_panic] + fn convert_to_f64_slice_len_mismatch_panics() { + let slice1 = [f16::ZERO; 3]; + let mut slice2 = [0f64; 4]; + slice1.convert_to_f64_slice(&mut slice2); + } +} diff --git a/vendor/half/src/vec.rs b/vendor/half/src/vec.rs new file mode 100644 index 0000000..27ad3e7 --- /dev/null +++ b/vendor/half/src/vec.rs @@ -0,0 +1,274 @@ +//! Contains utility functions and traits to convert between vectors of [`u16`] bits and [`f16`] or +//! [`bf16`] vectors. +//! +//! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors, +//! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both +//! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and +//! reinterpret casting of larger buffers of floating point values, and are automatically included +//! in the [`prelude`][crate::prelude] module. +//! +//! This module is only available with the `std` or `alloc` feature. + +use super::{bf16, f16, slice::HalfFloatSliceExt}; +#[cfg(feature = "alloc")] +use alloc::vec::Vec; +use core::mem; + +/// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfFloatVecExt: private::SealedHalfFloatVec { + /// Reinterprets a vector of [`f16`]or [`bf16`] numbers as a vector of [`u16`] bits. + /// + /// This is a zero-copy operation. The reinterpreted vector has the same memory location as + /// `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; + /// let int_buffer = float_buffer.reinterpret_into(); + /// + /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); + /// ``` + #[must_use] + fn reinterpret_into(self) -> Vec<u16>; + + /// Converts all of the elements of a `[f32]` slice into a new [`f16`] or [`bf16`] vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation][crate] for more information on hardware conversion + /// support. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let float_values = [1., 2., 3., 4.]; + /// let vec: Vec<f16> = Vec::from_f32_slice(&float_values); + /// + /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); + /// ``` + #[must_use] + fn from_f32_slice(slice: &[f32]) -> Self; + + /// Converts all of the elements of a `[f64]` slice into a new [`f16`] or [`bf16`] vector. + /// + /// The conversion operation is vectorized over the slice, meaning the conversion may be more + /// efficient than converting individual elements on some hardware that supports SIMD + /// conversions. See [crate documentation][crate] for more information on hardware conversion + /// support. + /// + /// # Examples + /// ```rust + /// # use half::prelude::*; + /// let float_values = [1., 2., 3., 4.]; + /// let vec: Vec<f16> = Vec::from_f64_slice(&float_values); + /// + /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); + /// ``` + #[must_use] + fn from_f64_slice(slice: &[f64]) -> Self; +} + +/// Extensions to [`Vec<u16>`] to support reinterpret operations. +/// +/// This trait is sealed and cannot be implemented outside of this crate. +pub trait HalfBitsVecExt: private::SealedHalfBitsVec { + /// Reinterprets a vector of [`u16`] bits as a vector of [`f16`] or [`bf16`] numbers. + /// + /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. + /// + /// This is a zero-copy operation. The reinterpreted vector has the same memory location as + /// `self`. + /// + /// # Examples + /// + /// ```rust + /// # use half::prelude::*; + /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; + /// let float_buffer = int_buffer.reinterpret_into::<f16>(); + /// + /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); + /// ``` + #[must_use] + fn reinterpret_into<H>(self) -> Vec<H> + where + H: crate::private::SealedHalf; +} + +mod private { + use crate::{bf16, f16}; + #[cfg(feature = "alloc")] + use alloc::vec::Vec; + + pub trait SealedHalfFloatVec {} + impl SealedHalfFloatVec for Vec<f16> {} + impl SealedHalfFloatVec for Vec<bf16> {} + + pub trait SealedHalfBitsVec {} + impl SealedHalfBitsVec for Vec<u16> {} +} + +impl HalfFloatVecExt for Vec<f16> { + #[inline] + fn reinterpret_into(mut self) -> Vec<u16> { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<f16> as u16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut u16; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } + + #[allow(clippy::uninit_vec)] + fn from_f32_slice(slice: &[f32]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f32_slice(slice); + vec + } + + #[allow(clippy::uninit_vec)] + fn from_f64_slice(slice: &[f64]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f64_slice(slice); + vec + } +} + +impl HalfFloatVecExt for Vec<bf16> { + #[inline] + fn reinterpret_into(mut self) -> Vec<u16> { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<f16> as u16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut u16; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } + + #[allow(clippy::uninit_vec)] + fn from_f32_slice(slice: &[f32]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f32_slice(slice); + vec + } + + #[allow(clippy::uninit_vec)] + fn from_f64_slice(slice: &[f64]) -> Self { + let mut vec = Vec::with_capacity(slice.len()); + // SAFETY: convert will initialize every value in the vector without reading them, + // so this is safe to do instead of double initialize from resize, and we're setting it to + // same value as capacity. + unsafe { vec.set_len(slice.len()) }; + vec.convert_from_f64_slice(slice); + vec + } +} + +impl HalfBitsVecExt for Vec<u16> { + // This is safe because all traits are sealed + #[inline] + fn reinterpret_into<H>(mut self) -> Vec<H> + where + H: crate::private::SealedHalf, + { + // An f16 array has same length and capacity as u16 array + let length = self.len(); + let capacity = self.capacity(); + + // Actually reinterpret the contents of the Vec<u16> as f16, + // knowing that structs are represented as only their members in memory, + // which is the u16 part of `f16(u16)` + let pointer = self.as_mut_ptr() as *mut H; + + // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted + mem::forget(self); + + // Finally construct a new Vec<f16> from the raw pointer + // SAFETY: We are reconstructing full length and capacity of original vector, + // using its original pointer, and the size of elements are identical. + unsafe { Vec::from_raw_parts(pointer, length, capacity) } + } +} + +#[cfg(test)] +mod test { + use super::{HalfBitsVecExt, HalfFloatVecExt}; + use crate::{bf16, f16}; + #[cfg(all(feature = "alloc", not(feature = "std")))] + use alloc::vec; + + #[test] + fn test_vec_conversions_f16() { + let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; + let bits = vec![ + f16::E.to_bits(), + f16::PI.to_bits(), + f16::EPSILON.to_bits(), + f16::FRAC_1_SQRT_2.to_bits(), + ]; + let bits_cloned = bits.clone(); + + // Convert from bits to numbers + let from_bits = bits.reinterpret_into::<f16>(); + assert_eq!(&from_bits[..], &numbers[..]); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_into(); + assert_eq!(&to_bits[..], &bits_cloned[..]); + } + + #[test] + fn test_vec_conversions_bf16() { + let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; + let bits = vec![ + bf16::E.to_bits(), + bf16::PI.to_bits(), + bf16::EPSILON.to_bits(), + bf16::FRAC_1_SQRT_2.to_bits(), + ]; + let bits_cloned = bits.clone(); + + // Convert from bits to numbers + let from_bits = bits.reinterpret_into::<bf16>(); + assert_eq!(&from_bits[..], &numbers[..]); + + // Convert from numbers back to bits + let to_bits = from_bits.reinterpret_into(); + assert_eq!(&to_bits[..], &bits_cloned[..]); + } +} |