author | Valentin Popov <valentin@popov.link> | 2024-07-19 15:37:58 +0300
---|---|---
committer | Valentin Popov <valentin@popov.link> | 2024-07-19 15:37:58 +0300
commit | a990de90fe41456a23e58bd087d2f107d321f3a1 (patch) |
tree | 15afc392522a9e85dc3332235e311b7d39352ea9 /vendor/half/src |
parent | 3d48cd3f81164bbfc1a755dc1d4a9a02f98c8ddd (diff) |
download | fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.tar.xz fparkan-a990de90fe41456a23e58bd087d2f107d321f3a1.zip |
Deleted vendor folder
Diffstat (limited to 'vendor/half/src')
-rw-r--r-- | vendor/half/src/bfloat.rs | 1841
-rw-r--r-- | vendor/half/src/bfloat/convert.rs | 148
-rw-r--r-- | vendor/half/src/binary16.rs | 1912
-rw-r--r-- | vendor/half/src/binary16/convert.rs | 752
-rw-r--r-- | vendor/half/src/leading_zeros.rs | 62
-rw-r--r-- | vendor/half/src/lib.rs | 233
-rw-r--r-- | vendor/half/src/num_traits.rs | 1483
-rw-r--r-- | vendor/half/src/slice.rs | 854
-rw-r--r-- | vendor/half/src/vec.rs | 274
9 files changed, 0 insertions, 7559 deletions
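The diff below removes the vendored sources of the `half` crate, which provide the `bf16` and `f16` storage types documented in the deleted files. As a point of reference, here is a minimal usage sketch based on the public API visible in those sources (`from_f32`, `to_f32`, `to_le_bytes`, `EPSILON`); it assumes the crate is pulled in as a normal Cargo dependency rather than a vendored copy.

```rust
// Minimal sketch of the bf16 API implemented by the deleted vendor/half sources.
// Assumes `half` is declared as a regular dependency instead of a vendored copy.
use half::bf16;

fn main() {
    // bf16 is a 16-bit storage type; arithmetic is meant to be done in f32
    // (or wider) and converted back, as the doc comments below explain.
    let x = bf16::from_f32(12.5);
    assert_eq!(x.to_f32(), 12.5);

    // Little-endian byte representation, matching the doc example in bfloat.rs.
    assert_eq!(x.to_le_bytes(), [0x48, 0x41]);

    // Values that cannot be represented exactly are rounded to the nearest bf16;
    // the bound below mirrors the crate's own test for 7.1.
    let y = bf16::from_f32(7.1);
    assert!((y.to_f32() - 7.1).abs() <= 4.0 * bf16::EPSILON.to_f32());
}
```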
diff --git a/vendor/half/src/bfloat.rs b/vendor/half/src/bfloat.rs deleted file mode 100644 index 8b23863..0000000 --- a/vendor/half/src/bfloat.rs +++ /dev/null @@ -1,1841 +0,0 @@ -#[cfg(feature = "bytemuck")] -use bytemuck::{Pod, Zeroable}; -use core::{ - cmp::Ordering, - iter::{Product, Sum}, - num::FpCategory, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, -}; -#[cfg(not(target_arch = "spirv"))] -use core::{ - fmt::{ - Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, - }, - num::ParseFloatError, - str::FromStr, -}; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; -#[cfg(feature = "zerocopy")] -use zerocopy::{AsBytes, FromBytes}; - -pub(crate) mod convert; - -/// A 16-bit floating point type implementing the [`bfloat16`] format. -/// -/// The [`bfloat16`] floating point format is a truncated 16-bit version of the IEEE 754 standard -/// `binary32`, a.k.a [`f32`]. [`bf16`] has approximately the same dynamic range as [`f32`] by -/// having a lower precision than [`f16`][crate::f16]. While [`f16`][crate::f16] has a precision of -/// 11 bits, [`bf16`] has a precision of only 8 bits. -/// -/// Like [`f16`][crate::f16], [`bf16`] does not offer arithmetic operations as it is intended for -/// compact storage rather than calculations. Operations should be performed with [`f32`] or -/// higher-precision types and converted to/from [`bf16`] as necessary. -/// -/// [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format -#[allow(non_camel_case_types)] -#[derive(Clone, Copy, Default)] -#[repr(transparent)] -#[cfg_attr(feature = "serde", derive(Serialize))] -#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] -#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] -pub struct bf16(u16); - -impl bf16 { - /// Constructs a [`bf16`] value from the raw bits. - #[inline] - #[must_use] - pub const fn from_bits(bits: u16) -> bf16 { - bf16(bits) - } - - /// Constructs a [`bf16`] value from a 32-bit floating point value. - /// - /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. - /// Subnormal values that are too tiny to be represented will result in ±0. All other values - /// are truncated and rounded to the nearest representable value. - #[inline] - #[must_use] - pub fn from_f32(value: f32) -> bf16 { - Self::from_f32_const(value) - } - - /// Constructs a [`bf16`] value from a 32-bit floating point value. - /// - /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred - /// in any non-`const` context. - /// - /// If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. - /// Subnormal values that are too tiny to be represented will result in ±0. All other values - /// are truncated and rounded to the nearest representable value. - #[inline] - #[must_use] - pub const fn from_f32_const(value: f32) -> bf16 { - bf16(convert::f32_to_bf16(value)) - } - - /// Constructs a [`bf16`] value from a 64-bit floating point value. - /// - /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. - /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that - /// underflow the minimum exponent will result in subnormals or ±0. All other values are - /// truncated and rounded to the nearest representable value. 
- #[inline] - #[must_use] - pub fn from_f64(value: f64) -> bf16 { - Self::from_f64_const(value) - } - - /// Constructs a [`bf16`] value from a 64-bit floating point value. - /// - /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred - /// in any non-`const` context. - /// - /// If the 64-bit value is to large to fit, ±∞ will result. NaN values are preserved. - /// 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that - /// underflow the minimum exponent will result in subnormals or ±0. All other values are - /// truncated and rounded to the nearest representable value. - #[inline] - #[must_use] - pub const fn from_f64_const(value: f64) -> bf16 { - bf16(convert::f64_to_bf16(value)) - } - - /// Converts a [`bf16`] into the underlying bit representation. - #[inline] - #[must_use] - pub const fn to_bits(self) -> u16 { - self.0 - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// little-endian byte order. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = bf16::from_f32(12.5).to_le_bytes(); - /// assert_eq!(bytes, [0x48, 0x41]); - /// ``` - #[inline] - #[must_use] - pub const fn to_le_bytes(self) -> [u8; 2] { - self.0.to_le_bytes() - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// big-endian (network) byte order. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = bf16::from_f32(12.5).to_be_bytes(); - /// assert_eq!(bytes, [0x41, 0x48]); - /// ``` - #[inline] - #[must_use] - pub const fn to_be_bytes(self) -> [u8; 2] { - self.0.to_be_bytes() - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// native byte order. - /// - /// As the target platform's native endianness is used, portable code should use - /// [`to_be_bytes`][bf16::to_be_bytes] or [`to_le_bytes`][bf16::to_le_bytes], as appropriate, - /// instead. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = bf16::from_f32(12.5).to_ne_bytes(); - /// assert_eq!(bytes, if cfg!(target_endian = "big") { - /// [0x41, 0x48] - /// } else { - /// [0x48, 0x41] - /// }); - /// ``` - #[inline] - #[must_use] - pub const fn to_ne_bytes(self) -> [u8; 2] { - self.0.to_ne_bytes() - } - - /// Creates a floating point value from its representation as a byte array in little endian. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = bf16::from_le_bytes([0x48, 0x41]); - /// assert_eq!(value, bf16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_le_bytes(bytes: [u8; 2]) -> bf16 { - bf16::from_bits(u16::from_le_bytes(bytes)) - } - - /// Creates a floating point value from its representation as a byte array in big endian. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = bf16::from_be_bytes([0x41, 0x48]); - /// assert_eq!(value, bf16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_be_bytes(bytes: [u8; 2]) -> bf16 { - bf16::from_bits(u16::from_be_bytes(bytes)) - } - - /// Creates a floating point value from its representation as a byte array in native endian. 
- /// - /// As the target platform's native endianness is used, portable code likely wants to use - /// [`from_be_bytes`][bf16::from_be_bytes] or [`from_le_bytes`][bf16::from_le_bytes], as - /// appropriate instead. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = bf16::from_ne_bytes(if cfg!(target_endian = "big") { - /// [0x41, 0x48] - /// } else { - /// [0x48, 0x41] - /// }); - /// assert_eq!(value, bf16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_ne_bytes(bytes: [u8; 2]) -> bf16 { - bf16::from_bits(u16::from_ne_bytes(bytes)) - } - - /// Converts a [`bf16`] value into an [`f32`] value. - /// - /// This conversion is lossless as all values can be represented exactly in [`f32`]. - #[inline] - #[must_use] - pub fn to_f32(self) -> f32 { - self.to_f32_const() - } - - /// Converts a [`bf16`] value into an [`f32`] value. - /// - /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred - /// in any non-`const` context. - /// - /// This conversion is lossless as all values can be represented exactly in [`f32`]. - #[inline] - #[must_use] - pub const fn to_f32_const(self) -> f32 { - convert::bf16_to_f32(self.0) - } - - /// Converts a [`bf16`] value into an [`f64`] value. - /// - /// This conversion is lossless as all values can be represented exactly in [`f64`]. - #[inline] - #[must_use] - pub fn to_f64(self) -> f64 { - self.to_f64_const() - } - - /// Converts a [`bf16`] value into an [`f64`] value. - /// - /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred - /// in any non-`const` context. - /// - /// This conversion is lossless as all values can be represented exactly in [`f64`]. - #[inline] - #[must_use] - pub const fn to_f64_const(self) -> f64 { - convert::bf16_to_f64(self.0) - } - - /// Returns `true` if this value is NaN and `false` otherwise. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = bf16::NAN; - /// let f = bf16::from_f32(7.0_f32); - /// - /// assert!(nan.is_nan()); - /// assert!(!f.is_nan()); - /// ``` - #[inline] - #[must_use] - pub const fn is_nan(self) -> bool { - self.0 & 0x7FFFu16 > 0x7F80u16 - } - - /// Returns `true` if this value is ±∞ and `false` otherwise. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = bf16::from_f32(7.0f32); - /// let inf = bf16::INFINITY; - /// let neg_inf = bf16::NEG_INFINITY; - /// let nan = bf16::NAN; - /// - /// assert!(!f.is_infinite()); - /// assert!(!nan.is_infinite()); - /// - /// assert!(inf.is_infinite()); - /// assert!(neg_inf.is_infinite()); - /// ``` - #[inline] - #[must_use] - pub const fn is_infinite(self) -> bool { - self.0 & 0x7FFFu16 == 0x7F80u16 - } - - /// Returns `true` if this number is neither infinite nor NaN. 
- /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = bf16::from_f32(7.0f32); - /// let inf = bf16::INFINITY; - /// let neg_inf = bf16::NEG_INFINITY; - /// let nan = bf16::NAN; - /// - /// assert!(f.is_finite()); - /// - /// assert!(!nan.is_finite()); - /// assert!(!inf.is_finite()); - /// assert!(!neg_inf.is_finite()); - /// ``` - #[inline] - #[must_use] - pub const fn is_finite(self) -> bool { - self.0 & 0x7F80u16 != 0x7F80u16 - } - - /// Returns `true` if the number is neither zero, infinite, subnormal, or NaN. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let min = bf16::MIN_POSITIVE; - /// let max = bf16::MAX; - /// let lower_than_min = bf16::from_f32(1.0e-39_f32); - /// let zero = bf16::from_f32(0.0_f32); - /// - /// assert!(min.is_normal()); - /// assert!(max.is_normal()); - /// - /// assert!(!zero.is_normal()); - /// assert!(!bf16::NAN.is_normal()); - /// assert!(!bf16::INFINITY.is_normal()); - /// // Values between 0 and `min` are subnormal. - /// assert!(!lower_than_min.is_normal()); - /// ``` - #[inline] - #[must_use] - pub const fn is_normal(self) -> bool { - let exp = self.0 & 0x7F80u16; - exp != 0x7F80u16 && exp != 0 - } - - /// Returns the floating point category of the number. - /// - /// If only one property is going to be tested, it is generally faster to use the specific - /// predicate instead. - /// - /// # Examples - /// - /// ```rust - /// use std::num::FpCategory; - /// # use half::prelude::*; - /// - /// let num = bf16::from_f32(12.4_f32); - /// let inf = bf16::INFINITY; - /// - /// assert_eq!(num.classify(), FpCategory::Normal); - /// assert_eq!(inf.classify(), FpCategory::Infinite); - /// ``` - #[must_use] - pub const fn classify(self) -> FpCategory { - let exp = self.0 & 0x7F80u16; - let man = self.0 & 0x007Fu16; - match (exp, man) { - (0, 0) => FpCategory::Zero, - (0, _) => FpCategory::Subnormal, - (0x7F80u16, 0) => FpCategory::Infinite, - (0x7F80u16, _) => FpCategory::Nan, - _ => FpCategory::Normal, - } - } - - /// Returns a number that represents the sign of `self`. - /// - /// * 1.0 if the number is positive, +0.0 or [`INFINITY`][bf16::INFINITY] - /// * −1.0 if the number is negative, −0.0` or [`NEG_INFINITY`][bf16::NEG_INFINITY] - /// * [`NAN`][bf16::NAN] if the number is NaN - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = bf16::from_f32(3.5_f32); - /// - /// assert_eq!(f.signum(), bf16::from_f32(1.0)); - /// assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0)); - /// - /// assert!(bf16::NAN.signum().is_nan()); - /// ``` - #[must_use] - pub const fn signum(self) -> bf16 { - if self.is_nan() { - self - } else if self.0 & 0x8000u16 != 0 { - Self::NEG_ONE - } else { - Self::ONE - } - } - - /// Returns `true` if and only if `self` has a positive sign, including +0.0, NaNs with a - /// positive sign bit and +∞. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = bf16::NAN; - /// let f = bf16::from_f32(7.0_f32); - /// let g = bf16::from_f32(-7.0_f32); - /// - /// assert!(f.is_sign_positive()); - /// assert!(!g.is_sign_positive()); - /// // NaN can be either positive or negative - /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); - /// ``` - #[inline] - #[must_use] - pub const fn is_sign_positive(self) -> bool { - self.0 & 0x8000u16 == 0 - } - - /// Returns `true` if and only if `self` has a negative sign, including −0.0, NaNs with a - /// negative sign bit and −∞. 
- /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = bf16::NAN; - /// let f = bf16::from_f32(7.0f32); - /// let g = bf16::from_f32(-7.0f32); - /// - /// assert!(!f.is_sign_negative()); - /// assert!(g.is_sign_negative()); - /// // NaN can be either positive or negative - /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); - /// ``` - #[inline] - #[must_use] - pub const fn is_sign_negative(self) -> bool { - self.0 & 0x8000u16 != 0 - } - - /// Returns a number composed of the magnitude of `self` and the sign of `sign`. - /// - /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. - /// If `self` is NaN, then NaN with the sign of `sign` is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let f = bf16::from_f32(3.5); - /// - /// assert_eq!(f.copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); - /// assert_eq!(f.copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); - /// assert_eq!((-f).copysign(bf16::from_f32(0.42)), bf16::from_f32(3.5)); - /// assert_eq!((-f).copysign(bf16::from_f32(-0.42)), bf16::from_f32(-3.5)); - /// - /// assert!(bf16::NAN.copysign(bf16::from_f32(1.0)).is_nan()); - /// ``` - #[inline] - #[must_use] - pub const fn copysign(self, sign: bf16) -> bf16 { - bf16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) - } - - /// Returns the maximum of the two numbers. - /// - /// If one of the arguments is NaN, then the other argument is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let x = bf16::from_f32(1.0); - /// let y = bf16::from_f32(2.0); - /// - /// assert_eq!(x.max(y), y); - /// ``` - #[inline] - #[must_use] - pub fn max(self, other: bf16) -> bf16 { - if other > self && !other.is_nan() { - other - } else { - self - } - } - - /// Returns the minimum of the two numbers. - /// - /// If one of the arguments is NaN, then the other argument is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let x = bf16::from_f32(1.0); - /// let y = bf16::from_f32(2.0); - /// - /// assert_eq!(x.min(y), x); - /// ``` - #[inline] - #[must_use] - pub fn min(self, other: bf16) -> bf16 { - if other < self && !other.is_nan() { - other - } else { - self - } - } - - /// Restrict a value to a certain interval unless it is NaN. - /// - /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. - /// Otherwise this returns `self`. - /// - /// Note that this function returns NaN if the initial value was NaN as well. - /// - /// # Panics - /// Panics if `min > max`, `min` is NaN, or `max` is NaN. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// assert!(bf16::from_f32(-3.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(-2.0)); - /// assert!(bf16::from_f32(0.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(0.0)); - /// assert!(bf16::from_f32(2.0).clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)) == bf16::from_f32(1.0)); - /// assert!(bf16::NAN.clamp(bf16::from_f32(-2.0), bf16::from_f32(1.0)).is_nan()); - /// ``` - #[inline] - #[must_use] - pub fn clamp(self, min: bf16, max: bf16) -> bf16 { - assert!(min <= max); - let mut x = self; - if x < min { - x = min; - } - if x > max { - x = max; - } - x - } - - /// Returns the ordering between `self` and `other`. 
- /// - /// Unlike the standard partial comparison between floating point numbers, - /// this comparison always produces an ordering in accordance to - /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) - /// floating point standard. The values are ordered in the following sequence: - /// - /// - negative quiet NaN - /// - negative signaling NaN - /// - negative infinity - /// - negative numbers - /// - negative subnormal numbers - /// - negative zero - /// - positive zero - /// - positive subnormal numbers - /// - positive numbers - /// - positive infinity - /// - positive signaling NaN - /// - positive quiet NaN. - /// - /// The ordering established by this function does not always agree with the - /// [`PartialOrd`] and [`PartialEq`] implementations of `bf16`. For example, - /// they consider negative and positive zero equal, while `total_cmp` - /// doesn't. - /// - /// The interpretation of the signaling NaN bit follows the definition in - /// the IEEE 754 standard, which may not match the interpretation by some of - /// the older, non-conformant (e.g. MIPS) hardware implementations. - /// - /// # Examples - /// ``` - /// # use half::bf16; - /// let mut v: Vec<bf16> = vec![]; - /// v.push(bf16::ONE); - /// v.push(bf16::INFINITY); - /// v.push(bf16::NEG_INFINITY); - /// v.push(bf16::NAN); - /// v.push(bf16::MAX_SUBNORMAL); - /// v.push(-bf16::MAX_SUBNORMAL); - /// v.push(bf16::ZERO); - /// v.push(bf16::NEG_ZERO); - /// v.push(bf16::NEG_ONE); - /// v.push(bf16::MIN_POSITIVE); - /// - /// v.sort_by(|a, b| a.total_cmp(&b)); - /// - /// assert!(v - /// .into_iter() - /// .zip( - /// [ - /// bf16::NEG_INFINITY, - /// bf16::NEG_ONE, - /// -bf16::MAX_SUBNORMAL, - /// bf16::NEG_ZERO, - /// bf16::ZERO, - /// bf16::MAX_SUBNORMAL, - /// bf16::MIN_POSITIVE, - /// bf16::ONE, - /// bf16::INFINITY, - /// bf16::NAN - /// ] - /// .iter() - /// ) - /// .all(|(a, b)| a.to_bits() == b.to_bits())); - /// ``` - // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp - #[inline] - #[must_use] - pub fn total_cmp(&self, other: &Self) -> Ordering { - let mut left = self.to_bits() as i16; - let mut right = other.to_bits() as i16; - left ^= (((left >> 15) as u16) >> 1) as i16; - right ^= (((right >> 15) as u16) >> 1) as i16; - left.cmp(&right) - } - - /// Alternate serialize adapter for serializing as a float. - /// - /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize - /// implementation that serializes as an [`f32`] value. It is designed for use with - /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by - /// the default deserialize implementation. - /// - /// # Examples - /// - /// A demonstration on how to use this adapater: - /// - /// ``` - /// use serde::{Serialize, Deserialize}; - /// use half::bf16; - /// - /// #[derive(Serialize, Deserialize)] - /// struct MyStruct { - /// #[serde(serialize_with = "bf16::serialize_as_f32")] - /// value: bf16 // Will be serialized as f32 instead of u16 - /// } - /// ``` - #[cfg(feature = "serde")] - pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { - serializer.serialize_f32(self.to_f32()) - } - - /// Alternate serialize adapter for serializing as a string. - /// - /// By default, [`bf16`] serializes as a newtype of [`u16`]. This is an alternate serialize - /// implementation that serializes as a string value. It is designed for use with - /// `serialize_with` serde attributes. 
Deserialization from string values is already supported - /// by the default deserialize implementation. - /// - /// # Examples - /// - /// A demonstration on how to use this adapater: - /// - /// ``` - /// use serde::{Serialize, Deserialize}; - /// use half::bf16; - /// - /// #[derive(Serialize, Deserialize)] - /// struct MyStruct { - /// #[serde(serialize_with = "bf16::serialize_as_string")] - /// value: bf16 // Will be serialized as a string instead of u16 - /// } - /// ``` - #[cfg(feature = "serde")] - pub fn serialize_as_string<S: serde::Serializer>( - &self, - serializer: S, - ) -> Result<S::Ok, S::Error> { - serializer.serialize_str(&self.to_string()) - } - - /// Approximate number of [`bf16`] significant digits in base 10 - pub const DIGITS: u32 = 2; - /// [`bf16`] - /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value - /// - /// This is the difference between 1.0 and the next largest representable number. - pub const EPSILON: bf16 = bf16(0x3C00u16); - /// [`bf16`] positive Infinity (+∞) - pub const INFINITY: bf16 = bf16(0x7F80u16); - /// Number of [`bf16`] significant digits in base 2 - pub const MANTISSA_DIGITS: u32 = 8; - /// Largest finite [`bf16`] value - pub const MAX: bf16 = bf16(0x7F7F); - /// Maximum possible [`bf16`] power of 10 exponent - pub const MAX_10_EXP: i32 = 38; - /// Maximum possible [`bf16`] power of 2 exponent - pub const MAX_EXP: i32 = 128; - /// Smallest finite [`bf16`] value - pub const MIN: bf16 = bf16(0xFF7F); - /// Minimum possible normal [`bf16`] power of 10 exponent - pub const MIN_10_EXP: i32 = -37; - /// One greater than the minimum possible normal [`bf16`] power of 2 exponent - pub const MIN_EXP: i32 = -125; - /// Smallest positive normal [`bf16`] value - pub const MIN_POSITIVE: bf16 = bf16(0x0080u16); - /// [`bf16`] Not a Number (NaN) - pub const NAN: bf16 = bf16(0x7FC0u16); - /// [`bf16`] negative infinity (-∞). 
- pub const NEG_INFINITY: bf16 = bf16(0xFF80u16); - /// The radix or base of the internal representation of [`bf16`] - pub const RADIX: u32 = 2; - - /// Minimum positive subnormal [`bf16`] value - pub const MIN_POSITIVE_SUBNORMAL: bf16 = bf16(0x0001u16); - /// Maximum subnormal [`bf16`] value - pub const MAX_SUBNORMAL: bf16 = bf16(0x007Fu16); - - /// [`bf16`] 1 - pub const ONE: bf16 = bf16(0x3F80u16); - /// [`bf16`] 0 - pub const ZERO: bf16 = bf16(0x0000u16); - /// [`bf16`] -0 - pub const NEG_ZERO: bf16 = bf16(0x8000u16); - /// [`bf16`] -1 - pub const NEG_ONE: bf16 = bf16(0xBF80u16); - - /// [`bf16`] Euler's number (ℯ) - pub const E: bf16 = bf16(0x402Eu16); - /// [`bf16`] Archimedes' constant (π) - pub const PI: bf16 = bf16(0x4049u16); - /// [`bf16`] 1/π - pub const FRAC_1_PI: bf16 = bf16(0x3EA3u16); - /// [`bf16`] 1/√2 - pub const FRAC_1_SQRT_2: bf16 = bf16(0x3F35u16); - /// [`bf16`] 2/π - pub const FRAC_2_PI: bf16 = bf16(0x3F23u16); - /// [`bf16`] 2/√π - pub const FRAC_2_SQRT_PI: bf16 = bf16(0x3F90u16); - /// [`bf16`] π/2 - pub const FRAC_PI_2: bf16 = bf16(0x3FC9u16); - /// [`bf16`] π/3 - pub const FRAC_PI_3: bf16 = bf16(0x3F86u16); - /// [`bf16`] π/4 - pub const FRAC_PI_4: bf16 = bf16(0x3F49u16); - /// [`bf16`] π/6 - pub const FRAC_PI_6: bf16 = bf16(0x3F06u16); - /// [`bf16`] π/8 - pub const FRAC_PI_8: bf16 = bf16(0x3EC9u16); - /// [`bf16`] 𝗅𝗇 10 - pub const LN_10: bf16 = bf16(0x4013u16); - /// [`bf16`] 𝗅𝗇 2 - pub const LN_2: bf16 = bf16(0x3F31u16); - /// [`bf16`] 𝗅𝗈𝗀₁₀ℯ - pub const LOG10_E: bf16 = bf16(0x3EDEu16); - /// [`bf16`] 𝗅𝗈𝗀₁₀2 - pub const LOG10_2: bf16 = bf16(0x3E9Au16); - /// [`bf16`] 𝗅𝗈𝗀₂ℯ - pub const LOG2_E: bf16 = bf16(0x3FB9u16); - /// [`bf16`] 𝗅𝗈𝗀₂10 - pub const LOG2_10: bf16 = bf16(0x4055u16); - /// [`bf16`] √2 - pub const SQRT_2: bf16 = bf16(0x3FB5u16); -} - -impl From<bf16> for f32 { - #[inline] - fn from(x: bf16) -> f32 { - x.to_f32() - } -} - -impl From<bf16> for f64 { - #[inline] - fn from(x: bf16) -> f64 { - x.to_f64() - } -} - -impl From<i8> for bf16 { - #[inline] - fn from(x: i8) -> bf16 { - // Convert to f32, then to bf16 - bf16::from_f32(f32::from(x)) - } -} - -impl From<u8> for bf16 { - #[inline] - fn from(x: u8) -> bf16 { - // Convert to f32, then to f16 - bf16::from_f32(f32::from(x)) - } -} - -impl PartialEq for bf16 { - fn eq(&self, other: &bf16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) - } - } -} - -impl PartialOrd for bf16 { - fn partial_cmp(&self, other: &bf16) -> Option<Ordering> { - if self.is_nan() || other.is_nan() { - None - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => Some(self.0.cmp(&other.0)), - (false, true) => { - if (self.0 | other.0) & 0x7FFFu16 == 0 { - Some(Ordering::Equal) - } else { - Some(Ordering::Greater) - } - } - (true, false) => { - if (self.0 | other.0) & 0x7FFFu16 == 0 { - Some(Ordering::Equal) - } else { - Some(Ordering::Less) - } - } - (true, true) => Some(other.0.cmp(&self.0)), - } - } - } - - fn lt(&self, other: &bf16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 < other.0, - (false, true) => false, - (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, - (true, true) => self.0 > other.0, - } - } - } - - fn le(&self, other: &bf16) -> bool { - if self.is_nan() || other.is_nan() { - false - } 
else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 <= other.0, - (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, - (true, false) => true, - (true, true) => self.0 >= other.0, - } - } - } - - fn gt(&self, other: &bf16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 > other.0, - (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, - (true, false) => false, - (true, true) => self.0 < other.0, - } - } - } - - fn ge(&self, other: &bf16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 >= other.0, - (false, true) => true, - (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, - (true, true) => self.0 <= other.0, - } - } - } -} - -#[cfg(not(target_arch = "spirv"))] -impl FromStr for bf16 { - type Err = ParseFloatError; - fn from_str(src: &str) -> Result<bf16, ParseFloatError> { - f32::from_str(src).map(bf16::from_f32) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Debug for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:?}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Display for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl LowerExp for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:e}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl UpperExp for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:E}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Binary for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:b}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Octal for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:o}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl LowerHex for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:x}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl UpperHex for bf16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:X}", self.0) - } -} - -impl Neg for bf16 { - type Output = Self; - - fn neg(self) -> Self::Output { - Self(self.0 ^ 0x8000) - } -} - -impl Neg for &bf16 { - type Output = <bf16 as Neg>::Output; - - #[inline] - fn neg(self) -> Self::Output { - Neg::neg(*self) - } -} - -impl Add for bf16 { - type Output = Self; - - fn add(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) - } -} - -impl Add<&bf16> for bf16 { - type Output = <bf16 as Add<bf16>>::Output; - - #[inline] - fn add(self, rhs: &bf16) -> Self::Output { - self.add(*rhs) - } -} - -impl Add<&bf16> for &bf16 { - type Output = <bf16 as Add<bf16>>::Output; - - #[inline] - fn add(self, rhs: &bf16) -> Self::Output { - (*self).add(*rhs) - } -} - -impl Add<bf16> for &bf16 { - type Output = <bf16 as Add<bf16>>::Output; - - #[inline] - fn add(self, rhs: bf16) -> Self::Output { - (*self).add(rhs) - } -} - -impl AddAssign for bf16 { - #[inline] - fn add_assign(&mut self, rhs: Self) { - *self = (*self).add(rhs); - } -} - -impl 
AddAssign<&bf16> for bf16 { - #[inline] - fn add_assign(&mut self, rhs: &bf16) { - *self = (*self).add(rhs); - } -} - -impl Sub for bf16 { - type Output = Self; - - fn sub(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) - } -} - -impl Sub<&bf16> for bf16 { - type Output = <bf16 as Sub<bf16>>::Output; - - #[inline] - fn sub(self, rhs: &bf16) -> Self::Output { - self.sub(*rhs) - } -} - -impl Sub<&bf16> for &bf16 { - type Output = <bf16 as Sub<bf16>>::Output; - - #[inline] - fn sub(self, rhs: &bf16) -> Self::Output { - (*self).sub(*rhs) - } -} - -impl Sub<bf16> for &bf16 { - type Output = <bf16 as Sub<bf16>>::Output; - - #[inline] - fn sub(self, rhs: bf16) -> Self::Output { - (*self).sub(rhs) - } -} - -impl SubAssign for bf16 { - #[inline] - fn sub_assign(&mut self, rhs: Self) { - *self = (*self).sub(rhs); - } -} - -impl SubAssign<&bf16> for bf16 { - #[inline] - fn sub_assign(&mut self, rhs: &bf16) { - *self = (*self).sub(rhs); - } -} - -impl Mul for bf16 { - type Output = Self; - - fn mul(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) - } -} - -impl Mul<&bf16> for bf16 { - type Output = <bf16 as Mul<bf16>>::Output; - - #[inline] - fn mul(self, rhs: &bf16) -> Self::Output { - self.mul(*rhs) - } -} - -impl Mul<&bf16> for &bf16 { - type Output = <bf16 as Mul<bf16>>::Output; - - #[inline] - fn mul(self, rhs: &bf16) -> Self::Output { - (*self).mul(*rhs) - } -} - -impl Mul<bf16> for &bf16 { - type Output = <bf16 as Mul<bf16>>::Output; - - #[inline] - fn mul(self, rhs: bf16) -> Self::Output { - (*self).mul(rhs) - } -} - -impl MulAssign for bf16 { - #[inline] - fn mul_assign(&mut self, rhs: Self) { - *self = (*self).mul(rhs); - } -} - -impl MulAssign<&bf16> for bf16 { - #[inline] - fn mul_assign(&mut self, rhs: &bf16) { - *self = (*self).mul(rhs); - } -} - -impl Div for bf16 { - type Output = Self; - - fn div(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) - } -} - -impl Div<&bf16> for bf16 { - type Output = <bf16 as Div<bf16>>::Output; - - #[inline] - fn div(self, rhs: &bf16) -> Self::Output { - self.div(*rhs) - } -} - -impl Div<&bf16> for &bf16 { - type Output = <bf16 as Div<bf16>>::Output; - - #[inline] - fn div(self, rhs: &bf16) -> Self::Output { - (*self).div(*rhs) - } -} - -impl Div<bf16> for &bf16 { - type Output = <bf16 as Div<bf16>>::Output; - - #[inline] - fn div(self, rhs: bf16) -> Self::Output { - (*self).div(rhs) - } -} - -impl DivAssign for bf16 { - #[inline] - fn div_assign(&mut self, rhs: Self) { - *self = (*self).div(rhs); - } -} - -impl DivAssign<&bf16> for bf16 { - #[inline] - fn div_assign(&mut self, rhs: &bf16) { - *self = (*self).div(rhs); - } -} - -impl Rem for bf16 { - type Output = Self; - - fn rem(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) - } -} - -impl Rem<&bf16> for bf16 { - type Output = <bf16 as Rem<bf16>>::Output; - - #[inline] - fn rem(self, rhs: &bf16) -> Self::Output { - self.rem(*rhs) - } -} - -impl Rem<&bf16> for &bf16 { - type Output = <bf16 as Rem<bf16>>::Output; - - #[inline] - fn rem(self, rhs: &bf16) -> Self::Output { - (*self).rem(*rhs) - } -} - -impl Rem<bf16> for &bf16 { - type Output = <bf16 as Rem<bf16>>::Output; - - #[inline] - fn rem(self, rhs: bf16) -> Self::Output { - (*self).rem(rhs) - } -} - -impl RemAssign for bf16 { - #[inline] - fn rem_assign(&mut self, rhs: Self) { - *self = (*self).rem(rhs); - } -} - -impl RemAssign<&bf16> for bf16 { - #[inline] - fn 
rem_assign(&mut self, rhs: &bf16) { - *self = (*self).rem(rhs); - } -} - -impl Product for bf16 { - #[inline] - fn product<I: Iterator<Item = Self>>(iter: I) -> Self { - bf16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -impl<'a> Product<&'a bf16> for bf16 { - #[inline] - fn product<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { - bf16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -impl Sum for bf16 { - #[inline] - fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { - bf16::from_f32(iter.map(|f| f.to_f32()).sum()) - } -} - -impl<'a> Sum<&'a bf16> for bf16 { - #[inline] - fn sum<I: Iterator<Item = &'a bf16>>(iter: I) -> Self { - bf16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -#[cfg(feature = "serde")] -struct Visitor; - -#[cfg(feature = "serde")] -impl<'de> Deserialize<'de> for bf16 { - fn deserialize<D>(deserializer: D) -> Result<bf16, D::Error> - where - D: serde::de::Deserializer<'de>, - { - deserializer.deserialize_newtype_struct("bf16", Visitor) - } -} - -#[cfg(feature = "serde")] -impl<'de> serde::de::Visitor<'de> for Visitor { - type Value = bf16; - - fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { - write!(formatter, "tuple struct bf16") - } - - fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> - where - D: serde::Deserializer<'de>, - { - Ok(bf16(<u16 as Deserialize>::deserialize(deserializer)?)) - } - - fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - v.parse().map_err(|_| { - serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") - }) - } - - fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(bf16::from_f32(v)) - } - - fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(bf16::from_f64(v)) - } -} - -#[allow( - clippy::cognitive_complexity, - clippy::float_cmp, - clippy::neg_cmp_op_on_partial_ord -)] -#[cfg(test)] -mod test { - use super::*; - use core::cmp::Ordering; - #[cfg(feature = "num-traits")] - use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; - use quickcheck_macros::quickcheck; - - #[cfg(feature = "num-traits")] - #[test] - fn as_primitive() { - let two = bf16::from_f32(2.0); - assert_eq!(<i32 as AsPrimitive<bf16>>::as_(2), two); - assert_eq!(<bf16 as AsPrimitive<i32>>::as_(two), 2); - - assert_eq!(<f32 as AsPrimitive<bf16>>::as_(2.0), two); - assert_eq!(<bf16 as AsPrimitive<f32>>::as_(two), 2.0); - - assert_eq!(<f64 as AsPrimitive<bf16>>::as_(2.0), two); - assert_eq!(<bf16 as AsPrimitive<f64>>::as_(two), 2.0); - } - - #[cfg(feature = "num-traits")] - #[test] - fn to_primitive() { - let two = bf16::from_f32(2.0); - assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); - assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); - assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); - } - - #[cfg(feature = "num-traits")] - #[test] - fn from_primitive() { - let two = bf16::from_f32(2.0); - assert_eq!(<bf16 as FromPrimitive>::from_i32(2).unwrap(), two); - assert_eq!(<bf16 as FromPrimitive>::from_f32(2.0).unwrap(), two); - assert_eq!(<bf16 as FromPrimitive>::from_f64(2.0).unwrap(), two); - } - - #[test] - fn test_bf16_consts_from_f32() { - let one = bf16::from_f32(1.0); - let zero = bf16::from_f32(0.0); - let neg_zero = bf16::from_f32(-0.0); - let neg_one = bf16::from_f32(-1.0); - let inf = bf16::from_f32(core::f32::INFINITY); - let neg_inf = bf16::from_f32(core::f32::NEG_INFINITY); - let nan = 
bf16::from_f32(core::f32::NAN); - - assert_eq!(bf16::ONE, one); - assert_eq!(bf16::ZERO, zero); - assert!(zero.is_sign_positive()); - assert_eq!(bf16::NEG_ZERO, neg_zero); - assert!(neg_zero.is_sign_negative()); - assert_eq!(bf16::NEG_ONE, neg_one); - assert!(neg_one.is_sign_negative()); - assert_eq!(bf16::INFINITY, inf); - assert_eq!(bf16::NEG_INFINITY, neg_inf); - assert!(nan.is_nan()); - assert!(bf16::NAN.is_nan()); - - let e = bf16::from_f32(core::f32::consts::E); - let pi = bf16::from_f32(core::f32::consts::PI); - let frac_1_pi = bf16::from_f32(core::f32::consts::FRAC_1_PI); - let frac_1_sqrt_2 = bf16::from_f32(core::f32::consts::FRAC_1_SQRT_2); - let frac_2_pi = bf16::from_f32(core::f32::consts::FRAC_2_PI); - let frac_2_sqrt_pi = bf16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); - let frac_pi_2 = bf16::from_f32(core::f32::consts::FRAC_PI_2); - let frac_pi_3 = bf16::from_f32(core::f32::consts::FRAC_PI_3); - let frac_pi_4 = bf16::from_f32(core::f32::consts::FRAC_PI_4); - let frac_pi_6 = bf16::from_f32(core::f32::consts::FRAC_PI_6); - let frac_pi_8 = bf16::from_f32(core::f32::consts::FRAC_PI_8); - let ln_10 = bf16::from_f32(core::f32::consts::LN_10); - let ln_2 = bf16::from_f32(core::f32::consts::LN_2); - let log10_e = bf16::from_f32(core::f32::consts::LOG10_E); - // core::f32::consts::LOG10_2 requires rustc 1.43.0 - let log10_2 = bf16::from_f32(2f32.log10()); - let log2_e = bf16::from_f32(core::f32::consts::LOG2_E); - // core::f32::consts::LOG2_10 requires rustc 1.43.0 - let log2_10 = bf16::from_f32(10f32.log2()); - let sqrt_2 = bf16::from_f32(core::f32::consts::SQRT_2); - - assert_eq!(bf16::E, e); - assert_eq!(bf16::PI, pi); - assert_eq!(bf16::FRAC_1_PI, frac_1_pi); - assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); - assert_eq!(bf16::FRAC_2_PI, frac_2_pi); - assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); - assert_eq!(bf16::FRAC_PI_2, frac_pi_2); - assert_eq!(bf16::FRAC_PI_3, frac_pi_3); - assert_eq!(bf16::FRAC_PI_4, frac_pi_4); - assert_eq!(bf16::FRAC_PI_6, frac_pi_6); - assert_eq!(bf16::FRAC_PI_8, frac_pi_8); - assert_eq!(bf16::LN_10, ln_10); - assert_eq!(bf16::LN_2, ln_2); - assert_eq!(bf16::LOG10_E, log10_e); - assert_eq!(bf16::LOG10_2, log10_2); - assert_eq!(bf16::LOG2_E, log2_e); - assert_eq!(bf16::LOG2_10, log2_10); - assert_eq!(bf16::SQRT_2, sqrt_2); - } - - #[test] - fn test_bf16_consts_from_f64() { - let one = bf16::from_f64(1.0); - let zero = bf16::from_f64(0.0); - let neg_zero = bf16::from_f64(-0.0); - let inf = bf16::from_f64(core::f64::INFINITY); - let neg_inf = bf16::from_f64(core::f64::NEG_INFINITY); - let nan = bf16::from_f64(core::f64::NAN); - - assert_eq!(bf16::ONE, one); - assert_eq!(bf16::ZERO, zero); - assert_eq!(bf16::NEG_ZERO, neg_zero); - assert_eq!(bf16::INFINITY, inf); - assert_eq!(bf16::NEG_INFINITY, neg_inf); - assert!(nan.is_nan()); - assert!(bf16::NAN.is_nan()); - - let e = bf16::from_f64(core::f64::consts::E); - let pi = bf16::from_f64(core::f64::consts::PI); - let frac_1_pi = bf16::from_f64(core::f64::consts::FRAC_1_PI); - let frac_1_sqrt_2 = bf16::from_f64(core::f64::consts::FRAC_1_SQRT_2); - let frac_2_pi = bf16::from_f64(core::f64::consts::FRAC_2_PI); - let frac_2_sqrt_pi = bf16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); - let frac_pi_2 = bf16::from_f64(core::f64::consts::FRAC_PI_2); - let frac_pi_3 = bf16::from_f64(core::f64::consts::FRAC_PI_3); - let frac_pi_4 = bf16::from_f64(core::f64::consts::FRAC_PI_4); - let frac_pi_6 = bf16::from_f64(core::f64::consts::FRAC_PI_6); - let frac_pi_8 = bf16::from_f64(core::f64::consts::FRAC_PI_8); - let 
ln_10 = bf16::from_f64(core::f64::consts::LN_10); - let ln_2 = bf16::from_f64(core::f64::consts::LN_2); - let log10_e = bf16::from_f64(core::f64::consts::LOG10_E); - // core::f64::consts::LOG10_2 requires rustc 1.43.0 - let log10_2 = bf16::from_f64(2f64.log10()); - let log2_e = bf16::from_f64(core::f64::consts::LOG2_E); - // core::f64::consts::LOG2_10 requires rustc 1.43.0 - let log2_10 = bf16::from_f64(10f64.log2()); - let sqrt_2 = bf16::from_f64(core::f64::consts::SQRT_2); - - assert_eq!(bf16::E, e); - assert_eq!(bf16::PI, pi); - assert_eq!(bf16::FRAC_1_PI, frac_1_pi); - assert_eq!(bf16::FRAC_1_SQRT_2, frac_1_sqrt_2); - assert_eq!(bf16::FRAC_2_PI, frac_2_pi); - assert_eq!(bf16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); - assert_eq!(bf16::FRAC_PI_2, frac_pi_2); - assert_eq!(bf16::FRAC_PI_3, frac_pi_3); - assert_eq!(bf16::FRAC_PI_4, frac_pi_4); - assert_eq!(bf16::FRAC_PI_6, frac_pi_6); - assert_eq!(bf16::FRAC_PI_8, frac_pi_8); - assert_eq!(bf16::LN_10, ln_10); - assert_eq!(bf16::LN_2, ln_2); - assert_eq!(bf16::LOG10_E, log10_e); - assert_eq!(bf16::LOG10_2, log10_2); - assert_eq!(bf16::LOG2_E, log2_e); - assert_eq!(bf16::LOG2_10, log2_10); - assert_eq!(bf16::SQRT_2, sqrt_2); - } - - #[test] - fn test_nan_conversion_to_smaller() { - let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); - let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); - let nan32 = f32::from_bits(0x7F80_0001u32); - let neg_nan32 = f32::from_bits(0xFF80_0001u32); - let nan32_from_64 = nan64 as f32; - let neg_nan32_from_64 = neg_nan64 as f32; - let nan16_from_64 = bf16::from_f64(nan64); - let neg_nan16_from_64 = bf16::from_f64(neg_nan64); - let nan16_from_32 = bf16::from_f32(nan32); - let neg_nan16_from_32 = bf16::from_f32(neg_nan32); - - assert!(nan64.is_nan() && nan64.is_sign_positive()); - assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); - assert!(nan32.is_nan() && nan32.is_sign_positive()); - assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); - assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); - assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); - assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); - assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); - assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); - assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); - } - - #[test] - fn test_nan_conversion_to_larger() { - let nan16 = bf16::from_bits(0x7F81u16); - let neg_nan16 = bf16::from_bits(0xFF81u16); - let nan32 = f32::from_bits(0x7F80_0001u32); - let neg_nan32 = f32::from_bits(0xFF80_0001u32); - let nan32_from_16 = f32::from(nan16); - let neg_nan32_from_16 = f32::from(neg_nan16); - let nan64_from_16 = f64::from(nan16); - let neg_nan64_from_16 = f64::from(neg_nan16); - let nan64_from_32 = f64::from(nan32); - let neg_nan64_from_32 = f64::from(neg_nan32); - - assert!(nan16.is_nan() && nan16.is_sign_positive()); - assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); - assert!(nan32.is_nan() && nan32.is_sign_positive()); - assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); - assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); - assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); - assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); - assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); - assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); - 
assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); - } - - #[test] - fn test_bf16_to_f32() { - let f = bf16::from_f32(7.0); - assert_eq!(f.to_f32(), 7.0f32); - - // 7.1 is NOT exactly representable in 16-bit, it's rounded - let f = bf16::from_f32(7.1); - let diff = (f.to_f32() - 7.1f32).abs(); - // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 - assert!(diff <= 4.0 * bf16::EPSILON.to_f32()); - - let tiny32 = f32::from_bits(0x0001_0000u32); - assert_eq!(bf16::from_bits(0x0001).to_f32(), tiny32); - assert_eq!(bf16::from_bits(0x0005).to_f32(), 5.0 * tiny32); - - assert_eq!(bf16::from_bits(0x0001), bf16::from_f32(tiny32)); - assert_eq!(bf16::from_bits(0x0005), bf16::from_f32(5.0 * tiny32)); - } - - #[test] - fn test_bf16_to_f64() { - let f = bf16::from_f64(7.0); - assert_eq!(f.to_f64(), 7.0f64); - - // 7.1 is NOT exactly representable in 16-bit, it's rounded - let f = bf16::from_f64(7.1); - let diff = (f.to_f64() - 7.1f64).abs(); - // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 - assert!(diff <= 4.0 * bf16::EPSILON.to_f64()); - - let tiny64 = 2.0f64.powi(-133); - assert_eq!(bf16::from_bits(0x0001).to_f64(), tiny64); - assert_eq!(bf16::from_bits(0x0005).to_f64(), 5.0 * tiny64); - - assert_eq!(bf16::from_bits(0x0001), bf16::from_f64(tiny64)); - assert_eq!(bf16::from_bits(0x0005), bf16::from_f64(5.0 * tiny64)); - } - - #[test] - fn test_comparisons() { - let zero = bf16::from_f64(0.0); - let one = bf16::from_f64(1.0); - let neg_zero = bf16::from_f64(-0.0); - let neg_one = bf16::from_f64(-1.0); - - assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); - assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); - assert!(zero == neg_zero); - assert!(neg_zero == zero); - assert!(!(zero != neg_zero)); - assert!(!(neg_zero != zero)); - assert!(!(zero < neg_zero)); - assert!(!(neg_zero < zero)); - assert!(zero <= neg_zero); - assert!(neg_zero <= zero); - assert!(!(zero > neg_zero)); - assert!(!(neg_zero > zero)); - assert!(zero >= neg_zero); - assert!(neg_zero >= zero); - - assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); - assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); - assert!(!(one == neg_zero)); - assert!(!(neg_zero == one)); - assert!(one != neg_zero); - assert!(neg_zero != one); - assert!(!(one < neg_zero)); - assert!(neg_zero < one); - assert!(!(one <= neg_zero)); - assert!(neg_zero <= one); - assert!(one > neg_zero); - assert!(!(neg_zero > one)); - assert!(one >= neg_zero); - assert!(!(neg_zero >= one)); - - assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); - assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); - assert!(!(one == neg_one)); - assert!(!(neg_one == one)); - assert!(one != neg_one); - assert!(neg_one != one); - assert!(!(one < neg_one)); - assert!(neg_one < one); - assert!(!(one <= neg_one)); - assert!(neg_one <= one); - assert!(one > neg_one); - assert!(!(neg_one > one)); - assert!(one >= neg_one); - assert!(!(neg_one >= one)); - } - - #[test] - #[allow(clippy::erasing_op, clippy::identity_op)] - fn round_to_even_f32() { - // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 - let min_sub = bf16::from_bits(1); - let min_sub_f = (-133f32).exp2(); - assert_eq!(bf16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); - assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); - - // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) - // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) - // 
0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) - assert_eq!( - bf16::from_f32(min_sub_f * 0.49).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 0.50).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 0.51).to_bits(), - min_sub.to_bits() * 1 - ); - - // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) - // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) - // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) - assert_eq!( - bf16::from_f32(min_sub_f * 1.49).to_bits(), - min_sub.to_bits() * 1 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 1.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 1.51).to_bits(), - min_sub.to_bits() * 2 - ); - - // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) - // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) - // 0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) - assert_eq!( - bf16::from_f32(min_sub_f * 2.49).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 2.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f32(min_sub_f * 2.51).to_bits(), - min_sub.to_bits() * 3 - ); - - assert_eq!( - bf16::from_f32(250.49f32).to_bits(), - bf16::from_f32(250.0).to_bits() - ); - assert_eq!( - bf16::from_f32(250.50f32).to_bits(), - bf16::from_f32(250.0).to_bits() - ); - assert_eq!( - bf16::from_f32(250.51f32).to_bits(), - bf16::from_f32(251.0).to_bits() - ); - assert_eq!( - bf16::from_f32(251.49f32).to_bits(), - bf16::from_f32(251.0).to_bits() - ); - assert_eq!( - bf16::from_f32(251.50f32).to_bits(), - bf16::from_f32(252.0).to_bits() - ); - assert_eq!( - bf16::from_f32(251.51f32).to_bits(), - bf16::from_f32(252.0).to_bits() - ); - assert_eq!( - bf16::from_f32(252.49f32).to_bits(), - bf16::from_f32(252.0).to_bits() - ); - assert_eq!( - bf16::from_f32(252.50f32).to_bits(), - bf16::from_f32(252.0).to_bits() - ); - assert_eq!( - bf16::from_f32(252.51f32).to_bits(), - bf16::from_f32(253.0).to_bits() - ); - } - - #[test] - #[allow(clippy::erasing_op, clippy::identity_op)] - fn round_to_even_f64() { - // smallest positive subnormal = 0b0.0000_001 * 2^-126 = 2^-133 - let min_sub = bf16::from_bits(1); - let min_sub_f = (-133f64).exp2(); - assert_eq!(bf16::from_f64(min_sub_f).to_bits(), min_sub.to_bits()); - assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); - - // 0.0000000_011111 rounded to 0.0000000 (< tie, no rounding) - // 0.0000000_100000 rounded to 0.0000000 (tie and even, remains at even) - // 0.0000000_100001 rounded to 0.0000001 (> tie, rounds up) - assert_eq!( - bf16::from_f64(min_sub_f * 0.49).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 0.50).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 0.51).to_bits(), - min_sub.to_bits() * 1 - ); - - // 0.0000001_011111 rounded to 0.0000001 (< tie, no rounding) - // 0.0000001_100000 rounded to 0.0000010 (tie and odd, rounds up to even) - // 0.0000001_100001 rounded to 0.0000010 (> tie, rounds up) - assert_eq!( - bf16::from_f64(min_sub_f * 1.49).to_bits(), - min_sub.to_bits() * 1 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 1.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 1.51).to_bits(), - min_sub.to_bits() * 2 - ); - - // 0.0000010_011111 rounded to 0.0000010 (< tie, no rounding) - // 0.0000010_100000 rounded to 0.0000010 (tie and even, remains at even) - // 
0.0000010_100001 rounded to 0.0000011 (> tie, rounds up) - assert_eq!( - bf16::from_f64(min_sub_f * 2.49).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 2.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - bf16::from_f64(min_sub_f * 2.51).to_bits(), - min_sub.to_bits() * 3 - ); - - assert_eq!( - bf16::from_f64(250.49f64).to_bits(), - bf16::from_f64(250.0).to_bits() - ); - assert_eq!( - bf16::from_f64(250.50f64).to_bits(), - bf16::from_f64(250.0).to_bits() - ); - assert_eq!( - bf16::from_f64(250.51f64).to_bits(), - bf16::from_f64(251.0).to_bits() - ); - assert_eq!( - bf16::from_f64(251.49f64).to_bits(), - bf16::from_f64(251.0).to_bits() - ); - assert_eq!( - bf16::from_f64(251.50f64).to_bits(), - bf16::from_f64(252.0).to_bits() - ); - assert_eq!( - bf16::from_f64(251.51f64).to_bits(), - bf16::from_f64(252.0).to_bits() - ); - assert_eq!( - bf16::from_f64(252.49f64).to_bits(), - bf16::from_f64(252.0).to_bits() - ); - assert_eq!( - bf16::from_f64(252.50f64).to_bits(), - bf16::from_f64(252.0).to_bits() - ); - assert_eq!( - bf16::from_f64(252.51f64).to_bits(), - bf16::from_f64(253.0).to_bits() - ); - } - - impl quickcheck::Arbitrary for bf16 { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - bf16(u16::arbitrary(g)) - } - } - - #[quickcheck] - fn qc_roundtrip_bf16_f32_is_identity(f: bf16) -> bool { - let roundtrip = bf16::from_f32(f.to_f32()); - if f.is_nan() { - roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() - } else { - f.0 == roundtrip.0 - } - } - - #[quickcheck] - fn qc_roundtrip_bf16_f64_is_identity(f: bf16) -> bool { - let roundtrip = bf16::from_f64(f.to_f64()); - if f.is_nan() { - roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() - } else { - f.0 == roundtrip.0 - } - } -} diff --git a/vendor/half/src/bfloat/convert.rs b/vendor/half/src/bfloat/convert.rs deleted file mode 100644 index 8f258f5..0000000 --- a/vendor/half/src/bfloat/convert.rs +++ /dev/null @@ -1,148 +0,0 @@ -use crate::leading_zeros::leading_zeros_u16; -use core::mem; - -#[inline] -pub(crate) const fn f32_to_bf16(value: f32) -> u16 { - // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized - // Convert to raw bytes - let x: u32 = unsafe { mem::transmute(value) }; - - // check for NaN - if x & 0x7FFF_FFFFu32 > 0x7F80_0000u32 { - // Keep high part of current mantissa but also set most significiant mantissa bit - return ((x >> 16) | 0x0040u32) as u16; - } - - // round and shift - let round_bit = 0x0000_8000u32; - if (x & round_bit) != 0 && (x & (3 * round_bit - 1)) != 0 { - (x >> 16) as u16 + 1 - } else { - (x >> 16) as u16 - } -} - -#[inline] -pub(crate) const fn f64_to_bf16(value: f64) -> u16 { - // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized - // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always - // be lost on half-precision. - let val: u64 = unsafe { mem::transmute(value) }; - let x = (val >> 32) as u32; - - // Extract IEEE754 components - let sign = x & 0x8000_0000u32; - let exp = x & 0x7FF0_0000u32; - let man = x & 0x000F_FFFFu32; - - // Check for all exponent bits being set, which is Infinity or NaN - if exp == 0x7FF0_0000u32 { - // Set mantissa MSB for NaN (and also keep shifted mantissa bits). - // We also have to check the last 32 bits. 
- let nan_bit = if man == 0 && (val as u32 == 0) { - 0 - } else { - 0x0040u32 - }; - return ((sign >> 16) | 0x7F80u32 | nan_bit | (man >> 13)) as u16; - } - - // The number is normalized, start assembling half precision version - let half_sign = sign >> 16; - // Unbias the exponent, then bias for bfloat16 precision - let unbiased_exp = ((exp >> 20) as i64) - 1023; - let half_exp = unbiased_exp + 127; - - // Check for exponent overflow, return +infinity - if half_exp >= 0xFF { - return (half_sign | 0x7F80u32) as u16; - } - - // Check for underflow - if half_exp <= 0 { - // Check mantissa for what we can do - if 7 - half_exp > 21 { - // No rounding possibility, so this is a full underflow, return signed zero - return half_sign as u16; - } - // Don't forget about hidden leading mantissa bit when assembling mantissa - let man = man | 0x0010_0000u32; - let mut half_man = man >> (14 - half_exp); - // Check for rounding - let round_bit = 1 << (13 - half_exp); - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - half_man += 1; - } - // No exponent for subnormals - return (half_sign | half_man) as u16; - } - - // Rebias the exponent - let half_exp = (half_exp as u32) << 7; - let half_man = man >> 13; - // Check for rounding - let round_bit = 0x0000_1000u32; - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - // Round it - ((half_sign | half_exp | half_man) + 1) as u16 - } else { - (half_sign | half_exp | half_man) as u16 - } -} - -#[inline] -pub(crate) const fn bf16_to_f32(i: u16) -> f32 { - // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized - // If NaN, keep current mantissa but also set most significiant mantissa bit - if i & 0x7FFFu16 > 0x7F80u16 { - unsafe { mem::transmute((i as u32 | 0x0040u32) << 16) } - } else { - unsafe { mem::transmute((i as u32) << 16) } - } -} - -#[inline] -pub(crate) const fn bf16_to_f64(i: u16) -> f64 { - // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized - // Check for signed zero - if i & 0x7FFFu16 == 0 { - return unsafe { mem::transmute((i as u64) << 48) }; - } - - let half_sign = (i & 0x8000u16) as u64; - let half_exp = (i & 0x7F80u16) as u64; - let half_man = (i & 0x007Fu16) as u64; - - // Check for an infinity or NaN when all exponent bits set - if half_exp == 0x7F80u64 { - // Check for signed infinity if mantissa is zero - if half_man == 0 { - return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; - } else { - // NaN, keep current mantissa but also set most significiant mantissa bit - return unsafe { - mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 45)) - }; - } - } - - // Calculate double-precision components with adjusted exponent - let sign = half_sign << 48; - // Unbias exponent - let unbiased_exp = ((half_exp as i64) >> 7) - 127; - - // Check for subnormals, which will be normalized by adjusting exponent - if half_exp == 0 { - // Calculate how much to adjust the exponent by - let e = leading_zeros_u16(half_man as u16) - 9; - - // Rebias and adjust exponent - let exp = ((1023 - 127 - e) as u64) << 52; - let man = (half_man << (46 + e)) & 0xF_FFFF_FFFF_FFFFu64; - return unsafe { mem::transmute(sign | exp | man) }; - } - // Rebias exponent for a normalized normal - let exp = ((unbiased_exp + 1023) as u64) << 52; - let man = (half_man & 0x007Fu64) << 45; - unsafe { mem::transmute(sign | exp | man) } -} diff --git a/vendor/half/src/binary16.rs b/vendor/half/src/binary16.rs deleted file mode 100644 index 
b622f01..0000000 --- a/vendor/half/src/binary16.rs +++ /dev/null @@ -1,1912 +0,0 @@ -#[cfg(feature = "bytemuck")] -use bytemuck::{Pod, Zeroable}; -use core::{ - cmp::Ordering, - iter::{Product, Sum}, - num::FpCategory, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign}, -}; -#[cfg(not(target_arch = "spirv"))] -use core::{ - fmt::{ - Binary, Debug, Display, Error, Formatter, LowerExp, LowerHex, Octal, UpperExp, UpperHex, - }, - num::ParseFloatError, - str::FromStr, -}; -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; -#[cfg(feature = "zerocopy")] -use zerocopy::{AsBytes, FromBytes}; - -pub(crate) mod convert; - -/// A 16-bit floating point type implementing the IEEE 754-2008 standard [`binary16`] a.k.a `half` -/// format. -/// -/// This 16-bit floating point type is intended for efficient storage where the full range and -/// precision of a larger floating point value is not required. Because [`f16`] is primarily for -/// efficient storage, floating point operations such as addition, multiplication, etc. are not -/// implemented. Operations should be performed with [`f32`] or higher-precision types and converted -/// to/from [`f16`] as necessary. -/// -/// [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format -#[allow(non_camel_case_types)] -#[derive(Clone, Copy, Default)] -#[repr(transparent)] -#[cfg_attr(feature = "serde", derive(Serialize))] -#[cfg_attr(feature = "bytemuck", derive(Zeroable, Pod))] -#[cfg_attr(feature = "zerocopy", derive(AsBytes, FromBytes))] -pub struct f16(u16); - -impl f16 { - /// Constructs a 16-bit floating point value from the raw bits. - #[inline] - #[must_use] - pub const fn from_bits(bits: u16) -> f16 { - f16(bits) - } - - /// Constructs a 16-bit floating point value from a 32-bit floating point value. - /// - /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are - /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in - /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals - /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit - /// value. - #[inline] - #[must_use] - pub fn from_f32(value: f32) -> f16 { - f16(convert::f32_to_f16(value)) - } - - /// Constructs a 16-bit floating point value from a 32-bit floating point value. - /// - /// This function is identical to [`from_f32`][Self::from_f32] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`from_f32`][Self::from_f32] should be preferred - /// in any non-`const` context. - /// - /// If the 32-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are - /// preserved. 32-bit subnormal values are too tiny to be represented in 16-bits and result in - /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals - /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit - /// value. - #[inline] - #[must_use] - pub const fn from_f32_const(value: f32) -> f16 { - f16(convert::f32_to_f16_fallback(value)) - } - - /// Constructs a 16-bit floating point value from a 64-bit floating point value. - /// - /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are - /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in - /// ±0. 
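A minimal sketch of the saturation and flush-to-zero behaviour these doc comments describe, using only the `f16` API defined in this file (it assumes the `half` crate as vendored here and is illustrative, not part of the crate):

```rust
use half::f16;

fn main() {
    // Values beyond the binary16 range saturate to ±∞ (f16::MAX is 65504).
    assert!(f16::from_f32(1.0e9).is_infinite());
    let neg = f16::from_f32(-1.0e9);
    assert!(neg.is_infinite() && neg.is_sign_negative());

    // The smallest exponents land in the subnormal range...
    assert_eq!(f16::from_f32(2.0f32.powi(-24)), f16::MIN_POSITIVE_SUBNORMAL);

    // ...and anything below half of that smallest subnormal flushes to ±0.
    assert_eq!(f16::from_f32(2.0f32.powi(-26)).to_bits(), 0);
}
```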
Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals - /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit - /// value. - #[inline] - #[must_use] - pub fn from_f64(value: f64) -> f16 { - f16(convert::f64_to_f16(value)) - } - - /// Constructs a 16-bit floating point value from a 64-bit floating point value. - /// - /// This function is identical to [`from_f64`][Self::from_f64] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`from_f64`][Self::from_f64] should be preferred - /// in any non-`const` context. - /// - /// If the 64-bit value is to large to fit in 16-bits, ±∞ will result. NaN values are - /// preserved. 64-bit subnormal values are too tiny to be represented in 16-bits and result in - /// ±0. Exponents that underflow the minimum 16-bit exponent will result in 16-bit subnormals - /// or ±0. All other values are truncated and rounded to the nearest representable 16-bit - /// value. - #[inline] - #[must_use] - pub const fn from_f64_const(value: f64) -> f16 { - f16(convert::f64_to_f16_fallback(value)) - } - - /// Converts a [`f16`] into the underlying bit representation. - #[inline] - #[must_use] - pub const fn to_bits(self) -> u16 { - self.0 - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// little-endian byte order. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = f16::from_f32(12.5).to_le_bytes(); - /// assert_eq!(bytes, [0x40, 0x4A]); - /// ``` - #[inline] - #[must_use] - pub const fn to_le_bytes(self) -> [u8; 2] { - self.0.to_le_bytes() - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// big-endian (network) byte order. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = f16::from_f32(12.5).to_be_bytes(); - /// assert_eq!(bytes, [0x4A, 0x40]); - /// ``` - #[inline] - #[must_use] - pub const fn to_be_bytes(self) -> [u8; 2] { - self.0.to_be_bytes() - } - - /// Returns the memory representation of the underlying bit representation as a byte array in - /// native byte order. - /// - /// As the target platform's native endianness is used, portable code should use - /// [`to_be_bytes`][Self::to_be_bytes] or [`to_le_bytes`][Self::to_le_bytes], as appropriate, - /// instead. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let bytes = f16::from_f32(12.5).to_ne_bytes(); - /// assert_eq!(bytes, if cfg!(target_endian = "big") { - /// [0x4A, 0x40] - /// } else { - /// [0x40, 0x4A] - /// }); - /// ``` - #[inline] - #[must_use] - pub const fn to_ne_bytes(self) -> [u8; 2] { - self.0.to_ne_bytes() - } - - /// Creates a floating point value from its representation as a byte array in little endian. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = f16::from_le_bytes([0x40, 0x4A]); - /// assert_eq!(value, f16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_le_bytes(bytes: [u8; 2]) -> f16 { - f16::from_bits(u16::from_le_bytes(bytes)) - } - - /// Creates a floating point value from its representation as a byte array in big endian. 
- /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = f16::from_be_bytes([0x4A, 0x40]); - /// assert_eq!(value, f16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_be_bytes(bytes: [u8; 2]) -> f16 { - f16::from_bits(u16::from_be_bytes(bytes)) - } - - /// Creates a floating point value from its representation as a byte array in native endian. - /// - /// As the target platform's native endianness is used, portable code likely wants to use - /// [`from_be_bytes`][Self::from_be_bytes] or [`from_le_bytes`][Self::from_le_bytes], as - /// appropriate instead. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let value = f16::from_ne_bytes(if cfg!(target_endian = "big") { - /// [0x4A, 0x40] - /// } else { - /// [0x40, 0x4A] - /// }); - /// assert_eq!(value, f16::from_f32(12.5)); - /// ``` - #[inline] - #[must_use] - pub const fn from_ne_bytes(bytes: [u8; 2]) -> f16 { - f16::from_bits(u16::from_ne_bytes(bytes)) - } - - /// Converts a [`f16`] value into a `f32` value. - /// - /// This conversion is lossless as all 16-bit floating point values can be represented exactly - /// in 32-bit floating point. - #[inline] - #[must_use] - pub fn to_f32(self) -> f32 { - convert::f16_to_f32(self.0) - } - - /// Converts a [`f16`] value into a `f32` value. - /// - /// This function is identical to [`to_f32`][Self::to_f32] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`to_f32`][Self::to_f32] should be preferred - /// in any non-`const` context. - /// - /// This conversion is lossless as all 16-bit floating point values can be represented exactly - /// in 32-bit floating point. - #[inline] - #[must_use] - pub const fn to_f32_const(self) -> f32 { - convert::f16_to_f32_fallback(self.0) - } - - /// Converts a [`f16`] value into a `f64` value. - /// - /// This conversion is lossless as all 16-bit floating point values can be represented exactly - /// in 64-bit floating point. - #[inline] - #[must_use] - pub fn to_f64(self) -> f64 { - convert::f16_to_f64(self.0) - } - - /// Converts a [`f16`] value into a `f64` value. - /// - /// This function is identical to [`to_f64`][Self::to_f64] except it never uses hardware - /// intrinsics, which allows it to be `const`. [`to_f64`][Self::to_f64] should be preferred - /// in any non-`const` context. - /// - /// This conversion is lossless as all 16-bit floating point values can be represented exactly - /// in 64-bit floating point. - #[inline] - #[must_use] - pub const fn to_f64_const(self) -> f64 { - convert::f16_to_f64_fallback(self.0) - } - - /// Returns `true` if this value is `NaN` and `false` otherwise. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = f16::NAN; - /// let f = f16::from_f32(7.0_f32); - /// - /// assert!(nan.is_nan()); - /// assert!(!f.is_nan()); - /// ``` - #[inline] - #[must_use] - pub const fn is_nan(self) -> bool { - self.0 & 0x7FFFu16 > 0x7C00u16 - } - - /// Returns `true` if this value is ±∞ and `false`. - /// otherwise. 
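The predicates here, like `classify` further down, are plain mask tests on the exponent and mantissa fields (1 sign, 5 exponent, 10 mantissa bits). A standalone re-derivation of that logic, using a hypothetical helper that is not part of the crate:

```rust
// Classify raw binary16 bits the same way `is_nan`/`is_infinite`/`classify` do above.
fn describe(bits: u16) -> &'static str {
    let exp = bits & 0x7C00; // exponent field
    let man = bits & 0x03FF; // mantissa field
    match (exp, man) {
        (0x7C00, 0) => "infinity",
        (0x7C00, _) => "NaN",
        (0, 0) => "zero",
        (0, _) => "subnormal",
        _ => "normal",
    }
}

fn main() {
    assert_eq!(describe(0x7C00), "infinity");  // +∞
    assert_eq!(describe(0x7E00), "NaN");       // canonical NaN
    assert_eq!(describe(0x0001), "subnormal"); // smallest positive subnormal
    assert_eq!(describe(0x3C00), "normal");    // 1.0
}
```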
- /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = f16::from_f32(7.0f32); - /// let inf = f16::INFINITY; - /// let neg_inf = f16::NEG_INFINITY; - /// let nan = f16::NAN; - /// - /// assert!(!f.is_infinite()); - /// assert!(!nan.is_infinite()); - /// - /// assert!(inf.is_infinite()); - /// assert!(neg_inf.is_infinite()); - /// ``` - #[inline] - #[must_use] - pub const fn is_infinite(self) -> bool { - self.0 & 0x7FFFu16 == 0x7C00u16 - } - - /// Returns `true` if this number is neither infinite nor `NaN`. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = f16::from_f32(7.0f32); - /// let inf = f16::INFINITY; - /// let neg_inf = f16::NEG_INFINITY; - /// let nan = f16::NAN; - /// - /// assert!(f.is_finite()); - /// - /// assert!(!nan.is_finite()); - /// assert!(!inf.is_finite()); - /// assert!(!neg_inf.is_finite()); - /// ``` - #[inline] - #[must_use] - pub const fn is_finite(self) -> bool { - self.0 & 0x7C00u16 != 0x7C00u16 - } - - /// Returns `true` if the number is neither zero, infinite, subnormal, or `NaN`. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let min = f16::MIN_POSITIVE; - /// let max = f16::MAX; - /// let lower_than_min = f16::from_f32(1.0e-10_f32); - /// let zero = f16::from_f32(0.0_f32); - /// - /// assert!(min.is_normal()); - /// assert!(max.is_normal()); - /// - /// assert!(!zero.is_normal()); - /// assert!(!f16::NAN.is_normal()); - /// assert!(!f16::INFINITY.is_normal()); - /// // Values between `0` and `min` are Subnormal. - /// assert!(!lower_than_min.is_normal()); - /// ``` - #[inline] - #[must_use] - pub const fn is_normal(self) -> bool { - let exp = self.0 & 0x7C00u16; - exp != 0x7C00u16 && exp != 0 - } - - /// Returns the floating point category of the number. - /// - /// If only one property is going to be tested, it is generally faster to use the specific - /// predicate instead. - /// - /// # Examples - /// - /// ```rust - /// use std::num::FpCategory; - /// # use half::prelude::*; - /// - /// let num = f16::from_f32(12.4_f32); - /// let inf = f16::INFINITY; - /// - /// assert_eq!(num.classify(), FpCategory::Normal); - /// assert_eq!(inf.classify(), FpCategory::Infinite); - /// ``` - #[must_use] - pub const fn classify(self) -> FpCategory { - let exp = self.0 & 0x7C00u16; - let man = self.0 & 0x03FFu16; - match (exp, man) { - (0, 0) => FpCategory::Zero, - (0, _) => FpCategory::Subnormal, - (0x7C00u16, 0) => FpCategory::Infinite, - (0x7C00u16, _) => FpCategory::Nan, - _ => FpCategory::Normal, - } - } - - /// Returns a number that represents the sign of `self`. - /// - /// * `1.0` if the number is positive, `+0.0` or [`INFINITY`][f16::INFINITY] - /// * `-1.0` if the number is negative, `-0.0` or [`NEG_INFINITY`][f16::NEG_INFINITY] - /// * [`NAN`][f16::NAN] if the number is `NaN` - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let f = f16::from_f32(3.5_f32); - /// - /// assert_eq!(f.signum(), f16::from_f32(1.0)); - /// assert_eq!(f16::NEG_INFINITY.signum(), f16::from_f32(-1.0)); - /// - /// assert!(f16::NAN.signum().is_nan()); - /// ``` - #[must_use] - pub const fn signum(self) -> f16 { - if self.is_nan() { - self - } else if self.0 & 0x8000u16 != 0 { - Self::NEG_ONE - } else { - Self::ONE - } - } - - /// Returns `true` if and only if `self` has a positive sign, including `+0.0`, `NaNs` with a - /// positive sign bit and +∞. 
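Sign handling in this type is purely bitwise: negation (the `Neg` impl further down) flips bit 15, and the sign predicates here only read it. A short illustrative sketch against the API in this file:

```rust
use half::f16;

fn main() {
    let x = f16::from_f32(3.5);
    // Negation is a single XOR of the sign bit...
    assert_eq!((-x).to_bits(), x.to_bits() ^ 0x8000);
    // ...and the sign predicates only read that bit, even for zeros.
    assert!((-x).is_sign_negative());
    assert!(f16::NEG_ZERO.is_sign_negative() && f16::NEG_ZERO == f16::ZERO);
}
```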
- /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = f16::NAN; - /// let f = f16::from_f32(7.0_f32); - /// let g = f16::from_f32(-7.0_f32); - /// - /// assert!(f.is_sign_positive()); - /// assert!(!g.is_sign_positive()); - /// // `NaN` can be either positive or negative - /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); - /// ``` - #[inline] - #[must_use] - pub const fn is_sign_positive(self) -> bool { - self.0 & 0x8000u16 == 0 - } - - /// Returns `true` if and only if `self` has a negative sign, including `-0.0`, `NaNs` with a - /// negative sign bit and −∞. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// - /// let nan = f16::NAN; - /// let f = f16::from_f32(7.0f32); - /// let g = f16::from_f32(-7.0f32); - /// - /// assert!(!f.is_sign_negative()); - /// assert!(g.is_sign_negative()); - /// // `NaN` can be either positive or negative - /// assert!(nan.is_sign_positive() != nan.is_sign_negative()); - /// ``` - #[inline] - #[must_use] - pub const fn is_sign_negative(self) -> bool { - self.0 & 0x8000u16 != 0 - } - - /// Returns a number composed of the magnitude of `self` and the sign of `sign`. - /// - /// Equal to `self` if the sign of `self` and `sign` are the same, otherwise equal to `-self`. - /// If `self` is NaN, then NaN with the sign of `sign` is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let f = f16::from_f32(3.5); - /// - /// assert_eq!(f.copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); - /// assert_eq!(f.copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); - /// assert_eq!((-f).copysign(f16::from_f32(0.42)), f16::from_f32(3.5)); - /// assert_eq!((-f).copysign(f16::from_f32(-0.42)), f16::from_f32(-3.5)); - /// - /// assert!(f16::NAN.copysign(f16::from_f32(1.0)).is_nan()); - /// ``` - #[inline] - #[must_use] - pub const fn copysign(self, sign: f16) -> f16 { - f16((sign.0 & 0x8000u16) | (self.0 & 0x7FFFu16)) - } - - /// Returns the maximum of the two numbers. - /// - /// If one of the arguments is NaN, then the other argument is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let x = f16::from_f32(1.0); - /// let y = f16::from_f32(2.0); - /// - /// assert_eq!(x.max(y), y); - /// ``` - #[inline] - #[must_use] - pub fn max(self, other: f16) -> f16 { - if other > self && !other.is_nan() { - other - } else { - self - } - } - - /// Returns the minimum of the two numbers. - /// - /// If one of the arguments is NaN, then the other argument is returned. - /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// let x = f16::from_f32(1.0); - /// let y = f16::from_f32(2.0); - /// - /// assert_eq!(x.min(y), x); - /// ``` - #[inline] - #[must_use] - pub fn min(self, other: f16) -> f16 { - if other < self && !other.is_nan() { - other - } else { - self - } - } - - /// Restrict a value to a certain interval unless it is NaN. - /// - /// Returns `max` if `self` is greater than `max`, and `min` if `self` is less than `min`. - /// Otherwise this returns `self`. - /// - /// Note that this function returns NaN if the initial value was NaN as well. - /// - /// # Panics - /// Panics if `min > max`, `min` is NaN, or `max` is NaN. 
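// Note on `max`/`min` above: because `other > self` (resp. `<`) is false whenever `self` is
// NaN, a NaN `self` is returned unchanged; the "other argument is returned" behaviour stated
// in their doc comments appears to apply only when `other` is the NaN operand.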
- /// - /// # Examples - /// - /// ``` - /// # use half::prelude::*; - /// assert!(f16::from_f32(-3.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(-2.0)); - /// assert!(f16::from_f32(0.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(0.0)); - /// assert!(f16::from_f32(2.0).clamp(f16::from_f32(-2.0), f16::from_f32(1.0)) == f16::from_f32(1.0)); - /// assert!(f16::NAN.clamp(f16::from_f32(-2.0), f16::from_f32(1.0)).is_nan()); - /// ``` - #[inline] - #[must_use] - pub fn clamp(self, min: f16, max: f16) -> f16 { - assert!(min <= max); - let mut x = self; - if x < min { - x = min; - } - if x > max { - x = max; - } - x - } - - /// Returns the ordering between `self` and `other`. - /// - /// Unlike the standard partial comparison between floating point numbers, - /// this comparison always produces an ordering in accordance to - /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision) - /// floating point standard. The values are ordered in the following sequence: - /// - /// - negative quiet NaN - /// - negative signaling NaN - /// - negative infinity - /// - negative numbers - /// - negative subnormal numbers - /// - negative zero - /// - positive zero - /// - positive subnormal numbers - /// - positive numbers - /// - positive infinity - /// - positive signaling NaN - /// - positive quiet NaN. - /// - /// The ordering established by this function does not always agree with the - /// [`PartialOrd`] and [`PartialEq`] implementations of `f16`. For example, - /// they consider negative and positive zero equal, while `total_cmp` - /// doesn't. - /// - /// The interpretation of the signaling NaN bit follows the definition in - /// the IEEE 754 standard, which may not match the interpretation by some of - /// the older, non-conformant (e.g. MIPS) hardware implementations. - /// - /// # Examples - /// ``` - /// # use half::f16; - /// let mut v: Vec<f16> = vec![]; - /// v.push(f16::ONE); - /// v.push(f16::INFINITY); - /// v.push(f16::NEG_INFINITY); - /// v.push(f16::NAN); - /// v.push(f16::MAX_SUBNORMAL); - /// v.push(-f16::MAX_SUBNORMAL); - /// v.push(f16::ZERO); - /// v.push(f16::NEG_ZERO); - /// v.push(f16::NEG_ONE); - /// v.push(f16::MIN_POSITIVE); - /// - /// v.sort_by(|a, b| a.total_cmp(&b)); - /// - /// assert!(v - /// .into_iter() - /// .zip( - /// [ - /// f16::NEG_INFINITY, - /// f16::NEG_ONE, - /// -f16::MAX_SUBNORMAL, - /// f16::NEG_ZERO, - /// f16::ZERO, - /// f16::MAX_SUBNORMAL, - /// f16::MIN_POSITIVE, - /// f16::ONE, - /// f16::INFINITY, - /// f16::NAN - /// ] - /// .iter() - /// ) - /// .all(|(a, b)| a.to_bits() == b.to_bits())); - /// ``` - // Implementation based on: https://doc.rust-lang.org/std/primitive.f32.html#method.total_cmp - #[inline] - #[must_use] - pub fn total_cmp(&self, other: &Self) -> Ordering { - let mut left = self.to_bits() as i16; - let mut right = other.to_bits() as i16; - left ^= (((left >> 15) as u16) >> 1) as i16; - right ^= (((right >> 15) as u16) >> 1) as i16; - left.cmp(&right) - } - - /// Alternate serialize adapter for serializing as a float. - /// - /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize - /// implementation that serializes as an [`f32`] value. It is designed for use with - /// `serialize_with` serde attributes. Deserialization from `f32` values is already supported by - /// the default deserialize implementation. 
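The `total_cmp` implementation above maps each value to a signed key that realises the IEEE 754 total order: reinterpret the bits as `i16`, then flip the low 15 bits of negative values so larger magnitudes compare smaller. A standalone sketch of that bit trick, with a hypothetical helper not part of the crate:

```rust
// Total-order key: identity for non-negative bit patterns, low-15-bit flip for negative ones.
fn total_order_key(bits: u16) -> i16 {
    let v = bits as i16;
    v ^ (((v >> 15) as u16) >> 1) as i16
}

fn main() {
    let neg_two = 0xC000u16; // -2.0
    let neg_one = 0xBC00u16; // -1.0
    let pos_one = 0x3C00u16; //  1.0
    assert!(total_order_key(neg_two) < total_order_key(neg_one));
    assert!(total_order_key(neg_one) < total_order_key(pos_one));
}
```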
- /// - /// # Examples - /// - /// A demonstration on how to use this adapater: - /// - /// ``` - /// use serde::{Serialize, Deserialize}; - /// use half::f16; - /// - /// #[derive(Serialize, Deserialize)] - /// struct MyStruct { - /// #[serde(serialize_with = "f16::serialize_as_f32")] - /// value: f16 // Will be serialized as f32 instead of u16 - /// } - /// ``` - #[cfg(feature = "serde")] - pub fn serialize_as_f32<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> { - serializer.serialize_f32(self.to_f32()) - } - - /// Alternate serialize adapter for serializing as a string. - /// - /// By default, [`f16`] serializes as a newtype of [`u16`]. This is an alternate serialize - /// implementation that serializes as a string value. It is designed for use with - /// `serialize_with` serde attributes. Deserialization from string values is already supported - /// by the default deserialize implementation. - /// - /// # Examples - /// - /// A demonstration on how to use this adapater: - /// - /// ``` - /// use serde::{Serialize, Deserialize}; - /// use half::f16; - /// - /// #[derive(Serialize, Deserialize)] - /// struct MyStruct { - /// #[serde(serialize_with = "f16::serialize_as_string")] - /// value: f16 // Will be serialized as a string instead of u16 - /// } - /// ``` - #[cfg(feature = "serde")] - pub fn serialize_as_string<S: serde::Serializer>( - &self, - serializer: S, - ) -> Result<S::Ok, S::Error> { - serializer.serialize_str(&self.to_string()) - } - - /// Approximate number of [`f16`] significant digits in base 10 - pub const DIGITS: u32 = 3; - /// [`f16`] - /// [machine epsilon](https://en.wikipedia.org/wiki/Machine_epsilon) value - /// - /// This is the difference between 1.0 and the next largest representable number. - pub const EPSILON: f16 = f16(0x1400u16); - /// [`f16`] positive Infinity (+∞) - pub const INFINITY: f16 = f16(0x7C00u16); - /// Number of [`f16`] significant digits in base 2 - pub const MANTISSA_DIGITS: u32 = 11; - /// Largest finite [`f16`] value - pub const MAX: f16 = f16(0x7BFF); - /// Maximum possible [`f16`] power of 10 exponent - pub const MAX_10_EXP: i32 = 4; - /// Maximum possible [`f16`] power of 2 exponent - pub const MAX_EXP: i32 = 16; - /// Smallest finite [`f16`] value - pub const MIN: f16 = f16(0xFBFF); - /// Minimum possible normal [`f16`] power of 10 exponent - pub const MIN_10_EXP: i32 = -4; - /// One greater than the minimum possible normal [`f16`] power of 2 exponent - pub const MIN_EXP: i32 = -13; - /// Smallest positive normal [`f16`] value - pub const MIN_POSITIVE: f16 = f16(0x0400u16); - /// [`f16`] Not a Number (NaN) - pub const NAN: f16 = f16(0x7E00u16); - /// [`f16`] negative infinity (-∞) - pub const NEG_INFINITY: f16 = f16(0xFC00u16); - /// The radix or base of the internal representation of [`f16`] - pub const RADIX: u32 = 2; - - /// Minimum positive subnormal [`f16`] value - pub const MIN_POSITIVE_SUBNORMAL: f16 = f16(0x0001u16); - /// Maximum subnormal [`f16`] value - pub const MAX_SUBNORMAL: f16 = f16(0x03FFu16); - - /// [`f16`] 1 - pub const ONE: f16 = f16(0x3C00u16); - /// [`f16`] 0 - pub const ZERO: f16 = f16(0x0000u16); - /// [`f16`] -0 - pub const NEG_ZERO: f16 = f16(0x8000u16); - /// [`f16`] -1 - pub const NEG_ONE: f16 = f16(0xBC00u16); - - /// [`f16`] Euler's number (ℯ) - pub const E: f16 = f16(0x4170u16); - /// [`f16`] Archimedes' constant (π) - pub const PI: f16 = f16(0x4248u16); - /// [`f16`] 1/π - pub const FRAC_1_PI: f16 = f16(0x3518u16); - /// [`f16`] 1/√2 - pub const FRAC_1_SQRT_2: f16 = 
f16(0x39A8u16); - /// [`f16`] 2/π - pub const FRAC_2_PI: f16 = f16(0x3918u16); - /// [`f16`] 2/√π - pub const FRAC_2_SQRT_PI: f16 = f16(0x3C83u16); - /// [`f16`] π/2 - pub const FRAC_PI_2: f16 = f16(0x3E48u16); - /// [`f16`] π/3 - pub const FRAC_PI_3: f16 = f16(0x3C30u16); - /// [`f16`] π/4 - pub const FRAC_PI_4: f16 = f16(0x3A48u16); - /// [`f16`] π/6 - pub const FRAC_PI_6: f16 = f16(0x3830u16); - /// [`f16`] π/8 - pub const FRAC_PI_8: f16 = f16(0x3648u16); - /// [`f16`] 𝗅𝗇 10 - pub const LN_10: f16 = f16(0x409Bu16); - /// [`f16`] 𝗅𝗇 2 - pub const LN_2: f16 = f16(0x398Cu16); - /// [`f16`] 𝗅𝗈𝗀₁₀ℯ - pub const LOG10_E: f16 = f16(0x36F3u16); - /// [`f16`] 𝗅𝗈𝗀₁₀2 - pub const LOG10_2: f16 = f16(0x34D1u16); - /// [`f16`] 𝗅𝗈𝗀₂ℯ - pub const LOG2_E: f16 = f16(0x3DC5u16); - /// [`f16`] 𝗅𝗈𝗀₂10 - pub const LOG2_10: f16 = f16(0x42A5u16); - /// [`f16`] √2 - pub const SQRT_2: f16 = f16(0x3DA8u16); -} - -impl From<f16> for f32 { - #[inline] - fn from(x: f16) -> f32 { - x.to_f32() - } -} - -impl From<f16> for f64 { - #[inline] - fn from(x: f16) -> f64 { - x.to_f64() - } -} - -impl From<i8> for f16 { - #[inline] - fn from(x: i8) -> f16 { - // Convert to f32, then to f16 - f16::from_f32(f32::from(x)) - } -} - -impl From<u8> for f16 { - #[inline] - fn from(x: u8) -> f16 { - // Convert to f32, then to f16 - f16::from_f32(f32::from(x)) - } -} - -impl PartialEq for f16 { - fn eq(&self, other: &f16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - (self.0 == other.0) || ((self.0 | other.0) & 0x7FFFu16 == 0) - } - } -} - -impl PartialOrd for f16 { - fn partial_cmp(&self, other: &f16) -> Option<Ordering> { - if self.is_nan() || other.is_nan() { - None - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => Some(self.0.cmp(&other.0)), - (false, true) => { - if (self.0 | other.0) & 0x7FFFu16 == 0 { - Some(Ordering::Equal) - } else { - Some(Ordering::Greater) - } - } - (true, false) => { - if (self.0 | other.0) & 0x7FFFu16 == 0 { - Some(Ordering::Equal) - } else { - Some(Ordering::Less) - } - } - (true, true) => Some(other.0.cmp(&self.0)), - } - } - } - - fn lt(&self, other: &f16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 < other.0, - (false, true) => false, - (true, false) => (self.0 | other.0) & 0x7FFFu16 != 0, - (true, true) => self.0 > other.0, - } - } - } - - fn le(&self, other: &f16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 <= other.0, - (false, true) => (self.0 | other.0) & 0x7FFFu16 == 0, - (true, false) => true, - (true, true) => self.0 >= other.0, - } - } - } - - fn gt(&self, other: &f16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 > other.0, - (false, true) => (self.0 | other.0) & 0x7FFFu16 != 0, - (true, false) => false, - (true, true) => self.0 < other.0, - } - } - } - - fn ge(&self, other: &f16) -> bool { - if self.is_nan() || other.is_nan() { - false - } else { - let neg = self.0 & 0x8000u16 != 0; - let other_neg = other.0 & 0x8000u16 != 0; - match (neg, other_neg) { - (false, false) => self.0 >= other.0, - (false, true) => 
true, - (true, false) => (self.0 | other.0) & 0x7FFFu16 == 0, - (true, true) => self.0 <= other.0, - } - } - } -} - -#[cfg(not(target_arch = "spirv"))] -impl FromStr for f16 { - type Err = ParseFloatError; - fn from_str(src: &str) -> Result<f16, ParseFloatError> { - f32::from_str(src).map(f16::from_f32) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Debug for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:?}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Display for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl LowerExp for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:e}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl UpperExp for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:E}", self.to_f32()) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Binary for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:b}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl Octal for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:o}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl LowerHex for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:x}", self.0) - } -} - -#[cfg(not(target_arch = "spirv"))] -impl UpperHex for f16 { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { - write!(f, "{:X}", self.0) - } -} - -impl Neg for f16 { - type Output = Self; - - #[inline] - fn neg(self) -> Self::Output { - Self(self.0 ^ 0x8000) - } -} - -impl Neg for &f16 { - type Output = <f16 as Neg>::Output; - - #[inline] - fn neg(self) -> Self::Output { - Neg::neg(*self) - } -} - -impl Add for f16 { - type Output = Self; - - #[inline] - fn add(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) + Self::to_f32(rhs)) - } -} - -impl Add<&f16> for f16 { - type Output = <f16 as Add<f16>>::Output; - - #[inline] - fn add(self, rhs: &f16) -> Self::Output { - self.add(*rhs) - } -} - -impl Add<&f16> for &f16 { - type Output = <f16 as Add<f16>>::Output; - - #[inline] - fn add(self, rhs: &f16) -> Self::Output { - (*self).add(*rhs) - } -} - -impl Add<f16> for &f16 { - type Output = <f16 as Add<f16>>::Output; - - #[inline] - fn add(self, rhs: f16) -> Self::Output { - (*self).add(rhs) - } -} - -impl AddAssign for f16 { - #[inline] - fn add_assign(&mut self, rhs: Self) { - *self = (*self).add(rhs); - } -} - -impl AddAssign<&f16> for f16 { - #[inline] - fn add_assign(&mut self, rhs: &f16) { - *self = (*self).add(rhs); - } -} - -impl Sub for f16 { - type Output = Self; - - #[inline] - fn sub(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) - Self::to_f32(rhs)) - } -} - -impl Sub<&f16> for f16 { - type Output = <f16 as Sub<f16>>::Output; - - #[inline] - fn sub(self, rhs: &f16) -> Self::Output { - self.sub(*rhs) - } -} - -impl Sub<&f16> for &f16 { - type Output = <f16 as Sub<f16>>::Output; - - #[inline] - fn sub(self, rhs: &f16) -> Self::Output { - (*self).sub(*rhs) - } -} - -impl Sub<f16> for &f16 { - type Output = <f16 as Sub<f16>>::Output; - - #[inline] - fn sub(self, rhs: f16) -> Self::Output { - (*self).sub(rhs) - } -} - -impl SubAssign for f16 { - #[inline] - fn sub_assign(&mut self, rhs: Self) { - *self = (*self).sub(rhs); - } -} - -impl SubAssign<&f16> for f16 { - #[inline] - fn sub_assign(&mut self, rhs: 
&f16) { - *self = (*self).sub(rhs); - } -} - -impl Mul for f16 { - type Output = Self; - - #[inline] - fn mul(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) * Self::to_f32(rhs)) - } -} - -impl Mul<&f16> for f16 { - type Output = <f16 as Mul<f16>>::Output; - - #[inline] - fn mul(self, rhs: &f16) -> Self::Output { - self.mul(*rhs) - } -} - -impl Mul<&f16> for &f16 { - type Output = <f16 as Mul<f16>>::Output; - - #[inline] - fn mul(self, rhs: &f16) -> Self::Output { - (*self).mul(*rhs) - } -} - -impl Mul<f16> for &f16 { - type Output = <f16 as Mul<f16>>::Output; - - #[inline] - fn mul(self, rhs: f16) -> Self::Output { - (*self).mul(rhs) - } -} - -impl MulAssign for f16 { - #[inline] - fn mul_assign(&mut self, rhs: Self) { - *self = (*self).mul(rhs); - } -} - -impl MulAssign<&f16> for f16 { - #[inline] - fn mul_assign(&mut self, rhs: &f16) { - *self = (*self).mul(rhs); - } -} - -impl Div for f16 { - type Output = Self; - - #[inline] - fn div(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) / Self::to_f32(rhs)) - } -} - -impl Div<&f16> for f16 { - type Output = <f16 as Div<f16>>::Output; - - #[inline] - fn div(self, rhs: &f16) -> Self::Output { - self.div(*rhs) - } -} - -impl Div<&f16> for &f16 { - type Output = <f16 as Div<f16>>::Output; - - #[inline] - fn div(self, rhs: &f16) -> Self::Output { - (*self).div(*rhs) - } -} - -impl Div<f16> for &f16 { - type Output = <f16 as Div<f16>>::Output; - - #[inline] - fn div(self, rhs: f16) -> Self::Output { - (*self).div(rhs) - } -} - -impl DivAssign for f16 { - #[inline] - fn div_assign(&mut self, rhs: Self) { - *self = (*self).div(rhs); - } -} - -impl DivAssign<&f16> for f16 { - #[inline] - fn div_assign(&mut self, rhs: &f16) { - *self = (*self).div(rhs); - } -} - -impl Rem for f16 { - type Output = Self; - - #[inline] - fn rem(self, rhs: Self) -> Self::Output { - Self::from_f32(Self::to_f32(self) % Self::to_f32(rhs)) - } -} - -impl Rem<&f16> for f16 { - type Output = <f16 as Rem<f16>>::Output; - - #[inline] - fn rem(self, rhs: &f16) -> Self::Output { - self.rem(*rhs) - } -} - -impl Rem<&f16> for &f16 { - type Output = <f16 as Rem<f16>>::Output; - - #[inline] - fn rem(self, rhs: &f16) -> Self::Output { - (*self).rem(*rhs) - } -} - -impl Rem<f16> for &f16 { - type Output = <f16 as Rem<f16>>::Output; - - #[inline] - fn rem(self, rhs: f16) -> Self::Output { - (*self).rem(rhs) - } -} - -impl RemAssign for f16 { - #[inline] - fn rem_assign(&mut self, rhs: Self) { - *self = (*self).rem(rhs); - } -} - -impl RemAssign<&f16> for f16 { - #[inline] - fn rem_assign(&mut self, rhs: &f16) { - *self = (*self).rem(rhs); - } -} - -impl Product for f16 { - #[inline] - fn product<I: Iterator<Item = Self>>(iter: I) -> Self { - f16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -impl<'a> Product<&'a f16> for f16 { - #[inline] - fn product<I: Iterator<Item = &'a f16>>(iter: I) -> Self { - f16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -impl Sum for f16 { - #[inline] - fn sum<I: Iterator<Item = Self>>(iter: I) -> Self { - f16::from_f32(iter.map(|f| f.to_f32()).sum()) - } -} - -impl<'a> Sum<&'a f16> for f16 { - #[inline] - fn sum<I: Iterator<Item = &'a f16>>(iter: I) -> Self { - f16::from_f32(iter.map(|f| f.to_f32()).product()) - } -} - -#[cfg(feature = "serde")] -struct Visitor; - -#[cfg(feature = "serde")] -impl<'de> Deserialize<'de> for f16 { - fn deserialize<D>(deserializer: D) -> Result<f16, D::Error> - where - D: serde::de::Deserializer<'de>, - { - deserializer.deserialize_newtype_struct("f16", Visitor) - 
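// (Unrelated to deserialization: the borrowed `Sum<&'a f16>` impl above folds the iterator
// with `.product()`; `.sum()` is presumably what was intended, since as written summing an
// iterator of `&f16` multiplies the values instead.)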
} -} - -#[cfg(feature = "serde")] -impl<'de> serde::de::Visitor<'de> for Visitor { - type Value = f16; - - fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result { - write!(formatter, "tuple struct f16") - } - - fn visit_newtype_struct<D>(self, deserializer: D) -> Result<Self::Value, D::Error> - where - D: serde::Deserializer<'de>, - { - Ok(f16(<u16 as Deserialize>::deserialize(deserializer)?)) - } - - fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - v.parse().map_err(|_| { - serde::de::Error::invalid_value(serde::de::Unexpected::Str(v), &"a float string") - }) - } - - fn visit_f32<E>(self, v: f32) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(f16::from_f32(v)) - } - - fn visit_f64<E>(self, v: f64) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(f16::from_f64(v)) - } -} - -#[allow( - clippy::cognitive_complexity, - clippy::float_cmp, - clippy::neg_cmp_op_on_partial_ord -)] -#[cfg(test)] -mod test { - use super::*; - use core::cmp::Ordering; - #[cfg(feature = "num-traits")] - use num_traits::{AsPrimitive, FromPrimitive, ToPrimitive}; - use quickcheck_macros::quickcheck; - - #[cfg(feature = "num-traits")] - #[test] - fn as_primitive() { - let two = f16::from_f32(2.0); - assert_eq!(<i32 as AsPrimitive<f16>>::as_(2), two); - assert_eq!(<f16 as AsPrimitive<i32>>::as_(two), 2); - - assert_eq!(<f32 as AsPrimitive<f16>>::as_(2.0), two); - assert_eq!(<f16 as AsPrimitive<f32>>::as_(two), 2.0); - - assert_eq!(<f64 as AsPrimitive<f16>>::as_(2.0), two); - assert_eq!(<f16 as AsPrimitive<f64>>::as_(two), 2.0); - } - - #[cfg(feature = "num-traits")] - #[test] - fn to_primitive() { - let two = f16::from_f32(2.0); - assert_eq!(ToPrimitive::to_i32(&two).unwrap(), 2i32); - assert_eq!(ToPrimitive::to_f32(&two).unwrap(), 2.0f32); - assert_eq!(ToPrimitive::to_f64(&two).unwrap(), 2.0f64); - } - - #[cfg(feature = "num-traits")] - #[test] - fn from_primitive() { - let two = f16::from_f32(2.0); - assert_eq!(<f16 as FromPrimitive>::from_i32(2).unwrap(), two); - assert_eq!(<f16 as FromPrimitive>::from_f32(2.0).unwrap(), two); - assert_eq!(<f16 as FromPrimitive>::from_f64(2.0).unwrap(), two); - } - - #[test] - fn test_f16_consts() { - // DIGITS - let digits = ((f16::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; - assert_eq!(f16::DIGITS, digits); - // sanity check to show test is good - let digits32 = ((core::f32::MANTISSA_DIGITS as f32 - 1.0) * 2f32.log10()).floor() as u32; - assert_eq!(core::f32::DIGITS, digits32); - - // EPSILON - let one = f16::from_f32(1.0); - let one_plus_epsilon = f16::from_bits(one.to_bits() + 1); - let epsilon = f16::from_f32(one_plus_epsilon.to_f32() - 1.0); - assert_eq!(f16::EPSILON, epsilon); - // sanity check to show test is good - let one_plus_epsilon32 = f32::from_bits(1.0f32.to_bits() + 1); - let epsilon32 = one_plus_epsilon32 - 1f32; - assert_eq!(core::f32::EPSILON, epsilon32); - - // MAX, MIN and MIN_POSITIVE - let max = f16::from_bits(f16::INFINITY.to_bits() - 1); - let min = f16::from_bits(f16::NEG_INFINITY.to_bits() - 1); - let min_pos = f16::from_f32(2f32.powi(f16::MIN_EXP - 1)); - assert_eq!(f16::MAX, max); - assert_eq!(f16::MIN, min); - assert_eq!(f16::MIN_POSITIVE, min_pos); - // sanity check to show test is good - let max32 = f32::from_bits(core::f32::INFINITY.to_bits() - 1); - let min32 = f32::from_bits(core::f32::NEG_INFINITY.to_bits() - 1); - let min_pos32 = 2f32.powi(core::f32::MIN_EXP - 1); - assert_eq!(core::f32::MAX, max32); - 
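// The MAX/MIN assertions above lean on the bit layout: the largest finite value is one bit
// pattern below infinity (0x7BFF vs 0x7C00), and MIN_POSITIVE is 2^(MIN_EXP - 1) = 2^-14,
// i.e. bit pattern 0x0400.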
assert_eq!(core::f32::MIN, min32); - assert_eq!(core::f32::MIN_POSITIVE, min_pos32); - - // MIN_10_EXP and MAX_10_EXP - let ten_to_min = 10f32.powi(f16::MIN_10_EXP); - assert!(ten_to_min / 10.0 < f16::MIN_POSITIVE.to_f32()); - assert!(ten_to_min > f16::MIN_POSITIVE.to_f32()); - let ten_to_max = 10f32.powi(f16::MAX_10_EXP); - assert!(ten_to_max < f16::MAX.to_f32()); - assert!(ten_to_max * 10.0 > f16::MAX.to_f32()); - // sanity check to show test is good - let ten_to_min32 = 10f64.powi(core::f32::MIN_10_EXP); - assert!(ten_to_min32 / 10.0 < f64::from(core::f32::MIN_POSITIVE)); - assert!(ten_to_min32 > f64::from(core::f32::MIN_POSITIVE)); - let ten_to_max32 = 10f64.powi(core::f32::MAX_10_EXP); - assert!(ten_to_max32 < f64::from(core::f32::MAX)); - assert!(ten_to_max32 * 10.0 > f64::from(core::f32::MAX)); - } - - #[test] - fn test_f16_consts_from_f32() { - let one = f16::from_f32(1.0); - let zero = f16::from_f32(0.0); - let neg_zero = f16::from_f32(-0.0); - let neg_one = f16::from_f32(-1.0); - let inf = f16::from_f32(core::f32::INFINITY); - let neg_inf = f16::from_f32(core::f32::NEG_INFINITY); - let nan = f16::from_f32(core::f32::NAN); - - assert_eq!(f16::ONE, one); - assert_eq!(f16::ZERO, zero); - assert!(zero.is_sign_positive()); - assert_eq!(f16::NEG_ZERO, neg_zero); - assert!(neg_zero.is_sign_negative()); - assert_eq!(f16::NEG_ONE, neg_one); - assert!(neg_one.is_sign_negative()); - assert_eq!(f16::INFINITY, inf); - assert_eq!(f16::NEG_INFINITY, neg_inf); - assert!(nan.is_nan()); - assert!(f16::NAN.is_nan()); - - let e = f16::from_f32(core::f32::consts::E); - let pi = f16::from_f32(core::f32::consts::PI); - let frac_1_pi = f16::from_f32(core::f32::consts::FRAC_1_PI); - let frac_1_sqrt_2 = f16::from_f32(core::f32::consts::FRAC_1_SQRT_2); - let frac_2_pi = f16::from_f32(core::f32::consts::FRAC_2_PI); - let frac_2_sqrt_pi = f16::from_f32(core::f32::consts::FRAC_2_SQRT_PI); - let frac_pi_2 = f16::from_f32(core::f32::consts::FRAC_PI_2); - let frac_pi_3 = f16::from_f32(core::f32::consts::FRAC_PI_3); - let frac_pi_4 = f16::from_f32(core::f32::consts::FRAC_PI_4); - let frac_pi_6 = f16::from_f32(core::f32::consts::FRAC_PI_6); - let frac_pi_8 = f16::from_f32(core::f32::consts::FRAC_PI_8); - let ln_10 = f16::from_f32(core::f32::consts::LN_10); - let ln_2 = f16::from_f32(core::f32::consts::LN_2); - let log10_e = f16::from_f32(core::f32::consts::LOG10_E); - // core::f32::consts::LOG10_2 requires rustc 1.43.0 - let log10_2 = f16::from_f32(2f32.log10()); - let log2_e = f16::from_f32(core::f32::consts::LOG2_E); - // core::f32::consts::LOG2_10 requires rustc 1.43.0 - let log2_10 = f16::from_f32(10f32.log2()); - let sqrt_2 = f16::from_f32(core::f32::consts::SQRT_2); - - assert_eq!(f16::E, e); - assert_eq!(f16::PI, pi); - assert_eq!(f16::FRAC_1_PI, frac_1_pi); - assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); - assert_eq!(f16::FRAC_2_PI, frac_2_pi); - assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); - assert_eq!(f16::FRAC_PI_2, frac_pi_2); - assert_eq!(f16::FRAC_PI_3, frac_pi_3); - assert_eq!(f16::FRAC_PI_4, frac_pi_4); - assert_eq!(f16::FRAC_PI_6, frac_pi_6); - assert_eq!(f16::FRAC_PI_8, frac_pi_8); - assert_eq!(f16::LN_10, ln_10); - assert_eq!(f16::LN_2, ln_2); - assert_eq!(f16::LOG10_E, log10_e); - assert_eq!(f16::LOG10_2, log10_2); - assert_eq!(f16::LOG2_E, log2_e); - assert_eq!(f16::LOG2_10, log2_10); - assert_eq!(f16::SQRT_2, sqrt_2); - } - - #[test] - fn test_f16_consts_from_f64() { - let one = f16::from_f64(1.0); - let zero = f16::from_f64(0.0); - let neg_zero = f16::from_f64(-0.0); - let inf = 
f16::from_f64(core::f64::INFINITY); - let neg_inf = f16::from_f64(core::f64::NEG_INFINITY); - let nan = f16::from_f64(core::f64::NAN); - - assert_eq!(f16::ONE, one); - assert_eq!(f16::ZERO, zero); - assert!(zero.is_sign_positive()); - assert_eq!(f16::NEG_ZERO, neg_zero); - assert!(neg_zero.is_sign_negative()); - assert_eq!(f16::INFINITY, inf); - assert_eq!(f16::NEG_INFINITY, neg_inf); - assert!(nan.is_nan()); - assert!(f16::NAN.is_nan()); - - let e = f16::from_f64(core::f64::consts::E); - let pi = f16::from_f64(core::f64::consts::PI); - let frac_1_pi = f16::from_f64(core::f64::consts::FRAC_1_PI); - let frac_1_sqrt_2 = f16::from_f64(core::f64::consts::FRAC_1_SQRT_2); - let frac_2_pi = f16::from_f64(core::f64::consts::FRAC_2_PI); - let frac_2_sqrt_pi = f16::from_f64(core::f64::consts::FRAC_2_SQRT_PI); - let frac_pi_2 = f16::from_f64(core::f64::consts::FRAC_PI_2); - let frac_pi_3 = f16::from_f64(core::f64::consts::FRAC_PI_3); - let frac_pi_4 = f16::from_f64(core::f64::consts::FRAC_PI_4); - let frac_pi_6 = f16::from_f64(core::f64::consts::FRAC_PI_6); - let frac_pi_8 = f16::from_f64(core::f64::consts::FRAC_PI_8); - let ln_10 = f16::from_f64(core::f64::consts::LN_10); - let ln_2 = f16::from_f64(core::f64::consts::LN_2); - let log10_e = f16::from_f64(core::f64::consts::LOG10_E); - // core::f64::consts::LOG10_2 requires rustc 1.43.0 - let log10_2 = f16::from_f64(2f64.log10()); - let log2_e = f16::from_f64(core::f64::consts::LOG2_E); - // core::f64::consts::LOG2_10 requires rustc 1.43.0 - let log2_10 = f16::from_f64(10f64.log2()); - let sqrt_2 = f16::from_f64(core::f64::consts::SQRT_2); - - assert_eq!(f16::E, e); - assert_eq!(f16::PI, pi); - assert_eq!(f16::FRAC_1_PI, frac_1_pi); - assert_eq!(f16::FRAC_1_SQRT_2, frac_1_sqrt_2); - assert_eq!(f16::FRAC_2_PI, frac_2_pi); - assert_eq!(f16::FRAC_2_SQRT_PI, frac_2_sqrt_pi); - assert_eq!(f16::FRAC_PI_2, frac_pi_2); - assert_eq!(f16::FRAC_PI_3, frac_pi_3); - assert_eq!(f16::FRAC_PI_4, frac_pi_4); - assert_eq!(f16::FRAC_PI_6, frac_pi_6); - assert_eq!(f16::FRAC_PI_8, frac_pi_8); - assert_eq!(f16::LN_10, ln_10); - assert_eq!(f16::LN_2, ln_2); - assert_eq!(f16::LOG10_E, log10_e); - assert_eq!(f16::LOG10_2, log10_2); - assert_eq!(f16::LOG2_E, log2_e); - assert_eq!(f16::LOG2_10, log2_10); - assert_eq!(f16::SQRT_2, sqrt_2); - } - - #[test] - fn test_nan_conversion_to_smaller() { - let nan64 = f64::from_bits(0x7FF0_0000_0000_0001u64); - let neg_nan64 = f64::from_bits(0xFFF0_0000_0000_0001u64); - let nan32 = f32::from_bits(0x7F80_0001u32); - let neg_nan32 = f32::from_bits(0xFF80_0001u32); - let nan32_from_64 = nan64 as f32; - let neg_nan32_from_64 = neg_nan64 as f32; - let nan16_from_64 = f16::from_f64(nan64); - let neg_nan16_from_64 = f16::from_f64(neg_nan64); - let nan16_from_32 = f16::from_f32(nan32); - let neg_nan16_from_32 = f16::from_f32(neg_nan32); - - assert!(nan64.is_nan() && nan64.is_sign_positive()); - assert!(neg_nan64.is_nan() && neg_nan64.is_sign_negative()); - assert!(nan32.is_nan() && nan32.is_sign_positive()); - assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); - assert!(nan32_from_64.is_nan() && nan32_from_64.is_sign_positive()); - assert!(neg_nan32_from_64.is_nan() && neg_nan32_from_64.is_sign_negative()); - assert!(nan16_from_64.is_nan() && nan16_from_64.is_sign_positive()); - assert!(neg_nan16_from_64.is_nan() && neg_nan16_from_64.is_sign_negative()); - assert!(nan16_from_32.is_nan() && nan16_from_32.is_sign_positive()); - assert!(neg_nan16_from_32.is_nan() && neg_nan16_from_32.is_sign_negative()); - } - - #[test] - fn 
test_nan_conversion_to_larger() { - let nan16 = f16::from_bits(0x7C01u16); - let neg_nan16 = f16::from_bits(0xFC01u16); - let nan32 = f32::from_bits(0x7F80_0001u32); - let neg_nan32 = f32::from_bits(0xFF80_0001u32); - let nan32_from_16 = f32::from(nan16); - let neg_nan32_from_16 = f32::from(neg_nan16); - let nan64_from_16 = f64::from(nan16); - let neg_nan64_from_16 = f64::from(neg_nan16); - let nan64_from_32 = f64::from(nan32); - let neg_nan64_from_32 = f64::from(neg_nan32); - - assert!(nan16.is_nan() && nan16.is_sign_positive()); - assert!(neg_nan16.is_nan() && neg_nan16.is_sign_negative()); - assert!(nan32.is_nan() && nan32.is_sign_positive()); - assert!(neg_nan32.is_nan() && neg_nan32.is_sign_negative()); - assert!(nan32_from_16.is_nan() && nan32_from_16.is_sign_positive()); - assert!(neg_nan32_from_16.is_nan() && neg_nan32_from_16.is_sign_negative()); - assert!(nan64_from_16.is_nan() && nan64_from_16.is_sign_positive()); - assert!(neg_nan64_from_16.is_nan() && neg_nan64_from_16.is_sign_negative()); - assert!(nan64_from_32.is_nan() && nan64_from_32.is_sign_positive()); - assert!(neg_nan64_from_32.is_nan() && neg_nan64_from_32.is_sign_negative()); - } - - #[test] - fn test_f16_to_f32() { - let f = f16::from_f32(7.0); - assert_eq!(f.to_f32(), 7.0f32); - - // 7.1 is NOT exactly representable in 16-bit, it's rounded - let f = f16::from_f32(7.1); - let diff = (f.to_f32() - 7.1f32).abs(); - // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 - assert!(diff <= 4.0 * f16::EPSILON.to_f32()); - - assert_eq!(f16::from_bits(0x0000_0001).to_f32(), 2.0f32.powi(-24)); - assert_eq!(f16::from_bits(0x0000_0005).to_f32(), 5.0 * 2.0f32.powi(-24)); - - assert_eq!(f16::from_bits(0x0000_0001), f16::from_f32(2.0f32.powi(-24))); - assert_eq!( - f16::from_bits(0x0000_0005), - f16::from_f32(5.0 * 2.0f32.powi(-24)) - ); - } - - #[test] - fn test_f16_to_f64() { - let f = f16::from_f64(7.0); - assert_eq!(f.to_f64(), 7.0f64); - - // 7.1 is NOT exactly representable in 16-bit, it's rounded - let f = f16::from_f64(7.1); - let diff = (f.to_f64() - 7.1f64).abs(); - // diff must be <= 4 * EPSILON, as 7 has two more significant bits than 1 - assert!(diff <= 4.0 * f16::EPSILON.to_f64()); - - assert_eq!(f16::from_bits(0x0000_0001).to_f64(), 2.0f64.powi(-24)); - assert_eq!(f16::from_bits(0x0000_0005).to_f64(), 5.0 * 2.0f64.powi(-24)); - - assert_eq!(f16::from_bits(0x0000_0001), f16::from_f64(2.0f64.powi(-24))); - assert_eq!( - f16::from_bits(0x0000_0005), - f16::from_f64(5.0 * 2.0f64.powi(-24)) - ); - } - - #[test] - fn test_comparisons() { - let zero = f16::from_f64(0.0); - let one = f16::from_f64(1.0); - let neg_zero = f16::from_f64(-0.0); - let neg_one = f16::from_f64(-1.0); - - assert_eq!(zero.partial_cmp(&neg_zero), Some(Ordering::Equal)); - assert_eq!(neg_zero.partial_cmp(&zero), Some(Ordering::Equal)); - assert!(zero == neg_zero); - assert!(neg_zero == zero); - assert!(!(zero != neg_zero)); - assert!(!(neg_zero != zero)); - assert!(!(zero < neg_zero)); - assert!(!(neg_zero < zero)); - assert!(zero <= neg_zero); - assert!(neg_zero <= zero); - assert!(!(zero > neg_zero)); - assert!(!(neg_zero > zero)); - assert!(zero >= neg_zero); - assert!(neg_zero >= zero); - - assert_eq!(one.partial_cmp(&neg_zero), Some(Ordering::Greater)); - assert_eq!(neg_zero.partial_cmp(&one), Some(Ordering::Less)); - assert!(!(one == neg_zero)); - assert!(!(neg_zero == one)); - assert!(one != neg_zero); - assert!(neg_zero != one); - assert!(!(one < neg_zero)); - assert!(neg_zero < one); - assert!(!(one <= neg_zero)); - 
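// (The zero comparisons above rely on the `PartialEq`/`PartialOrd` impls, which treat +0 and
// -0 as equal via the `(self.0 | other.0) & 0x7FFF == 0` check rather than by bit equality.)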
assert!(neg_zero <= one); - assert!(one > neg_zero); - assert!(!(neg_zero > one)); - assert!(one >= neg_zero); - assert!(!(neg_zero >= one)); - - assert_eq!(one.partial_cmp(&neg_one), Some(Ordering::Greater)); - assert_eq!(neg_one.partial_cmp(&one), Some(Ordering::Less)); - assert!(!(one == neg_one)); - assert!(!(neg_one == one)); - assert!(one != neg_one); - assert!(neg_one != one); - assert!(!(one < neg_one)); - assert!(neg_one < one); - assert!(!(one <= neg_one)); - assert!(neg_one <= one); - assert!(one > neg_one); - assert!(!(neg_one > one)); - assert!(one >= neg_one); - assert!(!(neg_one >= one)); - } - - #[test] - #[allow(clippy::erasing_op, clippy::identity_op)] - fn round_to_even_f32() { - // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 - let min_sub = f16::from_bits(1); - let min_sub_f = (-24f32).exp2(); - assert_eq!(f16::from_f32(min_sub_f).to_bits(), min_sub.to_bits()); - assert_eq!(f32::from(min_sub).to_bits(), min_sub_f.to_bits()); - - // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) - // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) - // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) - assert_eq!( - f16::from_f32(min_sub_f * 0.49).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - f16::from_f32(min_sub_f * 0.50).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - f16::from_f32(min_sub_f * 0.51).to_bits(), - min_sub.to_bits() * 1 - ); - - // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) - // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) - // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) - assert_eq!( - f16::from_f32(min_sub_f * 1.49).to_bits(), - min_sub.to_bits() * 1 - ); - assert_eq!( - f16::from_f32(min_sub_f * 1.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f32(min_sub_f * 1.51).to_bits(), - min_sub.to_bits() * 2 - ); - - // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) - // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) - // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) - assert_eq!( - f16::from_f32(min_sub_f * 2.49).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f32(min_sub_f * 2.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f32(min_sub_f * 2.51).to_bits(), - min_sub.to_bits() * 3 - ); - - assert_eq!( - f16::from_f32(2000.49f32).to_bits(), - f16::from_f32(2000.0).to_bits() - ); - assert_eq!( - f16::from_f32(2000.50f32).to_bits(), - f16::from_f32(2000.0).to_bits() - ); - assert_eq!( - f16::from_f32(2000.51f32).to_bits(), - f16::from_f32(2001.0).to_bits() - ); - assert_eq!( - f16::from_f32(2001.49f32).to_bits(), - f16::from_f32(2001.0).to_bits() - ); - assert_eq!( - f16::from_f32(2001.50f32).to_bits(), - f16::from_f32(2002.0).to_bits() - ); - assert_eq!( - f16::from_f32(2001.51f32).to_bits(), - f16::from_f32(2002.0).to_bits() - ); - assert_eq!( - f16::from_f32(2002.49f32).to_bits(), - f16::from_f32(2002.0).to_bits() - ); - assert_eq!( - f16::from_f32(2002.50f32).to_bits(), - f16::from_f32(2002.0).to_bits() - ); - assert_eq!( - f16::from_f32(2002.51f32).to_bits(), - f16::from_f32(2003.0).to_bits() - ); - } - - #[test] - #[allow(clippy::erasing_op, clippy::identity_op)] - fn round_to_even_f64() { - // smallest positive subnormal = 0b0.0000_0000_01 * 2^-14 = 2^-24 - let min_sub = f16::from_bits(1); - let min_sub_f = (-24f64).exp2(); - assert_eq!(f16::from_f64(min_sub_f).to_bits(), 
min_sub.to_bits()); - assert_eq!(f64::from(min_sub).to_bits(), min_sub_f.to_bits()); - - // 0.0000000000_011111 rounded to 0.0000000000 (< tie, no rounding) - // 0.0000000000_100000 rounded to 0.0000000000 (tie and even, remains at even) - // 0.0000000000_100001 rounded to 0.0000000001 (> tie, rounds up) - assert_eq!( - f16::from_f64(min_sub_f * 0.49).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - f16::from_f64(min_sub_f * 0.50).to_bits(), - min_sub.to_bits() * 0 - ); - assert_eq!( - f16::from_f64(min_sub_f * 0.51).to_bits(), - min_sub.to_bits() * 1 - ); - - // 0.0000000001_011111 rounded to 0.0000000001 (< tie, no rounding) - // 0.0000000001_100000 rounded to 0.0000000010 (tie and odd, rounds up to even) - // 0.0000000001_100001 rounded to 0.0000000010 (> tie, rounds up) - assert_eq!( - f16::from_f64(min_sub_f * 1.49).to_bits(), - min_sub.to_bits() * 1 - ); - assert_eq!( - f16::from_f64(min_sub_f * 1.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f64(min_sub_f * 1.51).to_bits(), - min_sub.to_bits() * 2 - ); - - // 0.0000000010_011111 rounded to 0.0000000010 (< tie, no rounding) - // 0.0000000010_100000 rounded to 0.0000000010 (tie and even, remains at even) - // 0.0000000010_100001 rounded to 0.0000000011 (> tie, rounds up) - assert_eq!( - f16::from_f64(min_sub_f * 2.49).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f64(min_sub_f * 2.50).to_bits(), - min_sub.to_bits() * 2 - ); - assert_eq!( - f16::from_f64(min_sub_f * 2.51).to_bits(), - min_sub.to_bits() * 3 - ); - - assert_eq!( - f16::from_f64(2000.49f64).to_bits(), - f16::from_f64(2000.0).to_bits() - ); - assert_eq!( - f16::from_f64(2000.50f64).to_bits(), - f16::from_f64(2000.0).to_bits() - ); - assert_eq!( - f16::from_f64(2000.51f64).to_bits(), - f16::from_f64(2001.0).to_bits() - ); - assert_eq!( - f16::from_f64(2001.49f64).to_bits(), - f16::from_f64(2001.0).to_bits() - ); - assert_eq!( - f16::from_f64(2001.50f64).to_bits(), - f16::from_f64(2002.0).to_bits() - ); - assert_eq!( - f16::from_f64(2001.51f64).to_bits(), - f16::from_f64(2002.0).to_bits() - ); - assert_eq!( - f16::from_f64(2002.49f64).to_bits(), - f16::from_f64(2002.0).to_bits() - ); - assert_eq!( - f16::from_f64(2002.50f64).to_bits(), - f16::from_f64(2002.0).to_bits() - ); - assert_eq!( - f16::from_f64(2002.51f64).to_bits(), - f16::from_f64(2003.0).to_bits() - ); - } - - impl quickcheck::Arbitrary for f16 { - fn arbitrary(g: &mut quickcheck::Gen) -> Self { - f16(u16::arbitrary(g)) - } - } - - #[quickcheck] - fn qc_roundtrip_f16_f32_is_identity(f: f16) -> bool { - let roundtrip = f16::from_f32(f.to_f32()); - if f.is_nan() { - roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() - } else { - f.0 == roundtrip.0 - } - } - - #[quickcheck] - fn qc_roundtrip_f16_f64_is_identity(f: f16) -> bool { - let roundtrip = f16::from_f64(f.to_f64()); - if f.is_nan() { - roundtrip.is_nan() && f.is_sign_negative() == roundtrip.is_sign_negative() - } else { - f.0 == roundtrip.0 - } - } -} diff --git a/vendor/half/src/binary16/convert.rs b/vendor/half/src/binary16/convert.rs deleted file mode 100644 index b96910f..0000000 --- a/vendor/half/src/binary16/convert.rs +++ /dev/null @@ -1,752 +0,0 @@ -#![allow(dead_code, unused_imports)] -use crate::leading_zeros::leading_zeros_u16; -use core::mem; - -macro_rules! 
convert_fn { - (fn $name:ident($($var:ident : $vartype:ty),+) -> $restype:ty { - if feature("f16c") { $f16c:expr } - else { $fallback:expr }}) => { - #[inline] - pub(crate) fn $name($($var: $vartype),+) -> $restype { - // Use CPU feature detection if using std - #[cfg(all( - feature = "use-intrinsics", - feature = "std", - any(target_arch = "x86", target_arch = "x86_64"), - not(target_feature = "f16c") - ))] - { - if is_x86_feature_detected!("f16c") { - $f16c - } else { - $fallback - } - } - // Use intrinsics directly when a compile target or using no_std - #[cfg(all( - feature = "use-intrinsics", - any(target_arch = "x86", target_arch = "x86_64"), - target_feature = "f16c" - ))] - { - $f16c - } - // Fallback to software - #[cfg(any( - not(feature = "use-intrinsics"), - not(any(target_arch = "x86", target_arch = "x86_64")), - all(not(feature = "std"), not(target_feature = "f16c")) - ))] - { - $fallback - } - } - }; -} - -convert_fn! { - fn f32_to_f16(f: f32) -> u16 { - if feature("f16c") { - unsafe { x86::f32_to_f16_x86_f16c(f) } - } else { - f32_to_f16_fallback(f) - } - } -} - -convert_fn! { - fn f64_to_f16(f: f64) -> u16 { - if feature("f16c") { - unsafe { x86::f32_to_f16_x86_f16c(f as f32) } - } else { - f64_to_f16_fallback(f) - } - } -} - -convert_fn! { - fn f16_to_f32(i: u16) -> f32 { - if feature("f16c") { - unsafe { x86::f16_to_f32_x86_f16c(i) } - } else { - f16_to_f32_fallback(i) - } - } -} - -convert_fn! { - fn f16_to_f64(i: u16) -> f64 { - if feature("f16c") { - unsafe { x86::f16_to_f32_x86_f16c(i) as f64 } - } else { - f16_to_f64_fallback(i) - } - } -} - -convert_fn! { - fn f32x4_to_f16x4(f: &[f32; 4]) -> [u16; 4] { - if feature("f16c") { - unsafe { x86::f32x4_to_f16x4_x86_f16c(f) } - } else { - f32x4_to_f16x4_fallback(f) - } - } -} - -convert_fn! { - fn f16x4_to_f32x4(i: &[u16; 4]) -> [f32; 4] { - if feature("f16c") { - unsafe { x86::f16x4_to_f32x4_x86_f16c(i) } - } else { - f16x4_to_f32x4_fallback(i) - } - } -} - -convert_fn! { - fn f64x4_to_f16x4(f: &[f64; 4]) -> [u16; 4] { - if feature("f16c") { - unsafe { x86::f64x4_to_f16x4_x86_f16c(f) } - } else { - f64x4_to_f16x4_fallback(f) - } - } -} - -convert_fn! { - fn f16x4_to_f64x4(i: &[u16; 4]) -> [f64; 4] { - if feature("f16c") { - unsafe { x86::f16x4_to_f64x4_x86_f16c(i) } - } else { - f16x4_to_f64x4_fallback(i) - } - } -} - -convert_fn! { - fn f32x8_to_f16x8(f: &[f32; 8]) -> [u16; 8] { - if feature("f16c") { - unsafe { x86::f32x8_to_f16x8_x86_f16c(f) } - } else { - f32x8_to_f16x8_fallback(f) - } - } -} - -convert_fn! { - fn f16x8_to_f32x8(i: &[u16; 8]) -> [f32; 8] { - if feature("f16c") { - unsafe { x86::f16x8_to_f32x8_x86_f16c(i) } - } else { - f16x8_to_f32x8_fallback(i) - } - } -} - -convert_fn! { - fn f64x8_to_f16x8(f: &[f64; 8]) -> [u16; 8] { - if feature("f16c") { - unsafe { x86::f64x8_to_f16x8_x86_f16c(f) } - } else { - f64x8_to_f16x8_fallback(f) - } - } -} - -convert_fn! { - fn f16x8_to_f64x8(i: &[u16; 8]) -> [f64; 8] { - if feature("f16c") { - unsafe { x86::f16x8_to_f64x8_x86_f16c(i) } - } else { - f16x8_to_f64x8_fallback(i) - } - } -} - -convert_fn! { - fn f32_to_f16_slice(src: &[f32], dst: &mut [u16]) -> () { - if feature("f16c") { - convert_chunked_slice_8(src, dst, x86::f32x8_to_f16x8_x86_f16c, - x86::f32x4_to_f16x4_x86_f16c) - } else { - slice_fallback(src, dst, f32_to_f16_fallback) - } - } -} - -convert_fn! 
{ - fn f16_to_f32_slice(src: &[u16], dst: &mut [f32]) -> () { - if feature("f16c") { - convert_chunked_slice_8(src, dst, x86::f16x8_to_f32x8_x86_f16c, - x86::f16x4_to_f32x4_x86_f16c) - } else { - slice_fallback(src, dst, f16_to_f32_fallback) - } - } -} - -convert_fn! { - fn f64_to_f16_slice(src: &[f64], dst: &mut [u16]) -> () { - if feature("f16c") { - convert_chunked_slice_8(src, dst, x86::f64x8_to_f16x8_x86_f16c, - x86::f64x4_to_f16x4_x86_f16c) - } else { - slice_fallback(src, dst, f64_to_f16_fallback) - } - } -} - -convert_fn! { - fn f16_to_f64_slice(src: &[u16], dst: &mut [f64]) -> () { - if feature("f16c") { - convert_chunked_slice_8(src, dst, x86::f16x8_to_f64x8_x86_f16c, - x86::f16x4_to_f64x4_x86_f16c) - } else { - slice_fallback(src, dst, f16_to_f64_fallback) - } - } -} - -/// Chunks sliced into x8 or x4 arrays -#[inline] -fn convert_chunked_slice_8<S: Copy + Default, D: Copy>( - src: &[S], - dst: &mut [D], - fn8: unsafe fn(&[S; 8]) -> [D; 8], - fn4: unsafe fn(&[S; 4]) -> [D; 4], -) { - assert_eq!(src.len(), dst.len()); - - // TODO: Can be further optimized with array_chunks when it becomes stabilized - - let src_chunks = src.chunks_exact(8); - let mut dst_chunks = dst.chunks_exact_mut(8); - let src_remainder = src_chunks.remainder(); - for (s, d) in src_chunks.zip(&mut dst_chunks) { - let chunk: &[S; 8] = s.try_into().unwrap(); - d.copy_from_slice(unsafe { &fn8(chunk) }); - } - - // Process remainder - if src_remainder.len() > 4 { - let mut buf: [S; 8] = Default::default(); - buf[..src_remainder.len()].copy_from_slice(src_remainder); - let vec = unsafe { fn8(&buf) }; - let dst_remainder = dst_chunks.into_remainder(); - dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); - } else if !src_remainder.is_empty() { - let mut buf: [S; 4] = Default::default(); - buf[..src_remainder.len()].copy_from_slice(src_remainder); - let vec = unsafe { fn4(&buf) }; - let dst_remainder = dst_chunks.into_remainder(); - dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); - } -} - -/// Chunks sliced into x4 arrays -#[inline] -fn convert_chunked_slice_4<S: Copy + Default, D: Copy>( - src: &[S], - dst: &mut [D], - f: unsafe fn(&[S; 4]) -> [D; 4], -) { - assert_eq!(src.len(), dst.len()); - - // TODO: Can be further optimized with array_chunks when it becomes stabilized - - let src_chunks = src.chunks_exact(4); - let mut dst_chunks = dst.chunks_exact_mut(4); - let src_remainder = src_chunks.remainder(); - for (s, d) in src_chunks.zip(&mut dst_chunks) { - let chunk: &[S; 4] = s.try_into().unwrap(); - d.copy_from_slice(unsafe { &f(chunk) }); - } - - // Process remainder - if !src_remainder.is_empty() { - let mut buf: [S; 4] = Default::default(); - buf[..src_remainder.len()].copy_from_slice(src_remainder); - let vec = unsafe { f(&buf) }; - let dst_remainder = dst_chunks.into_remainder(); - dst_remainder.copy_from_slice(&vec[..dst_remainder.len()]); - } -} - -/////////////// Fallbacks //////////////// - -// In the below functions, round to nearest, with ties to even. -// Let us call the most significant bit that will be shifted out the round_bit. -// -// Round up if either -// a) Removed part > tie. -// (mantissa & round_bit) != 0 && (mantissa & (round_bit - 1)) != 0 -// b) Removed part == tie, and retained part is odd. -// (mantissa & round_bit) != 0 && (mantissa & (2 * round_bit)) != 0 -// (If removed part == tie and retained part is even, do not round up.) 
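-// For example, in the f32 -> f16 normal path below round_bit = 0x1000: a removed
-// part of 0x0FFF never rounds up, 0x1001 always rounds up, and exactly 0x1000
-// (the tie) rounds up only when the lowest retained mantissa bit (0x2000) is set.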
-// These two conditions can be combined into one: -// (mantissa & round_bit) != 0 && (mantissa & ((round_bit - 1) | (2 * round_bit))) != 0 -// which can be simplified into -// (mantissa & round_bit) != 0 && (mantissa & (3 * round_bit - 1)) != 0 - -#[inline] -pub(crate) const fn f32_to_f16_fallback(value: f32) -> u16 { - // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized - // Convert to raw bytes - let x: u32 = unsafe { mem::transmute(value) }; - - // Extract IEEE754 components - let sign = x & 0x8000_0000u32; - let exp = x & 0x7F80_0000u32; - let man = x & 0x007F_FFFFu32; - - // Check for all exponent bits being set, which is Infinity or NaN - if exp == 0x7F80_0000u32 { - // Set mantissa MSB for NaN (and also keep shifted mantissa bits) - let nan_bit = if man == 0 { 0 } else { 0x0200u32 }; - return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 13)) as u16; - } - - // The number is normalized, start assembling half precision version - let half_sign = sign >> 16; - // Unbias the exponent, then bias for half precision - let unbiased_exp = ((exp >> 23) as i32) - 127; - let half_exp = unbiased_exp + 15; - - // Check for exponent overflow, return +infinity - if half_exp >= 0x1F { - return (half_sign | 0x7C00u32) as u16; - } - - // Check for underflow - if half_exp <= 0 { - // Check mantissa for what we can do - if 14 - half_exp > 24 { - // No rounding possibility, so this is a full underflow, return signed zero - return half_sign as u16; - } - // Don't forget about hidden leading mantissa bit when assembling mantissa - let man = man | 0x0080_0000u32; - let mut half_man = man >> (14 - half_exp); - // Check for rounding (see comment above functions) - let round_bit = 1 << (13 - half_exp); - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - half_man += 1; - } - // No exponent for subnormals - return (half_sign | half_man) as u16; - } - - // Rebias the exponent - let half_exp = (half_exp as u32) << 10; - let half_man = man >> 13; - // Check for rounding (see comment above functions) - let round_bit = 0x0000_1000u32; - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - // Round it - ((half_sign | half_exp | half_man) + 1) as u16 - } else { - (half_sign | half_exp | half_man) as u16 - } -} - -#[inline] -pub(crate) const fn f64_to_f16_fallback(value: f64) -> u16 { - // Convert to raw bytes, truncating the last 32-bits of mantissa; that precision will always - // be lost on half-precision. - // TODO: Replace mem::transmute with to_bits() once to_bits is const-stabilized - let val: u64 = unsafe { mem::transmute(value) }; - let x = (val >> 32) as u32; - - // Extract IEEE754 components - let sign = x & 0x8000_0000u32; - let exp = x & 0x7FF0_0000u32; - let man = x & 0x000F_FFFFu32; - - // Check for all exponent bits being set, which is Infinity or NaN - if exp == 0x7FF0_0000u32 { - // Set mantissa MSB for NaN (and also keep shifted mantissa bits). - // We also have to check the last 32 bits. 
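- // (`val as u32` holds the 32 low-order mantissa bits dropped when the f64 bits
- // were truncated to the high word `x`; a NaN whose payload sits only in those
- // low bits must still become NaN rather than collapse to an infinity.)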
- let nan_bit = if man == 0 && (val as u32 == 0) { - 0 - } else { - 0x0200u32 - }; - return ((sign >> 16) | 0x7C00u32 | nan_bit | (man >> 10)) as u16; - } - - // The number is normalized, start assembling half precision version - let half_sign = sign >> 16; - // Unbias the exponent, then bias for half precision - let unbiased_exp = ((exp >> 20) as i64) - 1023; - let half_exp = unbiased_exp + 15; - - // Check for exponent overflow, return +infinity - if half_exp >= 0x1F { - return (half_sign | 0x7C00u32) as u16; - } - - // Check for underflow - if half_exp <= 0 { - // Check mantissa for what we can do - if 10 - half_exp > 21 { - // No rounding possibility, so this is a full underflow, return signed zero - return half_sign as u16; - } - // Don't forget about hidden leading mantissa bit when assembling mantissa - let man = man | 0x0010_0000u32; - let mut half_man = man >> (11 - half_exp); - // Check for rounding (see comment above functions) - let round_bit = 1 << (10 - half_exp); - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - half_man += 1; - } - // No exponent for subnormals - return (half_sign | half_man) as u16; - } - - // Rebias the exponent - let half_exp = (half_exp as u32) << 10; - let half_man = man >> 10; - // Check for rounding (see comment above functions) - let round_bit = 0x0000_0200u32; - if (man & round_bit) != 0 && (man & (3 * round_bit - 1)) != 0 { - // Round it - ((half_sign | half_exp | half_man) + 1) as u16 - } else { - (half_sign | half_exp | half_man) as u16 - } -} - -#[inline] -pub(crate) const fn f16_to_f32_fallback(i: u16) -> f32 { - // Check for signed zero - // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized - if i & 0x7FFFu16 == 0 { - return unsafe { mem::transmute((i as u32) << 16) }; - } - - let half_sign = (i & 0x8000u16) as u32; - let half_exp = (i & 0x7C00u16) as u32; - let half_man = (i & 0x03FFu16) as u32; - - // Check for an infinity or NaN when all exponent bits set - if half_exp == 0x7C00u32 { - // Check for signed infinity if mantissa is zero - if half_man == 0 { - return unsafe { mem::transmute((half_sign << 16) | 0x7F80_0000u32) }; - } else { - // NaN, keep current mantissa but also set most significiant mantissa bit - return unsafe { - mem::transmute((half_sign << 16) | 0x7FC0_0000u32 | (half_man << 13)) - }; - } - } - - // Calculate single-precision components with adjusted exponent - let sign = half_sign << 16; - // Unbias exponent - let unbiased_exp = ((half_exp as i32) >> 10) - 15; - - // Check for subnormals, which will be normalized by adjusting exponent - if half_exp == 0 { - // Calculate how much to adjust the exponent by - let e = leading_zeros_u16(half_man as u16) - 6; - - // Rebias and adjust exponent - let exp = (127 - 15 - e) << 23; - let man = (half_man << (14 + e)) & 0x7F_FF_FFu32; - return unsafe { mem::transmute(sign | exp | man) }; - } - - // Rebias exponent for a normalized normal - let exp = ((unbiased_exp + 127) as u32) << 23; - let man = (half_man & 0x03FFu32) << 13; - unsafe { mem::transmute(sign | exp | man) } -} - -#[inline] -pub(crate) const fn f16_to_f64_fallback(i: u16) -> f64 { - // Check for signed zero - // TODO: Replace mem::transmute with from_bits() once from_bits is const-stabilized - if i & 0x7FFFu16 == 0 { - return unsafe { mem::transmute((i as u64) << 48) }; - } - - let half_sign = (i & 0x8000u16) as u64; - let half_exp = (i & 0x7C00u16) as u64; - let half_man = (i & 0x03FFu16) as u64; - - // Check for an infinity or NaN when all exponent bits set - if 
half_exp == 0x7C00u64 { - // Check for signed infinity if mantissa is zero - if half_man == 0 { - return unsafe { mem::transmute((half_sign << 48) | 0x7FF0_0000_0000_0000u64) }; - } else { - // NaN, keep current mantissa but also set most significiant mantissa bit - return unsafe { - mem::transmute((half_sign << 48) | 0x7FF8_0000_0000_0000u64 | (half_man << 42)) - }; - } - } - - // Calculate double-precision components with adjusted exponent - let sign = half_sign << 48; - // Unbias exponent - let unbiased_exp = ((half_exp as i64) >> 10) - 15; - - // Check for subnormals, which will be normalized by adjusting exponent - if half_exp == 0 { - // Calculate how much to adjust the exponent by - let e = leading_zeros_u16(half_man as u16) - 6; - - // Rebias and adjust exponent - let exp = ((1023 - 15 - e) as u64) << 52; - let man = (half_man << (43 + e)) & 0xF_FFFF_FFFF_FFFFu64; - return unsafe { mem::transmute(sign | exp | man) }; - } - - // Rebias exponent for a normalized normal - let exp = ((unbiased_exp + 1023) as u64) << 52; - let man = (half_man & 0x03FFu64) << 42; - unsafe { mem::transmute(sign | exp | man) } -} - -#[inline] -fn f16x4_to_f32x4_fallback(v: &[u16; 4]) -> [f32; 4] { - [ - f16_to_f32_fallback(v[0]), - f16_to_f32_fallback(v[1]), - f16_to_f32_fallback(v[2]), - f16_to_f32_fallback(v[3]), - ] -} - -#[inline] -fn f32x4_to_f16x4_fallback(v: &[f32; 4]) -> [u16; 4] { - [ - f32_to_f16_fallback(v[0]), - f32_to_f16_fallback(v[1]), - f32_to_f16_fallback(v[2]), - f32_to_f16_fallback(v[3]), - ] -} - -#[inline] -fn f16x4_to_f64x4_fallback(v: &[u16; 4]) -> [f64; 4] { - [ - f16_to_f64_fallback(v[0]), - f16_to_f64_fallback(v[1]), - f16_to_f64_fallback(v[2]), - f16_to_f64_fallback(v[3]), - ] -} - -#[inline] -fn f64x4_to_f16x4_fallback(v: &[f64; 4]) -> [u16; 4] { - [ - f64_to_f16_fallback(v[0]), - f64_to_f16_fallback(v[1]), - f64_to_f16_fallback(v[2]), - f64_to_f16_fallback(v[3]), - ] -} - -#[inline] -fn f16x8_to_f32x8_fallback(v: &[u16; 8]) -> [f32; 8] { - [ - f16_to_f32_fallback(v[0]), - f16_to_f32_fallback(v[1]), - f16_to_f32_fallback(v[2]), - f16_to_f32_fallback(v[3]), - f16_to_f32_fallback(v[4]), - f16_to_f32_fallback(v[5]), - f16_to_f32_fallback(v[6]), - f16_to_f32_fallback(v[7]), - ] -} - -#[inline] -fn f32x8_to_f16x8_fallback(v: &[f32; 8]) -> [u16; 8] { - [ - f32_to_f16_fallback(v[0]), - f32_to_f16_fallback(v[1]), - f32_to_f16_fallback(v[2]), - f32_to_f16_fallback(v[3]), - f32_to_f16_fallback(v[4]), - f32_to_f16_fallback(v[5]), - f32_to_f16_fallback(v[6]), - f32_to_f16_fallback(v[7]), - ] -} - -#[inline] -fn f16x8_to_f64x8_fallback(v: &[u16; 8]) -> [f64; 8] { - [ - f16_to_f64_fallback(v[0]), - f16_to_f64_fallback(v[1]), - f16_to_f64_fallback(v[2]), - f16_to_f64_fallback(v[3]), - f16_to_f64_fallback(v[4]), - f16_to_f64_fallback(v[5]), - f16_to_f64_fallback(v[6]), - f16_to_f64_fallback(v[7]), - ] -} - -#[inline] -fn f64x8_to_f16x8_fallback(v: &[f64; 8]) -> [u16; 8] { - [ - f64_to_f16_fallback(v[0]), - f64_to_f16_fallback(v[1]), - f64_to_f16_fallback(v[2]), - f64_to_f16_fallback(v[3]), - f64_to_f16_fallback(v[4]), - f64_to_f16_fallback(v[5]), - f64_to_f16_fallback(v[6]), - f64_to_f16_fallback(v[7]), - ] -} - -#[inline] -fn slice_fallback<S: Copy, D>(src: &[S], dst: &mut [D], f: fn(S) -> D) { - assert_eq!(src.len(), dst.len()); - for (s, d) in src.iter().copied().zip(dst.iter_mut()) { - *d = f(s); - } -} - -/////////////// x86/x86_64 f16c //////////////// -#[cfg(all( - feature = "use-intrinsics", - any(target_arch = "x86", target_arch = "x86_64") -))] -mod x86 { - use 
core::{mem::MaybeUninit, ptr}; - - #[cfg(target_arch = "x86")] - use core::arch::x86::{ - __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, - _MM_FROUND_TO_NEAREST_INT, - }; - #[cfg(target_arch = "x86_64")] - use core::arch::x86_64::{ - __m128, __m128i, __m256, _mm256_cvtph_ps, _mm256_cvtps_ph, _mm_cvtph_ps, _mm_cvtps_ph, - _MM_FROUND_TO_NEAREST_INT, - }; - - use super::convert_chunked_slice_8; - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f16_to_f32_x86_f16c(i: u16) -> f32 { - let mut vec = MaybeUninit::<__m128i>::zeroed(); - vec.as_mut_ptr().cast::<u16>().write(i); - let retval = _mm_cvtph_ps(vec.assume_init()); - *(&retval as *const __m128).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f32_to_f16_x86_f16c(f: f32) -> u16 { - let mut vec = MaybeUninit::<__m128>::zeroed(); - vec.as_mut_ptr().cast::<f32>().write(f); - let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); - *(&retval as *const __m128i).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f16x4_to_f32x4_x86_f16c(v: &[u16; 4]) -> [f32; 4] { - let mut vec = MaybeUninit::<__m128i>::zeroed(); - ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); - let retval = _mm_cvtph_ps(vec.assume_init()); - *(&retval as *const __m128).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f32x4_to_f16x4_x86_f16c(v: &[f32; 4]) -> [u16; 4] { - let mut vec = MaybeUninit::<__m128>::uninit(); - ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 4); - let retval = _mm_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); - *(&retval as *const __m128i).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f16x4_to_f64x4_x86_f16c(v: &[u16; 4]) -> [f64; 4] { - let array = f16x4_to_f32x4_x86_f16c(v); - // Let compiler vectorize this regular cast for now. - // TODO: investigate auto-detecting sse2/avx convert features - [ - array[0] as f64, - array[1] as f64, - array[2] as f64, - array[3] as f64, - ] - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f64x4_to_f16x4_x86_f16c(v: &[f64; 4]) -> [u16; 4] { - // Let compiler vectorize this regular cast for now. - // TODO: investigate auto-detecting sse2/avx convert features - let v = [v[0] as f32, v[1] as f32, v[2] as f32, v[3] as f32]; - f32x4_to_f16x4_x86_f16c(&v) - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f16x8_to_f32x8_x86_f16c(v: &[u16; 8]) -> [f32; 8] { - let mut vec = MaybeUninit::<__m128i>::zeroed(); - ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); - let retval = _mm256_cvtph_ps(vec.assume_init()); - *(&retval as *const __m256).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f32x8_to_f16x8_x86_f16c(v: &[f32; 8]) -> [u16; 8] { - let mut vec = MaybeUninit::<__m256>::uninit(); - ptr::copy_nonoverlapping(v.as_ptr(), vec.as_mut_ptr().cast(), 8); - let retval = _mm256_cvtps_ph(vec.assume_init(), _MM_FROUND_TO_NEAREST_INT); - *(&retval as *const __m128i).cast() - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f16x8_to_f64x8_x86_f16c(v: &[u16; 8]) -> [f64; 8] { - let array = f16x8_to_f32x8_x86_f16c(v); - // Let compiler vectorize this regular cast for now. 
- // TODO: investigate auto-detecting sse2/avx convert features - [ - array[0] as f64, - array[1] as f64, - array[2] as f64, - array[3] as f64, - array[4] as f64, - array[5] as f64, - array[6] as f64, - array[7] as f64, - ] - } - - #[target_feature(enable = "f16c")] - #[inline] - pub(super) unsafe fn f64x8_to_f16x8_x86_f16c(v: &[f64; 8]) -> [u16; 8] { - // Let compiler vectorize this regular cast for now. - // TODO: investigate auto-detecting sse2/avx convert features - let v = [ - v[0] as f32, - v[1] as f32, - v[2] as f32, - v[3] as f32, - v[4] as f32, - v[5] as f32, - v[6] as f32, - v[7] as f32, - ]; - f32x8_to_f16x8_x86_f16c(&v) - } -} diff --git a/vendor/half/src/leading_zeros.rs b/vendor/half/src/leading_zeros.rs deleted file mode 100644 index 6c73148..0000000 --- a/vendor/half/src/leading_zeros.rs +++ /dev/null @@ -1,62 +0,0 @@ -// https://doc.rust-lang.org/std/primitive.u16.html#method.leading_zeros - -#[cfg(not(any(all( - target_arch = "spirv", - not(all( - target_feature = "IntegerFunctions2INTEL", - target_feature = "SPV_INTEL_shader_integer_functions2" - )) -))))] -pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { - x.leading_zeros() -} - -#[cfg(all( - target_arch = "spirv", - not(all( - target_feature = "IntegerFunctions2INTEL", - target_feature = "SPV_INTEL_shader_integer_functions2" - )) -))] -pub(crate) const fn leading_zeros_u16(x: u16) -> u32 { - leading_zeros_u16_fallback(x) -} - -#[cfg(any( - test, - all( - target_arch = "spirv", - not(all( - target_feature = "IntegerFunctions2INTEL", - target_feature = "SPV_INTEL_shader_integer_functions2" - )) - ) -))] -const fn leading_zeros_u16_fallback(mut x: u16) -> u32 { - use crunchy::unroll; - let mut c = 0; - let msb = 1 << 15; - unroll! { for i in 0 .. 16 { - if x & msb == 0 { - c += 1; - } else { - return c; - } - #[allow(unused_assignments)] - if i < 15 { - x <<= 1; - } - }} - c -} - -#[cfg(test)] -mod test { - - #[test] - fn leading_zeros_u16_fallback() { - for x in [44, 97, 304, 1179, 23571] { - assert_eq!(super::leading_zeros_u16_fallback(x), x.leading_zeros()); - } - } -} diff --git a/vendor/half/src/lib.rs b/vendor/half/src/lib.rs deleted file mode 100644 index f821945..0000000 --- a/vendor/half/src/lib.rs +++ /dev/null @@ -1,233 +0,0 @@ -//! A crate that provides support for half-precision 16-bit floating point types. -//! -//! This crate provides the [`f16`] type, which is an implementation of the IEEE 754-2008 standard -//! [`binary16`] a.k.a `half` floating point type. This 16-bit floating point type is intended for -//! efficient storage where the full range and precision of a larger floating point value is not -//! required. This is especially useful for image storage formats. -//! -//! This crate also provides a [`bf16`] type, an alternative 16-bit floating point format. The -//! [`bfloat16`] format is a truncated IEEE 754 standard `binary32` float that preserves the -//! exponent to allow the same range as [`f32`] but with only 8 bits of precision (instead of 11 -//! bits for [`f16`]). See the [`bf16`] type for details. -//! -//! Because [`f16`] and [`bf16`] are primarily for efficient storage, floating point operations such -//! as addition, multiplication, etc. are not implemented by hardware. While this crate does provide -//! the appropriate trait implementations for basic operations, they each convert the value to -//! [`f32`] before performing the operation and then back afterward. When performing complex -//! 
arithmetic, manually convert to and from [`f32`] before and after to reduce repeated conversions -//! for each operation. -//! -//! This crate also provides a [`slice`][mod@slice] module for zero-copy in-place conversions of -//! [`u16`] slices to both [`f16`] and [`bf16`], as well as efficient vectorized conversions of -//! larger buffers of floating point values to and from these half formats. -//! -//! The crate uses `#[no_std]` by default, so can be used in embedded environments without using the -//! Rust [`std`] library. A `std` feature to enable support for the standard library is available, -//! see the [Cargo Features](#cargo-features) section below. -//! -//! A [`prelude`] module is provided for easy importing of available utility traits. -//! -//! # Serialization -//! -//! When the `serde` feature is enabled, [`f16`] and [`bf16`] will be serialized as a newtype of -//! [`u16`] by default. In binary formats this is ideal, as it will generally use just two bytes for -//! storage. For string formats like JSON, however, this isn't as useful, and due to design -//! limitations of serde, it's not possible for the default `Serialize` implementation to support -//! different serialization for different formats. -//! -//! Instead, it's up to the containter type of the floats to control how it is serialized. This can -//! easily be controlled when using the derive macros using `#[serde(serialize_with="")]` -//! attributes. For both [`f16`] and [`bf16`] a `serialize_as_f32` and `serialize_as_string` are -//! provided for use with this attribute. -//! -//! Deserialization of both float types supports deserializing from the default serialization, -//! strings, and `f32`/`f64` values, so no additional work is required. -//! -//! # Cargo Features -//! -//! This crate supports a number of optional cargo features. None of these features are enabled by -//! default, even `std`. -//! -//! - **`use-intrinsics`** -- Use [`core::arch`] hardware intrinsics for `f16` and `bf16` conversions -//! if available on the compiler target. This feature currently only works on nightly Rust -//! until the corresponding intrinsics are stabilized. -//! -//! When this feature is enabled and the hardware supports it, the functions and traits in the -//! [`slice`][mod@slice] module will use vectorized SIMD intructions for increased efficiency. -//! -//! By default, without this feature, conversions are done only in software, which will also be -//! the fallback if the target does not have hardware support. Note that without the `std` -//! feature enabled, no runtime CPU feature detection is used, so the hardware support is only -//! compiled if the compiler target supports the CPU feature. -//! -//! - **`alloc`** -- Enable use of the [`alloc`] crate when not using the `std` library. -//! -//! Among other functions, this enables the [`vec`] module, which contains zero-copy -//! conversions for the [`Vec`] type. This allows fast conversion between raw `Vec<u16>` bits and -//! `Vec<f16>` or `Vec<bf16>` arrays, and vice versa. -//! -//! - **`std`** -- Enable features that depend on the Rust [`std`] library. This also enables the -//! `alloc` feature automatically. -//! -//! Enabling the `std` feature also enables runtime CPU feature detection when the -//! `use-intrsincis` feature is also enabled. Without this feature detection, intrinsics are only -//! used when compiler target supports the target feature. -//! -//! - **`serde`** -- Adds support for the [`serde`] crate by implementing [`Serialize`] and -//! 
[`Deserialize`] traits for both [`f16`] and [`bf16`]. -//! -//! - **`num-traits`** -- Adds support for the [`num-traits`] crate by implementing [`ToPrimitive`], -//! [`FromPrimitive`], [`AsPrimitive`], [`Num`], [`Float`], [`FloatCore`], and [`Bounded`] traits -//! for both [`f16`] and [`bf16`]. -//! -//! - **`bytemuck`** -- Adds support for the [`bytemuck`] crate by implementing [`Zeroable`] and -//! [`Pod`] traits for both [`f16`] and [`bf16`]. -//! -//! - **`zerocopy`** -- Adds support for the [`zerocopy`] crate by implementing [`AsBytes`] and -//! [`FromBytes`] traits for both [`f16`] and [`bf16`]. -//! -//! [`alloc`]: https://doc.rust-lang.org/alloc/ -//! [`std`]: https://doc.rust-lang.org/std/ -//! [`binary16`]: https://en.wikipedia.org/wiki/Half-precision_floating-point_format -//! [`bfloat16`]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format -//! [`serde`]: https://crates.io/crates/serde -//! [`bytemuck`]: https://crates.io/crates/bytemuck -//! [`num-traits`]: https://crates.io/crates/num-traits -//! [`zerocopy`]: https://crates.io/crates/zerocopy -#![cfg_attr( - feature = "alloc", - doc = " -[`vec`]: mod@vec" -)] -#![cfg_attr( - not(feature = "alloc"), - doc = " -[`vec`]: # -[`Vec`]: https://docs.rust-lang.org/stable/alloc/vec/struct.Vec.html" -)] -#![cfg_attr( - feature = "serde", - doc = " -[`Serialize`]: serde::Serialize -[`Deserialize`]: serde::Deserialize" -)] -#![cfg_attr( - not(feature = "serde"), - doc = " -[`Serialize`]: https://docs.rs/serde/*/serde/trait.Serialize.html -[`Deserialize`]: https://docs.rs/serde/*/serde/trait.Deserialize.html" -)] -#![cfg_attr( - feature = "num-traits", - doc = " -[`ToPrimitive`]: ::num_traits::ToPrimitive -[`FromPrimitive`]: ::num_traits::FromPrimitive -[`AsPrimitive`]: ::num_traits::AsPrimitive -[`Num`]: ::num_traits::Num -[`Float`]: ::num_traits::Float -[`FloatCore`]: ::num_traits::float::FloatCore -[`Bounded`]: ::num_traits::Bounded" -)] -#![cfg_attr( - not(feature = "num-traits"), - doc = " -[`ToPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.ToPrimitive.html -[`FromPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.FromPrimitive.html -[`AsPrimitive`]: https://docs.rs/num-traits/*/num_traits/cast/trait.AsPrimitive.html -[`Num`]: https://docs.rs/num-traits/*/num_traits/trait.Num.html -[`Float`]: https://docs.rs/num-traits/*/num_traits/float/trait.Float.html -[`FloatCore`]: https://docs.rs/num-traits/*/num_traits/float/trait.FloatCore.html -[`Bounded`]: https://docs.rs/num-traits/*/num_traits/bounds/trait.Bounded.html" -)] -#![cfg_attr( - feature = "bytemuck", - doc = " -[`Zeroable`]: bytemuck::Zeroable -[`Pod`]: bytemuck::Pod" -)] -#![cfg_attr( - not(feature = "bytemuck"), - doc = " -[`Zeroable`]: https://docs.rs/bytemuck/*/bytemuck/trait.Zeroable.html -[`Pod`]: https://docs.rs/bytemuck/*bytemuck/trait.Pod.html" -)] -#![cfg_attr( - feature = "zerocopy", - doc = " -[`AsBytes`]: zerocopy::AsBytes -[`FromBytes`]: zerocopy::FromBytes" -)] -#![cfg_attr( - not(feature = "zerocopy"), - doc = " -[`AsBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.AsBytes.html -[`FromBytes`]: https://docs.rs/zerocopy/*/zerocopy/trait.FromBytes.html" -)] -#![warn( - missing_docs, - missing_copy_implementations, - trivial_numeric_casts, - future_incompatible -)] -#![cfg_attr(not(target_arch = "spirv"), warn(missing_debug_implementations))] -#![allow(clippy::verbose_bit_mask, clippy::cast_lossless)] -#![cfg_attr(not(feature = "std"), no_std)] -#![cfg_attr( - all( - feature = "use-intrinsics", - any(target_arch = "x86", 
target_arch = "x86_64") - ), - feature(stdsimd, f16c_target_feature) -)] -#![doc(html_root_url = "https://docs.rs/half/2.2.1")] -#![doc(test(attr(deny(warnings), allow(unused))))] -#![cfg_attr(docsrs, feature(doc_cfg))] - -#[cfg(feature = "alloc")] -extern crate alloc; - -mod bfloat; -mod binary16; -mod leading_zeros; -#[cfg(feature = "num-traits")] -mod num_traits; - -#[cfg(not(target_arch = "spirv"))] -pub mod slice; -#[cfg(feature = "alloc")] -#[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] -pub mod vec; - -pub use bfloat::bf16; -pub use binary16::f16; - -/// A collection of the most used items and traits in this crate for easy importing. -/// -/// # Examples -/// -/// ```rust -/// use half::prelude::*; -/// ``` -pub mod prelude { - #[doc(no_inline)] - pub use crate::{bf16, f16}; - - #[cfg(not(target_arch = "spirv"))] - #[doc(no_inline)] - pub use crate::slice::{HalfBitsSliceExt, HalfFloatSliceExt}; - - #[cfg(feature = "alloc")] - #[doc(no_inline)] - #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))] - pub use crate::vec::{HalfBitsVecExt, HalfFloatVecExt}; -} - -// Keep this module private to crate -mod private { - use crate::{bf16, f16}; - - pub trait SealedHalf {} - - impl SealedHalf for f16 {} - impl SealedHalf for bf16 {} -} diff --git a/vendor/half/src/num_traits.rs b/vendor/half/src/num_traits.rs deleted file mode 100644 index 4318699..0000000 --- a/vendor/half/src/num_traits.rs +++ /dev/null @@ -1,1483 +0,0 @@ -use crate::{bf16, f16}; -use core::cmp::Ordering; -use core::{num::FpCategory, ops::Div}; -use num_traits::{ - AsPrimitive, Bounded, FloatConst, FromPrimitive, Num, NumCast, One, ToPrimitive, Zero, -}; - -impl ToPrimitive for f16 { - #[inline] - fn to_i64(&self) -> Option<i64> { - Self::to_f32(*self).to_i64() - } - #[inline] - fn to_u64(&self) -> Option<u64> { - Self::to_f32(*self).to_u64() - } - #[inline] - fn to_i8(&self) -> Option<i8> { - Self::to_f32(*self).to_i8() - } - #[inline] - fn to_u8(&self) -> Option<u8> { - Self::to_f32(*self).to_u8() - } - #[inline] - fn to_i16(&self) -> Option<i16> { - Self::to_f32(*self).to_i16() - } - #[inline] - fn to_u16(&self) -> Option<u16> { - Self::to_f32(*self).to_u16() - } - #[inline] - fn to_i32(&self) -> Option<i32> { - Self::to_f32(*self).to_i32() - } - #[inline] - fn to_u32(&self) -> Option<u32> { - Self::to_f32(*self).to_u32() - } - #[inline] - fn to_f32(&self) -> Option<f32> { - Some(Self::to_f32(*self)) - } - #[inline] - fn to_f64(&self) -> Option<f64> { - Some(Self::to_f64(*self)) - } -} - -impl FromPrimitive for f16 { - #[inline] - fn from_i64(n: i64) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u64(n: u64) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i8(n: i8) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u8(n: u8) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i16(n: i16) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u16(n: u16) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i32(n: i32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u32(n: u32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_f32(n: f32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_f64(n: f64) -> Option<Self> { - n.to_f64().map(Self::from_f64) - } -} - -impl Num for f16 { - type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; - - #[inline] - fn from_str_radix(str: &str, radix: u32) 
-> Result<Self, Self::FromStrRadixErr> { - Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) - } -} - -impl One for f16 { - #[inline] - fn one() -> Self { - Self::ONE - } -} - -impl Zero for f16 { - #[inline] - fn zero() -> Self { - Self::ZERO - } - - #[inline] - fn is_zero(&self) -> bool { - *self == Self::ZERO - } -} - -impl NumCast for f16 { - #[inline] - fn from<T: ToPrimitive>(n: T) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } -} - -impl num_traits::float::FloatCore for f16 { - #[inline] - fn infinity() -> Self { - Self::INFINITY - } - - #[inline] - fn neg_infinity() -> Self { - Self::NEG_INFINITY - } - - #[inline] - fn nan() -> Self { - Self::NAN - } - - #[inline] - fn neg_zero() -> Self { - Self::NEG_ZERO - } - - #[inline] - fn min_value() -> Self { - Self::MIN - } - - #[inline] - fn min_positive_value() -> Self { - Self::MIN_POSITIVE - } - - #[inline] - fn epsilon() -> Self { - Self::EPSILON - } - - #[inline] - fn max_value() -> Self { - Self::MAX - } - - #[inline] - fn is_nan(self) -> bool { - self.is_nan() - } - - #[inline] - fn is_infinite(self) -> bool { - self.is_infinite() - } - - #[inline] - fn is_finite(self) -> bool { - self.is_finite() - } - - #[inline] - fn is_normal(self) -> bool { - self.is_normal() - } - - #[inline] - fn classify(self) -> FpCategory { - self.classify() - } - - #[inline] - fn floor(self) -> Self { - Self::from_f32(self.to_f32().floor()) - } - - #[inline] - fn ceil(self) -> Self { - Self::from_f32(self.to_f32().ceil()) - } - - #[inline] - fn round(self) -> Self { - Self::from_f32(self.to_f32().round()) - } - - #[inline] - fn trunc(self) -> Self { - Self::from_f32(self.to_f32().trunc()) - } - - #[inline] - fn fract(self) -> Self { - Self::from_f32(self.to_f32().fract()) - } - - #[inline] - fn abs(self) -> Self { - Self::from_bits(self.to_bits() & 0x7FFF) - } - - #[inline] - fn signum(self) -> Self { - self.signum() - } - - #[inline] - fn is_sign_positive(self) -> bool { - self.is_sign_positive() - } - - #[inline] - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - - fn min(self, other: Self) -> Self { - match self.partial_cmp(&other) { - None => { - if self.is_nan() { - other - } else { - self - } - } - Some(Ordering::Greater) | Some(Ordering::Equal) => other, - Some(Ordering::Less) => self, - } - } - - fn max(self, other: Self) -> Self { - match self.partial_cmp(&other) { - None => { - if self.is_nan() { - other - } else { - self - } - } - Some(Ordering::Greater) | Some(Ordering::Equal) => self, - Some(Ordering::Less) => other, - } - } - - #[inline] - fn recip(self) -> Self { - Self::from_f32(self.to_f32().recip()) - } - - #[inline] - fn powi(self, exp: i32) -> Self { - Self::from_f32(self.to_f32().powi(exp)) - } - - #[inline] - fn to_degrees(self) -> Self { - Self::from_f32(self.to_f32().to_degrees()) - } - - #[inline] - fn to_radians(self) -> Self { - Self::from_f32(self.to_f32().to_radians()) - } - - #[inline] - fn integer_decode(self) -> (u64, i16, i8) { - num_traits::float::FloatCore::integer_decode(self.to_f32()) - } -} - -impl num_traits::float::Float for f16 { - #[inline] - fn nan() -> Self { - Self::NAN - } - - #[inline] - fn infinity() -> Self { - Self::INFINITY - } - - #[inline] - fn neg_infinity() -> Self { - Self::NEG_INFINITY - } - - #[inline] - fn neg_zero() -> Self { - Self::NEG_ZERO - } - - #[inline] - fn min_value() -> Self { - Self::MIN - } - - #[inline] - fn min_positive_value() -> Self { - Self::MIN_POSITIVE - } - - #[inline] - fn epsilon() -> Self { - Self::EPSILON - } - - #[inline] - fn max_value() 
-> Self { - Self::MAX - } - - #[inline] - fn is_nan(self) -> bool { - self.is_nan() - } - - #[inline] - fn is_infinite(self) -> bool { - self.is_infinite() - } - - #[inline] - fn is_finite(self) -> bool { - self.is_finite() - } - - #[inline] - fn is_normal(self) -> bool { - self.is_normal() - } - - #[inline] - fn classify(self) -> FpCategory { - self.classify() - } - - #[inline] - fn floor(self) -> Self { - Self::from_f32(self.to_f32().floor()) - } - - #[inline] - fn ceil(self) -> Self { - Self::from_f32(self.to_f32().ceil()) - } - - #[inline] - fn round(self) -> Self { - Self::from_f32(self.to_f32().round()) - } - - #[inline] - fn trunc(self) -> Self { - Self::from_f32(self.to_f32().trunc()) - } - - #[inline] - fn fract(self) -> Self { - Self::from_f32(self.to_f32().fract()) - } - - #[inline] - fn abs(self) -> Self { - Self::from_f32(self.to_f32().abs()) - } - - #[inline] - fn signum(self) -> Self { - Self::from_f32(self.to_f32().signum()) - } - - #[inline] - fn is_sign_positive(self) -> bool { - self.is_sign_positive() - } - - #[inline] - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - - #[inline] - fn mul_add(self, a: Self, b: Self) -> Self { - Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) - } - - #[inline] - fn recip(self) -> Self { - Self::from_f32(self.to_f32().recip()) - } - - #[inline] - fn powi(self, n: i32) -> Self { - Self::from_f32(self.to_f32().powi(n)) - } - - #[inline] - fn powf(self, n: Self) -> Self { - Self::from_f32(self.to_f32().powf(n.to_f32())) - } - - #[inline] - fn sqrt(self) -> Self { - Self::from_f32(self.to_f32().sqrt()) - } - - #[inline] - fn exp(self) -> Self { - Self::from_f32(self.to_f32().exp()) - } - - #[inline] - fn exp2(self) -> Self { - Self::from_f32(self.to_f32().exp2()) - } - - #[inline] - fn ln(self) -> Self { - Self::from_f32(self.to_f32().ln()) - } - - #[inline] - fn log(self, base: Self) -> Self { - Self::from_f32(self.to_f32().log(base.to_f32())) - } - - #[inline] - fn log2(self) -> Self { - Self::from_f32(self.to_f32().log2()) - } - - #[inline] - fn log10(self) -> Self { - Self::from_f32(self.to_f32().log10()) - } - - #[inline] - fn to_degrees(self) -> Self { - Self::from_f32(self.to_f32().to_degrees()) - } - - #[inline] - fn to_radians(self) -> Self { - Self::from_f32(self.to_f32().to_radians()) - } - - #[inline] - fn max(self, other: Self) -> Self { - self.max(other) - } - - #[inline] - fn min(self, other: Self) -> Self { - self.min(other) - } - - #[inline] - fn abs_sub(self, other: Self) -> Self { - Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) - } - - #[inline] - fn cbrt(self) -> Self { - Self::from_f32(self.to_f32().cbrt()) - } - - #[inline] - fn hypot(self, other: Self) -> Self { - Self::from_f32(self.to_f32().hypot(other.to_f32())) - } - - #[inline] - fn sin(self) -> Self { - Self::from_f32(self.to_f32().sin()) - } - - #[inline] - fn cos(self) -> Self { - Self::from_f32(self.to_f32().cos()) - } - - #[inline] - fn tan(self) -> Self { - Self::from_f32(self.to_f32().tan()) - } - - #[inline] - fn asin(self) -> Self { - Self::from_f32(self.to_f32().asin()) - } - - #[inline] - fn acos(self) -> Self { - Self::from_f32(self.to_f32().acos()) - } - - #[inline] - fn atan(self) -> Self { - Self::from_f32(self.to_f32().atan()) - } - - #[inline] - fn atan2(self, other: Self) -> Self { - Self::from_f32(self.to_f32().atan2(other.to_f32())) - } - - #[inline] - fn sin_cos(self) -> (Self, Self) { - let (sin, cos) = self.to_f32().sin_cos(); - (Self::from_f32(sin), Self::from_f32(cos)) - } - - #[inline] - fn 
exp_m1(self) -> Self { - Self::from_f32(self.to_f32().exp_m1()) - } - - #[inline] - fn ln_1p(self) -> Self { - Self::from_f32(self.to_f32().ln_1p()) - } - - #[inline] - fn sinh(self) -> Self { - Self::from_f32(self.to_f32().sinh()) - } - - #[inline] - fn cosh(self) -> Self { - Self::from_f32(self.to_f32().cosh()) - } - - #[inline] - fn tanh(self) -> Self { - Self::from_f32(self.to_f32().tanh()) - } - - #[inline] - fn asinh(self) -> Self { - Self::from_f32(self.to_f32().asinh()) - } - - #[inline] - fn acosh(self) -> Self { - Self::from_f32(self.to_f32().acosh()) - } - - #[inline] - fn atanh(self) -> Self { - Self::from_f32(self.to_f32().atanh()) - } - - #[inline] - fn integer_decode(self) -> (u64, i16, i8) { - num_traits::float::Float::integer_decode(self.to_f32()) - } -} - -impl FloatConst for f16 { - #[inline] - fn E() -> Self { - Self::E - } - - #[inline] - fn FRAC_1_PI() -> Self { - Self::FRAC_1_PI - } - - #[inline] - fn FRAC_1_SQRT_2() -> Self { - Self::FRAC_1_SQRT_2 - } - - #[inline] - fn FRAC_2_PI() -> Self { - Self::FRAC_2_PI - } - - #[inline] - fn FRAC_2_SQRT_PI() -> Self { - Self::FRAC_2_SQRT_PI - } - - #[inline] - fn FRAC_PI_2() -> Self { - Self::FRAC_PI_2 - } - - #[inline] - fn FRAC_PI_3() -> Self { - Self::FRAC_PI_3 - } - - #[inline] - fn FRAC_PI_4() -> Self { - Self::FRAC_PI_4 - } - - #[inline] - fn FRAC_PI_6() -> Self { - Self::FRAC_PI_6 - } - - #[inline] - fn FRAC_PI_8() -> Self { - Self::FRAC_PI_8 - } - - #[inline] - fn LN_10() -> Self { - Self::LN_10 - } - - #[inline] - fn LN_2() -> Self { - Self::LN_2 - } - - #[inline] - fn LOG10_E() -> Self { - Self::LOG10_E - } - - #[inline] - fn LOG2_E() -> Self { - Self::LOG2_E - } - - #[inline] - fn PI() -> Self { - Self::PI - } - - fn SQRT_2() -> Self { - Self::SQRT_2 - } - - #[inline] - fn LOG10_2() -> Self - where - Self: Sized + Div<Self, Output = Self>, - { - Self::LOG10_2 - } - - #[inline] - fn LOG2_10() -> Self - where - Self: Sized + Div<Self, Output = Self>, - { - Self::LOG2_10 - } -} - -impl Bounded for f16 { - #[inline] - fn min_value() -> Self { - f16::MIN - } - - #[inline] - fn max_value() -> Self { - f16::MAX - } -} - -macro_rules! impl_as_primitive_to_f16 { - ($ty:ty, $meth:ident) => { - impl AsPrimitive<$ty> for f16 { - #[inline] - fn as_(self) -> $ty { - self.$meth().as_() - } - } - }; -} - -impl AsPrimitive<f16> for f16 { - #[inline] - fn as_(self) -> f16 { - self - } -} - -impl_as_primitive_to_f16!(i64, to_f32); -impl_as_primitive_to_f16!(u64, to_f32); -impl_as_primitive_to_f16!(i8, to_f32); -impl_as_primitive_to_f16!(u8, to_f32); -impl_as_primitive_to_f16!(i16, to_f32); -impl_as_primitive_to_f16!(u16, to_f32); -impl_as_primitive_to_f16!(i32, to_f32); -impl_as_primitive_to_f16!(u32, to_f32); -impl_as_primitive_to_f16!(isize, to_f32); -impl_as_primitive_to_f16!(usize, to_f32); -impl_as_primitive_to_f16!(f32, to_f32); -impl_as_primitive_to_f16!(f64, to_f64); - -macro_rules! 
impl_as_primitive_f16_from { - ($ty:ty, $meth:ident) => { - impl AsPrimitive<f16> for $ty { - #[inline] - fn as_(self) -> f16 { - f16::$meth(self.as_()) - } - } - }; -} - -impl_as_primitive_f16_from!(i64, from_f32); -impl_as_primitive_f16_from!(u64, from_f32); -impl_as_primitive_f16_from!(i8, from_f32); -impl_as_primitive_f16_from!(u8, from_f32); -impl_as_primitive_f16_from!(i16, from_f32); -impl_as_primitive_f16_from!(u16, from_f32); -impl_as_primitive_f16_from!(i32, from_f32); -impl_as_primitive_f16_from!(u32, from_f32); -impl_as_primitive_f16_from!(isize, from_f32); -impl_as_primitive_f16_from!(usize, from_f32); -impl_as_primitive_f16_from!(f32, from_f32); -impl_as_primitive_f16_from!(f64, from_f64); - -impl ToPrimitive for bf16 { - #[inline] - fn to_i64(&self) -> Option<i64> { - Self::to_f32(*self).to_i64() - } - #[inline] - fn to_u64(&self) -> Option<u64> { - Self::to_f32(*self).to_u64() - } - #[inline] - fn to_i8(&self) -> Option<i8> { - Self::to_f32(*self).to_i8() - } - #[inline] - fn to_u8(&self) -> Option<u8> { - Self::to_f32(*self).to_u8() - } - #[inline] - fn to_i16(&self) -> Option<i16> { - Self::to_f32(*self).to_i16() - } - #[inline] - fn to_u16(&self) -> Option<u16> { - Self::to_f32(*self).to_u16() - } - #[inline] - fn to_i32(&self) -> Option<i32> { - Self::to_f32(*self).to_i32() - } - #[inline] - fn to_u32(&self) -> Option<u32> { - Self::to_f32(*self).to_u32() - } - #[inline] - fn to_f32(&self) -> Option<f32> { - Some(Self::to_f32(*self)) - } - #[inline] - fn to_f64(&self) -> Option<f64> { - Some(Self::to_f64(*self)) - } -} - -impl FromPrimitive for bf16 { - #[inline] - fn from_i64(n: i64) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u64(n: u64) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i8(n: i8) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u8(n: u8) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i16(n: i16) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u16(n: u16) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_i32(n: i32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_u32(n: u32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_f32(n: f32) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } - #[inline] - fn from_f64(n: f64) -> Option<Self> { - n.to_f64().map(Self::from_f64) - } -} - -impl Num for bf16 { - type FromStrRadixErr = <f32 as Num>::FromStrRadixErr; - - #[inline] - fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> { - Ok(Self::from_f32(f32::from_str_radix(str, radix)?)) - } -} - -impl One for bf16 { - #[inline] - fn one() -> Self { - Self::ONE - } -} - -impl Zero for bf16 { - #[inline] - fn zero() -> Self { - Self::ZERO - } - - #[inline] - fn is_zero(&self) -> bool { - *self == Self::ZERO - } -} - -impl NumCast for bf16 { - #[inline] - fn from<T: ToPrimitive>(n: T) -> Option<Self> { - n.to_f32().map(Self::from_f32) - } -} - -impl num_traits::float::FloatCore for bf16 { - #[inline] - fn infinity() -> Self { - Self::INFINITY - } - - #[inline] - fn neg_infinity() -> Self { - Self::NEG_INFINITY - } - - #[inline] - fn nan() -> Self { - Self::NAN - } - - #[inline] - fn neg_zero() -> Self { - Self::NEG_ZERO - } - - #[inline] - fn min_value() -> Self { - Self::MIN - } - - #[inline] - fn min_positive_value() -> Self { - Self::MIN_POSITIVE - } - - #[inline] - fn epsilon() -> Self { - Self::EPSILON - 
} - - #[inline] - fn max_value() -> Self { - Self::MAX - } - - #[inline] - fn is_nan(self) -> bool { - self.is_nan() - } - - #[inline] - fn is_infinite(self) -> bool { - self.is_infinite() - } - - #[inline] - fn is_finite(self) -> bool { - self.is_finite() - } - - #[inline] - fn is_normal(self) -> bool { - self.is_normal() - } - - #[inline] - fn classify(self) -> FpCategory { - self.classify() - } - - #[inline] - fn floor(self) -> Self { - Self::from_f32(self.to_f32().floor()) - } - - #[inline] - fn ceil(self) -> Self { - Self::from_f32(self.to_f32().ceil()) - } - - #[inline] - fn round(self) -> Self { - Self::from_f32(self.to_f32().round()) - } - - #[inline] - fn trunc(self) -> Self { - Self::from_f32(self.to_f32().trunc()) - } - - #[inline] - fn fract(self) -> Self { - Self::from_f32(self.to_f32().fract()) - } - - #[inline] - fn abs(self) -> Self { - Self::from_bits(self.to_bits() & 0x7FFF) - } - - #[inline] - fn signum(self) -> Self { - self.signum() - } - - #[inline] - fn is_sign_positive(self) -> bool { - self.is_sign_positive() - } - - #[inline] - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - - fn min(self, other: Self) -> Self { - match self.partial_cmp(&other) { - None => { - if self.is_nan() { - other - } else { - self - } - } - Some(Ordering::Greater) | Some(Ordering::Equal) => other, - Some(Ordering::Less) => self, - } - } - - fn max(self, other: Self) -> Self { - match self.partial_cmp(&other) { - None => { - if self.is_nan() { - other - } else { - self - } - } - Some(Ordering::Greater) | Some(Ordering::Equal) => self, - Some(Ordering::Less) => other, - } - } - - #[inline] - fn recip(self) -> Self { - Self::from_f32(self.to_f32().recip()) - } - - #[inline] - fn powi(self, exp: i32) -> Self { - Self::from_f32(self.to_f32().powi(exp)) - } - - #[inline] - fn to_degrees(self) -> Self { - Self::from_f32(self.to_f32().to_degrees()) - } - - #[inline] - fn to_radians(self) -> Self { - Self::from_f32(self.to_f32().to_radians()) - } - - #[inline] - fn integer_decode(self) -> (u64, i16, i8) { - num_traits::float::FloatCore::integer_decode(self.to_f32()) - } -} - -impl num_traits::float::Float for bf16 { - #[inline] - fn nan() -> Self { - Self::NAN - } - - #[inline] - fn infinity() -> Self { - Self::INFINITY - } - - #[inline] - fn neg_infinity() -> Self { - Self::NEG_INFINITY - } - - #[inline] - fn neg_zero() -> Self { - Self::NEG_ZERO - } - - #[inline] - fn min_value() -> Self { - Self::MIN - } - - #[inline] - fn min_positive_value() -> Self { - Self::MIN_POSITIVE - } - - #[inline] - fn epsilon() -> Self { - Self::EPSILON - } - - #[inline] - fn max_value() -> Self { - Self::MAX - } - - #[inline] - fn is_nan(self) -> bool { - self.is_nan() - } - - #[inline] - fn is_infinite(self) -> bool { - self.is_infinite() - } - - #[inline] - fn is_finite(self) -> bool { - self.is_finite() - } - - #[inline] - fn is_normal(self) -> bool { - self.is_normal() - } - - #[inline] - fn classify(self) -> FpCategory { - self.classify() - } - - #[inline] - fn floor(self) -> Self { - Self::from_f32(self.to_f32().floor()) - } - - #[inline] - fn ceil(self) -> Self { - Self::from_f32(self.to_f32().ceil()) - } - - #[inline] - fn round(self) -> Self { - Self::from_f32(self.to_f32().round()) - } - - #[inline] - fn trunc(self) -> Self { - Self::from_f32(self.to_f32().trunc()) - } - - #[inline] - fn fract(self) -> Self { - Self::from_f32(self.to_f32().fract()) - } - - #[inline] - fn abs(self) -> Self { - Self::from_f32(self.to_f32().abs()) - } - - #[inline] - fn signum(self) -> Self { - 
Self::from_f32(self.to_f32().signum()) - } - - #[inline] - fn is_sign_positive(self) -> bool { - self.is_sign_positive() - } - - #[inline] - fn is_sign_negative(self) -> bool { - self.is_sign_negative() - } - - #[inline] - fn mul_add(self, a: Self, b: Self) -> Self { - Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32())) - } - - #[inline] - fn recip(self) -> Self { - Self::from_f32(self.to_f32().recip()) - } - - #[inline] - fn powi(self, n: i32) -> Self { - Self::from_f32(self.to_f32().powi(n)) - } - - #[inline] - fn powf(self, n: Self) -> Self { - Self::from_f32(self.to_f32().powf(n.to_f32())) - } - - #[inline] - fn sqrt(self) -> Self { - Self::from_f32(self.to_f32().sqrt()) - } - - #[inline] - fn exp(self) -> Self { - Self::from_f32(self.to_f32().exp()) - } - - #[inline] - fn exp2(self) -> Self { - Self::from_f32(self.to_f32().exp2()) - } - - #[inline] - fn ln(self) -> Self { - Self::from_f32(self.to_f32().ln()) - } - - #[inline] - fn log(self, base: Self) -> Self { - Self::from_f32(self.to_f32().log(base.to_f32())) - } - - #[inline] - fn log2(self) -> Self { - Self::from_f32(self.to_f32().log2()) - } - - #[inline] - fn log10(self) -> Self { - Self::from_f32(self.to_f32().log10()) - } - - #[inline] - fn to_degrees(self) -> Self { - Self::from_f32(self.to_f32().to_degrees()) - } - - #[inline] - fn to_radians(self) -> Self { - Self::from_f32(self.to_f32().to_radians()) - } - - #[inline] - fn max(self, other: Self) -> Self { - self.max(other) - } - - #[inline] - fn min(self, other: Self) -> Self { - self.min(other) - } - - #[inline] - fn abs_sub(self, other: Self) -> Self { - Self::from_f32((self.to_f32() - other.to_f32()).max(0.0)) - } - - #[inline] - fn cbrt(self) -> Self { - Self::from_f32(self.to_f32().cbrt()) - } - - #[inline] - fn hypot(self, other: Self) -> Self { - Self::from_f32(self.to_f32().hypot(other.to_f32())) - } - - #[inline] - fn sin(self) -> Self { - Self::from_f32(self.to_f32().sin()) - } - - #[inline] - fn cos(self) -> Self { - Self::from_f32(self.to_f32().cos()) - } - - #[inline] - fn tan(self) -> Self { - Self::from_f32(self.to_f32().tan()) - } - - #[inline] - fn asin(self) -> Self { - Self::from_f32(self.to_f32().asin()) - } - - #[inline] - fn acos(self) -> Self { - Self::from_f32(self.to_f32().acos()) - } - - #[inline] - fn atan(self) -> Self { - Self::from_f32(self.to_f32().atan()) - } - - #[inline] - fn atan2(self, other: Self) -> Self { - Self::from_f32(self.to_f32().atan2(other.to_f32())) - } - - #[inline] - fn sin_cos(self) -> (Self, Self) { - let (sin, cos) = self.to_f32().sin_cos(); - (Self::from_f32(sin), Self::from_f32(cos)) - } - - #[inline] - fn exp_m1(self) -> Self { - Self::from_f32(self.to_f32().exp_m1()) - } - - #[inline] - fn ln_1p(self) -> Self { - Self::from_f32(self.to_f32().ln_1p()) - } - - #[inline] - fn sinh(self) -> Self { - Self::from_f32(self.to_f32().sinh()) - } - - #[inline] - fn cosh(self) -> Self { - Self::from_f32(self.to_f32().cosh()) - } - - #[inline] - fn tanh(self) -> Self { - Self::from_f32(self.to_f32().tanh()) - } - - #[inline] - fn asinh(self) -> Self { - Self::from_f32(self.to_f32().asinh()) - } - - #[inline] - fn acosh(self) -> Self { - Self::from_f32(self.to_f32().acosh()) - } - - #[inline] - fn atanh(self) -> Self { - Self::from_f32(self.to_f32().atanh()) - } - - #[inline] - fn integer_decode(self) -> (u64, i16, i8) { - num_traits::float::Float::integer_decode(self.to_f32()) - } -} - -impl FloatConst for bf16 { - #[inline] - fn E() -> Self { - Self::E - } - - #[inline] - fn FRAC_1_PI() -> Self { - Self::FRAC_1_PI - } 
- - #[inline] - fn FRAC_1_SQRT_2() -> Self { - Self::FRAC_1_SQRT_2 - } - - #[inline] - fn FRAC_2_PI() -> Self { - Self::FRAC_2_PI - } - - #[inline] - fn FRAC_2_SQRT_PI() -> Self { - Self::FRAC_2_SQRT_PI - } - - #[inline] - fn FRAC_PI_2() -> Self { - Self::FRAC_PI_2 - } - - #[inline] - fn FRAC_PI_3() -> Self { - Self::FRAC_PI_3 - } - - #[inline] - fn FRAC_PI_4() -> Self { - Self::FRAC_PI_4 - } - - #[inline] - fn FRAC_PI_6() -> Self { - Self::FRAC_PI_6 - } - - #[inline] - fn FRAC_PI_8() -> Self { - Self::FRAC_PI_8 - } - - #[inline] - fn LN_10() -> Self { - Self::LN_10 - } - - #[inline] - fn LN_2() -> Self { - Self::LN_2 - } - - #[inline] - fn LOG10_E() -> Self { - Self::LOG10_E - } - - #[inline] - fn LOG2_E() -> Self { - Self::LOG2_E - } - - #[inline] - fn PI() -> Self { - Self::PI - } - - #[inline] - fn SQRT_2() -> Self { - Self::SQRT_2 - } - - #[inline] - fn LOG10_2() -> Self - where - Self: Sized + Div<Self, Output = Self>, - { - Self::LOG10_2 - } - - #[inline] - fn LOG2_10() -> Self - where - Self: Sized + Div<Self, Output = Self>, - { - Self::LOG2_10 - } -} - -impl Bounded for bf16 { - #[inline] - fn min_value() -> Self { - bf16::MIN - } - - #[inline] - fn max_value() -> Self { - bf16::MAX - } -} - -impl AsPrimitive<bf16> for bf16 { - #[inline] - fn as_(self) -> bf16 { - self - } -} - -macro_rules! impl_as_primitive_to_bf16 { - ($ty:ty, $meth:ident) => { - impl AsPrimitive<$ty> for bf16 { - #[inline] - fn as_(self) -> $ty { - self.$meth().as_() - } - } - }; -} - -impl_as_primitive_to_bf16!(i64, to_f32); -impl_as_primitive_to_bf16!(u64, to_f32); -impl_as_primitive_to_bf16!(i8, to_f32); -impl_as_primitive_to_bf16!(u8, to_f32); -impl_as_primitive_to_bf16!(i16, to_f32); -impl_as_primitive_to_bf16!(u16, to_f32); -impl_as_primitive_to_bf16!(i32, to_f32); -impl_as_primitive_to_bf16!(u32, to_f32); -impl_as_primitive_to_bf16!(isize, to_f32); -impl_as_primitive_to_bf16!(usize, to_f32); -impl_as_primitive_to_bf16!(f32, to_f32); -impl_as_primitive_to_bf16!(f64, to_f64); - -macro_rules! impl_as_primitive_bf16_from { - ($ty:ty, $meth:ident) => { - impl AsPrimitive<bf16> for $ty { - #[inline] - fn as_(self) -> bf16 { - bf16::$meth(self.as_()) - } - } - }; -} - -impl_as_primitive_bf16_from!(i64, from_f32); -impl_as_primitive_bf16_from!(u64, from_f32); -impl_as_primitive_bf16_from!(i8, from_f32); -impl_as_primitive_bf16_from!(u8, from_f32); -impl_as_primitive_bf16_from!(i16, from_f32); -impl_as_primitive_bf16_from!(u16, from_f32); -impl_as_primitive_bf16_from!(i32, from_f32); -impl_as_primitive_bf16_from!(u32, from_f32); -impl_as_primitive_bf16_from!(isize, from_f32); -impl_as_primitive_bf16_from!(usize, from_f32); -impl_as_primitive_bf16_from!(f32, from_f32); -impl_as_primitive_bf16_from!(f64, from_f64); diff --git a/vendor/half/src/slice.rs b/vendor/half/src/slice.rs deleted file mode 100644 index f1e9feb..0000000 --- a/vendor/half/src/slice.rs +++ /dev/null @@ -1,854 +0,0 @@ -//! Contains utility functions and traits to convert between slices of [`u16`] bits and [`f16`] or -//! [`bf16`] numbers. -//! -//! The utility [`HalfBitsSliceExt`] sealed extension trait is implemented for `[u16]` slices, -//! while the utility [`HalfFloatSliceExt`] sealed extension trait is implemented for both `[f16]` -//! and `[bf16]` slices. These traits provide efficient conversions and reinterpret casting of -//! larger buffers of floating point values, and are automatically included in the -//! [`prelude`][crate::prelude] module. 
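A minimal sketch of how the two traits below are typically combined (illustrative only; it uses just the `prelude` import and the trait methods documented in this module, and the bit patterns are simply the binary16 encodings of 1.0, 2.0, 3.0 and 4.0):

use half::prelude::*;

// Raw binary16 storage as plain u16 bits (1.0, 2.0, 3.0, 4.0).
let bits: [u16; 4] = [0x3C00, 0x4000, 0x4200, 0x4400];
// Zero-copy view of the same memory as f16 values (HalfBitsSliceExt).
let halves: &[f16] = bits.reinterpret_cast();
// Vectorized widening conversion into an f32 buffer of equal length (HalfFloatSliceExt).
let mut floats = [0f32; 4];
halves.convert_to_f32_slice(&mut floats);
assert_eq!(floats, [1.0, 2.0, 3.0, 4.0]);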
- -use crate::{bf16, binary16::convert, f16}; -#[cfg(feature = "alloc")] -use alloc::vec::Vec; -use core::slice; - -/// Extensions to `[f16]` and `[bf16]` slices to support conversion and reinterpret operations. -/// -/// This trait is sealed and cannot be implemented outside of this crate. -pub trait HalfFloatSliceExt: private::SealedHalfFloatSlice { - /// Reinterprets a slice of [`f16`] or [`bf16`] numbers as a slice of [`u16`] bits. - /// - /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory - /// location as `self`. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; - /// let int_buffer = float_buffer.reinterpret_cast(); - /// - /// assert_eq!(int_buffer, [float_buffer[0].to_bits(), float_buffer[1].to_bits(), float_buffer[2].to_bits()]); - /// ``` - #[must_use] - fn reinterpret_cast(&self) -> &[u16]; - - /// Reinterprets a mutable slice of [`f16`] or [`bf16`] numbers as a mutable slice of [`u16`]. - /// bits - /// - /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original, - /// which prevents mutating `self` as long as the returned `&mut [u16]` is borrowed. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let mut float_buffer = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; - /// - /// { - /// let int_buffer = float_buffer.reinterpret_cast_mut(); - /// - /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); - /// - /// // Mutating the u16 slice will mutating the original - /// int_buffer[0] = 0; - /// } - /// - /// // Note that we need to drop int_buffer before using float_buffer again or we will get a borrow error. - /// assert_eq!(float_buffer, [f16::from_f32(0.), f16::from_f32(2.), f16::from_f32(3.)]); - /// ``` - #[must_use] - fn reinterpret_cast_mut(&mut self) -> &mut [u16]; - - /// Converts all of the elements of a `[f32]` slice into [`f16`] or [`bf16`] values in `self`. - /// - /// The length of `src` must be the same as `self`. - /// - /// The conversion operation is vectorized over the slice, meaning the conversion may be more - /// efficient than converting individual elements on some hardware that supports SIMD - /// conversions. See [crate documentation](crate) for more information on hardware conversion - /// support. - /// - /// # Panics - /// - /// This function will panic if the two slices have different lengths. - /// - /// # Examples - /// ```rust - /// # use half::prelude::*; - /// // Initialize an empty buffer - /// let mut buffer = [0u16; 4]; - /// let buffer = buffer.reinterpret_cast_mut::<f16>(); - /// - /// let float_values = [1., 2., 3., 4.]; - /// - /// // Now convert - /// buffer.convert_from_f32_slice(&float_values); - /// - /// assert_eq!(buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); - /// ``` - fn convert_from_f32_slice(&mut self, src: &[f32]); - - /// Converts all of the elements of a `[f64]` slice into [`f16`] or [`bf16`] values in `self`. - /// - /// The length of `src` must be the same as `self`. - /// - /// The conversion operation is vectorized over the slice, meaning the conversion may be more - /// efficient than converting individual elements on some hardware that supports SIMD - /// conversions. See [crate documentation](crate) for more information on hardware conversion - /// support. 
- ///
- /// # Panics
- ///
- /// This function will panic if the two slices have different lengths.
- ///
- /// # Examples
- /// ```rust
- /// # use half::prelude::*;
- /// // Initialize an empty buffer
- /// let mut buffer = [0u16; 4];
- /// let buffer = buffer.reinterpret_cast_mut::<f16>();
- ///
- /// let float_values = [1., 2., 3., 4.];
- ///
- /// // Now convert
- /// buffer.convert_from_f64_slice(&float_values);
- ///
- /// assert_eq!(buffer, [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]);
- /// ```
- fn convert_from_f64_slice(&mut self, src: &[f64]);
-
- /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in `dst`.
- ///
- /// The length of `dst` must be the same as `self`.
- ///
- /// The conversion operation is vectorized over the slice, meaning the conversion may be more
- /// efficient than converting individual elements on some hardware that supports SIMD
- /// conversions. See [crate documentation](crate) for more information on hardware conversion
- /// support.
- ///
- /// # Panics
- ///
- /// This function will panic if the two slices have different lengths.
- ///
- /// # Examples
- /// ```rust
- /// # use half::prelude::*;
- /// // Initialize an empty buffer
- /// let mut buffer = [0f32; 4];
- ///
- /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
- ///
- /// // Now convert
- /// half_values.convert_to_f32_slice(&mut buffer);
- ///
- /// assert_eq!(buffer, [1., 2., 3., 4.]);
- /// ```
- fn convert_to_f32_slice(&self, dst: &mut [f32]);
-
- /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in `dst`.
- ///
- /// The length of `dst` must be the same as `self`.
- ///
- /// The conversion operation is vectorized over the slice, meaning the conversion may be more
- /// efficient than converting individual elements on some hardware that supports SIMD
- /// conversions. See [crate documentation](crate) for more information on hardware conversion
- /// support.
- ///
- /// # Panics
- ///
- /// This function will panic if the two slices have different lengths.
- ///
- /// # Examples
- /// ```rust
- /// # use half::prelude::*;
- /// // Initialize an empty buffer
- /// let mut buffer = [0f64; 4];
- ///
- /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
- ///
- /// // Now convert
- /// half_values.convert_to_f64_slice(&mut buffer);
- ///
- /// assert_eq!(buffer, [1., 2., 3., 4.]);
- /// ```
- fn convert_to_f64_slice(&self, dst: &mut [f64]);
-
- // Because trait is sealed, we can get away with different interfaces between features.
-
- /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f32`] values in a new
- /// vector.
- ///
- /// The conversion operation is vectorized over the slice, meaning the conversion may be more
- /// efficient than converting individual elements on some hardware that supports SIMD
- /// conversions. See [crate documentation](crate) for more information on hardware conversion
- /// support.
- ///
- /// This method is only available with the `std` or `alloc` feature.
- ///
- /// # Examples
- /// ```rust
- /// # use half::prelude::*;
- /// let half_values = [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)];
- /// let vec = half_values.to_f32_vec();
- ///
- /// assert_eq!(vec, vec![1., 2., 3., 4.]);
- /// ```
- #[cfg(any(feature = "alloc", feature = "std"))]
- #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
- #[must_use]
- fn to_f32_vec(&self) -> Vec<f32>;
-
- /// Converts all of the [`f16`] or [`bf16`] elements of `self` into [`f64`] values in a new
- /// vector.
- ///
- /// The conversion operation is vectorized over the slice, meaning the conversion may be more
- /// efficient than converting individual elements on some hardware that supports SIMD
- /// conversions. See [crate documentation](crate) for more information on hardware conversion
- /// support.
- ///
- /// This method is only available with the `std` or `alloc` feature.
- ///
- /// # Examples
- /// ```rust
- /// # use half::prelude::*;
- /// let half_values = [f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)];
- /// let vec = half_values.to_f64_vec();
- ///
- /// assert_eq!(vec, vec![1., 2., 3., 4.]);
- /// ```
- #[cfg(feature = "alloc")]
- #[cfg_attr(docsrs, doc(cfg(feature = "alloc")))]
- #[must_use]
- fn to_f64_vec(&self) -> Vec<f64>;
-}
-
-/// Extensions to `[u16]` slices to support reinterpret operations.
-///
-/// This trait is sealed and cannot be implemented outside of this crate.
-pub trait HalfBitsSliceExt: private::SealedHalfBitsSlice {
- /// Reinterprets a slice of [`u16`] bits as a slice of [`f16`] or [`bf16`] numbers.
- ///
- /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type.
- ///
- /// This is a zero-copy operation. The reinterpreted slice has the same lifetime and memory
- /// location as `self`.
- ///
- /// # Examples
- ///
- /// ```rust
- /// # use half::prelude::*;
- /// let int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
- /// let float_buffer: &[f16] = int_buffer.reinterpret_cast();
- ///
- /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
- ///
- /// // You may have to specify the cast type directly if the compiler can't infer the type.
- /// // The following is also valid in Rust.
- /// let typed_buffer = int_buffer.reinterpret_cast::<f16>();
- /// ```
- #[must_use]
- fn reinterpret_cast<H>(&self) -> &[H]
- where
- H: crate::private::SealedHalf;
-
- /// Reinterprets a mutable slice of [`u16`] bits as a mutable slice of [`f16`] or [`bf16`]
- /// numbers.
- ///
- /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type.
- ///
- /// This is a zero-copy operation. The transmuted slice has the same lifetime as the original,
- /// which prevents mutating `self` as long as the returned `&mut [f16]` is borrowed.
- ///
- /// # Examples
- ///
- /// ```rust
- /// # use half::prelude::*;
- /// let mut int_buffer = [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()];
- ///
- /// {
- /// let float_buffer: &mut [f16] = int_buffer.reinterpret_cast_mut();
- ///
- /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]);
- ///
- /// // Mutating the f16 slice will also mutate the original
- /// float_buffer[0] = f16::from_f32(0.);
- /// }
- ///
- /// // Note that we need to drop float_buffer before using int_buffer again or we will get a borrow error.
- /// assert_eq!(int_buffer, [f16::from_f32(0.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); - /// - /// // You may have to specify the cast type directly if the compiler can't infer the type. - /// // The following is also valid in Rust. - /// let typed_buffer = int_buffer.reinterpret_cast_mut::<f16>(); - /// ``` - #[must_use] - fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] - where - H: crate::private::SealedHalf; -} - -mod private { - use crate::{bf16, f16}; - - pub trait SealedHalfFloatSlice {} - impl SealedHalfFloatSlice for [f16] {} - impl SealedHalfFloatSlice for [bf16] {} - - pub trait SealedHalfBitsSlice {} - impl SealedHalfBitsSlice for [u16] {} -} - -impl HalfFloatSliceExt for [f16] { - #[inline] - fn reinterpret_cast(&self) -> &[u16] { - let pointer = self.as_ptr() as *const u16; - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts(pointer, length) } - } - - #[inline] - fn reinterpret_cast_mut(&mut self) -> &mut [u16] { - let pointer = self.as_mut_ptr().cast::<u16>(); - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts_mut(pointer, length) } - } - - fn convert_from_f32_slice(&mut self, src: &[f32]) { - assert_eq!( - self.len(), - src.len(), - "destination and source slices have different lengths" - ); - - convert::f32_to_f16_slice(src, self.reinterpret_cast_mut()) - } - - fn convert_from_f64_slice(&mut self, src: &[f64]) { - assert_eq!( - self.len(), - src.len(), - "destination and source slices have different lengths" - ); - - convert::f64_to_f16_slice(src, self.reinterpret_cast_mut()) - } - - fn convert_to_f32_slice(&self, dst: &mut [f32]) { - assert_eq!( - self.len(), - dst.len(), - "destination and source slices have different lengths" - ); - - convert::f16_to_f32_slice(self.reinterpret_cast(), dst) - } - - fn convert_to_f64_slice(&self, dst: &mut [f64]) { - assert_eq!( - self.len(), - dst.len(), - "destination and source slices have different lengths" - ); - - convert::f16_to_f64_slice(self.reinterpret_cast(), dst) - } - - #[cfg(any(feature = "alloc", feature = "std"))] - #[inline] - #[allow(clippy::uninit_vec)] - fn to_f32_vec(&self) -> Vec<f32> { - let mut vec = Vec::with_capacity(self.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(self.len()) }; - self.convert_to_f32_slice(&mut vec); - vec - } - - #[cfg(any(feature = "alloc", feature = "std"))] - #[inline] - #[allow(clippy::uninit_vec)] - fn to_f64_vec(&self) -> Vec<f64> { - let mut vec = Vec::with_capacity(self.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. 
- unsafe { vec.set_len(self.len()) }; - self.convert_to_f64_slice(&mut vec); - vec - } -} - -impl HalfFloatSliceExt for [bf16] { - #[inline] - fn reinterpret_cast(&self) -> &[u16] { - let pointer = self.as_ptr() as *const u16; - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts(pointer, length) } - } - - #[inline] - fn reinterpret_cast_mut(&mut self) -> &mut [u16] { - let pointer = self.as_mut_ptr().cast::<u16>(); - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts_mut(pointer, length) } - } - - fn convert_from_f32_slice(&mut self, src: &[f32]) { - assert_eq!( - self.len(), - src.len(), - "destination and source slices have different lengths" - ); - - // Just use regular loop here until there's any bf16 SIMD support. - for (i, f) in src.iter().enumerate() { - self[i] = bf16::from_f32(*f); - } - } - - fn convert_from_f64_slice(&mut self, src: &[f64]) { - assert_eq!( - self.len(), - src.len(), - "destination and source slices have different lengths" - ); - - // Just use regular loop here until there's any bf16 SIMD support. - for (i, f) in src.iter().enumerate() { - self[i] = bf16::from_f64(*f); - } - } - - fn convert_to_f32_slice(&self, dst: &mut [f32]) { - assert_eq!( - self.len(), - dst.len(), - "destination and source slices have different lengths" - ); - - // Just use regular loop here until there's any bf16 SIMD support. - for (i, f) in self.iter().enumerate() { - dst[i] = f.to_f32(); - } - } - - fn convert_to_f64_slice(&self, dst: &mut [f64]) { - assert_eq!( - self.len(), - dst.len(), - "destination and source slices have different lengths" - ); - - // Just use regular loop here until there's any bf16 SIMD support. - for (i, f) in self.iter().enumerate() { - dst[i] = f.to_f64(); - } - } - - #[cfg(any(feature = "alloc", feature = "std"))] - #[inline] - #[allow(clippy::uninit_vec)] - fn to_f32_vec(&self) -> Vec<f32> { - let mut vec = Vec::with_capacity(self.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(self.len()) }; - self.convert_to_f32_slice(&mut vec); - vec - } - - #[cfg(any(feature = "alloc", feature = "std"))] - #[inline] - #[allow(clippy::uninit_vec)] - fn to_f64_vec(&self) -> Vec<f64> { - let mut vec = Vec::with_capacity(self.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(self.len()) }; - self.convert_to_f64_slice(&mut vec); - vec - } -} - -impl HalfBitsSliceExt for [u16] { - // Since we sealed all the traits involved, these are safe. 
- #[inline] - fn reinterpret_cast<H>(&self) -> &[H] - where - H: crate::private::SealedHalf, - { - let pointer = self.as_ptr() as *const H; - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts(pointer, length) } - } - - #[inline] - fn reinterpret_cast_mut<H>(&mut self) -> &mut [H] - where - H: crate::private::SealedHalf, - { - let pointer = self.as_mut_ptr() as *mut H; - let length = self.len(); - // SAFETY: We are reconstructing full length of original slice, using its same lifetime, - // and the size of elements are identical - unsafe { slice::from_raw_parts_mut(pointer, length) } - } -} - -#[allow(clippy::float_cmp)] -#[cfg(test)] -mod test { - use super::{HalfBitsSliceExt, HalfFloatSliceExt}; - use crate::{bf16, f16}; - - #[test] - fn test_slice_conversions_f16() { - let bits = &[ - f16::E.to_bits(), - f16::PI.to_bits(), - f16::EPSILON.to_bits(), - f16::FRAC_1_SQRT_2.to_bits(), - ]; - let numbers = &[f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; - - // Convert from bits to numbers - let from_bits = bits.reinterpret_cast::<f16>(); - assert_eq!(from_bits, numbers); - - // Convert from numbers back to bits - let to_bits = from_bits.reinterpret_cast(); - assert_eq!(to_bits, bits); - } - - #[test] - fn test_mutablility_f16() { - let mut bits_array = [f16::PI.to_bits()]; - let bits = &mut bits_array[..]; - - { - // would not compile without these braces - let numbers = bits.reinterpret_cast_mut(); - numbers[0] = f16::E; - } - - assert_eq!(bits, &[f16::E.to_bits()]); - - bits[0] = f16::LN_2.to_bits(); - assert_eq!(bits, &[f16::LN_2.to_bits()]); - } - - #[test] - fn test_slice_conversions_bf16() { - let bits = &[ - bf16::E.to_bits(), - bf16::PI.to_bits(), - bf16::EPSILON.to_bits(), - bf16::FRAC_1_SQRT_2.to_bits(), - ]; - let numbers = &[bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; - - // Convert from bits to numbers - let from_bits = bits.reinterpret_cast::<bf16>(); - assert_eq!(from_bits, numbers); - - // Convert from numbers back to bits - let to_bits = from_bits.reinterpret_cast(); - assert_eq!(to_bits, bits); - } - - #[test] - fn test_mutablility_bf16() { - let mut bits_array = [bf16::PI.to_bits()]; - let bits = &mut bits_array[..]; - - { - // would not compile without these braces - let numbers = bits.reinterpret_cast_mut(); - numbers[0] = bf16::E; - } - - assert_eq!(bits, &[bf16::E.to_bits()]); - - bits[0] = bf16::LN_2.to_bits(); - assert_eq!(bits, &[bf16::LN_2.to_bits()]); - } - - #[test] - fn slice_convert_f16_f32() { - // Exact chunks - let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; - let vf16 = [ - f16::from_f32(1.), - f16::from_f32(2.), - f16::from_f32(3.), - f16::from_f32(4.), - f16::from_f32(5.), - f16::from_f32(6.), - f16::from_f32(7.), - f16::from_f32(8.), - ]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; - let vf16 = [ - f16::from_f32(1.), - f16::from_f32(2.), - f16::from_f32(3.), - f16::from_f32(4.), - f16::from_f32(5.), - f16::from_f32(6.), - f16::from_f32(7.), - f16::from_f32(8.), - f16::from_f32(9.), - ]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - - // 
Partial with chunks - let vf32 = [1., 2.]; - let vf16 = [f16::from_f32(1.), f16::from_f32(2.)]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - } - - #[test] - fn slice_convert_bf16_f32() { - // Exact chunks - let vf32 = [1., 2., 3., 4., 5., 6., 7., 8.]; - let vf16 = [ - bf16::from_f32(1.), - bf16::from_f32(2.), - bf16::from_f32(3.), - bf16::from_f32(4.), - bf16::from_f32(5.), - bf16::from_f32(6.), - bf16::from_f32(7.), - bf16::from_f32(8.), - ]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf32 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; - let vf16 = [ - bf16::from_f32(1.), - bf16::from_f32(2.), - bf16::from_f32(3.), - bf16::from_f32(4.), - bf16::from_f32(5.), - bf16::from_f32(6.), - bf16::from_f32(7.), - bf16::from_f32(8.), - bf16::from_f32(9.), - ]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf32 = [1., 2.]; - let vf16 = [bf16::from_f32(1.), bf16::from_f32(2.)]; - let mut buf32 = vf32; - let mut buf16 = vf16; - - vf16.convert_to_f32_slice(&mut buf32); - assert_eq!(&vf32, &buf32); - - buf16.convert_from_f32_slice(&vf32); - assert_eq!(&vf16, &buf16); - } - - #[test] - fn slice_convert_f16_f64() { - // Exact chunks - let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; - let vf16 = [ - f16::from_f64(1.), - f16::from_f64(2.), - f16::from_f64(3.), - f16::from_f64(4.), - f16::from_f64(5.), - f16::from_f64(6.), - f16::from_f64(7.), - f16::from_f64(8.), - ]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; - let vf16 = [ - f16::from_f64(1.), - f16::from_f64(2.), - f16::from_f64(3.), - f16::from_f64(4.), - f16::from_f64(5.), - f16::from_f64(6.), - f16::from_f64(7.), - f16::from_f64(8.), - f16::from_f64(9.), - ]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf64 = [1., 2.]; - let vf16 = [f16::from_f64(1.), f16::from_f64(2.)]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - } - - #[test] - fn slice_convert_bf16_f64() { - // Exact chunks - let vf64 = [1., 2., 3., 4., 5., 6., 7., 8.]; - let vf16 = [ - bf16::from_f64(1.), - bf16::from_f64(2.), - bf16::from_f64(3.), - bf16::from_f64(4.), - bf16::from_f64(5.), - bf16::from_f64(6.), - bf16::from_f64(7.), - bf16::from_f64(8.), - ]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf64 = [1., 2., 3., 4., 5., 6., 7., 8., 9.]; - let vf16 = [ - bf16::from_f64(1.), - bf16::from_f64(2.), - bf16::from_f64(3.), - bf16::from_f64(4.), - bf16::from_f64(5.), - bf16::from_f64(6.), - 
bf16::from_f64(7.), - bf16::from_f64(8.), - bf16::from_f64(9.), - ]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - - // Partial with chunks - let vf64 = [1., 2.]; - let vf16 = [bf16::from_f64(1.), bf16::from_f64(2.)]; - let mut buf64 = vf64; - let mut buf16 = vf16; - - vf16.convert_to_f64_slice(&mut buf64); - assert_eq!(&vf64, &buf64); - - buf16.convert_from_f64_slice(&vf64); - assert_eq!(&vf16, &buf16); - } - - #[test] - #[should_panic] - fn convert_from_f32_slice_len_mismatch_panics() { - let mut slice1 = [f16::ZERO; 3]; - let slice2 = [0f32; 4]; - slice1.convert_from_f32_slice(&slice2); - } - - #[test] - #[should_panic] - fn convert_from_f64_slice_len_mismatch_panics() { - let mut slice1 = [f16::ZERO; 3]; - let slice2 = [0f64; 4]; - slice1.convert_from_f64_slice(&slice2); - } - - #[test] - #[should_panic] - fn convert_to_f32_slice_len_mismatch_panics() { - let slice1 = [f16::ZERO; 3]; - let mut slice2 = [0f32; 4]; - slice1.convert_to_f32_slice(&mut slice2); - } - - #[test] - #[should_panic] - fn convert_to_f64_slice_len_mismatch_panics() { - let slice1 = [f16::ZERO; 3]; - let mut slice2 = [0f64; 4]; - slice1.convert_to_f64_slice(&mut slice2); - } -} diff --git a/vendor/half/src/vec.rs b/vendor/half/src/vec.rs deleted file mode 100644 index 27ad3e7..0000000 --- a/vendor/half/src/vec.rs +++ /dev/null @@ -1,274 +0,0 @@ -//! Contains utility functions and traits to convert between vectors of [`u16`] bits and [`f16`] or -//! [`bf16`] vectors. -//! -//! The utility [`HalfBitsVecExt`] sealed extension trait is implemented for [`Vec<u16>`] vectors, -//! while the utility [`HalfFloatVecExt`] sealed extension trait is implemented for both -//! [`Vec<f16>`] and [`Vec<bf16>`] vectors. These traits provide efficient conversions and -//! reinterpret casting of larger buffers of floating point values, and are automatically included -//! in the [`prelude`][crate::prelude] module. -//! -//! This module is only available with the `std` or `alloc` feature. - -use super::{bf16, f16, slice::HalfFloatSliceExt}; -#[cfg(feature = "alloc")] -use alloc::vec::Vec; -use core::mem; - -/// Extensions to [`Vec<f16>`] and [`Vec<bf16>`] to support reinterpret operations. -/// -/// This trait is sealed and cannot be implemented outside of this crate. -pub trait HalfFloatVecExt: private::SealedHalfFloatVec { - /// Reinterprets a vector of [`f16`]or [`bf16`] numbers as a vector of [`u16`] bits. - /// - /// This is a zero-copy operation. The reinterpreted vector has the same memory location as - /// `self`. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let float_buffer = vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]; - /// let int_buffer = float_buffer.reinterpret_into(); - /// - /// assert_eq!(int_buffer, [f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]); - /// ``` - #[must_use] - fn reinterpret_into(self) -> Vec<u16>; - - /// Converts all of the elements of a `[f32]` slice into a new [`f16`] or [`bf16`] vector. - /// - /// The conversion operation is vectorized over the slice, meaning the conversion may be more - /// efficient than converting individual elements on some hardware that supports SIMD - /// conversions. See [crate documentation][crate] for more information on hardware conversion - /// support. 
- /// - /// # Examples - /// ```rust - /// # use half::prelude::*; - /// let float_values = [1., 2., 3., 4.]; - /// let vec: Vec<f16> = Vec::from_f32_slice(&float_values); - /// - /// assert_eq!(vec, vec![f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.), f16::from_f32(4.)]); - /// ``` - #[must_use] - fn from_f32_slice(slice: &[f32]) -> Self; - - /// Converts all of the elements of a `[f64]` slice into a new [`f16`] or [`bf16`] vector. - /// - /// The conversion operation is vectorized over the slice, meaning the conversion may be more - /// efficient than converting individual elements on some hardware that supports SIMD - /// conversions. See [crate documentation][crate] for more information on hardware conversion - /// support. - /// - /// # Examples - /// ```rust - /// # use half::prelude::*; - /// let float_values = [1., 2., 3., 4.]; - /// let vec: Vec<f16> = Vec::from_f64_slice(&float_values); - /// - /// assert_eq!(vec, vec![f16::from_f64(1.), f16::from_f64(2.), f16::from_f64(3.), f16::from_f64(4.)]); - /// ``` - #[must_use] - fn from_f64_slice(slice: &[f64]) -> Self; -} - -/// Extensions to [`Vec<u16>`] to support reinterpret operations. -/// -/// This trait is sealed and cannot be implemented outside of this crate. -pub trait HalfBitsVecExt: private::SealedHalfBitsVec { - /// Reinterprets a vector of [`u16`] bits as a vector of [`f16`] or [`bf16`] numbers. - /// - /// `H` is the type to cast to, and must be either the [`f16`] or [`bf16`] type. - /// - /// This is a zero-copy operation. The reinterpreted vector has the same memory location as - /// `self`. - /// - /// # Examples - /// - /// ```rust - /// # use half::prelude::*; - /// let int_buffer = vec![f16::from_f32(1.).to_bits(), f16::from_f32(2.).to_bits(), f16::from_f32(3.).to_bits()]; - /// let float_buffer = int_buffer.reinterpret_into::<f16>(); - /// - /// assert_eq!(float_buffer, [f16::from_f32(1.), f16::from_f32(2.), f16::from_f32(3.)]); - /// ``` - #[must_use] - fn reinterpret_into<H>(self) -> Vec<H> - where - H: crate::private::SealedHalf; -} - -mod private { - use crate::{bf16, f16}; - #[cfg(feature = "alloc")] - use alloc::vec::Vec; - - pub trait SealedHalfFloatVec {} - impl SealedHalfFloatVec for Vec<f16> {} - impl SealedHalfFloatVec for Vec<bf16> {} - - pub trait SealedHalfBitsVec {} - impl SealedHalfBitsVec for Vec<u16> {} -} - -impl HalfFloatVecExt for Vec<f16> { - #[inline] - fn reinterpret_into(mut self) -> Vec<u16> { - // An f16 array has same length and capacity as u16 array - let length = self.len(); - let capacity = self.capacity(); - - // Actually reinterpret the contents of the Vec<f16> as u16, - // knowing that structs are represented as only their members in memory, - // which is the u16 part of `f16(u16)` - let pointer = self.as_mut_ptr() as *mut u16; - - // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted - mem::forget(self); - - // Finally construct a new Vec<f16> from the raw pointer - // SAFETY: We are reconstructing full length and capacity of original vector, - // using its original pointer, and the size of elements are identical. - unsafe { Vec::from_raw_parts(pointer, length, capacity) } - } - - #[allow(clippy::uninit_vec)] - fn from_f32_slice(slice: &[f32]) -> Self { - let mut vec = Vec::with_capacity(slice.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. 
- unsafe { vec.set_len(slice.len()) }; - vec.convert_from_f32_slice(slice); - vec - } - - #[allow(clippy::uninit_vec)] - fn from_f64_slice(slice: &[f64]) -> Self { - let mut vec = Vec::with_capacity(slice.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(slice.len()) }; - vec.convert_from_f64_slice(slice); - vec - } -} - -impl HalfFloatVecExt for Vec<bf16> { - #[inline] - fn reinterpret_into(mut self) -> Vec<u16> { - // An f16 array has same length and capacity as u16 array - let length = self.len(); - let capacity = self.capacity(); - - // Actually reinterpret the contents of the Vec<f16> as u16, - // knowing that structs are represented as only their members in memory, - // which is the u16 part of `f16(u16)` - let pointer = self.as_mut_ptr() as *mut u16; - - // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted - mem::forget(self); - - // Finally construct a new Vec<f16> from the raw pointer - // SAFETY: We are reconstructing full length and capacity of original vector, - // using its original pointer, and the size of elements are identical. - unsafe { Vec::from_raw_parts(pointer, length, capacity) } - } - - #[allow(clippy::uninit_vec)] - fn from_f32_slice(slice: &[f32]) -> Self { - let mut vec = Vec::with_capacity(slice.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(slice.len()) }; - vec.convert_from_f32_slice(slice); - vec - } - - #[allow(clippy::uninit_vec)] - fn from_f64_slice(slice: &[f64]) -> Self { - let mut vec = Vec::with_capacity(slice.len()); - // SAFETY: convert will initialize every value in the vector without reading them, - // so this is safe to do instead of double initialize from resize, and we're setting it to - // same value as capacity. - unsafe { vec.set_len(slice.len()) }; - vec.convert_from_f64_slice(slice); - vec - } -} - -impl HalfBitsVecExt for Vec<u16> { - // This is safe because all traits are sealed - #[inline] - fn reinterpret_into<H>(mut self) -> Vec<H> - where - H: crate::private::SealedHalf, - { - // An f16 array has same length and capacity as u16 array - let length = self.len(); - let capacity = self.capacity(); - - // Actually reinterpret the contents of the Vec<u16> as f16, - // knowing that structs are represented as only their members in memory, - // which is the u16 part of `f16(u16)` - let pointer = self.as_mut_ptr() as *mut H; - - // Prevent running a destructor on the old Vec<u16>, so the pointer won't be deleted - mem::forget(self); - - // Finally construct a new Vec<f16> from the raw pointer - // SAFETY: We are reconstructing full length and capacity of original vector, - // using its original pointer, and the size of elements are identical. 
- unsafe { Vec::from_raw_parts(pointer, length, capacity) } - } -} - -#[cfg(test)] -mod test { - use super::{HalfBitsVecExt, HalfFloatVecExt}; - use crate::{bf16, f16}; - #[cfg(all(feature = "alloc", not(feature = "std")))] - use alloc::vec; - - #[test] - fn test_vec_conversions_f16() { - let numbers = vec![f16::E, f16::PI, f16::EPSILON, f16::FRAC_1_SQRT_2]; - let bits = vec![ - f16::E.to_bits(), - f16::PI.to_bits(), - f16::EPSILON.to_bits(), - f16::FRAC_1_SQRT_2.to_bits(), - ]; - let bits_cloned = bits.clone(); - - // Convert from bits to numbers - let from_bits = bits.reinterpret_into::<f16>(); - assert_eq!(&from_bits[..], &numbers[..]); - - // Convert from numbers back to bits - let to_bits = from_bits.reinterpret_into(); - assert_eq!(&to_bits[..], &bits_cloned[..]); - } - - #[test] - fn test_vec_conversions_bf16() { - let numbers = vec![bf16::E, bf16::PI, bf16::EPSILON, bf16::FRAC_1_SQRT_2]; - let bits = vec![ - bf16::E.to_bits(), - bf16::PI.to_bits(), - bf16::EPSILON.to_bits(), - bf16::FRAC_1_SQRT_2.to_bits(), - ]; - let bits_cloned = bits.clone(); - - // Convert from bits to numbers - let from_bits = bits.reinterpret_into::<bf16>(); - assert_eq!(&from_bits[..], &numbers[..]); - - // Convert from numbers back to bits - let to_bits = from_bits.reinterpret_into(); - assert_eq!(&to_bits[..], &bits_cloned[..]); - } -} |
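Editor's sketch (not part of the deleted sources above): typical use of the sealed slice extension traits from `slice.rs`, assuming a dependency on the `half` crate with its default `std` feature enabled.

use half::prelude::*; // HalfFloatSliceExt, HalfBitsSliceExt, f16, bf16

fn main() {
    // Reinterpret raw u16 storage as f16 in place (zero-copy).
    let mut raw = [0u16; 4];
    let halves: &mut [f16] = raw.reinterpret_cast_mut();

    // Bulk-convert from f32; both slices must have the same length or this panics.
    halves.convert_from_f32_slice(&[1.0, 2.5, -3.0, 65504.0]);

    // Convert back out, either into an existing buffer or a new Vec (std/alloc only).
    let mut back = [0f32; 4];
    halves.convert_to_f32_slice(&mut back);
    let as_vec = halves.to_f32_vec();

    assert_eq!(back, [1.0, 2.5, -3.0, 65504.0]); // all values are exactly representable in f16
    assert_eq!(&as_vec[..], &back[..]);
}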
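A companion sketch for the `Vec` extension traits from `vec.rs` above, again an editor's illustration rather than crate code, and again assuming the `std` or `alloc` feature is enabled.

use half::prelude::*; // HalfFloatVecExt, HalfBitsVecExt, f16

fn main() {
    // Allocate and convert in one step.
    let halves: Vec<f16> = Vec::from_f32_slice(&[0.5, 1.0, 1.5, 2.0]);

    // Reuse the same allocation as raw bits: reinterpret_into consumes the Vec
    // and hands back its buffer as Vec<u16> without copying.
    let bits: Vec<u16> = halves.reinterpret_into();
    assert_eq!(bits[0], f16::from_f32(0.5).to_bits());

    // ...and reinterpret it back into half-precision floats, still zero-copy.
    let halves_again: Vec<f16> = bits.reinterpret_into();
    assert_eq!(halves_again[1], f16::from_f32(1.0));
}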