diff options
Diffstat (limited to 'vendor/jpeg-decoder/src')
-rw-r--r-- | vendor/jpeg-decoder/src/arch/mod.rs | 46 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/arch/neon.rs | 221 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/arch/ssse3.rs | 288 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/decoder.rs | 1493 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/decoder/lossless.rs | 259 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/error.rs | 75 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/huffman.rs | 346 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/idct.rs | 657 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/lib.rs | 66 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/marker.rs | 136 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/parser.rs | 685 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/upsampler.rs | 252 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/worker/immediate.rs | 80 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/worker/mod.rs | 128 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/worker/multithreaded.rs | 123 | ||||
-rw-r--r-- | vendor/jpeg-decoder/src/worker/rayon.rs | 221 |
16 files changed, 5076 insertions, 0 deletions
diff --git a/vendor/jpeg-decoder/src/arch/mod.rs b/vendor/jpeg-decoder/src/arch/mod.rs new file mode 100644 index 0000000..15b46c5 --- /dev/null +++ b/vendor/jpeg-decoder/src/arch/mod.rs @@ -0,0 +1,46 @@ +#![allow(unsafe_code)] + +mod neon; +mod ssse3; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +use std::is_x86_feature_detected; + +/// Arch-specific implementation of YCbCr conversion. Returns the number of pixels that were +/// converted. +pub fn get_color_convert_line_ycbcr() -> Option<unsafe fn(&[u8], &[u8], &[u8], &mut [u8]) -> usize> +{ + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[allow(unsafe_code)] + { + if is_x86_feature_detected!("ssse3") { + return Some(ssse3::color_convert_line_ycbcr); + } + } + // Runtime detection is not needed on aarch64. + #[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] + { + return Some(neon::color_convert_line_ycbcr); + } + #[allow(unreachable_code)] + None +} + +/// Arch-specific implementation of 8x8 IDCT. +pub fn get_dequantize_and_idct_block_8x8( +) -> Option<unsafe fn(&[i16; 64], &[u16; 64], usize, &mut [u8])> { + #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] + #[allow(unsafe_code)] + { + if is_x86_feature_detected!("ssse3") { + return Some(ssse3::dequantize_and_idct_block_8x8); + } + } + // Runtime detection is not needed on aarch64. 
+ #[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] + { + return Some(neon::dequantize_and_idct_block_8x8); + } + #[allow(unreachable_code)] + None +} diff --git a/vendor/jpeg-decoder/src/arch/neon.rs b/vendor/jpeg-decoder/src/arch/neon.rs new file mode 100644 index 0000000..4843578 --- /dev/null +++ b/vendor/jpeg-decoder/src/arch/neon.rs @@ -0,0 +1,221 @@ +#[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] +use core::arch::aarch64::*; + +#[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] +#[target_feature(enable = "neon")] +unsafe fn idct8(data: &mut [int16x8_t; 8]) { + // The fixed-point constants here are obtained by taking the fractional part of the constants + // from the non-SIMD implementation and scaling them up by 1<<15. This is because + // vqrdmulhq_n_s16(a, b) is effectively equivalent to (a*b)>>15 (except for possibly some + // slight differences in rounding). + + // The code here is effectively equivalent to the calls to "kernel" in idct.rs, except that it + // doesn't apply any further scaling and fixed point constants have a different precision. 
+ + let p2 = data[2]; + let p3 = data[6]; + let p1 = vqrdmulhq_n_s16(vqaddq_s16(p2, p3), 17734); // 0.5411961 + let t2 = vqsubq_s16( + vqsubq_s16(p1, p3), + vqrdmulhq_n_s16(p3, 27779), // 0.847759065 + ); + let t3 = vqaddq_s16(p1, vqrdmulhq_n_s16(p2, 25079)); // 0.765366865 + + let p2 = data[0]; + let p3 = data[4]; + let t0 = vqaddq_s16(p2, p3); + let t1 = vqsubq_s16(p2, p3); + + let x0 = vqaddq_s16(t0, t3); + let x3 = vqsubq_s16(t0, t3); + let x1 = vqaddq_s16(t1, t2); + let x2 = vqsubq_s16(t1, t2); + + let t0 = data[7]; + let t1 = data[5]; + let t2 = data[3]; + let t3 = data[1]; + + let p3 = vqaddq_s16(t0, t2); + let p4 = vqaddq_s16(t1, t3); + let p1 = vqaddq_s16(t0, t3); + let p2 = vqaddq_s16(t1, t2); + let p5 = vqaddq_s16(p3, p4); + let p5 = vqaddq_s16(p5, vqrdmulhq_n_s16(p5, 5763)); // 0.175875602 + + let t0 = vqrdmulhq_n_s16(t0, 9786); // 0.298631336 + let t1 = vqaddq_s16( + vqaddq_s16(t1, t1), + vqrdmulhq_n_s16(t1, 1741), // 0.053119869 + ); + let t2 = vqaddq_s16( + vqaddq_s16(t2, vqaddq_s16(t2, t2)), + vqrdmulhq_n_s16(t2, 2383), // 0.072711026 + ); + let t3 = vqaddq_s16(t3, vqrdmulhq_n_s16(t3, 16427)); // 0.501321110 + + let p1 = vqsubq_s16(p5, vqrdmulhq_n_s16(p1, 29490)); // 0.899976223 + let p2 = vqsubq_s16( + vqsubq_s16(vqsubq_s16(p5, p2), p2), + vqrdmulhq_n_s16(p2, 18446), // 0.562915447 + ); + + let p3 = vqsubq_s16( + vqrdmulhq_n_s16(p3, -31509), // -0.961570560 + p3, + ); + let p4 = vqrdmulhq_n_s16(p4, -12785); // -0.390180644 + + let t3 = vqaddq_s16(vqaddq_s16(p1, p4), t3); + let t2 = vqaddq_s16(vqaddq_s16(p2, p3), t2); + let t1 = vqaddq_s16(vqaddq_s16(p2, p4), t1); + let t0 = vqaddq_s16(vqaddq_s16(p1, p3), t0); + + data[0] = vqaddq_s16(x0, t3); + data[7] = vqsubq_s16(x0, t3); + data[1] = vqaddq_s16(x1, t2); + data[6] = vqsubq_s16(x1, t2); + data[2] = vqaddq_s16(x2, t1); + data[5] = vqsubq_s16(x2, t1); + data[3] = vqaddq_s16(x3, t0); + data[4] = vqsubq_s16(x3, t0); +} + +#[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] 
+#[target_feature(enable = "neon")] +unsafe fn transpose8(data: &mut [int16x8_t; 8]) { + // Use NEON's 2x2 matrix transposes (vtrn) to do the transposition in each 4x4 block, then + // combine the 4x4 blocks. + let a01 = vtrnq_s16(data[0], data[1]); + let a23 = vtrnq_s16(data[2], data[3]); + + let four0 = vtrnq_s32(vreinterpretq_s32_s16(a01.0), vreinterpretq_s32_s16(a23.0)); + let four1 = vtrnq_s32(vreinterpretq_s32_s16(a01.1), vreinterpretq_s32_s16(a23.1)); + + let a45 = vtrnq_s16(data[4], data[5]); + let a67 = vtrnq_s16(data[6], data[7]); + + let four2 = vtrnq_s32(vreinterpretq_s32_s16(a45.0), vreinterpretq_s32_s16(a67.0)); + let four3 = vtrnq_s32(vreinterpretq_s32_s16(a45.1), vreinterpretq_s32_s16(a67.1)); + + data[0] = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(four0.0), vget_low_s32(four2.0))); + data[1] = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(four1.0), vget_low_s32(four3.0))); + data[2] = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(four0.1), vget_low_s32(four2.1))); + data[3] = vreinterpretq_s16_s32(vcombine_s32(vget_low_s32(four1.1), vget_low_s32(four3.1))); + data[4] = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(four0.0), vget_high_s32(four2.0))); + data[5] = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(four1.0), vget_high_s32(four3.0))); + data[6] = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(four0.1), vget_high_s32(four2.1))); + data[7] = vreinterpretq_s16_s32(vcombine_s32(vget_high_s32(four1.1), vget_high_s32(four3.1))); +} + +#[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] +#[target_feature(enable = "neon")] +pub unsafe fn dequantize_and_idct_block_8x8( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output_linestride: usize, + output: &mut [u8], +) { + // The loop below will write to positions [output_linestride * i, output_linestride * i + 8) + // for 0<=i<8. 
Thus, the last accessed position is at an offset of output_linestrade * 7 + 7, + // and if that position is in-bounds, so are all other accesses. + assert!( + output.len() + > output_linestride + .checked_mul(7) + .unwrap() + .checked_add(7) + .unwrap() + ); + + const SHIFT: i32 = 3; + + // Read the DCT coefficients, scale them up and dequantize them. + let mut data = [vdupq_n_s16(0); 8]; + for i in 0..8 { + data[i] = vshlq_n_s16( + vmulq_s16( + vld1q_s16(coefficients.as_ptr().wrapping_add(i * 8)), + vreinterpretq_s16_u16(vld1q_u16(quantization_table.as_ptr().wrapping_add(i * 8))), + ), + SHIFT, + ); + } + + // Usual column IDCT - transpose - column IDCT - transpose approach. + idct8(&mut data); + transpose8(&mut data); + idct8(&mut data); + transpose8(&mut data); + + for i in 0..8 { + // The two passes of the IDCT algorithm give us a factor of 8, so the shift here is + // increased by 3. + // As values will be stored in a u8, they need to be 128-centered and not 0-centered. + // We add 128 with the appropriate shift for that purpose. + const OFFSET: i16 = 128 << (SHIFT + 3); + // We want rounding right shift, so we should add (1/2) << (SHIFT+3) before shifting. + const ROUNDING_BIAS: i16 = (1 << (SHIFT + 3)) >> 1; + + let data_with_offset = vqaddq_s16(data[i], vdupq_n_s16(OFFSET + ROUNDING_BIAS)); + + vst1_u8( + output.as_mut_ptr().wrapping_add(output_linestride * i), + vqshrun_n_s16(data_with_offset, SHIFT + 3), + ); + } +} + +#[cfg(all(feature = "nightly_aarch64_neon", target_arch = "aarch64"))] +#[target_feature(enable = "neon")] +pub unsafe fn color_convert_line_ycbcr(y: &[u8], cb: &[u8], cr: &[u8], output: &mut [u8]) -> usize { + assert!(output.len() % 3 == 0); + let num = output.len() / 3; + assert!(num <= y.len()); + assert!(num <= cb.len()); + assert!(num <= cr.len()); + let num_vecs = num / 8; + + for i in 0..num_vecs { + const SHIFT: i32 = 6; + // Load. 
+ let y = vld1_u8(y.as_ptr().wrapping_add(i * 8)); + let cb = vld1_u8(cb.as_ptr().wrapping_add(i * 8)); + let cr = vld1_u8(cr.as_ptr().wrapping_add(i * 8)); + + // Convert to 16 bit and shift. + let y = vreinterpretq_s16_u16(vshll_n_u8(y, SHIFT)); + let cb = vreinterpretq_s16_u16(vshll_n_u8(cb, SHIFT)); + let cr = vreinterpretq_s16_u16(vshll_n_u8(cr, SHIFT)); + + // Add offsets + let y = vqaddq_s16(y, vdupq_n_s16((1 << SHIFT) >> 1)); + let c128 = vdupq_n_s16(128 << SHIFT); + let cb = vqsubq_s16(cb, c128); + let cr = vqsubq_s16(cr, c128); + + // Compute cr * 1.402, cb * 0.34414, cr * 0.71414, cb * 1.772 + let cr_140200 = vqaddq_s16(vqrdmulhq_n_s16(cr, 13173), cr); + let cb_034414 = vqrdmulhq_n_s16(cb, 11276); + let cr_071414 = vqrdmulhq_n_s16(cr, 23401); + let cb_177200 = vqaddq_s16(vqrdmulhq_n_s16(cb, 25297), cb); + + // Last conversion step. + let r = vqaddq_s16(y, cr_140200); + let g = vqsubq_s16(y, vqaddq_s16(cb_034414, cr_071414)); + let b = vqaddq_s16(y, cb_177200); + + // Shift back and convert to u8. + let r = vqshrun_n_s16(r, SHIFT); + let g = vqshrun_n_s16(g, SHIFT); + let b = vqshrun_n_s16(b, SHIFT); + + // Shuffle + store. + vst3_u8( + output.as_mut_ptr().wrapping_add(24 * i), + uint8x8x3_t(r, g, b), + ); + } + + num_vecs * 8 +} diff --git a/vendor/jpeg-decoder/src/arch/ssse3.rs b/vendor/jpeg-decoder/src/arch/ssse3.rs new file mode 100644 index 0000000..374a70c --- /dev/null +++ b/vendor/jpeg-decoder/src/arch/ssse3.rs @@ -0,0 +1,288 @@ +#[cfg(target_arch = "x86")] +use std::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use std::arch::x86_64::*; + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "ssse3")] +unsafe fn idct8(data: &mut [__m128i; 8]) { + // The fixed-point constants here are obtained by taking the fractional part of the constants + // from the non-SIMD implementation and scaling them up by 1<<15. 
This is because + // _mm_mulhrs_epi16(a, b) is effectively equivalent to (a*b)>>15 (except for possibly some + // slight differences in rounding). + + // The code here is effectively equivalent to the calls to "kernel" in idct.rs, except that it + // doesn't apply any further scaling and fixed point constants have a different precision. + + let p2 = data[2]; + let p3 = data[6]; + let p1 = _mm_mulhrs_epi16(_mm_adds_epi16(p2, p3), _mm_set1_epi16(17734)); // 0.5411961 + let t2 = _mm_subs_epi16( + _mm_subs_epi16(p1, p3), + _mm_mulhrs_epi16(p3, _mm_set1_epi16(27779)), // 0.847759065 + ); + let t3 = _mm_adds_epi16(p1, _mm_mulhrs_epi16(p2, _mm_set1_epi16(25079))); // 0.765366865 + + let p2 = data[0]; + let p3 = data[4]; + let t0 = _mm_adds_epi16(p2, p3); + let t1 = _mm_subs_epi16(p2, p3); + + let x0 = _mm_adds_epi16(t0, t3); + let x3 = _mm_subs_epi16(t0, t3); + let x1 = _mm_adds_epi16(t1, t2); + let x2 = _mm_subs_epi16(t1, t2); + + let t0 = data[7]; + let t1 = data[5]; + let t2 = data[3]; + let t3 = data[1]; + + let p3 = _mm_adds_epi16(t0, t2); + let p4 = _mm_adds_epi16(t1, t3); + let p1 = _mm_adds_epi16(t0, t3); + let p2 = _mm_adds_epi16(t1, t2); + let p5 = _mm_adds_epi16(p3, p4); + let p5 = _mm_adds_epi16(p5, _mm_mulhrs_epi16(p5, _mm_set1_epi16(5763))); // 0.175875602 + + let t0 = _mm_mulhrs_epi16(t0, _mm_set1_epi16(9786)); // 0.298631336 + let t1 = _mm_adds_epi16( + _mm_adds_epi16(t1, t1), + _mm_mulhrs_epi16(t1, _mm_set1_epi16(1741)), // 0.053119869 + ); + let t2 = _mm_adds_epi16( + _mm_adds_epi16(t2, _mm_adds_epi16(t2, t2)), + _mm_mulhrs_epi16(t2, _mm_set1_epi16(2383)), // 0.072711026 + ); + let t3 = _mm_adds_epi16(t3, _mm_mulhrs_epi16(t3, _mm_set1_epi16(16427))); // 0.501321110 + + let p1 = _mm_subs_epi16(p5, _mm_mulhrs_epi16(p1, _mm_set1_epi16(29490))); // 0.899976223 + let p2 = _mm_subs_epi16( + _mm_subs_epi16(_mm_subs_epi16(p5, p2), p2), + _mm_mulhrs_epi16(p2, _mm_set1_epi16(18446)), // 0.562915447 + ); + + let p3 = _mm_subs_epi16( + _mm_mulhrs_epi16(p3, 
_mm_set1_epi16(-31509)), // -0.961570560 + p3, + ); + let p4 = _mm_mulhrs_epi16(p4, _mm_set1_epi16(-12785)); // -0.390180644 + + let t3 = _mm_adds_epi16(_mm_adds_epi16(p1, p4), t3); + let t2 = _mm_adds_epi16(_mm_adds_epi16(p2, p3), t2); + let t1 = _mm_adds_epi16(_mm_adds_epi16(p2, p4), t1); + let t0 = _mm_adds_epi16(_mm_adds_epi16(p1, p3), t0); + + data[0] = _mm_adds_epi16(x0, t3); + data[7] = _mm_subs_epi16(x0, t3); + data[1] = _mm_adds_epi16(x1, t2); + data[6] = _mm_subs_epi16(x1, t2); + data[2] = _mm_adds_epi16(x2, t1); + data[5] = _mm_subs_epi16(x2, t1); + data[3] = _mm_adds_epi16(x3, t0); + data[4] = _mm_subs_epi16(x3, t0); +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "ssse3")] +unsafe fn transpose8(data: &mut [__m128i; 8]) { + // Transpose a 8x8 matrix with a sequence of interleaving operations. + // Naming: dABl contains elements from the *l*ower halves of vectors A and B, interleaved, i.e. + // A0 B0 A1 B1 ... + // dABCDll contains elements from the lower quarter (ll) of vectors A, B, C, D, interleaved - + // A0 B0 C0 D0 A1 B1 C1 D1 ... + let d01l = _mm_unpacklo_epi16(data[0], data[1]); + let d23l = _mm_unpacklo_epi16(data[2], data[3]); + let d45l = _mm_unpacklo_epi16(data[4], data[5]); + let d67l = _mm_unpacklo_epi16(data[6], data[7]); + let d01h = _mm_unpackhi_epi16(data[0], data[1]); + let d23h = _mm_unpackhi_epi16(data[2], data[3]); + let d45h = _mm_unpackhi_epi16(data[4], data[5]); + let d67h = _mm_unpackhi_epi16(data[6], data[7]); + // Operating on 32-bits will interleave *consecutive pairs* of 16-bit integers. 
+ let d0123ll = _mm_unpacklo_epi32(d01l, d23l); + let d0123lh = _mm_unpackhi_epi32(d01l, d23l); + let d4567ll = _mm_unpacklo_epi32(d45l, d67l); + let d4567lh = _mm_unpackhi_epi32(d45l, d67l); + let d0123hl = _mm_unpacklo_epi32(d01h, d23h); + let d0123hh = _mm_unpackhi_epi32(d01h, d23h); + let d4567hl = _mm_unpacklo_epi32(d45h, d67h); + let d4567hh = _mm_unpackhi_epi32(d45h, d67h); + // Operating on 64-bits will interleave *consecutive quadruples* of 16-bit integers. + data[0] = _mm_unpacklo_epi64(d0123ll, d4567ll); + data[1] = _mm_unpackhi_epi64(d0123ll, d4567ll); + data[2] = _mm_unpacklo_epi64(d0123lh, d4567lh); + data[3] = _mm_unpackhi_epi64(d0123lh, d4567lh); + data[4] = _mm_unpacklo_epi64(d0123hl, d4567hl); + data[5] = _mm_unpackhi_epi64(d0123hl, d4567hl); + data[6] = _mm_unpacklo_epi64(d0123hh, d4567hh); + data[7] = _mm_unpackhi_epi64(d0123hh, d4567hh); +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "ssse3")] +pub unsafe fn dequantize_and_idct_block_8x8( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output_linestride: usize, + output: &mut [u8], +) { + // The loop below will write to positions [output_linestride * i, output_linestride * i + 8) + // for 0<=i<8. Thus, the last accessed position is at an offset of output_linestrade * 7 + 7, + // and if that position is in-bounds, so are all other accesses. + assert!( + output.len() + > output_linestride + .checked_mul(7) + .unwrap() + .checked_add(7) + .unwrap() + ); + + #[cfg(target_arch = "x86")] + use std::arch::x86::*; + #[cfg(target_arch = "x86_64")] + use std::arch::x86_64::*; + + const SHIFT: i32 = 3; + + // Read the DCT coefficients, scale them up and dequantize them. 
+ let mut data = [_mm_setzero_si128(); 8]; + for i in 0..8 { + data[i] = _mm_slli_epi16( + _mm_mullo_epi16( + _mm_loadu_si128(coefficients.as_ptr().wrapping_add(i * 8) as *const _), + _mm_loadu_si128(quantization_table.as_ptr().wrapping_add(i * 8) as *const _), + ), + SHIFT, + ); + } + + // Usual column IDCT - transpose - column IDCT - transpose approach. + idct8(&mut data); + transpose8(&mut data); + idct8(&mut data); + transpose8(&mut data); + + for i in 0..8 { + let mut buf = [0u8; 16]; + // The two passes of the IDCT algorithm give us a factor of 8, so the shift here is + // increased by 3. + // As values will be stored in a u8, they need to be 128-centered and not 0-centered. + // We add 128 with the appropriate shift for that purpose. + const OFFSET: i16 = 128 << (SHIFT + 3); + // We want rounding right shift, so we should add (1/2) << (SHIFT+3) before shifting. + const ROUNDING_BIAS: i16 = (1 << (SHIFT + 3)) >> 1; + + let data_with_offset = _mm_adds_epi16(data[i], _mm_set1_epi16(OFFSET + ROUNDING_BIAS)); + + _mm_storeu_si128( + buf.as_mut_ptr() as *mut _, + _mm_packus_epi16( + _mm_srai_epi16(data_with_offset, SHIFT + 3), + _mm_setzero_si128(), + ), + ); + std::ptr::copy_nonoverlapping::<u8>( + buf.as_ptr(), + output.as_mut_ptr().wrapping_add(output_linestride * i) as *mut _, + 8, + ); + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[target_feature(enable = "ssse3")] +pub unsafe fn color_convert_line_ycbcr(y: &[u8], cb: &[u8], cr: &[u8], output: &mut [u8]) -> usize { + assert!(output.len() % 3 == 0); + let num = output.len() / 3; + assert!(num <= y.len()); + assert!(num <= cb.len()); + assert!(num <= cr.len()); + // _mm_loadu_si64 generates incorrect code for Rust <1.58. To circumvent this, we use a full + // 128-bit load, but that requires leaving an extra vector of border to the scalar code. + // From Rust 1.58 on, the _mm_loadu_si128 can be replaced with _mm_loadu_si64 and this + // .saturating_sub() can be removed. 
+ let num_vecs = (num / 8).saturating_sub(1); + + for i in 0..num_vecs { + const SHIFT: i32 = 6; + // Load. + let y = _mm_loadu_si128(y.as_ptr().wrapping_add(i * 8) as *const _); + let cb = _mm_loadu_si128(cb.as_ptr().wrapping_add(i * 8) as *const _); + let cr = _mm_loadu_si128(cr.as_ptr().wrapping_add(i * 8) as *const _); + + // Convert to 16 bit. + let shuf16 = _mm_setr_epi8( + 0, -0x7F, 1, -0x7F, 2, -0x7F, 3, -0x7F, 4, -0x7F, 5, -0x7F, 6, -0x7F, 7, -0x7F, + ); + let y = _mm_slli_epi16(_mm_shuffle_epi8(y, shuf16), SHIFT); + let cb = _mm_slli_epi16(_mm_shuffle_epi8(cb, shuf16), SHIFT); + let cr = _mm_slli_epi16(_mm_shuffle_epi8(cr, shuf16), SHIFT); + + // Add offsets + let c128 = _mm_set1_epi16(128 << SHIFT); + let y = _mm_adds_epi16(y, _mm_set1_epi16((1 << SHIFT) >> 1)); + let cb = _mm_subs_epi16(cb, c128); + let cr = _mm_subs_epi16(cr, c128); + + // Compute cr * 1.402, cb * 0.34414, cr * 0.71414, cb * 1.772 + let cr_140200 = _mm_adds_epi16(_mm_mulhrs_epi16(cr, _mm_set1_epi16(13173)), cr); + let cb_034414 = _mm_mulhrs_epi16(cb, _mm_set1_epi16(11276)); + let cr_071414 = _mm_mulhrs_epi16(cr, _mm_set1_epi16(23401)); + let cb_177200 = _mm_adds_epi16(_mm_mulhrs_epi16(cb, _mm_set1_epi16(25297)), cb); + + // Last conversion step. + let r = _mm_adds_epi16(y, cr_140200); + let g = _mm_subs_epi16(y, _mm_adds_epi16(cb_034414, cr_071414)); + let b = _mm_adds_epi16(y, cb_177200); + + // Shift back and convert to u8. + let zero = _mm_setzero_si128(); + let r = _mm_packus_epi16(_mm_srai_epi16(r, SHIFT), zero); + let g = _mm_packus_epi16(_mm_srai_epi16(g, SHIFT), zero); + let b = _mm_packus_epi16(_mm_srai_epi16(b, SHIFT), zero); + + // Shuffle rrrrrrrrggggggggbbbbbbbb to rgbrgbrgb... + + // Control vectors for _mm_shuffle_epi8. -0x7F is selected so that the resulting position + // after _mm_shuffle_epi8 will be filled with 0, so that the r, g, and b vectors can then + // be OR-ed together. 
+ let shufr = _mm_setr_epi8( + 0, -0x7F, -0x7F, 1, -0x7F, -0x7F, 2, -0x7F, -0x7F, 3, -0x7F, -0x7F, 4, -0x7F, -0x7F, 5, + ); + let shufg = _mm_setr_epi8( + -0x7F, 0, -0x7F, -0x7F, 1, -0x7F, -0x7F, 2, -0x7F, -0x7F, 3, -0x7F, -0x7F, 4, -0x7F, + -0x7F, + ); + let shufb = _mm_alignr_epi8(shufg, shufg, 15); + + let rgb_low = _mm_or_si128( + _mm_shuffle_epi8(r, shufr), + _mm_or_si128(_mm_shuffle_epi8(g, shufg), _mm_shuffle_epi8(b, shufb)), + ); + + // For the next part of the rgb vectors, we need to select R values from 6 up, G and B from + // 5 up. The highest bit of -0x7F + 6 is still set, so the corresponding location will + // still be 0. + let shufr1 = _mm_add_epi8(shufb, _mm_set1_epi8(6)); + let shufg1 = _mm_add_epi8(shufr, _mm_set1_epi8(5)); + let shufb1 = _mm_add_epi8(shufg, _mm_set1_epi8(5)); + + let rgb_hi = _mm_or_si128( + _mm_shuffle_epi8(r, shufr1), + _mm_or_si128(_mm_shuffle_epi8(g, shufg1), _mm_shuffle_epi8(b, shufb1)), + ); + + let mut data = [0u8; 32]; + _mm_storeu_si128(data.as_mut_ptr() as *mut _, rgb_low); + _mm_storeu_si128(data.as_mut_ptr().wrapping_add(16) as *mut _, rgb_hi); + std::ptr::copy_nonoverlapping::<u8>( + data.as_ptr(), + output.as_mut_ptr().wrapping_add(24 * i), + 24, + ); + } + + num_vecs * 8 +} diff --git a/vendor/jpeg-decoder/src/decoder.rs b/vendor/jpeg-decoder/src/decoder.rs new file mode 100644 index 0000000..795ad1e --- /dev/null +++ b/vendor/jpeg-decoder/src/decoder.rs @@ -0,0 +1,1493 @@ +use crate::error::{Error, Result, UnsupportedFeature}; +use crate::huffman::{fill_default_mjpeg_tables, HuffmanDecoder, HuffmanTable}; +use crate::marker::Marker; +use crate::parser::{ + parse_app, parse_com, parse_dht, parse_dqt, parse_dri, parse_sof, parse_sos, + AdobeColorTransform, AppData, CodingProcess, Component, Dimensions, EntropyCoding, FrameInfo, + IccChunk, ScanInfo, +}; +use crate::read_u8; +use crate::upsampler::Upsampler; +use crate::worker::{compute_image_parallel, PreferWorkerKind, RowData, Worker, WorkerScope}; +use 
alloc::borrow::ToOwned; +use alloc::sync::Arc; +use alloc::vec::Vec; +use alloc::{format, vec}; +use core::cmp; +use core::mem; +use core::ops::Range; +use std::convert::TryInto; +use std::io::Read; + +pub const MAX_COMPONENTS: usize = 4; + +mod lossless; +use self::lossless::compute_image_lossless; + +#[cfg_attr(rustfmt, rustfmt_skip)] +static UNZIGZAG: [u8; 64] = [ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, +]; + +/// An enumeration over combinations of color spaces and bit depths a pixel can have. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum PixelFormat { + /// Luminance (grayscale), 8 bits + L8, + /// Luminance (grayscale), 16 bits + L16, + /// RGB, 8 bits per channel + RGB24, + /// CMYK, 8 bits per channel + CMYK32, +} + +impl PixelFormat { + /// Determine the size in bytes of each pixel in this format + pub fn pixel_bytes(&self) -> usize { + match self { + PixelFormat::L8 => 1, + PixelFormat::L16 => 2, + PixelFormat::RGB24 => 3, + PixelFormat::CMYK32 => 4, + } + } +} + +/// Represents metadata of an image. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct ImageInfo { + /// The width of the image, in pixels. + pub width: u16, + /// The height of the image, in pixels. + pub height: u16, + /// The pixel format of the image. + pub pixel_format: PixelFormat, + /// The coding process of the image. + pub coding_process: CodingProcess, +} + +/// Describes the colour transform to apply before binary data is returned +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum ColorTransform { + /// No transform should be applied and the data is returned as-is. 
+ None, + /// Unknown colour transformation + Unknown, + /// Grayscale transform should be applied (expects 1 channel) + Grayscale, + /// RGB transform should be applied. + RGB, + /// YCbCr transform should be applied. + YCbCr, + /// CMYK transform should be applied. + CMYK, + /// YCCK transform should be applied. + YCCK, + /// big gamut Y/Cb/Cr, bg-sYCC + JcsBgYcc, + /// big gamut red/green/blue, bg-sRGB + JcsBgRgb, +} + +/// JPEG decoder +pub struct Decoder<R> { + reader: R, + + frame: Option<FrameInfo>, + dc_huffman_tables: Vec<Option<HuffmanTable>>, + ac_huffman_tables: Vec<Option<HuffmanTable>>, + quantization_tables: [Option<Arc<[u16; 64]>>; 4], + + restart_interval: u16, + + adobe_color_transform: Option<AdobeColorTransform>, + color_transform: Option<ColorTransform>, + + is_jfif: bool, + is_mjpeg: bool, + + icc_markers: Vec<IccChunk>, + + exif_data: Option<Vec<u8>>, + + // Used for progressive JPEGs. + coefficients: Vec<Vec<i16>>, + // Bitmask of which coefficients has been completely decoded. + coefficients_finished: [u64; MAX_COMPONENTS], + + // Maximum allowed size of decoded image buffer + decoding_buffer_size_limit: usize, +} + +impl<R: Read> Decoder<R> { + /// Creates a new `Decoder` using the reader `reader`. + pub fn new(reader: R) -> Decoder<R> { + Decoder { + reader, + frame: None, + dc_huffman_tables: vec![None, None, None, None], + ac_huffman_tables: vec![None, None, None, None], + quantization_tables: [None, None, None, None], + restart_interval: 0, + adobe_color_transform: None, + color_transform: None, + is_jfif: false, + is_mjpeg: false, + icc_markers: Vec::new(), + exif_data: None, + coefficients: Vec::new(), + coefficients_finished: [0; MAX_COMPONENTS], + decoding_buffer_size_limit: usize::MAX, + } + } + + /// Colour transform to use when decoding the image. App segments relating to colour transforms + /// will be ignored. 
+    pub fn set_color_transform(&mut self, transform: ColorTransform) {
+        // Stored as `Some(..)`; `new` leaves this field as `None`.
+        self.color_transform = Some(transform);
+    }
+
+    /// Set maximum buffer size allowed for decoded images
+    ///
+    /// `new` initialises this limit to `usize::MAX`.
+    pub fn set_max_decoding_buffer_size(&mut self, max: usize) {
+        self.decoding_buffer_size_limit = max;
+    }
+
+    /// Returns metadata about the image.
+    ///
+    /// The returned value will be `None` until a call to either `read_info` or `decode` has
+    /// returned `Ok`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the stored frame has a component count other than 1, 3 or 4, or if a
+    /// single-component frame has a precision other than 8 or 16. The SOF handling in
+    /// `decode_internal` rejects such frame headers before the frame is stored.
+    pub fn info(&self) -> Option<ImageInfo> {
+        match self.frame {
+            Some(ref frame) => {
+                // The pixel format follows from the component count; only the
+                // single-component case also looks at the sample precision.
+                let pixel_format = match frame.components.len() {
+                    1 => match frame.precision {
+                        8 => PixelFormat::L8,
+                        16 => PixelFormat::L16,
+                        _ => panic!(),
+                    },
+                    3 => PixelFormat::RGB24,
+                    4 => PixelFormat::CMYK32,
+                    _ => panic!(),
+                };
+
+                // Dimensions come from `output_size`, which `scale` may change via
+                // `update_idct_size`.
+                Some(ImageInfo {
+                    width: frame.output_size.width,
+                    height: frame.output_size.height,
+                    pixel_format,
+                    coding_process: frame.coding_process,
+                })
+            }
+            None => None,
+        }
+    }
+
+    /// Returns raw exif data, starting at the TIFF header, if the image contains any.
+    ///
+    /// The returned value will be `None` until a call to `decode` has returned `Ok`.
+    pub fn exif_data(&self) -> Option<&[u8]> {
+        self.exif_data.as_deref()
+    }
+
+    /// Returns the embedded ICC profile if the image contains one.
+ pub fn icc_profile(&self) -> Option<Vec<u8>> { + let mut marker_present: [Option<&IccChunk>; 256] = [None; 256]; + let num_markers = self.icc_markers.len(); + if num_markers == 0 || num_markers >= 255 { + return None; + } + // check the validity of the markers + for chunk in &self.icc_markers { + if usize::from(chunk.num_markers) != num_markers { + // all the lengths must match + return None; + } + if chunk.seq_no == 0 { + return None; + } + if marker_present[usize::from(chunk.seq_no)].is_some() { + // duplicate seq_no + return None; + } else { + marker_present[usize::from(chunk.seq_no)] = Some(chunk); + } + } + + // assemble them together by seq_no failing if any are missing + let mut data = Vec::new(); + // seq_no's start at 1 + for &chunk in marker_present.get(1..=num_markers)? { + data.extend_from_slice(&chunk?.data); + } + Some(data) + } + + /// Heuristic to avoid starting thread, synchronization if we expect a small amount of + /// parallelism to be utilized. + fn select_worker(frame: &FrameInfo, worker_preference: PreferWorkerKind) -> PreferWorkerKind { + const PARALLELISM_THRESHOLD: u64 = 128 * 128; + + match worker_preference { + PreferWorkerKind::Immediate => PreferWorkerKind::Immediate, + PreferWorkerKind::Multithreaded => { + let width: u64 = frame.output_size.width.into(); + let height: u64 = frame.output_size.width.into(); + if width * height > PARALLELISM_THRESHOLD { + PreferWorkerKind::Multithreaded + } else { + PreferWorkerKind::Immediate + } + } + } + } + + /// Tries to read metadata from the image without decoding it. + /// + /// If successful, the metadata can be obtained using the `info` method. + pub fn read_info(&mut self) -> Result<()> { + WorkerScope::with(|worker| self.decode_internal(true, worker)).map(|_| ()) + } + + /// Configure the decoder to scale the image during decoding. 
+ /// + /// This efficiently scales the image by the smallest supported scale + /// factor that produces an image larger than or equal to the requested + /// size in at least one axis. The currently implemented scale factors + /// are 1/8, 1/4, 1/2 and 1. + /// + /// To generate a thumbnail of an exact size, pass the desired size and + /// then scale to the final size using a traditional resampling algorithm. + pub fn scale(&mut self, requested_width: u16, requested_height: u16) -> Result<(u16, u16)> { + self.read_info()?; + let frame = self.frame.as_mut().unwrap(); + let idct_size = crate::idct::choose_idct_size( + frame.image_size, + Dimensions { + width: requested_width, + height: requested_height, + }, + ); + frame.update_idct_size(idct_size)?; + Ok((frame.output_size.width, frame.output_size.height)) + } + + /// Decodes the image and returns the decoded pixels if successful. + pub fn decode(&mut self) -> Result<Vec<u8>> { + WorkerScope::with(|worker| self.decode_internal(false, worker)) + } + + fn decode_internal( + &mut self, + stop_after_metadata: bool, + worker_scope: &WorkerScope, + ) -> Result<Vec<u8>> { + if stop_after_metadata && self.frame.is_some() { + // The metadata has already been read. + return Ok(Vec::new()); + } else if self.frame.is_none() + && (read_u8(&mut self.reader)? != 0xFF + || Marker::from_u8(read_u8(&mut self.reader)?) 
!= Some(Marker::SOI)) + { + return Err(Error::Format( + "first two bytes are not an SOI marker".to_owned(), + )); + } + + let mut previous_marker = Marker::SOI; + let mut pending_marker = None; + let mut scans_processed = 0; + let mut planes = vec![ + Vec::<u8>::new(); + self.frame + .as_ref() + .map_or(0, |frame| frame.components.len()) + ]; + let mut planes_u16 = vec![ + Vec::<u16>::new(); + self.frame + .as_ref() + .map_or(0, |frame| frame.components.len()) + ]; + + loop { + let marker = match pending_marker.take() { + Some(m) => m, + None => self.read_marker()?, + }; + + match marker { + // Frame header + Marker::SOF(..) => { + // Section 4.10 + // "An image contains only one frame in the cases of sequential and + // progressive coding processes; an image contains multiple frames for the + // hierarchical mode." + if self.frame.is_some() { + return Err(Error::Unsupported(UnsupportedFeature::Hierarchical)); + } + + let frame = parse_sof(&mut self.reader, marker)?; + let component_count = frame.components.len(); + + if frame.is_differential { + return Err(Error::Unsupported(UnsupportedFeature::Hierarchical)); + } + if frame.entropy_coding == EntropyCoding::Arithmetic { + return Err(Error::Unsupported( + UnsupportedFeature::ArithmeticEntropyCoding, + )); + } + if frame.precision != 8 && frame.coding_process != CodingProcess::Lossless { + return Err(Error::Unsupported(UnsupportedFeature::SamplePrecision( + frame.precision, + ))); + } + if frame.precision != 8 && frame.precision != 16 { + return Err(Error::Unsupported(UnsupportedFeature::SamplePrecision( + frame.precision, + ))); + } + if component_count != 1 && component_count != 3 && component_count != 4 { + return Err(Error::Unsupported(UnsupportedFeature::ComponentCount( + component_count as u8, + ))); + } + + // Make sure we support the subsampling ratios used. 
+ let _ = Upsampler::new( + &frame.components, + frame.image_size.width, + frame.image_size.height, + )?; + + self.frame = Some(frame); + + if stop_after_metadata { + return Ok(Vec::new()); + } + + planes = vec![Vec::new(); component_count]; + planes_u16 = vec![Vec::new(); component_count]; + } + + // Scan header + Marker::SOS => { + if self.frame.is_none() { + return Err(Error::Format("scan encountered before frame".to_owned())); + } + + let frame = self.frame.clone().unwrap(); + let scan = parse_sos(&mut self.reader, &frame)?; + + if frame.coding_process == CodingProcess::DctProgressive + && self.coefficients.is_empty() + { + self.coefficients = frame + .components + .iter() + .map(|c| { + let block_count = + c.block_size.width as usize * c.block_size.height as usize; + vec![0; block_count * 64] + }) + .collect(); + } + + if frame.coding_process == CodingProcess::Lossless { + let (marker, data) = self.decode_scan_lossless(&frame, &scan)?; + + for (i, plane) in data + .into_iter() + .enumerate() + .filter(|&(_, ref plane)| !plane.is_empty()) + { + planes_u16[i] = plane; + } + pending_marker = marker; + } else { + // This was previously buggy, so let's explain the log here a bit. When a + // progressive frame is encoded then the coefficients (DC, AC) of each + // component (=color plane) can be split amongst scans. In particular it can + // happen or at least occurs in the wild that a scan contains coefficient 0 of + // all components. If now one but not all components had all other coefficients + // delivered in previous scans then such a scan contains all components but + // completes only some of them! (This is technically NOT permitted for all + // other coefficients as the standard dictates that scans with coefficients + // other than the 0th must only contain ONE component so we would either + // complete it or not. We may want to detect and error in case more component + // are part of a scan than allowed.) What a weird edge case. 
+ // + // But this means we track precisely which components get completed here. + let mut finished = [false; MAX_COMPONENTS]; + + if scan.successive_approximation_low == 0 { + for (&i, component_finished) in + scan.component_indices.iter().zip(&mut finished) + { + if self.coefficients_finished[i] == !0 { + continue; + } + for j in scan.spectral_selection.clone() { + self.coefficients_finished[i] |= 1 << j; + } + if self.coefficients_finished[i] == !0 { + *component_finished = true; + } + } + } + + let preference = + Self::select_worker(&frame, PreferWorkerKind::Multithreaded); + + let (marker, data) = worker_scope + .get_or_init_worker(preference, |worker| { + self.decode_scan(&frame, &scan, worker, &finished) + })?; + + if let Some(data) = data { + for (i, plane) in data + .into_iter() + .enumerate() + .filter(|&(_, ref plane)| !plane.is_empty()) + { + if self.coefficients_finished[i] == !0 { + planes[i] = plane; + } + } + } + + pending_marker = marker; + } + + scans_processed += 1; + } + + // Table-specification and miscellaneous markers + // Quantization table-specification + Marker::DQT => { + let tables = parse_dqt(&mut self.reader)?; + + for (i, &table) in tables.iter().enumerate() { + if let Some(table) = table { + let mut unzigzagged_table = [0u16; 64]; + + for j in 0..64 { + unzigzagged_table[UNZIGZAG[j] as usize] = table[j]; + } + + self.quantization_tables[i] = Some(Arc::new(unzigzagged_table)); + } + } + } + // Huffman table-specification + Marker::DHT => { + let is_baseline = self.frame.as_ref().map(|frame| frame.is_baseline); + let (dc_tables, ac_tables) = parse_dht(&mut self.reader, is_baseline)?; + + let current_dc_tables = mem::take(&mut self.dc_huffman_tables); + self.dc_huffman_tables = dc_tables + .into_iter() + .zip(current_dc_tables.into_iter()) + .map(|(a, b)| a.or(b)) + .collect(); + + let current_ac_tables = mem::take(&mut self.ac_huffman_tables); + self.ac_huffman_tables = ac_tables + .into_iter() + .zip(current_ac_tables.into_iter()) + 
.map(|(a, b)| a.or(b)) + .collect(); + } + // Arithmetic conditioning table-specification + Marker::DAC => { + return Err(Error::Unsupported( + UnsupportedFeature::ArithmeticEntropyCoding, + )) + } + // Restart interval definition + Marker::DRI => self.restart_interval = parse_dri(&mut self.reader)?, + // Comment + Marker::COM => { + let _comment = parse_com(&mut self.reader)?; + } + // Application data + Marker::APP(..) => { + if let Some(data) = parse_app(&mut self.reader, marker)? { + match data { + AppData::Adobe(color_transform) => { + self.adobe_color_transform = Some(color_transform) + } + AppData::Jfif => { + // From the JFIF spec: + // "The APP0 marker is used to identify a JPEG FIF file. + // The JPEG FIF APP0 marker is mandatory right after the SOI marker." + // Some JPEGs in the wild does not follow this though, so we allow + // JFIF headers anywhere APP0 markers are allowed. + /* + if previous_marker != Marker::SOI { + return Err(Error::Format("the JFIF APP0 marker must come right after the SOI marker".to_owned())); + } + */ + + self.is_jfif = true; + } + AppData::Avi1 => self.is_mjpeg = true, + AppData::Icc(icc) => self.icc_markers.push(icc), + AppData::Exif(data) => self.exif_data = Some(data), + } + } + } + // Restart + Marker::RST(..) => { + // Some encoders emit a final RST marker after entropy-coded data, which + // decode_scan does not take care of. So if we encounter one, we ignore it. + if previous_marker != Marker::SOS { + return Err(Error::Format( + "RST found outside of entropy-coded data".to_owned(), + )); + } + } + + // Define number of lines + Marker::DNL => { + // Section B.2.1 + // "If a DNL segment (see B.2.5) is present, it shall immediately follow the first scan." 
+ if previous_marker != Marker::SOS || scans_processed != 1 { + return Err(Error::Format( + "DNL is only allowed immediately after the first scan".to_owned(), + )); + } + + return Err(Error::Unsupported(UnsupportedFeature::DNL)); + } + + // Hierarchical mode markers + Marker::DHP | Marker::EXP => { + return Err(Error::Unsupported(UnsupportedFeature::Hierarchical)) + } + + // End of image + Marker::EOI => break, + + _ => { + return Err(Error::Format(format!( + "{:?} marker found where not allowed", + marker + ))) + } + } + + previous_marker = marker; + } + + if self.frame.is_none() { + return Err(Error::Format( + "end of image encountered before frame".to_owned(), + )); + } + + let frame = self.frame.as_ref().unwrap(); + let preference = Self::select_worker(&frame, PreferWorkerKind::Multithreaded); + + worker_scope.get_or_init_worker(preference, |worker| { + self.decode_planes(worker, planes, planes_u16) + }) + } + + fn decode_planes( + &mut self, + worker: &mut dyn Worker, + mut planes: Vec<Vec<u8>>, + planes_u16: Vec<Vec<u16>>, + ) -> Result<Vec<u8>> { + if self.frame.is_none() { + return Err(Error::Format( + "end of image encountered before frame".to_owned(), + )); + } + + let frame = self.frame.as_ref().unwrap(); + + if { + let required_mem = frame + .components + .len() + .checked_mul(frame.output_size.width.into()) + .and_then(|m| m.checked_mul(frame.output_size.height.into())); + required_mem.map_or(true, |m| self.decoding_buffer_size_limit < m) + } { + return Err(Error::Format( + "size of decoded image exceeds maximum allowed size".to_owned(), + )); + } + + // If we're decoding a progressive jpeg and a component is unfinished, render what we've got + if frame.coding_process == CodingProcess::DctProgressive + && self.coefficients.len() == frame.components.len() + { + for (i, component) in frame.components.iter().enumerate() { + // Only dealing with unfinished components + if self.coefficients_finished[i] == !0 { + continue; + } + + let quantization_table = + 
match self.quantization_tables[component.quantization_table_index].clone() { + Some(quantization_table) => quantization_table, + None => continue, + }; + + // Get the worker prepared + let row_data = RowData { + index: i, + component: component.clone(), + quantization_table, + }; + worker.start(row_data)?; + + // Send the rows over to the worker and collect the result + let coefficients_per_mcu_row = usize::from(component.block_size.width) + * usize::from(component.vertical_sampling_factor) + * 64; + + let mut tasks = (0..frame.mcu_size.height).map(|mcu_y| { + let offset = usize::from(mcu_y) * coefficients_per_mcu_row; + let row_coefficients = + self.coefficients[i][offset..offset + coefficients_per_mcu_row].to_vec(); + (i, row_coefficients) + }); + + // FIXME: additional potential work stealing opportunities for rayon case if we + // also internally can parallelize over components. + worker.append_rows(&mut tasks)?; + planes[i] = worker.get_result(i)?; + } + } + + if frame.coding_process == CodingProcess::Lossless { + compute_image_lossless(frame, planes_u16) + } else { + compute_image( + &frame.components, + planes, + frame.output_size, + self.determine_color_transform(), + ) + } + } + + fn determine_color_transform(&self) -> ColorTransform { + if let Some(color_transform) = self.color_transform { + return color_transform; + } + + let frame = self.frame.as_ref().unwrap(); + + if frame.components.len() == 1 { + return ColorTransform::Grayscale; + } + + // Using logic for determining colour as described here: https://entropymine.wordpress.com/2018/10/22/how-is-a-jpeg-images-color-type-determined/ + + if frame.components.len() == 3 { + match ( + frame.components[0].identifier, + frame.components[1].identifier, + frame.components[2].identifier, + ) { + (1, 2, 3) => { + return ColorTransform::YCbCr; + } + (1, 34, 35) => { + return ColorTransform::JcsBgYcc; + } + (82, 71, 66) => { + return ColorTransform::RGB; + } + (114, 103, 98) => { + return 
ColorTransform::JcsBgRgb; + } + _ => {} + } + + if self.is_jfif { + return ColorTransform::YCbCr; + } + } + + if let Some(colour_transform) = self.adobe_color_transform { + match colour_transform { + AdobeColorTransform::Unknown => { + if frame.components.len() == 3 { + return ColorTransform::RGB; + } else if frame.components.len() == 4 { + return ColorTransform::CMYK; + } + } + AdobeColorTransform::YCbCr => { + return ColorTransform::YCbCr; + } + AdobeColorTransform::YCCK => { + return ColorTransform::YCCK; + } + } + } else if frame.components.len() == 4 { + return ColorTransform::CMYK; + } + + if frame.components.len() == 4 { + ColorTransform::YCCK + } else if frame.components.len() == 3 { + ColorTransform::YCbCr + } else { + ColorTransform::Unknown + } + } + + fn read_marker(&mut self) -> Result<Marker> { + loop { + // This should be an error as the JPEG spec doesn't allow extraneous data between marker segments. + // libjpeg allows this though and there are images in the wild utilising it, so we are + // forced to support this behavior. + // Sony Ericsson P990i is an example of a device which produce this sort of JPEGs. + while read_u8(&mut self.reader)? != 0xFF {} + + // Section B.1.1.2 + // All markers are assigned two-byte codes: an X’FF’ byte followed by a + // byte which is not equal to 0 or X’FF’ (see Table B.1). Any marker may + // optionally be preceded by any number of fill bytes, which are bytes + // assigned code X’FF’. + let mut byte = read_u8(&mut self.reader)?; + + // Section B.1.1.2 + // "Any marker may optionally be preceded by any number of fill bytes, which are bytes assigned code X’FF’." 
+ while byte == 0xFF { + byte = read_u8(&mut self.reader)?; + } + + if byte != 0x00 && byte != 0xFF { + return Ok(Marker::from_u8(byte).unwrap()); + } + } + } + + fn decode_scan( + &mut self, + frame: &FrameInfo, + scan: &ScanInfo, + worker: &mut dyn Worker, + finished: &[bool; MAX_COMPONENTS], + ) -> Result<(Option<Marker>, Option<Vec<Vec<u8>>>)> { + assert!(scan.component_indices.len() <= MAX_COMPONENTS); + + let components: Vec<Component> = scan + .component_indices + .iter() + .map(|&i| frame.components[i].clone()) + .collect(); + + // Verify that all required quantization tables has been set. + if components + .iter() + .any(|component| self.quantization_tables[component.quantization_table_index].is_none()) + { + return Err(Error::Format("use of unset quantization table".to_owned())); + } + + if self.is_mjpeg { + fill_default_mjpeg_tables( + scan, + &mut self.dc_huffman_tables, + &mut self.ac_huffman_tables, + ); + } + + // Verify that all required huffman tables has been set. + if scan.spectral_selection.start == 0 + && scan + .dc_table_indices + .iter() + .any(|&i| self.dc_huffman_tables[i].is_none()) + { + return Err(Error::Format( + "scan makes use of unset dc huffman table".to_owned(), + )); + } + if scan.spectral_selection.end > 1 + && scan + .ac_table_indices + .iter() + .any(|&i| self.ac_huffman_tables[i].is_none()) + { + return Err(Error::Format( + "scan makes use of unset ac huffman table".to_owned(), + )); + } + + // Prepare the worker thread for the work to come. 
+ for (i, component) in components.iter().enumerate() { + if finished[i] { + let row_data = RowData { + index: i, + component: component.clone(), + quantization_table: self.quantization_tables + [component.quantization_table_index] + .clone() + .unwrap(), + }; + + worker.start(row_data)?; + } + } + + let is_progressive = frame.coding_process == CodingProcess::DctProgressive; + let is_interleaved = components.len() > 1; + let mut dummy_block = [0i16; 64]; + let mut huffman = HuffmanDecoder::new(); + let mut dc_predictors = [0i16; MAX_COMPONENTS]; + let mut mcus_left_until_restart = self.restart_interval; + let mut expected_rst_num = 0; + let mut eob_run = 0; + let mut mcu_row_coefficients = vec![vec![]; components.len()]; + + if !is_progressive { + for (i, component) in components.iter().enumerate().filter(|&(i, _)| finished[i]) { + let coefficients_per_mcu_row = component.block_size.width as usize + * component.vertical_sampling_factor as usize + * 64; + mcu_row_coefficients[i] = vec![0i16; coefficients_per_mcu_row]; + } + } + + // 4.8.2 + // When reading from the stream, if the data is non-interleaved then an MCU consists of + // exactly one block (effectively a 1x1 sample). + let (mcu_horizontal_samples, mcu_vertical_samples) = if is_interleaved { + let horizontal = components + .iter() + .map(|component| component.horizontal_sampling_factor as u16) + .collect::<Vec<_>>(); + let vertical = components + .iter() + .map(|component| component.vertical_sampling_factor as u16) + .collect::<Vec<_>>(); + (horizontal, vertical) + } else { + (vec![1], vec![1]) + }; + + // This also affects how many MCU values we read from stream. If it's a non-interleaved stream, + // the MCUs will be exactly the block count. 
+ let (max_mcu_x, max_mcu_y) = if is_interleaved { + (frame.mcu_size.width, frame.mcu_size.height) + } else { + ( + components[0].block_size.width, + components[0].block_size.height, + ) + }; + + for mcu_y in 0..max_mcu_y { + if mcu_y * 8 >= frame.image_size.height { + break; + } + + for mcu_x in 0..max_mcu_x { + if mcu_x * 8 >= frame.image_size.width { + break; + } + + if self.restart_interval > 0 { + if mcus_left_until_restart == 0 { + match huffman.take_marker(&mut self.reader)? { + Some(Marker::RST(n)) => { + if n != expected_rst_num { + return Err(Error::Format(format!( + "found RST{} where RST{} was expected", + n, expected_rst_num + ))); + } + + huffman.reset(); + // Section F.2.1.3.1 + dc_predictors = [0i16; MAX_COMPONENTS]; + // Section G.1.2.2 + eob_run = 0; + + expected_rst_num = (expected_rst_num + 1) % 8; + mcus_left_until_restart = self.restart_interval; + } + Some(marker) => { + return Err(Error::Format(format!( + "found marker {:?} inside scan where RST{} was expected", + marker, expected_rst_num + ))) + } + None => { + return Err(Error::Format(format!( + "no marker found where RST{} was expected", + expected_rst_num + ))) + } + } + } + + mcus_left_until_restart -= 1; + } + + for (i, component) in components.iter().enumerate() { + for v_pos in 0..mcu_vertical_samples[i] { + for h_pos in 0..mcu_horizontal_samples[i] { + let coefficients = if is_progressive { + let block_y = (mcu_y * mcu_vertical_samples[i] + v_pos) as usize; + let block_x = (mcu_x * mcu_horizontal_samples[i] + h_pos) as usize; + let block_offset = + (block_y * component.block_size.width as usize + block_x) * 64; + &mut self.coefficients[scan.component_indices[i]] + [block_offset..block_offset + 64] + } else if finished[i] { + // Because the worker thread operates in batches as if we were always interleaved, we + // need to distinguish between a single-shot buffer and one that's currently in process + // (for a non-interleaved) stream + let mcu_batch_current_row = if is_interleaved { 
+ 0 + } else { + mcu_y % component.vertical_sampling_factor as u16 + }; + + let block_y = (mcu_batch_current_row * mcu_vertical_samples[i] + + v_pos) as usize; + let block_x = (mcu_x * mcu_horizontal_samples[i] + h_pos) as usize; + let block_offset = + (block_y * component.block_size.width as usize + block_x) * 64; + &mut mcu_row_coefficients[i][block_offset..block_offset + 64] + } else { + &mut dummy_block[..64] + } + .try_into() + .unwrap(); + + if scan.successive_approximation_high == 0 { + decode_block( + &mut self.reader, + coefficients, + &mut huffman, + self.dc_huffman_tables[scan.dc_table_indices[i]].as_ref(), + self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(), + scan.spectral_selection.clone(), + scan.successive_approximation_low, + &mut eob_run, + &mut dc_predictors[i], + )?; + } else { + decode_block_successive_approximation( + &mut self.reader, + coefficients, + &mut huffman, + self.ac_huffman_tables[scan.ac_table_indices[i]].as_ref(), + scan.spectral_selection.clone(), + scan.successive_approximation_low, + &mut eob_run, + )?; + } + } + } + } + } + + // Send the coefficients from this MCU row to the worker thread for dequantization and idct. + for (i, component) in components.iter().enumerate() { + if finished[i] { + // In the event of non-interleaved streams, if we're still building the buffer out, + // keep going; don't send it yet. We also need to ensure we don't skip over the last + // row(s) of the image. + if !is_interleaved && (mcu_y + 1) * 8 < frame.image_size.height { + if (mcu_y + 1) % component.vertical_sampling_factor as u16 > 0 { + continue; + } + } + + let coefficients_per_mcu_row = component.block_size.width as usize + * component.vertical_sampling_factor as usize + * 64; + + let row_coefficients = if is_progressive { + // Because non-interleaved streams will have multiple MCU rows concatenated together, + // the row for calculating the offset is different. 
+ let worker_mcu_y = if is_interleaved { + mcu_y + } else { + // Explicitly doing floor-division here + mcu_y / component.vertical_sampling_factor as u16 + }; + + let offset = worker_mcu_y as usize * coefficients_per_mcu_row; + self.coefficients[scan.component_indices[i]] + [offset..offset + coefficients_per_mcu_row] + .to_vec() + } else { + mem::replace( + &mut mcu_row_coefficients[i], + vec![0i16; coefficients_per_mcu_row], + ) + }; + + // FIXME: additional potential work stealing opportunities for rayon case if we + // also internally can parallelize over components. + worker.append_row((i, row_coefficients))?; + } + } + } + + let mut marker = huffman.take_marker(&mut self.reader)?; + while let Some(Marker::RST(_)) = marker { + marker = self.read_marker().ok(); + } + + if finished.iter().any(|&c| c) { + // Retrieve all the data from the worker thread. + let mut data = vec![Vec::new(); frame.components.len()]; + + for (i, &component_index) in scan.component_indices.iter().enumerate() { + if finished[i] { + data[component_index] = worker.get_result(i)?; + } + } + + Ok((marker, Some(data))) + } else { + Ok((marker, None)) + } + } +} + +fn decode_block<R: Read>( + reader: &mut R, + coefficients: &mut [i16; 64], + huffman: &mut HuffmanDecoder, + dc_table: Option<&HuffmanTable>, + ac_table: Option<&HuffmanTable>, + spectral_selection: Range<u8>, + successive_approximation_low: u8, + eob_run: &mut u16, + dc_predictor: &mut i16, +) -> Result<()> { + debug_assert_eq!(coefficients.len(), 64); + + if spectral_selection.start == 0 { + // Section F.2.2.1 + // Figure F.12 + let value = huffman.decode(reader, dc_table.unwrap())?; + let diff = match value { + 0 => 0, + 1..=11 => huffman.receive_extend(reader, value)?, + _ => { + // Section F.1.2.1.1 + // Table F.1 + return Err(Error::Format( + "invalid DC difference magnitude category".to_owned(), + )); + } + }; + + // Malicious JPEG files can cause this add to overflow, therefore we use wrapping_add. 
+ // One example of such a file is tests/crashtest/images/dc-predictor-overflow.jpg + *dc_predictor = dc_predictor.wrapping_add(diff); + coefficients[0] = *dc_predictor << successive_approximation_low; + } + + let mut index = cmp::max(spectral_selection.start, 1); + + if index < spectral_selection.end && *eob_run > 0 { + *eob_run -= 1; + return Ok(()); + } + + // Section F.1.2.2.1 + while index < spectral_selection.end { + if let Some((value, run)) = huffman.decode_fast_ac(reader, ac_table.unwrap())? { + index += run; + + if index >= spectral_selection.end { + break; + } + + coefficients[UNZIGZAG[index as usize] as usize] = value << successive_approximation_low; + index += 1; + } else { + let byte = huffman.decode(reader, ac_table.unwrap())?; + let r = byte >> 4; + let s = byte & 0x0f; + + if s == 0 { + match r { + 15 => index += 16, // Run length of 16 zero coefficients. + _ => { + *eob_run = (1 << r) - 1; + + if r > 0 { + *eob_run += huffman.get_bits(reader, r)?; + } + + break; + } + } + } else { + index += r; + + if index >= spectral_selection.end { + break; + } + + coefficients[UNZIGZAG[index as usize] as usize] = + huffman.receive_extend(reader, s)? << successive_approximation_low; + index += 1; + } + } + } + + Ok(()) +} + +fn decode_block_successive_approximation<R: Read>( + reader: &mut R, + coefficients: &mut [i16; 64], + huffman: &mut HuffmanDecoder, + ac_table: Option<&HuffmanTable>, + spectral_selection: Range<u8>, + successive_approximation_low: u8, + eob_run: &mut u16, +) -> Result<()> { + debug_assert_eq!(coefficients.len(), 64); + + let bit = 1 << successive_approximation_low; + + if spectral_selection.start == 0 { + // Section G.1.2.1 + + if huffman.get_bits(reader, 1)? 
== 1 { + coefficients[0] |= bit; + } + } else { + // Section G.1.2.3 + + if *eob_run > 0 { + *eob_run -= 1; + refine_non_zeroes(reader, coefficients, huffman, spectral_selection, 64, bit)?; + return Ok(()); + } + + let mut index = spectral_selection.start; + + while index < spectral_selection.end { + let byte = huffman.decode(reader, ac_table.unwrap())?; + let r = byte >> 4; + let s = byte & 0x0f; + + let mut zero_run_length = r; + let mut value = 0; + + match s { + 0 => { + match r { + 15 => { + // Run length of 16 zero coefficients. + // We don't need to do anything special here, zero_run_length is 15 + // and then value (which is zero) gets written, resulting in 16 + // zero coefficients. + } + _ => { + *eob_run = (1 << r) - 1; + + if r > 0 { + *eob_run += huffman.get_bits(reader, r)?; + } + + // Force end of block. + zero_run_length = 64; + } + } + } + 1 => { + if huffman.get_bits(reader, 1)? == 1 { + value = bit; + } else { + value = -bit; + } + } + _ => return Err(Error::Format("unexpected huffman code".to_owned())), + } + + let range = Range { + start: index, + end: spectral_selection.end, + }; + index = refine_non_zeroes(reader, coefficients, huffman, range, zero_run_length, bit)?; + + if value != 0 { + coefficients[UNZIGZAG[index as usize] as usize] = value; + } + + index += 1; + } + } + + Ok(()) +} + +fn refine_non_zeroes<R: Read>( + reader: &mut R, + coefficients: &mut [i16; 64], + huffman: &mut HuffmanDecoder, + range: Range<u8>, + zrl: u8, + bit: i16, +) -> Result<u8> { + debug_assert_eq!(coefficients.len(), 64); + + let last = range.end - 1; + let mut zero_run_length = zrl; + + for i in range { + let index = UNZIGZAG[i as usize] as usize; + + let coefficient = &mut coefficients[index]; + + if *coefficient == 0 { + if zero_run_length == 0 { + return Ok(i); + } + + zero_run_length -= 1; + } else if huffman.get_bits(reader, 1)? 
== 1 && *coefficient & bit == 0 { + if *coefficient > 0 { + *coefficient = coefficient + .checked_add(bit) + .ok_or_else(|| Error::Format("Coefficient overflow".to_owned()))?; + } else { + *coefficient = coefficient + .checked_sub(bit) + .ok_or_else(|| Error::Format("Coefficient overflow".to_owned()))?; + } + } + } + + Ok(last) +} + +fn compute_image( + components: &[Component], + mut data: Vec<Vec<u8>>, + output_size: Dimensions, + color_transform: ColorTransform, +) -> Result<Vec<u8>> { + if data.is_empty() || data.iter().any(Vec::is_empty) { + return Err(Error::Format("not all components have data".to_owned())); + } + + if components.len() == 1 { + let component = &components[0]; + let mut decoded: Vec<u8> = data.remove(0); + + let width = component.size.width as usize; + let height = component.size.height as usize; + let size = width * height; + let line_stride = component.block_size.width as usize * component.dct_scale; + + // if the image width is a multiple of the block size, + // then we don't have to move bytes in the decoded data + if usize::from(output_size.width) != line_stride { + // The first line already starts at index 0, so we need to move only lines 1..height + // We move from the top down because all lines are being moved backwards. 
+ for y in 1..height { + let destination_idx = y * width; + let source_idx = y * line_stride; + let end = source_idx + width; + decoded.copy_within(source_idx..end, destination_idx); + } + } + decoded.resize(size, 0); + Ok(decoded) + } else { + compute_image_parallel(components, data, output_size, color_transform) + } +} + +pub(crate) fn choose_color_convert_func( + component_count: usize, + color_transform: ColorTransform, +) -> Result<fn(&[Vec<u8>], &mut [u8])> { + match component_count { + 3 => match color_transform { + ColorTransform::None => Ok(color_no_convert), + ColorTransform::Grayscale => Err(Error::Format( + "Invalid number of channels (3) for Grayscale data".to_string(), + )), + ColorTransform::RGB => Ok(color_convert_line_rgb), + ColorTransform::YCbCr => Ok(color_convert_line_ycbcr), + ColorTransform::CMYK => Err(Error::Format( + "Invalid number of channels (3) for CMYK data".to_string(), + )), + ColorTransform::YCCK => Err(Error::Format( + "Invalid number of channels (3) for YCCK data".to_string(), + )), + ColorTransform::JcsBgYcc => Err(Error::Unsupported( + UnsupportedFeature::ColorTransform(ColorTransform::JcsBgYcc), + )), + ColorTransform::JcsBgRgb => Err(Error::Unsupported( + UnsupportedFeature::ColorTransform(ColorTransform::JcsBgRgb), + )), + ColorTransform::Unknown => Err(Error::Format("Unknown colour transform".to_string())), + }, + 4 => match color_transform { + ColorTransform::None => Ok(color_no_convert), + ColorTransform::Grayscale => Err(Error::Format( + "Invalid number of channels (4) for Grayscale data".to_string(), + )), + ColorTransform::RGB => Err(Error::Format( + "Invalid number of channels (4) for RGB data".to_string(), + )), + ColorTransform::YCbCr => Err(Error::Format( + "Invalid number of channels (4) for YCbCr data".to_string(), + )), + ColorTransform::CMYK => Ok(color_convert_line_cmyk), + ColorTransform::YCCK => Ok(color_convert_line_ycck), + + ColorTransform::JcsBgYcc => Err(Error::Unsupported( + 
UnsupportedFeature::ColorTransform(ColorTransform::JcsBgYcc), + )), + ColorTransform::JcsBgRgb => Err(Error::Unsupported( + UnsupportedFeature::ColorTransform(ColorTransform::JcsBgRgb), + )), + ColorTransform::Unknown => Err(Error::Format("Unknown colour transform".to_string())), + }, + _ => panic!(), + } +} + +fn color_convert_line_rgb(data: &[Vec<u8>], output: &mut [u8]) { + assert!(data.len() == 3, "wrong number of components for rgb"); + let [r, g, b]: &[Vec<u8>; 3] = data.try_into().unwrap(); + for (((chunk, r), g), b) in output + .chunks_exact_mut(3) + .zip(r.iter()) + .zip(g.iter()) + .zip(b.iter()) + { + chunk[0] = *r; + chunk[1] = *g; + chunk[2] = *b; + } +} + +fn color_convert_line_ycbcr(data: &[Vec<u8>], output: &mut [u8]) { + assert!(data.len() == 3, "wrong number of components for ycbcr"); + let [y, cb, cr]: &[_; 3] = data.try_into().unwrap(); + + #[cfg(not(feature = "platform_independent"))] + let arch_specific_pixels = { + if let Some(ycbcr) = crate::arch::get_color_convert_line_ycbcr() { + #[allow(unsafe_code)] + unsafe { + ycbcr(y, cb, cr, output) + } + } else { + 0 + } + }; + + #[cfg(feature = "platform_independent")] + let arch_specific_pixels = 0; + + for (((chunk, y), cb), cr) in output + .chunks_exact_mut(3) + .zip(y.iter()) + .zip(cb.iter()) + .zip(cr.iter()) + .skip(arch_specific_pixels) + { + let (r, g, b) = ycbcr_to_rgb(*y, *cb, *cr); + chunk[0] = r; + chunk[1] = g; + chunk[2] = b; + } +} + +fn color_convert_line_ycck(data: &[Vec<u8>], output: &mut [u8]) { + assert!(data.len() == 4, "wrong number of components for ycck"); + let [c, m, y, k]: &[Vec<u8>; 4] = data.try_into().unwrap(); + + for ((((chunk, c), m), y), k) in output + .chunks_exact_mut(4) + .zip(c.iter()) + .zip(m.iter()) + .zip(y.iter()) + .zip(k.iter()) + { + let (r, g, b) = ycbcr_to_rgb(*c, *m, *y); + chunk[0] = r; + chunk[1] = g; + chunk[2] = b; + chunk[3] = 255 - *k; + } +} + +fn color_convert_line_cmyk(data: &[Vec<u8>], output: &mut [u8]) { + assert!(data.len() == 4, "wrong 
/// Number of fractional bits used by the fixed-point colour conversion.
const FIXED_POINT_OFFSET: i32 = 20;
/// One half in fixed-point representation; added so the final shift rounds.
const HALF: i32 = (1 << FIXED_POINT_OFFSET) / 2;

/// Converts one YCbCr sample triple to RGB with fixed-point arithmetic.
///
/// ITU-R BT.601
/// Based on libjpeg-turbo's jdcolext.c
fn ycbcr_to_rgb(y: u8, cb: u8, cr: u8) -> (u8, u8, u8) {
    // Luma in fixed point, pre-biased by one half for rounding.
    let luma = (i32::from(y) << FIXED_POINT_OFFSET) + HALF;
    // Chroma samples are stored with an offset of 128.
    let blue_diff = i32::from(cb) - 128;
    let red_diff = i32::from(cr) - 128;

    let red = luma + stbi_f2f(1.40200) * red_diff;
    let green = luma - stbi_f2f(0.34414) * blue_diff - stbi_f2f(0.71414) * red_diff;
    let blue = luma + stbi_f2f(1.77200) * blue_diff;

    (
        clamp_fixed_point(red),
        clamp_fixed_point(green),
        clamp_fixed_point(blue),
    )
}

/// Converts a floating point factor to fixed point, rounding half up.
fn stbi_f2f(x: f32) -> i32 {
    let scale = (1 << FIXED_POINT_OFFSET) as f32;
    (x * scale + 0.5) as i32
}

/// Drops the fractional bits of a fixed-point value and saturates the result
/// to the `0..=255` range.
fn clamp_fixed_point(value: i32) -> u8 {
    match value >> FIXED_POINT_OFFSET {
        whole if whole < 0 => 0,
        whole if whole > 255 => 255,
        whole => whole as u8,
    }
}
frame.image_size.height as usize * frame.image_size.width as usize; + assert!(ncomp <= MAX_COMPONENTS); + let mut results = vec![vec![0u16; npixel]; ncomp]; + + let components: Vec<Component> = scan + .component_indices + .iter() + .map(|&i| frame.components[i].clone()) + .collect(); + + // Verify that all required huffman tables has been set. + if scan + .dc_table_indices + .iter() + .any(|&i| self.dc_huffman_tables[i].is_none()) + { + return Err(Error::Format( + "scan makes use of unset dc huffman table".to_owned(), + )); + } + + let mut huffman = HuffmanDecoder::new(); + let reader = &mut self.reader; + let mut mcus_left_until_restart = self.restart_interval; + let mut expected_rst_num = 0; + let mut ra = [0u16; MAX_COMPONENTS]; + let mut rb = [0u16; MAX_COMPONENTS]; + let mut rc = [0u16; MAX_COMPONENTS]; + + let width = frame.image_size.width as usize; + let height = frame.image_size.height as usize; + + let mut differences = vec![Vec::with_capacity(npixel); ncomp]; + for _mcu_y in 0..height { + for _mcu_x in 0..width { + if self.restart_interval > 0 { + if mcus_left_until_restart == 0 { + match huffman.take_marker(reader)? 
{ + Some(Marker::RST(n)) => { + if n != expected_rst_num { + return Err(Error::Format(format!( + "found RST{} where RST{} was expected", + n, expected_rst_num + ))); + } + + huffman.reset(); + + expected_rst_num = (expected_rst_num + 1) % 8; + mcus_left_until_restart = self.restart_interval; + } + Some(marker) => { + return Err(Error::Format(format!( + "found marker {:?} inside scan where RST{} was expected", + marker, expected_rst_num + ))) + } + None => { + return Err(Error::Format(format!( + "no marker found where RST{} was expected", + expected_rst_num + ))) + } + } + } + + mcus_left_until_restart -= 1; + } + + for (i, _component) in components.iter().enumerate() { + let dc_table = self.dc_huffman_tables[scan.dc_table_indices[i]] + .as_ref() + .unwrap(); + let value = huffman.decode(reader, dc_table)?; + let diff = match value { + 0 => 0, + 1..=15 => huffman.receive_extend(reader, value)? as i32, + 16 => 32768, + _ => { + // Section F.1.2.1.1 + // Table F.1 + return Err(Error::Format( + "invalid DC difference magnitude category".to_owned(), + )); + } + }; + differences[i].push(diff); + } + } + } + + if scan.predictor_selection == Predictor::Ra { + for (i, _component) in components.iter().enumerate() { + // calculate the top left pixel + let diff = differences[i][0]; + let prediction = 1 << (frame.precision - scan.point_transform - 1) as i32; + let result = ((prediction + diff) & 0xFFFF) as u16; // modulo 2^16 + let result = result << scan.point_transform; + results[i][0] = result; + + // calculate leftmost column, using top pixel as predictor + let mut previous = result; + for mcu_y in 1..height { + let diff = differences[i][mcu_y * width]; + let prediction = previous as i32; + let result = ((prediction + diff) & 0xFFFF) as u16; // modulo 2^16 + let result = result << scan.point_transform; + results[i][mcu_y * width] = result; + previous = result; + } + + // calculate rows, using left pixel as predictor + for mcu_y in 0..height { + for mcu_x in 1..width { + let 
diff = differences[i][mcu_y * width + mcu_x]; + let prediction = results[i][mcu_y * width + mcu_x - 1] as i32; + let result = ((prediction + diff) & 0xFFFF) as u16; // modulo 2^16 + let result = result << scan.point_transform; + results[i][mcu_y * width + mcu_x] = result; + } + } + } + } else { + for mcu_y in 0..height { + for mcu_x in 0..width { + for (i, _component) in components.iter().enumerate() { + let diff = differences[i][mcu_y * width + mcu_x]; + + // The following lines could be further optimized, e.g. moving the checks + // and updates of the previous values into the prediction function or + // iterating such that diagonals with mcu_x + mcu_y = const are computed at + // the same time to exploit independent predictions in this case + if mcu_x > 0 { + ra[i] = results[i][mcu_y * frame.image_size.width as usize + mcu_x - 1]; + } + if mcu_y > 0 { + rb[i] = + results[i][(mcu_y - 1) * frame.image_size.width as usize + mcu_x]; + if mcu_x > 0 { + rc[i] = results[i] + [(mcu_y - 1) * frame.image_size.width as usize + (mcu_x - 1)]; + } + } + let prediction = predict( + ra[i] as i32, + rb[i] as i32, + rc[i] as i32, + scan.predictor_selection, + scan.point_transform, + frame.precision, + mcu_x, + mcu_y, + self.restart_interval > 0 + && mcus_left_until_restart == self.restart_interval - 1, + ); + let result = ((prediction + diff) & 0xFFFF) as u16; // modulo 2^16 + results[i][mcu_y * width + mcu_x] = result << scan.point_transform; + } + } + } + } + + let mut marker = huffman.take_marker(&mut self.reader)?; + while let Some(Marker::RST(_)) = marker { + marker = self.read_marker().ok(); + } + Ok((marker, results)) + } +} + +/// H.1.2.1 +fn predict( + ra: i32, + rb: i32, + rc: i32, + predictor: Predictor, + point_transform: u8, + input_precision: u8, + ix: usize, + iy: usize, + restart: bool, +) -> i32 { + if (ix == 0 && iy == 0) || restart { + // start of first line or restart + if input_precision > 1 + point_transform { + 1 << (input_precision - point_transform - 1) + 
} else { + 0 + } + } else if iy == 0 { + // rest of first line + ra + } else if ix == 0 { + // start of other line + rb + } else { + // use predictor Table H.1 + match predictor { + Predictor::NoPrediction => 0, + Predictor::Ra => ra, + Predictor::Rb => rb, + Predictor::Rc => rc, + Predictor::RaRbRc1 => ra + rb - rc, + Predictor::RaRbRc2 => ra + ((rb - rc) >> 1), + Predictor::RaRbRc3 => rb + ((ra - rc) >> 1), + Predictor::RaRb => (ra + rb) / 2, + } + } +} + +pub fn compute_image_lossless(frame: &FrameInfo, mut data: Vec<Vec<u16>>) -> Result<Vec<u8>> { + if data.is_empty() || data.iter().any(Vec::is_empty) { + return Err(Error::Format("not all components have data".to_owned())); + } + let output_size = frame.output_size; + let components = &frame.components; + let ncomp = components.len(); + + if ncomp == 1 { + let decoded = convert_to_u8(frame, data.remove(0)); + Ok(decoded) + } else { + let mut decoded: Vec<u16> = + vec![0u16; ncomp * output_size.width as usize * output_size.height as usize]; + for (x, chunk) in decoded.chunks_mut(ncomp).enumerate() { + for (i, (component_data, _)) in data.iter().zip(components.iter()).enumerate() { + chunk[i] = component_data[x]; + } + } + let decoded = convert_to_u8(frame, decoded); + Ok(decoded) + } +} + +fn convert_to_u8(frame: &FrameInfo, data: Vec<u16>) -> Vec<u8> { + if frame.precision == 8 { + data.iter().map(|x| *x as u8).collect() + } else { + // we output native endian, which is the standard for image-rs + let ne_bytes: Vec<_> = data.iter().map(|x| x.to_ne_bytes()).collect(); + ne_bytes.concat() + } +} diff --git a/vendor/jpeg-decoder/src/error.rs b/vendor/jpeg-decoder/src/error.rs new file mode 100644 index 0000000..c5fe7b6 --- /dev/null +++ b/vendor/jpeg-decoder/src/error.rs @@ -0,0 +1,75 @@ +use alloc::boxed::Box; +use alloc::fmt; +use alloc::string::String; +use core::result; +use std::error::Error as StdError; +use std::io::Error as IoError; + +use crate::ColorTransform; + +pub type Result<T> = result::Result<T, 
Error>; + +/// An enumeration over JPEG features (currently) unsupported by this library. +/// +/// Support for features listed here may be included in future versions of this library. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum UnsupportedFeature { + /// Hierarchical JPEG. + Hierarchical, + /// JPEG using arithmetic entropy coding instead of Huffman coding. + ArithmeticEntropyCoding, + /// Sample precision in bits. 8 bit sample precision is what is currently supported in non-lossless coding process. + SamplePrecision(u8), + /// Number of components in an image. 1, 3 and 4 components are currently supported. + ComponentCount(u8), + /// An image can specify a zero height in the frame header and use the DNL (Define Number of + /// Lines) marker at the end of the first scan to define the number of lines in the frame. + DNL, + /// Subsampling ratio. + SubsamplingRatio, + /// A subsampling ratio not representable as an integer. + NonIntegerSubsamplingRatio, + /// Colour transform + ColorTransform(ColorTransform), +} + +/// Errors that can occur while decoding a JPEG image. +#[derive(Debug)] +pub enum Error { + /// The image is not formatted properly. The string contains detailed information about the + /// error. + Format(String), + /// The image makes use of a JPEG feature not (currently) supported by this library. + Unsupported(UnsupportedFeature), + /// An I/O error occurred while decoding the image. + Io(IoError), + /// An internal error occurred while decoding the image. 
+ Internal(Box<dyn StdError + Send + Sync + 'static>), //TODO: not used, can be removed with the next version bump +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Error::Format(ref desc) => write!(f, "invalid JPEG format: {}", desc), + Error::Unsupported(ref feat) => write!(f, "unsupported JPEG feature: {:?}", feat), + Error::Io(ref err) => err.fmt(f), + Error::Internal(ref err) => err.fmt(f), + } + } +} + +impl StdError for Error { + fn source(&self) -> Option<&(dyn StdError + 'static)> { + match *self { + Error::Io(ref err) => Some(err), + Error::Internal(ref err) => Some(&**err), + _ => None, + } + } +} + +impl From<IoError> for Error { + fn from(err: IoError) -> Error { + Error::Io(err) + } +} diff --git a/vendor/jpeg-decoder/src/huffman.rs b/vendor/jpeg-decoder/src/huffman.rs new file mode 100644 index 0000000..fca57c1 --- /dev/null +++ b/vendor/jpeg-decoder/src/huffman.rs @@ -0,0 +1,346 @@ +use alloc::borrow::ToOwned; +use alloc::vec; +use alloc::vec::Vec; +use core::iter; +use std::io::Read; +use crate::read_u8; +use crate::error::{Error, Result}; +use crate::marker::Marker; +use crate::parser::ScanInfo; + +const LUT_BITS: u8 = 8; + +#[derive(Debug)] +pub struct HuffmanDecoder { + bits: u64, + num_bits: u8, + marker: Option<Marker>, +} + +impl HuffmanDecoder { + pub fn new() -> HuffmanDecoder { + HuffmanDecoder { + bits: 0, + num_bits: 0, + marker: None, + } + } + + // Section F.2.2.3 + // Figure F.16 + pub fn decode<R: Read>(&mut self, reader: &mut R, table: &HuffmanTable) -> Result<u8> { + if self.num_bits < 16 { + self.read_bits(reader)?; + } + + let (value, size) = table.lut[self.peek_bits(LUT_BITS) as usize]; + + if size > 0 { + self.consume_bits(size); + Ok(value) + } + else { + let bits = self.peek_bits(16); + + for i in LUT_BITS .. 
16 { + let code = (bits >> (15 - i)) as i32; + + if code <= table.maxcode[i as usize] { + self.consume_bits(i + 1); + + let index = (code + table.delta[i as usize]) as usize; + return Ok(table.values[index]); + } + } + + Err(Error::Format("failed to decode huffman code".to_owned())) + } + } + + pub fn decode_fast_ac<R: Read>(&mut self, reader: &mut R, table: &HuffmanTable) -> Result<Option<(i16, u8)>> { + if let Some(ref ac_lut) = table.ac_lut { + if self.num_bits < LUT_BITS { + self.read_bits(reader)?; + } + + let (value, run_size) = ac_lut[self.peek_bits(LUT_BITS) as usize]; + + if run_size != 0 { + let run = run_size >> 4; + let size = run_size & 0x0f; + + self.consume_bits(size); + return Ok(Some((value, run))); + } + } + + Ok(None) + } + + #[inline] + pub fn get_bits<R: Read>(&mut self, reader: &mut R, count: u8) -> Result<u16> { + if self.num_bits < count { + self.read_bits(reader)?; + } + + let bits = self.peek_bits(count); + self.consume_bits(count); + + Ok(bits) + } + + #[inline] + pub fn receive_extend<R: Read>(&mut self, reader: &mut R, count: u8) -> Result<i16> { + let value = self.get_bits(reader, count)?; + Ok(extend(value, count)) + } + + pub fn reset(&mut self) { + self.bits = 0; + self.num_bits = 0; + } + + pub fn take_marker<R: Read>(&mut self, reader: &mut R) -> Result<Option<Marker>> { + self.read_bits(reader).map(|_| self.marker.take()) + } + + #[inline] + fn peek_bits(&mut self, count: u8) -> u16 { + debug_assert!(count <= 16); + debug_assert!(self.num_bits >= count); + + ((self.bits >> (64 - count)) & ((1 << count) - 1)) as u16 + } + + #[inline] + fn consume_bits(&mut self, count: u8) { + debug_assert!(self.num_bits >= count); + + self.bits <<= count as usize; + self.num_bits -= count; + } + + fn read_bits<R: Read>(&mut self, reader: &mut R) -> Result<()> { + while self.num_bits <= 56 { + // Fill with zero bits if we have reached the end. 
+ let byte = match self.marker { + Some(_) => 0, + None => read_u8(reader)?, + }; + + if byte == 0xFF { + let mut next_byte = read_u8(reader)?; + + // Check for byte stuffing. + if next_byte != 0x00 { + // We seem to have reached the end of entropy-coded data and encountered a + // marker. Since we can't put data back into the reader, we have to continue + // reading to identify the marker so we can pass it on. + + // Section B.1.1.2 + // "Any marker may optionally be preceded by any number of fill bytes, which are bytes assigned code X’FF’." + while next_byte == 0xFF { + next_byte = read_u8(reader)?; + } + + match next_byte { + 0x00 => return Err(Error::Format("FF 00 found where marker was expected".to_owned())), + _ => self.marker = Some(Marker::from_u8(next_byte).unwrap()), + } + + continue; + } + } + + self.bits |= (byte as u64) << (56 - self.num_bits); + self.num_bits += 8; + } + + Ok(()) + } +} + +// Section F.2.2.1 +// Figure F.12 +fn extend(value: u16, count: u8) -> i16 { + let vt = 1 << (count as u16 - 1); + + if value < vt { + value as i16 + (-1 << count as i16) + 1 + } else { + value as i16 + } +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum HuffmanTableClass { + DC, + AC, +} + +pub struct HuffmanTable { + values: Vec<u8>, + delta: [i32; 16], + maxcode: [i32; 16], + + lut: [(u8, u8); 1 << LUT_BITS], + ac_lut: Option<[(i16, u8); 1 << LUT_BITS]>, +} + +impl HuffmanTable { + pub fn new(bits: &[u8; 16], values: &[u8], class: HuffmanTableClass) -> Result<HuffmanTable> { + let (huffcode, huffsize) = derive_huffman_codes(bits)?; + + // Section F.2.2.3 + // Figure F.15 + // delta[i] is set to VALPTR(I) - MINCODE(I) + let mut delta = [0i32; 16]; + let mut maxcode = [-1i32; 16]; + let mut j = 0; + + for i in 0 .. 16 { + if bits[i] != 0 { + delta[i] = j as i32 - huffcode[j] as i32; + j += bits[i] as usize; + maxcode[i] = huffcode[j - 1] as i32; + } + } + + // Build a lookup table for faster decoding. 
+ let mut lut = [(0u8, 0u8); 1 << LUT_BITS]; + + for (i, &size) in huffsize.iter().enumerate().filter(|&(_, &size)| size <= LUT_BITS) { + let bits_remaining = LUT_BITS - size; + let start = (huffcode[i] << bits_remaining) as usize; + + let val = (values[i], size); + for b in &mut lut[start..][..1 << bits_remaining] { + *b = val; + } + } + + // Build a lookup table for small AC coefficients which both decodes the value and does the + // equivalent of receive_extend. + let ac_lut = match class { + HuffmanTableClass::DC => None, + HuffmanTableClass::AC => { + let mut table = [(0i16, 0u8); 1 << LUT_BITS]; + + for (i, &(value, size)) in lut.iter().enumerate() { + let run_length = value >> 4; + let magnitude_category = value & 0x0f; + + if magnitude_category > 0 && size + magnitude_category <= LUT_BITS { + let unextended_ac_value = (((i << size) & ((1 << LUT_BITS) - 1)) >> (LUT_BITS - magnitude_category)) as u16; + let ac_value = extend(unextended_ac_value, magnitude_category); + + table[i] = (ac_value, (run_length << 4) | (size + magnitude_category)); + } + } + + Some(table) + }, + }; + + Ok(HuffmanTable { + values: values.to_vec(), + delta, + maxcode, + lut, + ac_lut, + }) + } +} + +// Section C.2 +fn derive_huffman_codes(bits: &[u8; 16]) -> Result<(Vec<u16>, Vec<u8>)> { + // Figure C.1 + let huffsize = bits.iter() + .enumerate() + .fold(Vec::new(), |mut acc, (i, &value)| { + acc.extend(iter::repeat((i + 1) as u8).take(value as usize)); + acc + }); + + // Figure C.2 + let mut huffcode = vec![0u16; huffsize.len()]; + let mut code_size = huffsize[0]; + let mut code = 0u32; + + for (i, &size) in huffsize.iter().enumerate() { + while code_size < size { + code <<= 1; + code_size += 1; + } + + if code >= (1u32 << size) { + return Err(Error::Format("bad huffman code length".to_owned())); + } + + huffcode[i] = code as u16; + code += 1; + } + + Ok((huffcode, huffsize)) +} + +// https://www.loc.gov/preservation/digital/formats/fdd/fdd000063.shtml +// "Avery Lee, writing in the 
rec.video.desktop newsgroup in 2001, commented that "MJPEG, or at +// least the MJPEG in AVIs having the MJPG fourcc, is restricted JPEG with a fixed -- and +// *omitted* -- Huffman table. The JPEG must be YCbCr colorspace, it must be 4:2:2, and it must +// use basic Huffman encoding, not arithmetic or progressive.... You can indeed extract the +// MJPEG frames and decode them with a regular JPEG decoder, but you have to prepend the DHT +// segment to them, or else the decoder won't have any idea how to decompress the data. +// The exact table necessary is given in the OpenDML spec."" +pub fn fill_default_mjpeg_tables(scan: &ScanInfo, + dc_huffman_tables: &mut[Option<HuffmanTable>], + ac_huffman_tables: &mut[Option<HuffmanTable>]) { + // Section K.3.3 + + if dc_huffman_tables[0].is_none() && scan.dc_table_indices.iter().any(|&i| i == 0) { + // Table K.3 + dc_huffman_tables[0] = Some(HuffmanTable::new( + &[0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00], + &[0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B], HuffmanTableClass::DC).unwrap()); + } + if dc_huffman_tables[1].is_none() && scan.dc_table_indices.iter().any(|&i| i == 1) { + // Table K.4 + dc_huffman_tables[1] = Some(HuffmanTable::new( + &[0x00, 0x03, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00], + &[0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B], HuffmanTableClass::DC).unwrap()); + } + if ac_huffman_tables[0].is_none() && scan.ac_table_indices.iter().any(|&i| i == 0) { + // Table K.5 + ac_huffman_tables[0] = Some(HuffmanTable::new( + &[0x00, 0x02, 0x01, 0x03, 0x03, 0x02, 0x04, 0x03, 0x05, 0x05, 0x04, 0x04, 0x00, 0x00, 0x01, 0x7D], + &[0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07, + 0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xA1, 0x08, 0x23, 0x42, 0xB1, 0xC1, 0x15, 0x52, 0xD1, 0xF0, + 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0A, 0x16, 
0x17, 0x18, 0x19, 0x1A, 0x25, 0x26, 0x27, 0x28, + 0x29, 0x2A, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, + 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, + 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, + 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, + 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, 0xC4, 0xC5, + 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xE1, 0xE2, + 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, + 0xF9, 0xFA + ], HuffmanTableClass::AC).unwrap()); + } + if ac_huffman_tables[1].is_none() && scan.ac_table_indices.iter().any(|&i| i == 1) { + // Table K.6 + ac_huffman_tables[1] = Some(HuffmanTable::new( + &[0x00, 0x02, 0x01, 0x02, 0x04, 0x04, 0x03, 0x04, 0x07, 0x05, 0x04, 0x04, 0x00, 0x01, 0x02, 0x77], + &[0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71, + 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91, 0xA1, 0xB1, 0xC1, 0x09, 0x23, 0x33, 0x52, 0xF0, + 0x15, 0x62, 0x72, 0xD1, 0x0A, 0x16, 0x24, 0x34, 0xE1, 0x25, 0xF1, 0x17, 0x18, 0x19, 0x1A, 0x26, + 0x27, 0x28, 0x29, 0x2A, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, + 0x49, 0x4A, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, + 0x69, 0x6A, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8A, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0xA2, 0xA3, 0xA4, 0xA5, + 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xC2, 0xC3, + 0xC4, 0xC5, 0xC6, 0xC7, 0xC8, 0xC9, 0xCA, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, + 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7, 0xE8, 0xE9, 0xEA, 0xF2, 0xF3, 0xF4, 
0xF5, 0xF6, 0xF7, 0xF8, + 0xF9, 0xFA + ], HuffmanTableClass::AC).unwrap()); + } +} diff --git a/vendor/jpeg-decoder/src/idct.rs b/vendor/jpeg-decoder/src/idct.rs new file mode 100644 index 0000000..ad8dc4c --- /dev/null +++ b/vendor/jpeg-decoder/src/idct.rs @@ -0,0 +1,657 @@ +// Malicious JPEG files can cause operations in the idct to overflow. +// One example is tests/crashtest/images/imagetestsuite/b0b8914cc5f7a6eff409f16d8cc236c5.jpg +// That's why wrapping operators are needed. + +// Note: we have many values that are straight from a reference. +// Do not warn on them or try to automatically change them. +#![allow(clippy::excessive_precision)] +// Note: consistency for unrolled, scaled offset loops +#![allow(clippy::erasing_op)] +#![allow(clippy::identity_op)] +use crate::parser::Dimensions; +use core::{convert::TryFrom, num::Wrapping}; + +pub(crate) fn choose_idct_size(full_size: Dimensions, requested_size: Dimensions) -> usize { + fn scaled(len: u16, scale: usize) -> u16 { + ((len as u32 * scale as u32 - 1) / 8 + 1) as u16 + } + + for &scale in &[1, 2, 4] { + if scaled(full_size.width, scale) >= requested_size.width + || scaled(full_size.height, scale) >= requested_size.height + { + return scale; + } + } + + 8 +} + +#[test] +fn test_choose_idct_size() { + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 200, + height: 200 + } + ), + 1 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 500, + height: 500 + } + ), + 1 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 684, + height: 456 + } + ), + 1 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 999, + height: 456 + } + ), + 1 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 684, + height: 999 + } + ), + 1 
+ ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 500, + height: 333 + }, + Dimensions { + width: 63, + height: 42 + } + ), + 1 + ); + + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 685, + height: 999 + } + ), + 2 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 1000, + height: 1000 + } + ), + 2 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 1400, + height: 1400 + } + ), + 4 + ); + + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 5472, + height: 3648 + } + ), + 8 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 16384, + height: 16384 + } + ), + 8 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 1, + height: 1 + }, + Dimensions { + width: 65535, + height: 65535 + } + ), + 8 + ); + assert_eq!( + choose_idct_size( + Dimensions { + width: 5472, + height: 3648 + }, + Dimensions { + width: 16384, + height: 16384 + } + ), + 8 + ); +} + +pub(crate) fn dequantize_and_idct_block( + scale: usize, + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output_linestride: usize, + output: &mut [u8], +) { + match scale { + 8 => dequantize_and_idct_block_8x8( + coefficients, + quantization_table, + output_linestride, + output, + ), + 4 => dequantize_and_idct_block_4x4( + coefficients, + quantization_table, + output_linestride, + output, + ), + 2 => dequantize_and_idct_block_2x2( + coefficients, + quantization_table, + output_linestride, + output, + ), + 1 => dequantize_and_idct_block_1x1( + coefficients, + quantization_table, + output_linestride, + output, + ), + _ => panic!("Unsupported IDCT scale {}/8", scale), + } +} + +pub fn dequantize_and_idct_block_8x8( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + 
output_linestride: usize, + output: &mut [u8], +) { + #[cfg(not(feature = "platform_independent"))] + if let Some(idct) = crate::arch::get_dequantize_and_idct_block_8x8() { + #[allow(unsafe_code)] + unsafe { + return idct(coefficients, quantization_table, output_linestride, output); + } + } + + let output = output.chunks_mut(output_linestride); + dequantize_and_idct_block_8x8_inner(coefficients, quantization_table, output) +} + +// This is based on stb_image's 'stbi__idct_block'. +fn dequantize_and_idct_block_8x8_inner<'a, I>( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output: I, +) where + I: IntoIterator<Item = &'a mut [u8]>, + I::IntoIter: ExactSizeIterator<Item = &'a mut [u8]>, +{ + let output = output.into_iter(); + debug_assert!( + output.len() >= 8, + "Output iterator has the wrong length: {}", + output.len() + ); + + let mut temp = [Wrapping(0); 64]; + + // columns + for i in 0..8 { + if coefficients[i + 8] == 0 + && coefficients[i + 16] == 0 + && coefficients[i + 24] == 0 + && coefficients[i + 32] == 0 + && coefficients[i + 40] == 0 + && coefficients[i + 48] == 0 + && coefficients[i + 56] == 0 + { + let dcterm = dequantize(coefficients[i], quantization_table[i]) << 2; + temp[i] = dcterm; + temp[i + 8] = dcterm; + temp[i + 16] = dcterm; + temp[i + 24] = dcterm; + temp[i + 32] = dcterm; + temp[i + 40] = dcterm; + temp[i + 48] = dcterm; + temp[i + 56] = dcterm; + } else { + let s0 = dequantize(coefficients[i], quantization_table[i]); + let s1 = dequantize(coefficients[i + 8], quantization_table[i + 8]); + let s2 = dequantize(coefficients[i + 16], quantization_table[i + 16]); + let s3 = dequantize(coefficients[i + 24], quantization_table[i + 24]); + let s4 = dequantize(coefficients[i + 32], quantization_table[i + 32]); + let s5 = dequantize(coefficients[i + 40], quantization_table[i + 40]); + let s6 = dequantize(coefficients[i + 48], quantization_table[i + 48]); + let s7 = dequantize(coefficients[i + 56], quantization_table[i + 56]); + + 
let Kernel { + xs: [x0, x1, x2, x3], + ts: [t0, t1, t2, t3], + } = kernel( + [s0, s1, s2, s3, s4, s5, s6, s7], + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + 512, + ); + + temp[i] = (x0 + t3) >> 10; + temp[i + 56] = (x0 - t3) >> 10; + temp[i + 8] = (x1 + t2) >> 10; + temp[i + 48] = (x1 - t2) >> 10; + temp[i + 16] = (x2 + t1) >> 10; + temp[i + 40] = (x2 - t1) >> 10; + temp[i + 24] = (x3 + t0) >> 10; + temp[i + 32] = (x3 - t0) >> 10; + } + } + + for (chunk, output_chunk) in temp.chunks_exact(8).zip(output) { + let chunk = <&[_; 8]>::try_from(chunk).unwrap(); + + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + const X_SCALE: i32 = 65536 + (128 << 17); + + // eliminate downstream bounds checks + let output_chunk = &mut output_chunk[..8]; + + // TODO When the minimum rust version supports it + // let [s0, rest @ ..] 
= chunk; + let (s0, rest) = chunk.split_first().unwrap(); + if *rest == [Wrapping(0); 7] { + let dcterm = stbi_clamp((stbi_fsh(*s0) + Wrapping(X_SCALE)) >> 17); + output_chunk[0] = dcterm; + output_chunk[1] = dcterm; + output_chunk[2] = dcterm; + output_chunk[3] = dcterm; + output_chunk[4] = dcterm; + output_chunk[5] = dcterm; + output_chunk[6] = dcterm; + output_chunk[7] = dcterm; + } else { + let Kernel { + xs: [x0, x1, x2, x3], + ts: [t0, t1, t2, t3], + } = kernel(*chunk, X_SCALE); + + output_chunk[0] = stbi_clamp((x0 + t3) >> 17); + output_chunk[7] = stbi_clamp((x0 - t3) >> 17); + output_chunk[1] = stbi_clamp((x1 + t2) >> 17); + output_chunk[6] = stbi_clamp((x1 - t2) >> 17); + output_chunk[2] = stbi_clamp((x2 + t1) >> 17); + output_chunk[5] = stbi_clamp((x2 - t1) >> 17); + output_chunk[3] = stbi_clamp((x3 + t0) >> 17); + output_chunk[4] = stbi_clamp((x3 - t0) >> 17); + } + } +} + +struct Kernel { + xs: [Wrapping<i32>; 4], + ts: [Wrapping<i32>; 4], +} + +#[inline] +fn kernel_x([s0, s2, s4, s6]: [Wrapping<i32>; 4], x_scale: i32) -> [Wrapping<i32>; 4] { + // Even `chunk` indicies + let (t2, t3); + { + let p2 = s2; + let p3 = s6; + + let p1 = (p2 + p3) * stbi_f2f(0.5411961); + t2 = p1 + p3 * stbi_f2f(-1.847759065); + t3 = p1 + p2 * stbi_f2f(0.765366865); + } + + let (t0, t1); + { + let p2 = s0; + let p3 = s4; + + t0 = stbi_fsh(p2 + p3); + t1 = stbi_fsh(p2 - p3); + } + + let x0 = t0 + t3; + let x3 = t0 - t3; + let x1 = t1 + t2; + let x2 = t1 - t2; + + let x_scale = Wrapping(x_scale); + + [x0 + x_scale, x1 + x_scale, x2 + x_scale, x3 + x_scale] +} + +#[inline] +fn kernel_t([s1, s3, s5, s7]: [Wrapping<i32>; 4]) -> [Wrapping<i32>; 4] { + // Odd `chunk` indicies + let mut t0 = s7; + let mut t1 = s5; + let mut t2 = s3; + let mut t3 = s1; + + let p3 = t0 + t2; + let p4 = t1 + t3; + let p1 = t0 + t3; + let p2 = t1 + t2; + let p5 = (p3 + p4) * stbi_f2f(1.175875602); + + t0 *= stbi_f2f(0.298631336); + t1 *= stbi_f2f(2.053119869); + t2 *= stbi_f2f(3.072711026); + t3 *= 
stbi_f2f(1.501321110); + + let p1 = p5 + p1 * stbi_f2f(-0.899976223); + let p2 = p5 + p2 * stbi_f2f(-2.562915447); + let p3 = p3 * stbi_f2f(-1.961570560); + let p4 = p4 * stbi_f2f(-0.390180644); + + t3 += p1 + p4; + t2 += p2 + p3; + t1 += p2 + p4; + t0 += p1 + p3; + + [t0, t1, t2, t3] +} + +#[inline] +fn kernel([s0, s1, s2, s3, s4, s5, s6, s7]: [Wrapping<i32>; 8], x_scale: i32) -> Kernel { + Kernel { + xs: kernel_x([s0, s2, s4, s6], x_scale), + ts: kernel_t([s1, s3, s5, s7]), + } +} + +#[inline(always)] +fn dequantize(c: i16, q: u16) -> Wrapping<i32> { + Wrapping(i32::from(c) * i32::from(q)) +} + +// 4x4 and 2x2 IDCT based on Rakesh Dugad and Narendra Ahuja: "A Fast Scheme for Image Size Change in the Compressed Domain" (2001). +// http://sylvana.net/jpegcrop/jidctred/ +fn dequantize_and_idct_block_4x4( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output_linestride: usize, + output: &mut [u8], +) { + debug_assert_eq!(coefficients.len(), 64); + let mut temp = [Wrapping(0i32); 4 * 4]; + + const CONST_BITS: usize = 12; + const PASS1_BITS: usize = 2; + const FINAL_BITS: usize = CONST_BITS + PASS1_BITS + 3; + + // columns + for i in 0..4 { + let s0 = Wrapping(coefficients[i + 8 * 0] as i32 * quantization_table[i + 8 * 0] as i32); + let s1 = Wrapping(coefficients[i + 8 * 1] as i32 * quantization_table[i + 8 * 1] as i32); + let s2 = Wrapping(coefficients[i + 8 * 2] as i32 * quantization_table[i + 8 * 2] as i32); + let s3 = Wrapping(coefficients[i + 8 * 3] as i32 * quantization_table[i + 8 * 3] as i32); + + let x0 = (s0 + s2) << PASS1_BITS; + let x2 = (s0 - s2) << PASS1_BITS; + + let p1 = (s1 + s3) * stbi_f2f(0.541196100); + let t0 = (p1 + s3 * stbi_f2f(-1.847759065) + Wrapping(512)) >> (CONST_BITS - PASS1_BITS); + let t2 = (p1 + s1 * stbi_f2f(0.765366865) + Wrapping(512)) >> (CONST_BITS - PASS1_BITS); + + temp[i + 4 * 0] = x0 + t2; + temp[i + 4 * 3] = x0 - t2; + temp[i + 4 * 1] = x2 + t0; + temp[i + 4 * 2] = x2 - t0; + } + + for i in 0..4 { + let s0 = 
temp[i * 4 + 0]; + let s1 = temp[i * 4 + 1]; + let s2 = temp[i * 4 + 2]; + let s3 = temp[i * 4 + 3]; + + let x0 = (s0 + s2) << CONST_BITS; + let x2 = (s0 - s2) << CONST_BITS; + + let p1 = (s1 + s3) * stbi_f2f(0.541196100); + let t0 = p1 + s3 * stbi_f2f(-1.847759065); + let t2 = p1 + s1 * stbi_f2f(0.765366865); + + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + let x0 = x0 + Wrapping(1 << (FINAL_BITS - 1)) + Wrapping(128 << FINAL_BITS); + let x2 = x2 + Wrapping(1 << (FINAL_BITS - 1)) + Wrapping(128 << FINAL_BITS); + + let output = &mut output[i * output_linestride..][..4]; + output[0] = stbi_clamp((x0 + t2) >> FINAL_BITS); + output[3] = stbi_clamp((x0 - t2) >> FINAL_BITS); + output[1] = stbi_clamp((x2 + t0) >> FINAL_BITS); + output[2] = stbi_clamp((x2 - t0) >> FINAL_BITS); + } +} + +fn dequantize_and_idct_block_2x2( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + output_linestride: usize, + output: &mut [u8], +) { + debug_assert_eq!(coefficients.len(), 64); + + const SCALE_BITS: usize = 3; + + // Column 0 + let s00 = Wrapping(coefficients[8 * 0] as i32 * quantization_table[8 * 0] as i32); + let s10 = Wrapping(coefficients[8 * 1] as i32 * quantization_table[8 * 1] as i32); + + let x0 = s00 + s10; + let x2 = s00 - s10; + + // Column 1 + let s01 = Wrapping(coefficients[8 * 0 + 1] as i32 * quantization_table[8 * 0 + 1] as i32); + let s11 = Wrapping(coefficients[8 * 1 + 1] as i32 * quantization_table[8 * 1 + 1] as i32); + + let x1 = s01 + s11; + let x3 = s01 - s11; + + let x0 = x0 + Wrapping(1 << (SCALE_BITS - 1)) + Wrapping(128 << SCALE_BITS); + let x2 = x2 + Wrapping(1 << (SCALE_BITS - 1)) + 
Wrapping(128 << SCALE_BITS); + + // Row 0 + output[0] = stbi_clamp((x0 + x1) >> SCALE_BITS); + output[1] = stbi_clamp((x0 - x1) >> SCALE_BITS); + + // Row 1 + output[output_linestride + 0] = stbi_clamp((x2 + x3) >> SCALE_BITS); + output[output_linestride + 1] = stbi_clamp((x2 - x3) >> SCALE_BITS); +} + +fn dequantize_and_idct_block_1x1( + coefficients: &[i16; 64], + quantization_table: &[u16; 64], + _output_linestride: usize, + output: &mut [u8], +) { + debug_assert_eq!(coefficients.len(), 64); + + let s0 = (Wrapping(coefficients[0] as i32 * quantization_table[0] as i32) + Wrapping(128 * 8)) / Wrapping(8); + output[0] = stbi_clamp(s0); +} + +// take a -128..127 value and stbi__clamp it and convert to 0..255 +fn stbi_clamp(x: Wrapping<i32>) -> u8 { + x.0.max(0).min(255) as u8 +} + +fn stbi_f2f(x: f32) -> Wrapping<i32> { + Wrapping((x * 4096.0 + 0.5) as i32) +} + +fn stbi_fsh(x: Wrapping<i32>) -> Wrapping<i32> { + x << 12 +} + +#[test] +fn test_dequantize_and_idct_block_8x8() { + #[cfg_attr(rustfmt, rustfmt_skip)] + let coefficients: [i16; 8 * 8] = [ + -14, -39, 58, -2, 3, 3, 0, 1, + 11, 27, 4, -3, 3, 0, 1, 0, + -6, -13, -9, -1, -2, -1, 0, 0, + -4, 0, -1, -2, 0, 0, 0, 0, + 3, 0, 0, 0, 0, 0, 0, 0, + -3, -2, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0 + ]; + + #[cfg_attr(rustfmt, rustfmt_skip)] + let quantization_table: [u16; 8 * 8] = [ + 8, 6, 5, 8, 12, 20, 26, 31, + 6, 6, 7, 10, 13, 29, 30, 28, + 7, 7, 8, 12, 20, 29, 35, 28, + 7, 9, 11, 15, 26, 44, 40, 31, + 9, 11, 19, 28, 34, 55, 52, 39, + 12, 18, 28, 32, 41, 52, 57, 46, + 25, 32, 39, 44, 52, 61, 60, 51, + 36, 46, 48, 49, 56, 50, 52, 50 + ]; + let output_linestride: usize = 8; + let mut output = [0u8; 8 * 8]; + dequantize_and_idct_block_8x8( + &coefficients, + &quantization_table, + output_linestride, + &mut output, + ); + #[cfg_attr(rustfmt, rustfmt_skip)] + let expected_output = [ + 118, 92, 110, 83, 77, 93, 144, 198, + 172, 116, 114, 87, 78, 93, 146, 191, + 194, 107, 91, 76, 71, 93, 160, 
198, + 196, 100, 80, 74, 67, 92, 174, 209, + 182, 104, 88, 81, 68, 89, 178, 206, + 105, 64, 59, 59, 63, 94, 183, 201, + 35, 27, 28, 37, 72, 121, 203, 204, + 37, 45, 41, 47, 98, 154, 223, 208 + ]; + for i in 0..64 { + assert!((output[i] as i16 - expected_output[i] as i16).abs() <= 1); + } +} + +#[test] +fn test_dequantize_and_idct_block_8x8_all_zero() { + let mut output = [0u8; 8 * 8]; + dequantize_and_idct_block_8x8(&[0; 8 * 8], &[666; 8 * 8], 8, &mut output); + assert_eq!(&output[..], &[128; 8 * 8][..]); +} + +#[test] +fn test_dequantize_and_idct_block_8x8_saturated() { + // Arch-specific IDCT implementations need not handle i16::MAX values. + #[cfg(not(feature = "platform_independent"))] + if crate::arch::get_dequantize_and_idct_block_8x8().is_some() { + return; + } + let mut output = [0u8; 8 * 8]; + dequantize_and_idct_block_8x8(&[i16::MAX; 8 * 8], &[u16::MAX; 8 * 8], 8, &mut output); + #[cfg_attr(rustfmt, rustfmt_skip)] + let expected = [ + 0, 0, 0, 255, 255, 0, 0, 255, + 0, 0, 215, 0, 0, 255, 255, 0, + 255, 255, 255, 255, 255, 0, 0, 255, + 0, 0, 255, 0, 255, 0, 255, 255, + 0, 0, 255, 255, 0, 255, 0, 0, + 255, 255, 0, 255, 255, 255, 170, 0, + 0, 255, 0, 0, 0, 0, 0, 255, + 255, 255, 0, 255, 0, 255, 0, 0 + ]; + assert_eq!(&output[..], &expected[..]); +} diff --git a/vendor/jpeg-decoder/src/lib.rs b/vendor/jpeg-decoder/src/lib.rs new file mode 100644 index 0000000..ff1ceb1 --- /dev/null +++ b/vendor/jpeg-decoder/src/lib.rs @@ -0,0 +1,66 @@ +//! This crate contains a JPEG decoder. +//! +//! # Examples +//! +//! ``` +//! use jpeg_decoder::Decoder; +//! use std::fs::File; +//! use std::io::BufReader; +//! +//! let file = File::open("tests/reftest/images/extraneous-data.jpg").expect("failed to open file"); +//! let mut decoder = Decoder::new(BufReader::new(file)); +//! let pixels = decoder.decode().expect("failed to decode image"); +//! let metadata = decoder.info().unwrap(); +//! ``` +//! +//! Get metadata from a file without decoding it: +//! +//! ``` +//! 
use jpeg_decoder::Decoder; +//! use std::fs::File; +//! use std::io::BufReader; +//! +//! let file = File::open("tests/reftest/images/extraneous-data.jpg").expect("failed to open file"); +//! let mut decoder = Decoder::new(BufReader::new(file)); +//! decoder.read_info().expect("failed to read metadata"); +//! let metadata = decoder.info().unwrap(); +//! ``` + +#![deny(missing_docs)] +#![deny(unsafe_code)] +#![cfg_attr(feature = "platform_independent", forbid(unsafe_code))] + +extern crate alloc; +extern crate core; + +#[cfg(feature = "rayon")] +extern crate rayon; + +pub use decoder::{ColorTransform, Decoder, ImageInfo, PixelFormat}; +pub use error::{Error, UnsupportedFeature}; +pub use parser::CodingProcess; + +use std::io; + +#[cfg(not(feature = "platform_independent"))] +mod arch; +mod decoder; +mod error; +mod huffman; +mod idct; +mod marker; +mod parser; +mod upsampler; +mod worker; + +fn read_u8<R: io::Read>(reader: &mut R) -> io::Result<u8> { + let mut buf = [0]; + reader.read_exact(&mut buf)?; + Ok(buf[0]) +} + +fn read_u16_from_be<R: io::Read>(reader: &mut R) -> io::Result<u16> { + let mut buf = [0, 0]; + reader.read_exact(&mut buf)?; + Ok(u16::from_be_bytes(buf)) +} diff --git a/vendor/jpeg-decoder/src/marker.rs b/vendor/jpeg-decoder/src/marker.rs new file mode 100644 index 0000000..2fe74be --- /dev/null +++ b/vendor/jpeg-decoder/src/marker.rs @@ -0,0 +1,136 @@ +// Table B.1 +#[derive(Clone, Copy, Debug, PartialEq)] +// Note: Established names. 
+#[allow(clippy::upper_case_acronyms)] +pub enum Marker { + /// Start Of Frame markers + /// + /// - SOF(0): Baseline DCT (Huffman coding) + /// - SOF(1): Extended sequential DCT (Huffman coding) + /// - SOF(2): Progressive DCT (Huffman coding) + /// - SOF(3): Lossless (sequential) (Huffman coding) + /// - SOF(5): Differential sequential DCT (Huffman coding) + /// - SOF(6): Differential progressive DCT (Huffman coding) + /// - SOF(7): Differential lossless (sequential) (Huffman coding) + /// - SOF(9): Extended sequential DCT (arithmetic coding) + /// - SOF(10): Progressive DCT (arithmetic coding) + /// - SOF(11): Lossless (sequential) (arithmetic coding) + /// - SOF(13): Differential sequential DCT (arithmetic coding) + /// - SOF(14): Differential progressive DCT (arithmetic coding) + /// - SOF(15): Differential lossless (sequential) (arithmetic coding) + SOF(u8), + /// Reserved for JPEG extensions + JPG, + /// Define Huffman table(s) + DHT, + /// Define arithmetic coding conditioning(s) + DAC, + /// Restart with modulo 8 count `m` + RST(u8), + /// Start of image + SOI, + /// End of image + EOI, + /// Start of scan + SOS, + /// Define quantization table(s) + DQT, + /// Define number of lines + DNL, + /// Define restart interval + DRI, + /// Define hierarchical progression + DHP, + /// Expand reference component(s) + EXP, + /// Reserved for application segments + APP(u8), + /// Reserved for JPEG extensions + JPGn(u8), + /// Comment + COM, + /// For temporary private use in arithmetic coding + TEM, + /// Reserved + RES, +} + +impl Marker { + pub fn has_length(self) -> bool { + use self::Marker::*; + ! matches!(self, RST(..) 
| SOI | EOI | TEM) + } + + pub fn from_u8(n: u8) -> Option<Marker> { + use self::Marker::*; + match n { + 0x00 => None, // Byte stuffing + 0x01 => Some(TEM), + 0x02 ..= 0xBF => Some(RES), + 0xC0 => Some(SOF(0)), + 0xC1 => Some(SOF(1)), + 0xC2 => Some(SOF(2)), + 0xC3 => Some(SOF(3)), + 0xC4 => Some(DHT), + 0xC5 => Some(SOF(5)), + 0xC6 => Some(SOF(6)), + 0xC7 => Some(SOF(7)), + 0xC8 => Some(JPG), + 0xC9 => Some(SOF(9)), + 0xCA => Some(SOF(10)), + 0xCB => Some(SOF(11)), + 0xCC => Some(DAC), + 0xCD => Some(SOF(13)), + 0xCE => Some(SOF(14)), + 0xCF => Some(SOF(15)), + 0xD0 => Some(RST(0)), + 0xD1 => Some(RST(1)), + 0xD2 => Some(RST(2)), + 0xD3 => Some(RST(3)), + 0xD4 => Some(RST(4)), + 0xD5 => Some(RST(5)), + 0xD6 => Some(RST(6)), + 0xD7 => Some(RST(7)), + 0xD8 => Some(SOI), + 0xD9 => Some(EOI), + 0xDA => Some(SOS), + 0xDB => Some(DQT), + 0xDC => Some(DNL), + 0xDD => Some(DRI), + 0xDE => Some(DHP), + 0xDF => Some(EXP), + 0xE0 => Some(APP(0)), + 0xE1 => Some(APP(1)), + 0xE2 => Some(APP(2)), + 0xE3 => Some(APP(3)), + 0xE4 => Some(APP(4)), + 0xE5 => Some(APP(5)), + 0xE6 => Some(APP(6)), + 0xE7 => Some(APP(7)), + 0xE8 => Some(APP(8)), + 0xE9 => Some(APP(9)), + 0xEA => Some(APP(10)), + 0xEB => Some(APP(11)), + 0xEC => Some(APP(12)), + 0xED => Some(APP(13)), + 0xEE => Some(APP(14)), + 0xEF => Some(APP(15)), + 0xF0 => Some(JPGn(0)), + 0xF1 => Some(JPGn(1)), + 0xF2 => Some(JPGn(2)), + 0xF3 => Some(JPGn(3)), + 0xF4 => Some(JPGn(4)), + 0xF5 => Some(JPGn(5)), + 0xF6 => Some(JPGn(6)), + 0xF7 => Some(JPGn(7)), + 0xF8 => Some(JPGn(8)), + 0xF9 => Some(JPGn(9)), + 0xFA => Some(JPGn(10)), + 0xFB => Some(JPGn(11)), + 0xFC => Some(JPGn(12)), + 0xFD => Some(JPGn(13)), + 0xFE => Some(COM), + 0xFF => None, // Fill byte + } + } +} diff --git a/vendor/jpeg-decoder/src/parser.rs b/vendor/jpeg-decoder/src/parser.rs new file mode 100644 index 0000000..72ba00d --- /dev/null +++ b/vendor/jpeg-decoder/src/parser.rs @@ -0,0 +1,685 @@ +use alloc::borrow::ToOwned; +use alloc::{format, vec}; +use 
alloc::vec::Vec; +use core::ops::{self, Range}; +use std::io::{self, Read}; +use crate::{read_u16_from_be, read_u8}; +use crate::error::{Error, Result, UnsupportedFeature}; +use crate::huffman::{HuffmanTable, HuffmanTableClass}; +use crate::marker::Marker; +use crate::marker::Marker::*; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Dimensions { + pub width: u16, + pub height: u16, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum EntropyCoding { + Huffman, + Arithmetic, +} + +/// Represents the coding process of an image. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum CodingProcess { + /// Sequential Discrete Cosine Transform + DctSequential, + /// Progressive Discrete Cosine Transform + DctProgressive, + /// Lossless + Lossless, +} + +// Table H.1 +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Predictor { + NoPrediction, + Ra, + Rb, + Rc, + RaRbRc1, // Ra + Rb - Rc + RaRbRc2, // Ra + ((Rb - Rc) >> 1) + RaRbRc3, // Rb + ((Ra - Rb) >> 1) + RaRb, // (Ra + Rb)/2 +} + + +#[derive(Clone)] +pub struct FrameInfo { + pub is_baseline: bool, + pub is_differential: bool, + pub coding_process: CodingProcess, + pub entropy_coding: EntropyCoding, + pub precision: u8, + + pub image_size: Dimensions, + pub output_size: Dimensions, + pub mcu_size: Dimensions, + pub components: Vec<Component>, +} + +#[derive(Debug)] +pub struct ScanInfo { + pub component_indices: Vec<usize>, + pub dc_table_indices: Vec<usize>, + pub ac_table_indices: Vec<usize>, + + pub spectral_selection: Range<u8>, + pub predictor_selection: Predictor, // for lossless + pub successive_approximation_high: u8, + pub successive_approximation_low: u8, + pub point_transform: u8, // for lossless +} + +#[derive(Clone, Debug)] +pub struct Component { + pub identifier: u8, + + pub horizontal_sampling_factor: u8, + pub vertical_sampling_factor: u8, + + pub quantization_table_index: usize, + + pub dct_scale: usize, + + pub size: Dimensions, + pub block_size: Dimensions, +} + +#[derive(Debug)] +pub 
enum AppData { + Adobe(AdobeColorTransform), + Jfif, + Avi1, + Icc(IccChunk), + Exif(Vec<u8>), +} + +// http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum AdobeColorTransform { + // RGB or CMYK + Unknown, + YCbCr, + // YCbCrK + YCCK, +} +#[derive(Debug)] +pub struct IccChunk { + pub num_markers: u8, + pub seq_no: u8, + pub data: Vec<u8>, +} + +impl FrameInfo { + pub(crate) fn update_idct_size(&mut self, idct_size: usize) -> Result<()> { + for component in &mut self.components { + component.dct_scale = idct_size; + } + + update_component_sizes(self.image_size, &mut self.components)?; + + self.output_size = Dimensions { + width: (self.image_size.width as f32 * idct_size as f32 / 8.0).ceil() as u16, + height: (self.image_size.height as f32 * idct_size as f32 / 8.0).ceil() as u16 + }; + + Ok(()) + } +} + +fn read_length<R: Read>(reader: &mut R, marker: Marker) -> Result<usize> { + assert!(marker.has_length()); + + // length is including itself. 
+ let length = usize::from(read_u16_from_be(reader)?); + + if length < 2 { + return Err(Error::Format(format!("encountered {:?} with invalid length {}", marker, length))); + } + + Ok(length - 2) +} + +fn skip_bytes<R: Read>(reader: &mut R, length: usize) -> Result<()> { + let length = length as u64; + let to_skip = &mut reader.by_ref().take(length); + let copied = io::copy(to_skip, &mut io::sink())?; + if copied < length { + Err(Error::Io(io::ErrorKind::UnexpectedEof.into())) + } else { + Ok(()) + } +} + +// Section B.2.2 +pub fn parse_sof<R: Read>(reader: &mut R, marker: Marker) -> Result<FrameInfo> { + let length = read_length(reader, marker)?; + + if length <= 6 { + return Err(Error::Format("invalid length in SOF".to_owned())); + } + + let is_baseline = marker == SOF(0); + let is_differential = match marker { + SOF(0 ..= 3) | SOF(9 ..= 11) => false, + SOF(5 ..= 7) | SOF(13 ..= 15) => true, + _ => panic!(), + }; + let coding_process = match marker { + SOF(0) | SOF(1) | SOF(5) | SOF(9) | SOF(13) => CodingProcess::DctSequential, + SOF(2) | SOF(6) | SOF(10) | SOF(14) => CodingProcess::DctProgressive, + SOF(3) | SOF(7) | SOF(11) | SOF(15) => CodingProcess::Lossless, + _ => panic!(), + }; + let entropy_coding = match marker { + SOF(0 ..= 3) | SOF(5 ..= 7) => EntropyCoding::Huffman, + SOF(9 ..= 11) | SOF(13 ..= 15) => EntropyCoding::Arithmetic, + _ => panic!(), + }; + + let precision = read_u8(reader)?; + + match precision { + 8 => {}, + 12 => { + if is_baseline { + return Err(Error::Format("12 bit sample precision is not allowed in baseline".to_owned())); + } + }, + _ => { + if coding_process != CodingProcess::Lossless || precision > 16 { + return Err(Error::Format(format!("invalid precision {} in frame header", precision))) + } + }, + } + + let height = read_u16_from_be(reader)?; + let width = read_u16_from_be(reader)?; + + // height: + // "Value 0 indicates that the number of lines shall be defined by the DNL marker and + // parameters at the end of the first scan 
(see B.2.5)." + if height == 0 { + return Err(Error::Unsupported(UnsupportedFeature::DNL)); + } + + if width == 0 { + return Err(Error::Format("zero width in frame header".to_owned())); + } + + let component_count = read_u8(reader)?; + + if component_count == 0 { + return Err(Error::Format("zero component count in frame header".to_owned())); + } + if coding_process == CodingProcess::DctProgressive && component_count > 4 { + return Err(Error::Format("progressive frame with more than 4 components".to_owned())); + } + + if length != 6 + 3 * component_count as usize { + return Err(Error::Format("invalid length in SOF".to_owned())); + } + + let mut components: Vec<Component> = Vec::with_capacity(component_count as usize); + + for _ in 0 .. component_count { + let identifier = read_u8(reader)?; + + // Each component's identifier must be unique. + if components.iter().any(|c| c.identifier == identifier) { + return Err(Error::Format(format!("duplicate frame component identifier {}", identifier))); + } + + let byte = read_u8(reader)?; + let horizontal_sampling_factor = byte >> 4; + let vertical_sampling_factor = byte & 0x0f; + + if horizontal_sampling_factor == 0 || horizontal_sampling_factor > 4 { + return Err(Error::Format(format!("invalid horizontal sampling factor {}", horizontal_sampling_factor))); + } + if vertical_sampling_factor == 0 || vertical_sampling_factor > 4 { + return Err(Error::Format(format!("invalid vertical sampling factor {}", vertical_sampling_factor))); + } + + let quantization_table_index = read_u8(reader)?; + + if quantization_table_index > 3 || (coding_process == CodingProcess::Lossless && quantization_table_index != 0) { + return Err(Error::Format(format!("invalid quantization table index {}", quantization_table_index))); + } + + components.push(Component { + identifier, + horizontal_sampling_factor, + vertical_sampling_factor, + quantization_table_index: quantization_table_index as usize, + dct_scale: 8, + size: Dimensions {width: 0, height: 0}, 
+ block_size: Dimensions {width: 0, height: 0}, + }); + } + + let mcu_size = update_component_sizes(Dimensions { width, height }, &mut components)?; + + Ok(FrameInfo { + is_baseline, + is_differential, + coding_process, + entropy_coding, + precision, + image_size: Dimensions { width, height }, + output_size: Dimensions { width, height }, + mcu_size, + components, + }) +} + +/// Returns ceil(x/y), requires x>0 +fn ceil_div(x: u32, y: u32) -> Result<u16> { + if x == 0 || y == 0 { + // TODO Determine how this error is reached. Can we validate input + // earlier and error out then? + return Err(Error::Format("invalid dimensions".to_owned())); + } + Ok((1 + ((x - 1) / y)) as u16) +} + +fn update_component_sizes(size: Dimensions, components: &mut [Component]) -> Result<Dimensions> { + let h_max = components.iter().map(|c| c.horizontal_sampling_factor).max().unwrap() as u32; + let v_max = components.iter().map(|c| c.vertical_sampling_factor).max().unwrap() as u32; + + let mcu_size = Dimensions { + width: ceil_div(size.width as u32, h_max * 8)?, + height: ceil_div(size.height as u32, v_max * 8)?, + }; + + for component in components { + component.size.width = ceil_div(size.width as u32 * component.horizontal_sampling_factor as u32 * component.dct_scale as u32, h_max * 8)?; + component.size.height = ceil_div(size.height as u32 * component.vertical_sampling_factor as u32 * component.dct_scale as u32, v_max * 8)?; + + component.block_size.width = mcu_size.width * component.horizontal_sampling_factor as u16; + component.block_size.height = mcu_size.height * component.vertical_sampling_factor as u16; + } + + Ok(mcu_size) +} + +#[test] +fn test_update_component_sizes() { + let mut components = [Component { + identifier: 1, + horizontal_sampling_factor: 2, + vertical_sampling_factor: 2, + quantization_table_index: 0, + dct_scale: 8, + size: Dimensions { width: 0, height: 0 }, + block_size: Dimensions { width: 0, height: 0 }, + }]; + let mcu = update_component_sizes( + Dimensions 
{ width: 800, height: 280 }, + &mut components).unwrap(); + assert_eq!(mcu, Dimensions { width: 50, height: 18 }); + assert_eq!(components[0].block_size, Dimensions { width: 100, height: 36 }); + assert_eq!(components[0].size, Dimensions { width: 800, height: 280 }); +} + +// Section B.2.3 +pub fn parse_sos<R: Read>(reader: &mut R, frame: &FrameInfo) -> Result<ScanInfo> { + let length = read_length(reader, SOS)?; + if 0 == length { + return Err(Error::Format("zero length in SOS".to_owned())); + } + + let component_count = read_u8(reader)?; + + if component_count == 0 || component_count > 4 { + return Err(Error::Format(format!("invalid component count {} in scan header", component_count))); + } + + if length != 4 + 2 * component_count as usize { + return Err(Error::Format("invalid length in SOS".to_owned())); + } + + let mut component_indices = Vec::with_capacity(component_count as usize); + let mut dc_table_indices = Vec::with_capacity(component_count as usize); + let mut ac_table_indices = Vec::with_capacity(component_count as usize); + + for _ in 0 .. component_count { + let identifier = read_u8(reader)?; + + let component_index = match frame.components.iter().position(|c| c.identifier == identifier) { + Some(value) => value, + None => return Err(Error::Format(format!("scan component identifier {} does not match any of the component identifiers defined in the frame", identifier))), + }; + + // Each of the scan's components must be unique. + if component_indices.contains(&component_index) { + return Err(Error::Format(format!("duplicate scan component identifier {}", identifier))); + } + + // "... the ordering in the scan header shall follow the ordering in the frame header." 
+ if component_index < *component_indices.iter().max().unwrap_or(&0) { + return Err(Error::Format("the scan component order does not follow the order in the frame header".to_owned())); + } + + let byte = read_u8(reader)?; + let dc_table_index = byte >> 4; + let ac_table_index = byte & 0x0f; + + if dc_table_index > 3 || (frame.is_baseline && dc_table_index > 1) { + return Err(Error::Format(format!("invalid dc table index {}", dc_table_index))); + } + if ac_table_index > 3 || (frame.is_baseline && ac_table_index > 1) { + return Err(Error::Format(format!("invalid ac table index {}", ac_table_index))); + } + + component_indices.push(component_index); + dc_table_indices.push(dc_table_index as usize); + ac_table_indices.push(ac_table_index as usize); + } + + let blocks_per_mcu = component_indices.iter().map(|&i| { + frame.components[i].horizontal_sampling_factor as u32 * frame.components[i].vertical_sampling_factor as u32 + }).fold(0, ops::Add::add); + + if component_count > 1 && blocks_per_mcu > 10 { + return Err(Error::Format("scan with more than one component and more than 10 blocks per MCU".to_owned())); + } + + // Also utilized as 'Predictor' in lossless coding, as MEAN in JPEG-LS etc. + let spectral_selection_start = read_u8(reader)?; + // Also utilized as ILV parameter in JPEG-LS. + let mut spectral_selection_end = read_u8(reader)?; + + let byte = read_u8(reader)?; + let successive_approximation_high = byte >> 4; + let successive_approximation_low = byte & 0x0f; + + // The Differential Pulse-Mode prediction used (similar to png). Only utilized in Lossless + // coding. Don't confuse with the JPEG-LS parameter coded using the same scan info portion. 
+ let predictor_selection; + let point_transform = successive_approximation_low; + + if frame.coding_process == CodingProcess::DctProgressive { + predictor_selection = Predictor::NoPrediction; + if spectral_selection_end > 63 || spectral_selection_start > spectral_selection_end || + (spectral_selection_start == 0 && spectral_selection_end != 0) { + return Err(Error::Format(format!("invalid spectral selection parameters: ss={}, se={}", spectral_selection_start, spectral_selection_end))); + } + if spectral_selection_start != 0 && component_count != 1 { + return Err(Error::Format("spectral selection scan with AC coefficients can't have more than one component".to_owned())); + } + + if successive_approximation_high > 13 || successive_approximation_low > 13 { + return Err(Error::Format(format!("invalid successive approximation parameters: ah={}, al={}", successive_approximation_high, successive_approximation_low))); + } + + // Section G.1.1.1.2 + // "Each scan which follows the first scan for a given band progressively improves + // the precision of the coefficients by one bit, until full precision is reached." 
+ if successive_approximation_high != 0 && successive_approximation_high != successive_approximation_low + 1 { + return Err(Error::Format("successive approximation scan with more than one bit of improvement".to_owned())); + } + } + else if frame.coding_process == CodingProcess::Lossless { + if spectral_selection_end != 0 { + return Err(Error::Format("spectral selection end shall be zero in lossless scan".to_owned())); + } + if successive_approximation_high != 0 { + return Err(Error::Format("successive approximation high shall be zero in lossless scan".to_owned())); + } + predictor_selection = match spectral_selection_start { + 0 => Predictor::NoPrediction, + 1 => Predictor::Ra, + 2 => Predictor::Rb, + 3 => Predictor::Rc, + 4 => Predictor::RaRbRc1, + 5 => Predictor::RaRbRc2, + 6 => Predictor::RaRbRc3, + 7 => Predictor::RaRb, + _ => { + return Err(Error::Format(format!("invalid predictor selection value: {}", spectral_selection_start))); + }, + }; + } + else { + predictor_selection = Predictor::NoPrediction; + if spectral_selection_end == 0 { + spectral_selection_end = 63; + } + if spectral_selection_start != 0 || spectral_selection_end != 63 { + return Err(Error::Format("spectral selection is not allowed in non-progressive scan".to_owned())); + } + if successive_approximation_high != 0 || successive_approximation_low != 0 { + return Err(Error::Format("successive approximation is not allowed in non-progressive scan".to_owned())); + } + } + + Ok(ScanInfo { + component_indices, + dc_table_indices, + ac_table_indices, + spectral_selection: Range { + start: spectral_selection_start, + end: spectral_selection_end + 1, + }, + predictor_selection, + successive_approximation_high, + successive_approximation_low, + point_transform, + }) +} + +// Section B.2.4.1 +pub fn parse_dqt<R: Read>(reader: &mut R) -> Result<[Option<[u16; 64]>; 4]> { + let mut length = read_length(reader, DQT)?; + let mut tables = [None; 4]; + + // Each DQT segment may contain multiple quantization 
tables. + while length > 0 { + let byte = read_u8(reader)?; + let precision = (byte >> 4) as usize; + let index = (byte & 0x0f) as usize; + + // The combination of 8-bit sample precision and 16-bit quantization tables is explicitly + // disallowed by the JPEG spec: + // "An 8-bit DCT-based process shall not use a 16-bit precision quantization table." + // "Pq: Quantization table element precision – Specifies the precision of the Qk + // values. Value 0 indicates 8-bit Qk values; value 1 indicates 16-bit Qk values. Pq + // shall be zero for 8 bit sample precision P (see B.2.2)." + // libjpeg allows this behavior though, and there are images in the wild using it. So to + // match libjpeg's behavior we are deviating from the JPEG spec here. + if precision > 1 { + return Err(Error::Format(format!("invalid precision {} in DQT", precision))); + } + if index > 3 { + return Err(Error::Format(format!("invalid destination identifier {} in DQT", index))); + } + if length < 65 + 64 * precision { + return Err(Error::Format("invalid length in DQT".to_owned())); + } + + let mut table = [0u16; 64]; + + for item in table.iter_mut() { + *item = match precision { + 0 => u16::from(read_u8(reader)?), + 1 => read_u16_from_be(reader)?, + _ => unreachable!(), + }; + } + + if table.iter().any(|&val| val == 0) { + return Err(Error::Format("quantization table contains element with a zero value".to_owned())); + } + + tables[index] = Some(table); + length -= 65 + 64 * precision; + } + + Ok(tables) +} + +// Section B.2.4.2 +pub fn parse_dht<R: Read>(reader: &mut R, is_baseline: Option<bool>) -> Result<(Vec<Option<HuffmanTable>>, Vec<Option<HuffmanTable>>)> { + let mut length = read_length(reader, DHT)?; + let mut dc_tables = vec![None, None, None, None]; + let mut ac_tables = vec![None, None, None, None]; + + // Each DHT segment may contain multiple huffman tables. 
+ while length > 17 { + let byte = read_u8(reader)?; + let class = byte >> 4; + let index = (byte & 0x0f) as usize; + + if class != 0 && class != 1 { + return Err(Error::Format(format!("invalid class {} in DHT", class))); + } + if is_baseline == Some(true) && index > 1 { + return Err(Error::Format("a maximum of two huffman tables per class are allowed in baseline".to_owned())); + } + if index > 3 { + return Err(Error::Format(format!("invalid destination identifier {} in DHT", index))); + } + + let mut counts = [0u8; 16]; + reader.read_exact(&mut counts)?; + + let size = counts.iter().map(|&val| val as usize).fold(0, ops::Add::add); + + if size == 0 { + return Err(Error::Format("encountered table with zero length in DHT".to_owned())); + } + else if size > 256 { + return Err(Error::Format("encountered table with excessive length in DHT".to_owned())); + } + else if size > length - 17 { + return Err(Error::Format("invalid length in DHT".to_owned())); + } + + let mut values = vec![0u8; size]; + reader.read_exact(&mut values)?; + + match class { + 0 => dc_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::DC)?), + 1 => ac_tables[index] = Some(HuffmanTable::new(&counts, &values, HuffmanTableClass::AC)?), + _ => unreachable!(), + } + + length -= 17 + size; + } + + if length != 0 { + return Err(Error::Format("invalid length in DHT".to_owned())); + } + + Ok((dc_tables, ac_tables)) +} + +// Section B.2.4.4 +pub fn parse_dri<R: Read>(reader: &mut R) -> Result<u16> { + let length = read_length(reader, DRI)?; + + if length != 2 { + return Err(Error::Format("DRI with invalid length".to_owned())); + } + + Ok(read_u16_from_be(reader)?) 
+} + +// Section B.2.4.5 +pub fn parse_com<R: Read>(reader: &mut R) -> Result<Vec<u8>> { + let length = read_length(reader, COM)?; + let mut buffer = vec![0u8; length]; + + reader.read_exact(&mut buffer)?; + + Ok(buffer) +} + +// Section B.2.4.6 +pub fn parse_app<R: Read>(reader: &mut R, marker: Marker) -> Result<Option<AppData>> { + let length = read_length(reader, marker)?; + let mut bytes_read = 0; + let mut result = None; + + match marker { + APP(0) => { + if length >= 5 { + let mut buffer = [0u8; 5]; + reader.read_exact(&mut buffer)?; + bytes_read = buffer.len(); + + // http://www.w3.org/Graphics/JPEG/jfif3.pdf + if buffer[0..5] == *b"JFIF\0" { + result = Some(AppData::Jfif); + // https://sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#AVI1 + } else if buffer[0..5] == *b"AVI1\0" { + result = Some(AppData::Avi1); + } + } + } + // Exif Data + APP(1) => { + if length >= 6 { + let mut buffer = [0u8; 6]; + reader.read_exact(&mut buffer)?; + bytes_read = buffer.len(); + + // https://web.archive.org/web/20190624045241if_/http://www.cipa.jp:80/std/documents/e/DC-008-Translation-2019-E.pdf + // 4.5.4 Basic Structure of JPEG Compressed Data + if buffer == *b"Exif\x00\x00" { + let mut data = vec![0; length - bytes_read]; + reader.read_exact(&mut data)?; + bytes_read += data.len(); + result = Some(AppData::Exif(data)); + } + } + } + APP(2) => { + if length > 14 { + let mut buffer = [0u8; 14]; + reader.read_exact(&mut buffer)?; + bytes_read = buffer.len(); + + // http://www.color.org/ICC_Minor_Revision_for_Web.pdf + // B.4 Embedding ICC profiles in JFIF files + if buffer[0..12] == *b"ICC_PROFILE\0" { + let mut data = vec![0; length - bytes_read]; + reader.read_exact(&mut data)?; + bytes_read += data.len(); + result = Some(AppData::Icc(IccChunk { + seq_no: buffer[12], + num_markers: buffer[13], + data, + })); + } + } + } + APP(14) => { + if length >= 12 { + let mut buffer = [0u8; 12]; + reader.read_exact(&mut buffer)?; + bytes_read = buffer.len(); + + // 
http://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/JPEG.html#Adobe + if buffer[0 .. 6] == *b"Adobe\0" { + let color_transform = match buffer[11] { + 0 => AdobeColorTransform::Unknown, + 1 => AdobeColorTransform::YCbCr, + 2 => AdobeColorTransform::YCCK, + _ => return Err(Error::Format("invalid color transform in adobe app segment".to_owned())), + }; + + result = Some(AppData::Adobe(color_transform)); + } + } + }, + _ => {}, + } + + skip_bytes(reader, length - bytes_read)?; + Ok(result) +} diff --git a/vendor/jpeg-decoder/src/upsampler.rs b/vendor/jpeg-decoder/src/upsampler.rs new file mode 100644 index 0000000..a5c39d4 --- /dev/null +++ b/vendor/jpeg-decoder/src/upsampler.rs @@ -0,0 +1,252 @@ +use alloc::boxed::Box; +use alloc::vec; +use alloc::vec::Vec; +use crate::error::{Error, Result, UnsupportedFeature}; +use crate::parser::Component; + +pub struct Upsampler { + components: Vec<UpsamplerComponent>, + line_buffer_size: usize +} + +struct UpsamplerComponent { + upsampler: Box<dyn Upsample + Sync>, + width: usize, + height: usize, + row_stride: usize, +} + +impl Upsampler { + pub fn new(components: &[Component], output_width: u16, output_height: u16) -> Result<Upsampler> { + let h_max = components.iter().map(|c| c.horizontal_sampling_factor).max().unwrap(); + let v_max = components.iter().map(|c| c.vertical_sampling_factor).max().unwrap(); + let mut upsampler_components = Vec::with_capacity(components.len()); + + for component in components { + let upsampler = choose_upsampler((component.horizontal_sampling_factor, + component.vertical_sampling_factor), + (h_max, v_max), + output_width, + output_height)?; + upsampler_components.push(UpsamplerComponent { + upsampler, + width: component.size.width as usize, + height: component.size.height as usize, + row_stride: component.block_size.width as usize * component.dct_scale, + }); + } + + let buffer_size = components.iter().map(|c| c.size.width).max().unwrap() as usize * h_max as usize; + + Ok(Upsampler { + 
components: upsampler_components, + line_buffer_size: buffer_size + }) + } + + pub fn upsample_and_interleave_row(&self, component_data: &[Vec<u8>], row: usize, output_width: usize, output: &mut [u8], color_convert: fn(&[Vec<u8>], &mut [u8])) { + let component_count = component_data.len(); + let mut line_buffers = vec![vec![0u8; self.line_buffer_size]; component_count]; + + debug_assert_eq!(component_count, self.components.len()); + + for (i, component) in self.components.iter().enumerate() { + component.upsampler.upsample_row(&component_data[i], + component.width, + component.height, + component.row_stride, + row, + output_width, + &mut line_buffers[i]); + } + color_convert(&line_buffers, output); + } +} + +struct UpsamplerH1V1; +struct UpsamplerH2V1; +struct UpsamplerH1V2; +struct UpsamplerH2V2; + +struct UpsamplerGeneric { + horizontal_scaling_factor: u8, + vertical_scaling_factor: u8 +} + +fn choose_upsampler(sampling_factors: (u8, u8), + max_sampling_factors: (u8, u8), + output_width: u16, + output_height: u16) -> Result<Box<dyn Upsample + Sync>> { + let h1 = sampling_factors.0 == max_sampling_factors.0 || output_width == 1; + let v1 = sampling_factors.1 == max_sampling_factors.1 || output_height == 1; + let h2 = sampling_factors.0 * 2 == max_sampling_factors.0; + let v2 = sampling_factors.1 * 2 == max_sampling_factors.1; + + if h1 && v1 { + Ok(Box::new(UpsamplerH1V1)) + } + else if h2 && v1 { + Ok(Box::new(UpsamplerH2V1)) + } + else if h1 && v2 { + Ok(Box::new(UpsamplerH1V2)) + } + else if h2 && v2 { + Ok(Box::new(UpsamplerH2V2)) + } + else { + if max_sampling_factors.0 % sampling_factors.0 != 0 || max_sampling_factors.1 % sampling_factors.1 != 0 { + Err(Error::Unsupported(UnsupportedFeature::NonIntegerSubsamplingRatio)) + } + else { + Ok(Box::new(UpsamplerGeneric { + horizontal_scaling_factor: max_sampling_factors.0 / sampling_factors.0, + vertical_scaling_factor: max_sampling_factors.1 / sampling_factors.1 + })) + } + } +} + +trait Upsample { + fn 
upsample_row(&self, + input: &[u8], + input_width: usize, + input_height: usize, + row_stride: usize, + row: usize, + output_width: usize, + output: &mut [u8]); +} + +impl Upsample for UpsamplerH1V1 { + fn upsample_row(&self, + input: &[u8], + _input_width: usize, + _input_height: usize, + row_stride: usize, + row: usize, + output_width: usize, + output: &mut [u8]) { + let input = &input[row * row_stride ..]; + + output[..output_width].copy_from_slice(&input[..output_width]); + } +} + +impl Upsample for UpsamplerH2V1 { + fn upsample_row(&self, + input: &[u8], + input_width: usize, + _input_height: usize, + row_stride: usize, + row: usize, + _output_width: usize, + output: &mut [u8]) { + let input = &input[row * row_stride ..]; + + if input_width == 1 { + output[0] = input[0]; + output[1] = input[0]; + return; + } + + output[0] = input[0]; + output[1] = ((input[0] as u32 * 3 + input[1] as u32 + 2) >> 2) as u8; + + for i in 1 .. input_width - 1 { + let sample = 3 * input[i] as u32 + 2; + output[i * 2] = ((sample + input[i - 1] as u32) >> 2) as u8; + output[i * 2 + 1] = ((sample + input[i + 1] as u32) >> 2) as u8; + } + + output[(input_width - 1) * 2] = ((input[input_width - 1] as u32 * 3 + input[input_width - 2] as u32 + 2) >> 2) as u8; + output[(input_width - 1) * 2 + 1] = input[input_width - 1]; + } +} + +impl Upsample for UpsamplerH1V2 { + fn upsample_row(&self, + input: &[u8], + _input_width: usize, + input_height: usize, + row_stride: usize, + row: usize, + output_width: usize, + output: &mut [u8]) { + let row_near = row as f32 / 2.0; + // If row_near's fractional is 0.0 we want row_far to be the previous row and if it's 0.5 we + // want it to be the next row. 
+ let row_far = (row_near + row_near.fract() * 3.0 - 0.25).min((input_height - 1) as f32); + + let input_near = &input[row_near as usize * row_stride ..]; + let input_far = &input[row_far as usize * row_stride ..]; + + let output = &mut output[..output_width]; + let input_near = &input_near[..output_width]; + let input_far = &input_far[..output_width]; + for i in 0..output_width { + output[i] = ((3 * input_near[i] as u32 + input_far[i] as u32 + 2) >> 2) as u8; + } + } +} + +impl Upsample for UpsamplerH2V2 { + fn upsample_row(&self, + input: &[u8], + input_width: usize, + input_height: usize, + row_stride: usize, + row: usize, + _output_width: usize, + output: &mut [u8]) { + let row_near = row as f32 / 2.0; + // If row_near's fractional is 0.0 we want row_far to be the previous row and if it's 0.5 we + // want it to be the next row. + let row_far = (row_near + row_near.fract() * 3.0 - 0.25).min((input_height - 1) as f32); + + let input_near = &input[row_near as usize * row_stride ..]; + let input_far = &input[row_far as usize * row_stride ..]; + + if input_width == 1 { + let value = ((3 * input_near[0] as u32 + input_far[0] as u32 + 2) >> 2) as u8; + output[0] = value; + output[1] = value; + return; + } + + let mut t1 = 3 * input_near[0] as u32 + input_far[0] as u32; + output[0] = ((t1 + 2) >> 2) as u8; + + for i in 1 .. 
input_width { + let t0 = t1; + t1 = 3 * input_near[i] as u32 + input_far[i] as u32; + + output[i * 2 - 1] = ((3 * t0 + t1 + 8) >> 4) as u8; + output[i * 2] = ((3 * t1 + t0 + 8) >> 4) as u8; + } + + output[input_width * 2 - 1] = ((t1 + 2) >> 2) as u8; + } +} + +impl Upsample for UpsamplerGeneric { + // Uses nearest neighbor sampling + fn upsample_row(&self, + input: &[u8], + input_width: usize, + _input_height: usize, + row_stride: usize, + row: usize, + _output_width: usize, + output: &mut [u8]) { + let mut index = 0; + let start = (row / self.vertical_scaling_factor as usize) * row_stride; + let input = &input[start..(start + input_width)]; + for val in input { + for _ in 0..self.horizontal_scaling_factor { + output[index] = *val; + index += 1; + } + } + } +} diff --git a/vendor/jpeg-decoder/src/worker/immediate.rs b/vendor/jpeg-decoder/src/worker/immediate.rs new file mode 100644 index 0000000..8c6e7db --- /dev/null +++ b/vendor/jpeg-decoder/src/worker/immediate.rs @@ -0,0 +1,80 @@ +use alloc::vec; +use alloc::vec::Vec; +use core::mem; +use core::convert::TryInto; +use crate::decoder::MAX_COMPONENTS; +use crate::error::Result; +use crate::idct::dequantize_and_idct_block; +use crate::alloc::sync::Arc; +use crate::parser::Component; +use super::{RowData, Worker}; + +pub struct ImmediateWorker { + offsets: [usize; MAX_COMPONENTS], + results: Vec<Vec<u8>>, + components: Vec<Option<Component>>, + quantization_tables: Vec<Option<Arc<[u16; 64]>>>, +} + +impl Default for ImmediateWorker { + fn default() -> Self { + ImmediateWorker { + offsets: [0; MAX_COMPONENTS], + results: vec![Vec::new(); MAX_COMPONENTS], + components: vec![None; MAX_COMPONENTS], + quantization_tables: vec![None; MAX_COMPONENTS], + } + } +} + +impl ImmediateWorker { + pub fn start_immediate(&mut self, data: RowData) { + assert!(self.results[data.index].is_empty()); + + self.offsets[data.index] = 0; + self.results[data.index].resize(data.component.block_size.width as usize * 
data.component.block_size.height as usize * data.component.dct_scale * data.component.dct_scale, 0u8); + self.components[data.index] = Some(data.component); + self.quantization_tables[data.index] = Some(data.quantization_table); + } + + pub fn append_row_immediate(&mut self, (index, data): (usize, Vec<i16>)) { + // Convert coefficients from a MCU row to samples. + + let component = self.components[index].as_ref().unwrap(); + let quantization_table = self.quantization_tables[index].as_ref().unwrap(); + let block_count = component.block_size.width as usize * component.vertical_sampling_factor as usize; + let line_stride = component.block_size.width as usize * component.dct_scale; + + assert_eq!(data.len(), block_count * 64); + + for i in 0..block_count { + let x = (i % component.block_size.width as usize) * component.dct_scale; + let y = (i / component.block_size.width as usize) * component.dct_scale; + + let coefficients = data[i * 64..(i + 1) * 64].try_into().unwrap(); + let output = &mut self.results[index][self.offsets[index] + y * line_stride + x..]; + + dequantize_and_idct_block(component.dct_scale, coefficients, quantization_table, line_stride, output); + } + + self.offsets[index] += block_count * component.dct_scale * component.dct_scale; + } + + pub fn get_result_immediate(&mut self, index: usize) -> Vec<u8> { + mem::take(&mut self.results[index]) + } +} + +impl Worker for ImmediateWorker { + fn start(&mut self, data: RowData) -> Result<()> { + self.start_immediate(data); + Ok(()) + } + fn append_row(&mut self, row: (usize, Vec<i16>)) -> Result<()> { + self.append_row_immediate(row); + Ok(()) + } + fn get_result(&mut self, index: usize) -> Result<Vec<u8>> { + Ok(self.get_result_immediate(index)) + } +} diff --git a/vendor/jpeg-decoder/src/worker/mod.rs b/vendor/jpeg-decoder/src/worker/mod.rs new file mode 100644 index 0000000..d6c2b10 --- /dev/null +++ b/vendor/jpeg-decoder/src/worker/mod.rs @@ -0,0 +1,128 @@ +mod immediate; +mod multithreaded; +#[cfg(all( + 
not(any(target_arch = "asmjs", target_arch = "wasm32")), + feature = "rayon" +))] +mod rayon; + +use crate::decoder::{choose_color_convert_func, ColorTransform}; +use crate::error::Result; +use crate::parser::{Component, Dimensions}; +use crate::upsampler::Upsampler; + +use alloc::sync::Arc; +use alloc::vec::Vec; +use core::cell::RefCell; + +pub struct RowData { + pub index: usize, + pub component: Component, + pub quantization_table: Arc<[u16; 64]>, +} + +pub trait Worker { + fn start(&mut self, row_data: RowData) -> Result<()>; + fn append_row(&mut self, row: (usize, Vec<i16>)) -> Result<()>; + fn get_result(&mut self, index: usize) -> Result<Vec<u8>>; + /// Default implementation for spawning multiple tasks. + fn append_rows(&mut self, row: &mut dyn Iterator<Item = (usize, Vec<i16>)>) -> Result<()> { + for item in row { + self.append_row(item)?; + } + Ok(()) + } +} + +#[allow(dead_code)] +pub enum PreferWorkerKind { + Immediate, + Multithreaded, +} + +#[derive(Default)] +pub struct WorkerScope { + inner: core::cell::RefCell<Option<WorkerScopeInner>>, +} + +enum WorkerScopeInner { + #[cfg(all( + not(any(target_arch = "asmjs", target_arch = "wasm32")), + feature = "rayon" + ))] + Rayon(rayon::Scoped), + #[cfg(not(any(target_arch = "asmjs", target_arch = "wasm32")))] + Multithreaded(multithreaded::MpscWorker), + Immediate(immediate::ImmediateWorker), +} + +impl WorkerScope { + pub fn with<T>(with: impl FnOnce(&Self) -> T) -> T { + with(&WorkerScope { + inner: RefCell::default(), + }) + } + + pub fn get_or_init_worker<T>( + &self, + prefer: PreferWorkerKind, + f: impl FnOnce(&mut dyn Worker) -> T, + ) -> T { + let mut inner = self.inner.borrow_mut(); + let inner = inner.get_or_insert_with(move || match prefer { + #[cfg(all( + not(any(target_arch = "asmjs", target_arch = "wasm32")), + feature = "rayon" + ))] + PreferWorkerKind::Multithreaded => WorkerScopeInner::Rayon(Default::default()), + #[allow(unreachable_patterns)] + #[cfg(not(any(target_arch = "asmjs", 
target_arch = "wasm32")))] + PreferWorkerKind::Multithreaded => WorkerScopeInner::Multithreaded(Default::default()), + _ => WorkerScopeInner::Immediate(Default::default()), + }); + + f(match &mut *inner { + #[cfg(all( + not(any(target_arch = "asmjs", target_arch = "wasm32")), + feature = "rayon" + ))] + WorkerScopeInner::Rayon(worker) => worker, + #[cfg(not(any(target_arch = "asmjs", target_arch = "wasm32")))] + WorkerScopeInner::Multithreaded(worker) => worker, + WorkerScopeInner::Immediate(worker) => worker, + }) + } +} + +pub fn compute_image_parallel( + components: &[Component], + data: Vec<Vec<u8>>, + output_size: Dimensions, + color_transform: ColorTransform, +) -> Result<Vec<u8>> { + #[cfg(all( + not(any(target_arch = "asmjs", target_arch = "wasm32")), + feature = "rayon" + ))] + return rayon::compute_image_parallel(components, data, output_size, color_transform); + + #[allow(unreachable_code)] + { + let color_convert_func = choose_color_convert_func(components.len(), color_transform)?; + let upsampler = Upsampler::new(components, output_size.width, output_size.height)?; + let line_size = output_size.width as usize * components.len(); + let mut image = vec![0u8; line_size * output_size.height as usize]; + + for (row, line) in image.chunks_mut(line_size).enumerate() { + upsampler.upsample_and_interleave_row( + &data, + row, + output_size.width as usize, + line, + color_convert_func, + ); + } + + Ok(image) + } +} diff --git a/vendor/jpeg-decoder/src/worker/multithreaded.rs b/vendor/jpeg-decoder/src/worker/multithreaded.rs new file mode 100644 index 0000000..c820702 --- /dev/null +++ b/vendor/jpeg-decoder/src/worker/multithreaded.rs @@ -0,0 +1,123 @@ +//! This module implements per-component parallelism. +//! It should be possible to implement per-row parallelism as well, +//! which should also boost performance of grayscale images +//! and allow scaling to more cores. +//! However, that would be more complex, so we use this as a starting point. 
+ +use super::immediate::ImmediateWorker; +use super::{RowData, Worker}; +use crate::decoder::MAX_COMPONENTS; +use crate::error::Result; +use std::{ + mem, + sync::mpsc::{self, Receiver, Sender}, +}; + +enum WorkerMsg { + Start(RowData), + AppendRow(Vec<i16>), + GetResult(Sender<Vec<u8>>), +} + +#[derive(Default)] +pub struct MpscWorker { + senders: [Option<Sender<WorkerMsg>>; MAX_COMPONENTS], +} + +impl MpscWorker { + fn start_with( + &mut self, + row_data: RowData, + spawn_worker: impl FnOnce(usize) -> Result<Sender<WorkerMsg>>, + ) -> Result<()> { + // if there is no worker thread for this component yet, start one + let component = row_data.index; + if let None = self.senders[component] { + let sender = spawn_worker(component)?; + self.senders[component] = Some(sender); + } + + // we do the "take out value and put it back in once we're done" dance here + // and in all other message-passing methods because there's not that many rows + // and this should be cheaper than spawning MAX_COMPONENTS many threads up front + let sender = self.senders[component].as_mut().unwrap(); + sender + .send(WorkerMsg::Start(row_data)) + .expect("jpeg-decoder worker thread error"); + Ok(()) + } + + fn append_row(&mut self, row: (usize, Vec<i16>)) -> Result<()> { + let component = row.0; + let sender = self.senders[component].as_mut().unwrap(); + sender + .send(WorkerMsg::AppendRow(row.1)) + .expect("jpeg-decoder worker thread error"); + Ok(()) + } + + fn get_result_with( + &mut self, + index: usize, + collect: impl FnOnce(Receiver<Vec<u8>>) -> Vec<u8>, + ) -> Result<Vec<u8>> { + let (tx, rx) = mpsc::channel(); + let sender = mem::take(&mut self.senders[index]).unwrap(); + sender + .send(WorkerMsg::GetResult(tx)) + .expect("jpeg-decoder worker thread error"); + Ok(collect(rx)) + } +} + +impl Worker for MpscWorker { + fn start(&mut self, row_data: RowData) -> Result<()> { + self.start_with(row_data, spawn_worker_thread) + } + fn append_row(&mut self, row: (usize, Vec<i16>)) -> 
Result<()> { + MpscWorker::append_row(self, row) + } + fn get_result(&mut self, index: usize) -> Result<Vec<u8>> { + self.get_result_with(index, collect_worker_thread) + } +} + +fn create_worker() -> (Sender<WorkerMsg>, impl FnOnce() + 'static) { + let (tx, rx) = mpsc::channel(); + let closure = move || { + let mut worker = ImmediateWorker::default(); + + while let Ok(message) = rx.recv() { + match message { + WorkerMsg::Start(mut data) => { + // we always set component index to 0 for worker threads + // because they only ever handle one per thread and we don't want them + // to attempt to access nonexistent components + data.index = 0; + worker.start_immediate(data); + } + WorkerMsg::AppendRow(row) => { + worker.append_row_immediate((0, row)); + } + WorkerMsg::GetResult(chan) => { + let _ = chan.send(worker.get_result_immediate(0)); + break; + } + } + } + }; + + (tx, closure) +} + +fn spawn_worker_thread(component: usize) -> Result<Sender<WorkerMsg>> { + let (tx, worker) = create_worker(); + let thread_builder = + std::thread::Builder::new().name(format!("worker thread for component {}", component)); + thread_builder.spawn(worker)?; + Ok(tx) +} + +fn collect_worker_thread(rx: Receiver<Vec<u8>>) -> Vec<u8> { + rx.recv().expect("jpeg-decoder worker thread error") +} diff --git a/vendor/jpeg-decoder/src/worker/rayon.rs b/vendor/jpeg-decoder/src/worker/rayon.rs new file mode 100644 index 0000000..ec7df25 --- /dev/null +++ b/vendor/jpeg-decoder/src/worker/rayon.rs @@ -0,0 +1,221 @@ +use core::convert::TryInto; + +use rayon::iter::{IndexedParallelIterator, ParallelIterator}; +use rayon::slice::ParallelSliceMut; + +use crate::decoder::{choose_color_convert_func, ColorTransform}; +use crate::error::Result; +use crate::idct::dequantize_and_idct_block; +use crate::parser::Component; +use crate::upsampler::Upsampler; +use crate::{decoder::MAX_COMPONENTS, parser::Dimensions}; + +use std::sync::Arc; + +use super::{RowData, Worker}; + +/// Technically similar to 
`immediate::ImmediateWorker` but we copy it since we may prefer +/// different style of managing the memory allocation, something that multiple actors can access in +/// parallel. +#[derive(Default)] +struct ImmediateWorker { + offsets: [usize; MAX_COMPONENTS], + results: [Vec<u8>; MAX_COMPONENTS], + components: [Option<Component>; MAX_COMPONENTS], + quantization_tables: [Option<Arc<[u16; 64]>>; MAX_COMPONENTS], +} + +#[derive(Clone, Copy)] +struct ComponentMetadata { + block_width: usize, + block_count: usize, + line_stride: usize, + dct_scale: usize, +} + +#[derive(Default)] +pub struct Scoped { + inner: ImmediateWorker, +} + +impl ImmediateWorker { + pub fn start_immediate(&mut self, data: RowData) { + let elements = data.component.block_size.width as usize + * data.component.block_size.height as usize + * data.component.dct_scale + * data.component.dct_scale; + self.offsets[data.index] = 0; + self.results[data.index].resize(elements, 0u8); + self.components[data.index] = Some(data.component); + self.quantization_tables[data.index] = Some(data.quantization_table); + } + + pub fn get_result_immediate(&mut self, index: usize) -> Vec<u8> { + core::mem::take(&mut self.results[index]) + } + + pub fn component_metadata(&self, index: usize) -> Option<ComponentMetadata> { + let component = self.components[index].as_ref()?; + let block_size = component.block_size; + let block_width = block_size.width as usize; + let block_count = block_size.width as usize * component.vertical_sampling_factor as usize; + let line_stride = block_size.width as usize * component.dct_scale; + let dct_scale = component.dct_scale; + + Some(ComponentMetadata { + block_width, + block_count, + line_stride, + dct_scale, + }) + } + + pub fn append_row_locked( + quantization_table: Arc<[u16; 64]>, + metadata: ComponentMetadata, + data: Vec<i16>, + result_block: &mut [u8], + ) { + // Convert coefficients from a MCU row to samples. 
+ let ComponentMetadata { + block_count, + line_stride, + block_width, + dct_scale, + } = metadata; + + assert_eq!(data.len(), block_count * 64); + + let mut output_buffer = [0; 64]; + for i in 0..block_count { + let x = (i % block_width) * dct_scale; + let y = (i / block_width) * dct_scale; + + let coefficients: &[i16; 64] = &data[i * 64..(i + 1) * 64].try_into().unwrap(); + + // Write to a temporary intermediate buffer, a 8x8 'image'. + dequantize_and_idct_block( + dct_scale, + coefficients, + &*quantization_table, + 8, + &mut output_buffer, + ); + + let write_back = &mut result_block[y * line_stride + x..]; + + let buffered_lines = output_buffer.chunks_mut(8); + let back_lines = write_back.chunks_mut(line_stride); + + for (buf, back) in buffered_lines.zip(back_lines).take(dct_scale) { + back[..dct_scale].copy_from_slice(&buf[..dct_scale]); + } + } + } +} + +impl Worker for Scoped { + fn start(&mut self, row_data: RowData) -> Result<()> { + self.inner.start_immediate(row_data); + Ok(()) + } + + fn append_row(&mut self, row: (usize, Vec<i16>)) -> Result<()> { + let inner = &mut self.inner; + let (index, data) = row; + + let quantization_table = inner.quantization_tables[index].as_ref().unwrap().clone(); + let metadata = inner.component_metadata(index).unwrap(); + let result_block = &mut inner.results[index][inner.offsets[index]..]; + inner.offsets[index] += metadata.bytes_used(); + + ImmediateWorker::append_row_locked(quantization_table, metadata, data, result_block); + Ok(()) + } + + fn get_result(&mut self, index: usize) -> Result<Vec<u8>> { + let result = self.inner.get_result_immediate(index); + Ok(result) + } + + // Magic sauce, these _may_ run in parallel. 
+ fn append_rows(&mut self, iter: &mut dyn Iterator<Item = (usize, Vec<i16>)>) -> Result<()> { + let inner = &mut self.inner; + rayon::in_place_scope(|scope| { + let metadatas = [ + inner.component_metadata(0), + inner.component_metadata(1), + inner.component_metadata(2), + inner.component_metadata(3), + ]; + + let [res0, res1, res2, res3] = &mut inner.results; + + // Lazily get the blocks. Note: if we've already collected results from a component + // then the result vector has already been deallocated/taken. But no more tasks should + // be created for it. + let mut result_blocks = [ + res0.get_mut(inner.offsets[0]..).unwrap_or(&mut []), + res1.get_mut(inner.offsets[1]..).unwrap_or(&mut []), + res2.get_mut(inner.offsets[2]..).unwrap_or(&mut []), + res3.get_mut(inner.offsets[3]..).unwrap_or(&mut []), + ]; + + // First we schedule everything, making sure their index is right etc. + for (index, data) in iter { + let metadata = metadatas[index].unwrap(); + let quantization_table = inner.quantization_tables[index].as_ref().unwrap().clone(); + + inner.offsets[index] += metadata.bytes_used(); + let (result_block, tail) = + core::mem::take(&mut result_blocks[index]).split_at_mut(metadata.bytes_used()); + result_blocks[index] = tail; + + scope.spawn(move |_| { + ImmediateWorker::append_row_locked( + quantization_table, + metadata, + data, + result_block, + ) + }); + } + }); + + Ok(()) + } +} + +impl ComponentMetadata { + fn bytes_used(&self) -> usize { + self.block_count * self.dct_scale * self.dct_scale + } +} + +pub fn compute_image_parallel( + components: &[Component], + data: Vec<Vec<u8>>, + output_size: Dimensions, + color_transform: ColorTransform, +) -> Result<Vec<u8>> { + let color_convert_func = choose_color_convert_func(components.len(), color_transform)?; + let upsampler = Upsampler::new(components, output_size.width, output_size.height)?; + let line_size = output_size.width as usize * components.len(); + let mut image = vec![0u8; line_size * output_size.height 
as usize]; + + image + .par_chunks_mut(line_size) + .with_max_len(1) + .enumerate() + .for_each(|(row, line)| { + upsampler.upsample_and_interleave_row( + &data, + row, + output_size.width as usize, + line, + color_convert_func, + ); + }); + + Ok(image) +} |