From 842f4a85693b418af81560738aa3136ac500d9b1 Mon Sep 17 00:00:00 2001 From: Valentin Popov Date: Tue, 10 Feb 2026 08:38:58 +0000 Subject: Implement LZSS decompression with optional XOR decryption - Added `lzss_decompress_simple` function for LZSS decompression in `lzss.rs`. - Introduced `XorState` struct and `xor_stream` function for XOR decryption in `xor.rs`. - Updated `mod.rs` to include new LZSS and XOR modules. - Refactored `parse_library` function in `parse.rs` to utilize the new XOR decryption functionality. - Cleaned up and organized code in `lib.rs` by removing redundant functions and structures. - Added tests for new functionality in `tests.rs`. --- crates/rsli/src/compress/lzh.rs | 292 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 292 insertions(+) create mode 100644 crates/rsli/src/compress/lzh.rs (limited to 'crates/rsli/src/compress/lzh.rs') diff --git a/crates/rsli/src/compress/lzh.rs b/crates/rsli/src/compress/lzh.rs new file mode 100644 index 0000000..93f2267 --- /dev/null +++ b/crates/rsli/src/compress/lzh.rs @@ -0,0 +1,292 @@ +use super::xor::xor_stream; +use crate::error::Error; +use crate::Result; + +pub(crate) const LZH_N: usize = 4096; +pub(crate) const LZH_F: usize = 60; +pub(crate) const LZH_THRESHOLD: usize = 2; +pub(crate) const LZH_N_CHAR: usize = 256 - LZH_THRESHOLD + LZH_F; +pub(crate) const LZH_T: usize = LZH_N_CHAR * 2 - 1; +pub(crate) const LZH_R: usize = LZH_T - 1; +pub(crate) const LZH_MAX_FREQ: u16 = 0x8000; + +/// LZSS-Huffman decompression with optional XOR pre-decryption +pub fn lzss_huffman_decompress( + data: &[u8], + expected_size: usize, + xor_key: Option, +) -> Result> { + // TODO: Full optimization for Huffman variant (rare in practice) + // For now, fallback to separate XOR step for Huffman + if let Some(key) = xor_key { + let decrypted = xor_stream(data, key); + let mut decoder = LzhDecoder::new(&decrypted); + decoder.decode(expected_size) + } else { + let mut decoder = LzhDecoder::new(data); + decoder.decode(expected_size) + } +} + +struct LzhDecoder<'a> { + bit_reader: BitReader<'a>, + text: [u8; LZH_N], + freq: [u16; LZH_T + 1], + parent: [usize; LZH_T + LZH_N_CHAR], + son: [usize; LZH_T], + d_code: [u8; 256], + d_len: [u8; 256], + ring_pos: usize, +} + +impl<'a> LzhDecoder<'a> { + fn new(data: &'a [u8]) -> Self { + let mut decoder = Self { + bit_reader: BitReader::new(data), + text: [0x20u8; LZH_N], + freq: [0u16; LZH_T + 1], + parent: [0usize; LZH_T + LZH_N_CHAR], + son: [0usize; LZH_T], + d_code: [0u8; 256], + d_len: [0u8; 256], + ring_pos: LZH_N - LZH_F, + }; + decoder.init_tables(); + decoder.start_huff(); + decoder + } + + fn decode(&mut self, expected_size: usize) -> Result> { + let mut out = Vec::with_capacity(expected_size); + + while out.len() < expected_size { + let c = self.decode_char(); + if c < 256 { + let byte = c as u8; + out.push(byte); + self.text[self.ring_pos] = byte; + self.ring_pos = (self.ring_pos + 1) & (LZH_N - 1); + } else { + let mut offset = self.decode_position(); + offset = (self.ring_pos.wrapping_sub(offset).wrapping_sub(1)) & (LZH_N - 1); + let mut length = c.saturating_sub(253); + + while length > 0 && out.len() < expected_size { + let byte = self.text[offset]; + out.push(byte); + self.text[self.ring_pos] = byte; + self.ring_pos = (self.ring_pos + 1) & (LZH_N - 1); + offset = (offset + 1) & (LZH_N - 1); + length -= 1; + } + } + } + + if out.len() != expected_size { + return Err(Error::DecompressionFailed("lzss-huffman")); + } + Ok(out) + } + + fn init_tables(&mut self) { + let d_code_group_counts = [1usize, 3, 8, 12, 24, 16]; + let d_len_group_counts = [32usize, 48, 64, 48, 48, 16]; + + let mut group_index = 0u8; + let mut idx = 0usize; + let mut run = 32usize; + for count in d_code_group_counts { + for _ in 0..count { + for _ in 0..run { + self.d_code[idx] = group_index; + idx += 1; + } + group_index = group_index.wrapping_add(1); + } + run >>= 1; + } + + let mut len = 3u8; + idx = 0; + for count in d_len_group_counts { + for _ in 0..count { + self.d_len[idx] = len; + idx += 1; + } + len = len.saturating_add(1); + } + } + + fn start_huff(&mut self) { + for i in 0..LZH_N_CHAR { + self.freq[i] = 1; + self.son[i] = i + LZH_T; + self.parent[i + LZH_T] = i; + } + + let mut i = 0usize; + let mut j = LZH_N_CHAR; + while j <= LZH_R { + self.freq[j] = self.freq[i].saturating_add(self.freq[i + 1]); + self.son[j] = i; + self.parent[i] = j; + self.parent[i + 1] = j; + i += 2; + j += 1; + } + + self.freq[LZH_T] = u16::MAX; + self.parent[LZH_R] = 0; + } + + fn decode_char(&mut self) -> usize { + let mut node = self.son[LZH_R]; + while node < LZH_T { + let bit = usize::from(self.bit_reader.read_bit_or_zero()); + node = self.son[node + bit]; + } + + let c = node - LZH_T; + self.update(c); + c + } + + fn decode_position(&mut self) -> usize { + let i = self.bit_reader.read_bits_or_zero(8) as usize; + let mut c = usize::from(self.d_code[i]) << 6; + let mut j = usize::from(self.d_len[i]).saturating_sub(2); + + while j > 0 { + j -= 1; + c |= usize::from(self.bit_reader.read_bit_or_zero()) << j; + } + + c | (i & 0x3F) + } + + fn update(&mut self, c: usize) { + if self.freq[LZH_R] == LZH_MAX_FREQ { + self.reconstruct(); + } + + let mut current = self.parent[c + LZH_T]; + loop { + self.freq[current] = self.freq[current].saturating_add(1); + let freq = self.freq[current]; + + if current + 1 < self.freq.len() && freq > self.freq[current + 1] { + let mut swap_idx = current + 1; + while swap_idx + 1 < self.freq.len() && freq > self.freq[swap_idx + 1] { + swap_idx += 1; + } + + self.freq.swap(current, swap_idx); + + let left = self.son[current]; + let right = self.son[swap_idx]; + self.son[current] = right; + self.son[swap_idx] = left; + + self.parent[left] = swap_idx; + if left < LZH_T { + self.parent[left + 1] = swap_idx; + } + + self.parent[right] = current; + if right < LZH_T { + self.parent[right + 1] = current; + } + + current = swap_idx; + } + + current = self.parent[current]; + if current == 0 { + break; + } + } + } + + fn reconstruct(&mut self) { + let mut j = 0usize; + for i in 0..LZH_T { + if self.son[i] >= LZH_T { + self.freq[j] = (self.freq[i].saturating_add(1)) / 2; + self.son[j] = self.son[i]; + j += 1; + } + } + + let mut i = 0usize; + let mut current = LZH_N_CHAR; + while current < LZH_T { + let sum = self.freq[i].saturating_add(self.freq[i + 1]); + self.freq[current] = sum; + + let mut insert_at = current; + while insert_at > 0 && sum < self.freq[insert_at - 1] { + insert_at -= 1; + } + + for move_idx in (insert_at..current).rev() { + self.freq[move_idx + 1] = self.freq[move_idx]; + self.son[move_idx + 1] = self.son[move_idx]; + } + + self.freq[insert_at] = sum; + self.son[insert_at] = i; + + i += 2; + current += 1; + } + + for idx in 0..LZH_T { + let node = self.son[idx]; + self.parent[node] = idx; + if node < LZH_T { + self.parent[node + 1] = idx; + } + } + + self.freq[LZH_T] = u16::MAX; + self.parent[LZH_R] = 0; + } +} + +struct BitReader<'a> { + data: &'a [u8], + byte_pos: usize, + bit_mask: u8, +} + +impl<'a> BitReader<'a> { + fn new(data: &'a [u8]) -> Self { + Self { + data, + byte_pos: 0, + bit_mask: 0x80, + } + } + + fn read_bit_or_zero(&mut self) -> u8 { + let Some(byte) = self.data.get(self.byte_pos).copied() else { + return 0; + }; + + let bit = if (byte & self.bit_mask) != 0 { 1 } else { 0 }; + self.bit_mask >>= 1; + if self.bit_mask == 0 { + self.bit_mask = 0x80; + self.byte_pos = self.byte_pos.saturating_add(1); + } + bit + } + + fn read_bits_or_zero(&mut self, bits: usize) -> u32 { + let mut value = 0u32; + for _ in 0..bits { + value = (value << 1) | u32::from(self.read_bit_or_zero()); + } + value + } +} -- cgit v1.2.3