use std::cmp; use std::io; use std::io::prelude::*; use std::mem; use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser}; use crate::crc::CrcReader; use crate::deflate; use crate::Compression; fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize { let min = cmp::min(into.len(), from.len() - *pos); for (slot, val) in into.iter_mut().zip(from[*pos..*pos + min].iter()) { *slot = *val; } *pos += min; min } /// A gzip streaming encoder /// /// This structure implements a [`Read`] interface. When read from, it reads /// uncompressed data from the underlying [`BufRead`] and provides the compressed data. /// /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples /// /// ``` /// use std::io::prelude::*; /// use std::io; /// use flate2::Compression; /// use flate2::bufread::GzEncoder; /// use std::fs::File; /// use std::io::BufReader; /// /// // Opens sample file, compresses the contents and returns a Vector or error /// // File wrapped in a BufReader implements BufRead /// /// fn open_hello_world() -> io::Result> { /// let f = File::open("examples/hello_world.txt")?; /// let b = BufReader::new(f); /// let mut gz = GzEncoder::new(b, Compression::fast()); /// let mut buffer = Vec::new(); /// gz.read_to_end(&mut buffer)?; /// Ok(buffer) /// } /// ``` #[derive(Debug)] pub struct GzEncoder { inner: deflate::bufread::DeflateEncoder>, header: Vec, pos: usize, eof: bool, } pub fn gz_encoder(header: Vec, r: R, lvl: Compression) -> GzEncoder { let crc = CrcReader::new(r); GzEncoder { inner: deflate::bufread::DeflateEncoder::new(crc, lvl), header, pos: 0, eof: false, } } impl GzEncoder { /// Creates a new encoder which will use the given compression level. /// /// The encoder is not configured specially for the emitted header. For /// header configuration, see the `GzBuilder` type. /// /// The data read from the stream `r` will be compressed and available /// through the returned reader. pub fn new(r: R, level: Compression) -> GzEncoder { GzBuilder::new().buf_read(r, level) } fn read_footer(&mut self, into: &mut [u8]) -> io::Result { if self.pos == 8 { return Ok(0); } let crc = self.inner.get_ref().crc(); let ref arr = [ (crc.sum() >> 0) as u8, (crc.sum() >> 8) as u8, (crc.sum() >> 16) as u8, (crc.sum() >> 24) as u8, (crc.amount() >> 0) as u8, (crc.amount() >> 8) as u8, (crc.amount() >> 16) as u8, (crc.amount() >> 24) as u8, ]; Ok(copy(into, arr, &mut self.pos)) } } impl GzEncoder { /// Acquires a reference to the underlying reader. pub fn get_ref(&self) -> &R { self.inner.get_ref().get_ref() } /// Acquires a mutable reference to the underlying reader. /// /// Note that mutation of the reader may result in surprising results if /// this encoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { self.inner.get_mut().get_mut() } /// Returns the underlying stream, consuming this encoder pub fn into_inner(self) -> R { self.inner.into_inner().into_inner() } } #[inline] fn finish(buf: &[u8; 8]) -> (u32, u32) { let crc = ((buf[0] as u32) << 0) | ((buf[1] as u32) << 8) | ((buf[2] as u32) << 16) | ((buf[3] as u32) << 24); let amt = ((buf[4] as u32) << 0) | ((buf[5] as u32) << 8) | ((buf[6] as u32) << 16) | ((buf[7] as u32) << 24); (crc, amt) } impl Read for GzEncoder { fn read(&mut self, mut into: &mut [u8]) -> io::Result { let mut amt = 0; if self.eof { return self.read_footer(into); } else if self.pos < self.header.len() { amt += copy(into, &self.header, &mut self.pos); if amt == into.len() { return Ok(amt); } let tmp = into; into = &mut tmp[amt..]; } match self.inner.read(into)? { 0 => { self.eof = true; self.pos = 0; self.read_footer(into) } n => Ok(amt + n), } } } impl Write for GzEncoder { fn write(&mut self, buf: &[u8]) -> io::Result { self.get_mut().write(buf) } fn flush(&mut self) -> io::Result<()> { self.get_mut().flush() } } /// A decoder for a single member of a [gzip file]. /// /// This structure implements a [`Read`] interface. When read from, it reads /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. /// /// After reading a single member of the gzip data this reader will return /// Ok(0) even if there are more bytes available in the underlying reader. /// If you need the following bytes, call `into_inner()` after Ok(0) to /// recover the underlying reader. /// /// To handle gzip files that may have multiple members, see [`MultiGzDecoder`] /// or read more /// [in the introduction](../index.html#about-multi-member-gzip-files). /// /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples /// /// ``` /// use std::io::prelude::*; /// use std::io; /// # use flate2::Compression; /// # use flate2::write::GzEncoder; /// use flate2::bufread::GzDecoder; /// /// # fn main() { /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); /// # e.write_all(b"Hello World").unwrap(); /// # let bytes = e.finish().unwrap(); /// # println!("{}", decode_reader(bytes).unwrap()); /// # } /// # /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error /// // Here &[u8] implements BufRead /// /// fn decode_reader(bytes: Vec) -> io::Result { /// let mut gz = GzDecoder::new(&bytes[..]); /// let mut s = String::new(); /// gz.read_to_string(&mut s)?; /// Ok(s) /// } /// ``` #[derive(Debug)] pub struct GzDecoder { state: GzState, reader: CrcReader>, multi: bool, } #[derive(Debug)] enum GzState { Header(GzHeaderParser), Body(GzHeader), Finished(GzHeader, usize, [u8; 8]), Err(io::Error), End(Option), } impl GzDecoder { /// Creates a new decoder from the given reader, immediately parsing the /// gzip header. pub fn new(mut r: R) -> GzDecoder { let mut header_parser = GzHeaderParser::new(); let state = match header_parser.parse(&mut r) { Ok(_) => GzState::Body(GzHeader::from(header_parser)), Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => { GzState::Header(header_parser) } Err(err) => GzState::Err(err), }; GzDecoder { state, reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)), multi: false, } } fn multi(mut self, flag: bool) -> GzDecoder { self.multi = flag; self } } impl GzDecoder { /// Returns the header associated with this stream, if it was valid pub fn header(&self) -> Option<&GzHeader> { match &self.state { GzState::Body(header) | GzState::Finished(header, _, _) => Some(header), GzState::End(header) => header.as_ref(), _ => None, } } /// Acquires a reference to the underlying reader. pub fn get_ref(&self) -> &R { self.reader.get_ref().get_ref() } /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { self.reader.get_mut().get_mut() } /// Consumes this decoder, returning the underlying reader. pub fn into_inner(self) -> R { self.reader.into_inner().into_inner() } } impl Read for GzDecoder { fn read(&mut self, into: &mut [u8]) -> io::Result { loop { match &mut self.state { GzState::Header(parser) => { parser.parse(self.reader.get_mut().get_mut())?; self.state = GzState::Body(GzHeader::from(mem::take(parser))); } GzState::Body(header) => { if into.is_empty() { return Ok(0); } match self.reader.read(into)? { 0 => { self.state = GzState::Finished(mem::take(header), 0, [0; 8]); } n => { return Ok(n); } } } GzState::Finished(header, pos, buf) => { if *pos < buf.len() { *pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?; } else { let (crc, amt) = finish(&buf); if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() { self.state = GzState::End(Some(mem::take(header))); return Err(corrupt()); } else if self.multi { let is_eof = self .reader .get_mut() .get_mut() .fill_buf() .map(|buf| buf.is_empty())?; if is_eof { self.state = GzState::End(Some(mem::take(header))); } else { self.reader.reset(); self.reader.get_mut().reset_data(); self.state = GzState::Header(GzHeaderParser::new()) } } else { self.state = GzState::End(Some(mem::take(header))); } } } GzState::Err(err) => { let result = Err(mem::replace(err, io::ErrorKind::Other.into())); self.state = GzState::End(None); return result; } GzState::End(_) => return Ok(0), } } } } impl Write for GzDecoder { fn write(&mut self, buf: &[u8]) -> io::Result { self.get_mut().write(buf) } fn flush(&mut self) -> io::Result<()> { self.get_mut().flush() } } /// A gzip streaming decoder that decodes a [gzip file] that may have multiple members. /// /// This structure implements a [`Read`] interface. When read from, it reads /// compressed data from the underlying [`BufRead`] and provides the uncompressed data. /// /// A gzip file consists of a series of *members* concatenated one after another. /// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the /// underlying reader does. For a file, this reads to the end of the file. /// /// To handle members seperately, see [GzDecoder] or read more /// [in the introduction](../index.html#about-multi-member-gzip-files). /// /// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5 /// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html /// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html /// /// # Examples /// /// ``` /// use std::io::prelude::*; /// use std::io; /// # use flate2::Compression; /// # use flate2::write::GzEncoder; /// use flate2::bufread::MultiGzDecoder; /// /// # fn main() { /// # let mut e = GzEncoder::new(Vec::new(), Compression::default()); /// # e.write_all(b"Hello World").unwrap(); /// # let bytes = e.finish().unwrap(); /// # println!("{}", decode_reader(bytes).unwrap()); /// # } /// # /// // Uncompresses a Gz Encoded vector of bytes and returns a string or error /// // Here &[u8] implements BufRead /// /// fn decode_reader(bytes: Vec) -> io::Result { /// let mut gz = MultiGzDecoder::new(&bytes[..]); /// let mut s = String::new(); /// gz.read_to_string(&mut s)?; /// Ok(s) /// } /// ``` #[derive(Debug)] pub struct MultiGzDecoder(GzDecoder); impl MultiGzDecoder { /// Creates a new decoder from the given reader, immediately parsing the /// (first) gzip header. If the gzip stream contains multiple members all will /// be decoded. pub fn new(r: R) -> MultiGzDecoder { MultiGzDecoder(GzDecoder::new(r).multi(true)) } } impl MultiGzDecoder { /// Returns the current header associated with this stream, if it's valid pub fn header(&self) -> Option<&GzHeader> { self.0.header() } /// Acquires a reference to the underlying reader. pub fn get_ref(&self) -> &R { self.0.get_ref() } /// Acquires a mutable reference to the underlying stream. /// /// Note that mutation of the stream may result in surprising results if /// this decoder is continued to be used. pub fn get_mut(&mut self) -> &mut R { self.0.get_mut() } /// Consumes this decoder, returning the underlying reader. pub fn into_inner(self) -> R { self.0.into_inner() } } impl Read for MultiGzDecoder { fn read(&mut self, into: &mut [u8]) -> io::Result { self.0.read(into) } } #[cfg(test)] mod test { use crate::bufread::GzDecoder; use crate::gz::write; use crate::Compression; use std::io::{Read, Write}; // GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any // additional data to be consumed by the caller. #[test] fn decode_extra_data() { let expected = "Hello World"; let compressed = { let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); e.write(expected.as_ref()).unwrap(); let mut b = e.finish().unwrap(); b.push(b'x'); b }; let mut output = Vec::new(); let mut decoder = GzDecoder::new(compressed.as_slice()); let decoded_bytes = decoder.read_to_end(&mut output).unwrap(); assert_eq!(decoded_bytes, output.len()); let actual = std::str::from_utf8(&output).expect("String parsing error"); assert_eq!( actual, expected, "after decompression we obtain the original input" ); output.clear(); assert_eq!( decoder.read(&mut output).unwrap(), 0, "subsequent read of decoder returns 0, but inner reader can return additional data" ); let mut reader = decoder.into_inner(); assert_eq!( reader.read_to_end(&mut output).unwrap(), 1, "extra data is accessible in underlying buf-read" ); assert_eq!(output, b"x"); } }