diff options
Diffstat (limited to 'vendor/flate2/src/gz/mod.rs')
-rw-r--r-- | vendor/flate2/src/gz/mod.rs | 644 |
1 files changed, 644 insertions, 0 deletions
diff --git a/vendor/flate2/src/gz/mod.rs b/vendor/flate2/src/gz/mod.rs new file mode 100644 index 0000000..31a6961 --- /dev/null +++ b/vendor/flate2/src/gz/mod.rs @@ -0,0 +1,644 @@ +use std::ffi::CString; +use std::io::{BufRead, Error, ErrorKind, Read, Result, Write}; +use std::time; + +use crate::bufreader::BufReader; +use crate::{Compression, Crc}; + +pub static FHCRC: u8 = 1 << 1; +pub static FEXTRA: u8 = 1 << 2; +pub static FNAME: u8 = 1 << 3; +pub static FCOMMENT: u8 = 1 << 4; +pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7; + +pub mod bufread; +pub mod read; +pub mod write; + +// The maximum length of the header filename and comment fields. More than +// enough for these fields in reasonable use, but prevents possible attacks. +const MAX_HEADER_BUF: usize = 65535; + +/// A structure representing the header of a gzip stream. +/// +/// The header can contain metadata about the file that was compressed, if +/// present. +#[derive(PartialEq, Clone, Debug, Default)] +pub struct GzHeader { + extra: Option<Vec<u8>>, + filename: Option<Vec<u8>>, + comment: Option<Vec<u8>>, + operating_system: u8, + mtime: u32, +} + +impl GzHeader { + /// Returns the `filename` field of this gzip stream's header, if present. + pub fn filename(&self) -> Option<&[u8]> { + self.filename.as_ref().map(|s| &s[..]) + } + + /// Returns the `extra` field of this gzip stream's header, if present. + pub fn extra(&self) -> Option<&[u8]> { + self.extra.as_ref().map(|s| &s[..]) + } + + /// Returns the `comment` field of this gzip stream's header, if present. + pub fn comment(&self) -> Option<&[u8]> { + self.comment.as_ref().map(|s| &s[..]) + } + + /// Returns the `operating_system` field of this gzip stream's header. + /// + /// There are predefined values for various operating systems. + /// 255 means that the value is unknown. + pub fn operating_system(&self) -> u8 { + self.operating_system + } + + /// This gives the most recent modification time of the original file being compressed. + /// + /// The time is in Unix format, i.e., seconds since 00:00:00 GMT, Jan. 1, 1970. + /// (Note that this may cause problems for MS-DOS and other systems that use local + /// rather than Universal time.) If the compressed data did not come from a file, + /// `mtime` is set to the time at which compression started. + /// `mtime` = 0 means no time stamp is available. + /// + /// The usage of `mtime` is discouraged because of Year 2038 problem. + pub fn mtime(&self) -> u32 { + self.mtime + } + + /// Returns the most recent modification time represented by a date-time type. + /// Returns `None` if the value of the underlying counter is 0, + /// indicating no time stamp is available. + /// + /// + /// The time is measured as seconds since 00:00:00 GMT, Jan. 1 1970. + /// See [`mtime`](#method.mtime) for more detail. + pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> { + if self.mtime == 0 { + None + } else { + let duration = time::Duration::new(u64::from(self.mtime), 0); + let datetime = time::UNIX_EPOCH + duration; + Some(datetime) + } + } +} + +#[derive(Debug)] +pub enum GzHeaderState { + Start(u8, [u8; 10]), + Xlen(Option<Box<Crc>>, u8, [u8; 2]), + Extra(Option<Box<Crc>>, u16), + Filename(Option<Box<Crc>>), + Comment(Option<Box<Crc>>), + Crc(Option<Box<Crc>>, u8, [u8; 2]), + Complete, +} + +impl Default for GzHeaderState { + fn default() -> Self { + Self::Complete + } +} + +#[derive(Debug, Default)] +pub struct GzHeaderParser { + state: GzHeaderState, + flags: u8, + header: GzHeader, +} + +impl GzHeaderParser { + fn new() -> Self { + GzHeaderParser { + state: GzHeaderState::Start(0, [0; 10]), + flags: 0, + header: GzHeader::default(), + } + } + + fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> { + loop { + match &mut self.state { + GzHeaderState::Start(count, buffer) => { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + // Gzip identification bytes + if buffer[0] != 0x1f || buffer[1] != 0x8b { + return Err(bad_header()); + } + // Gzip compression method (8 = deflate) + if buffer[2] != 8 { + return Err(bad_header()); + } + self.flags = buffer[3]; + // RFC1952: "must give an error indication if any reserved bit is non-zero" + if self.flags & FRESERVED != 0 { + return Err(bad_header()); + } + self.header.mtime = ((buffer[4] as u32) << 0) + | ((buffer[5] as u32) << 8) + | ((buffer[6] as u32) << 16) + | ((buffer[7] as u32) << 24); + let _xfl = buffer[8]; + self.header.operating_system = buffer[9]; + let crc = if self.flags & FHCRC != 0 { + let mut crc = Box::new(Crc::new()); + crc.update(buffer); + Some(crc) + } else { + None + }; + self.state = GzHeaderState::Xlen(crc, 0, [0; 2]); + } + GzHeaderState::Xlen(crc, count, buffer) => { + if self.flags & FEXTRA != 0 { + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + if let Some(crc) = crc { + crc.update(buffer); + } + let xlen = parse_le_u16(&buffer); + self.header.extra = Some(vec![0; xlen as usize]); + self.state = GzHeaderState::Extra(crc.take(), 0); + } else { + self.state = GzHeaderState::Filename(crc.take()); + } + } + GzHeaderState::Extra(crc, count) => { + debug_assert!(self.header.extra.is_some()); + let extra = self.header.extra.as_mut().unwrap(); + while (*count as usize) < extra.len() { + *count += read_into(r, &mut extra[*count as usize..])? as u16; + } + if let Some(crc) = crc { + crc.update(extra); + } + self.state = GzHeaderState::Filename(crc.take()); + } + GzHeaderState::Filename(crc) => { + if self.flags & FNAME != 0 { + let filename = self.header.filename.get_or_insert_with(Vec::new); + read_to_nul(r, filename)?; + if let Some(crc) = crc { + crc.update(filename); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Comment(crc.take()); + } + GzHeaderState::Comment(crc) => { + if self.flags & FCOMMENT != 0 { + let comment = self.header.comment.get_or_insert_with(Vec::new); + read_to_nul(r, comment)?; + if let Some(crc) = crc { + crc.update(comment); + crc.update(b"\0"); + } + } + self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]); + } + GzHeaderState::Crc(crc, count, buffer) => { + if let Some(crc) = crc { + debug_assert!(self.flags & FHCRC != 0); + while (*count as usize) < buffer.len() { + *count += read_into(r, &mut buffer[*count as usize..])? as u8; + } + let stored_crc = parse_le_u16(&buffer); + let calced_crc = crc.sum() as u16; + if stored_crc != calced_crc { + return Err(corrupt()); + } + } + self.state = GzHeaderState::Complete; + } + GzHeaderState::Complete => { + return Ok(()); + } + } + } + } + + fn header(&self) -> Option<&GzHeader> { + match self.state { + GzHeaderState::Complete => Some(&self.header), + _ => None, + } + } +} + +impl From<GzHeaderParser> for GzHeader { + fn from(parser: GzHeaderParser) -> Self { + debug_assert!(matches!(parser.state, GzHeaderState::Complete)); + parser.header + } +} + +// Attempt to fill the `buffer` from `r`. Return the number of bytes read. +// Return an error if EOF is read before the buffer is full. This differs +// from `read` in that Ok(0) means that more data may be available. +fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> { + debug_assert!(!buffer.is_empty()); + match r.read(buffer) { + Ok(0) => Err(ErrorKind::UnexpectedEof.into()), + Ok(n) => Ok(n), + Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0), + Err(e) => Err(e), + } +} + +// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`. +fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> { + let mut bytes = r.bytes(); + loop { + match bytes.next().transpose()? { + Some(byte) if byte == 0 => { + return Ok(()); + } + Some(_) if buffer.len() == MAX_HEADER_BUF => { + return Err(Error::new( + ErrorKind::InvalidInput, + "gzip header field too long", + )); + } + Some(byte) => { + buffer.push(byte); + } + None => { + return Err(ErrorKind::UnexpectedEof.into()); + } + } + } +} + +fn parse_le_u16(buffer: &[u8; 2]) -> u16 { + (buffer[0] as u16) | ((buffer[1] as u16) << 8) +} + +fn bad_header() -> Error { + Error::new(ErrorKind::InvalidInput, "invalid gzip header") +} + +fn corrupt() -> Error { + Error::new( + ErrorKind::InvalidInput, + "corrupt gzip stream does not have a matching checksum", + ) +} + +/// A builder structure to create a new gzip Encoder. +/// +/// This structure controls header configuration options such as the filename. +/// +/// # Examples +/// +/// ``` +/// use std::io::prelude::*; +/// # use std::io; +/// use std::fs::File; +/// use flate2::GzBuilder; +/// use flate2::Compression; +/// +/// // GzBuilder opens a file and writes a sample string using GzBuilder pattern +/// +/// # fn sample_builder() -> Result<(), io::Error> { +/// let f = File::create("examples/hello_world.gz")?; +/// let mut gz = GzBuilder::new() +/// .filename("hello_world.txt") +/// .comment("test file, please delete") +/// .write(f, Compression::default()); +/// gz.write_all(b"hello world")?; +/// gz.finish()?; +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug)] +pub struct GzBuilder { + extra: Option<Vec<u8>>, + filename: Option<CString>, + comment: Option<CString>, + operating_system: Option<u8>, + mtime: u32, +} + +impl Default for GzBuilder { + fn default() -> Self { + Self::new() + } +} + +impl GzBuilder { + /// Create a new blank builder with no header by default. + pub fn new() -> GzBuilder { + GzBuilder { + extra: None, + filename: None, + comment: None, + operating_system: None, + mtime: 0, + } + } + + /// Configure the `mtime` field in the gzip header. + pub fn mtime(mut self, mtime: u32) -> GzBuilder { + self.mtime = mtime; + self + } + + /// Configure the `operating_system` field in the gzip header. + pub fn operating_system(mut self, os: u8) -> GzBuilder { + self.operating_system = Some(os); + self + } + + /// Configure the `extra` field in the gzip header. + pub fn extra<T: Into<Vec<u8>>>(mut self, extra: T) -> GzBuilder { + self.extra = Some(extra.into()); + self + } + + /// Configure the `filename` field in the gzip header. + /// + /// # Panics + /// + /// Panics if the `filename` slice contains a zero. + pub fn filename<T: Into<Vec<u8>>>(mut self, filename: T) -> GzBuilder { + self.filename = Some(CString::new(filename.into()).unwrap()); + self + } + + /// Configure the `comment` field in the gzip header. + /// + /// # Panics + /// + /// Panics if the `comment` slice contains a zero. + pub fn comment<T: Into<Vec<u8>>>(mut self, comment: T) -> GzBuilder { + self.comment = Some(CString::new(comment.into()).unwrap()); + self + } + + /// Consume this builder, creating a writer encoder in the process. + /// + /// The data written to the returned encoder will be compressed and then + /// written out to the supplied parameter `w`. + pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> { + write::gz_encoder(self.into_header(lvl), w, lvl) + } + + /// Consume this builder, creating a reader encoder in the process. + /// + /// Data read from the returned encoder will be the compressed version of + /// the data read from the given reader. + pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> { + read::gz_encoder(self.buf_read(BufReader::new(r), lvl)) + } + + /// Consume this builder, creating a reader encoder in the process. + /// + /// Data read from the returned encoder will be the compressed version of + /// the data read from the given reader. + pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R> + where + R: BufRead, + { + bufread::gz_encoder(self.into_header(lvl), r, lvl) + } + + fn into_header(self, lvl: Compression) -> Vec<u8> { + let GzBuilder { + extra, + filename, + comment, + operating_system, + mtime, + } = self; + let mut flg = 0; + let mut header = vec![0u8; 10]; + if let Some(v) = extra { + flg |= FEXTRA; + header.push((v.len() >> 0) as u8); + header.push((v.len() >> 8) as u8); + header.extend(v); + } + if let Some(filename) = filename { + flg |= FNAME; + header.extend(filename.as_bytes_with_nul().iter().copied()); + } + if let Some(comment) = comment { + flg |= FCOMMENT; + header.extend(comment.as_bytes_with_nul().iter().copied()); + } + header[0] = 0x1f; + header[1] = 0x8b; + header[2] = 8; + header[3] = flg; + header[4] = (mtime >> 0) as u8; + header[5] = (mtime >> 8) as u8; + header[6] = (mtime >> 16) as u8; + header[7] = (mtime >> 24) as u8; + header[8] = if lvl.0 >= Compression::best().0 { + 2 + } else if lvl.0 <= Compression::fast().0 { + 4 + } else { + 0 + }; + + // Typically this byte indicates what OS the gz stream was created on, + // but in an effort to have cross-platform reproducible streams just + // default this value to 255. I'm not sure that if we "correctly" set + // this it'd do anything anyway... + header[9] = operating_system.unwrap_or(255); + header + } +} + +#[cfg(test)] +mod tests { + use std::io::prelude::*; + + use super::{read, write, GzBuilder, GzHeaderParser}; + use crate::{Compression, GzHeader}; + use rand::{thread_rng, Rng}; + + #[test] + fn roundtrip() { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write_all(b"foo bar baz").unwrap(); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + } + + #[test] + fn roundtrip_zero() { + let e = write::GzEncoder::new(Vec::new(), Compression::default()); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, ""); + } + + #[test] + fn roundtrip_big() { + let mut real = Vec::new(); + let mut w = write::GzEncoder::new(Vec::new(), Compression::default()); + let v = crate::random_bytes().take(1024).collect::<Vec<_>>(); + for _ in 0..200 { + let to_write = &v[..thread_rng().gen_range(0..v.len())]; + real.extend(to_write.iter().copied()); + w.write_all(to_write).unwrap(); + } + let result = w.finish().unwrap(); + let mut r = read::GzDecoder::new(&result[..]); + let mut v = Vec::new(); + r.read_to_end(&mut v).unwrap(); + assert_eq!(v, real); + } + + #[test] + fn roundtrip_big2() { + let v = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>(); + let mut r = read::GzDecoder::new(read::GzEncoder::new(&v[..], Compression::default())); + let mut res = Vec::new(); + r.read_to_end(&mut res).unwrap(); + assert_eq!(res, v); + } + + // A Rust implementation of CRC that closely matches the C code in RFC1952. + // Only use this to create CRCs for tests. + struct Rfc1952Crc { + /* Table of CRCs of all 8-bit messages. */ + crc_table: [u32; 256], + } + + impl Rfc1952Crc { + fn new() -> Self { + let mut crc = Rfc1952Crc { + crc_table: [0; 256], + }; + /* Make the table for a fast CRC. */ + for n in 0usize..256 { + let mut c = n as u32; + for _k in 0..8 { + if c & 1 != 0 { + c = 0xedb88320 ^ (c >> 1); + } else { + c = c >> 1; + } + } + crc.crc_table[n] = c; + } + crc + } + + /* + Update a running crc with the bytes buf and return + the updated crc. The crc should be initialized to zero. Pre- and + post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the caller. + */ + fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 { + let mut c = crc ^ 0xffffffff; + + for b in buf { + c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8); + } + c ^ 0xffffffff + } + + /* Return the CRC of the bytes buf. */ + fn crc(&self, buf: &[u8]) -> u32 { + self.update_crc(0, buf) + } + } + + #[test] + fn roundtrip_header() { + let mut header = GzBuilder::new() + .mtime(1234) + .operating_system(57) + .filename("filename") + .comment("comment") + .into_header(Compression::fast()); + + // Add a CRC to the header + header[3] = header[3] ^ super::FHCRC; + let rfc1952_crc = Rfc1952Crc::new(); + let crc32 = rfc1952_crc.crc(&header); + let crc16 = crc32 as u16; + header.extend(&crc16.to_le_bytes()); + + let mut parser = GzHeaderParser::new(); + parser.parse(&mut header.as_slice()).unwrap(); + let actual = parser.header().unwrap(); + assert_eq!( + actual, + &GzHeader { + extra: None, + filename: Some("filename".as_bytes().to_vec()), + comment: Some("comment".as_bytes().to_vec()), + operating_system: 57, + mtime: 1234 + } + ) + } + + #[test] + fn fields() { + let r = vec![0, 2, 4, 6]; + let e = GzBuilder::new() + .filename("foo.rs") + .comment("bar") + .extra(vec![0, 1, 2, 3]) + .read(&r[..], Compression::default()); + let mut d = read::GzDecoder::new(e); + assert_eq!(d.header().unwrap().filename(), Some(&b"foo.rs"[..])); + assert_eq!(d.header().unwrap().comment(), Some(&b"bar"[..])); + assert_eq!(d.header().unwrap().extra(), Some(&b"\x00\x01\x02\x03"[..])); + let mut res = Vec::new(); + d.read_to_end(&mut res).unwrap(); + assert_eq!(res, vec![0, 2, 4, 6]); + } + + #[test] + fn keep_reading_after_end() { + let mut e = write::GzEncoder::new(Vec::new(), Compression::default()); + e.write_all(b"foo bar baz").unwrap(); + let inner = e.finish().unwrap(); + let mut d = read::GzDecoder::new(&inner[..]); + let mut s = String::new(); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + d.read_to_string(&mut s).unwrap(); + assert_eq!(s, "foo bar baz"); + } + + #[test] + fn qc_reader() { + ::quickcheck::quickcheck(test as fn(_) -> _); + + fn test(v: Vec<u8>) -> bool { + let r = read::GzEncoder::new(&v[..], Compression::default()); + let mut r = read::GzDecoder::new(r); + let mut v2 = Vec::new(); + r.read_to_end(&mut v2).unwrap(); + v == v2 + } + } + + #[test] + fn flush_after_write() { + let mut f = write::GzEncoder::new(Vec::new(), Compression::default()); + write!(f, "Hello world").unwrap(); + f.flush().unwrap(); + } +} |