aboutsummaryrefslogtreecommitdiff
path: root/vendor/flate2/src/gz/mod.rs
diff options
context:
space:
mode:
author Valentin Popov <valentin@popov.link> 2024-01-08 00:21:28 +0300
committer Valentin Popov <valentin@popov.link> 2024-01-08 00:21:28 +0300
commit 1b6a04ca5504955c571d1c97504fb45ea0befee4 (patch)
tree 7579f518b23313e8a9748a88ab6173d5e030b227 /vendor/flate2/src/gz/mod.rs
parent 5ecd8cf2cba827454317368b68571df0d13d7842 (diff)
download fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.tar.xz
fparkan-1b6a04ca5504955c571d1c97504fb45ea0befee4.zip
Initial vendor packages
Signed-off-by: Valentin Popov <valentin@popov.link>
Diffstat (limited to 'vendor/flate2/src/gz/mod.rs')
-rw-r--r-- vendor/flate2/src/gz/mod.rs 644
1 files changed, 644 insertions, 0 deletions
diff --git a/vendor/flate2/src/gz/mod.rs b/vendor/flate2/src/gz/mod.rs
new file mode 100644
index 0000000..31a6961
--- /dev/null
+++ b/vendor/flate2/src/gz/mod.rs
@@ -0,0 +1,644 @@
+use std::ffi::CString;
+use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
+use std::time;
+
+use crate::bufreader::BufReader;
+use crate::{Compression, Crc};
+
+// Bit masks for the flags byte of a gzip header. The parser in this file
+// reads that byte from offset 3 of the fixed 10-byte header.
+
+/// Flag bit: a 2-byte CRC of the header bytes follows the optional fields.
+pub static FHCRC: u8 = 1 << 1;
+/// Flag bit: a 2-byte little-endian length and an `extra` field are present.
+pub static FEXTRA: u8 = 1 << 2;
+/// Flag bit: a nul-terminated file name is present.
+pub static FNAME: u8 = 1 << 3;
+/// Flag bit: a nul-terminated comment is present.
+pub static FCOMMENT: u8 = 1 << 4;
+/// Reserved flag bits; the parser rejects a header in which any of them is set.
+pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
+
+pub mod bufread;
+pub mod read;
+pub mod write;
+
+// The maximum length of the header filename and comment fields. More than
+// enough for these fields in reasonable use, but prevents possible attacks.
+// Enforced by `read_to_nul`, which returns an error instead of growing a
+// field beyond this many bytes.
+const MAX_HEADER_BUF: usize = 65535;
+
+/// Metadata carried in the header of a gzip stream.
+///
+/// Each optional field is `None` when the stream did not provide it.
+#[derive(PartialEq, Clone, Debug, Default)]
+pub struct GzHeader {
+    extra: Option<Vec<u8>>,
+    filename: Option<Vec<u8>>,
+    comment: Option<Vec<u8>>,
+    operating_system: u8,
+    mtime: u32,
+}
+
+impl GzHeader {
+    /// Returns the raw bytes of the `filename` header field, if present.
+    pub fn filename(&self) -> Option<&[u8]> {
+        self.filename.as_deref()
+    }
+
+    /// Returns the raw bytes of the `extra` header field, if present.
+    pub fn extra(&self) -> Option<&[u8]> {
+        self.extra.as_deref()
+    }
+
+    /// Returns the raw bytes of the `comment` header field, if present.
+    pub fn comment(&self) -> Option<&[u8]> {
+        self.comment.as_deref()
+    }
+
+    /// Returns the `operating_system` byte of the header.
+    ///
+    /// There are predefined values for various operating systems.
+    /// 255 means that the value is unknown.
+    pub fn operating_system(&self) -> u8 {
+        self.operating_system
+    }
+
+    /// Returns the most recent modification time of the original file.
+    ///
+    /// The value is in Unix format, i.e. seconds since 00:00:00 GMT,
+    /// Jan. 1, 1970. (Note that this may cause problems for MS-DOS and other
+    /// systems that use local rather than Universal time.) If the compressed
+    /// data did not come from a file, `mtime` is set to the time at which
+    /// compression started. `mtime` = 0 means no time stamp is available.
+    ///
+    /// The usage of `mtime` is discouraged because of the Year 2038 problem.
+    pub fn mtime(&self) -> u32 {
+        self.mtime
+    }
+
+    /// Returns the modification time as a [`time::SystemTime`].
+    ///
+    /// Yields `None` when the stored counter is 0, which indicates that no
+    /// time stamp is available. Otherwise the counter is interpreted as
+    /// seconds since 00:00:00 GMT, Jan. 1 1970.
+    /// See [`mtime`](#method.mtime) for more detail.
+    pub fn mtime_as_datetime(&self) -> Option<time::SystemTime> {
+        match self.mtime {
+            0 => None,
+            secs => Some(time::UNIX_EPOCH + time::Duration::from_secs(u64::from(secs))),
+        }
+    }
+}
+
+/// Position of `GzHeaderParser` within a gzip header.
+///
+/// Where a variant carries an `Option<Box<Crc>>`, it is the running checksum
+/// of the header bytes consumed so far; it is `Some` only when the `FHCRC`
+/// flag was set in the fixed header.
+#[derive(Debug)]
+pub enum GzHeaderState {
+ // Reading the 10-byte fixed header: (bytes read so far, buffer).
+ Start(u8, [u8; 10]),
+ // Reading the 2-byte length of the extra field: (crc, bytes read so far, buffer).
+ Xlen(Option<Box<Crc>>, u8, [u8; 2]),
+ // Filling the extra field: (crc, bytes read so far).
+ Extra(Option<Box<Crc>>, u16),
+ // Reading the nul-terminated file name, if flagged.
+ Filename(Option<Box<Crc>>),
+ // Reading the nul-terminated comment, if flagged.
+ Comment(Option<Box<Crc>>),
+ // Reading the 2-byte stored header CRC: (crc, bytes read so far, buffer).
+ Crc(Option<Box<Crc>>, u8, [u8; 2]),
+ // The whole header has been consumed.
+ Complete,
+}
+
+// The default state is `Complete`, so a default-constructed parser reports
+// its header as already parsed.
+impl Default for GzHeaderState {
+ fn default() -> Self {
+ Self::Complete
+ }
+}
+
+/// Incremental parser for a gzip header.
+///
+/// `state` records how far parsing has progressed, `flags` holds the flags
+/// byte taken from the fixed header, and `header` accumulates the fields
+/// decoded so far.
+#[derive(Debug, Default)]
+pub struct GzHeaderParser {
+ state: GzHeaderState,
+ flags: u8,
+ header: GzHeader,
+}
+
+impl GzHeaderParser {
+ /// Creates a parser positioned at the first byte of a gzip header.
+ fn new() -> Self {
+ GzHeaderParser {
+ state: GzHeaderState::Start(0, [0; 10]),
+ flags: 0,
+ header: GzHeader::default(),
+ }
+ }
+
+ /// Reads from `r` until the whole header has been parsed.
+ ///
+ /// Returns `Ok(())` once the `Complete` state is reached. Progress
+ /// (byte counts, partially filled buffers, the running CRC) lives in
+ /// `self.state` and `self.header`, so when a read fails the bytes
+ /// consumed so far are kept and a later call continues from there.
+ ///
+ /// # Errors
+ ///
+ /// * any error returned by the reader, including `UnexpectedEof` when the
+ ///   input ends inside the header;
+ /// * `bad_header()` when the magic bytes, the compression method or the
+ ///   reserved flag bits are invalid;
+ /// * `corrupt()` when the stored header CRC does not match.
+ fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
+ loop {
+ match &mut self.state {
+ GzHeaderState::Start(count, buffer) => {
+ // `read_into` may report 0 bytes after an interrupted read,
+ // so keep going until the fixed header is complete.
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ // Gzip identification bytes
+ if buffer[0] != 0x1f || buffer[1] != 0x8b {
+ return Err(bad_header());
+ }
+ // Gzip compression method (8 = deflate)
+ if buffer[2] != 8 {
+ return Err(bad_header());
+ }
+ self.flags = buffer[3];
+ // RFC1952: "must give an error indication if any reserved bit is non-zero"
+ if self.flags & FRESERVED != 0 {
+ return Err(bad_header());
+ }
+ // Bytes 4..8 hold the modification time, least significant byte first.
+ self.header.mtime = ((buffer[4] as u32) << 0)
+ | ((buffer[5] as u32) << 8)
+ | ((buffer[6] as u32) << 16)
+ | ((buffer[7] as u32) << 24);
+ // Byte 8 is read but not used.
+ let _xfl = buffer[8];
+ self.header.operating_system = buffer[9];
+ // Only track a checksum when the header says one is stored.
+ let crc = if self.flags & FHCRC != 0 {
+ let mut crc = Box::new(Crc::new());
+ crc.update(buffer);
+ Some(crc)
+ } else {
+ None
+ };
+ self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
+ }
+ GzHeaderState::Xlen(crc, count, buffer) => {
+ if self.flags & FEXTRA != 0 {
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ if let Some(crc) = crc {
+ crc.update(buffer);
+ }
+ // Pre-size the extra buffer so the next state can fill it in place.
+ let xlen = parse_le_u16(&buffer);
+ self.header.extra = Some(vec![0; xlen as usize]);
+ self.state = GzHeaderState::Extra(crc.take(), 0);
+ } else {
+ // No extra field: go straight to the file name.
+ self.state = GzHeaderState::Filename(crc.take());
+ }
+ }
+ GzHeaderState::Extra(crc, count) => {
+ // `extra` was allocated by the `Xlen` state before entering this one.
+ debug_assert!(self.header.extra.is_some());
+ let extra = self.header.extra.as_mut().unwrap();
+ while (*count as usize) < extra.len() {
+ *count += read_into(r, &mut extra[*count as usize..])? as u16;
+ }
+ if let Some(crc) = crc {
+ crc.update(extra);
+ }
+ self.state = GzHeaderState::Filename(crc.take());
+ }
+ GzHeaderState::Filename(crc) => {
+ if self.flags & FNAME != 0 {
+ // Reuse any bytes collected by an earlier, failed attempt.
+ let filename = self.header.filename.get_or_insert_with(Vec::new);
+ read_to_nul(r, filename)?;
+ if let Some(crc) = crc {
+ // The terminating nul is not stored but is part of the checksum.
+ crc.update(filename);
+ crc.update(b"\0");
+ }
+ }
+ self.state = GzHeaderState::Comment(crc.take());
+ }
+ GzHeaderState::Comment(crc) => {
+ if self.flags & FCOMMENT != 0 {
+ // Reuse any bytes collected by an earlier, failed attempt.
+ let comment = self.header.comment.get_or_insert_with(Vec::new);
+ read_to_nul(r, comment)?;
+ if let Some(crc) = crc {
+ // The terminating nul is not stored but is part of the checksum.
+ crc.update(comment);
+ crc.update(b"\0");
+ }
+ }
+ self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
+ }
+ GzHeaderState::Crc(crc, count, buffer) => {
+ // A checksum is present exactly when FHCRC was set in `Start`.
+ if let Some(crc) = crc {
+ debug_assert!(self.flags & FHCRC != 0);
+ while (*count as usize) < buffer.len() {
+ *count += read_into(r, &mut buffer[*count as usize..])? as u8;
+ }
+ // Only the low 16 bits of the computed checksum are compared.
+ let stored_crc = parse_le_u16(&buffer);
+ let calced_crc = crc.sum() as u16;
+ if stored_crc != calced_crc {
+ return Err(corrupt());
+ }
+ }
+ self.state = GzHeaderState::Complete;
+ }
+ GzHeaderState::Complete => {
+ return Ok(());
+ }
+ }
+ }
+ }
+
+ /// Returns the parsed header, or `None` while parsing is still incomplete.
+ fn header(&self) -> Option<&GzHeader> {
+ match self.state {
+ GzHeaderState::Complete => Some(&self.header),
+ _ => None,
+ }
+ }
+}
+
+/// Extracts the header collected by a parser.
+///
+/// In debug builds this asserts that the parser had reached the `Complete`
+/// state; in release builds whatever was collected so far is returned.
+impl From<GzHeaderParser> for GzHeader {
+    fn from(parser: GzHeaderParser) -> Self {
+        let GzHeaderParser { state, header, .. } = parser;
+        debug_assert!(matches!(state, GzHeaderState::Complete));
+        header
+    }
+}
+
+// Perform one read from `r` into `buffer` and report how many bytes arrived.
+// End of input (a read of zero bytes) is turned into an `UnexpectedEof`
+// error. Unlike `Read::read`, a return value of Ok(0) therefore does not mean
+// EOF: it means the read was interrupted and the caller should try again.
+fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
+    debug_assert!(!buffer.is_empty());
+    let outcome = r.read(buffer);
+    if let Err(err) = &outcome {
+        if err.kind() == ErrorKind::Interrupted {
+            return Ok(0);
+        }
+    }
+    match outcome? {
+        0 => Err(ErrorKind::UnexpectedEof.into()),
+        filled => Ok(filled),
+    }
+}
+
+// Consume bytes from `r` up to and including the first nul byte, appending
+// every non-nul byte to `buffer`. Fails with `InvalidInput` when another
+// byte arrives while `buffer` already holds MAX_HEADER_BUF bytes, and with
+// `UnexpectedEof` when the input ends before a nul is seen. Bytes appended
+// before a failure stay in `buffer`.
+fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
+    for next in r.bytes() {
+        let byte = next?;
+        if byte == 0 {
+            return Ok(());
+        }
+        if buffer.len() == MAX_HEADER_BUF {
+            return Err(Error::new(
+                ErrorKind::InvalidInput,
+                "gzip header field too long",
+            ));
+        }
+        buffer.push(byte);
+    }
+    Err(ErrorKind::UnexpectedEof.into())
+}
+
+// Decode two bytes as a little-endian `u16` (first byte is the low byte).
+fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
+    u16::from_le_bytes(*buffer)
+}
+
+fn bad_header() -> Error {
+ Error::new(ErrorKind::InvalidInput, "invalid gzip header")
+}
+
+fn corrupt() -> Error {
+ Error::new(
+ ErrorKind::InvalidInput,
+ "corrupt gzip stream does not have a matching checksum",
+ )
+}
+
+/// Builder for gzip encoders with a customised header.
+///
+/// Use it to set header fields such as the file name or comment before
+/// creating the encoder.
+///
+/// # Examples
+///
+/// ```
+/// use std::io::prelude::*;
+/// # use std::io;
+/// use std::fs::File;
+/// use flate2::GzBuilder;
+/// use flate2::Compression;
+///
+/// // Create a file and write a sample string through an encoder configured
+/// // with the builder pattern.
+///
+/// # fn sample_builder() -> Result<(), io::Error> {
+/// let f = File::create("examples/hello_world.gz")?;
+/// let mut gz = GzBuilder::new()
+///     .filename("hello_world.txt")
+///     .comment("test file, please delete")
+///     .write(f, Compression::default());
+/// gz.write_all(b"hello world")?;
+/// gz.finish()?;
+/// # Ok(())
+/// # }
+/// ```
+#[derive(Debug)]
+pub struct GzBuilder {
+    extra: Option<Vec<u8>>,
+    filename: Option<CString>,
+    comment: Option<CString>,
+    operating_system: Option<u8>,
+    mtime: u32,
+}
+
+impl Default for GzBuilder {
+    fn default() -> Self {
+        GzBuilder::new()
+    }
+}
+
+impl GzBuilder {
+    /// Creates a builder with no optional header fields and an `mtime` of 0.
+    pub fn new() -> GzBuilder {
+        GzBuilder {
+            mtime: 0,
+            operating_system: None,
+            extra: None,
+            filename: None,
+            comment: None,
+        }
+    }
+
+    /// Sets the `mtime` field of the gzip header.
+    pub fn mtime(self, mtime: u32) -> GzBuilder {
+        GzBuilder { mtime, ..self }
+    }
+
+    /// Sets the `operating_system` field of the gzip header.
+    pub fn operating_system(self, os: u8) -> GzBuilder {
+        GzBuilder {
+            operating_system: Some(os),
+            ..self
+        }
+    }
+
+    /// Sets the `extra` field of the gzip header.
+    pub fn extra<T: Into<Vec<u8>>>(self, extra: T) -> GzBuilder {
+        GzBuilder {
+            extra: Some(extra.into()),
+            ..self
+        }
+    }
+
+    /// Sets the `filename` field of the gzip header.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `filename` slice contains a zero.
+    pub fn filename<T: Into<Vec<u8>>>(self, filename: T) -> GzBuilder {
+        let name = CString::new(filename.into()).unwrap();
+        GzBuilder {
+            filename: Some(name),
+            ..self
+        }
+    }
+
+    /// Sets the `comment` field of the gzip header.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the `comment` slice contains a zero.
+    pub fn comment<T: Into<Vec<u8>>>(self, comment: T) -> GzBuilder {
+        let text = CString::new(comment.into()).unwrap();
+        GzBuilder {
+            comment: Some(text),
+            ..self
+        }
+    }
+
+    /// Consumes the builder and returns a writer-based encoder.
+    ///
+    /// Data written to the returned encoder is compressed and then written
+    /// out to `w`.
+    pub fn write<W: Write>(self, w: W, lvl: Compression) -> write::GzEncoder<W> {
+        let header = self.into_header(lvl);
+        write::gz_encoder(header, w, lvl)
+    }
+
+    /// Consumes the builder and returns a reader-based encoder.
+    ///
+    /// Data read from the returned encoder is the compressed form of the
+    /// data read from `r`.
+    pub fn read<R: Read>(self, r: R, lvl: Compression) -> read::GzEncoder<R> {
+        let buffered = BufReader::new(r);
+        read::gz_encoder(self.buf_read(buffered, lvl))
+    }
+
+    /// Consumes the builder and returns an encoder over a buffered reader.
+    ///
+    /// Data read from the returned encoder is the compressed form of the
+    /// data read from `r`.
+    pub fn buf_read<R>(self, r: R, lvl: Compression) -> bufread::GzEncoder<R>
+    where
+        R: BufRead,
+    {
+        let header = self.into_header(lvl);
+        bufread::gz_encoder(header, r, lvl)
+    }
+
+    // Serialise the configured fields into the bytes of a gzip header: the
+    // 10-byte fixed part followed by the optional extra, filename and
+    // comment fields, in that order.
+    fn into_header(self, lvl: Compression) -> Vec<u8> {
+        let mut flags = 0u8;
+        // Reserve the fixed part now; it is filled in once the flags are known.
+        let mut header = vec![0u8; 10];
+
+        if let Some(extra) = self.extra {
+            flags |= FEXTRA;
+            // Two-byte little-endian length prefix.
+            // NOTE(review): a payload longer than u16::MAX has its length
+            // truncated by these casts while the whole payload is still
+            // appended - confirm whether callers can ever pass that much.
+            header.push(extra.len() as u8);
+            header.push((extra.len() >> 8) as u8);
+            header.extend(extra);
+        }
+        if let Some(name) = self.filename {
+            flags |= FNAME;
+            header.extend_from_slice(name.as_bytes_with_nul());
+        }
+        if let Some(text) = self.comment {
+            flags |= FCOMMENT;
+            header.extend_from_slice(text.as_bytes_with_nul());
+        }
+
+        // Extra-flags byte derived from the compression level.
+        let xfl = if lvl.0 >= Compression::best().0 {
+            2
+        } else if lvl.0 <= Compression::fast().0 {
+            4
+        } else {
+            0
+        };
+
+        // Magic bytes, compression method (8) and flags.
+        header[..4].copy_from_slice(&[0x1f, 0x8b, 8, flags]);
+        header[4..8].copy_from_slice(&self.mtime.to_le_bytes());
+        header[8] = xfl;
+
+        // Typically this byte indicates what OS the gz stream was created on,
+        // but in an effort to have cross-platform reproducible streams just
+        // default this value to 255. I'm not sure that if we "correctly" set
+        // this it'd do anything anyway...
+        header[9] = self.operating_system.unwrap_or(255);
+        header
+    }
+}
+
+#[cfg(test)]
+mod tests {
+ use std::io::prelude::*;
+
+ use super::{read, write, GzBuilder, GzHeaderParser};
+ use crate::{Compression, GzHeader};
+ use rand::{thread_rng, Rng};
+
+    /// A short message survives compression followed by decompression.
+    #[test]
+    fn roundtrip() {
+        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(b"foo bar baz").unwrap();
+        let compressed = encoder.finish().unwrap();
+
+        let mut decoder = read::GzDecoder::new(&compressed[..]);
+        let mut decoded = String::new();
+        decoder.read_to_string(&mut decoded).unwrap();
+        assert_eq!(decoded, "foo bar baz");
+    }
+
+    /// An encoder that received no input still produces a decodable stream.
+    #[test]
+    fn roundtrip_zero() {
+        let compressed = write::GzEncoder::new(Vec::new(), Compression::default())
+            .finish()
+            .unwrap();
+
+        let mut decoder = read::GzDecoder::new(&compressed[..]);
+        let mut decoded = String::new();
+        decoder.read_to_string(&mut decoded).unwrap();
+        assert_eq!(decoded, "");
+    }
+
+    /// Many writes of random length decode back to the concatenated input.
+    #[test]
+    fn roundtrip_big() {
+        let mut expected = Vec::new();
+        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
+        let pool = crate::random_bytes().take(1024).collect::<Vec<_>>();
+        for _ in 0..200 {
+            // Write a random-length prefix of the pool each time.
+            let len = thread_rng().gen_range(0..pool.len());
+            let piece = &pool[..len];
+            expected.extend_from_slice(piece);
+            encoder.write_all(piece).unwrap();
+        }
+        let compressed = encoder.finish().unwrap();
+
+        let mut decoder = read::GzDecoder::new(&compressed[..]);
+        let mut decoded = Vec::new();
+        decoder.read_to_end(&mut decoded).unwrap();
+        assert_eq!(decoded, expected);
+    }
+
+    /// A reader-based encoder chained into a decoder returns the original data.
+    #[test]
+    fn roundtrip_big2() {
+        let input = crate::random_bytes().take(1024 * 1024).collect::<Vec<_>>();
+        let encoder = read::GzEncoder::new(&input[..], Compression::default());
+        let mut decoder = read::GzDecoder::new(encoder);
+        let mut output = Vec::new();
+        decoder.read_to_end(&mut output).unwrap();
+        assert_eq!(output, input);
+    }
+
+    // A Rust implementation of CRC that closely matches the C code in RFC1952.
+    // Only use this to create CRCs for tests.
+    struct Rfc1952Crc {
+        /* CRC of every possible 8-bit message, indexed by that byte. */
+        crc_table: [u32; 256],
+    }
+
+    impl Rfc1952Crc {
+        /* Build the lookup table used for the fast, byte-wise CRC. */
+        fn new() -> Self {
+            let mut crc_table = [0u32; 256];
+            for (n, slot) in crc_table.iter_mut().enumerate() {
+                // Eight shift/xor rounds per table entry.
+                *slot = (0..8).fold(n as u32, |c, _| {
+                    if c & 1 != 0 {
+                        0xedb88320 ^ (c >> 1)
+                    } else {
+                        c >> 1
+                    }
+                });
+            }
+            Rfc1952Crc { crc_table }
+        }
+
+        /*
+           Feed `buf` into the running checksum `crc` and return the new
+           checksum. Start from zero. The one's complement pre- and
+           post-conditioning happens in here, so callers must not apply it.
+        */
+        fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
+            let folded = buf.iter().fold(!crc, |c, &b| {
+                self.crc_table[usize::from(c as u8 ^ b)] ^ (c >> 8)
+            });
+            !folded
+        }
+
+        /* Return the CRC of the bytes buf. */
+        fn crc(&self, buf: &[u8]) -> u32 {
+            self.update_crc(0, buf)
+        }
+    }
+
+    /// A header produced by `GzBuilder`, with a header CRC appended by hand,
+    /// is parsed back into the same field values.
+    #[test]
+    fn roundtrip_header() {
+        let mut header = GzBuilder::new()
+            .mtime(1234)
+            .operating_system(57)
+            .filename("filename")
+            .comment("comment")
+            .into_header(Compression::fast());
+
+        // Add a CRC to the header
+        header[3] ^= super::FHCRC;
+        let crc16 = Rfc1952Crc::new().crc(&header) as u16;
+        header.extend_from_slice(&crc16.to_le_bytes());
+
+        let mut parser = GzHeaderParser::new();
+        parser.parse(&mut header.as_slice()).unwrap();
+
+        let expected = GzHeader {
+            extra: None,
+            filename: Some("filename".as_bytes().to_vec()),
+            comment: Some("comment".as_bytes().to_vec()),
+            operating_system: 57,
+            mtime: 1234,
+        };
+        let actual = parser.header().unwrap();
+        assert_eq!(actual, &expected)
+    }
+
+    /// Header fields set on the builder are visible on the decoder, and the
+    /// payload still decodes.
+    #[test]
+    fn fields() {
+        let payload = vec![0, 2, 4, 6];
+        let encoder = GzBuilder::new()
+            .filename("foo.rs")
+            .comment("bar")
+            .extra(vec![0, 1, 2, 3])
+            .read(&payload[..], Compression::default());
+        let mut decoder = read::GzDecoder::new(encoder);
+        {
+            let header = decoder.header().unwrap();
+            assert_eq!(header.filename(), Some(&b"foo.rs"[..]));
+            assert_eq!(header.comment(), Some(&b"bar"[..]));
+            assert_eq!(header.extra(), Some(&b"\x00\x01\x02\x03"[..]));
+        }
+        let mut decoded = Vec::new();
+        decoder.read_to_end(&mut decoded).unwrap();
+        assert_eq!(decoded, vec![0, 2, 4, 6]);
+    }
+
+    /// Reading again after the stream is exhausted succeeds and adds nothing.
+    #[test]
+    fn keep_reading_after_end() {
+        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
+        encoder.write_all(b"foo bar baz").unwrap();
+        let compressed = encoder.finish().unwrap();
+
+        let mut decoder = read::GzDecoder::new(&compressed[..]);
+        let mut decoded = String::new();
+        decoder.read_to_string(&mut decoded).unwrap();
+        assert_eq!(decoded, "foo bar baz");
+
+        // Second read on the finished decoder.
+        decoder.read_to_string(&mut decoded).unwrap();
+        assert_eq!(decoded, "foo bar baz");
+    }
+
+    /// Property test: arbitrary byte vectors round-trip through the
+    /// reader-based encoder and decoder.
+    #[test]
+    fn qc_reader() {
+        fn roundtrips(input: Vec<u8>) -> bool {
+            let encoder = read::GzEncoder::new(&input[..], Compression::default());
+            let mut decoder = read::GzDecoder::new(encoder);
+            let mut output = Vec::new();
+            decoder.read_to_end(&mut output).unwrap();
+            input == output
+        }
+
+        ::quickcheck::quickcheck(roundtrips as fn(_) -> _);
+    }
+
+    /// Flushing the encoder directly after a formatted write succeeds.
+    #[test]
+    fn flush_after_write() {
+        let mut encoder = write::GzEncoder::new(Vec::new(), Compression::default());
+        write!(encoder, "Hello world").unwrap();
+        encoder.flush().unwrap();
+    }
+}