diff options
Diffstat (limited to 'vendor/png/src/text_metadata.rs')
-rw-r--r-- | vendor/png/src/text_metadata.rs | 586 |
1 files changed, 586 insertions, 0 deletions
diff --git a/vendor/png/src/text_metadata.rs b/vendor/png/src/text_metadata.rs new file mode 100644 index 0000000..42f8df3 --- /dev/null +++ b/vendor/png/src/text_metadata.rs @@ -0,0 +1,586 @@ +//! # Text chunks (tEXt/zTXt/iTXt) structs and functions +//! +//! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for +//! embedded text chunks in the file. They may appear either before or after the image data +//! chunks. There are three kinds of text chunks. +//! - `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded. +//! - `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and +//! encoding, but the `text` field is compressed before being written into the PNG file. +//! - `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports +//! compression of the text field as well. +//! +//! The `ISO 8859-1` encoding technically doesn't allow any control characters +//! to be used, but in practice these values are encountered anyway. This can +//! either be the extended `ISO-8859-1` encoding with control characters or the +//! `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is +//! used. +//! +//! ## Reading text chunks +//! +//! As a PNG is decoded, any text chunk encountered is appended the +//! [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`, +//! `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered +//! chunk is `tEXt`, `zTXt`, or `iTXt`. +//! +//! ``` +//! use std::fs::File; +//! use std::iter::FromIterator; +//! use std::path::PathBuf; +//! +//! // Opening a png file that has a zTXt chunk +//! let decoder = png::Decoder::new( +//! File::open(PathBuf::from_iter([ +//! "tests", +//! "text_chunk_examples", +//! "ztxt_example.png", +//! ])) +//! .unwrap(), +//! ); +//! let mut reader = decoder.read_info().unwrap(); +//! // If the text chunk is before the image data frames, `reader.info()` already contains the text. +//! for text_chunk in &reader.info().compressed_latin1_text { +//! println!("{:?}", text_chunk.keyword); // Prints the keyword +//! println!("{:#?}", text_chunk); // Prints out the text chunk. +//! // To get the uncompressed text, use the `get_text` method. +//! println!("{}", text_chunk.get_text().unwrap()); +//! } +//! ``` +//! +//! ## Writing text chunks +//! +//! There are two ways to write text chunks: the first is to add the appropriate text structs directly to the encoder header before the header is written to file. +//! To add a text chunk at any point in the stream, use the `write_text_chunk` method. +//! +//! ``` +//! # use png::text_metadata::{ITXtChunk, ZTXtChunk}; +//! # use std::env; +//! # use std::fs::File; +//! # use std::io::BufWriter; +//! # use std::iter::FromIterator; +//! # use std::path::PathBuf; +//! # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap(); +//! # let ref mut w = BufWriter::new(file); +//! let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1. +//! encoder.set_color(png::ColorType::Rgba); +//! encoder.set_depth(png::BitDepth::Eight); +//! // Adding text chunks to the header +//! encoder +//! .add_text_chunk( +//! "Testing tEXt".to_string(), +//! "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(), +//! ) +//! .unwrap(); +//! encoder +//! .add_ztxt_chunk( +//! "Testing zTXt".to_string(), +//! "This is a zTXt chunk that is compressed in the png file.".to_string(), +//! ) +//! .unwrap(); +//! encoder +//! .add_itxt_chunk( +//! "Testing iTXt".to_string(), +//! "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(), +//! ) +//! .unwrap(); +//! +//! let mut writer = encoder.write_header().unwrap(); +//! +//! let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black. +//! writer.write_image_data(&data).unwrap(); // Save +//! +//! // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file. +//! let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string()); +//! writer.write_text_chunk(&tail_ztxt_chunk).unwrap(); +//! +//! // The fields of the text chunk are public, so they can be mutated before being written to the file. +//! let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string()); +//! tail_itxt_chunk.compressed = true; +//! tail_itxt_chunk.language_tag = "hi".to_string(); +//! tail_itxt_chunk.translated_keyword = "लेखक".to_string(); +//! writer.write_text_chunk(&tail_itxt_chunk).unwrap(); +//! ``` + +#![warn(missing_docs)] + +use crate::{chunk, encoder, DecodingError, EncodingError}; +use flate2::write::ZlibEncoder; +use flate2::Compression; +use miniz_oxide::inflate::{decompress_to_vec_zlib, decompress_to_vec_zlib_with_limit}; +use std::{convert::TryFrom, io::Write}; + +/// Default decompression limit for compressed text chunks. +pub const DECOMPRESSION_LIMIT: usize = 2097152; // 2 MiB + +/// Text encoding errors that is wrapped by the standard EncodingError type +#[derive(Debug, Clone, Copy)] +pub(crate) enum TextEncodingError { + /// Unrepresentable characters in string + Unrepresentable, + /// Keyword longer than 79 bytes or empty + InvalidKeywordSize, + /// Error encountered while compressing text + CompressionError, +} + +/// Text decoding error that is wrapped by the standard DecodingError type +#[derive(Debug, Clone, Copy)] +pub(crate) enum TextDecodingError { + /// Unrepresentable characters in string + Unrepresentable, + /// Keyword longer than 79 bytes or empty + InvalidKeywordSize, + /// Missing null separator + MissingNullSeparator, + /// Compressed text cannot be uncompressed + InflationError, + /// Needs more space to decompress + OutOfDecompressionSpace, + /// Using an unspecified value for the compression method + InvalidCompressionMethod, + /// Using a byte that is not 0 or 255 as compression flag in iTXt chunk + InvalidCompressionFlag, + /// Missing the compression flag + MissingCompressionFlag, +} + +/// A generalized text chunk trait +pub trait EncodableTextChunk { + /// Encode text chunk as Vec<u8> to a `Write` + fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError>; +} + +/// Struct representing a tEXt chunk +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct TEXtChunk { + /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1. + pub keyword: String, + /// Text field of tEXt chunk. Can be at most 2GB. + pub text: String, +} + +fn decode_iso_8859_1(text: &[u8]) -> String { + text.iter().map(|&b| b as char).collect() +} + +fn encode_iso_8859_1(text: &str) -> Result<Vec<u8>, TextEncodingError> { + encode_iso_8859_1_iter(text).collect() +} + +fn encode_iso_8859_1_into(buf: &mut Vec<u8>, text: &str) -> Result<(), TextEncodingError> { + for b in encode_iso_8859_1_iter(text) { + buf.push(b?); + } + Ok(()) +} + +fn encode_iso_8859_1_iter(text: &str) -> impl Iterator<Item = Result<u8, TextEncodingError>> + '_ { + text.chars() + .map(|c| u8::try_from(c as u32).map_err(|_| TextEncodingError::Unrepresentable)) +} + +fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> { + if text.is_ascii() { + // `from_utf8` cannot panic because we're already checked that `text` is ASCII-7. + // And this is the only safe way to get ASCII-7 string from `&[u8]`. + Ok(std::str::from_utf8(text).expect("unreachable")) + } else { + Err(TextDecodingError::Unrepresentable) + } +} + +impl TEXtChunk { + /// Constructs a new TEXtChunk. + /// Not sure whether it should take &str or String. + pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self { + Self { + keyword: keyword.into(), + text: text.into(), + } + } + + /// Decodes a slice of bytes to a String using Latin-1 decoding. + /// The decoder runs in strict mode, and any decoding errors are passed along to the caller. + pub(crate) fn decode( + keyword_slice: &[u8], + text_slice: &[u8], + ) -> Result<Self, TextDecodingError> { + if keyword_slice.is_empty() || keyword_slice.len() > 79 { + return Err(TextDecodingError::InvalidKeywordSize); + } + + Ok(Self { + keyword: decode_iso_8859_1(keyword_slice), + text: decode_iso_8859_1(text_slice), + }) + } +} + +impl EncodableTextChunk for TEXtChunk { + /// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes. + fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> { + let mut data = encode_iso_8859_1(&self.keyword)?; + + if data.is_empty() || data.len() > 79 { + return Err(TextEncodingError::InvalidKeywordSize.into()); + } + + data.push(0); + + encode_iso_8859_1_into(&mut data, &self.text)?; + + encoder::write_chunk(w, chunk::tEXt, &data) + } +} + +/// Struct representing a zTXt chunk +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ZTXtChunk { + /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1. + pub keyword: String, + /// Text field of zTXt chunk. It is compressed by default, but can be uncompressed if necessary. + text: OptCompressed, +} + +/// Private enum encoding the compressed and uncompressed states of zTXt/iTXt text field. +#[derive(Clone, Debug, PartialEq, Eq)] +enum OptCompressed { + /// Compressed version of text field. Can be at most 2GB. + Compressed(Vec<u8>), + /// Uncompressed text field. + Uncompressed(String), +} + +impl ZTXtChunk { + /// Creates a new ZTXt chunk. + pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self { + Self { + keyword: keyword.into(), + text: OptCompressed::Uncompressed(text.into()), + } + } + + pub(crate) fn decode( + keyword_slice: &[u8], + compression_method: u8, + text_slice: &[u8], + ) -> Result<Self, TextDecodingError> { + if keyword_slice.is_empty() || keyword_slice.len() > 79 { + return Err(TextDecodingError::InvalidKeywordSize); + } + + if compression_method != 0 { + return Err(TextDecodingError::InvalidCompressionMethod); + } + + Ok(Self { + keyword: decode_iso_8859_1(keyword_slice), + text: OptCompressed::Compressed(text_slice.to_vec()), + }) + } + + /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes. + pub fn decompress_text(&mut self) -> Result<(), DecodingError> { + self.decompress_text_with_limit(DECOMPRESSION_LIMIT) + } + + /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes. + pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> { + match &self.text { + OptCompressed::Compressed(v) => { + let uncompressed_raw = match decompress_to_vec_zlib_with_limit(&v[..], limit) { + Ok(s) => s, + Err(err) if err.status == miniz_oxide::inflate::TINFLStatus::HasMoreOutput => { + return Err(DecodingError::from( + TextDecodingError::OutOfDecompressionSpace, + )); + } + Err(_) => { + return Err(DecodingError::from(TextDecodingError::InflationError)); + } + }; + self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw)); + } + OptCompressed::Uncompressed(_) => {} + }; + Ok(()) + } + + /// Decompresses the inner text, and returns it as a `String`. + /// If decompression uses more the 2MiB, first call decompress with limit, and then this method. + pub fn get_text(&self) -> Result<String, DecodingError> { + match &self.text { + OptCompressed::Compressed(v) => { + let uncompressed_raw = decompress_to_vec_zlib(&v[..]) + .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?; + Ok(decode_iso_8859_1(&uncompressed_raw)) + } + OptCompressed::Uncompressed(s) => Ok(s.clone()), + } + } + + /// Compresses the inner text, mutating its own state. + pub fn compress_text(&mut self) -> Result<(), EncodingError> { + match &self.text { + OptCompressed::Uncompressed(s) => { + let uncompressed_raw = encode_iso_8859_1(s)?; + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); + encoder + .write_all(&uncompressed_raw) + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + self.text = OptCompressed::Compressed( + encoder + .finish() + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?, + ); + } + OptCompressed::Compressed(_) => {} + } + + Ok(()) + } +} + +impl EncodableTextChunk for ZTXtChunk { + fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> { + let mut data = encode_iso_8859_1(&self.keyword)?; + + if data.is_empty() || data.len() > 79 { + return Err(TextEncodingError::InvalidKeywordSize.into()); + } + + // Null separator + data.push(0); + + // Compression method: the only valid value is 0, as of 2021. + data.push(0); + + match &self.text { + OptCompressed::Compressed(v) => { + data.extend_from_slice(&v[..]); + } + OptCompressed::Uncompressed(s) => { + // This code may have a bug. Check for correctness. + let uncompressed_raw = encode_iso_8859_1(s)?; + let mut encoder = ZlibEncoder::new(data, Compression::fast()); + encoder + .write_all(&uncompressed_raw) + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + data = encoder + .finish() + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + } + }; + + encoder::write_chunk(w, chunk::zTXt, &data) + } +} + +/// Struct encoding an iTXt chunk +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ITXtChunk { + /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1. + pub keyword: String, + /// Indicates whether the text will be (or was) compressed in the PNG. + pub compressed: bool, + /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded. + pub language_tag: String, + /// Translated keyword. This is UTF-8 encoded. + pub translated_keyword: String, + /// Text field of iTXt chunk. It is compressed by default, but can be uncompressed if necessary. + text: OptCompressed, +} + +impl ITXtChunk { + /// Constructs a new iTXt chunk. Leaves all but keyword and text to default values. + pub fn new(keyword: impl Into<String>, text: impl Into<String>) -> Self { + Self { + keyword: keyword.into(), + compressed: false, + language_tag: "".to_string(), + translated_keyword: "".to_string(), + text: OptCompressed::Uncompressed(text.into()), + } + } + + pub(crate) fn decode( + keyword_slice: &[u8], + compression_flag: u8, + compression_method: u8, + language_tag_slice: &[u8], + translated_keyword_slice: &[u8], + text_slice: &[u8], + ) -> Result<Self, TextDecodingError> { + if keyword_slice.is_empty() || keyword_slice.len() > 79 { + return Err(TextDecodingError::InvalidKeywordSize); + } + let keyword = decode_iso_8859_1(keyword_slice); + + let compressed = match compression_flag { + 0 => false, + 1 => true, + _ => return Err(TextDecodingError::InvalidCompressionFlag), + }; + + if compressed && compression_method != 0 { + return Err(TextDecodingError::InvalidCompressionMethod); + } + + let language_tag = decode_ascii(language_tag_slice)?.to_owned(); + + let translated_keyword = std::str::from_utf8(translated_keyword_slice) + .map_err(|_| TextDecodingError::Unrepresentable)? + .to_string(); + let text = if compressed { + OptCompressed::Compressed(text_slice.to_vec()) + } else { + OptCompressed::Uncompressed( + String::from_utf8(text_slice.to_vec()) + .map_err(|_| TextDecodingError::Unrepresentable)?, + ) + }; + + Ok(Self { + keyword, + compressed, + language_tag, + translated_keyword, + text, + }) + } + + /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes. + pub fn decompress_text(&mut self) -> Result<(), DecodingError> { + self.decompress_text_with_limit(DECOMPRESSION_LIMIT) + } + + /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes. + pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> { + match &self.text { + OptCompressed::Compressed(v) => { + let uncompressed_raw = match decompress_to_vec_zlib_with_limit(&v[..], limit) { + Ok(s) => s, + Err(err) if err.status == miniz_oxide::inflate::TINFLStatus::HasMoreOutput => { + return Err(DecodingError::from( + TextDecodingError::OutOfDecompressionSpace, + )); + } + Err(_) => { + return Err(DecodingError::from(TextDecodingError::InflationError)); + } + }; + self.text = OptCompressed::Uncompressed( + String::from_utf8(uncompressed_raw) + .map_err(|_| TextDecodingError::Unrepresentable)?, + ); + } + OptCompressed::Uncompressed(_) => {} + }; + Ok(()) + } + + /// Decompresses the inner text, and returns it as a `String`. + /// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method. + pub fn get_text(&self) -> Result<String, DecodingError> { + match &self.text { + OptCompressed::Compressed(v) => { + let uncompressed_raw = decompress_to_vec_zlib(&v[..]) + .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?; + String::from_utf8(uncompressed_raw) + .map_err(|_| TextDecodingError::Unrepresentable.into()) + } + OptCompressed::Uncompressed(s) => Ok(s.clone()), + } + } + + /// Compresses the inner text, mutating its own state. + pub fn compress_text(&mut self) -> Result<(), EncodingError> { + match &self.text { + OptCompressed::Uncompressed(s) => { + let uncompressed_raw = s.as_bytes(); + let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); + encoder + .write_all(uncompressed_raw) + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + self.text = OptCompressed::Compressed( + encoder + .finish() + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?, + ); + } + OptCompressed::Compressed(_) => {} + } + + Ok(()) + } +} + +impl EncodableTextChunk for ITXtChunk { + fn encode<W: Write>(&self, w: &mut W) -> Result<(), EncodingError> { + // Keyword + let mut data = encode_iso_8859_1(&self.keyword)?; + + if data.is_empty() || data.len() > 79 { + return Err(TextEncodingError::InvalidKeywordSize.into()); + } + + // Null separator + data.push(0); + + // Compression flag + if self.compressed { + data.push(1); + } else { + data.push(0); + } + + // Compression method + data.push(0); + + // Language tag + if !self.language_tag.is_ascii() { + return Err(EncodingError::from(TextEncodingError::Unrepresentable)); + } + data.extend(self.language_tag.as_bytes()); + + // Null separator + data.push(0); + + // Translated keyword + data.extend_from_slice(self.translated_keyword.as_bytes()); + + // Null separator + data.push(0); + + // Text + if self.compressed { + match &self.text { + OptCompressed::Compressed(v) => { + data.extend_from_slice(&v[..]); + } + OptCompressed::Uncompressed(s) => { + let uncompressed_raw = s.as_bytes(); + let mut encoder = ZlibEncoder::new(data, Compression::fast()); + encoder + .write_all(uncompressed_raw) + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + data = encoder + .finish() + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + } + } + } else { + match &self.text { + OptCompressed::Compressed(v) => { + let uncompressed_raw = decompress_to_vec_zlib(&v[..]) + .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; + data.extend_from_slice(&uncompressed_raw[..]); + } + OptCompressed::Uncompressed(s) => { + data.extend_from_slice(s.as_bytes()); + } + } + } + + encoder::write_chunk(w, chunk::iTXt, &data) + } +} |