//! # Text chunks (tEXt/zTXt/iTXt) structs and functions //! //! The [PNG spec](https://www.w3.org/TR/2003/REC-PNG-20031110/#11textinfo) optionally allows for //! embedded text chunks in the file. They may appear either before or after the image data //! chunks. There are three kinds of text chunks. //! - `tEXt`: This has a `keyword` and `text` field, and is ISO 8859-1 encoded. //! - `zTXt`: This is semantically the same as `tEXt`, i.e. it has the same fields and //! encoding, but the `text` field is compressed before being written into the PNG file. //! - `iTXt`: This chunk allows for its `text` field to be any valid UTF-8, and supports //! compression of the text field as well. //! //! The `ISO 8859-1` encoding technically doesn't allow any control characters //! to be used, but in practice these values are encountered anyway. This can //! either be the extended `ISO-8859-1` encoding with control characters or the //! `Windows-1252` encoding. This crate assumes the `ISO-8859-1` encoding is //! used. //! //! ## Reading text chunks //! //! As a PNG is decoded, any text chunk encountered is appended the //! [`Info`](`crate::common::Info`) struct, in the `uncompressed_latin1_text`, //! `compressed_latin1_text`, and the `utf8_text` fields depending on whether the encountered //! chunk is `tEXt`, `zTXt`, or `iTXt`. //! //! ``` //! use std::fs::File; //! use std::iter::FromIterator; //! use std::path::PathBuf; //! //! // Opening a png file that has a zTXt chunk //! let decoder = png::Decoder::new( //! File::open(PathBuf::from_iter([ //! "tests", //! "text_chunk_examples", //! "ztxt_example.png", //! ])) //! .unwrap(), //! ); //! let mut reader = decoder.read_info().unwrap(); //! // If the text chunk is before the image data frames, `reader.info()` already contains the text. //! for text_chunk in &reader.info().compressed_latin1_text { //! println!("{:?}", text_chunk.keyword); // Prints the keyword //! println!("{:#?}", text_chunk); // Prints out the text chunk. //! // To get the uncompressed text, use the `get_text` method. //! println!("{}", text_chunk.get_text().unwrap()); //! } //! ``` //! //! ## Writing text chunks //! //! There are two ways to write text chunks: the first is to add the appropriate text structs directly to the encoder header before the header is written to file. //! To add a text chunk at any point in the stream, use the `write_text_chunk` method. //! //! ``` //! # use png::text_metadata::{ITXtChunk, ZTXtChunk}; //! # use std::env; //! # use std::fs::File; //! # use std::io::BufWriter; //! # use std::iter::FromIterator; //! # use std::path::PathBuf; //! # let file = File::create(PathBuf::from_iter(["target", "text_chunk.png"])).unwrap(); //! # let ref mut w = BufWriter::new(file); //! let mut encoder = png::Encoder::new(w, 2, 1); // Width is 2 pixels and height is 1. //! encoder.set_color(png::ColorType::Rgba); //! encoder.set_depth(png::BitDepth::Eight); //! // Adding text chunks to the header //! encoder //! .add_text_chunk( //! "Testing tEXt".to_string(), //! "This is a tEXt chunk that will appear before the IDAT chunks.".to_string(), //! ) //! .unwrap(); //! encoder //! .add_ztxt_chunk( //! "Testing zTXt".to_string(), //! "This is a zTXt chunk that is compressed in the png file.".to_string(), //! ) //! .unwrap(); //! encoder //! .add_itxt_chunk( //! "Testing iTXt".to_string(), //! "iTXt chunks support all of UTF8. Example: हिंदी.".to_string(), //! ) //! .unwrap(); //! //! let mut writer = encoder.write_header().unwrap(); //! //! let data = [255, 0, 0, 255, 0, 0, 0, 255]; // An array containing a RGBA sequence. First pixel is red and second pixel is black. //! writer.write_image_data(&data).unwrap(); // Save //! //! // We can add a tEXt/zTXt/iTXt at any point before the encoder is dropped from scope. These chunks will be at the end of the png file. //! let tail_ztxt_chunk = ZTXtChunk::new("Comment".to_string(), "A zTXt chunk after the image data.".to_string()); //! writer.write_text_chunk(&tail_ztxt_chunk).unwrap(); //! //! // The fields of the text chunk are public, so they can be mutated before being written to the file. //! let mut tail_itxt_chunk = ITXtChunk::new("Author".to_string(), "सायंतन खान".to_string()); //! tail_itxt_chunk.compressed = true; //! tail_itxt_chunk.language_tag = "hi".to_string(); //! tail_itxt_chunk.translated_keyword = "लेखक".to_string(); //! writer.write_text_chunk(&tail_itxt_chunk).unwrap(); //! ``` #![warn(missing_docs)] use crate::{chunk, encoder, DecodingError, EncodingError}; use flate2::write::ZlibEncoder; use flate2::Compression; use miniz_oxide::inflate::{decompress_to_vec_zlib, decompress_to_vec_zlib_with_limit}; use std::{convert::TryFrom, io::Write}; /// Default decompression limit for compressed text chunks. pub const DECOMPRESSION_LIMIT: usize = 2097152; // 2 MiB /// Text encoding errors that is wrapped by the standard EncodingError type #[derive(Debug, Clone, Copy)] pub(crate) enum TextEncodingError { /// Unrepresentable characters in string Unrepresentable, /// Keyword longer than 79 bytes or empty InvalidKeywordSize, /// Error encountered while compressing text CompressionError, } /// Text decoding error that is wrapped by the standard DecodingError type #[derive(Debug, Clone, Copy)] pub(crate) enum TextDecodingError { /// Unrepresentable characters in string Unrepresentable, /// Keyword longer than 79 bytes or empty InvalidKeywordSize, /// Missing null separator MissingNullSeparator, /// Compressed text cannot be uncompressed InflationError, /// Needs more space to decompress OutOfDecompressionSpace, /// Using an unspecified value for the compression method InvalidCompressionMethod, /// Using a byte that is not 0 or 255 as compression flag in iTXt chunk InvalidCompressionFlag, /// Missing the compression flag MissingCompressionFlag, } /// A generalized text chunk trait pub trait EncodableTextChunk { /// Encode text chunk as Vec to a `Write` fn encode(&self, w: &mut W) -> Result<(), EncodingError>; } /// Struct representing a tEXt chunk #[derive(Clone, Debug, PartialEq, Eq)] pub struct TEXtChunk { /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1. pub keyword: String, /// Text field of tEXt chunk. Can be at most 2GB. pub text: String, } fn decode_iso_8859_1(text: &[u8]) -> String { text.iter().map(|&b| b as char).collect() } fn encode_iso_8859_1(text: &str) -> Result, TextEncodingError> { encode_iso_8859_1_iter(text).collect() } fn encode_iso_8859_1_into(buf: &mut Vec, text: &str) -> Result<(), TextEncodingError> { for b in encode_iso_8859_1_iter(text) { buf.push(b?); } Ok(()) } fn encode_iso_8859_1_iter(text: &str) -> impl Iterator> + '_ { text.chars() .map(|c| u8::try_from(c as u32).map_err(|_| TextEncodingError::Unrepresentable)) } fn decode_ascii(text: &[u8]) -> Result<&str, TextDecodingError> { if text.is_ascii() { // `from_utf8` cannot panic because we're already checked that `text` is ASCII-7. // And this is the only safe way to get ASCII-7 string from `&[u8]`. Ok(std::str::from_utf8(text).expect("unreachable")) } else { Err(TextDecodingError::Unrepresentable) } } impl TEXtChunk { /// Constructs a new TEXtChunk. /// Not sure whether it should take &str or String. pub fn new(keyword: impl Into, text: impl Into) -> Self { Self { keyword: keyword.into(), text: text.into(), } } /// Decodes a slice of bytes to a String using Latin-1 decoding. /// The decoder runs in strict mode, and any decoding errors are passed along to the caller. pub(crate) fn decode( keyword_slice: &[u8], text_slice: &[u8], ) -> Result { if keyword_slice.is_empty() || keyword_slice.len() > 79 { return Err(TextDecodingError::InvalidKeywordSize); } Ok(Self { keyword: decode_iso_8859_1(keyword_slice), text: decode_iso_8859_1(text_slice), }) } } impl EncodableTextChunk for TEXtChunk { /// Encodes TEXtChunk to a Writer. The keyword and text are separated by a byte of zeroes. fn encode(&self, w: &mut W) -> Result<(), EncodingError> { let mut data = encode_iso_8859_1(&self.keyword)?; if data.is_empty() || data.len() > 79 { return Err(TextEncodingError::InvalidKeywordSize.into()); } data.push(0); encode_iso_8859_1_into(&mut data, &self.text)?; encoder::write_chunk(w, chunk::tEXt, &data) } } /// Struct representing a zTXt chunk #[derive(Clone, Debug, PartialEq, Eq)] pub struct ZTXtChunk { /// Keyword field of the tEXt chunk. Needs to be between 1-79 bytes when encoded as Latin-1. pub keyword: String, /// Text field of zTXt chunk. It is compressed by default, but can be uncompressed if necessary. text: OptCompressed, } /// Private enum encoding the compressed and uncompressed states of zTXt/iTXt text field. #[derive(Clone, Debug, PartialEq, Eq)] enum OptCompressed { /// Compressed version of text field. Can be at most 2GB. Compressed(Vec), /// Uncompressed text field. Uncompressed(String), } impl ZTXtChunk { /// Creates a new ZTXt chunk. pub fn new(keyword: impl Into, text: impl Into) -> Self { Self { keyword: keyword.into(), text: OptCompressed::Uncompressed(text.into()), } } pub(crate) fn decode( keyword_slice: &[u8], compression_method: u8, text_slice: &[u8], ) -> Result { if keyword_slice.is_empty() || keyword_slice.len() > 79 { return Err(TextDecodingError::InvalidKeywordSize); } if compression_method != 0 { return Err(TextDecodingError::InvalidCompressionMethod); } Ok(Self { keyword: decode_iso_8859_1(keyword_slice), text: OptCompressed::Compressed(text_slice.to_vec()), }) } /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes. pub fn decompress_text(&mut self) -> Result<(), DecodingError> { self.decompress_text_with_limit(DECOMPRESSION_LIMIT) } /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes. pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> { match &self.text { OptCompressed::Compressed(v) => { let uncompressed_raw = match decompress_to_vec_zlib_with_limit(&v[..], limit) { Ok(s) => s, Err(err) if err.status == miniz_oxide::inflate::TINFLStatus::HasMoreOutput => { return Err(DecodingError::from( TextDecodingError::OutOfDecompressionSpace, )); } Err(_) => { return Err(DecodingError::from(TextDecodingError::InflationError)); } }; self.text = OptCompressed::Uncompressed(decode_iso_8859_1(&uncompressed_raw)); } OptCompressed::Uncompressed(_) => {} }; Ok(()) } /// Decompresses the inner text, and returns it as a `String`. /// If decompression uses more the 2MiB, first call decompress with limit, and then this method. pub fn get_text(&self) -> Result { match &self.text { OptCompressed::Compressed(v) => { let uncompressed_raw = decompress_to_vec_zlib(&v[..]) .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?; Ok(decode_iso_8859_1(&uncompressed_raw)) } OptCompressed::Uncompressed(s) => Ok(s.clone()), } } /// Compresses the inner text, mutating its own state. pub fn compress_text(&mut self) -> Result<(), EncodingError> { match &self.text { OptCompressed::Uncompressed(s) => { let uncompressed_raw = encode_iso_8859_1(s)?; let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); encoder .write_all(&uncompressed_raw) .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; self.text = OptCompressed::Compressed( encoder .finish() .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?, ); } OptCompressed::Compressed(_) => {} } Ok(()) } } impl EncodableTextChunk for ZTXtChunk { fn encode(&self, w: &mut W) -> Result<(), EncodingError> { let mut data = encode_iso_8859_1(&self.keyword)?; if data.is_empty() || data.len() > 79 { return Err(TextEncodingError::InvalidKeywordSize.into()); } // Null separator data.push(0); // Compression method: the only valid value is 0, as of 2021. data.push(0); match &self.text { OptCompressed::Compressed(v) => { data.extend_from_slice(&v[..]); } OptCompressed::Uncompressed(s) => { // This code may have a bug. Check for correctness. let uncompressed_raw = encode_iso_8859_1(s)?; let mut encoder = ZlibEncoder::new(data, Compression::fast()); encoder .write_all(&uncompressed_raw) .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; data = encoder .finish() .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; } }; encoder::write_chunk(w, chunk::zTXt, &data) } } /// Struct encoding an iTXt chunk #[derive(Clone, Debug, PartialEq, Eq)] pub struct ITXtChunk { /// The keyword field. This needs to be between 1-79 bytes when encoded as Latin-1. pub keyword: String, /// Indicates whether the text will be (or was) compressed in the PNG. pub compressed: bool, /// A hyphen separated list of languages that the keyword is translated to. This is ASCII-7 encoded. pub language_tag: String, /// Translated keyword. This is UTF-8 encoded. pub translated_keyword: String, /// Text field of iTXt chunk. It is compressed by default, but can be uncompressed if necessary. text: OptCompressed, } impl ITXtChunk { /// Constructs a new iTXt chunk. Leaves all but keyword and text to default values. pub fn new(keyword: impl Into, text: impl Into) -> Self { Self { keyword: keyword.into(), compressed: false, language_tag: "".to_string(), translated_keyword: "".to_string(), text: OptCompressed::Uncompressed(text.into()), } } pub(crate) fn decode( keyword_slice: &[u8], compression_flag: u8, compression_method: u8, language_tag_slice: &[u8], translated_keyword_slice: &[u8], text_slice: &[u8], ) -> Result { if keyword_slice.is_empty() || keyword_slice.len() > 79 { return Err(TextDecodingError::InvalidKeywordSize); } let keyword = decode_iso_8859_1(keyword_slice); let compressed = match compression_flag { 0 => false, 1 => true, _ => return Err(TextDecodingError::InvalidCompressionFlag), }; if compressed && compression_method != 0 { return Err(TextDecodingError::InvalidCompressionMethod); } let language_tag = decode_ascii(language_tag_slice)?.to_owned(); let translated_keyword = std::str::from_utf8(translated_keyword_slice) .map_err(|_| TextDecodingError::Unrepresentable)? .to_string(); let text = if compressed { OptCompressed::Compressed(text_slice.to_vec()) } else { OptCompressed::Uncompressed( String::from_utf8(text_slice.to_vec()) .map_err(|_| TextDecodingError::Unrepresentable)?, ) }; Ok(Self { keyword, compressed, language_tag, translated_keyword, text, }) } /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `DECOMPRESSION_LIMIT` bytes. pub fn decompress_text(&mut self) -> Result<(), DecodingError> { self.decompress_text_with_limit(DECOMPRESSION_LIMIT) } /// Decompresses the inner text, mutating its own state. Can only handle decompressed text up to `limit` bytes. pub fn decompress_text_with_limit(&mut self, limit: usize) -> Result<(), DecodingError> { match &self.text { OptCompressed::Compressed(v) => { let uncompressed_raw = match decompress_to_vec_zlib_with_limit(&v[..], limit) { Ok(s) => s, Err(err) if err.status == miniz_oxide::inflate::TINFLStatus::HasMoreOutput => { return Err(DecodingError::from( TextDecodingError::OutOfDecompressionSpace, )); } Err(_) => { return Err(DecodingError::from(TextDecodingError::InflationError)); } }; self.text = OptCompressed::Uncompressed( String::from_utf8(uncompressed_raw) .map_err(|_| TextDecodingError::Unrepresentable)?, ); } OptCompressed::Uncompressed(_) => {} }; Ok(()) } /// Decompresses the inner text, and returns it as a `String`. /// If decompression takes more than 2 MiB, try `decompress_text_with_limit` followed by this method. pub fn get_text(&self) -> Result { match &self.text { OptCompressed::Compressed(v) => { let uncompressed_raw = decompress_to_vec_zlib(&v[..]) .map_err(|_| DecodingError::from(TextDecodingError::InflationError))?; String::from_utf8(uncompressed_raw) .map_err(|_| TextDecodingError::Unrepresentable.into()) } OptCompressed::Uncompressed(s) => Ok(s.clone()), } } /// Compresses the inner text, mutating its own state. pub fn compress_text(&mut self) -> Result<(), EncodingError> { match &self.text { OptCompressed::Uncompressed(s) => { let uncompressed_raw = s.as_bytes(); let mut encoder = ZlibEncoder::new(Vec::new(), Compression::fast()); encoder .write_all(uncompressed_raw) .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; self.text = OptCompressed::Compressed( encoder .finish() .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?, ); } OptCompressed::Compressed(_) => {} } Ok(()) } } impl EncodableTextChunk for ITXtChunk { fn encode(&self, w: &mut W) -> Result<(), EncodingError> { // Keyword let mut data = encode_iso_8859_1(&self.keyword)?; if data.is_empty() || data.len() > 79 { return Err(TextEncodingError::InvalidKeywordSize.into()); } // Null separator data.push(0); // Compression flag if self.compressed { data.push(1); } else { data.push(0); } // Compression method data.push(0); // Language tag if !self.language_tag.is_ascii() { return Err(EncodingError::from(TextEncodingError::Unrepresentable)); } data.extend(self.language_tag.as_bytes()); // Null separator data.push(0); // Translated keyword data.extend_from_slice(self.translated_keyword.as_bytes()); // Null separator data.push(0); // Text if self.compressed { match &self.text { OptCompressed::Compressed(v) => { data.extend_from_slice(&v[..]); } OptCompressed::Uncompressed(s) => { let uncompressed_raw = s.as_bytes(); let mut encoder = ZlibEncoder::new(data, Compression::fast()); encoder .write_all(uncompressed_raw) .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; data = encoder .finish() .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; } } } else { match &self.text { OptCompressed::Compressed(v) => { let uncompressed_raw = decompress_to_vec_zlib(&v[..]) .map_err(|_| EncodingError::from(TextEncodingError::CompressionError))?; data.extend_from_slice(&uncompressed_raw[..]); } OptCompressed::Uncompressed(s) => { data.extend_from_slice(s.as_bytes()); } } } encoder::write_chunk(w, chunk::iTXt, &data) } }