diff --git a/apk_frauder/src/lib.rs b/apk_frauder/src/lib.rs new file mode 100644 index 0000000..a3843b7 --- /dev/null +++ b/apk_frauder/src/lib.rs @@ -0,0 +1,718 @@ +use std::io::{Cursor, Read, Seek, SeekFrom, Write}; + +use androscalpel_serializer::{ReadSeek, Result, Serializable}; + +mod cp437; + +pub enum Encoding { + CP437, + UTF8, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serializable, Default)] +//struct Signature(pub [u8; 4]); +struct Signature(pub u32); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExtraField { + Zip64(Zip64ExtraField), + Generic(GenericExtraField), +} + +impl ExtraField { + fn to_generic(&self) -> Result { + match self { + Self::Zip64(field) => field.to_generic_field(), + Self::Generic(field) => Ok(field.clone()), + } + } +} + +impl Serializable for ExtraField { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + self.to_generic()?.serialize(output) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + Ok(Self::Generic(GenericExtraField::deserialize(input)?)) + + /* + match field.id { + Zip64ExtraField::ID => Ok(Self::Zip64(Zip64ExtraField::from_generic(&field)?)), + _ => Ok(Self::Generic(field)), + } + */ + } + + fn size(&self) -> usize { + self.to_generic().unwrap().size() + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Zip64ExtraField { + original_size: Option, + compressed_size: Option, + offset_header: Option, + disk_number: Option, +} + +impl Zip64ExtraField { + const ID: u16 = 0x0001; + fn to_generic_field(&self) -> Result { + let mut data = Cursor::new(Vec::::new()); + if let Some(original_size) = self.original_size { + original_size.serialize(&mut data)?; + } + if let Some(compressed_size) = self.compressed_size { + compressed_size.serialize(&mut data)?; + } + if let Some(offset_header) = self.offset_header { + offset_header.serialize(&mut data)?; + } + if let Some(disk_number) = self.disk_number { + disk_number.serialize(&mut data)?; + } + + Ok(GenericExtraField { + id: Self::ID, + data: 
data.into_inner(), + }) + } + + fn from_generic( + field: &GenericExtraField, + original_size: bool, + compressed_size: bool, + offset_header: bool, + disk_number: bool, + ) -> Result { + assert_eq!(field.id, Self::ID); + let mut data = Cursor::new(&field.data); + let original_size = if original_size { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let compressed_size = if compressed_size { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let offset_header = if offset_header { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let disk_number = if disk_number { + Some(u32::deserialize(&mut data)?) + } else { + None + }; + Ok(Self { + original_size, + compressed_size, + offset_header, + disk_number, + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct GenericExtraField { + pub id: u16, + pub data: Vec, +} + +impl Serializable for GenericExtraField { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + self.id.serialize(output)?; + (self.data.len() as u16).serialize(output)?; + for c in &self.data { + c.serialize(output)?; + } + Ok(()) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + let id = u16::deserialize(input)?; + let data_size = u16::deserialize(input)?; + let mut data = vec![]; + for _ in 0..data_size { + data.push(u8::deserialize(input)?); + } + Ok(Self { id, data }) + } + + fn size(&self) -> usize { + 4 + self.data.len() + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FileHeader { + // signature: Signature(0x02014b50) + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub general_purpose_flag: u16, + pub compression_method: u16, + pub last_mod_file_time: u16, + pub last_mod_file_data: u16, + pub crc_32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, + // file_name_length: u16, + // extra_field_length: u16, + // file_comment_length: u16, + pub disk_number_start: u16, + pub internal_file_attributes: u16, + pub external_file_attributes: 
u32, + pub offset_local_header: u32, + pub file_name: Vec, + pub extra_field: Vec, + /// Remaining bytes in the extra_fields that could not be parsed as ExtraField + pub malformed_extra_field: Vec, + pub file_comment: Vec, +} + +impl Serializable for FileHeader { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + Self::SIGNATURE.serialize(output)?; + self.version_made_by.serialize(output)?; + self.version_needed_to_extract.serialize(output)?; + self.general_purpose_flag.serialize(output)?; + self.compression_method.serialize(output)?; + self.last_mod_file_time.serialize(output)?; + self.last_mod_file_data.serialize(output)?; + self.crc_32.serialize(output)?; + self.compressed_size.serialize(output)?; + self.uncompressed_size.serialize(output)?; + (self.file_name.len() as u16).serialize(output)?; + (self.extra_field.len() as u16).serialize(output)?; + (self.file_comment.len() as u16).serialize(output)?; + self.disk_number_start.serialize(output)?; + self.internal_file_attributes.serialize(output)?; + self.external_file_attributes.serialize(output)?; + self.offset_local_header.serialize(output)?; + for c in &self.file_name { + c.serialize(output)?; + } + for c in &self.extra_field { + c.serialize(output)?; + } + for c in &self.malformed_extra_field { + c.serialize(output)?; + } + for c in &self.file_comment { + c.serialize(output)?; + } + Ok(()) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + let signature = Signature::deserialize(input)?; + assert_eq!(signature, Self::SIGNATURE); // TODO + let version_made_by = u16::deserialize(input)?; + let version_needed_to_extract = u16::deserialize(input)?; + let general_purpose_flag = u16::deserialize(input)?; + let compression_method = u16::deserialize(input)?; + let last_mod_file_time = u16::deserialize(input)?; + let last_mod_file_data = u16::deserialize(input)?; + let crc_32 = u32::deserialize(input)?; + let compressed_size = u32::deserialize(input)?; + let uncompressed_size = 
u32::deserialize(input)?; + let file_name_length = u16::deserialize(input)?; + let extra_field_length = u16::deserialize(input)?; + let file_comment_length = u16::deserialize(input)?; + let disk_number_start = u16::deserialize(input)?; + let internal_file_attributes = u16::deserialize(input)?; + let external_file_attributes = u32::deserialize(input)?; + let offset_local_header = u32::deserialize(input)?; + let mut file_name = vec![]; + for _ in 0..file_name_length { + file_name.push(u8::deserialize(input)?); + } + let mut header = Self { + version_made_by, + version_needed_to_extract, + general_purpose_flag, + compression_method, + last_mod_file_time, + last_mod_file_data, + crc_32, + compressed_size, + uncompressed_size, + disk_number_start, + internal_file_attributes, + external_file_attributes, + offset_local_header, + file_name, + extra_field: vec![], + malformed_extra_field: vec![], + file_comment: vec![], + }; + //let end_of_extra_field = input.stream_position().unwrap() + extra_field_length as u64; + let extra_field_off = input.stream_position().unwrap(); + let mut extra_size_read = 0; + while extra_size_read < extra_field_length as usize { + let field_off = input.stream_position().unwrap(); + let field = ExtraField::deserialize(input); + + if let Err(err) = field { + println!( + "Failed to parsed extra field in {}: {err:?}", + header.get_name() + ); + input.seek(SeekFrom::Start(field_off)).unwrap(); + break; + } else { + let field = field.unwrap(); + extra_size_read += field.size(); + header.extra_field.push(field); + } + } + if extra_size_read > extra_field_length as usize { + println!("Failed to parsed last extra field in {}", header.get_name()); + let size = header.extra_field.pop().unwrap().size(); + input.seek(SeekFrom::Current(-(size as i64))).unwrap(); + } + let mut extra_size_read = input.stream_position().unwrap() - extra_field_off; + while extra_size_read < extra_field_length as u64 { + header.malformed_extra_field.push(u8::deserialize(input)?); + 
extra_size_read += 1; + } + //input.seek(SeekFrom::Start(end_of_extra_field)).unwrap(); + for _ in 0..file_comment_length { + header.file_comment.push(u8::deserialize(input)?); + } + + for field in &mut header.extra_field { + if let ExtraField::Generic(GenericExtraField { + id: Zip64ExtraField::ID, + data, + }) = field + { + let original_size = uncompressed_size == u32::MAX; + let compressed_size = compressed_size == u32::MAX; + let offset_header = offset_local_header == u32::MAX; + let disk_number = disk_number_start == u16::MAX; + let zip64_filed = Zip64ExtraField::from_generic( + &GenericExtraField { + id: Zip64ExtraField::ID, + data: data.clone(), + }, + original_size, + compressed_size, + offset_header, + disk_number, + ) + .unwrap(); + *field = ExtraField::Zip64(zip64_filed); + } + } + Ok(header) + } + + fn size(&self) -> usize { + Self::MIN_SIZE + + self.file_name.len() + + self.extra_field.iter().map(|f| f.size()).sum::() + + self.malformed_extra_field.len() + + self.file_comment.len() + } +} + +impl FileHeader { + const SIGNATURE: Signature = Signature(0x02014b50); + const MIN_SIZE: usize = 4 + 6 * 2 + 4 * 3 + 5 * 2 + 4 * 2; + + const MASK_UTF8_FILENAME: u16 = 1 << 11; + + pub fn get_name_encoding(&self) -> Encoding { + if self.general_purpose_flag & Self::MASK_UTF8_FILENAME != 0 { + Encoding::UTF8 + } else { + Encoding::CP437 + } + } + + pub fn get_name(&self) -> String { + match self.get_name_encoding() { + Encoding::UTF8 => std::str::from_utf8(&self.file_name).unwrap().into(), + Encoding::CP437 => cp437::cp437_to_string(&self.file_name), + } + } + + pub fn get_uncompressed_size(&self) -> u64 { + if self.uncompressed_size != u32::MAX { + self.uncompressed_size as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + original_size: Some(original_size), + .. 
+ })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *original_size + } else { + self.uncompressed_size as u64 + } + } + + pub fn get_compressed_size(&self) -> u64 { + if self.compressed_size != u32::MAX { + self.compressed_size as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + compressed_size: Some(compressed_size), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *compressed_size + } else { + self.compressed_size as u64 + } + } + + pub fn get_offset_local_header(&self) -> u64 { + if self.offset_local_header != u32::MAX { + self.offset_local_header as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + offset_header: Some(offset_header), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *offset_header + } else { + self.offset_local_header as u64 + } + } + + pub fn get_disk_number_start(&self) -> u32 { + if self.disk_number_start != u16::MAX { + self.disk_number_start as u32 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + disk_number: Some(disk_number), + .. 
+ })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *disk_number + } else { + self.disk_number_start as u32 + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Zip64EndCentralDirectory { + // signature: Signature + // size_zip64_edf_record: u64 + pub version_made_by: u16, + pub version_needed_to_extract: u16, + pub number_of_this_disk: u32, + pub disk_number_of_central_directory_start: u32, + pub number_entry_in_central_directory_on_this_disk: u64, + pub number_entry_in_central_directory: u64, + pub size_of_central_directory: u64, + pub offset_central_directory: u64, + pub extensible_data: Vec, +} + +impl Zip64EndCentralDirectory { + const SIGNATURE: Signature = Signature(0x06064b50); + const MIN_SIZE: usize = 4 + 8 + 2 + 2 + 4 + 4 + 8 + 8 + 8 + 8; // + 0; +} + +impl Serializable for Zip64EndCentralDirectory { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + Self::SIGNATURE.serialize(output)?; + ((self.size() - 12) as u64).serialize(output)?; + self.version_made_by.serialize(output)?; + self.version_needed_to_extract.serialize(output)?; + self.number_of_this_disk.serialize(output)?; + self.disk_number_of_central_directory_start + .serialize(output)?; + self.number_entry_in_central_directory_on_this_disk + .serialize(output)?; + self.number_entry_in_central_directory.serialize(output)?; + self.size_of_central_directory.serialize(output)?; + self.offset_central_directory.serialize(output)?; + for d in &self.extensible_data { + d.serialize(output)?; + } + Ok(()) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + let signature = Signature::deserialize(input)?; + assert_eq!(signature, Self::SIGNATURE); // TODO + let size_zip64_edf_record = u64::deserialize(input)?; + let version_made_by = u16::deserialize(input)?; + let version_needed_to_extract = u16::deserialize(input)?; + let number_of_this_disk = u32::deserialize(input)?; + let disk_number_of_central_directory_start = 
u32::deserialize(input)?; + let number_entry_in_central_directory_on_this_disk = u64::deserialize(input)?; + let number_entry_in_central_directory = u64::deserialize(input)?; + let size_of_central_directory = u64::deserialize(input)?; + let offset_central_directory = u64::deserialize(input)?; + let mut extensible_data = vec![]; + for _ in 0..(size_zip64_edf_record as usize + 12 - Self::MIN_SIZE) { + extensible_data.push(u8::deserialize(input)?); + } + Ok(Self { + version_made_by, + version_needed_to_extract, + number_of_this_disk, + disk_number_of_central_directory_start, + number_entry_in_central_directory_on_this_disk, + number_entry_in_central_directory, + size_of_central_directory, + offset_central_directory, + extensible_data, + }) + } + + fn size(&self) -> usize { + Self::MIN_SIZE + self.extensible_data.len() + } +} + +#[derive(Debug, Clone, PartialEq, Eq, Serializable)] +pub struct Zip64EndCentralDirectoryLocator { + #[prefix(Self::SIGNATURE.0.to_le_bytes())] + pub disk_number_of_zip64_end_central_directory_start: u32, + pub offset_zip64_end_of_central_directory_record: u64, + pub total_number_of_disks: u32, +} + +impl Zip64EndCentralDirectoryLocator { + const SIGNATURE: Signature = Signature(0x07064b50); + const SIZE: usize = 4 + 4 + 8 + 4; +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct EndCentralDirectory { + //signature: Self::SIGNATURE, + pub disk_number: u16, + pub disk_number_of_central_directory_start: u16, + pub number_of_entries_in_central_directory_on_disk: u16, + pub number_of_entries_in_central_directory: u16, + pub size_central_directory: u32, + pub offset_central_directory: u32, + //file_comment_length: u16, + pub comment: Vec, +} + +impl EndCentralDirectory { + const SIGNATURE: Signature = Signature(0x06054b50); //Signature([0x06, 0x05, 0x4b, 0x50]); + const MIN_SIZE: usize = 4 + 4 * 2 + 2 * 4 + 2; // + 0; +} + +impl Serializable for EndCentralDirectory { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + 
Self::SIGNATURE.serialize(output)?; + self.disk_number.serialize(output)?; + self.disk_number_of_central_directory_start + .serialize(output)?; + self.number_of_entries_in_central_directory_on_disk + .serialize(output)?; + self.number_of_entries_in_central_directory + .serialize(output)?; + self.size_central_directory.serialize(output)?; + self.offset_central_directory.serialize(output)?; + (self.comment.len() as u16).serialize(output)?; + for c in &self.comment { + c.serialize(output)?; + } + Ok(()) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + let signature = Signature::deserialize(input)?; + let disk_number = u16::deserialize(input)?; + let disk_number_of_central_directory_start = u16::deserialize(input)?; + let number_of_entries_in_central_directory_on_disk = u16::deserialize(input)?; + let number_of_entries_in_central_directory = u16::deserialize(input)?; + let size_central_directory = u32::deserialize(input)?; + let offset_central_directory = u32::deserialize(input)?; + let file_comment_length = u16::deserialize(input)?; + let mut comment = vec![]; + for _ in 0..file_comment_length { + comment.push(u8::deserialize(input)?); + } + + assert_eq!(signature, Self::SIGNATURE); // TODO + Ok(Self { + disk_number, + disk_number_of_central_directory_start, + number_of_entries_in_central_directory_on_disk, + number_of_entries_in_central_directory, + size_central_directory, + offset_central_directory, + comment, + }) + } + + fn size(&self) -> usize { + Self::MIN_SIZE + self.comment.len() + } +} + +pub struct ZipFile { + pub end_of_central_directory: EndCentralDirectory, + pub zip64_end_of_central_directory: Option, + pub files: Vec, + pub data: T, +} + +impl ZipFile { + pub fn new(mut reader: T) -> Self { + let end_of_central_directory_off = + Self::get_end_of_central_directory_offset(&mut reader).unwrap(); + reader + .seek(SeekFrom::Start(end_of_central_directory_off)) + .unwrap(); + let end_of_central_directory = EndCentralDirectory::deserialize(&mut 
reader).unwrap(); + println!("{end_of_central_directory:#?}"); + reader + .seek(SeekFrom::Start( + end_of_central_directory_off - Zip64EndCentralDirectoryLocator::SIZE as u64, + )) + .unwrap(); + let zip64_ecd_locator = Zip64EndCentralDirectoryLocator::deserialize(&mut reader).ok(); + let zip64_end_of_central_directory = if let Some(zip64_ecd_locator) = zip64_ecd_locator { + assert_eq!( + zip64_ecd_locator.disk_number_of_zip64_end_central_directory_start, + 0 + ); + assert!(zip64_ecd_locator.total_number_of_disks <= 1); + println!("Zip64 ECD Locator {:#?}", zip64_ecd_locator); + let zip64_edc_record_off = + zip64_ecd_locator.offset_zip64_end_of_central_directory_record; + reader.seek(SeekFrom::Start(zip64_edc_record_off)).unwrap(); + let zip64_edc_reccord = Zip64EndCentralDirectory::deserialize(&mut reader).ok(); + println!("{zip64_edc_reccord:#?}"); + zip64_edc_reccord + } else { + None + }; + + // At this point python's ziplib recompute the location of the central directory from the + // location of the end of central directory in case the zip was concanated after a file. + // We probably don't need that for now. 
+ let mut zip_file = Self { + end_of_central_directory, + zip64_end_of_central_directory, + data: reader, + files: vec![], + }; + zip_file + .data + .seek(SeekFrom::Start(zip_file.get_ed_offset())) + .unwrap(); + + let mut size_read = 0; + let cd_size = zip_file.get_ed_size(); + while size_read < cd_size { + let file_header = FileHeader::deserialize(&mut zip_file.data).unwrap(); + println!("{file_header:#?}"); + size_read += file_header.size() as u64; + zip_file.files.push(file_header); + } + assert_eq!(size_read, cd_size); + for f in &zip_file.files { + println!("{f:#?}"); + } + zip_file + } + + pub fn is_zip64(&self) -> bool { + self.zip64_end_of_central_directory.is_some() + } + + pub fn get_disk_num(&self) -> u32 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + zip64_end_of_central_directory.number_of_this_disk + } else { + self.end_of_central_directory.disk_number as u32 + } + } + + pub fn get_disk_ed_start(&self) -> u32 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + zip64_end_of_central_directory.disk_number_of_central_directory_start + } else { + self.end_of_central_directory + .disk_number_of_central_directory_start as u32 + } + } + + pub fn get_number_entries_on_disk(&self) -> u64 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + zip64_end_of_central_directory.number_entry_in_central_directory_on_this_disk + } else { + self.end_of_central_directory + .number_of_entries_in_central_directory_on_disk as u64 + } + } + + pub fn get_number_entries(&self) -> u64 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + zip64_end_of_central_directory.number_entry_in_central_directory + } else { + self.end_of_central_directory + .number_of_entries_in_central_directory as u64 + } + } + + pub fn get_ed_size(&self) -> u64 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + 
zip64_end_of_central_directory.size_of_central_directory + } else { + self.end_of_central_directory.size_central_directory as u64 + } + } + + pub fn get_ed_offset(&self) -> u64 { + if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { + zip64_end_of_central_directory.offset_central_directory + } else { + self.end_of_central_directory.offset_central_directory as u64 + } + } + + pub fn get_end_of_central_directory_offset(reader: &mut T) -> Option { + let file_size = reader.seek(SeekFrom::End(0)).unwrap(); + let mut sig = Signature::default(); + let mut comment_size = 0; + while sig != EndCentralDirectory::SIGNATURE { + reader + .seek(SeekFrom::End( + -(EndCentralDirectory::MIN_SIZE as i64) - comment_size, + )) + .unwrap(); + sig = Signature::deserialize(reader).unwrap(); + comment_size += 1; + if comment_size > 65536 + || comment_size as usize + EndCentralDirectory::MIN_SIZE > file_size as usize + { + return None; + } + } + comment_size -= 1; + Some(file_size - comment_size as u64 - EndCentralDirectory::MIN_SIZE as u64) + } + + pub fn get_file_names(&self) -> Vec { + self.files.iter().map(|f| f.get_name()).collect() + } +} diff --git a/apk_frauder/src/main.rs b/apk_frauder/src/main.rs index ba3ba56..66fd308 100644 --- a/apk_frauder/src/main.rs +++ b/apk_frauder/src/main.rs @@ -1,732 +1,13 @@ +use apk_frauder::ZipFile; use std::fs::File; -use std::io::{Cursor, Read, Seek, SeekFrom, Write}; - -use androscalpel_serializer::{ReadSeek, Result, Serializable}; - -mod cp437; - -enum Encoding { - CP437, - UTF8, -} - -#[derive(Debug, Clone, PartialEq, Eq, Serializable, Default)] -//struct Signature(pub [u8; 4]); -struct Signature(pub u32); - -#[derive(Debug, Clone, PartialEq, Eq)] -enum ExtraField { - Zip64(Zip64ExtraField), - Generic(GenericExtraField), -} - -impl ExtraField { - fn to_generic(&self) -> Result { - match self { - Self::Zip64(field) => field.to_generic_field(), - Self::Generic(field) => Ok(field.clone()), - } - } -} - -impl 
Serializable for ExtraField { - fn serialize(&self, output: &mut dyn Write) -> Result<()> { - self.to_generic()?.serialize(output) - } - - fn deserialize(input: &mut dyn ReadSeek) -> Result { - Ok(Self::Generic(GenericExtraField::deserialize(input)?)) - - /* - match field.id { - Zip64ExtraField::ID => Ok(Self::Zip64(Zip64ExtraField::from_generic(&field)?)), - _ => Ok(Self::Generic(field)), - } - */ - } - - fn size(&self) -> usize { - self.to_generic().unwrap().size() - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct Zip64ExtraField { - original_size: Option, - compressed_size: Option, - offset_header: Option, - disk_number: Option, -} - -impl Zip64ExtraField { - const ID: u16 = 0x0001; - fn to_generic_field(&self) -> Result { - let mut data = Cursor::new(Vec::::new()); - if let Some(original_size) = self.original_size { - original_size.serialize(&mut data)?; - } - if let Some(compressed_size) = self.compressed_size { - compressed_size.serialize(&mut data)?; - } - if let Some(offset_header) = self.offset_header { - offset_header.serialize(&mut data)?; - } - if let Some(disk_number) = self.disk_number { - disk_number.serialize(&mut data)?; - } - - Ok(GenericExtraField { - id: Self::ID, - data: data.into_inner(), - }) - } - - fn from_generic( - field: &GenericExtraField, - original_size: bool, - compressed_size: bool, - offset_header: bool, - disk_number: bool, - ) -> Result { - assert_eq!(field.id, Self::ID); - let mut data = Cursor::new(&field.data); - let original_size = if original_size { - Some(u64::deserialize(&mut data)?) - } else { - None - }; - let compressed_size = if compressed_size { - Some(u64::deserialize(&mut data)?) - } else { - None - }; - let offset_header = if offset_header { - Some(u64::deserialize(&mut data)?) - } else { - None - }; - let disk_number = if disk_number { - Some(u32::deserialize(&mut data)?) 
- } else { - None - }; - Ok(Self { - original_size, - compressed_size, - offset_header, - disk_number, - }) - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct GenericExtraField { - id: u16, - data: Vec, -} - -impl Serializable for GenericExtraField { - fn serialize(&self, output: &mut dyn Write) -> Result<()> { - self.id.serialize(output)?; - (self.data.len() as u16).serialize(output)?; - for c in &self.data { - c.serialize(output)?; - } - Ok(()) - } - - fn deserialize(input: &mut dyn ReadSeek) -> Result { - let id = u16::deserialize(input)?; - let data_size = u16::deserialize(input)?; - let mut data = vec![]; - for _ in 0..data_size { - data.push(u8::deserialize(input)?); - } - Ok(Self { id, data }) - } - - fn size(&self) -> usize { - 4 + self.data.len() - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct FileHeader { - // signature: Signature(0x02014b50) - version_made_by: u16, - version_needed_to_extract: u16, - general_purpose_flag: u16, - compression_method: u16, - last_mod_file_time: u16, - last_mod_file_data: u16, - crc_32: u32, - compressed_size: u32, - uncompressed_size: u32, - // file_name_length: u16, - // extra_field_length: u16, - // file_comment_length: u16, - disk_number_start: u16, - internal_file_attributes: u16, - external_file_attributes: u32, - offset_local_header: u32, - file_name: Vec, - extra_field: Vec, - /// Remaining bytes in the extra_fields that could not be parsed as ExtraField - malformed_extra_field: Vec, - file_comment: Vec, -} - -impl Serializable for FileHeader { - fn serialize(&self, output: &mut dyn Write) -> Result<()> { - Self::SIGNATURE.serialize(output)?; - self.version_made_by.serialize(output)?; - self.version_needed_to_extract.serialize(output)?; - self.general_purpose_flag.serialize(output)?; - self.compression_method.serialize(output)?; - self.last_mod_file_time.serialize(output)?; - self.last_mod_file_data.serialize(output)?; - self.crc_32.serialize(output)?; - self.compressed_size.serialize(output)?; - 
self.uncompressed_size.serialize(output)?; - (self.file_name.len() as u16).serialize(output)?; - (self.extra_field.len() as u16).serialize(output)?; - (self.file_comment.len() as u16).serialize(output)?; - self.disk_number_start.serialize(output)?; - self.internal_file_attributes.serialize(output)?; - self.external_file_attributes.serialize(output)?; - self.offset_local_header.serialize(output)?; - for c in &self.file_name { - c.serialize(output)?; - } - for c in &self.extra_field { - c.serialize(output)?; - } - for c in &self.malformed_extra_field { - c.serialize(output)?; - } - for c in &self.file_comment { - c.serialize(output)?; - } - Ok(()) - } - - fn deserialize(input: &mut dyn ReadSeek) -> Result { - let signature = Signature::deserialize(input)?; - assert_eq!(signature, Self::SIGNATURE); // TODO - let version_made_by = u16::deserialize(input)?; - let version_needed_to_extract = u16::deserialize(input)?; - let general_purpose_flag = u16::deserialize(input)?; - let compression_method = u16::deserialize(input)?; - let last_mod_file_time = u16::deserialize(input)?; - let last_mod_file_data = u16::deserialize(input)?; - let crc_32 = u32::deserialize(input)?; - let compressed_size = u32::deserialize(input)?; - let uncompressed_size = u32::deserialize(input)?; - let file_name_length = u16::deserialize(input)?; - let extra_field_length = u16::deserialize(input)?; - let file_comment_length = u16::deserialize(input)?; - let disk_number_start = u16::deserialize(input)?; - let internal_file_attributes = u16::deserialize(input)?; - let external_file_attributes = u32::deserialize(input)?; - let offset_local_header = u32::deserialize(input)?; - let mut file_name = vec![]; - for _ in 0..file_name_length { - file_name.push(u8::deserialize(input)?); - } - let mut header = Self { - version_made_by, - version_needed_to_extract, - general_purpose_flag, - compression_method, - last_mod_file_time, - last_mod_file_data, - crc_32, - compressed_size, - uncompressed_size, - 
disk_number_start, - internal_file_attributes, - external_file_attributes, - offset_local_header, - file_name, - extra_field: vec![], - malformed_extra_field: vec![], - file_comment: vec![], - }; - //let end_of_extra_field = input.stream_position().unwrap() + extra_field_length as u64; - let extra_field_off = input.stream_position().unwrap(); - let mut extra_size_read = 0; - while extra_size_read < extra_field_length as usize { - let field_off = input.stream_position().unwrap(); - let field = ExtraField::deserialize(input); - - if let Err(err) = field { - println!( - "Failed to parsed extra field in {}: {err:?}", - header.get_name() - ); - input.seek(SeekFrom::Start(field_off)).unwrap(); - break; - } else { - let field = field.unwrap(); - extra_size_read += field.size(); - header.extra_field.push(field); - } - } - if extra_size_read > extra_field_length as usize { - println!("Failed to parsed last extra field in {}", header.get_name()); - let size = header.extra_field.pop().unwrap().size(); - input.seek(SeekFrom::Current(-(size as i64))).unwrap(); - } - let mut extra_size_read = input.stream_position().unwrap() - extra_field_off; - while extra_size_read < extra_field_length as u64 { - header.malformed_extra_field.push(u8::deserialize(input)?); - extra_size_read += 1; - } - //input.seek(SeekFrom::Start(end_of_extra_field)).unwrap(); - for _ in 0..file_comment_length { - header.file_comment.push(u8::deserialize(input)?); - } - - for field in &mut header.extra_field { - if let ExtraField::Generic(GenericExtraField { - id: Zip64ExtraField::ID, - data, - }) = field - { - let original_size = uncompressed_size == u32::MAX; - let compressed_size = compressed_size == u32::MAX; - let offset_header = offset_local_header == u32::MAX; - let disk_number = disk_number_start == u16::MAX; - let zip64_filed = Zip64ExtraField::from_generic( - &GenericExtraField { - id: Zip64ExtraField::ID, - data: data.clone(), - }, - original_size, - compressed_size, - offset_header, - disk_number, 
)
                .unwrap();
                *field = ExtraField::Zip64(zip64_filed);
            }
        }
        Ok(header)
    }

    /// Serialized size in bytes: the fixed-size part plus every variable-length
    /// member (file name, extra fields, malformed extra-field bytes, comment).
    fn size(&self) -> usize {
        Self::MIN_SIZE
            + self.file_name.len()
            + self.extra_field.iter().map(|f| f.size()).sum::<usize>()
            + self.malformed_extra_field.len()
            + self.file_comment.len()
    }
}

impl FileHeader {
    /// Signature that starts every central-directory file header.
    const SIGNATURE: Signature = Signature(0x02014b50);
    /// Size in bytes of the fixed-length part of the header
    /// (signature + six u16 + three u32 + five u16 + two u32 = 46).
    const MIN_SIZE: usize = 4 + 6 * 2 + 4 * 3 + 5 * 2 + 4 * 2;

    /// Bit 11 of the general purpose flag: the file name is UTF-8 encoded.
    const MASK_UTF8_FILENAME: u16 = 1 << 11;

    /// Encoding of `file_name`, selected by bit 11 of the general purpose flag.
    fn get_name_encoding(&self) -> Encoding {
        if self.general_purpose_flag & Self::MASK_UTF8_FILENAME != 0 {
            Encoding::UTF8
        } else {
            Encoding::CP437
        }
    }

    /// Decodes the raw file name into a `String`.
    ///
    /// The name bytes come straight from the archive, so a name flagged as
    /// UTF-8 is not guaranteed to be valid UTF-8: invalid sequences are
    /// replaced with U+FFFD instead of panicking.
    fn get_name(&self) -> String {
        match self.get_name_encoding() {
            Encoding::UTF8 => String::from_utf8_lossy(&self.file_name).into_owned(),
            Encoding::CP437 => cp437::cp437_to_string(&self.file_name),
        }
    }

    /// First Zip64 extra field attached to this header, if there is one.
    fn zip64_extra_field(&self) -> Option<&Zip64ExtraField> {
        self.extra_field.iter().find_map(|field| match field {
            ExtraField::Zip64(zip64) => Some(zip64),
            ExtraField::Generic(_) => None,
        })
    }

    /// Uncompressed size of the entry.
    ///
    /// When the 32-bit field holds the sentinel `u32::MAX`, the value stored
    /// in the first Zip64 extra field is returned instead; if that field is
    /// missing or does not carry the value, the sentinel itself is returned.
    #[allow(dead_code)]
    fn get_uncompressed_size(&self) -> u64 {
        if self.uncompressed_size != u32::MAX {
            return u64::from(self.uncompressed_size);
        }
        self.zip64_extra_field()
            .and_then(|zip64| zip64.original_size)
            .unwrap_or(u64::from(self.uncompressed_size))
    }

    /// Compressed size of the entry, with the same Zip64 fallback rules as
    /// `get_uncompressed_size`.
    #[allow(dead_code)]
    fn get_compressed_size(&self) -> u64 {
        if self.compressed_size != u32::MAX {
            return u64::from(self.compressed_size);
        }
        self.zip64_extra_field()
            .and_then(|zip64| zip64.compressed_size)
            .unwrap_or(u64::from(self.compressed_size))
    }

    /// Offset of the entry's local header, with the same Zip64 fallback rules
    /// as `get_uncompressed_size`.
    #[allow(dead_code)]
    fn get_offset_local_header(&self) -> u64 {
        if self.offset_local_header != u32::MAX {
            return u64::from(self.offset_local_header);
        }
        self.zip64_extra_field()
            .and_then(|zip64| zip64.offset_header)
            .unwrap_or(u64::from(self.offset_local_header))
    }

    /// Number of the disk on which the entry starts. The sentinel for this
    /// 16-bit field is `u16::MAX`; the Zip64 fallback rules are otherwise the
    /// same as for `get_uncompressed_size`.
    #[allow(dead_code)]
    fn get_disk_number_start(&self) -> u32 {
        if self.disk_number_start != u16::MAX {
            return u32::from(self.disk_number_start);
        }
        self.zip64_extra_field()
            .and_then(|zip64| zip64.disk_number)
            .unwrap_or(u32::from(self.disk_number_start))
    }
}

/// Zip64 end of central directory record.
///
/// The signature and the record size are not stored: both are recomputed when
/// the record is serialized.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Zip64EndCentralDirectory {
    // signature: Signature
    // size_zip64_edf_record: u64
    version_made_by: u16,
    version_needed_to_extract: u16,
    number_of_this_disk: u32,
    disk_number_of_central_directory_start: u32,
    number_entry_in_central_directory_on_this_disk: u64,
    number_entry_in_central_directory: u64,
    size_of_central_directory: u64,
    offset_central_directory: u64,
    extensible_data: Vec<u8>,
}

impl Zip64EndCentralDirectory {
    /// Signature that starts the record.
    const SIGNATURE: Signature = Signature(0x06064b50);
    /// Size in bytes of the record when `extensible_data` is empty (56).
    const MIN_SIZE: usize = 4 + 8 + 2 + 2 + 4 + 4 + 8 + 8 + 8 + 8; // + 0;
}

impl Serializable for Zip64EndCentralDirectory {
    /// Writes the record, signature and size field included.
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        Self::SIGNATURE.serialize(output)?;
        // The stored size does not count the signature (4 bytes) nor the size
        // field itself (8 bytes).
        ((self.size() - 12) as u64).serialize(output)?;
        self.version_made_by.serialize(output)?;
        self.version_needed_to_extract.serialize(output)?;
        self.number_of_this_disk.serialize(output)?;
        self.disk_number_of_central_directory_start
            .serialize(output)?;
        self.number_entry_in_central_directory_on_this_disk
            .serialize(output)?;
        self.number_entry_in_central_directory.serialize(output)?;
        self.size_of_central_directory.serialize(output)?;
        self.offset_central_directory.serialize(output)?;
        for d in &self.extensible_data {
            d.serialize(output)?;
        }
        Ok(())
    }

    /// Reads a record from `input`.
    ///
    /// # Panics
    ///
    /// Panics when the bytes read do not start with `Self::SIGNATURE`.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        let signature = Signature::deserialize(input)?;
        assert_eq!(signature, Self::SIGNATURE); // TODO
        let size_zip64_edf_record = u64::deserialize(input)?;
        let version_made_by = u16::deserialize(input)?;
        let version_needed_to_extract = u16::deserialize(input)?;
        let number_of_this_disk = u32::deserialize(input)?;
        let disk_number_of_central_directory_start = u32::deserialize(input)?;
        let number_entry_in_central_directory_on_this_disk = u64::deserialize(input)?;
        let number_entry_in_central_directory = u64::deserialize(input)?;
        let size_of_central_directory = u64::deserialize(input)?;
        let offset_central_directory = u64::deserialize(input)?;

        // The size field is read from the archive and may be smaller than the
        // fixed part of the record (or close to u64::MAX). Saturating
        // arithmetic avoids the integer overflow a plain `size + 12 - MIN_SIZE`
        // would hit; an undersized record is read as having no extensible data.
        let extensible_data_len = size_zip64_edf_record
            .saturating_add(12)
            .saturating_sub(Self::MIN_SIZE as u64);
        let mut extensible_data = vec![];
        for _ in 0..extensible_data_len {
            extensible_data.push(u8::deserialize(input)?);
        }

        Ok(Self {
            version_made_by,
            version_needed_to_extract,
            number_of_this_disk,
            disk_number_of_central_directory_start,
            number_entry_in_central_directory_on_this_disk,
            number_entry_in_central_directory,
            size_of_central_directory,
            offset_central_directory,
            extensible_data,
        })
    }

    /// Serialized size in bytes, signature and size field included.
    fn size(&self) -> usize {
        Self::MIN_SIZE + self.extensible_data.len()
    }
}

/// Zip64 end of central directory locator: gives the offset of the
/// `Zip64EndCentralDirectory` record.
#[derive(Debug, Clone, PartialEq, Eq, Serializable)]
struct Zip64EndCentralDirectoryLocator {
    #[prefix(Self::SIGNATURE.0.to_le_bytes())]
    disk_number_of_zip64_end_central_directory_start: u32,
    offset_zip64_end_of_central_directory_record: u64,
    total_number_of_disks: u32,
}

impl Zip64EndCentralDirectoryLocator {
    /// Signature that starts the locator.
    const SIGNATURE: Signature = Signature(0x07064b50);
    /// Size in bytes of the locator, signature included (20).
    const SIZE: usize = 4 + 4 + 8 + 4;
}

/// End of central directory record.
///
/// The signature and the comment length are not stored: both are recomputed
/// when the record is serialized.
#[derive(Debug, Clone, PartialEq, Eq)]
struct EndCentralDirectory {
    //signature: Self::SIGNATURE,
    disk_number: u16,
    disk_number_of_central_directory_start: u16,
    number_of_entries_in_central_directory_on_disk: u16,
    number_of_entries_in_central_directory: u16,
    size_central_directory: u32,
    offset_central_directory: u32,
    //file_comment_length: u16,
    comment: Vec<u8>,
}

impl EndCentralDirectory {
    /// Signature that starts the record.
    const SIGNATURE: Signature = Signature(0x06054b50); //Signature([0x06, 0x05, 0x4b, 0x50]);
    /// Size in bytes of the record when the comment is empty (22).
    const MIN_SIZE: usize = 4 + 4 * 2 + 2 * 4 + 2; // + 0;
}

impl Serializable for EndCentralDirectory {
    /// Writes the record, signature and comment length included.
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        Self::SIGNATURE.serialize(output)?;
        self.disk_number.serialize(output)?;
        self.disk_number_of_central_directory_start
            .serialize(output)?;
        self.number_of_entries_in_central_directory_on_disk
            .serialize(output)?;
        self.number_of_entries_in_central_directory
            .serialize(output)?;
        self.size_central_directory.serialize(output)?;
        self.offset_central_directory.serialize(output)?;
        // NOTE(review): a comment longer than u16::MAX bytes is silently
        // truncated in the length field while all its bytes are still written.
        (self.comment.len() as u16).serialize(output)?;
        for c in &self.comment {
            c.serialize(output)?;
        }
        Ok(())
    }

    /// Reads a record from `input`.
    ///
    /// # Panics
    ///
    /// Panics when the record was read completely but does not start with
    /// `Self::SIGNATURE`.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        let signature = Signature::deserialize(input)?;
        let disk_number = u16::deserialize(input)?;
        let disk_number_of_central_directory_start = u16::deserialize(input)?;
        let number_of_entries_in_central_directory_on_disk = u16::deserialize(input)?;
        let number_of_entries_in_central_directory = u16::deserialize(input)?;
        let size_central_directory = u32::deserialize(input)?;
        let offset_central_directory = u32::deserialize(input)?;
        let file_comment_length = u16::deserialize(input)?;
        let mut comment = vec![];
        for _ in 0..file_comment_length {
            comment.push(u8::deserialize(input)?);
        }

        assert_eq!(signature, Self::SIGNATURE); // TODO
        Ok(Self {
            disk_number,
            disk_number_of_central_directory_start,
            number_of_entries_in_central_directory_on_disk,
            number_of_entries_in_central_directory,
            size_central_directory,
            offset_central_directory,
            comment,
        })
    }

    /// Serialized size in bytes, signature and comment length included.
    fn size(&self) -> usize {
        Self::MIN_SIZE + self.comment.len()
    }
}

/// Parsed view of a zip archive: its end-of-central-directory records, the
/// central directory entries and the underlying reader.
struct ZipFile<T: Read + Seek> {
    end_of_central_directory: EndCentralDirectory,
    zip64_end_of_central_directory: Option<Zip64EndCentralDirectory>,
    files: Vec<FileHeader>,
    data: T,
}

impl<T: Read + Seek> ZipFile<T> {
    /// Parses the end of central directory record, the optional Zip64 records
    /// and every central directory file header found in `reader`.
    ///
    /// The parsed structures are printed to stdout as they are read.
    ///
    /// # Panics
    ///
    /// Panics when no end of central directory record is found, when a seek
    /// fails, when a record or file header cannot be deserialized, when the
    /// Zip64 locator describes a multi-disk archive, or when the file headers
    /// do not add up to the announced central directory size.
    fn new(mut reader: T) -> Self {
        let end_of_central_directory_off =
            Self::get_end_of_central_directory_offset(&mut reader).unwrap();
        reader
            .seek(SeekFrom::Start(end_of_central_directory_off))
            .unwrap();
        let end_of_central_directory = EndCentralDirectory::deserialize(&mut reader).unwrap();
        println!("{end_of_central_directory:#?}");

        // The Zip64 locator, when present, sits right before the end of
        // central directory record. If that record starts closer to the
        // beginning of the file than the size of a locator there cannot be
        // one, and the subtraction must not be attempted (it would underflow).
        let zip64_ecd_locator = match end_of_central_directory_off
            .checked_sub(Zip64EndCentralDirectoryLocator::SIZE as u64)
        {
            Some(locator_off) => {
                reader.seek(SeekFrom::Start(locator_off)).unwrap();
                Zip64EndCentralDirectoryLocator::deserialize(&mut reader).ok()
            }
            None => None,
        };
        let zip64_end_of_central_directory = if let Some(zip64_ecd_locator) = zip64_ecd_locator {
            // Multi-disk archives are not supported.
            assert_eq!(
                zip64_ecd_locator.disk_number_of_zip64_end_central_directory_start,
                0
            );
            assert!(zip64_ecd_locator.total_number_of_disks <= 1);
            println!("Zip64 ECD Locator {:#?}", zip64_ecd_locator);
            let zip64_ecd_record_off =
                zip64_ecd_locator.offset_zip64_end_of_central_directory_record;
            reader.seek(SeekFrom::Start(zip64_ecd_record_off)).unwrap();
            let zip64_ecd_record = Zip64EndCentralDirectory::deserialize(&mut reader).ok();
            println!("{zip64_ecd_record:#?}");
            zip64_ecd_record
        } else {
            None
        };

        // At this point Python's zipfile recomputes the location of the central
        // directory from the location of the end of central directory, in case
        // the zip was concatenated after another file.
        // We probably don't need that for now.
        let mut zip_file = Self {
            end_of_central_directory,
            zip64_end_of_central_directory,
            data: reader,
            files: vec![],
        };
        zip_file
            .data
            .seek(SeekFrom::Start(zip_file.get_ed_offset()))
            .unwrap();

        // Read file headers until the announced central directory size has
        // been consumed.
        let mut size_read = 0;
        let cd_size = zip_file.get_ed_size();
        while size_read < cd_size {
            let file_header = FileHeader::deserialize(&mut zip_file.data).unwrap();
            println!("{file_header:#?}");
            size_read += file_header.size() as u64;
            zip_file.files.push(file_header);
        }
        assert_eq!(size_read, cd_size);
        for f in &zip_file.files {
            println!("{f:#?}");
        }
        zip_file
    }

    /// Whether a Zip64 end of central directory record was found.
    #[allow(dead_code)]
    fn is_zip64(&self) -> bool {
        self.zip64_end_of_central_directory.is_some()
    }

    /// Number of this disk, taken from the Zip64 record when there is one.
    #[allow(dead_code)]
    fn get_disk_num(&self) -> u32 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.number_of_this_disk,
            None => u32::from(self.end_of_central_directory.disk_number),
        }
    }

    /// Number of the disk on which the central directory starts, taken from
    /// the Zip64 record when there is one.
    #[allow(dead_code)]
    fn get_disk_ed_start(&self) -> u32 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.disk_number_of_central_directory_start,
            None => u32::from(
                self.end_of_central_directory
                    .disk_number_of_central_directory_start,
            ),
        }
    }

    /// Number of central directory entries on this disk, taken from the Zip64
    /// record when there is one.
    #[allow(dead_code)]
    fn get_number_entries_on_disk(&self) -> u64 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.number_entry_in_central_directory_on_this_disk,
            None => u64::from(
                self.end_of_central_directory
                    .number_of_entries_in_central_directory_on_disk,
            ),
        }
    }

    /// Total number of central directory entries, taken from the Zip64 record
    /// when there is one.
    #[allow(dead_code)]
    fn get_number_entries(&self) -> u64 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.number_entry_in_central_directory,
            None => u64::from(
                self.end_of_central_directory
                    .number_of_entries_in_central_directory,
            ),
        }
    }

    /// Size in bytes of the central directory, taken from the Zip64 record
    /// when there is one.
    fn get_ed_size(&self) -> u64 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.size_of_central_directory,
            None => u64::from(self.end_of_central_directory.size_central_directory),
        }
    }

    /// Offset of the start of the central directory, taken from the Zip64
    /// record when there is one.
    fn get_ed_offset(&self) -> u64 {
        match &self.zip64_end_of_central_directory {
            Some(zip64) => zip64.offset_central_directory,
            None => u64::from(self.end_of_central_directory.offset_central_directory),
        }
    }

    /// Searches backward from the end of `reader` for the end of central
    /// directory signature and returns the offset at which it was found.
    ///
    /// The record ends with a comment of at most `u16::MAX` bytes, so every
    /// comment length from 0 up to that limit (or up to what the file size
    /// allows) is tried in turn.
    ///
    /// Returns `None` when the file is shorter than the smallest possible
    /// record, when no signature is found, or when seeking/reading fails.
    fn get_end_of_central_directory_offset(reader: &mut T) -> Option<u64> {
        let file_size = reader.seek(SeekFrom::End(0)).ok()?;
        let min_size = EndCentralDirectory::MIN_SIZE as u64;
        // Largest comment that still leaves room for the fixed part of the
        // record; `None` (early return) when even an empty comment does not fit.
        let max_comment_size = file_size.checked_sub(min_size)?.min(u64::from(u16::MAX));
        for comment_size in 0..=max_comment_size {
            let offset = file_size - min_size - comment_size;
            reader.seek(SeekFrom::Start(offset)).ok()?;
            let sig = Signature::deserialize(reader).ok()?;
            if sig == EndCentralDirectory::SIGNATURE {
                return Some(offset);
            }
        }
        None
    }

    /// Decoded names of all the entries of the central directory, in order.
    fn get_file_names(&self) -> Vec<String> {
        self.files.iter().map(|f| f.get_name()).collect()
    }
}

fn main() {
    //let file = File::open("app-release.apk").expect("failed to open file");
    let file = File::open("tst_64.zip").expect("failed to open file");
    let zip_file = ZipFile::new(file);
    println!("{}", zip_file.get_file_names().join("\n"));
    // An archive may contain no entry at all, so do not index `files` blindly.
    if let Some(first_file) = zip_file.files.first() {
        println!(
            "uncompressed size: {}",
            first_file.get_uncompressed_size()
        );
    }
}