From 3aebd5176834d69ef4c710dbe4acea047ce50c53 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 15 Jan 2024 11:08:20 +0100 Subject: [PATCH] parse zip64 extra field --- apk_frauder/src/main.rs | 309 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 289 insertions(+), 20 deletions(-) diff --git a/apk_frauder/src/main.rs b/apk_frauder/src/main.rs index edcec4b..ba3ba56 100644 --- a/apk_frauder/src/main.rs +++ b/apk_frauder/src/main.rs @@ -1,5 +1,5 @@ use std::fs::File; -use std::io::{Read, Seek, SeekFrom, Write}; +use std::io::{Cursor, Read, Seek, SeekFrom, Write}; use androscalpel_serializer::{ReadSeek, Result, Serializable}; @@ -14,6 +14,142 @@ enum Encoding { //struct Signature(pub [u8; 4]); struct Signature(pub u32); +#[derive(Debug, Clone, PartialEq, Eq)] +enum ExtraField { + Zip64(Zip64ExtraField), + Generic(GenericExtraField), +} + +impl ExtraField { + fn to_generic(&self) -> Result { + match self { + Self::Zip64(field) => field.to_generic_field(), + Self::Generic(field) => Ok(field.clone()), + } + } +} + +impl Serializable for ExtraField { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + self.to_generic()?.serialize(output) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + Ok(Self::Generic(GenericExtraField::deserialize(input)?)) + + /* + match field.id { + Zip64ExtraField::ID => Ok(Self::Zip64(Zip64ExtraField::from_generic(&field)?)), + _ => Ok(Self::Generic(field)), + } + */ + } + + fn size(&self) -> usize { + self.to_generic().unwrap().size() + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct Zip64ExtraField { + original_size: Option, + compressed_size: Option, + offset_header: Option, + disk_number: Option, +} + +impl Zip64ExtraField { + const ID: u16 = 0x0001; + fn to_generic_field(&self) -> Result { + let mut data = Cursor::new(Vec::::new()); + if let Some(original_size) = self.original_size { + original_size.serialize(&mut data)?; + } + if let Some(compressed_size) = self.compressed_size { + compressed_size.serialize(&mut data)?; + } + if let Some(offset_header) = self.offset_header { + offset_header.serialize(&mut data)?; + } + if let Some(disk_number) = self.disk_number { + disk_number.serialize(&mut data)?; + } + + Ok(GenericExtraField { + id: Self::ID, + data: data.into_inner(), + }) + } + + fn from_generic( + field: &GenericExtraField, + original_size: bool, + compressed_size: bool, + offset_header: bool, + disk_number: bool, + ) -> Result { + assert_eq!(field.id, Self::ID); + let mut data = Cursor::new(&field.data); + let original_size = if original_size { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let compressed_size = if compressed_size { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let offset_header = if offset_header { + Some(u64::deserialize(&mut data)?) + } else { + None + }; + let disk_number = if disk_number { + Some(u32::deserialize(&mut data)?) + } else { + None + }; + Ok(Self { + original_size, + compressed_size, + offset_header, + disk_number, + }) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct GenericExtraField { + id: u16, + data: Vec, +} + +impl Serializable for GenericExtraField { + fn serialize(&self, output: &mut dyn Write) -> Result<()> { + self.id.serialize(output)?; + (self.data.len() as u16).serialize(output)?; + for c in &self.data { + c.serialize(output)?; + } + Ok(()) + } + + fn deserialize(input: &mut dyn ReadSeek) -> Result { + let id = u16::deserialize(input)?; + let data_size = u16::deserialize(input)?; + let mut data = vec![]; + for _ in 0..data_size { + data.push(u8::deserialize(input)?); + } + Ok(Self { id, data }) + } + + fn size(&self) -> usize { + 4 + self.data.len() + } +} + #[derive(Debug, Clone, PartialEq, Eq)] struct FileHeader { // signature: Signature(0x02014b50) @@ -34,9 +170,9 @@ struct FileHeader { external_file_attributes: u32, offset_local_header: u32, file_name: Vec, - extra_field: Vec, // TODO: - // zip64 https://github.com/python/cpython/blob/cc11c76da7a099bb4535ab8ca507e55263fc35b6/Lib/zipfile/__init__.py#L500 - // https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT 4.5 Extensible data fields + extra_field: Vec, + /// Remaining bytes in the extra_fields that could not be parsed as ExtraField + malformed_extra_field: Vec, file_comment: Vec, } @@ -65,6 +201,9 @@ impl Serializable for FileHeader { for c in &self.extra_field { c.serialize(output)?; } + for c in &self.malformed_extra_field { + c.serialize(output)?; + } for c in &self.file_comment { c.serialize(output)?; } @@ -91,18 +230,10 @@ impl Serializable for FileHeader { let external_file_attributes = u32::deserialize(input)?; let offset_local_header = u32::deserialize(input)?; let mut file_name = vec![]; - let mut extra_field = vec![]; - let mut file_comment = vec![]; for _ in 0..file_name_length { file_name.push(u8::deserialize(input)?); } - for _ in 0..extra_field_length { - extra_field.push(u8::deserialize(input)?); - } - for _ in 0..file_comment_length { - file_comment.push(u8::deserialize(input)?); - } - Ok(Self { + let mut header = Self { version_made_by, version_needed_to_extract, general_purpose_flag, @@ -117,13 +248,78 @@ impl Serializable for FileHeader { external_file_attributes, offset_local_header, file_name, - extra_field, - file_comment, - }) + extra_field: vec![], + malformed_extra_field: vec![], + file_comment: vec![], + }; + //let end_of_extra_field = input.stream_position().unwrap() + extra_field_length as u64; + let extra_field_off = input.stream_position().unwrap(); + let mut extra_size_read = 0; + while extra_size_read < extra_field_length as usize { + let field_off = input.stream_position().unwrap(); + let field = ExtraField::deserialize(input); + + if let Err(err) = field { + println!( + "Failed to parsed extra field in {}: {err:?}", + header.get_name() + ); + input.seek(SeekFrom::Start(field_off)).unwrap(); + break; + } else { + let field = field.unwrap(); + extra_size_read += field.size(); + header.extra_field.push(field); + } + } + if extra_size_read > extra_field_length as usize { + println!("Failed to parsed last extra field in {}", header.get_name()); + let size = header.extra_field.pop().unwrap().size(); + input.seek(SeekFrom::Current(-(size as i64))).unwrap(); + } + let mut extra_size_read = input.stream_position().unwrap() - extra_field_off; + while extra_size_read < extra_field_length as u64 { + header.malformed_extra_field.push(u8::deserialize(input)?); + extra_size_read += 1; + } + //input.seek(SeekFrom::Start(end_of_extra_field)).unwrap(); + for _ in 0..file_comment_length { + header.file_comment.push(u8::deserialize(input)?); + } + + for field in &mut header.extra_field { + if let ExtraField::Generic(GenericExtraField { + id: Zip64ExtraField::ID, + data, + }) = field + { + let original_size = uncompressed_size == u32::MAX; + let compressed_size = compressed_size == u32::MAX; + let offset_header = offset_local_header == u32::MAX; + let disk_number = disk_number_start == u16::MAX; + let zip64_filed = Zip64ExtraField::from_generic( + &GenericExtraField { + id: Zip64ExtraField::ID, + data: data.clone(), + }, + original_size, + compressed_size, + offset_header, + disk_number, + ) + .unwrap(); + *field = ExtraField::Zip64(zip64_filed); + } + } + Ok(header) } fn size(&self) -> usize { - Self::MIN_SIZE + self.file_name.len() + self.extra_field.len() + self.file_comment.len() + Self::MIN_SIZE + + self.file_name.len() + + self.extra_field.iter().map(|f| f.size()).sum::() + + self.malformed_extra_field.len() + + self.file_comment.len() } } @@ -147,6 +343,75 @@ impl FileHeader { Encoding::CP437 => cp437::cp437_to_string(&self.file_name), } } + + #[allow(dead_code)] + fn get_uncompressed_size(&self) -> u64 { + if self.uncompressed_size != u32::MAX { + self.uncompressed_size as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + original_size: Some(original_size), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *original_size + } else { + self.uncompressed_size as u64 + } + } + #[allow(dead_code)] + fn get_compressed_size(&self) -> u64 { + if self.compressed_size != u32::MAX { + self.compressed_size as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + compressed_size: Some(compressed_size), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *compressed_size + } else { + self.compressed_size as u64 + } + } + #[allow(dead_code)] + fn get_offset_local_header(&self) -> u64 { + if self.offset_local_header != u32::MAX { + self.offset_local_header as u64 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + offset_header: Some(offset_header), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *offset_header + } else { + self.offset_local_header as u64 + } + } + #[allow(dead_code)] + fn get_disk_number_start(&self) -> u32 { + if self.disk_number_start != u16::MAX { + self.disk_number_start as u32 + } else if let Some(ExtraField::Zip64(Zip64ExtraField { + disk_number: Some(disk_number), + .. + })) = self + .extra_field + .iter() + .find(|f| matches!(f, ExtraField::Zip64(_))) + { + *disk_number + } else { + self.disk_number_start as u32 + } + } } #[derive(Debug, Clone, PartialEq, Eq)] @@ -361,22 +626,23 @@ impl ZipFile { let cd_size = zip_file.get_ed_size(); while size_read < cd_size { let file_header = FileHeader::deserialize(&mut zip_file.data).unwrap(); + println!("{file_header:#?}"); size_read += file_header.size() as u64; zip_file.files.push(file_header); } assert_eq!(size_read, cd_size); for f in &zip_file.files { - println!("{f:?}"); + println!("{f:#?}"); } zip_file } - /* + #[allow(dead_code)] fn is_zip64(&self) -> bool { self.zip64_end_of_central_directory.is_some() } - */ + #[allow(dead_code)] fn get_disk_num(&self) -> u32 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_of_this_disk @@ -385,6 +651,7 @@ impl ZipFile { } } + #[allow(dead_code)] fn get_disk_ed_start(&self) -> u32 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.disk_number_of_central_directory_start @@ -394,6 +661,7 @@ impl ZipFile { } } + #[allow(dead_code)] fn get_number_entries_on_disk(&self) -> u64 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_entry_in_central_directory_on_this_disk @@ -403,6 +671,7 @@ impl ZipFile { } } + #[allow(dead_code)] fn get_number_entries(&self) -> u64 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_entry_in_central_directory