use crate::{ FileHeader, FileInfo, LocalFileHeader, Signature, apk_signing_block::ApkSigningBlock, apk_signing_block::Magic, compression::CompressionMethod, data_descriptor::{DataDescriptor, DataDescriptor32, DataDescriptor64}, end_of_central_directory::EndCentralDirectory, end_of_central_directory::Zip64EndCentralDirectory, end_of_central_directory::Zip64EndCentralDirectoryLocator, general_purpose_flags, }; use androscalpel_serializer::Serializable; use anyhow::{Context, Result, bail}; use flate2::read::DeflateDecoder; use log::{info, warn}; use std::collections::HashMap; use std::io::{Read, Seek, SeekFrom}; #[derive(Debug, PartialEq, Eq)] pub struct ZipFileReader { pub end_of_central_directory: EndCentralDirectory, pub zip64_end_of_central_directory: Option, pub files: Vec, pub apk_sign_block: Option, pub data: T, } impl ZipFileReader { pub fn new(mut reader: T) -> Result { let end_of_central_directory_off = Self::get_end_of_central_directory_offset(&mut reader) .context("end of centrall directory not found, probably not a zip")?; reader .seek(SeekFrom::Start(end_of_central_directory_off)) .context("Failed to seek to end of central directory")?; let end_of_central_directory = EndCentralDirectory::deserialize(&mut reader) .context("Failed to deserialize end of central directory")?; let zip64_end_of_central_directory = if reader .seek(SeekFrom::Start( end_of_central_directory_off - Zip64EndCentralDirectoryLocator::SIZE as u64, )) .is_ok() { let zip64_ecd_locator = Zip64EndCentralDirectoryLocator::deserialize(&mut reader).ok(); if let Some(zip64_ecd_locator) = zip64_ecd_locator { assert_eq!( zip64_ecd_locator.disk_number_of_zip64_end_central_directory_start, 0 ); assert!(zip64_ecd_locator.total_number_of_disks <= 1); let zip64_edc_record_off = zip64_ecd_locator.offset_zip64_end_of_central_directory_record; reader .seek(SeekFrom::Start(zip64_edc_record_off)) .context("Failed to seek to end of zip64 central directory")?; Zip64EndCentralDirectory::deserialize(&mut 
reader).ok() } else { None } } else { None }; //println!("{:#?}", end_of_central_directory); // At this point python's ziplib recompute the location of the central directory from the // location of the end of central directory in case the zip was concanated after a file. // We probably don't need that for now. let mut zip_file = Self { end_of_central_directory, zip64_end_of_central_directory, data: reader, files: vec![], apk_sign_block: None, }; zip_file .data .seek(SeekFrom::Start(zip_file.get_cd_offset())) .context("Failed to seek to central directory")?; let mut size_read = 0; let cd_size = zip_file.get_cd_size(); while size_read < cd_size { let header = FileHeader::deserialize(&mut zip_file.data) .context("Failed to deserialize file header")?; //println!("{:#?}", header); size_read += header.size() as u64; let pos_in_dir = zip_file .data .stream_position() .context("Failed to get stream position")?; if header.general_purpose_flags & general_purpose_flags::MASK_ENCRYPTED_CENTRAL_DIR != 0 { bail!("Central directory encryption not supported"); } zip_file .data .seek(SeekFrom::Start(header.get_offset_local_header())) .context("Failled to seek to local header")?; let local_header = LocalFileHeader::deserialize(&mut zip_file.data) .context("Failed to deserialize local file header")?; let data_descriptor = if (local_header.general_purpose_flags & general_purpose_flags::MASK_USE_DATA_DESCRIPTOR != 0) || (header.general_purpose_flags & general_purpose_flags::MASK_USE_DATA_DESCRIPTOR != 0) { warn!("Data Descriptor support is experimental"); zip_file .data .seek(SeekFrom::Current(header.compressed_size as i64)) .context("failed to seek to after the file data")?; if zip_file.zip64_end_of_central_directory.is_some() { Some(DataDescriptor::Zip64( DataDescriptor64::deserialize(&mut zip_file.data) .context("Failed to deserialize data descriptor 64")?, )) } else { Some(DataDescriptor::Zip32( DataDescriptor32::deserialize(&mut zip_file.data) .context("Failed to deserialize data 
descriptor")?, )) } } else { None }; zip_file .data .seek(SeekFrom::Start(pos_in_dir)) .context("Failed to seek to position in directory")?; zip_file.files.push(FileInfo { local_header, header, data_descriptor, }); } assert_eq!(size_read, cd_size); if zip_file.get_cd_offset() > 16 { zip_file .data .seek(SeekFrom::Start(zip_file.get_cd_offset() - 16)) .context("Failed to seek to central directory")?; let magic = Magic::deserialize(&mut zip_file.data).context("Failed to deserialize Magic")?; if magic == ApkSigningBlock::MAGIC { zip_file .data .seek(SeekFrom::Start(zip_file.get_cd_offset() - 16 - 8)) .context("Failed to seek to central directory")?; let block_size = u64::deserialize(&mut zip_file.data) .context("Failed to deserialize block size")?; zip_file .data .seek(SeekFrom::Start(zip_file.get_cd_offset() - block_size - 8)) .context("Failed to seek to central directory")?; zip_file.apk_sign_block = ApkSigningBlock::deserialize(&mut zip_file.data).ok(); } } Ok(zip_file) } pub fn is_zip64(&self) -> bool { self.zip64_end_of_central_directory.is_some() } pub fn get_disk_num(&self) -> u32 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_of_this_disk } else { self.end_of_central_directory.disk_number as u32 } } pub fn get_disk_ed_start(&self) -> u32 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.disk_number_of_central_directory_start } else { self.end_of_central_directory .disk_number_of_central_directory_start as u32 } } pub fn get_number_entries_on_disk(&self) -> u64 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_entry_in_central_directory_on_this_disk } else { self.end_of_central_directory .number_of_entries_in_central_directory_on_disk as u64 } } pub fn get_number_entries(&self) -> u64 { if let Some(zip64_end_of_central_directory) = 
&self.zip64_end_of_central_directory { zip64_end_of_central_directory.number_entry_in_central_directory } else { self.end_of_central_directory .number_of_entries_in_central_directory as u64 } } pub fn get_cd_size(&self) -> u64 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.size_of_central_directory } else { self.end_of_central_directory.size_central_directory as u64 } } pub fn get_cd_offset(&self) -> u64 { if let Some(zip64_end_of_central_directory) = &self.zip64_end_of_central_directory { zip64_end_of_central_directory.offset_central_directory } else { self.end_of_central_directory.offset_central_directory as u64 } } pub fn get_end_of_central_directory_offset(reader: &mut T) -> Option { let file_size = reader.seek(SeekFrom::End(0)).ok()?; let mut sig = Signature::default(); let mut comment_size = 0; while sig != EndCentralDirectory::SIGNATURE { reader .seek(SeekFrom::End( -(EndCentralDirectory::MIN_SIZE as i64) - comment_size, )) .ok()?; sig = Signature::deserialize(reader).ok()?; comment_size += 1; if comment_size > 65536 || comment_size as usize + EndCentralDirectory::MIN_SIZE > file_size as usize { return None; } } comment_size -= 1; Some(file_size - comment_size as u64 - EndCentralDirectory::MIN_SIZE as u64) } pub fn get_file_names(&self) -> Vec { self.files.iter().map(|f| f.get_name()).collect() } /// List files used to signe jar file (apk signature v1) found in the zip file: /// META-INF/MANIFEST.MF /// META-INF/*.SF /// META-INF/*.DSA /// META-INF/*.RSA /// META-INF/SIG-* pub fn get_jar_sig_files(&self) -> Vec<&FileInfo> { self.files .iter() .filter(|file| match_v1_signature_file(&file.get_name())) .collect() } /// Test if the zipfile contains files used to signe jar file (apk signature v1): /// META-INF/MANIFEST.MF /// META-INF/*.SF /// META-INF/*.DSA /// META-INF/*.RSA /// META-INF/SIG-* /// /// TODO: there is a field `X-Android-APK-Signed` in .SF that indicate the use of v2 and v3 /// 
(and v4?) signature. pub fn is_signed_v1(&self) -> bool { !self.get_jar_sig_files().is_empty() } /// Test if the zipfile as apk signature block. pub fn is_signed_v2(&self) -> bool { self.apk_sign_block.is_some() } /// Remove v1 signature files from the file index. /// /// This function does no modify the original file, only the index store /// in the struct. pub fn unlink_signature_files(&mut self) { self.files .retain(|file| !match_v1_signature_file(&file.get_name())); } /// Unlink bytecode files. /// /// This function does no modify the original file, only the index store /// in the struct. pub fn unlink_bytecode_files(&mut self) { self.files .retain(|file| !match_dexfile_name(&file.get_name())); } pub fn check_holes(&self) { let mut files: Vec<&FileInfo> = self.files.iter().collect(); files.sort_by_key(|f| f.get_offset_local_header()); let mut lst_offset = 0; for file in files.iter() { if file.get_offset_local_header() != lst_offset { info!( "Hole in zip before {} between 0x{:x} and 0x{:x}", file.get_name(), lst_offset, file.get_offset_local_header() ); } lst_offset += file.local_header.size() as u64; lst_offset += file.get_compressed_size(); } if let Some(apk_sign_block) = &self.apk_sign_block { let apk_sb_off = self.get_cd_offset() - apk_sign_block.size() as u64; if apk_sb_off != lst_offset { info!( "Hole in zip before apk signing block, between 0x{:x} and 0x{:x}", lst_offset, apk_sb_off ); } lst_offset = self.get_cd_offset(); } if self.get_cd_offset() != lst_offset { info!( "Hole in zip before central directory between 0x{:x} and 0x{:x}", lst_offset, self.get_cd_offset() ); } } pub fn get_bin(&mut self, offset: u64, size: usize) -> Result> { self.data .seek(SeekFrom::Start(offset)) .context("Failed to seek to data")?; let mut data = vec![0u8; size]; self.data .read_exact(&mut data) .context("failed to read data")?; /* for _ in 0..size { data.push(u8::deserialize(&mut self.data).unwrap()); } */ Ok(data) } pub fn read_file_as_vec(&mut self, name: &str) -> 
Result> { let file = self .get_file_info(name) .with_context(|| format!("Failed to get info for {name}"))?; let offset = file.get_file_offset(); let size_c = file.header.compressed_size as usize; let size = file.header.uncompressed_size as usize; let compression_method = file.header.compression_method; let mut data = vec![0u8; size_c]; self.data .seek(SeekFrom::Start(offset)) .with_context(|| format!("Failed to seek to start of file {name} (at 0x{offset:x})"))?; self.data .read_exact(&mut data) .with_context(|| format!("Failed to read data for file {name}"))?; match compression_method { CompressionMethod::Stored => {} CompressionMethod::Deflated => { let mut decomp_data = vec![0u8; size]; let mut deflater = DeflateDecoder::new(&data[..]); deflater .read_exact(&mut decomp_data) .with_context(|| format!("Failed to decompress data for file {name}"))?; data = decomp_data } _ => unimplemented!(), } Ok(data) } pub fn get_file_info(&self, name: &str) -> Option<&FileInfo> { self.files.iter().find(|&file| file.get_name() == name) } pub fn get_classes_file_info(&self) -> Vec<&FileInfo> { let files_map: HashMap = self .files .iter() .by_ref() .filter(|&file| match_dexfile_name(&file.get_name())) .map(|file| (file.get_name(), file)) .collect(); let mut files = vec![]; let mut i = 0; loop { let name = if i == 0 { "classes.dex".into() } else { format!("classes{}.dex", i + 1) }; if let Some(file) = files_map.get(&name) { files.push(*file); } else { break; } i += 1; } files } } // Not worth a regex /// Check if a file is a bytecode file (from its name) fn match_dexfile_name(name: &str) -> bool { if name.len() < 11 { return false; } if &name[0..7] != "classes" { return false; } if &name[name.len() - 4..name.len()] != ".dex" { return false; } name.len() == 11 || name[7..name.len() - 4].parse::().is_ok() } /// Check if a file is used to sign the APK with signature scheme v1. 
///
/// Recognised names are `META-INF/MANIFEST.MF`, `META-INF/*.SF`, `META-INF/*.DSA`,
/// `META-INF/*.RSA` and `META-INF/SIG-*`, where `*` stands for at least one byte.
///
/// Prefixes and suffixes are compared with `strip_prefix`/`starts_with`/`ends_with` instead of
/// byte-range slicing, so names containing multi-byte UTF-8 characters never cause a panic.
fn match_v1_signature_file(name: &str) -> bool {
    if name == "META-INF/MANIFEST.MF" {
        return true;
    }
    let rest = match name.strip_prefix("META-INF/") {
        Some(rest) => rest,
        None => return false,
    };
    // `rest` must be strictly longer than the marker: a bare ".SF" or "SIG-" does not count.
    let ends_with_ext = |ext: &str| rest.len() > ext.len() && rest.ends_with(ext);
    ends_with_ext(".SF")
        || ends_with_ext(".DSA")
        || ends_with_ext(".RSA")
        || (rest.len() > "SIG-".len() && rest.starts_with("SIG-"))
}