diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs
index d8466f8..d6302cf 100644
--- a/androscalpel/src/apk.rs
+++ b/androscalpel/src/apk.rs
@@ -2,6 +2,7 @@
 
 use anyhow::{anyhow, bail, Context};
 use serde::{Deserialize, Serialize};
+use std::cmp::max;
 use std::collections::{HashMap, HashSet};
 use std::fs::File;
 use std::io::{Cursor, Read, Seek};
@@ -38,25 +39,25 @@ impl DexFile {
     pub fn get_all_types(&self) -> HashSet<IdType> {
         let mut v = TypeCollector::default();
         v.visit_dex_file(self).unwrap();
-        v.types
+        v.result()
     }
 
     pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
         let mut v = MethodIdCollector::default();
         v.visit_dex_file(self).unwrap();
-        v.method_ids
+        v.result()
     }
 
     pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
         let mut v = MethodTypeCollector::default();
         v.visit_dex_file(self).unwrap();
-        v.method_types
+        v.result()
     }
 
     pub fn get_all_field_ids(&self) -> HashSet<IdField> {
         let mut v = FieldIdCollector::default();
         v.visit_dex_file(self).unwrap();
-        v.field_ids
+        v.result()
     }
 
     /// Check if the `DexFile` is exiding the format limits.
@@ -94,6 +95,39 @@ impl DexFile {
                 .len()
                 <= u16::MAX as usize)
     }
+
+    /// Split the dex file into two, dealing the classes alternately between
+    /// the two halves (which class lands where follows the `HashMap`
+    /// iteration order of `classes`).
+    ///
+    /// The first half keeps `not_referenced_strings`; the second starts with
+    /// an empty set. `bin_cache` is `None` in both.
+    pub fn _split(self) -> (Self, Self) {
+        let mut classes0 = HashMap::new();
+        let mut classes1 = HashMap::new();
+        let mut i = 0;
+        for (name, class) in self.classes.into_iter() {
+            if i == 0 {
+                classes0.insert(name, class);
+                i = 1;
+            } else {
+                classes1.insert(name, class);
+                i = 0;
+            }
+        }
+        (
+            Self {
+                classes: classes0,
+                not_referenced_strings: self.not_referenced_strings,
+                bin_cache: None,
+            },
+            Self {
+                classes: classes1,
+                not_referenced_strings: HashSet::new(),
+                bin_cache: None,
+            },
+        )
+    }
 }
 
 impl Visitable for DexFile {
@@ -3439,6 +3473,20 @@ impl Apk {
     ///
     pub fn redistribute_classes(&mut self) {
         // TODO: handle cases where several classes have the same name.
+        // Find the index of the first unused dex file name (0 is `classes.dex`,
+        // `n > 0` is `classes{n + 1}.dex`); new files are named from there on.
+        let mut dex_i = 0;
+        loop {
+            let name = if dex_i == 0 {
+                "classes.dex".into()
+            } else {
+                format!("classes{}.dex", dex_i + 1)
+            };
+            if !self.dex_files.contains_key(&name) {
+                break;
+            };
+            dex_i += 1;
+        }
         while let Some(fname) = self.dex_files.iter().find_map(|(name, dex)| {
             if dex.is_overflowing() {
                 Some(name.clone())
@@ -3447,7 +3495,44 @@ impl Apk {
             }
         }) {
             let dex = self.dex_files.get_mut(&fname).unwrap();
-            let class_name = dex.classes.keys().find(|_| true).unwrap().clone();
+
+            /*
+             * TODO: check impact on inheritance topology
+            // If the dex is overflowing by a big margin
+            if (dex.get_all_types().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
+                || (dex.get_all_protos().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
+                || (dex.get_all_field_ids().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
+                || (dex.get_all_method_ids().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
+            {
+                let dex = self.dex_files.remove(&fname).unwrap();
+                let (dex1, dex2) = dex.split();
+                self.dex_files.insert(fname, dex1);
+                let fname2 = if dex_i == 0 {
+                    "classes.dex".into()
+                } else {
+                    format!("classes{}.dex", dex_i + 1)
+                };
+                dex_i += 1;
+                self.dex_files.insert(fname2, dex2);
+            } else {
+            */
+            // Evict the class whose largest id set (types, protos, field ids
+            // or method ids) is the biggest.
+            let class_name = dex
+                .classes
+                .iter()
+                .max_by_key(|(_, class)| {
+                    max(
+                        max(class.get_all_types().len(), class.get_all_protos().len()),
+                        max(
+                            class.get_all_field_ids().len(),
+                            class.get_all_method_ids().len(),
+                        ),
+                    )
+                })
+                .map(|(name, _)| name)
+                .unwrap()
+                .clone();
             let class = dex.classes.remove(&class_name).unwrap();
             if let Some(dex) = self
                 .dex_files
@@ -3457,22 +3542,17 @@ impl Apk {
                 dex.classes.insert(class_name, class);
             } else {
                 // If not file has space for the class, create a new file
-                let mut i = 0;
-                let new_fname = loop {
-                    let name = if i == 0 {
-                        "classes.dex".into()
-                    } else {
-                        format!("classes{}.dex", i + 1)
-                    };
-                    if self.dex_files.contains_key(&name) {
-                        break name;
-                    };
-                    i += 1;
-                };
                 let mut new_dex = DexFile::default();
                 new_dex.classes.insert(class_name, class);
+                let new_fname = if dex_i == 0 {
+                    "classes.dex".into()
+                } else {
+                    format!("classes{}.dex", dex_i + 1)
+                };
+                dex_i += 1;
                 self.dex_files.insert(new_fname, new_dex);
             }
+            //}
         }
     }
 }
diff --git a/androscalpel/src/visitor.rs b/androscalpel/src/visitor.rs
index 64a8e75..2fb01c5 100644
--- a/androscalpel/src/visitor.rs
+++ b/androscalpel/src/visitor.rs
@@ -1,11 +1,12 @@
 //! The visitor trait and common implementations.
 
 use crate::{
-    Apk, CallSite, Class, Code, DexAnnotation, DexAnnotationItem, DexFile, DexString, DexValue,
-    Field, FieldVisibility, HiddenApiData, HiddenApiDomain, HiddenApiPermission, IdEnum, IdField,
-    IdMethod, IdMethodType, IdType, Method, MethodHandle, MethodVisibility, Result,
-    ins::Instruction, scalar::*,
+    ins::Instruction, scalar::*, Apk, CallSite, Class, Code, DexAnnotation, DexAnnotationItem,
+    DexFile, DexString, DexValue, Field, FieldVisibility, HiddenApiData, HiddenApiDomain,
+    HiddenApiPermission, IdEnum, IdField, IdMethod, IdMethodType, IdType, Method, MethodHandle,
+    MethodVisibility, Result,
 };
+use rayon::prelude::*;
 use std::collections::HashSet;
 
 pub trait Visitor: Sized {
@@ -275,6 +276,25 @@ impl Visitor for TypeCollector {
         self.types.insert(ty.clone());
         Ok(())
     }
+
+    /// Visit every class of `dex` in parallel with a fresh collector each,
+    /// then merge the per-class results. Any visit error is returned.
+    fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
+        let results: Vec<_> = dex
+            .classes
+            .par_iter()
+            .map(|(ty, cls)| {
+                let mut v = Self::default();
+                v.visit_type(ty)
+                    .and_then(|_| v.visit_class(cls))
+                    .map(|_| v.result())
+            })
+            .collect::<Result<_>>()?;
+        for r in results.into_iter() {
+            self.types.extend(r);
+        }
+        Ok(())
+    }
 }
 
 #[derive(Debug, Default)]
@@ -293,6 +313,25 @@ impl Visitor for MethodTypeCollector {
         self.method_types.insert(mty.clone());
         Ok(())
     }
+
+    /// Visit every class of `dex` in parallel with a fresh collector each,
+    /// then merge the per-class results. Any visit error is returned.
+    fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
+        let results: Vec<_> = dex
+            .classes
+            .par_iter()
+            .map(|(ty, cls)| {
+                let mut v = Self::default();
+                v.visit_type(ty)
+                    .and_then(|_| v.visit_class(cls))
+                    .map(|_| v.result())
+            })
+            .collect::<Result<_>>()?;
+        for r in results.into_iter() {
+            self.method_types.extend(r);
+        }
+        Ok(())
+    }
 }
 
 #[derive(Debug, Default)]
@@ -311,6 +350,25 @@ impl Visitor for FieldIdCollector {
         self.field_ids.insert(id.clone());
         Ok(())
     }
+
+    /// Visit every class of `dex` in parallel with a fresh collector each,
+    /// then merge the per-class results. Any visit error is returned.
+    fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
+        let results: Vec<_> = dex
+            .classes
+            .par_iter()
+            .map(|(ty, cls)| {
+                let mut v = Self::default();
+                v.visit_type(ty)
+                    .and_then(|_| v.visit_class(cls))
+                    .map(|_| v.result())
+            })
+            .collect::<Result<_>>()?;
+        for r in results.into_iter() {
+            self.field_ids.extend(r);
+        }
+        Ok(())
+    }
 }
 
 #[derive(Debug, Default)]
@@ -329,6 +387,25 @@ impl Visitor for MethodIdCollector {
         self.method_ids.insert(id.clone());
         Ok(())
     }
+
+    /// Visit every class of `dex` in parallel with a fresh collector each,
+    /// then merge the per-class results. Any visit error is returned.
+    fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
+        let results: Vec<_> = dex
+            .classes
+            .par_iter()
+            .map(|(ty, cls)| {
+                let mut v = Self::default();
+                v.visit_type(ty)
+                    .and_then(|_| v.visit_class(cls))
+                    .map(|_| v.result())
+            })
+            .collect::<Result<_>>()?;
+        for r in results.into_iter() {
+            self.method_ids.extend(r);
+        }
+        Ok(())
+    }
 }
 
 #[derive(Debug, Default)]