From 91859170c3a2cca98be26e4adbc58a9a316323bf Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Tue, 14 Jan 2025 20:29:17 +0100 Subject: [PATCH] start separating classes in files --- androscalpel/src/apk.rs | 184 +++++++++++++++++++++++---- androscalpel_serializer/src/debug.rs | 2 +- 2 files changed, 157 insertions(+), 29 deletions(-) diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 41a77ea..e8d2aa7 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -16,19 +16,44 @@ use androscalpel_serializer::Instruction as InsFormat; use androscalpel_serializer::*; use rayon::prelude::*; +#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)] +pub struct DexFile { + /// The classes in the dex file. + pub classes: HashMap, + /// Set of strings found in the dex file that are not referenced in classes. + pub not_referenced_strings: HashSet, + /// The binary of the dexfile. + #[serde(skip_serializing)] + pub(crate) bin_cache: Option>, // TODO: invalidate the cache !!! +} + /// Represent an apk. #[pyclass] -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)] +#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)] pub struct Apk { - #[pyo3(get)] - pub classes: HashMap, - #[pyo3(get)] - pub not_referenced_strings: HashSet, + pub dex_files: HashMap, // TODO: use accessors for cache invalidation } impl Apk { /// Add the content of a dex file to the apk. - pub fn add_dex_file(&mut self, data: &[u8], label_each_ins: bool) -> Result<()> { + /// + /// # Parameters + /// - `name`: the name of the dex file + /// - `data`: the dex file binary + /// - `label_each_ins`: if set to true, insert a label before each instruction + /// indicating the instruction address + /// - `cache`: if set to true, copy and cache the binary data format. 
+ pub fn add_dex_file( + &mut self, + name: &str, + data: &[u8], + label_each_ins: bool, + cache: bool, + ) -> Result<()> { + let name: String = name.into(); + if self.dex_files.contains_key(&name) { + bail!("{name} already exist in the application") + } let mut dex = DexFileReader::new(data)?; let classes = dex .get_class_defs() @@ -36,10 +61,17 @@ impl Apk { .enumerate() .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins)) .map(|class| class.map(|class| (class.descriptor.clone(), class))) - .collect::, _>>()?; - self.classes.par_extend(classes); - self.not_referenced_strings - .extend(dex.get_not_resolved_strings()?.into_iter().map(DexString)); + .collect::, _>>()?; + let dex_file = DexFile { + classes, + not_referenced_strings: dex + .get_not_resolved_strings()? + .into_iter() + .map(DexString) + .collect(), + bin_cache: if cache { Some(data.to_vec()) } else { None }, + }; + self.dex_files.insert(name, dex_file); Ok(()) } @@ -2834,7 +2866,8 @@ impl Apk { Ok(methods) } - pub fn gen_raw_dex(&self) -> Result>> { + pub fn gen_raw_dex(&self) -> Result>> { + /* let mut dex_writers = vec![]; let mut dex_writer = DexWriter::new(); for class_ in self.classes.values() { @@ -2865,6 +2898,46 @@ impl Apk { .into_iter() .map(|mut dex_writer| dex_writer.gen_dex_file_to_vec()) .collect() + */ + let mut bin_dex_files = HashMap::new(); + // TODO: Multithread + for ( + name, + DexFile { + classes, + not_referenced_strings, + bin_cache, + }, + ) in self.dex_files.iter() + { + if let Some(bin_cache) = bin_cache { + bin_dex_files.insert(name.clone(), bin_cache.clone()); + } else { + let mut writer = DexWriter::new(); + for class_ in classes.values() { + match writer.add_class(class_) { + Ok(()) => (), + Err(DexWritterError::OutOfSpace(_)) => bail!( + "{name} dex file has to many class/method/field to be serialize in dex format" + ), + } + } + for string in not_referenced_strings { + writer.add_string(string.clone()); + } + 
+ bin_dex_files.insert(name.clone(), writer.gen_dex_file_to_vec()?); + }; + } + Ok(bin_dex_files) + } + + /// Search for the given class. If several classes with the same name are found, + /// return the class used by Android (classes.dex over classes2.dex over classes3.dex ...), + /// and if not found in files used by Android, look in any other files (classes0.dex, + /// classes1.dex, classes02.dex, assets/stuff.dex, ...), and select the first one in + /// alphabetical order of dex file name. + pub fn get_class_mut(&mut self, class_id: IdType) -> Option<&mut Class> { + todo!() } } @@ -2873,30 +2946,75 @@ impl Apk { #[new] pub fn new() -> Self { Self { - classes: HashMap::new(), - not_referenced_strings: HashSet::new(), + dex_files: HashMap::new(), } } #[pyo3(name = "add_dex_file")] - pub fn py_add_dex_file(&mut self, data: &[u8], label_each_ins: Option) -> Result<()> { - self.add_dex_file(data, label_each_ins.unwrap_or(false)) + pub fn py_add_dex_file( + &mut self, + name: &str, + data: &[u8], + label_each_ins: Option, + cache: Option, + ) -> Result<()> { + self.add_dex_file( + name, + data, + label_each_ins.unwrap_or(false), + cache.unwrap_or(false), // TODO: change to true when cache invalidation is set up + ) } - pub fn add_class(&mut self, class: Class) -> Result<()> { + pub fn add_class(&mut self, dex_file: &str, class: Class) -> Result<()> { + let file: String = dex_file.into(); let id = class.descriptor.clone(); - if self.classes.get(&id).is_some() { - bail!("class {} already exists in the apk", id.__str__()); + if !self.dex_files.contains_key(&file) { + self.dex_files.insert(file.clone(), DexFile::default()); } - self.classes.insert(id, class); + if self + .dex_files + .get(&file) + .unwrap() + .classes + .get(&id) + .is_some() + { + bail!("class {} already exists in {}", id.__str__(), &file); + } + self.dex_files + .get_mut(&file) + .unwrap() + .classes + .insert(id, class); Ok(()) } - pub fn set_method_code(&mut self, method_id: IdMethod, code: Option) -> 
Result<()> { - let class = self - .classes - .get_mut(&method_id.class_) - .ok_or_else(|| anyhow!("Class {} not found", method_id.class_.__repr__()))?; + pub fn set_method_code( + &mut self, + method_id: IdMethod, + code: Option, + dex_file: Option<&str>, + ) -> Result<()> { + let ty = &method_id.class_; + + let class = if let Some(dex_file) = dex_file { + self.dex_files + .get_mut(&dex_file.to_string()) + .with_context(|| format!("file {} not found in apk", dex_file))? + .classes + .get_mut(&method_id.class_) + .ok_or_else(|| { + anyhow!( + "Class {} not found in file {}", + method_id.class_.__repr__(), + dex_file + ) + })? + } else { + self.get_class_mut(ty.clone()) + .with_context(|| format!("{} not found in apk", method_id.class_.__repr__()))? + }; let method = class .direct_methods .get_mut(&method_id) @@ -2911,11 +3029,11 @@ impl Apk { } #[pyo3(name = "gen_raw_dex")] //Sad GIL noise - pub fn py_gen_raw_dex(&self, py: Python<'_>) -> Result> { + pub fn py_gen_raw_dex(&self, py: Python<'_>) -> Result> { Ok(self .gen_raw_dex()? .into_iter() - .map(|bytes| PyBytes::new_bound(py, &bytes).into()) + .map(|(file_name, bytes)| (file_name, PyBytes::new_bound(py, &bytes).into())) .collect()) } @@ -2928,7 +3046,17 @@ impl Apk { Ok(serde_json::from_str(json)?) } - pub fn remove_class(&mut self, class: &IdType) { - self.classes.remove(class); + pub fn remove_class(&mut self, class: &IdType, dex_file: Option<&str>) -> Result<()> { + // TODO: remove all if dex_file is not provided + if let Some(dex_file) = dex_file { + self.dex_files + .get_mut(&dex_file.to_string()) + .with_context(|| format!("file {} not found in apk", dex_file))? 
+ .classes + .remove(class); + } else { + todo!() + } + Ok(()) } } diff --git a/androscalpel_serializer/src/debug.rs b/androscalpel_serializer/src/debug.rs index bf2f4f4..e9e94e4 100644 --- a/androscalpel_serializer/src/debug.rs +++ b/androscalpel_serializer/src/debug.rs @@ -678,7 +678,7 @@ mod test { fn test_get_expl_debug() { const RAW_DEBUG: [u8; 10] = [23, 0, 14, 135, 3, 0, 16, 2, 150, 0]; let debug = DebugInfoItem::deserialize_from_slice(&RAW_DEBUG).unwrap(); - let mut reader = DebugInfoReader::new(&debug); + let mut reader = DebugInfoReader::new(debug.clone()); let mut list_info = vec![]; loop { list_info.push(reader.next_info());