diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 38fbbba..ec82ae0 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -101,13 +101,16 @@ impl Apk { /// Extract a class from a dex file reader. /// `class_item_idx` if the index of the `class_def_item` of the class, **not** the /// `class_idx`. - fn get_class_from_dex_file( + fn get_class_from_dex_file( &self, class_item: &ClassDefItem, class_item_idx: usize, dex: &DexFileReader, - label_each_ins: bool, - ) -> Result { + label_ins: F, + ) -> Result + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + Clone, + { let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?; let superclass = if class_item.superclass_idx == NO_INDEX.0 { None @@ -196,7 +199,7 @@ impl Apk { for mut method in Self::get_method_list_from_encoded_field_list( &data.direct_methods, dex, - label_each_ins, + label_ins.clone(), )? { if let Some(hiddenapi) = &hiddenapi { method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); @@ -207,7 +210,7 @@ impl Apk { for mut method in Self::get_method_list_from_encoded_field_list( &data.virtual_methods, dex, - label_each_ins, + label_ins.clone(), )? { if let Some(hiddenapi) = &hiddenapi { method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); @@ -729,13 +732,16 @@ impl Apk { /// Return a [`Method`] from it's idx (index in `method_ids`) in the dex file and its access flags /// ([`EncodedMethod.access_flags`]) and code offset ([`EncodedMethod.code_off`]). 
- pub fn get_method_from_idx( + pub fn get_method_from_idx( idx: usize, Uleb128(access_flags): Uleb128, Uleb128(code_off): Uleb128, dex: &DexFileReader, - label_each_ins: bool, - ) -> Result { + mut label_ins: F, + ) -> Result + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + Clone, + { let descriptor = Self::get_id_method_from_idx(idx, dex)?; let is_public = (access_flags & ACC_PUBLIC) != 0; @@ -807,8 +813,10 @@ impl Apk { let code = if code_off == 0 { None } else { - match Self::get_code_from_off(code_off, dex, label_each_ins) - .with_context(|| format!("Failed to parse code of method {}", descriptor.__str__())) + match Self::get_code_from_off(code_off, dex, |ins, addr| { + label_ins(&descriptor, ins, addr) + }) + .with_context(|| format!("Failed to parse code of method {}", descriptor.__str__())) { Err(e) => { error!( @@ -844,20 +852,19 @@ impl Apk { } /// Convert an instruction format to an instruction. - fn instruction_format_to_instruction( + fn instruction_format_to_instruction( format: &InsFormat, addr: usize, insns_ref: &HashMap, dex: &DexFileReader, - label_each_ins: bool, - ) -> Result)>> { + label_ins: &mut F, + ) -> Result)>> + where + F: FnMut(&instructions::Instruction, usize) -> Option, + { use crate::instructions::*; use InsFormat::*; let mut labels = HashMap::new(); - if label_each_ins { - let label = format!("label_{addr:08X}"); - labels.insert(addr, label.clone()); - } let ins = match format.clone() { Format10X { op: 0x00 } => Instruction::Nop {}, Format12X { op: 0x01, va, vb } => Instruction::Move { @@ -2657,15 +2664,19 @@ impl Apk { }; ins.sanity_check() .with_context(|| anyhow!("Invalid instruction {ins:?} found at {addr}: {format:?}"))?; + if let Some(label) = label_ins(&ins, addr) { + //let label = format!("label_{addr:08X}"); + labels.insert(addr, label); + } + Ok(Some((ins, labels))) } /// Return a [`Code`] from it's offset in the dex file. 
- pub fn get_code_from_off( - offset: u32, - dex: &DexFileReader, - label_each_ins: bool, - ) -> Result { + pub fn get_code_from_off(offset: u32, dex: &DexFileReader, mut label_ins: F) -> Result + where + F: FnMut(&instructions::Instruction, usize) -> Option, + { use crate::instructions::Instruction; let code_item = dex.get_struct_at_offset::(offset)?; @@ -2774,7 +2785,7 @@ impl Apk { addr, &instructions_raw, dex, - label_each_ins, + &mut label_ins, )? { instructions.push((addr, ins)); addr += ins_f.size() / 2; @@ -2876,11 +2887,14 @@ impl Apk { /// The index of the fields is computed by summing the [`EncodedMethod.field_idx_diff`] of the /// previous element of the list the diff of the current field, so the list must be preserved /// as in the dex file. - pub fn get_method_list_from_encoded_field_list( + pub fn get_method_list_from_encoded_field_list( encoded_methods: &[EncodedMethod], dex: &DexFileReader, - label_each_ins: bool, - ) -> Result> { + label_ins: F, + ) -> Result> + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + Clone, + { let mut idx = 0; let mut methods = vec![]; for method in encoded_methods { @@ -2891,7 +2905,7 @@ impl Apk { method.access_flags, method.code_off, dex, - label_each_ins, + label_ins.clone(), )?); } Ok(methods) @@ -3049,8 +3063,17 @@ impl Apk { /// Load all android files in an application. /// This **does not include any .dex file that android would not load. + /// + /// - `label_ins`: function that takes a method id, an instruction and its address and returns + /// a label, if a label needs to be inserted before the instruction.
#[cfg(feature = "external-zip-reader")] - pub fn load_apk(apk: impl Read + Seek, label_each_ins: bool, cache: bool) -> Result { + pub fn load_apk(apk: impl Read + Seek, label_ins: F, cache: bool) -> Result + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + + Clone + + Send + + Sync, + { let mut zip = ZipArchive::new(apk)?; let mut apk = Self::default(); let file_names: HashSet = zip.file_names().map(|s| s.into()).collect(); @@ -3073,15 +3096,25 @@ impl Apk { let mut data = vec![]; // TODO: It sould not be necessary to clone data, but right now DexFileReader takes a &[u8] std::io::copy(&mut zip.by_name(&name)?, &mut data)?; - apk.add_dex_file(&name, &data, label_each_ins, cache)?; + apk.add_dex_file(&name, &data, label_ins.clone(), cache)?; } Ok(apk) } /// Load all android files in an application. /// This **does not include any .dex file that android would not load. + /// + /// - `label_ins`: Function that take a method id, instruction and address and return + /// a label, if a label needs to be inserted before the instruction. + /// - `cache`: if set to true, copy and cache the binary data format. #[cfg(not(feature = "external-zip-reader"))] - pub fn load_apk(apk: impl Read + Seek, label_each_ins: bool, cache: bool) -> Result { + pub fn load_apk(apk: impl Read + Seek, label_ins: F, cache: bool) -> Result + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + + Clone + + Send + + Sync, + { let mut apk_z = ZipFileReader::new(apk); let mut apk = Self::default(); let dex_names = apk_z @@ -3091,10 +3124,56 @@ impl Apk { .collect::>(); for name in dex_names { let data = apk_z.read_file_as_vec(&name); - apk.add_dex_file(&name, &data, label_each_ins, cache)?; + apk.add_dex_file(&name, &data, label_ins.clone(), cache)?; } Ok(apk) } + + /// Add the content of a dex file to the apk. 
+ /// + /// # Parameters + /// - `name`: the name of the dex file + /// - `data`: the dex file binary + /// - `label_ins`: Function that take a method id, instruction and address and return + /// a label, if a label needs to be inserted before the instruction. + /// - `cache`: if set to true, copy and cache the binary data format. + pub fn add_dex_file( + &mut self, + name: &str, + data: &[u8], + label_ins: F, + cache: bool, + ) -> Result<()> + where + F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option + + Clone + + Send + + Sync, + { + let name: String = name.into(); + if self.dex_files.contains_key(&name) { + bail!("{name} already exist in the application") + } + let mut dex = DexFileReader::new(data)?; + let classes = dex + .get_class_defs() + .par_iter() + .enumerate() + .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_ins.clone())) + .map(|class| class.map(|class| (class.descriptor.clone(), class))) + .collect::, _>>()?; + let dex_file = DexFile { + classes, + not_referenced_strings: dex + .get_not_resolved_strings()? + .into_iter() + .map(DexString) + .collect(), + bin_cache: if cache { Some(data.to_vec()) } else { None }, + }; + self.dex_files.insert(name, dex_file); + Ok(()) + } } #[cfg_attr(feature = "python", pymethods)] @@ -3113,7 +3192,17 @@ impl Apk { #[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))] pub fn load_apk_path(apk: PathBuf, label_each_ins: bool, cache: bool) -> Result { let file = File::open(apk)?; - Self::load_apk(file, label_each_ins, cache) + Self::load_apk( + file, + |_, _, addr| { + if label_each_ins { + Some(format!("label_{addr:08X}")) + } else { + None + } + }, + cache, + ) } /// Load all android files in an application. 
@@ -3121,7 +3210,17 @@ impl Apk { #[cfg_attr(feature = "python", staticmethod)] #[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))] pub fn load_apk_bin(apk: &[u8], label_each_ins: bool, cache: bool) -> Result { - Self::load_apk(Cursor::new(apk), label_each_ins, cache) + Self::load_apk( + Cursor::new(apk), + |_, _, addr| { + if label_each_ins { + Some(format!("label_{addr:08X}")) + } else { + None + } + }, + cache, + ) } // TODO: change cache to true when cache invalidation is setup @@ -3133,37 +3232,26 @@ impl Apk { /// - `label_each_ins`: if set to true, insert a label before each instruction /// indicating the instruction address /// - `cache`: if set to true, copy and cache the binary data format. - #[cfg_attr(feature = "python", pyo3(signature = (name, data, label_each_ins=false, cache=false)))] - pub fn add_dex_file( + #[cfg_attr(feature = "python", pyo3(name = "add_dex_file", signature = (name, data, label_each_ins=false, cache=false)))] + pub fn add_dex_file_py( &mut self, name: &str, data: &[u8], label_each_ins: bool, cache: bool, ) -> Result<()> { - let name: String = name.into(); - if self.dex_files.contains_key(&name) { - bail!("{name} already exist in the application") - } - let mut dex = DexFileReader::new(data)?; - let classes = dex - .get_class_defs() - .par_iter() - .enumerate() - .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins)) - .map(|class| class.map(|class| (class.descriptor.clone(), class))) - .collect::, _>>()?; - let dex_file = DexFile { - classes, - not_referenced_strings: dex - .get_not_resolved_strings()? 
- .into_iter() - .map(DexString) - .collect(), - bin_cache: if cache { Some(data.to_vec()) } else { None }, - }; - self.dex_files.insert(name, dex_file); - Ok(()) + self.add_dex_file( + name, + data, + |_, _, addr| { + if label_each_ins { + Some(format!("label_{addr:08X}")) + } else { + None + } + }, + cache, + ) } /// List the classes implemented in the application. diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index f9e95ae..61406f7 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -86,12 +86,12 @@ impl IdMethodType { /// Try to parse a smali representation of a prototype into a IdMethodType. /// /// ``` - /// use androscalpel::IdMethodType; + /// use androscalpel::{IdMethodType, IdType}; /// /// let proto = IdMethodType::from_smali("(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z").unwrap(); /// assert_eq!( /// proto, - /// IdMethodType( + /// IdMethodType::new( /// IdType::boolean(), /// vec![ /// IdType::class("androidx/core/util/Predicate"), @@ -282,7 +282,7 @@ impl IdType { /// id types. /// /// ``` - /// use androscalpel::IdType + /// use androscalpel::IdType; /// /// let id_type = IdType::from_smali( /// "Landroidx/core/util/Predicate;" @@ -706,7 +706,7 @@ impl IdField { /// Try to parse a smali representation of a field into a IdField. /// /// ``` - /// use androscalpel::IdField; + /// use androscalpel::{IdField, IdType}; /// /// let proto = IdField::from_smali("Ljava/lang/annotation/ElementType;->FIELD:Ljava/lang/annotation/ElementType;").unwrap(); /// assert_eq!( @@ -875,7 +875,7 @@ impl IdMethod { /// Try to parse a smali representation of method into a IdMethod. 
/// /// ``` - /// use androscalpel::IdMethod; + /// use androscalpel::{IdType, IdMethod, IdMethodType}; /// /// let id_method = IdMethod::from_smali( /// "Landroidx/core/util/Predicate;->lambda$and$0(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z" diff --git a/androscalpel/src/tests/mod.rs b/androscalpel/src/tests/mod.rs index 66df9e6..4ea7bb3 100644 --- a/androscalpel/src/tests/mod.rs +++ b/androscalpel/src/tests/mod.rs @@ -42,7 +42,7 @@ fn get_hello_world_apk() -> &'static Apk { HELLO_WORLD_APK.get_or_init(|| { let mut apk = Apk::new(); let start = Instant::now(); - apk.add_dex_file("classes.dex", get_hello_world_dex(), false, false) + apk.add_dex_file("classes.dex", get_hello_world_dex(), |_, _, _| None, false) .unwrap(); let duration = start.elapsed(); write_to_report(&format!("Parsing classes_hello_world.dex: {duration:?}")); @@ -142,7 +142,7 @@ fn test_generated_apk_equivalence() { let new_dex = get_hello_world_recompilled(); let mut new_apk = Apk::new(); new_apk - .add_dex_file("classes.dex", &new_dex, false, false) + .add_dex_file("classes.dex", &new_dex, |_, _, _| None, false) .unwrap(); /* @@ -575,7 +575,7 @@ fn test_2_from_json() { let dex = apk.gen_raw_dex().unwrap().remove("classes.dex").unwrap(); let mut new_apk = Apk::new(); new_apk - .add_dex_file("classes.dex", &dex, false, false) + .add_dex_file("classes.dex", &dex, |_, _, _| None, false) .unwrap(); assert_eq!(apk, new_apk); } @@ -638,7 +638,7 @@ fn test_hidden_api() { let apktool_result = std::io::BufReader::new(apktool_result); let apktool_result: sj::Value = sj::from_reader(apktool_result).unwrap(); let mut apk = Apk::new(); - apk.add_dex_file("classes.dex", &dex_raw, false, false) + apk.add_dex_file("classes.dex", &dex_raw, |_, _, _| None, false) .unwrap(); for cls in apktool_result.as_object().unwrap().keys() { assert!(