From bc3392d94636ec3302367ef7cbb5886a10652bc1 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Fri, 10 Jan 2025 17:45:00 +0100 Subject: [PATCH] WIP, TODO: regenerate debug info from new debug instruction --- androscalpel/src/apk.rs | 110 ++++++++++++++++++++------- androscalpel/src/code.rs | 6 -- androscalpel/src/instructions.rs | 40 ++++++---- androscalpel_serializer/src/debug.rs | 24 +++++- 4 files changed, 130 insertions(+), 50 deletions(-) diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index b822d0a..6430f8c 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -26,17 +26,15 @@ pub struct Apk { pub not_referenced_strings: HashSet, } -const LABEL_EACH_INST: bool = true; - impl Apk { /// Add the content of a dex file to the apk. - pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> { + pub fn add_dex_file(&mut self, data: &[u8], label_each_ins: bool) -> Result<()> { let mut dex = DexFileReader::new(data)?; let classes = dex .get_class_defs() .par_iter() .enumerate() - .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex)) + .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins)) .map(|class| class.map(|class| (class.descriptor.clone(), class))) .collect::, _>>()?; self.classes.par_extend(classes); @@ -53,6 +51,7 @@ impl Apk { class_item: &ClassDefItem, class_item_idx: usize, dex: &DexFileReader, + label_each_ins: bool, ) -> Result { let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?; let superclass = if class_item.superclass_idx == NO_INDEX.0 { @@ -139,18 +138,22 @@ impl Apk { hiddenapi_i += 1; } } - for mut method in - Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)? - { + for mut method in Self::get_method_list_from_encoded_field_list( + &data.direct_methods, + dex, + label_each_ins, + )? { if let Some(hiddenapi) = &hiddenapi { method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); hiddenapi_i += 1; } direct_methods.insert(method.descriptor.clone(), method); } - for mut method in - Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)? - { + for mut method in Self::get_method_list_from_encoded_field_list( + &data.virtual_methods, + dex, + label_each_ins, + )? { if let Some(hiddenapi) = &hiddenapi { method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); hiddenapi_i += 1; @@ -676,6 +679,7 @@ impl Apk { Uleb128(access_flags): Uleb128, Uleb128(code_off): Uleb128, dex: &DexFileReader, + label_each_ins: bool, ) -> Result { let descriptor = Self::get_id_method_from_idx(idx, dex)?; @@ -748,9 +752,11 @@ impl Apk { let code = if code_off == 0 { None } else { - Some(Self::get_code_from_off(code_off, dex).with_context(|| { - format!("Failed to parse code of method {}", descriptor.__str__()) - })?) + Some( + Self::get_code_from_off(code_off, dex, label_each_ins).with_context(|| { + format!("Failed to parse code of method {}", descriptor.__str__()) + })?, + ) }; Ok(Method { @@ -780,11 +786,12 @@ impl Apk { addr: usize, insns_ref: &HashMap, dex: &DexFileReader, + label_each_ins: bool, ) -> Result)>> { use crate::instructions::*; use InsFormat::*; let mut labels = HashMap::new(); - if LABEL_EACH_INST { + if label_each_ins { let label = format!("label_{addr:08X}"); labels.insert(addr, label.clone()); } @@ -2591,7 +2598,11 @@ impl Apk { } /// Return a [`Code`] from it's offset in the dex file. - pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result { + pub fn get_code_from_off( + offset: u32, + dex: &DexFileReader, + label_each_ins: bool, + ) -> Result { use crate::instructions::Instruction; let code_item = dex.get_struct_at_offset::(offset)?; @@ -2613,14 +2624,12 @@ impl Apk { } else { None }; - let debug_info = if let Some(debug_info) = debug_info { - let mut cursor = std::io::Cursor::new(vec![]); - debug_info - .bytecode - .serialize(&mut cursor, DbgBytecode::EndSequence)?; - (debug_info.line_start.0, cursor.into_inner()) + let (mut current_debug_info, mut debug_infos) = if let Some(debug_info) = debug_info { + let mut debug_infos = DebugStateMachine::new(&debug_info); + let current_debug_info = debug_infos.next_info(); + (current_debug_info, Some(debug_infos)) } else { - (0, vec![]) + (DebugInfo::EndOfData, None) }; let mut labels: HashMap = HashMap::new(); let mut tries = HashMap::new(); @@ -2697,9 +2706,13 @@ impl Apk { let mut instructions = vec![]; addr = 0; for ins_f in &code_item.insns { - if let Some((ins, ins_labels)) = - Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)? - { + if let Some((ins, ins_labels)) = Self::instruction_format_to_instruction( + ins_f, + addr, + &instructions_raw, + dex, + label_each_ins, + )? { instructions.push((addr, ins)); addr += ins_f.size() / 2; for (key, val) in &ins_labels { @@ -2715,6 +2728,46 @@ impl Apk { } let mut insns = vec![]; for (addr, ins) in instructions { + while current_debug_info != DebugInfo::EndOfData + && addr <= current_debug_info.get_addr() as usize + { + insns.push(match current_debug_info { + DebugInfo::DefLocal { reg, val, .. } => Instruction::DebugLocal { + reg, + name: val + .name_idx + .map(|idx| dex.get_string(idx)) + .transpose()? + .map(|str| DexString(str).into()), + type_: val + .type_idx + .map(|idx| Self::get_id_type_from_idx(idx as usize, dex)) + .transpose()?, + signature: val + .sig_idx + .map(|idx| dex.get_string(idx)) + .transpose()? + .map(|str| DexString(str).into()), + }, + DebugInfo::EndLocal { reg, .. } => Instruction::DebugEndLocal { reg }, + DebugInfo::PrologueEnd { .. } => Instruction::DebugEndPrologue {}, + DebugInfo::EpilogueBegin { .. } => Instruction::DebugBeginEpilogue {}, + DebugInfo::SetSourceFile { + source_file_idx, .. + } => Instruction::DebugSourceFile { + file: source_file_idx + .map(|idx| dex.get_string(idx)) + .transpose()? + .map(|str| DexString(str).into()), + }, + DebugInfo::SetLineNumber { line_num, .. } => Instruction::DebugLine { + number: line_num as usize, + }, + DebugInfo::EndOfData => { + panic!("Found EndOfData debug info, that should no happend here.") + } + }); + } if let Some(try_) = tries.remove(&addr) { insns.push(try_); } @@ -2741,7 +2794,6 @@ impl Apk { registers_size: code_item.registers_size, ins_size: code_item.ins_size, outs_size: code_item.outs_size, - debug_info, parameter_names, insns, }) @@ -2757,6 +2809,7 @@ impl Apk { pub fn get_method_list_from_encoded_field_list( encoded_methods: &[EncodedMethod], dex: &DexFileReader, + label_each_ins: bool, ) -> Result> { let mut idx = 0; let mut methods = vec![]; @@ -2768,6 +2821,7 @@ impl Apk { method.access_flags, method.code_off, dex, + label_each_ins, )?); } Ok(methods) @@ -2818,8 +2872,8 @@ impl Apk { } #[pyo3(name = "add_dex_file")] - pub fn py_add_dex_file(&mut self, data: &[u8]) -> Result<()> { - self.add_dex_file(data) + pub fn py_add_dex_file(&mut self, data: &[u8], label_each_ins: Option) -> Result<()> { + self.add_dex_file(data, label_each_ins.unwrap_or(false)) } pub fn add_class(&mut self, class: Class) -> Result<()> { diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index 1a706ff..cfb9ca8 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -31,10 +31,6 @@ pub struct Code { /// The number of words of outgoing argument space #[pyo3(get)] pub outs_size: u16, - // TODO: implement - /// The debug info - #[pyo3(get)] - pub debug_info: (u32, Vec), // Should be stripped, copying like this just don't work /// The names of the parameters if given #[pyo3(get)] pub parameter_names: Option>>, @@ -50,7 +46,6 @@ impl PartialEq for Code { (comparable_self.registers_size == comparable_other.registers_size) && (comparable_self.ins_size == comparable_other.ins_size) && (comparable_self.outs_size == comparable_other.outs_size) - && (comparable_self.debug_info == comparable_other.debug_info) && (comparable_self.insns == comparable_other.insns) } } @@ -82,7 +77,6 @@ impl Code { outs_size, insns, parameter_names, - debug_info: (0, vec![]), } } diff --git a/androscalpel/src/instructions.rs b/androscalpel/src/instructions.rs index 5ec2b4f..f3e374b 100644 --- a/androscalpel/src/instructions.rs +++ b/androscalpel/src/instructions.rs @@ -649,7 +649,7 @@ pub enum Instruction { /// Debug information. Indicate the beginning of the Epilogue DebugBeginEpilogue {}, /// Debug information. Indicate the source file of the following instructions. - DebugSourceFile { file: String }, + DebugSourceFile { file: Option }, /// Debug information. Indicate the line number of the following instructions. DebugLine { number: usize }, } @@ -1217,19 +1217,20 @@ impl Visitable for Instruction { Self::DebugLocal { reg: _, name: _, - type_, + type_: Some(type_), signature: _, - } => { - if let Some(type_) = type_ { - v.visit_type(type_) - } else { - Ok(()) - } - } + } => v.visit_type(type_), + Self::DebugLocal { + reg: _, + name: _, + type_: None, + signature: _, + } => Ok(()), Self::DebugEndLocal { reg: _ } => Ok(()), Self::DebugEndPrologue {} => Ok(()), Self::DebugBeginEpilogue {} => Ok(()), - Self::DebugSourceFile { file } => v.visit_string(&(file.as_str().into())), + Self::DebugSourceFile { file: Some(file) } => v.visit_string(&(file.as_str().into())), + Self::DebugSourceFile { file: None } => Ok(()), Self::DebugLine { number: _ } => Ok(()), } } @@ -1889,9 +1890,13 @@ impl VisitableMut for Instruction { Self::DebugEndLocal { reg: _ } => Ok(self), Self::DebugEndPrologue {} => Ok(self), Self::DebugBeginEpilogue {} => Ok(self), - Self::DebugSourceFile { file } => v - .visit_string(file.as_str().into()) - .map(|file| Self::DebugSourceFile { file: file.into() }), + Self::DebugSourceFile { file: Some(file) } => { + v.visit_string(file.as_str().into()) + .map(|file| Self::DebugSourceFile { + file: Some(file.into()), + }) + } + Self::DebugSourceFile { file: None } => Ok(self), Self::DebugLine { number: _ } => Ok(self), } } @@ -2828,7 +2833,9 @@ impl Instruction { Self::DebugEndPrologue {} => ".prologue".into(), Self::DebugBeginEpilogue {} => ".epilogue".into(), // TODO: check if/how apktool/smali handles empty change of src file - Self::DebugSourceFile { file } => format!(".source_file {file}"), + Self::DebugSourceFile { file: Some(file) } => format!(".source_file {file}"), + // TODO: find a better representation + Self::DebugSourceFile { file: None } => ".source_file unknown".into(), Self::DebugLine { number } => format!(".line {number}"), } } @@ -3636,7 +3643,10 @@ impl Instruction { Self::DebugEndPrologue {} => "Instruction::DebugEndPrologue".into(), Self::DebugBeginEpilogue {} => "Instruction::DebugBeginEpilogue".into(), // TODO: check if/how apktool/smali handles empty change of src file - Self::DebugSourceFile { file } => format!("Instruction::DebugSourceFile({file})"), + Self::DebugSourceFile { file: Some(file) } => { + format!("Instruction::DebugSourceFile({file})") + } + Self::DebugSourceFile { file: None } => "Instruction::DebugSourceFile(None)".into(), Self::DebugLine { number } => format!("Instruction::DebugLine({number})"), } } diff --git a/androscalpel_serializer/src/debug.rs b/androscalpel_serializer/src/debug.rs index 8cbac6f..4c868f9 100644 --- a/androscalpel_serializer/src/debug.rs +++ b/androscalpel_serializer/src/debug.rs @@ -300,6 +300,20 @@ pub enum DebugInfo { EndOfData, } +impl DebugInfo { + pub fn get_addr(&self) -> u32 { + match self { + Self::DefLocal { addr, .. } => *addr, + Self::EndLocal { addr, .. } => *addr, + Self::PrologueEnd { addr } => *addr, + Self::EpilogueBegin { addr } => *addr, + Self::SetSourceFile { addr, .. } => *addr, + Self::SetLineNumber { addr, .. } => *addr, + Self::EndOfData => u32::MAX, // TODO should be an Option::None? + } + } +} + /// A state machine that interpret a [`DebugInfoItem`]. #[derive(Debug, PartialEq, Eq, Clone)] pub struct DebugStateMachine<'a> { @@ -344,7 +358,7 @@ impl<'a> DebugStateMachine<'a> { pub fn get_ins(&self) -> Result { if self.pc >= self.debug_info.bytecode.len() { return Err(Error::OutOfBound( - "Try to read an instruction out of bound, maybe after the enf of the debug sequence." + "Try to read an instruction out of bound, maybe after the end of the debug sequence." .into() )); } @@ -513,6 +527,14 @@ impl<'a> DebugStateMachine<'a> { } } } + + pub fn next_info(&mut self) -> DebugInfo { + loop { + if let Some(info) = self.tick() { + return info; + } + } + } } #[cfg(test)]