WIP, TODO: regenerate debug info from new debug instruction

This commit is contained in:
Jean-Marie Mineau 2025-01-10 17:45:00 +01:00
parent f9f511013d
commit bc3392d946
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
4 changed files with 130 additions and 50 deletions

View file

@ -26,17 +26,15 @@ pub struct Apk {
pub not_referenced_strings: HashSet<DexString>, pub not_referenced_strings: HashSet<DexString>,
} }
const LABEL_EACH_INST: bool = true;
impl Apk { impl Apk {
/// Add the content of a dex file to the apk. /// Add the content of a dex file to the apk.
pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> { pub fn add_dex_file(&mut self, data: &[u8], label_each_ins: bool) -> Result<()> {
let mut dex = DexFileReader::new(data)?; let mut dex = DexFileReader::new(data)?;
let classes = dex let classes = dex
.get_class_defs() .get_class_defs()
.par_iter() .par_iter()
.enumerate() .enumerate()
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex)) .map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins))
.map(|class| class.map(|class| (class.descriptor.clone(), class))) .map(|class| class.map(|class| (class.descriptor.clone(), class)))
.collect::<Result<Vec<_>, _>>()?; .collect::<Result<Vec<_>, _>>()?;
self.classes.par_extend(classes); self.classes.par_extend(classes);
@ -53,6 +51,7 @@ impl Apk {
class_item: &ClassDefItem, class_item: &ClassDefItem,
class_item_idx: usize, class_item_idx: usize,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Class> { ) -> Result<Class> {
let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?; let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?;
let superclass = if class_item.superclass_idx == NO_INDEX.0 { let superclass = if class_item.superclass_idx == NO_INDEX.0 {
@ -139,18 +138,22 @@ impl Apk {
hiddenapi_i += 1; hiddenapi_i += 1;
} }
} }
for mut method in for mut method in Self::get_method_list_from_encoded_field_list(
Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)? &data.direct_methods,
{ dex,
label_each_ins,
)? {
if let Some(hiddenapi) = &hiddenapi { if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
hiddenapi_i += 1; hiddenapi_i += 1;
} }
direct_methods.insert(method.descriptor.clone(), method); direct_methods.insert(method.descriptor.clone(), method);
} }
for mut method in for mut method in Self::get_method_list_from_encoded_field_list(
Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)? &data.virtual_methods,
{ dex,
label_each_ins,
)? {
if let Some(hiddenapi) = &hiddenapi { if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
hiddenapi_i += 1; hiddenapi_i += 1;
@ -676,6 +679,7 @@ impl Apk {
Uleb128(access_flags): Uleb128, Uleb128(access_flags): Uleb128,
Uleb128(code_off): Uleb128, Uleb128(code_off): Uleb128,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Method> { ) -> Result<Method> {
let descriptor = Self::get_id_method_from_idx(idx, dex)?; let descriptor = Self::get_id_method_from_idx(idx, dex)?;
@ -748,9 +752,11 @@ impl Apk {
let code = if code_off == 0 { let code = if code_off == 0 {
None None
} else { } else {
Some(Self::get_code_from_off(code_off, dex).with_context(|| { Some(
Self::get_code_from_off(code_off, dex, label_each_ins).with_context(|| {
format!("Failed to parse code of method {}", descriptor.__str__()) format!("Failed to parse code of method {}", descriptor.__str__())
})?) })?,
)
}; };
Ok(Method { Ok(Method {
@ -780,11 +786,12 @@ impl Apk {
addr: usize, addr: usize,
insns_ref: &HashMap<usize, &InsFormat>, insns_ref: &HashMap<usize, &InsFormat>,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Option<(instructions::Instruction, HashMap<usize, String>)>> { ) -> Result<Option<(instructions::Instruction, HashMap<usize, String>)>> {
use crate::instructions::*; use crate::instructions::*;
use InsFormat::*; use InsFormat::*;
let mut labels = HashMap::new(); let mut labels = HashMap::new();
if LABEL_EACH_INST { if label_each_ins {
let label = format!("label_{addr:08X}"); let label = format!("label_{addr:08X}");
labels.insert(addr, label.clone()); labels.insert(addr, label.clone());
} }
@ -2591,7 +2598,11 @@ impl Apk {
} }
/// Return a [`Code`] from it's offset in the dex file. /// Return a [`Code`] from it's offset in the dex file.
pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result<Code> { pub fn get_code_from_off(
offset: u32,
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Code> {
use crate::instructions::Instruction; use crate::instructions::Instruction;
let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?; let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?;
@ -2613,14 +2624,12 @@ impl Apk {
} else { } else {
None None
}; };
let debug_info = if let Some(debug_info) = debug_info { let (mut current_debug_info, mut debug_infos) = if let Some(debug_info) = debug_info {
let mut cursor = std::io::Cursor::new(vec![]); let mut debug_infos = DebugStateMachine::new(&debug_info);
debug_info let current_debug_info = debug_infos.next_info();
.bytecode (current_debug_info, Some(debug_infos))
.serialize(&mut cursor, DbgBytecode::EndSequence)?;
(debug_info.line_start.0, cursor.into_inner())
} else { } else {
(0, vec![]) (DebugInfo::EndOfData, None)
}; };
let mut labels: HashMap<usize, String> = HashMap::new(); let mut labels: HashMap<usize, String> = HashMap::new();
let mut tries = HashMap::new(); let mut tries = HashMap::new();
@ -2697,9 +2706,13 @@ impl Apk {
let mut instructions = vec![]; let mut instructions = vec![];
addr = 0; addr = 0;
for ins_f in &code_item.insns { for ins_f in &code_item.insns {
if let Some((ins, ins_labels)) = if let Some((ins, ins_labels)) = Self::instruction_format_to_instruction(
Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)? ins_f,
{ addr,
&instructions_raw,
dex,
label_each_ins,
)? {
instructions.push((addr, ins)); instructions.push((addr, ins));
addr += ins_f.size() / 2; addr += ins_f.size() / 2;
for (key, val) in &ins_labels { for (key, val) in &ins_labels {
@ -2715,6 +2728,46 @@ impl Apk {
} }
let mut insns = vec![]; let mut insns = vec![];
for (addr, ins) in instructions { for (addr, ins) in instructions {
// Emit, ahead of the instruction at `addr`, every pending debug entry whose
// address is at or before `addr`. Each emitted entry is consumed by pulling
// the next one from the state machine; without that step the loop condition
// could never change once it held.
while current_debug_info != DebugInfo::EndOfData
    && current_debug_info.get_addr() as usize <= addr
{
    insns.push(match current_debug_info {
        DebugInfo::DefLocal { reg, val, .. } => Instruction::DebugLocal {
            reg,
            name: val
                .name_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
            type_: val
                .type_idx
                .map(|idx| Self::get_id_type_from_idx(idx as usize, dex))
                .transpose()?,
            signature: val
                .sig_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
        },
        DebugInfo::EndLocal { reg, .. } => Instruction::DebugEndLocal { reg },
        DebugInfo::PrologueEnd { .. } => Instruction::DebugEndPrologue {},
        DebugInfo::EpilogueBegin { .. } => Instruction::DebugBeginEpilogue {},
        DebugInfo::SetSourceFile {
            source_file_idx, ..
        } => Instruction::DebugSourceFile {
            file: source_file_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
        },
        DebugInfo::SetLineNumber { line_num, .. } => Instruction::DebugLine {
            number: line_num as usize,
        },
        // Excluded by the loop condition above.
        DebugInfo::EndOfData => {
            panic!("Found EndOfData debug info, that should no happend here.")
        }
    });
    // Move on to the next entry. `debug_infos` is `None` only when the method
    // has no debug info, in which case `current_debug_info` started as
    // `EndOfData` and this loop is never entered; the fallback keeps the
    // expression total.
    // NOTE(review): assumes `next_info()` eventually yields `EndOfData` once
    // the debug sequence is exhausted -- confirm against `tick()`.
    current_debug_info = match debug_infos.as_mut() {
        Some(machine) => machine.next_info(),
        None => DebugInfo::EndOfData,
    };
}
if let Some(try_) = tries.remove(&addr) { if let Some(try_) = tries.remove(&addr) {
insns.push(try_); insns.push(try_);
} }
@ -2741,7 +2794,6 @@ impl Apk {
registers_size: code_item.registers_size, registers_size: code_item.registers_size,
ins_size: code_item.ins_size, ins_size: code_item.ins_size,
outs_size: code_item.outs_size, outs_size: code_item.outs_size,
debug_info,
parameter_names, parameter_names,
insns, insns,
}) })
@ -2757,6 +2809,7 @@ impl Apk {
pub fn get_method_list_from_encoded_field_list( pub fn get_method_list_from_encoded_field_list(
encoded_methods: &[EncodedMethod], encoded_methods: &[EncodedMethod],
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Vec<Method>> { ) -> Result<Vec<Method>> {
let mut idx = 0; let mut idx = 0;
let mut methods = vec![]; let mut methods = vec![];
@ -2768,6 +2821,7 @@ impl Apk {
method.access_flags, method.access_flags,
method.code_off, method.code_off,
dex, dex,
label_each_ins,
)?); )?);
} }
Ok(methods) Ok(methods)
@ -2818,8 +2872,8 @@ impl Apk {
} }
#[pyo3(name = "add_dex_file")] #[pyo3(name = "add_dex_file")]
pub fn py_add_dex_file(&mut self, data: &[u8]) -> Result<()> { pub fn py_add_dex_file(&mut self, data: &[u8], label_each_ins: Option<bool>) -> Result<()> {
self.add_dex_file(data) self.add_dex_file(data, label_each_ins.unwrap_or(false))
} }
pub fn add_class(&mut self, class: Class) -> Result<()> { pub fn add_class(&mut self, class: Class) -> Result<()> {

View file

@ -31,10 +31,6 @@ pub struct Code {
/// The number of words of outgoing argument space /// The number of words of outgoing argument space
#[pyo3(get)] #[pyo3(get)]
pub outs_size: u16, pub outs_size: u16,
// TODO: implement
/// The debug info
#[pyo3(get)]
pub debug_info: (u32, Vec<u8>), // Should be stripped, copying like this just don't work
/// The names of the parameters if given /// The names of the parameters if given
#[pyo3(get)] #[pyo3(get)]
pub parameter_names: Option<Vec<Option<DexString>>>, pub parameter_names: Option<Vec<Option<DexString>>>,
@ -50,7 +46,6 @@ impl PartialEq for Code {
(comparable_self.registers_size == comparable_other.registers_size) (comparable_self.registers_size == comparable_other.registers_size)
&& (comparable_self.ins_size == comparable_other.ins_size) && (comparable_self.ins_size == comparable_other.ins_size)
&& (comparable_self.outs_size == comparable_other.outs_size) && (comparable_self.outs_size == comparable_other.outs_size)
&& (comparable_self.debug_info == comparable_other.debug_info)
&& (comparable_self.insns == comparable_other.insns) && (comparable_self.insns == comparable_other.insns)
} }
} }
@ -82,7 +77,6 @@ impl Code {
outs_size, outs_size,
insns, insns,
parameter_names, parameter_names,
debug_info: (0, vec![]),
} }
} }

View file

@ -649,7 +649,7 @@ pub enum Instruction {
/// Debug information. Indicate the beginning of the Epilogue /// Debug information. Indicate the beginning of the Epilogue
DebugBeginEpilogue {}, DebugBeginEpilogue {},
/// Debug information. Indicate the source file of the following instructions. /// Debug information. Indicate the source file of the following instructions.
DebugSourceFile { file: String }, DebugSourceFile { file: Option<String> },
/// Debug information. Indicate the line number of the following instructions. /// Debug information. Indicate the line number of the following instructions.
DebugLine { number: usize }, DebugLine { number: usize },
} }
@ -1217,19 +1217,20 @@ impl<V: Visitor> Visitable<V> for Instruction {
Self::DebugLocal { Self::DebugLocal {
reg: _, reg: _,
name: _, name: _,
type_, type_: Some(type_),
signature: _, signature: _,
} => { } => v.visit_type(type_),
if let Some(type_) = type_ { Self::DebugLocal {
v.visit_type(type_) reg: _,
} else { name: _,
Ok(()) type_: None,
} signature: _,
} } => Ok(()),
Self::DebugEndLocal { reg: _ } => Ok(()), Self::DebugEndLocal { reg: _ } => Ok(()),
Self::DebugEndPrologue {} => Ok(()), Self::DebugEndPrologue {} => Ok(()),
Self::DebugBeginEpilogue {} => Ok(()), Self::DebugBeginEpilogue {} => Ok(()),
Self::DebugSourceFile { file } => v.visit_string(&(file.as_str().into())), Self::DebugSourceFile { file: Some(file) } => v.visit_string(&(file.as_str().into())),
Self::DebugSourceFile { file: None } => Ok(()),
Self::DebugLine { number: _ } => Ok(()), Self::DebugLine { number: _ } => Ok(()),
} }
} }
@ -1889,9 +1890,13 @@ impl<V: VisitorMut> VisitableMut<V> for Instruction {
Self::DebugEndLocal { reg: _ } => Ok(self), Self::DebugEndLocal { reg: _ } => Ok(self),
Self::DebugEndPrologue {} => Ok(self), Self::DebugEndPrologue {} => Ok(self),
Self::DebugBeginEpilogue {} => Ok(self), Self::DebugBeginEpilogue {} => Ok(self),
Self::DebugSourceFile { file } => v Self::DebugSourceFile { file: Some(file) } => {
.visit_string(file.as_str().into()) v.visit_string(file.as_str().into())
.map(|file| Self::DebugSourceFile { file: file.into() }), .map(|file| Self::DebugSourceFile {
file: Some(file.into()),
})
}
Self::DebugSourceFile { file: None } => Ok(self),
Self::DebugLine { number: _ } => Ok(self), Self::DebugLine { number: _ } => Ok(self),
} }
} }
@ -2828,7 +2833,9 @@ impl Instruction {
Self::DebugEndPrologue {} => ".prologue".into(), Self::DebugEndPrologue {} => ".prologue".into(),
Self::DebugBeginEpilogue {} => ".epilogue".into(), Self::DebugBeginEpilogue {} => ".epilogue".into(),
// TODO: check if/how apktool/smali handles empty change of src file // TODO: check if/how apktool/smali handles empty change of src file
Self::DebugSourceFile { file } => format!(".source_file {file}"), Self::DebugSourceFile { file: Some(file) } => format!(".source_file {file}"),
// TODO: find a better representation
Self::DebugSourceFile { file: None } => ".source_file unknown".into(),
Self::DebugLine { number } => format!(".line {number}"), Self::DebugLine { number } => format!(".line {number}"),
} }
} }
@ -3636,7 +3643,10 @@ impl Instruction {
Self::DebugEndPrologue {} => "Instruction::DebugEndPrologue".into(), Self::DebugEndPrologue {} => "Instruction::DebugEndPrologue".into(),
Self::DebugBeginEpilogue {} => "Instruction::DebugBeginEpilogue".into(), Self::DebugBeginEpilogue {} => "Instruction::DebugBeginEpilogue".into(),
// TODO: check if/how apktool/smali handles empty change of src file // TODO: check if/how apktool/smali handles empty change of src file
Self::DebugSourceFile { file } => format!("Instruction::DebugSourceFile({file})"), Self::DebugSourceFile { file: Some(file) } => {
format!("Instruction::DebugSourceFile({file})")
}
Self::DebugSourceFile { file: None } => "Instruction::DebugSourceFile(None)".into(),
Self::DebugLine { number } => format!("Instruction::DebugLine({number})"), Self::DebugLine { number } => format!("Instruction::DebugLine({number})"),
} }
} }

View file

@ -300,6 +300,20 @@ pub enum DebugInfo {
EndOfData, EndOfData,
} }
impl DebugInfo {
    /// Return the address stored in this debug entry.
    ///
    /// [`DebugInfo::EndOfData`] carries no address, so `u32::MAX` is returned
    /// for it as a sentinel value.
    pub fn get_addr(&self) -> u32 {
        match self {
            Self::DefLocal { addr, .. }
            | Self::EndLocal { addr, .. }
            | Self::PrologueEnd { addr, .. }
            | Self::EpilogueBegin { addr, .. }
            | Self::SetSourceFile { addr, .. }
            | Self::SetLineNumber { addr, .. } => *addr,
            // TODO: an `Option<u32>` return would model this better than a sentinel.
            Self::EndOfData => u32::MAX,
        }
    }
}
/// A state machine that interpret a [`DebugInfoItem`]. /// A state machine that interpret a [`DebugInfoItem`].
#[derive(Debug, PartialEq, Eq, Clone)] #[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugStateMachine<'a> { pub struct DebugStateMachine<'a> {
@ -344,7 +358,7 @@ impl<'a> DebugStateMachine<'a> {
pub fn get_ins(&self) -> Result<DbgBytecode> { pub fn get_ins(&self) -> Result<DbgBytecode> {
if self.pc >= self.debug_info.bytecode.len() { if self.pc >= self.debug_info.bytecode.len() {
return Err(Error::OutOfBound( return Err(Error::OutOfBound(
"Try to read an instruction out of bound, maybe after the enf of the debug sequence." "Try to read an instruction out of bound, maybe after the end of the debug sequence."
.into() .into()
)); ));
} }
@ -513,6 +527,14 @@ impl<'a> DebugStateMachine<'a> {
} }
} }
} }
/// Step the state machine with [`Self::tick`] until a tick produces a
/// [`DebugInfo`], then return that value.
///
/// Ticks that produce `None` are skipped; this method does not return for
/// as long as `tick` keeps yielding `None`.
pub fn next_info(&mut self) -> DebugInfo {
    loop {
        match self.tick() {
            Some(produced) => break produced,
            None => continue,
        }
    }
}
} }
#[cfg(test)] #[cfg(test)]