WIP, TODO: regenerate debug info from new debug instruction

This commit is contained in:
Jean-Marie Mineau 2025-01-10 17:45:00 +01:00
parent f9f511013d
commit bc3392d946
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
4 changed files with 130 additions and 50 deletions

View file

@ -26,17 +26,15 @@ pub struct Apk {
pub not_referenced_strings: HashSet<DexString>,
}
const LABEL_EACH_INST: bool = true;
impl Apk {
/// Add the content of a dex file to the apk.
pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> {
pub fn add_dex_file(&mut self, data: &[u8], label_each_ins: bool) -> Result<()> {
let mut dex = DexFileReader::new(data)?;
let classes = dex
.get_class_defs()
.par_iter()
.enumerate()
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex))
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins))
.map(|class| class.map(|class| (class.descriptor.clone(), class)))
.collect::<Result<Vec<_>, _>>()?;
self.classes.par_extend(classes);
@ -53,6 +51,7 @@ impl Apk {
class_item: &ClassDefItem,
class_item_idx: usize,
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Class> {
let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?;
let superclass = if class_item.superclass_idx == NO_INDEX.0 {
@ -139,18 +138,22 @@ impl Apk {
hiddenapi_i += 1;
}
}
for mut method in
Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)?
{
for mut method in Self::get_method_list_from_encoded_field_list(
&data.direct_methods,
dex,
label_each_ins,
)? {
if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
hiddenapi_i += 1;
}
direct_methods.insert(method.descriptor.clone(), method);
}
for mut method in
Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)?
{
for mut method in Self::get_method_list_from_encoded_field_list(
&data.virtual_methods,
dex,
label_each_ins,
)? {
if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
hiddenapi_i += 1;
@ -676,6 +679,7 @@ impl Apk {
Uleb128(access_flags): Uleb128,
Uleb128(code_off): Uleb128,
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Method> {
let descriptor = Self::get_id_method_from_idx(idx, dex)?;
@ -748,9 +752,11 @@ impl Apk {
let code = if code_off == 0 {
None
} else {
Some(Self::get_code_from_off(code_off, dex).with_context(|| {
Some(
Self::get_code_from_off(code_off, dex, label_each_ins).with_context(|| {
format!("Failed to parse code of method {}", descriptor.__str__())
})?)
})?,
)
};
Ok(Method {
@ -780,11 +786,12 @@ impl Apk {
addr: usize,
insns_ref: &HashMap<usize, &InsFormat>,
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Option<(instructions::Instruction, HashMap<usize, String>)>> {
use crate::instructions::*;
use InsFormat::*;
let mut labels = HashMap::new();
if LABEL_EACH_INST {
if label_each_ins {
let label = format!("label_{addr:08X}");
labels.insert(addr, label.clone());
}
@ -2591,7 +2598,11 @@ impl Apk {
}
/// Return a [`Code`] from its offset in the dex file.
pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result<Code> {
pub fn get_code_from_off(
offset: u32,
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Code> {
use crate::instructions::Instruction;
let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?;
@ -2613,14 +2624,12 @@ impl Apk {
} else {
None
};
let debug_info = if let Some(debug_info) = debug_info {
let mut cursor = std::io::Cursor::new(vec![]);
debug_info
.bytecode
.serialize(&mut cursor, DbgBytecode::EndSequence)?;
(debug_info.line_start.0, cursor.into_inner())
let (mut current_debug_info, mut debug_infos) = if let Some(debug_info) = debug_info {
let mut debug_infos = DebugStateMachine::new(&debug_info);
let current_debug_info = debug_infos.next_info();
(current_debug_info, Some(debug_infos))
} else {
(0, vec![])
(DebugInfo::EndOfData, None)
};
let mut labels: HashMap<usize, String> = HashMap::new();
let mut tries = HashMap::new();
@ -2697,9 +2706,13 @@ impl Apk {
let mut instructions = vec![];
addr = 0;
for ins_f in &code_item.insns {
if let Some((ins, ins_labels)) =
Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)?
{
if let Some((ins, ins_labels)) = Self::instruction_format_to_instruction(
ins_f,
addr,
&instructions_raw,
dex,
label_each_ins,
)? {
instructions.push((addr, ins));
addr += ins_f.size() / 2;
for (key, val) in &ins_labels {
@ -2715,6 +2728,46 @@ impl Apk {
}
let mut insns = vec![];
for (addr, ins) in instructions {
// Emit every pending debug entry located at or before the current
// instruction, so that each debug pseudo-instruction is placed in front of
// the instruction it applies to. The entry address has to be the left-hand
// side of the comparison: testing `addr <= entry address` would match every
// entry while handling the very first instruction (addr 0).
while current_debug_info != DebugInfo::EndOfData
    && current_debug_info.get_addr() as usize <= addr
{
    insns.push(match current_debug_info {
        DebugInfo::DefLocal { reg, val, .. } => Instruction::DebugLocal {
            reg,
            name: val
                .name_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
            type_: val
                .type_idx
                .map(|idx| Self::get_id_type_from_idx(idx as usize, dex))
                .transpose()?,
            signature: val
                .sig_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
        },
        DebugInfo::EndLocal { reg, .. } => Instruction::DebugEndLocal { reg },
        DebugInfo::PrologueEnd { .. } => Instruction::DebugEndPrologue {},
        DebugInfo::EpilogueBegin { .. } => Instruction::DebugBeginEpilogue {},
        DebugInfo::SetSourceFile {
            source_file_idx, ..
        } => Instruction::DebugSourceFile {
            file: source_file_idx
                .map(|idx| dex.get_string(idx))
                .transpose()?
                .map(|str| DexString(str).into()),
        },
        DebugInfo::SetLineNumber { line_num, .. } => Instruction::DebugLine {
            number: line_num as usize,
        },
        // Excluded by the loop condition above.
        DebugInfo::EndOfData => {
            panic!("Found EndOfData debug info, that should no happend here.")
        }
    });
    // Move on to the next entry: without this the loop would push the same
    // entry forever. `debug_infos` is `None` only when the method has no
    // debug info, in which case `current_debug_info` already is `EndOfData`
    // and this loop is never entered.
    current_debug_info = match debug_infos.as_mut() {
        Some(machine) => machine.next_info(),
        None => DebugInfo::EndOfData,
    };
}
if let Some(try_) = tries.remove(&addr) {
insns.push(try_);
}
@ -2741,7 +2794,6 @@ impl Apk {
registers_size: code_item.registers_size,
ins_size: code_item.ins_size,
outs_size: code_item.outs_size,
debug_info,
parameter_names,
insns,
})
@ -2757,6 +2809,7 @@ impl Apk {
pub fn get_method_list_from_encoded_field_list(
encoded_methods: &[EncodedMethod],
dex: &DexFileReader,
label_each_ins: bool,
) -> Result<Vec<Method>> {
let mut idx = 0;
let mut methods = vec![];
@ -2768,6 +2821,7 @@ impl Apk {
method.access_flags,
method.code_off,
dex,
label_each_ins,
)?);
}
Ok(methods)
@ -2818,8 +2872,8 @@ impl Apk {
}
#[pyo3(name = "add_dex_file")]
pub fn py_add_dex_file(&mut self, data: &[u8]) -> Result<()> {
self.add_dex_file(data)
pub fn py_add_dex_file(&mut self, data: &[u8], label_each_ins: Option<bool>) -> Result<()> {
self.add_dex_file(data, label_each_ins.unwrap_or(false))
}
pub fn add_class(&mut self, class: Class) -> Result<()> {

View file

@ -31,10 +31,6 @@ pub struct Code {
/// The number of words of outgoing argument space
#[pyo3(get)]
pub outs_size: u16,
// TODO: implement
/// The debug info
#[pyo3(get)]
pub debug_info: (u32, Vec<u8>), // Should be stripped, copying like this just don't work
/// The names of the parameters if given
#[pyo3(get)]
pub parameter_names: Option<Vec<Option<DexString>>>,
@ -50,7 +46,6 @@ impl PartialEq for Code {
(comparable_self.registers_size == comparable_other.registers_size)
&& (comparable_self.ins_size == comparable_other.ins_size)
&& (comparable_self.outs_size == comparable_other.outs_size)
&& (comparable_self.debug_info == comparable_other.debug_info)
&& (comparable_self.insns == comparable_other.insns)
}
}
@ -82,7 +77,6 @@ impl Code {
outs_size,
insns,
parameter_names,
debug_info: (0, vec![]),
}
}

View file

@ -649,7 +649,7 @@ pub enum Instruction {
/// Debug information. Indicate the beginning of the Epilogue
DebugBeginEpilogue {},
/// Debug information. Indicate the source file of the following instructions.
DebugSourceFile { file: String },
DebugSourceFile { file: Option<String> },
/// Debug information. Indicate the line number of the following instructions.
DebugLine { number: usize },
}
@ -1217,19 +1217,20 @@ impl<V: Visitor> Visitable<V> for Instruction {
Self::DebugLocal {
reg: _,
name: _,
type_,
type_: Some(type_),
signature: _,
} => {
if let Some(type_) = type_ {
v.visit_type(type_)
} else {
Ok(())
}
}
} => v.visit_type(type_),
Self::DebugLocal {
reg: _,
name: _,
type_: None,
signature: _,
} => Ok(()),
Self::DebugEndLocal { reg: _ } => Ok(()),
Self::DebugEndPrologue {} => Ok(()),
Self::DebugBeginEpilogue {} => Ok(()),
Self::DebugSourceFile { file } => v.visit_string(&(file.as_str().into())),
Self::DebugSourceFile { file: Some(file) } => v.visit_string(&(file.as_str().into())),
Self::DebugSourceFile { file: None } => Ok(()),
Self::DebugLine { number: _ } => Ok(()),
}
}
@ -1889,9 +1890,13 @@ impl<V: VisitorMut> VisitableMut<V> for Instruction {
Self::DebugEndLocal { reg: _ } => Ok(self),
Self::DebugEndPrologue {} => Ok(self),
Self::DebugBeginEpilogue {} => Ok(self),
Self::DebugSourceFile { file } => v
.visit_string(file.as_str().into())
.map(|file| Self::DebugSourceFile { file: file.into() }),
Self::DebugSourceFile { file: Some(file) } => {
v.visit_string(file.as_str().into())
.map(|file| Self::DebugSourceFile {
file: Some(file.into()),
})
}
Self::DebugSourceFile { file: None } => Ok(self),
Self::DebugLine { number: _ } => Ok(self),
}
}
@ -2828,7 +2833,9 @@ impl Instruction {
Self::DebugEndPrologue {} => ".prologue".into(),
Self::DebugBeginEpilogue {} => ".epilogue".into(),
// TODO: check if/how apktool/smali handles empty change of src file
Self::DebugSourceFile { file } => format!(".source_file {file}"),
Self::DebugSourceFile { file: Some(file) } => format!(".source_file {file}"),
// TODO: find a better representation
Self::DebugSourceFile { file: None } => ".source_file unknown".into(),
Self::DebugLine { number } => format!(".line {number}"),
}
}
@ -3636,7 +3643,10 @@ impl Instruction {
Self::DebugEndPrologue {} => "Instruction::DebugEndPrologue".into(),
Self::DebugBeginEpilogue {} => "Instruction::DebugBeginEpilogue".into(),
// TODO: check if/how apktool/smali handles empty change of src file
Self::DebugSourceFile { file } => format!("Instruction::DebugSourceFile({file})"),
Self::DebugSourceFile { file: Some(file) } => {
format!("Instruction::DebugSourceFile({file})")
}
Self::DebugSourceFile { file: None } => "Instruction::DebugSourceFile(None)".into(),
Self::DebugLine { number } => format!("Instruction::DebugLine({number})"),
}
}

View file

@ -300,6 +300,20 @@ pub enum DebugInfo {
EndOfData,
}
impl DebugInfo {
    /// Return the `addr` field carried by this entry.
    ///
    /// [`DebugInfo::EndOfData`] has no `addr` field; `u32::MAX` is returned
    /// for it instead.
    pub fn get_addr(&self) -> u32 {
        match self {
            Self::DefLocal { addr, .. }
            | Self::EndLocal { addr, .. }
            | Self::PrologueEnd { addr }
            | Self::EpilogueBegin { addr }
            | Self::SetSourceFile { addr, .. }
            | Self::SetLineNumber { addr, .. } => *addr,
            // TODO: an `Option::None` may be a better fit than a sentinel.
            Self::EndOfData => u32::MAX,
        }
    }
}
/// A state machine that interpret a [`DebugInfoItem`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugStateMachine<'a> {
@ -344,7 +358,7 @@ impl<'a> DebugStateMachine<'a> {
pub fn get_ins(&self) -> Result<DbgBytecode> {
if self.pc >= self.debug_info.bytecode.len() {
return Err(Error::OutOfBound(
"Try to read an instruction out of bound, maybe after the enf of the debug sequence."
"Try to read an instruction out of bound, maybe after the end of the debug sequence."
.into()
));
}
@ -513,6 +527,14 @@ impl<'a> DebugStateMachine<'a> {
}
}
}
/// Call [`Self::tick`] repeatedly until it yields a [`DebugInfo`], then
/// return that value.
///
/// Ticks that yield `None` are skipped. The loop only ends once `tick`
/// returns `Some(_)`.
pub fn next_info(&mut self) -> DebugInfo {
    loop {
        match self.tick() {
            Some(emitted) => break emitted,
            None => continue,
        }
    }
}
}
#[cfg(test)]