From f15ad78d44d78d188580698a2cfd3da1b0f4389d Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Wed, 26 Feb 2025 12:04:23 +0100 Subject: [PATCH 1/8] add method --- androscalpel/src/code_analysis/method_cfg.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/androscalpel/src/code_analysis/method_cfg.rs b/androscalpel/src/code_analysis/method_cfg.rs index e97ca50..6a43524 100644 --- a/androscalpel/src/code_analysis/method_cfg.rs +++ b/androscalpel/src/code_analysis/method_cfg.rs @@ -26,6 +26,12 @@ pub struct MethodCFG<'a> { pub nodes: Vec>, } +impl Method { + pub fn get_cfg(&self) -> Result { + MethodCFG::new(self) + } +} + impl<'a> MethodCFG<'a> { pub fn new(method: &'a Method) -> Result { let insns: &'a [Instruction] = if let Some(code) = method.code.as_ref() { From fe0dd2d6c8e9e5c8320434f34da67f3802e28250 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 13:30:44 +0100 Subject: [PATCH 2/8] fix non static reg type --- androscalpel/src/code_analysis/register_type.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/androscalpel/src/code_analysis/register_type.rs b/androscalpel/src/code_analysis/register_type.rs index 78f8403..15cbc96 100644 --- a/androscalpel/src/code_analysis/register_type.rs +++ b/androscalpel/src/code_analysis/register_type.rs @@ -55,6 +55,10 @@ impl MethodCFG<'_> { } // Initialize the entry block from function signature: let mut i = (code.registers_size - code.ins_size) as usize; + if !self.method.is_static { + end_block_reg_tys[0][i] = RegType::Object; // 'this' + i += 1; + } for arg in &self.method.descriptor.proto.get_parameters() { if arg.is_class() || arg.is_array() { end_block_reg_tys[0][i] = RegType::Object; From d906c4d3d2e2be7934930d24b187963c78978977 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 13:48:08 +0100 Subject: [PATCH 3/8] add more debug info in error --- androscalpel/src/dex_writer.rs | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 78a8e44..8dd0aaf 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -503,9 +503,14 @@ impl DexWriter { // for context } Instruction::Goto { label } => { - let (min_addr, max_addr) = label_min_max_addrs - .get(label) - .ok_or(anyhow!("Label {label} not found in label estimation map"))?; + let (min_addr, max_addr) = label_min_max_addrs.get(label).ok_or(anyhow!( + "Label {label} not found in label estimation map, known labels are {}", + label_min_max_addrs + .keys() + .map(|string| string.as_str()) + .collect::>() + .join(", ") + ))?; let size = Instruction::goto_size_from_branch_offset_interval( addr, *min_addr, *max_addr, )?; @@ -948,7 +953,8 @@ impl DexWriter { // No if let because ownership gunfooterie let code_off = if class.direct_methods.get(id).unwrap().code.is_some() { let code_off = self.section_manager.get_aligned_size(Section::CodeItem); - self.insert_code_item(id.clone(), true)?; + self.insert_code_item(id.clone(), true) + .with_context(|| format!("Failed to serialize code of {}", id.__str__()))?; Uleb128(code_off + 1) } else { Uleb128(0) @@ -983,7 +989,8 @@ impl DexWriter { // No if let because ownership gunfooterie let code_off = if class.virtual_methods.get(id).unwrap().code.is_some() { let code_off = self.section_manager.get_aligned_size(Section::CodeItem); - self.insert_code_item(id.clone(), false)?; + self.insert_code_item(id.clone(), false) + .with_context(|| format!("Failed to serialize code of {}", id.__str__()))?; Uleb128(code_off + 1) } else { Uleb128(0) From e2dc3381b6809d57c2efabe516c16161c5fdccbc Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 13:57:11 +0100 Subject: [PATCH 4/8] add more debug info in error --- androscalpel/src/dex_writer.rs | 4 ++-- androscalpel/src/instructions.rs | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 8dd0aaf..b74fd0b 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -504,7 +504,7 @@ impl DexWriter { } Instruction::Goto { label } => { let (min_addr, max_addr) = label_min_max_addrs.get(label).ok_or(anyhow!( - "Label {label} not found in label estimation map, known labels are {}", + "Label {label} not found in label estimation map, known labels are [{}]", label_min_max_addrs .keys() .map(|string| string.as_str()) @@ -825,7 +825,7 @@ impl DexWriter { format!( "Failed to convert instruction {} (found in code of {}) to raw instruction", ins.__repr__(), - method_id.__repr__() + method_id.__str__() ) })?; addr += ins.size() / 2; diff --git a/androscalpel/src/instructions.rs b/androscalpel/src/instructions.rs index bfc5302..1aac5e1 100644 --- a/androscalpel/src/instructions.rs +++ b/androscalpel/src/instructions.rs @@ -1978,8 +1978,13 @@ macro_rules! sanity_check_22b_or_22s { macro_rules! raw_ins_if { ($ins_op:tt, $label_addrs:ident, $label:ident, $addr:ident, $r1:ident, $r2:ident) => {{ let label_addr = $label_addrs.get($label).ok_or(anyhow!( - "Label {} not found in code, but found `if` with this label", + "Label {} not found in code, but found `if` with this label. Known labels are [{}].", $label, + $label_addrs + .keys() + .map(|string| string.as_str()) + .collect::>() + .join(", ") ))?; let branch_offset = *label_addr as i32 - $addr as i32; if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { @@ -1997,8 +2002,13 @@ macro_rules! raw_ins_if { macro_rules! raw_ins_ifz { ($ins_op:tt, $label_addrs:ident, $label:ident, $addr:ident, $r1:ident) => {{ let label_addr = $label_addrs.get($label).ok_or(anyhow!( - "Label {} not found in code, but found `if` with this label", + "Label {} not found in code, but found `if` with this label. Known labels are [{}].", $label, + $label_addrs + .keys() + .map(|string| string.as_str()) + .collect::>() + .join(", ") ))?; let branch_offset = *label_addr as i32 - $addr as i32; if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { From be5922a726dfa46e0a42530347b691e2a7877c47 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 15:03:39 +0100 Subject: [PATCH 5/8] allow multiple label at the same address when parsing .dex --- androscalpel/src/apk.rs | 178 ++++++++-------------------------------- 1 file changed, 36 insertions(+), 142 deletions(-) diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index cb8670b..111ebe5 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -852,19 +852,20 @@ impl Apk { } /// Convert an instruction format to an instruction. + #[allow(clippy::type_complexity)] fn instruction_format_to_instruction( format: &InsFormat, addr: usize, insns_ref: &HashMap, dex: &DexFileReader, label_ins: &mut F, - ) -> Result)>> + ) -> Result>)>> where F: FnMut(&instructions::Instruction, usize) -> Option, { use crate::instructions::*; use InsFormat::*; - let mut labels = HashMap::new(); + let mut labels = HashMap::<_, Vec<_>>::new(); let ins = match format.clone() { Format10X { op: 0x00 } => Instruction::Nop {}, Format12X { op: 0x01, va, vb } => Instruction::Move { @@ -1037,12 +1038,7 @@ impl Apk { addr - (-(a as i64) as usize) }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::Goto { label } } Format20T { op: 0x29, a } => { @@ -1055,12 +1051,7 @@ impl Apk { addr - (-(a as i64) as usize) }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::Goto { label } } Format30T { op: 0x2a, a } => { @@ -1073,12 +1064,7 @@ impl Apk { addr - (-(a as i64) as usize) }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::Goto { label } } Format31T { op: 0x2b, va, b } => { @@ -1110,15 +1096,7 @@ impl Apk { addr - (-(target as i64) as usize) }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!( - "There is already a label at 0x{dest_addr:08X} with \ - an invalid name" - ); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); branches.insert(key, label); key += 1; } @@ -1164,15 +1142,7 @@ impl Apk { addr - (-target as usize) }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!( - "There is already a label at 0x{dest_addr:08X} with \ - an invalid name" - ); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); branches.insert(key, label); } Instruction::Switch { reg: va, branches } @@ -1257,12 +1227,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfEq { a: va, b: vb, @@ -1287,12 +1252,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfNe { a: va, b: vb, @@ -1317,12 +1277,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfLt { a: va, b: vb, @@ -1347,12 +1302,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfGe { a: va, b: vb, @@ -1377,12 +1327,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfGt { a: va, b: vb, @@ -1407,12 +1352,7 @@ impl Apk { addr - (-(c as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfLe { a: va, b: vb, @@ -1432,12 +1372,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfEqZ { a: va, label } } Format21T { op: 0x39, va, b } => { @@ -1453,12 +1388,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfNeZ { a: va, label } } Format21T { op: 0x3a, va, b } => { @@ -1474,12 +1404,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfLtZ { a: va, label } } Format21T { op: 0x3b, va, b } => { @@ -1495,12 +1420,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfGeZ { a: va, label } } Format21T { op: 0x3c, va, b } => { @@ -1516,12 +1436,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfGtZ { a: va, label } } Format21T { op: 0x3d, va, b } => { @@ -1537,12 +1452,7 @@ impl Apk { addr - (-(b as i64)) as usize }; let label = format!("label_{dest_addr:08X}"); - if let Some(old_label) = labels.insert(dest_addr, label.clone()) { - if old_label != label { - // TODO: internal error, panic? - bail!("There is already a label at 0x{dest_addr:08X} with an invalid name"); - } - } + labels.entry(dest_addr).or_default().push(label.clone()); Instruction::IfLeZ { a: va, label } } Format23X { @@ -2666,7 +2576,7 @@ impl Apk { .with_context(|| anyhow!("Invalid instruction {ins:?} found at {addr}: {format:?}"))?; if let Some(label) = label_ins(&ins, addr) { //let label = format!("label_{addr:08X}"); - labels.insert(addr, label); + labels.entry(addr).or_default().push(label); } Ok(Some((ins, labels))) @@ -2705,7 +2615,7 @@ impl Apk { } else { (DebugInfo::EndOfData, None) }; - let mut labels: HashMap = HashMap::new(); + let mut labels: HashMap> = HashMap::new(); let mut tries = HashMap::new(); for TryItem { start_addr, @@ -2725,12 +2635,7 @@ impl Apk { .get_handler_at_offset(handler_off)?; let default_handler = if let Some(Uleb128(addr)) = *catch_all_addr { let label = format!("label_{addr:08X}"); - if let Some(label_) = labels.insert(addr as usize, label.clone()) { - if label_ != label { - // TODO: that's an internal error - bail!("Label collision at 0x{addr:08X}: {label_} and {label}"); - } - } + labels.entry(addr as usize).or_default().push(label.clone()); Some(label) } else { None @@ -2742,22 +2647,15 @@ impl Apk { } in handlers.iter().cloned() { let label = format!("label_{addr:08X}"); - if let Some(label_) = labels.insert(addr as usize, label.clone()) { - if label_ != label { - // TODO: that's an internal error - bail!("Label collision at 0x{addr:08X}: {label_} and {label}"); - } - } + labels.entry(addr as usize).or_default().push(label.clone()); handlers_.push((Self::get_id_type_from_idx(type_idx as usize, dex)?, label)) } let dest_addr = start_addr + insn_count as u32; let end_label = format!("label_{dest_addr:08X}"); - if let Some(label_) = labels.insert(dest_addr as usize, end_label.clone()) { - if label_ != end_label { - // TODO: that's an internal error - bail!("Label collision at 0x{dest_addr:08X}: {label_} and {end_label}"); - } - } + labels + .entry(dest_addr as usize) + .or_default() + .push(end_label.clone()); let try_ = Instruction::Try { end_label, handlers: handlers_, @@ -2789,14 +2687,6 @@ impl Apk { )? { instructions.push((addr, ins)); addr += ins_f.size() / 2; - for (key, val) in &ins_labels { - if let Some(val_) = ins_labels.get(key) { - if val_ != val { - // TODO: internal error, panic? - bail!("Label collision at 0x{key:08X}: {val_} and {val}"); - } - } - } labels.extend(ins_labels); } } @@ -2852,8 +2742,10 @@ impl Apk { if let Some(try_) = tries.remove(&addr) { insns.push(try_); } - if let Some(label) = labels.remove(&addr) { - insns.push(Instruction::Label { name: label }); + if let Some(label_l) = labels.remove(&addr) { + for label in label_l.into_iter() { + insns.push(Instruction::Label { name: label }); + } } insns.push(ins); } @@ -2862,8 +2754,10 @@ impl Apk { if let Some(try_) = tries.remove(&addr) { insns.push(try_); } - if let Some(label) = labels.remove(&addr) { - insns.push(Instruction::Label { name: label }); + if let Some(label_l) = labels.remove(&addr) { + for label in label_l.into_iter() { + insns.push(Instruction::Label { name: label }); + } } if !labels.is_empty() { bail!("Could not put all label as instructions (label out of insns?)"); From 112ae0db7de2bf3824e7d6494875014efb255dc7 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 15:55:09 +0100 Subject: [PATCH 6/8] fix hashmap mergin --- androscalpel/src/apk.rs | 4 +++- androscalpel/src/dex_writer.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 111ebe5..1de2a9d 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -2687,7 +2687,9 @@ impl Apk { )? { instructions.push((addr, ins)); addr += ins_f.size() / 2; - labels.extend(ins_labels); + for (addr, mut label_l) in ins_labels.into_iter() { + labels.entry(addr).or_default().append(&mut label_l); + } } } let mut insns = vec![]; diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index b74fd0b..ddc6549 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -504,7 +504,7 @@ impl DexWriter { } Instruction::Goto { label } => { let (min_addr, max_addr) = label_min_max_addrs.get(label).ok_or(anyhow!( - "Label {label} not found in label estimation map, known labels are [{}]", + "Label {label} not found in label estimation map but used in Goto instruction. Known labels are [{}].", label_min_max_addrs .keys() .map(|string| string.as_str()) From 0a77c3af8663313b40b2c1d627a90e93442aa73a Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 16:25:18 +0100 Subject: [PATCH 7/8] test tweaking the graph --- androscalpel/src/code_analysis/register_type.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/androscalpel/src/code_analysis/register_type.rs b/androscalpel/src/code_analysis/register_type.rs index 15cbc96..52584a8 100644 --- a/androscalpel/src/code_analysis/register_type.rs +++ b/androscalpel/src/code_analysis/register_type.rs @@ -110,6 +110,7 @@ impl MethodCFG<'_> { ); dot_string += " style=\"dashed\";\n"; dot_string += " color=\"black\";\n"; + dot_string += " rankdir=\"TB\";\n"; dot_string += &format!( " label=\"Register Types{}\";\n", self.method.descriptor.__str__() From ff2d63035239a39f10fe3d09084f5ce6f47ab4a3 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Mon, 3 Mar 2025 17:39:00 +0100 Subject: [PATCH 8/8] fix cfg node labeling --- androscalpel/src/code_analysis/method_cfg.rs | 3 +-- androscalpel/src/code_analysis/register_type.rs | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/androscalpel/src/code_analysis/method_cfg.rs b/androscalpel/src/code_analysis/method_cfg.rs index 6a43524..10d7c7d 100644 --- a/androscalpel/src/code_analysis/method_cfg.rs +++ b/androscalpel/src/code_analysis/method_cfg.rs @@ -159,7 +159,6 @@ impl<'a> MethodCFG<'a> { last_labels.push(name.clone()); } else { nodes_next_label.push(vec![]); - last_labels.push(name.clone()); nodes.push(MethodCFGNode { code_block: &insns[start_last_block..i], labels: last_labels, @@ -167,7 +166,7 @@ impl<'a> MethodCFG<'a> { prev_nodes: vec![], }); start_last_block = i; - last_labels = vec![]; + last_labels = vec![name.clone()]; } } Instruction::ReturnVoid {} diff --git a/androscalpel/src/code_analysis/register_type.rs b/androscalpel/src/code_analysis/register_type.rs index 52584a8..c294517 100644 --- a/androscalpel/src/code_analysis/register_type.rs +++ b/androscalpel/src/code_analysis/register_type.rs @@ -73,11 +73,11 @@ impl MethodCFG<'_> { i += 1; } } - let mut changed = true; while changed { - let mut new_end_block_reg_tys = vec![]; - for node in &self.nodes { + // The first node is empty and depend on the function signature, it must not change + let mut new_end_block_reg_tys = vec![end_block_reg_tys[0].clone()]; + for node in self.nodes.iter().skip(1) { new_end_block_reg_tys.push(transform_reg_ty( &merge_input(node, nb_reg, &end_block_reg_tys), node,