diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs
index e75b07f..62ffef3 100644
--- a/androscalpel/src/apk.rs
+++ b/androscalpel/src/apk.rs
@@ -838,7 +838,34 @@ impl Apk {
                 let reg_values = (vc..vc + a as u16).collect();
                 Instruction::FilledNewArray(FilledNewArray::new(type_, reg_values)?)
             }
-            Format31T { op: 0x26, va, b } => todo!(), // Fill Array Data
+            Format31T { op: 0x26, va, b } => {
+                if b < 0 && b.unsigned_abs() as usize > addr { // unsigned_abs: `-b` overflows for MIN
+                    bail!(
+                        "Found fill-array-data v{va} +{b} at {addr}, \
+                        data location invalid (negative addresse)"
+                    );
+                }
+                let data_addr = if b > 0 {
+                    addr + b as usize
+                } else {
+                    addr - b.unsigned_abs() as usize
+                };
+                if let Some(ins) = insns_ref.get(&data_addr) {
+                    if let FormatFillArrayDataPayload { elt_width, data } = ins {
+                        Instruction::FillArrayData(FillArrayData::new(va, *elt_width, data.clone()))
+                    } else {
+                        bail!(
+                            "Found fill-array-data v{va} +{b} at 0x{addr:x}, \
+                            found {ins:?} at {data_addr:x}, expected a fill-array-data-payload"
+                        )
+                    }
+                } else {
+                    bail!(
+                        "Found fill-array-data v{va} +{b} at 0x{addr:x}, \
+                        no instruction found at {data_addr:x}"
+                    )
+                }
+            }
             Format11X { op: 0x27, va } => Instruction::Throw(Throw::new(va)),
             Format10T { op: 0x28, a } => {
                 if a < 0 && (-a) as usize > addr {
@@ -847,7 +874,7 @@ impl Apk {
                 let dest_addr = if a > 0 {
                     addr + a as usize
                 } else {
-                    addr - a as usize
+                    addr - (-a as usize)
                 };
                 let label = format!("label_{dest_addr:80X}");
                 if let Some(old_label) = labels.insert(dest_addr, label) {
@@ -865,7 +892,7 @@ impl Apk {
                 let dest_addr = if a > 0 {
                     addr + a as usize
                 } else {
-                    addr - a as usize
+                    addr - (-a as usize)
                 };
                 let label = format!("label_{dest_addr:80X}");
                 if let Some(old_label) = labels.insert(dest_addr, label) {
@@ -883,7 +910,7 @@ impl Apk {
                 let dest_addr = if a > 0 {
                     addr + a as usize
                 } else {
-                    addr - a as usize
+                    addr - (-a as usize)
                 };
                 let label = format!("label_{dest_addr:80X}");
                 if let Some(old_label) = labels.insert(dest_addr, label) {
@@ -894,8 +921,115 @@ impl
Apk {
                 }
                 Instruction::Goto(Goto::new(label))
             }
-            Format31T { op: 0x2b, va, b } => todo!(), // Packed Switch
-            Format31T { op: 0x2c, va, b } => todo!(), // Sparsed Switch
+            Format31T { op: 0x2b, va, b } => {
+                if b < 0 && b.unsigned_abs() as usize > addr { // unsigned_abs: `-b` overflows for MIN
+                    bail!(
+                        "Found packed-switch v{va} +{b} at {addr}, \
+                        data location invalid (negative addresse)"
+                    );
+                }
+                let data_addr = if b > 0 {
+                    addr + b as usize
+                } else {
+                    addr - b.unsigned_abs() as usize
+                };
+                if let Some(ins) = insns_ref.get(&data_addr) {
+                    if let FormatPackedSwitchPayload { first_key, targets } = ins {
+                        let mut branches = HashMap::new();
+                        let mut key = *first_key;
+                        for target in targets.iter().cloned() {
+                            if target < 0 && target.unsigned_abs() as usize > addr {
+                                bail!(
+                                    "Found switch branche to offset {target} at 0x{addr:0x}: \
+                                    the destination is invalid (negative addresse)"
+                                );
+                            }
+                            let dest_addr = if target > 0 {
+                                addr + target as usize
+                            } else {
+                                addr - target.unsigned_abs() as usize
+                            };
+                            let label = format!("label_{dest_addr:80X}");
+                            if let Some(old_label) = labels.insert(dest_addr, label.clone()) {
+                                if old_label != label {
+                                    // TODO: internal error, panic?
+                                    bail!(
+                                        "There is already a label at 0x{dest_addr:80X} with \
+                                        an invalid name"
+                                    );
+                                }
+                            }
+                            branches.insert(key, label);
+                            key = key.wrapping_add(1); // the last key may be the maximum value: must not panic
+                        }
+                        Instruction::Switch(Switch::new(va, branches))
+                    } else {
+                        bail!(
+                            "Found packed-switch v{va} +{b} at 0x{addr:x}, \
+                            found {ins:?} at {data_addr:x}, expected a packed-switch-payload"
+                        )
+                    }
+                } else {
+                    bail!(
+                        "Found packed-switch v{va} +{b} at 0x{addr:x}, \
+                        no instruction found at {data_addr:x}"
+                    )
+                }
+            }
+            Format31T { op: 0x2c, va, b } => {
+                if b < 0 && b.unsigned_abs() as usize > addr { // unsigned_abs: `-b` overflows for MIN
+                    bail!(
+                        "Found sparse-switch v{va} +{b} at {addr}, \
+                        data location invalid (negative addresse)"
+                    );
+                }
+                let data_addr = if b > 0 {
+                    addr + b as usize
+                } else {
+                    addr - b.unsigned_abs() as usize
+                };
+                if let Some(ins) = insns_ref.get(&data_addr) {
+                    if let FormatSparseSwitchPayload { key_targets } = ins {
+                        let mut branches = HashMap::new();
+                        for (key, target) in key_targets.iter().cloned() {
+                            if target < 0 && target.unsigned_abs() as usize > addr {
+                                bail!(
+                                    "Found switch branche to offset {target} at 0x{addr:0x}: \
+                                    the destination is invalid (negative addresse)"
+                                );
+                            }
+                            let dest_addr = if target > 0 {
+                                addr + target as usize
+                            } else {
+                                addr - target.unsigned_abs() as usize
+                            };
+                            let label = format!("label_{dest_addr:80X}");
+                            if let Some(old_label) = labels.insert(dest_addr, label.clone()) {
+                                if old_label != label {
+                                    // TODO: internal error, panic?
+                                    bail!(
+                                        "There is already a label at 0x{dest_addr:80X} with \
+                                        an invalid name"
+                                    );
+                                }
+                            }
+                            branches.insert(key, label);
+                            // Sparse payloads carry one explicit key per target: nothing to increment.
+                        }
+                        Instruction::Switch(Switch::new(va, branches))
+                    } else {
+                        bail!(
+                            "Found sparse-switch v{va} +{b} at 0x{addr:x}, \
+                            found {ins:?} at {data_addr:x}, expected a sparse-switch-payload"
+                        )
+                    }
+                } else {
+                    bail!(
+                        "Found sparse-switch v{va} +{b} at 0x{addr:x}, \
+                        no instruction found at {data_addr:x}"
+                    )
+                }
+            }
             Format23X {
                 op: 0x2d,
                 va,
@@ -2026,6 +2160,8 @@ impl Apk {
     /// Return a [`Code`] from it's offset in the dex file.
pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result {
+        use crate::instructions::Instruction;
+        use crate::instructions::{Label, Try};
         let code_item = dex.get_struct_at_offset::(offset)?;
         let debug_info = if code_item.debug_info_off == 0 {
             vec![]
@@ -2033,7 +2169,8 @@ impl Apk {
             dex.get_struct_at_offset::(code_item.debug_info_off)?
                 .serialize_to_vec()? // no dealing with that right now
         };
-        let mut _tries = vec![];
+        let mut labels: HashMap<usize, String> = HashMap::new(); // address -> label name
+        let mut tries = HashMap::new(); // start address -> `Instruction::Try`
         for TryItem {
             start_addr,
             insn_count,
@@ -2050,16 +2187,40 @@ impl Apk {
                     "Inconsistant code_item: found try blocks but no handler list"
                 ))?
                 .get_handler_at_offset(handler_off)?;
-            let catch_all_addr = catch_all_addr.map(|Uleb128(val)| val);
+            let default_handler = if let Some(Uleb128(addr)) = catch_all_addr.clone() {
+                let label = format!("label_{addr:80X}");
+                if let Some(label_) = labels.insert(addr as usize, label.clone()) {
+                    if label_ != label {
+                        bail!("Label collision at 0x{addr:80X}: {label_} and {label}");
+                    }
+                }
+                Some(label)
+            } else {
+                None
+            };
             let mut handlers_ = vec![];
             for EncodedTypeAddrPair {
                 type_idx: Uleb128(type_idx),
                 addr: Uleb128(addr),
-            } in handlers
+            } in handlers.iter().cloned()
             {
-                handlers_.push((Self::get_id_type_from_idx(*type_idx as usize, dex)?, *addr))
+                let label = format!("label_{addr:80X}");
+                if let Some(label_) = labels.insert(addr as usize, label.clone()) {
+                    if label_ != label {
+                        bail!("Label collision at 0x{addr:80X}: {label_} and {label}");
+                    }
+                }
+                handlers_.push((Self::get_id_type_from_idx(type_idx as usize, dex)?, label))
+            }
+            let dest_addr = start_addr + insn_count as u32; // register the end label so a `Label` is emitted
+            let end_label = labels.entry(dest_addr as usize).or_insert_with(|| format!("label_{dest_addr:80X}")).clone();
+            let try_ = Instruction::Try(Try::new(end_label, handlers_, default_handler));
+            if let Some(try__) = tries.insert(start_addr as usize, try_) {
+                bail!(
+                    "Found two try blocks at the same address 0x{start_addr:80X}: {try__:?}, {:?}",
+                    tries[&(start_addr as usize)] // `try_` was moved into the map: read it back
+                );
             }
-            _tries.push((start_addr, insn_count, (handlers_, catch_all_addr)));
         }
         let mut instructions_raw = HashMap::new();
@@ -2070,23 +2231,37 @@ impl Apk {
         }
         let mut instructions = vec![];
         addr = 0;
-        let mut labels = HashMap::new();
         for ins_f in code_item.insns {
             let (ins, ins_labels) =
                 Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)?;
             instructions.push((addr, ins));
             addr += ins_f.size() / 2;
-            labels.extend(&mut ins_labels); // TODO: handle collisions
+            for (key, val) in &ins_labels { // borrow: `ins_labels` is merged into `labels` below
+                if let Some(val_) = labels.get(key) {
+                    if val_ != val {
+                        // TODO: internal error, panic?
+                        bail!("Label collision at 0x{key:80X}: {val_} and {val}");
+                    }
+                }
+            }
+            labels.extend(ins_labels);
+        }
+        let mut insns = vec![];
+        for (addr, ins) in instructions {
+            if let Some(try_) = tries.remove(&addr) {
+                insns.push(try_);
+            }
+            if let Some(label) = labels.remove(&addr) {
+                insns.push(Instruction::Label(Label::new(label)));
+            }
+            insns.push(ins);
         }
-        // TODO: add try blocks and labels
-        let insns = instructions.into_iter().map(|(_, ins)| ins).collect();
         Ok(Code {
             registers_size: code_item.registers_size,
             ins_size: code_item.ins_size,
             outs_size: code_item.outs_size,
             debug_info,
             insns,
-            //tries,
         })
     }
diff --git a/androscalpel/src/instructions.rs b/androscalpel/src/instructions.rs
index 1d251ac..810226f 100644
--- a/androscalpel/src/instructions.rs
+++ b/androscalpel/src/instructions.rs
@@ -206,6 +206,8 @@ pub enum Instruction {
     InvokeCustom(InvokeCustom),
     ConstMethodHandle(ConstMethodHandle),
     ConstMethodType(ConstMethodType),
+    Try(Try),
+    Label(Label),
 }
 impl Instruction {
@@ -403,6 +405,8 @@ impl Instruction {
             Self::InvokeCustom(ins) => ins.__str__(),
             Self::ConstMethodHandle(ins) => ins.__str__(),
             Self::ConstMethodType(ins) => ins.__str__(),
+            Self::Try(ins) => ins.__str__(),
+            Self::Label(ins) => ins.__str__(),
         }
     }
@@ -600,6 +604,8 @@ impl Instruction {
             Self::InvokeCustom(ins) => ins.__repr__(),
             Self::ConstMethodHandle(ins) => ins.__repr__(),
             Self::ConstMethodType(ins) => ins.__repr__(),
+
Self::Try(ins) => ins.__repr__(),
+            Self::Label(ins) => ins.__repr__(),
         }
     }
 }
@@ -990,6 +996,10 @@ impl<'source> FromPyObject<'source> for Instruction {
             Ok(Self::ConstMethodHandle(ins))
         } else if let Ok(ins) = ConstMethodType::extract(ob) {
             Ok(Self::ConstMethodType(ins))
+        } else if let Ok(ins) = Try::extract(ob) {
+            Ok(Self::Try(ins))
+        } else if let Ok(ins) = Label::extract(ob) {
+            Ok(Self::Label(ins))
         } else {
             Err(PyErr::new::(format!(
                 "{} is not a castable as an Instruction",
@@ -1194,6 +1204,8 @@ impl IntoPy for Instruction {
             Self::InvokeCustom(ins) => ins.into_py(py),
             Self::ConstMethodHandle(ins) => ins.into_py(py),
             Self::ConstMethodType(ins) => ins.into_py(py),
+            Self::Try(ins) => ins.into_py(py),
+            Self::Label(ins) => ins.into_py(py),
         }
     }
 }
@@ -8732,9 +8744,96 @@ impl ConstMethodType {
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(ConstMethodHandle ConstMethodType({}, {}))",
+            "Instruction(ConstMethodType({}, {}))",
             self.to,
             self.proto.__repr__()
         )
     }
 }
+
+/// Try block. It does not match a Dalvik instruction but is derived from the code item struct.
+#[pyclass]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Try {
+    pub end_label: String, // name of the label marking the end of the try block
+    /// The list of exceptions and their associated handler label.
+    ///
+    /// # Warning
+    ///
+    /// The handlers are sorted: if several exception types match an exception,
+    /// the handler used is the first in the list.
+    pub handlers: Vec<(IdType, String)>,
+    pub default_handler: Option<String>, // label of the catch-all handler, if any
+}
+
+#[pymethods]
+impl Try {
+    #[new]
+    pub fn new(
+        end_label: String,
+        handlers: Vec<(IdType, String)>,
+        default_handler: Option<String>,
+    ) -> Self {
+        Self {
+            end_label,
+            default_handler,
+            handlers,
+        }
+    }
+
+    pub fn __str__(&self) -> String {
+        let handlers = self
+            .handlers
+            .iter()
+            .map(|(ty, label)| format!(" {}: {label}", ty.__str__()))
+            .collect::<Vec<_>>()
+            .join("\n ");
+        let default_handler = if let Some(label) = &self.default_handler { // borrow: cannot move out of `&self`
+            format!(" default: {label}")
+        } else {
+            "".into()
+        };
+
+        format!(
+            "try until {}{}{}",
+            self.end_label, handlers, default_handler
+        )
+    }
+
+    pub fn __repr__(&self) -> String {
+        let handlers = self
+            .handlers
+            .iter()
+            .map(|(ty, label)| format!("{}, {label}", ty.__repr__()))
+            .collect::<Vec<_>>()
+            .join(", ");
+        format!(
+            "Instruction(Try({}, [{}], {:?}))",
+            self.end_label, handlers, self.default_handler,
+        )
+    }
+}
+
+/// Label marker. It does not match a Dalvik instruction, but it is used as a marker for a
+/// jump destination.
+#[pyclass]
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Label {
+    pub name: String,
+}
+
+#[pymethods]
+impl Label {
+    #[new]
+    pub fn new(name: String) -> Self {
+        Self { name }
+    }
+
+    pub fn __str__(&self) -> String {
+        format!("{}:", self.name)
+    }
+
+    pub fn __repr__(&self) -> String {
+        format!("Instruction(Label({}))", self.name)
+    }
+}