convert bytecode to a more editable format

This commit is contained in:
Jean-Marie Mineau 2023-12-17 19:14:33 +01:00
parent 29c43a68b2
commit 0c928e4fd2
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 291 additions and 17 deletions

View file

@ -838,7 +838,34 @@ impl Apk {
let reg_values = (vc..vc + a as u16).collect(); let reg_values = (vc..vc + a as u16).collect();
Instruction::FilledNewArray(FilledNewArray::new(type_, reg_values)?) Instruction::FilledNewArray(FilledNewArray::new(type_, reg_values)?)
} }
Format31T { op: 0x26, va, b } => todo!(), // Fill Array Data Format31T { op: 0x26, va, b } => {
if b < 0 && -b as usize > addr {
bail!(
"Found fill-array-data-payload v{va} +{b} at {addr}, \
data location invalid (negative addresse)"
);
}
let data_addr = if b > 0 {
addr + b as usize
} else {
addr - (-b as usize)
};
if let Some(ins) = insns_ref.get(&data_addr) {
if let FormatFillArrayDataPayload { elt_width, data } = ins {
Instruction::FillArrayData(FillArrayData::new(va, *elt_width, data.clone()))
} else {
bail!(
"Found fill-array-data-payload v{va} +{b} at 0x{addr:x}, \
found {ins:?} at {data_addr:x}, expected a fill-array-data-payload"
)
}
} else {
bail!(
"Found fill-array-data-payload v{va} +{b} at 0x{addr:x}, \
no instruction found at {data_addr:x}"
)
}
}
Format11X { op: 0x27, va } => Instruction::Throw(Throw::new(va)), Format11X { op: 0x27, va } => Instruction::Throw(Throw::new(va)),
Format10T { op: 0x28, a } => { Format10T { op: 0x28, a } => {
if a < 0 && (-a) as usize > addr { if a < 0 && (-a) as usize > addr {
@ -847,7 +874,7 @@ impl Apk {
let dest_addr = if a > 0 { let dest_addr = if a > 0 {
addr + a as usize addr + a as usize
} else { } else {
addr - a as usize addr - (-a as usize)
}; };
let label = format!("label_{dest_addr:80X}"); let label = format!("label_{dest_addr:80X}");
if let Some(old_label) = labels.insert(dest_addr, label) { if let Some(old_label) = labels.insert(dest_addr, label) {
@ -865,7 +892,7 @@ impl Apk {
let dest_addr = if a > 0 { let dest_addr = if a > 0 {
addr + a as usize addr + a as usize
} else { } else {
addr - a as usize addr - (-a as usize)
}; };
let label = format!("label_{dest_addr:80X}"); let label = format!("label_{dest_addr:80X}");
if let Some(old_label) = labels.insert(dest_addr, label) { if let Some(old_label) = labels.insert(dest_addr, label) {
@ -883,7 +910,7 @@ impl Apk {
let dest_addr = if a > 0 { let dest_addr = if a > 0 {
addr + a as usize addr + a as usize
} else { } else {
addr - a as usize addr - (-a as usize)
}; };
let label = format!("label_{dest_addr:80X}"); let label = format!("label_{dest_addr:80X}");
if let Some(old_label) = labels.insert(dest_addr, label) { if let Some(old_label) = labels.insert(dest_addr, label) {
@ -894,8 +921,115 @@ impl Apk {
} }
Instruction::Goto(Goto::new(label)) Instruction::Goto(Goto::new(label))
} }
Format31T { op: 0x2b, va, b } => todo!(), // Packed Switch Format31T { op: 0x2b, va, b } => {
Format31T { op: 0x2c, va, b } => todo!(), // Sparsed Switch if b < 0 && -b as usize > addr {
bail!(
"Found packed-switch v{va} +{b} at {addr}, \
data location invalid (negative addresse)"
);
}
let data_addr = if b > 0 {
addr + b as usize
} else {
addr - (-b as usize)
};
if let Some(ins) = insns_ref.get(&data_addr) {
if let FormatPackedSwitchPayload { first_key, targets } = ins {
let mut branches = HashMap::new();
let mut key = *first_key;
for target in targets.iter().cloned() {
if target < 0 && (-target) as usize > addr {
bail!(
"Found switch branche to offset {target} at 0x{addr:0x}: \
the destination is invalid (negative addresse)"
);
}
let dest_addr = if target > 0 {
addr + target as usize
} else {
addr - (-target as usize)
};
let label = format!("label_{dest_addr:80X}");
if let Some(old_label) = labels.insert(dest_addr, label) {
if old_label != label {
// TODO: internal error, panic?
bail!(
"There is already a label at 0x{dest_addr:80X} with \
an invalid name"
);
}
}
branches.insert(key, label);
key += 1;
}
Instruction::Switch(Switch::new(va, branches))
} else {
bail!(
"Found packed-switch v{va} +{b} at 0x{addr:x}, \
found {ins:?} at {data_addr:x}, expected a packed-switch-payload"
)
}
} else {
bail!(
"Found packed-switch v{va} +{b} at 0x{addr:x}, \
no instruction found at {data_addr:x}"
)
}
}
Format31T { op: 0x2c, va, b } => {
if b < 0 && -b as usize > addr {
bail!(
"Found sparsed-switch v{va} +{b} at {addr}, \
data location invalid (negative addresse)"
);
}
let data_addr = if b > 0 {
addr + b as usize
} else {
addr - (-b as usize)
};
if let Some(ins) = insns_ref.get(&data_addr) {
if let FormatSparseSwitchPayload { key_targets } = ins {
let mut branches = HashMap::new();
for (key, target) in key_targets.iter().cloned() {
if target < 0 && (-target) as usize > addr {
bail!(
"Found switch branche to offset {target} at 0x{addr:0x}: \
the destination is invalid (negative addresse)"
);
}
let dest_addr = if target > 0 {
addr + target as usize
} else {
addr - (-target as usize)
};
let label = format!("label_{dest_addr:80X}");
if let Some(old_label) = labels.insert(dest_addr, label) {
if old_label != label {
// TODO: internal error, panic?
bail!(
"There is already a label at 0x{dest_addr:80X} with \
an invalid name"
);
}
}
branches.insert(key, label);
key += 1;
}
Instruction::Switch(Switch::new(va, branches))
} else {
bail!(
"Found packed-switch v{va} +{b} at 0x{addr:x}, \
found {ins:?} at {data_addr:x}, expected a packed-switch-payload"
)
}
} else {
bail!(
"Found packed-switch v{va} +{b} at 0x{addr:x}, \
no instruction found at {data_addr:x}"
)
}
}
Format23X { Format23X {
op: 0x2d, op: 0x2d,
va, va,
@ -2026,6 +2160,8 @@ impl Apk {
/// Return a [`Code`] from it's offset in the dex file. /// Return a [`Code`] from it's offset in the dex file.
pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result<Code> { pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result<Code> {
use crate::instructions::Instruction;
use crate::instructions::{Label, Try};
let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?; let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?;
let debug_info = if code_item.debug_info_off == 0 { let debug_info = if code_item.debug_info_off == 0 {
vec![] vec![]
@ -2033,7 +2169,8 @@ impl Apk {
dex.get_struct_at_offset::<DebugInfoItem>(code_item.debug_info_off)? dex.get_struct_at_offset::<DebugInfoItem>(code_item.debug_info_off)?
.serialize_to_vec()? // no dealing with that right now .serialize_to_vec()? // no dealing with that right now
}; };
let mut _tries = vec![]; let mut labels: HashMap<usize, String> = HashMap::new();
let mut tries = HashMap::new();
for TryItem { for TryItem {
start_addr, start_addr,
insn_count, insn_count,
@ -2050,16 +2187,40 @@ impl Apk {
"Inconsistant code_item: found try blocks but no handler list" "Inconsistant code_item: found try blocks but no handler list"
))? ))?
.get_handler_at_offset(handler_off)?; .get_handler_at_offset(handler_off)?;
let catch_all_addr = catch_all_addr.map(|Uleb128(val)| val); let default_handler = if let Some(Uleb128(addr)) = catch_all_addr.clone() {
let label = format!("label_{addr:80X}");
if let Some(label_) = labels.insert(addr as usize, label) {
if label_ != label {
bail!("Label collision at 0x{addr:80X}: {label_} and {label}");
}
}
Some(label)
} else {
None
};
let mut handlers_ = vec![]; let mut handlers_ = vec![];
for EncodedTypeAddrPair { for EncodedTypeAddrPair {
type_idx: Uleb128(type_idx), type_idx: Uleb128(type_idx),
addr: Uleb128(addr), addr: Uleb128(addr),
} in handlers } in handlers.iter().cloned()
{ {
handlers_.push((Self::get_id_type_from_idx(*type_idx as usize, dex)?, *addr)) let label = format!("label_{addr:80X}");
if let Some(label_) = labels.insert(addr as usize, label) {
if label_ != label {
bail!("Label collision at 0x{addr:80X}: {label_} and {label}");
}
}
handlers_.push((Self::get_id_type_from_idx(type_idx as usize, dex)?, label))
}
let dest_addr = start_addr + insn_count as u32;
let end_label = format!("label_{dest_addr:80X}");
let try_ = Instruction::Try(Try::new(end_label, handlers_, default_handler));
if let Some(try__) = tries.insert(start_addr as usize, try_) {
bail!(
"Found two try blocks at the same address 0x{start_addr:80X}: \
{try__:?}, {try_:?}"
);
} }
_tries.push((start_addr, insn_count, (handlers_, catch_all_addr)));
} }
let mut instructions_raw = HashMap::new(); let mut instructions_raw = HashMap::new();
@ -2070,23 +2231,37 @@ impl Apk {
} }
let mut instructions = vec![]; let mut instructions = vec![];
addr = 0; addr = 0;
let mut labels = HashMap::new();
for ins_f in code_item.insns { for ins_f in code_item.insns {
let (ins, ins_labels) = let (ins, ins_labels) =
Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)?; Self::instruction_format_to_instruction(ins_f, addr, &instructions_raw, dex)?;
instructions.push((addr, ins)); instructions.push((addr, ins));
addr += ins_f.size() / 2; addr += ins_f.size() / 2;
labels.extend(&mut ins_labels); // TODO: handle collisions for (key, val) in ins_labels {
if let Some(val_) = ins_labels.get(&key) {
if val_ != &val {
// TODO: internal error, panic?
bail!("Label collision at 0x{key:80X}: {val_} and {val}");
}
}
}
labels.extend(ins_labels);
}
let mut insns = vec![];
for (addr, ins) in instructions {
if let Some(try_) = tries.remove(&addr) {
insns.push(try_);
}
if let Some(label) = labels.remove(&addr) {
insns.push(Instruction::Label(Label::new(label)));
}
insns.push(ins);
} }
// TODO: add try blocks and labels
let insns = instructions.into_iter().map(|(_, ins)| ins).collect();
Ok(Code { Ok(Code {
registers_size: code_item.registers_size, registers_size: code_item.registers_size,
ins_size: code_item.ins_size, ins_size: code_item.ins_size,
outs_size: code_item.outs_size, outs_size: code_item.outs_size,
debug_info, debug_info,
insns, insns,
//tries,
}) })
} }

View file

@ -206,6 +206,8 @@ pub enum Instruction {
InvokeCustom(InvokeCustom), InvokeCustom(InvokeCustom),
ConstMethodHandle(ConstMethodHandle), ConstMethodHandle(ConstMethodHandle),
ConstMethodType(ConstMethodType), ConstMethodType(ConstMethodType),
Try(Try),
Label(Label),
} }
impl Instruction { impl Instruction {
@ -403,6 +405,8 @@ impl Instruction {
Self::InvokeCustom(ins) => ins.__str__(), Self::InvokeCustom(ins) => ins.__str__(),
Self::ConstMethodHandle(ins) => ins.__str__(), Self::ConstMethodHandle(ins) => ins.__str__(),
Self::ConstMethodType(ins) => ins.__str__(), Self::ConstMethodType(ins) => ins.__str__(),
Self::Try(ins) => ins.__str__(),
Self::Label(ins) => ins.__str__(),
} }
} }
@ -600,6 +604,8 @@ impl Instruction {
Self::InvokeCustom(ins) => ins.__repr__(), Self::InvokeCustom(ins) => ins.__repr__(),
Self::ConstMethodHandle(ins) => ins.__repr__(), Self::ConstMethodHandle(ins) => ins.__repr__(),
Self::ConstMethodType(ins) => ins.__repr__(), Self::ConstMethodType(ins) => ins.__repr__(),
Self::Try(ins) => ins.__repr__(),
Self::Label(ins) => ins.__repr__(),
} }
} }
} }
@ -990,6 +996,10 @@ impl<'source> FromPyObject<'source> for Instruction {
Ok(Self::ConstMethodHandle(ins)) Ok(Self::ConstMethodHandle(ins))
} else if let Ok(ins) = ConstMethodType::extract(ob) { } else if let Ok(ins) = ConstMethodType::extract(ob) {
Ok(Self::ConstMethodType(ins)) Ok(Self::ConstMethodType(ins))
} else if let Ok(ins) = Try::extract(ob) {
Ok(Self::Try(ins))
} else if let Ok(ins) = Label::extract(ob) {
Ok(Self::Label(ins))
} else { } else {
Err(PyErr::new::<PyTypeError, _>(format!( Err(PyErr::new::<PyTypeError, _>(format!(
"{} is not a castable as an Instruction", "{} is not a castable as an Instruction",
@ -1194,6 +1204,8 @@ impl IntoPy<PyObject> for Instruction {
Self::InvokeCustom(ins) => ins.into_py(py), Self::InvokeCustom(ins) => ins.into_py(py),
Self::ConstMethodHandle(ins) => ins.into_py(py), Self::ConstMethodHandle(ins) => ins.into_py(py),
Self::ConstMethodType(ins) => ins.into_py(py), Self::ConstMethodType(ins) => ins.into_py(py),
Self::Try(ins) => ins.into_py(py),
Self::Label(ins) => ins.into_py(py),
} }
} }
} }
@ -8732,9 +8744,96 @@ impl ConstMethodType {
pub fn __repr__(&self) -> String { pub fn __repr__(&self) -> String {
format!( format!(
"Instruction(ConstMethodHandle ConstMethodType({}, {}))", "Instruction(ConstMethodType({}, {}))",
self.to, self.to,
self.proto.__repr__() self.proto.__repr__()
) )
} }
} }
/// Try block. It does not match a Dalvik instruction but is derived from the code item struct.
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Try {
    /// Label marking the end of the try block.
    pub end_label: String,
    /// The list of exceptions and their associated handler label.
    ///
    /// # Warning
    ///
    /// The handlers are sorted: if several exception types match an exception,
    /// the handler used is the first in the list.
    pub handlers: Vec<(IdType, String)>,
    /// Label of the catch-all handler, if the try block has one.
    pub default_handler: Option<String>,
}

#[pymethods]
impl Try {
    /// Build a try block from its end label, its ordered typed handlers and
    /// its optional catch-all handler label.
    #[new]
    pub fn new(
        end_label: String,
        handlers: Vec<(IdType, String)>,
        default_handler: Option<String>,
    ) -> Self {
        Self {
            end_label,
            default_handler,
            handlers,
        }
    }

    /// Human readable rendering: the end label, followed by one
    /// `type: label` entry per handler and, when present, the default handler.
    pub fn __str__(&self) -> String {
        let handlers = self
            .handlers
            .iter()
            .map(|(ty, label)| format!(" {}: {label}", ty.__str__()))
            .collect::<Vec<_>>()
            .join("\n ");
        // Borrow the label: `self` is only a shared reference, so the `String`
        // stored in `default_handler` cannot be moved out of it.
        let default_handler = self
            .default_handler
            .as_deref()
            .map(|label| format!(" default: {label}"))
            .unwrap_or_default();
        format!(
            "try until {}{}{}",
            self.end_label, handlers, default_handler
        )
    }

    /// Debug-oriented rendering of the try block, handlers listed in order.
    pub fn __repr__(&self) -> String {
        let handlers = self
            .handlers
            .iter()
            .map(|(ty, label)| format!("{}, {label}", ty.__repr__()))
            .collect::<Vec<_>>()
            .join(", ");
        format!(
            "Instruction(Try({}, [{}], {:?}))",
            self.end_label, handlers, self.default_handler,
        )
    }
}
/// Label marker. It does not match a Dalvik instruction; it is used as a marker for a
/// jump destination.
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Label {
    /// Name of the label.
    pub name: String,
}

#[pymethods]
impl Label {
    /// Create a label carrying `name`.
    #[new]
    pub fn new(name: String) -> Self {
        Label { name }
    }

    /// The label name followed by a colon.
    pub fn __str__(&self) -> String {
        let mut text = String::with_capacity(self.name.len() + 1);
        text.push_str(&self.name);
        text.push(':');
        text
    }

    /// The label name wrapped in `Instruction(Label(...))`.
    pub fn __repr__(&self) -> String {
        ["Instruction(Label(", self.name.as_str(), "))"].concat()
    }
}