From 2d164362a7be9f225a7573ce6d8cf67658cfc060 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Fri, 15 Dec 2023 14:55:55 +0100 Subject: [PATCH] add instruction using pseudo-instruction format --- androscalpel/src/instructions.rs | 110 +++++++++++++++++- .../src/items/instructions.rs | 107 ++++++++++++++++- 2 files changed, 209 insertions(+), 8 deletions(-) diff --git a/androscalpel/src/instructions.rs b/androscalpel/src/instructions.rs index 6384334..5de54de 100644 --- a/androscalpel/src/instructions.rs +++ b/androscalpel/src/instructions.rs @@ -8,6 +8,8 @@ use crate::{DexString, IdField, IdMethod, IdMethodType, IdType, MethodHandle, Re use anyhow::anyhow; use pyo3::prelude::*; +use std::collections::HashMap; + #[pyclass] #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CallSite; // TODO @@ -743,7 +745,83 @@ impl FilledNewArray { } } -// TODO: fill-array-data +#[pyclass] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct FillArrayData { + pub arr: u8, + pub elt_width: u16, + pub data: Vec<u8>, +} + +#[pymethods] +impl FillArrayData { + #[new] + pub fn new(arr: u8, elt_width: u16, data: Vec<u8>) -> Self { + Self { + arr, + elt_width, + data, + } + } + + pub fn __str__(&self) -> String { + let data: String = if self.data.len() / self.elt_width as usize == 0 { + "".into() + } else if self.data.len() / self.elt_width as usize <= 2 { + let mut arr = "".into(); + for (i, v) in self.data.iter().enumerate() { + if i == 0 { + arr += "0x" + } else if i % self.elt_width as usize == 0 { + arr += " 0x" + } + arr += format!("{v:02x}").as_str(); + } + arr + } else { + let mut arr = "0x".into(); + for v in &self.data[..self.elt_width as usize] { + arr += format!("{v:02x}").as_str(); + } + arr += " ... 0x"; + for v in &self.data[self.data.len() - self.elt_width as usize..] 
{ + arr += format!("{v:02x}").as_str(); + } + + arr + }; + format!("fill-array-data {} {}", self.arr, data) + } + + pub fn __repr__(&self) -> String { + let data: String = if self.data.len() / self.elt_width as usize == 0 { + "".into() + } else if self.data.len() / self.elt_width as usize <= 2 { + let mut arr = "".into(); + for (i, v) in self.data.iter().enumerate() { + if i == 0 { + arr += "0x" + } else if i % self.elt_width as usize == 0 { + arr += ", 0x" + } + arr += format!("{v:02x}").as_str(); + } + arr + } else { + let mut arr = "0x".into(); + for v in &self.data[..self.elt_width as usize] { + arr += format!("{v:02x}").as_str(); + } + arr += ", ..., 0x"; + for v in &self.data[self.data.len() - self.elt_width as usize..] { + arr += format!("{v:02x}").as_str(); + } + + arr + }; + format!("Instruction(FillArrayData({}, [{}]))", self.arr, data) + } +} /// Throws the exception in the register. #[pyclass] @@ -791,8 +869,36 @@ impl Goto { } } -// TODO packed-switch +/// Jump to a label depending on the value of a register. If the value +/// is not matched, continue the execution at the next instruction. +#[pyclass] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Switch { + pub reg: u8, + pub branches: HashMap<i32, String>, +} +#[pymethods] +impl Switch { + #[new] + pub fn new(reg: u8, branches: HashMap<i32, String>) -> Self { + Self { reg, branches } + } + + pub fn __str__(&self) -> String { + let mut branches_str: String = "".into(); + let mut branches: Vec<_> = self.branches.iter().collect(); + branches.sort_by_key(|(key, _)| key); + for (key, label) in branches { + branches_str += &format!("\n {key}: goto {label}"); + } + format!("switch {} {}", self.reg, branches_str) + } + + pub fn __repr__(&self) -> String { + format!("Instruction(Switch({}, ...))", self.reg) + } +} /// Store the result of the comparison between the registers. 
/// /// - b < c: a = -1 diff --git a/androscalpel_serializer/src/items/instructions.rs b/androscalpel_serializer/src/items/instructions.rs index 8425e7c..5e99312 100644 --- a/androscalpel_serializer/src/items/instructions.rs +++ b/androscalpel_serializer/src/items/instructions.rs @@ -4,6 +4,7 @@ use crate::{Error, ReadSeek, Result, Serializable}; //use log::debug; use std::io::{SeekFrom, Write}; +use std::iter::zip; /// An instruction, following the formats described at /// @@ -208,6 +209,17 @@ pub enum Instruction { op: u8, b: i64, }, + FormatPackedSwitchPayload { + first_key: i32, + targets: Vec<i32>, + }, + FormatSparseSwitchPayload { + key_targets: Vec<(i32, i32)>, + }, + FormatFillArrayDataPayload { + elt_width: u16, + data: Vec<u8>, + }, } impl Instruction { @@ -595,6 +607,48 @@ impl Instruction { let b = i64::deserialize(input)?; Ok(Self::Format51L { va, op, b }) } + + pub fn deserialize_packed_switch(input: &mut dyn ReadSeek) -> Result<Self> { + let _ = u16::deserialize(input)?; + let size = u16::deserialize(input)?; + let first_key = i32::deserialize(input)?; + let mut targets = vec![]; + for _ in 0..size { + targets.push(i32::deserialize(input)?); + } + Ok(Self::FormatPackedSwitchPayload { first_key, targets }) + } + + pub fn deserialize_sparse_switch(input: &mut dyn ReadSeek) -> Result<Self> { + let _ = u16::deserialize(input)?; + let size = u16::deserialize(input)?; + let mut keys = vec![]; + let mut targets = vec![]; + for _ in 0..size { + keys.push(i32::deserialize(input)?); + } + for _ in 0..size { + targets.push(i32::deserialize(input)?); + } + let key_targets = zip(keys, targets).collect(); + Ok(Self::FormatSparseSwitchPayload { key_targets }) + } + + pub fn deserialize_fill_array_data(input: &mut dyn ReadSeek) -> Result<Self> { + let _ = u16::deserialize(input)?; + let elt_width = u16::deserialize(input)?; + let size = u32::deserialize(input)?; + let len = size * elt_width as u32; + let mut data = vec![]; + for _ in 0..len { + data.push(u8::deserialize(input)?); + } + if 
len % 2 != 0 { + let _ = u8::deserialize(input)?; + } + + Ok(Self::FormatFillArrayDataPayload { elt_width, data }) + } } impl Serializable for Instruction { @@ -771,7 +825,6 @@ impl Serializable for Instruction { op.serialize(output)?; 0u8.serialize(output)?; let [a_h0, a_h1, a_l0, a_l1] = a.to_be_bytes(); - // TODO: check the bytes order u32::from_be_bytes([a_l0, a_l1, a_h0, a_h1]).serialize(output) } Self::Format32X { op, va, vb } => { @@ -784,21 +837,18 @@ impl Serializable for Instruction { op.serialize(output)?; va.serialize(output)?; let [b_h0, b_h1, b_l0, b_l1] = b.to_be_bytes(); - // TODO: check the bytes order u32::from_be_bytes([b_l0, b_l1, b_h0, b_h1]).serialize(output) } Self::Format31T { va, op, b } => { op.serialize(output)?; va.serialize(output)?; let [b_h0, b_h1, b_l0, b_l1] = b.to_be_bytes(); - // TODO: check the bytes order u32::from_be_bytes([b_l0, b_l1, b_h0, b_h1]).serialize(output) } Self::Format31C { va, op, b } => { op.serialize(output)?; va.serialize(output)?; let [b_h0, b_h1, b_l0, b_l1] = b.to_be_bytes(); - // TODO: check the bytes order u32::from_be_bytes([b_l0, b_l1, b_h0, b_h1]).serialize(output) } Self::Format35C { @@ -1053,6 +1103,41 @@ impl Serializable for Instruction { va.serialize(output)?; b.serialize(output) } + Self::FormatPackedSwitchPayload { first_key, targets } => { + 0x0100u16.serialize(output)?; + let size = targets.len() as u16; + size.serialize(output)?; + first_key.serialize(output)?; + for target in targets { + target.serialize(output)?; + } + Ok(()) + } + Self::FormatSparseSwitchPayload { key_targets } => { + 0x0200u16.serialize(output)?; + let size = key_targets.len() as u16; + size.serialize(output)?; + for (key, _) in key_targets { + key.serialize(output)?; + } + for (_, target) in key_targets { + target.serialize(output)?; + } + Ok(()) + } + Self::FormatFillArrayDataPayload { elt_width, data } => { + 0x0300u16.serialize(output)?; + elt_width.serialize(output)?; + let size = (data.len() / *elt_width as usize) as 
u32; + size.serialize(output)?; + for d in data { + d.serialize(output)?; + } + if data.len() % 2 != 0 { + 0u8.serialize(output)?; + } + Ok(()) + } } } @@ -1061,13 +1146,18 @@ impl Serializable for Instruction { Error::SerializationError(format!("Failled to get position in steam: {err}")) })?; let op = u8::deserialize(input)?; - let _ = u8::deserialize(input)?; + let id = u8::deserialize(input)?; input.seek(SeekFrom::Start(pos)).map_err(|err| { Error::SerializationError(format!("Failled to get to position in steam: {err}")) })?; match op { - 0x00 => Self::deserialize_10x(input), + 0x00 => match id { + 0x01 => Self::deserialize_packed_switch(input), + 0x02 => Self::deserialize_sparse_switch(input), + 0x03 => Self::deserialize_fill_array_data(input), + _ => Self::deserialize_10x(input), + }, 0x01 => Self::deserialize_12x(input), 0x02 => Self::deserialize_22x(input), 0x03 => Self::deserialize_32x(input), @@ -1172,6 +1262,11 @@ impl Serializable for Instruction { Self::Format45CC { .. } => 8, Self::Format4RCC { .. } => 8, Self::Format51L { .. } => 10, + Self::FormatPackedSwitchPayload { targets, .. } => 2 + 2 + 4 + targets.len() * 4, + Self::FormatSparseSwitchPayload { key_targets } => 2 + 2 + key_targets.len() * 8, + Self::FormatFillArrayDataPayload { data, .. } => { + 2 + 2 + 4 + data.len() + (data.len() % 2) + } } } }