From a02329f7de5b688073c48f4a31ce2ce5a5f8cc50 Mon Sep 17 00:00:00 2001
From: Jean-Marie 'Histausse' Mineau
Date: Fri, 21 Feb 2025 19:47:54 +0100
Subject: [PATCH] resolve register types

---
 androscalpel/src/code_analysis/method_cfg.rs  |  38 +-
 androscalpel/src/code_analysis/mod.rs         |   3 +-
 .../src/code_analysis/register_type.rs        | 577 ++++++++++++++++++
 3 files changed, 602 insertions(+), 16 deletions(-)
 create mode 100644 androscalpel/src/code_analysis/register_type.rs

diff --git a/androscalpel/src/code_analysis/method_cfg.rs b/androscalpel/src/code_analysis/method_cfg.rs
index 7ba060e..0794316 100644
--- a/androscalpel/src/code_analysis/method_cfg.rs
+++ b/androscalpel/src/code_analysis/method_cfg.rs
@@ -1,27 +1,29 @@
 //! The Control Flow Graph for a method.
 
-use crate::{IdMethod, Instruction, Method, Result};
+use crate::{Instruction, Method, Result};
 use anyhow::Context;
 use std::collections::HashMap;
 
 const EMPTY_INSNS_SLICE: &[Instruction] = &[];
 
 /// A basic block of code of a method.
-struct MethodCFGNode<'a> {
+#[derive(Debug, PartialEq)]
+pub struct MethodCFGNode<'a> {
     /// Code represented by the block
-    code_block: &'a [Instruction],
+    pub code_block: &'a [Instruction],
     /// Labels at the begining of the node if they exists
-    labels: Vec<String>,
+    pub labels: Vec<String>,
     /// Indices in CodeGraph.nodes of the next nodes
-    next_nodes: Vec<usize>,
+    pub next_nodes: Vec<usize>,
     /// Indices in CodeGraph.nodes of the previous nodes
-    prev_nodes: Vec<usize>,
+    pub prev_nodes: Vec<usize>,
 }
 
 /// The CFG for a method, with potentially additionnal informations.
+#[derive(Debug, PartialEq)]
 pub struct MethodCFG<'a> {
-    method: &'a IdMethod,
-    nodes: Vec<MethodCFGNode<'a>>,
+    pub method: &'a Method,
+    pub nodes: Vec<MethodCFGNode<'a>>,
 }
 
 impl<'a> MethodCFG<'a> {
@@ -214,27 +216,33 @@ impl<'a> MethodCFG<'a> {
                 nodes[*j].prev_nodes.push(i);
             }
         }
-        Ok(Self {
-            method: &method.descriptor,
-            nodes,
-        })
+        Ok(Self { method, nodes })
     }
 
     /// Serialize the graph to dot format.
-    pub fn to_dot(&self) -> String {
+    pub fn to_dot(&self, add_reg_ty: bool) -> String {
         let mut dot_string: String = "digraph {\n".into();
         dot_string += "overlap=false;\n";
         dot_string += &self.to_dot_subgraph();
+        dot_string += "\n";
+        if add_reg_ty {
+            dot_string += &self.reg_types_dot();
+        }
         dot_string += "}";
         dot_string
     }
 
+    // method call: cluster_{mth}:node_{i}:i{j}:e -> cluster_{mth2}:n ?
+
     /// Serialize the graph to dot format.
     pub fn to_dot_subgraph(&self) -> String {
-        let mut dot_string = format!("subgraph \"cluster_{}\" {{\n", self.method.__str__());
+        let mut dot_string = format!(
+            "subgraph \"cluster_{}\" {{\n",
+            self.method.descriptor.__str__()
+        );
         dot_string += " style=\"dashed\";\n";
         dot_string += " color=\"black\";\n";
-        dot_string += &format!(" label=\"{}\";\n", self.method.__str__());
+        dot_string += &format!(" label=\"{}\";\n", self.method.descriptor.__str__());
         for (i, node) in self.nodes.iter().enumerate() {
             let block_name = if i == 0 {
                 "ENTRY".into()
diff --git a/androscalpel/src/code_analysis/mod.rs b/androscalpel/src/code_analysis/mod.rs
index c93e214..86c6b1c 100644
--- a/androscalpel/src/code_analysis/mod.rs
+++ b/androscalpel/src/code_analysis/mod.rs
@@ -3,5 +3,6 @@
 //! This is module is quite experimental but can be usefull.
 
 pub mod method_cfg;
-
+pub mod register_type;
 pub use method_cfg::*;
+pub use register_type::RegType;
diff --git a/androscalpel/src/code_analysis/register_type.rs b/androscalpel/src/code_analysis/register_type.rs
new file mode 100644
index 0000000..e7a1898
--- /dev/null
+++ b/androscalpel/src/code_analysis/register_type.rs
@@ -0,0 +1,577 @@
+//! Compute the register types at each label of a method.
+
+use super::{MethodCFG, MethodCFGNode};
+use crate::Instruction;
+use std::collections::HashMap;
+
+/// The different possible types of a register
+#[derive(Debug, PartialEq, Clone, Copy)]
+pub enum RegType {
+    /// The register content is not yet defined
+    Undefined,
+    /// The register contains a java object. It can be a classical object,
+    /// an array, a type, etc
+    Object,
+    /// The register contains a 32 bit scalar
+    SimpleScalar,
+    /// The register contains the first 32 bits of a wide register. If not
+    /// followed by a `SecondWideScalar`, it should be considered as a `SimpleScalar`?
+    FirstWideScalar,
+    /// The register contains the last 32 bits of a wide register. If not
+    /// preceded by a `FirstWideScalar`, it should be considered as a `SimpleScalar`?
+    SecondWideScalar,
+    /// The register can be either a scalar or an object.
+    Any,
+}
+
+impl RegType {
+    /// Short lowercase name of the type, as used in the dot output.
+    pub fn to_str(&self) -> &'static str {
+        match self {
+            RegType::Undefined => "undef",
+            RegType::Object => "object",
+            RegType::SimpleScalar => "scalar",
+            RegType::FirstWideScalar => "wide1",
+            RegType::SecondWideScalar => "wide2",
+            RegType::Any => "any",
+        }
+    }
+}
+
+impl MethodCFG<'_> {
+    /// Compute the type of each register at each label of the method.
+    ///
+    /// The types at the end of each block are propagated through the control
+    /// flow graph until a fixed point is reached. The returned map associates
+    /// each label with the types of the registers (indexed by register number)
+    /// at the start of the block carrying the label. The map is empty when the
+    /// method has no code or when the graph has no node.
+    pub fn get_reg_types(&self) -> HashMap<String, Vec<RegType>> {
+        let code = if let Some(code) = self.method.code.as_ref() {
+            code
+        } else {
+            return HashMap::new();
+        };
+        if self.nodes.is_empty() {
+            return HashMap::new();
+        }
+        let nb_reg = code.registers_size as usize;
+        let entry_reg_tys = self.entry_reg_types(nb_reg, code.ins_size as usize);
+
+        // Types at the end of each block, recomputed until nothing changes.
+        let mut end_block_reg_tys = vec![vec![RegType::Undefined; nb_reg]; self.nodes.len()];
+        let mut changed = true;
+        while changed {
+            let new_end_block_reg_tys: Vec<_> = self
+                .nodes
+                .iter()
+                .enumerate()
+                .map(|(i, node)| {
+                    let input = block_input(i, node, &entry_reg_tys, &end_block_reg_tys);
+                    transform_reg_ty(&input, node)
+                })
+                .collect();
+            changed = end_block_reg_tys != new_end_block_reg_tys;
+            end_block_reg_tys = new_end_block_reg_tys;
+        }
+        let start_block_reg_tys: Vec<_> = self
+            .nodes
+            .iter()
+            .enumerate()
+            .map(|(i, node)| block_input(i, node, &entry_reg_tys, &end_block_reg_tys))
+            .collect();
+        self.nodes
+            .iter()
+            .zip(&start_block_reg_tys)
+            .flat_map(|(node, reg_tys)| {
+                node.labels
+                    .iter()
+                    .map(move |label| (label.clone(), reg_tys.clone()))
+            })
+            .collect()
+    }
+
+    /// Types of the registers at the entry of the method, from its signature.
+    ///
+    /// The arguments are stored in the last `ins_size` registers, the other
+    /// registers are left `Undefined`.
+    fn entry_reg_types(&self, nb_reg: usize, ins_size: usize) -> Vec<RegType> {
+        let mut arg_tys = vec![];
+        for arg in &self.method.descriptor.proto.get_parameters() {
+            if arg.is_class() || arg.is_array() {
+                arg_tys.push(RegType::Object);
+            } else if arg.is_long() || arg.is_double() {
+                arg_tys.push(RegType::FirstWideScalar);
+                arg_tys.push(RegType::SecondWideScalar);
+            } else {
+                arg_tys.push(RegType::SimpleScalar);
+            }
+        }
+        // NOTE(review): assumes the dex convention where `ins_size` counts the
+        // implicit `this` of non-static methods, stored before the parameters.
+        if ins_size > arg_tys.len() {
+            arg_tys.insert(0, RegType::Object);
+        }
+        let first_arg_reg = nb_reg.saturating_sub(ins_size);
+        let mut reg_tys = vec![RegType::Undefined; nb_reg];
+        // `zip` stops at the shortest side, so inconsistent sizes cannot panic.
+        for (reg_ty, arg_ty) in reg_tys.iter_mut().skip(first_arg_reg).zip(arg_tys) {
+            *reg_ty = arg_ty;
+        }
+        reg_tys
+    }
+
+    /// Serialize the register types at each label to a dot subgraph.
+    pub(crate) fn reg_types_dot(&self) -> String {
+        let types = self.get_reg_types();
+        let mut dot_string = format!(
+            "subgraph \"cluster_reg_types_{}\" {{\n",
+            self.method.descriptor.__str__()
+        );
+        dot_string += " style=\"dashed\";\n";
+        dot_string += " color=\"black\";\n";
+        dot_string += &format!(
+            " label=\"Register Types{}\";\n",
+            self.method.descriptor.__str__()
+        );
+
+        for (label, regs) in &types {
+            let mut node_label = String::new();
+            for reg in regs {
+                node_label += "|";
+                node_label += reg.to_str();
+            }
+            node_label += "|";
+            dot_string += &format!(" node_{label} [shape=record,style=filled,fillcolor=lightgrey,label=\"{node_label}\"];\n");
+        }
+
+        dot_string += "}\n";
+        dot_string
+    }
+}
+
+/// Merge the types a register has on two converging paths.
+///
+/// `Undefined` is neutral, mixing an object with a scalar gives `Any` and
+/// mixing scalars of different kinds gives `SimpleScalar`.
+fn merge_reg_ty(current: RegType, incoming: RegType) -> RegType {
+    use RegType::*;
+    match (current, incoming) {
+        (Undefined, ty) | (ty, Undefined) => ty,
+        (Any, _) | (_, Any) => Any,
+
+        (Object, Object) => Object,
+        (Object, _) | (_, Object) => Any,
+
+        (FirstWideScalar, FirstWideScalar) => FirstWideScalar,
+        (SecondWideScalar, SecondWideScalar) => SecondWideScalar,
+        (
+            SimpleScalar | FirstWideScalar | SecondWideScalar,
+            SimpleScalar | FirstWideScalar | SecondWideScalar,
+        ) => SimpleScalar,
+    }
+}
+
+/// Compute the types of the registers at the start of a block by merging the
+/// types at the end of its predecessors.
+///
+/// `inputs` contains the types at the end of each node of the graph.
+fn merge_input(node: &MethodCFGNode, nb_reg: usize, inputs: &[Vec<RegType>]) -> Vec<RegType> {
+    let mut reg_tys = vec![RegType::Undefined; nb_reg];
+    for i in &node.prev_nodes {
+        for (reg_ty, ty) in reg_tys.iter_mut().zip(&inputs[*i]) {
+            *reg_ty = merge_reg_ty(*reg_ty, *ty);
+        }
+    }
+    reg_tys
+}
+
+/// Compute the types of the registers at the start of the `idx`-th node.
+///
+/// The node 0 is the entry of the method: in addition to the types coming
+/// from its predecessors, it receives the types deduced from the signature.
+fn block_input(
+    idx: usize,
+    node: &MethodCFGNode,
+    entry_reg_tys: &[RegType],
+    end_block_reg_tys: &[Vec<RegType>],
+) -> Vec<RegType> {
+    let mut reg_tys = merge_input(node, entry_reg_tys.len(), end_block_reg_tys);
+    if idx == 0 {
+        for (reg_ty, entry_ty) in reg_tys.iter_mut().zip(entry_reg_tys) {
+            *reg_ty = merge_reg_ty(*reg_ty, *entry_ty);
+        }
+    }
+    reg_tys
+}
+
+/// Compute the types of the registers at the end of a block from the types at
+/// its start.
+///
+/// Each instruction forces the type of the registers it uses. The registers
+/// written by an instruction are set after the ones it reads, so that the type
+/// of the result wins when a register is both a source and the destination.
+/// Instructions that are not matched (like the invokes) change nothing.
+fn transform_reg_ty(input_types: &[RegType], cfg: &MethodCFGNode) -> Vec<RegType> {
+    use Instruction::*;
+    use RegType::*;
+    let mut types = input_types.to_vec();
+    for ins in cfg.code_block {
+        match ins {
+            Move { to, .. } => types[*to as usize] = SimpleScalar, // E: mism
+            MoveWide { to, .. } => {
+                types[*to as usize] = FirstWideScalar;
+                types[*to as usize + 1] = SecondWideScalar;
+            }
+            MoveObject { to, .. } => types[*to as usize] = Object,
+            MoveResult { to: reg } | Return { reg } | Const { reg, .. } | Switch { reg, .. } => {
+                types[*reg as usize] = SimpleScalar
+            }
+            MoveResultWide { to: reg } | ReturnWide { reg } | ConstWide { reg, .. } => {
+                types[*reg as usize] = FirstWideScalar;
+                types[*reg as usize + 1] = SecondWideScalar;
+            }
+            MoveResultObject { to: reg }
+            | MoveException { to: reg }
+            | ReturnObject { reg }
+            | ConstString { reg, .. }
+            | ConstClass { reg, .. }
+            | NewInstance { reg, .. }
+            | Throw { reg }
+            | ConstMethodHandle { to: reg, .. }
+            | ConstMethodType { to: reg, .. } => types[*reg as usize] = Object,
+            InstanceOf { dest, obj: _, .. } => {
+                // types[*obj as usize] = Object; not sure about this one
+                types[*dest as usize] = SimpleScalar;
+            }
+            ArrayLength { dest, arr } => {
+                types[*arr as usize] = Object;
+                types[*dest as usize] = SimpleScalar;
+            }
+            NewArray { reg, size_reg, .. } => {
+                types[*size_reg as usize] = SimpleScalar;
+                types[*reg as usize] = Object;
+            }
+            FilledNewArray { type_, reg_values } => {
+                // NOTE(review): `type_` is presumably the type of the array, in
+                // which case the values are always seen as objects: the type of
+                // the elements should probably be tested instead.
+                let reg_ty = if type_.is_class() || type_.is_array() {
+                    Object
+                //} else if type_.is_long() || type_.is_double() {
+                //    SimpleScalar // Not supposed to happen so default to simple scalar just in
+                //    case
+                } else {
+                    SimpleScalar
+                };
+                for i in reg_values {
+                    types[*i as usize] = reg_ty;
+                }
+            }
+            CmpLFloat { dest, b, c } | CmpGFloat { dest, b, c } => {
+                types[*b as usize] = SimpleScalar;
+                types[*c as usize] = SimpleScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            CmpLDouble { dest, b, c } | CmpGDouble { dest, b, c } | CmpLong { dest, b, c } => {
+                types[*b as usize] = FirstWideScalar;
+                types[*b as usize + 1] = SecondWideScalar;
+                types[*c as usize] = FirstWideScalar;
+                types[*c as usize + 1] = SecondWideScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            IfEq { a, b, .. }
+            | IfNe { a, b, .. }
+            | IfLt { a, b, .. }
+            | IfGe { a, b, .. }
+            | IfGt { a, b, .. }
+            | IfLe { a, b, .. } => {
+                types[*a as usize] = SimpleScalar;
+                types[*b as usize] = SimpleScalar;
+            }
+            IfEqZ { a, .. }
+            | IfNeZ { a, .. }
+            | IfLtZ { a, .. }
+            | IfGeZ { a, .. }
+            | IfGtZ { a, .. }
+            | IfLeZ { a, .. } => {
+                types[*a as usize] = SimpleScalar;
+            }
+            AGet {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | AGetBoolean {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | AGetByte {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | AGetChar {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | AGetShort {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | APut {
+                from: r,
+                arr: obj,
+                idx,
+            }
+            | APutBoolean {
+                from: r,
+                arr: obj,
+                idx,
+            }
+            | APutByte {
+                from: r,
+                arr: obj,
+                idx,
+            }
+            | APutChar {
+                from: r,
+                arr: obj,
+                idx,
+            }
+            | APutShort {
+                from: r,
+                arr: obj,
+                idx,
+            } => {
+                types[*obj as usize] = Object;
+                types[*idx as usize] = SimpleScalar;
+                types[*r as usize] = SimpleScalar;
+            }
+            AGetWide {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | APutWide {
+                from: r,
+                arr: obj,
+                idx,
+            } => {
+                types[*obj as usize] = Object;
+                types[*idx as usize] = SimpleScalar;
+                types[*r as usize] = FirstWideScalar;
+                types[*r as usize + 1] = SecondWideScalar;
+            }
+            AGetObject {
+                dest: r,
+                arr: obj,
+                idx,
+            }
+            | APutObject {
+                from: r,
+                arr: obj,
+                idx,
+            } => {
+                types[*obj as usize] = Object;
+                types[*idx as usize] = SimpleScalar;
+                types[*r as usize] = Object;
+            }
+            IGet { to: r, obj, .. }
+            | IGetBoolean { to: r, obj, .. }
+            | IGetByte { to: r, obj, .. }
+            | IGetChar { to: r, obj, .. }
+            | IGetShort { to: r, obj, .. }
+            | IPut { from: r, obj, .. }
+            | IPutBoolean { from: r, obj, .. }
+            | IPutByte { from: r, obj, .. }
+            | IPutChar { from: r, obj, .. }
+            | IPutShort { from: r, obj, .. } => {
+                types[*obj as usize] = Object;
+                types[*r as usize] = SimpleScalar;
+            }
+            IGetWide { to: r, obj, .. } | IPutWide { from: r, obj, .. } => {
+                types[*obj as usize] = Object;
+                types[*r as usize] = FirstWideScalar;
+                types[*r as usize + 1] = SecondWideScalar;
+            }
+            IGetObject { to: r, obj, .. } | IPutObject { from: r, obj, .. } => {
+                types[*obj as usize] = Object;
+                types[*r as usize] = Object;
+            }
+            SGet { to: r, .. }
+            | SGetBoolean { to: r, .. }
+            | SGetByte { to: r, .. }
+            | SGetChar { to: r, .. }
+            | SGetShort { to: r, .. }
+            | SPut { from: r, .. }
+            | SPutBoolean { from: r, .. }
+            | SPutByte { from: r, .. }
+            | SPutChar { from: r, .. }
+            | SPutShort { from: r, .. } => {
+                types[*r as usize] = SimpleScalar;
+            }
+            SGetWide { to: r, .. } | SPutWide { from: r, .. } => {
+                types[*r as usize] = FirstWideScalar;
+                types[*r as usize + 1] = SecondWideScalar;
+            }
+            SGetObject { to: r, .. } | SPutObject { from: r, .. } => {
+                types[*r as usize] = Object;
+            }
+            /* They are information to get from the method type, but, meh, this is not
+             * necessary (we should have the type from when the reg was initialized)
+            InvokeVirtual { method: _, args: _ }
+            | InvokeSuper { method: _, args: _ }
+            | InvokeDirect { method: _, args: _ }
+            | InvokeStatic { method: _, args: _ }
+            | InvokeInterface { method: _, args: _ } => todo!(),
+            InvokePolymorphic { method:_, proto: _, args: _ } => todo!(),
+            InvokeCustom { call_site: _, args: _ } => todo!(),
+            */
+            NegInt { dest, val }
+            | NotInt { dest, val }
+            | NegFloat { dest, val }
+            | IntToFloat { dest, val }
+            | FloatToInt { dest, val }
+            | IntToByte { dest, val }
+            | IntToChar { dest, val }
+            | IntToShort { dest, val } => {
+                types[*val as usize] = SimpleScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            NegLong { dest, val }
+            | NotLong { dest, val }
+            | NegDouble { dest, val }
+            | LongToDouble { dest, val }
+            | DoubleToLong { dest, val } => {
+                types[*val as usize] = FirstWideScalar;
+                types[*val as usize + 1] = SecondWideScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            IntToLong { dest, val }
+            | IntToDouble { dest, val }
+            | FloatToLong { dest, val }
+            | FloatToDouble { dest, val } => {
+                types[*val as usize] = SimpleScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            LongToInt { dest, val }
+            | LongToFloat { dest, val }
+            | DoubleToInt { dest, val }
+            | DoubleToFloat { dest, val } => {
+                types[*val as usize] = FirstWideScalar;
+                types[*val as usize + 1] = SecondWideScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            AddInt { dest, b, c }
+            | SubInt { dest, b, c }
+            | MulInt { dest, b, c }
+            | DivInt { dest, b, c }
+            | RemInt { dest, b, c }
+            | AndInt { dest, b, c }
+            | OrInt { dest, b, c }
+            | XorInt { dest, b, c }
+            | ShlInt { dest, b, c }
+            | ShrInt { dest, b, c }
+            | UshrInt { dest, b, c }
+            | AddFloat { dest, b, c }
+            | SubFloat { dest, b, c }
+            | MulFloat { dest, b, c }
+            | DivFloat { dest, b, c }
+            | RemFloat { dest, b, c } => {
+                types[*b as usize] = SimpleScalar;
+                types[*c as usize] = SimpleScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            AddLong { dest, b, c }
+            | SubLong { dest, b, c }
+            | MulLong { dest, b, c }
+            | DivLong { dest, b, c }
+            | RemLong { dest, b, c }
+            | AndLong { dest, b, c }
+            | OrLong { dest, b, c }
+            | XorLong { dest, b, c }
+            | AddDouble { dest, b, c }
+            | SubDouble { dest, b, c }
+            | MulDouble { dest, b, c }
+            | DivDouble { dest, b, c }
+            | RemDouble { dest, b, c } => {
+                types[*b as usize] = FirstWideScalar;
+                types[*b as usize + 1] = SecondWideScalar;
+                types[*c as usize] = FirstWideScalar;
+                types[*c as usize + 1] = SecondWideScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            ShlLong { dest, b, c } | ShrLong { dest, b, c } | UshrLong { dest, b, c } => {
+                // The shift distance `c` is a 32 bit scalar, not a wide value.
+                types[*b as usize] = FirstWideScalar;
+                types[*b as usize + 1] = SecondWideScalar;
+                types[*c as usize] = SimpleScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            AddInt2Addr { dest, b }
+            | SubInt2Addr { dest, b }
+            | MulInt2Addr { dest, b }
+            | DivInt2Addr { dest, b }
+            | RemInt2Addr { dest, b }
+            | AndInt2Addr { dest, b }
+            | OrInt2Addr { dest, b }
+            | XorInt2Addr { dest, b }
+            | ShlInt2Addr { dest, b }
+            | ShrInt2Addr { dest, b }
+            | UshrInt2Addr { dest, b }
+            | AddFloat2Addr { dest, b }
+            | SubFloat2Addr { dest, b }
+            | MulFloat2Addr { dest, b }
+            | DivFloat2Addr { dest, b }
+            | RemFloat2Addr { dest, b }
+            | AddIntLit { dest, b, .. }
+            | RsubIntLit { dest, b, .. }
+            | MulIntLit { dest, b, .. }
+            | DivIntLit { dest, b, .. }
+            | RemIntLit { dest, b, .. }
+            | AndIntLit { dest, b, .. }
+            | OrIntLit { dest, b, .. }
+            | XorIntLit { dest, b, .. }
+            | ShlIntLit { dest, b, .. }
+            | ShrIntLit { dest, b, .. }
+            | UshrIntLit { dest, b, .. } => {
+                types[*b as usize] = SimpleScalar;
+                types[*dest as usize] = SimpleScalar;
+            }
+            AddLong2Addr { dest, b }
+            | SubLong2Addr { dest, b }
+            | MulLong2Addr { dest, b }
+            | DivLong2Addr { dest, b }
+            | RemLong2Addr { dest, b }
+            | AndLong2Addr { dest, b }
+            | OrLong2Addr { dest, b }
+            | XorLong2Addr { dest, b }
+            | AddDouble2Addr { dest, b }
+            | SubDouble2Addr { dest, b }
+            | MulDouble2Addr { dest, b }
+            | DivDouble2Addr { dest, b }
+            | RemDouble2Addr { dest, b } => {
+                types[*b as usize] = FirstWideScalar;
+                types[*b as usize + 1] = SecondWideScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            ShlLong2Addr { dest, b } | ShrLong2Addr { dest, b } | UshrLong2Addr { dest, b } => {
+                // The shift distance `b` is a 32 bit scalar, not a wide value.
+                types[*b as usize] = SimpleScalar;
+                types[*dest as usize] = FirstWideScalar;
+                types[*dest as usize + 1] = SecondWideScalar;
+            }
+            _ => (),
+        }
+    }
+    types
+}