diff --git a/androscalpel/Cargo.toml b/androscalpel/Cargo.toml
index 4369668..73f3e96 100644
--- a/androscalpel/Cargo.toml
+++ b/androscalpel/Cargo.toml
@@ -26,7 +26,8 @@ zip = {version = "2.2.2", optional = true}
 pretty_assertions = "1.4.1"
 
 [features]
-default = []
+default = ["code-analysis"]
 # TODO: need refactoring to https://github.com/PyO3/pyo3/issues/2935#issuecomment-2560930677 or cfg_eval https://github.com/rust-lang/rust/issues/82679
 python = ["pyo3", "pyo3-log"] # Currently not supported
 external-zip-reader = ["zip"]
+code-analysis = []
diff --git a/androscalpel/src/code_analysis/method_cfg.rs b/androscalpel/src/code_analysis/method_cfg.rs
new file mode 100644
index 0000000..61335c7
--- /dev/null
+++ b/androscalpel/src/code_analysis/method_cfg.rs
@@ -0,0 +1,308 @@
+//! The Control Flow Graph for a method.
+
+use crate::{IdMethod, Instruction, Method, Result};
+use anyhow::Context;
+use std::collections::HashMap;
+
+const EMPTY_INSNS_SLICE: &[Instruction] = &[];
+
+/// A basic block of code of a method.
+struct MethodCFGNode<'a> {
+    /// Code represented by the block
+    code_block: &'a [Instruction],
+    /// Labels at the beginning of the node, if any
+    labels: Vec<String>,
+    /// Indices in `MethodCFG.nodes` of the next nodes
+    next_nodes: Vec<usize>,
+    /// Indices in `MethodCFG.nodes` of the previous nodes
+    prev_nodes: Vec<usize>,
+}
+
+/// The CFG for a method, with potentially additional information.
+pub struct MethodCFG<'a> {
+    method: &'a IdMethod,
+    nodes: Vec<MethodCFGNode<'a>>,
+}
+
+impl<'a> MethodCFG<'a> {
+    /// Build the control flow graph of `method`.
+    ///
+    /// Node 0 is an empty entry node. The instructions are then split into basic
+    /// blocks at jumps, switches, conditional branches, returns, throws and labels,
+    /// and, inside a try block, after each instruction listed as able to throw.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a branch targets a label that is not defined in the code.
+    pub fn new(method: &'a Method) -> Result<Self> {
+        let insns: &'a [Instruction] = if let Some(code) = method.code.as_ref() {
+            &code.insns
+        } else {
+            EMPTY_INSNS_SLICE
+        };
+
+        // Node 0 is the entry node: it holds no code.
+        let mut nodes = vec![MethodCFGNode {
+            code_block: &insns[0..0],
+            labels: vec![],
+            next_nodes: vec![],
+            prev_nodes: vec![],
+        }];
+        // For each node, the labels it may jump to; resolved to node indices below.
+        let mut nodes_next_label = vec![vec![]];
+        let nb_insns = insns.len();
+        if nb_insns != 0 {
+            nodes[0].next_nodes.push(1);
+        }
+        let mut start_last_block = 0;
+        let mut last_labels = vec![];
+        let mut block_started = false;
+        // Stack of the enclosing try blocks: (end label, handler labels).
+        let mut try_block: Vec<(String, Vec<String>)> = vec![];
+        for (i, ins) in insns.iter().enumerate() {
+            match ins {
+                // TODO: handle error better: list ins that can throw exceptions better
+                Instruction::Throw { .. }
+                | Instruction::InvokeVirtual { .. }
+                | Instruction::InvokeSuper { .. }
+                | Instruction::InvokeDirect { .. }
+                | Instruction::InvokeStatic { .. }
+                | Instruction::InvokeInterface { .. }
+                | Instruction::InvokePolymorphic { .. }
+                | Instruction::InvokeCustom { .. }
+                    if !try_block.is_empty() =>
+                {
+                    nodes_next_label.push(try_block.last().unwrap().1.clone());
+                    let next_nodes =
+                        if i + 1 < nb_insns && !matches!(ins, Instruction::Throw { .. }) {
+                            vec![nodes.len() + 1] // If no exception, continue to next ins
+                        } else {
+                            vec![]
+                        };
+                    nodes.push(MethodCFGNode {
+                        code_block: &insns[start_last_block..i + 1],
+                        labels: last_labels,
+                        next_nodes,
+                        prev_nodes: vec![],
+                    });
+                    start_last_block = i + 1;
+                    last_labels = vec![];
+                    block_started = false;
+                }
+                Instruction::Goto { label } => {
+                    nodes_next_label.push(vec![label.clone()]);
+                    nodes.push(MethodCFGNode {
+                        code_block: &insns[start_last_block..i + 1],
+                        labels: last_labels,
+                        next_nodes: vec![], // Do not continue the execution at next ins
+                        prev_nodes: vec![],
+                    });
+                    start_last_block = i + 1;
+                    last_labels = vec![];
+                    block_started = false;
+                }
+                Instruction::Switch { branches, .. } => {
+                    nodes_next_label.push(branches.values().cloned().collect());
+                    let next_nodes = if i + 1 < nb_insns {
+                        vec![nodes.len() + 1] // If no branches match, continue execution
+                    } else {
+                        vec![]
+                    };
+                    nodes.push(MethodCFGNode {
+                        code_block: &insns[start_last_block..i + 1],
+                        labels: last_labels,
+                        next_nodes,
+                        prev_nodes: vec![],
+                    });
+                    start_last_block = i + 1;
+                    last_labels = vec![];
+                    block_started = false;
+                }
+                Instruction::IfEq { label, .. }
+                | Instruction::IfNe { label, .. }
+                | Instruction::IfLt { label, .. }
+                | Instruction::IfGe { label, .. }
+                | Instruction::IfGt { label, .. }
+                | Instruction::IfLe { label, .. }
+                | Instruction::IfEqZ { label, .. }
+                | Instruction::IfNeZ { label, .. }
+                | Instruction::IfLtZ { label, .. }
+                | Instruction::IfGeZ { label, .. }
+                | Instruction::IfGtZ { label, .. }
+                | Instruction::IfLeZ { label, .. } => {
+                    nodes_next_label.push(vec![label.clone()]);
+                    let next_nodes = if i + 1 < nb_insns {
+                        vec![nodes.len() + 1] // depending on test, continue execution
+                    } else {
+                        vec![]
+                    };
+                    nodes.push(MethodCFGNode {
+                        code_block: &insns[start_last_block..i + 1],
+                        labels: last_labels,
+                        next_nodes,
+                        prev_nodes: vec![],
+                    });
+                    start_last_block = i + 1;
+                    last_labels = vec![];
+                    block_started = false;
+                }
+                Instruction::Try {
+                    end_label,
+                    handlers,
+                    default_handler,
+                } => {
+                    let mut branches: Vec<_> =
+                        handlers.iter().map(|(_, label)| label.clone()).collect();
+                    if let Some(default_handler) = default_handler.as_ref().cloned() {
+                        branches.push(default_handler);
+                    }
+                    try_block.push((end_label.clone(), branches))
+                }
+                Instruction::Label { name } => {
+                    // Leave the try blocks that end at this label.
+                    // NOTE(review): assumes `end_label` sits right after the last
+                    // instruction covered by the try block -- confirm.
+                    while matches!(try_block.last(), Some((end, _)) if end == name) {
+                        try_block.pop();
+                    }
+                    if !block_started {
+                        last_labels.push(name.clone());
+                    } else {
+                        // Close the running block. The label belongs to the block it
+                        // starts, not to the one it ends.
+                        nodes_next_label.push(vec![]);
+                        nodes.push(MethodCFGNode {
+                            code_block: &insns[start_last_block..i],
+                            labels: last_labels,
+                            next_nodes: vec![nodes.len() + 1],
+                            prev_nodes: vec![],
+                        });
+                        start_last_block = i;
+                        last_labels = vec![name.clone()];
+                        block_started = false;
+                    }
+                }
+                Instruction::ReturnVoid {}
+                | Instruction::Return { .. }
+                | Instruction::ReturnWide { .. }
+                | Instruction::ReturnObject { .. }
+                | Instruction::Throw { .. } => {
+                    nodes_next_label.push(vec![]);
+                    nodes.push(MethodCFGNode {
+                        code_block: &insns[start_last_block..i + 1],
+                        labels: last_labels,
+                        next_nodes: vec![], // Do not continue the execution at next ins
+                        prev_nodes: vec![],
+                    });
+                    start_last_block = i + 1;
+                    last_labels = vec![];
+                    block_started = false;
+                }
+                _ => {
+                    if !ins.is_pseudo_ins() {
+                        block_started = true;
+                    }
+                }
+            }
+        }
+        if start_last_block != nb_insns {
+            nodes_next_label.push(vec![]);
+            nodes.push(MethodCFGNode {
+                code_block: &insns[start_last_block..nb_insns],
+                labels: last_labels,
+                next_nodes: vec![],
+                prev_nodes: vec![],
+            });
+        }
+        let label_to_node: HashMap<String, usize> = nodes
+            .iter()
+            .enumerate()
+            .flat_map(|(i, node)| node.labels.clone().into_iter().map(move |lab| (lab, i)))
+            .collect();
+        for (node, labels) in nodes.iter_mut().zip(nodes_next_label) {
+            for label in labels {
+                node.next_nodes
+                    .push(*label_to_node.get(&label).with_context(|| {
+                        format!("found jump to label '{}' but label not found", label)
+                    })?);
+            }
+        }
+
+        for i in 0..nodes.len() {
+            let next_nodes = nodes[i].next_nodes.clone();
+            for j in &next_nodes {
+                nodes[*j].prev_nodes.push(i);
+            }
+        }
+        Ok(Self {
+            method: &method.descriptor,
+            nodes,
+        })
+    }
+
+    /// Serialize the graph to dot format, as a standalone `digraph`.
+    pub fn to_dot(&self) -> String {
+        let mut dot_string: String = "digraph {\n".into();
+        dot_string += "overlap=false;\n";
+        dot_string += &self.to_dot_subgraph();
+        dot_string += "}";
+        dot_string
+    }
+
+    /// Serialize the graph as a dot `subgraph` cluster, to embed in a larger graph.
+    pub fn to_dot_subgraph(&self) -> String {
+        let mut dot_string = format!("subgraph \"cluster_{}\" {{\n", self.method.__str__());
+        dot_string += " style=\"dashed\";\n";
+        dot_string += " color=\"black\";\n";
+        dot_string += &format!(" label=\"{}\";\n", self.method.__str__());
+        for (i, node) in self.nodes.iter().enumerate() {
+            let block_name = if i == 0 {
+                "ENTRY".into()
+            } else if !node.labels.is_empty() {
+                format!("block '{}'", node.labels[0])
+            } else {
+                format!("block {i}")
+            };
+            let label = if node.code_block.is_empty() {
+                format!("{{\\< {block_name} \\>}}")
+            } else {
+                let mut label = format!("{{\\< {block_name} \\>:\\l\\\n");
+                for ins in node.code_block {
+                    label += "|";
+                    label += ins
+                        .__str__()
+                        .replace(" ", "\\ ")
+                        .replace(">", "\\>")
+                        .replace("<", "\\<")
+                        .replace("\"", "\\\"")
+                        .replace("{", "\\{")
+                        .replace("}", "\\}")
+                        .as_str();
+                    label += "\\l\\\n";
+                }
+                label += "}";
+                label
+            };
+            dot_string += &format!(
+                " node_{i} [shape=record,style=filled,fillcolor=lightgrey,label=\"{label}\"];\n\n"
+            );
+        }
+        dot_string +=
+            " node_end [shape=record,style=filled,fillcolor=lightgrey,label=\"{\\< EXIT \\>}\"];\n\n";
+
+        for (i, node) in self.nodes.iter().enumerate() {
+            for j in &node.next_nodes {
+                if *j == i + 1 {
+                    dot_string += &format!(" node_{i}:s -> node_{j}:n [style=\"solid,bold\",color=black,weight=100,constraint=true];\n");
+                } else {
+                    dot_string += &format!(" node_{i}:s -> node_{j}:n [style=\"solid,bold\",color=black,weight=10,constraint=true];\n");
+                }
+            }
+            if node.next_nodes.is_empty() {
+                dot_string += &format!(" node_{i}:s -> node_end:n [style=\"solid,bold\",color=black,weight=10,constraint=true];\n");
+            }
+        }
+        dot_string += "}\n";
+        dot_string
+    }
+}
diff --git a/androscalpel/src/code_analysis/mod.rs b/androscalpel/src/code_analysis/mod.rs
new file mode 100644
index 0000000..c93e214
--- /dev/null
+++ b/androscalpel/src/code_analysis/mod.rs
@@ -0,0 +1,7 @@
+//! Module for more advanced code analysis.
+//!
+//! This module is quite experimental but can be useful.
+
+pub mod method_cfg;
+
+pub use method_cfg::*;
diff --git a/androscalpel/src/lib.rs b/androscalpel/src/lib.rs
index f72bbcb..8f53c65 100644
--- a/androscalpel/src/lib.rs
+++ b/androscalpel/src/lib.rs
@@ -22,6 +22,9 @@ pub mod scalar;
 pub mod value;
 pub mod visitor;
 
+#[cfg(feature = "code-analysis")]
+pub mod code_analysis;
+
 pub use annotation::*;
 pub use apk::*;
 pub use class::*;
@@ -39,6 +42,9 @@ pub use scalar::*;
 pub use value::*;
 pub use visitor::*;
 
+#[cfg(feature = "code-analysis")]
+pub use code_analysis::*;
+
 #[cfg(test)]
 mod tests;