add code analysis features to project
This commit is contained in:
parent
e75e6b160e
commit
812640bd4a
4 changed files with 303 additions and 1 deletions
|
|
@ -26,7 +26,8 @@ zip = {version = "2.2.2", optional = true}
|
||||||
pretty_assertions = "1.4.1"
|
pretty_assertions = "1.4.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
default = ["code-analysis"]
|
||||||
# TODO: need refactoring to https://github.com/PyO3/pyo3/issues/2935#issuecomment-2560930677 or cfg_eval https://github.com/rust-lang/rust/issues/82679
|
# TODO: need refactoring to https://github.com/PyO3/pyo3/issues/2935#issuecomment-2560930677 or cfg_eval https://github.com/rust-lang/rust/issues/82679
|
||||||
python = ["pyo3", "pyo3-log"] # Currently not supported
|
python = ["pyo3", "pyo3-log"] # Currently not supported
|
||||||
external-zip-reader = ["zip"]
|
external-zip-reader = ["zip"]
|
||||||
|
code-analysis = []
|
||||||
|
|
|
||||||
288
androscalpel/src/code_analysis/method_cfg.rs
Normal file
288
androscalpel/src/code_analysis/method_cfg.rs
Normal file
|
|
@ -0,0 +1,288 @@
|
||||||
|
//! The Control Flow Graph for a method.
|
||||||
|
|
||||||
|
use crate::{IdMethod, Instruction, Method, Result};
|
||||||
|
use anyhow::Context;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
/// Empty instruction slice, used as the instruction list of a method that has no code.
const EMPTY_INSNS_SLICE: &[Instruction] = &[];
|
||||||
|
|
||||||
|
/// A basic block of code of a method.
|
||||||
|
struct MethodCFGNode<'a> {
|
||||||
|
/// Instructions of the block, borrowed from the code of the method.
|
||||||
|
code_block: &'a [Instruction],
|
||||||
|
/// Labels at the beginning of the node, if any.
|
||||||
|
labels: Vec<String>,
|
||||||
|
/// Indices in `MethodCFG::nodes` of the next nodes.
|
||||||
|
next_nodes: Vec<usize>,
|
||||||
|
/// Indices in `MethodCFG::nodes` of the previous nodes.
|
||||||
|
prev_nodes: Vec<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The CFG for a method, with potentially additional information.
|
||||||
|
pub struct MethodCFG<'a> {
|
||||||
|
/// Descriptor of the method the graph was built from.
method: &'a IdMethod,
|
||||||
|
/// Nodes of the graph. Node 0 is the entry node and holds no instruction.
nodes: Vec<MethodCFGNode<'a>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> MethodCFG<'a> {
|
||||||
|
pub fn new(method: &'a Method) -> Result<Self> {
|
||||||
|
let insns: &'a [Instruction] = if let Some(code) = method.code.as_ref() {
|
||||||
|
&code.insns
|
||||||
|
} else {
|
||||||
|
EMPTY_INSNS_SLICE
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut nodes = vec![MethodCFGNode {
|
||||||
|
code_block: &insns[0..0],
|
||||||
|
labels: vec![],
|
||||||
|
next_nodes: vec![],
|
||||||
|
prev_nodes: vec![],
|
||||||
|
}];
|
||||||
|
let mut nodes_next_label = vec![vec![]];
|
||||||
|
let nb_insns = insns.len();
|
||||||
|
if nb_insns != 0 {
|
||||||
|
nodes[0].next_nodes.push(1);
|
||||||
|
}
|
||||||
|
let mut start_last_block = 0;
|
||||||
|
let mut last_labels = vec![];
|
||||||
|
let mut block_started = false;
|
||||||
|
let mut try_block: Vec<(String, Vec<String>)> = vec![];
|
||||||
|
for (i, ins) in insns.iter().enumerate() {
|
||||||
|
match ins {
|
||||||
|
// TODO: handle error better: list ins that can throw exceptions better
|
||||||
|
Instruction::Throw { .. }
|
||||||
|
| Instruction::InvokeVirtual { .. }
|
||||||
|
| Instruction::InvokeSuper { .. }
|
||||||
|
| Instruction::InvokeDirect { .. }
|
||||||
|
| Instruction::InvokeDirect { .. }
|
||||||
|
| Instruction::InvokeInterface { .. }
|
||||||
|
| Instruction::InvokePolymorphic { .. }
|
||||||
|
| Instruction::InvokeCustom { .. }
|
||||||
|
if !try_block.is_empty() =>
|
||||||
|
{
|
||||||
|
nodes_next_label.push(try_block.last().unwrap().1.clone());
|
||||||
|
let next_nodes =
|
||||||
|
if i + 1 < nb_insns && !matches!(ins, Instruction::Throw { .. }) {
|
||||||
|
vec![nodes.len() + 1] // If no exception, continue to next ins
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i + 1],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes,
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i + 1;
|
||||||
|
last_labels = vec![];
|
||||||
|
block_started = false;
|
||||||
|
}
|
||||||
|
Instruction::Goto { label } => {
|
||||||
|
nodes_next_label.push(vec![label.clone()]);
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i + 1],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes: vec![], // Do not continue the execution at next ins
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i + 1;
|
||||||
|
last_labels = vec![];
|
||||||
|
block_started = false;
|
||||||
|
}
|
||||||
|
Instruction::Switch { branches, .. } => {
|
||||||
|
nodes_next_label.push(branches.values().cloned().collect());
|
||||||
|
let next_nodes = if i + 1 < nb_insns {
|
||||||
|
vec![nodes.len() + 1] // If no branches match, continue execution
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i + 1],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes,
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i + 1;
|
||||||
|
last_labels = vec![];
|
||||||
|
block_started = false;
|
||||||
|
}
|
||||||
|
Instruction::IfEq { label, .. }
|
||||||
|
| Instruction::IfNe { label, .. }
|
||||||
|
| Instruction::IfLt { label, .. }
|
||||||
|
| Instruction::IfGe { label, .. }
|
||||||
|
| Instruction::IfGt { label, .. }
|
||||||
|
| Instruction::IfLe { label, .. }
|
||||||
|
| Instruction::IfEqZ { label, .. }
|
||||||
|
| Instruction::IfNeZ { label, .. }
|
||||||
|
| Instruction::IfLtZ { label, .. }
|
||||||
|
| Instruction::IfGeZ { label, .. }
|
||||||
|
| Instruction::IfGtZ { label, .. }
|
||||||
|
| Instruction::IfLeZ { label, .. } => {
|
||||||
|
nodes_next_label.push(vec![label.clone()]);
|
||||||
|
let next_nodes = if i + 1 < nb_insns {
|
||||||
|
vec![nodes.len() + 1] // depending on test, continue execution
|
||||||
|
} else {
|
||||||
|
vec![]
|
||||||
|
};
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i + 1],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes,
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i + 1;
|
||||||
|
last_labels = vec![];
|
||||||
|
block_started = false;
|
||||||
|
}
|
||||||
|
Instruction::Try {
|
||||||
|
end_label,
|
||||||
|
handlers,
|
||||||
|
default_handler,
|
||||||
|
} => {
|
||||||
|
let mut branches: Vec<_> =
|
||||||
|
handlers.iter().map(|(_, label)| label.clone()).collect();
|
||||||
|
if let Some(default_handler) = default_handler.as_ref().cloned() {
|
||||||
|
branches.push(default_handler);
|
||||||
|
}
|
||||||
|
try_block.push((end_label.clone(), branches))
|
||||||
|
}
|
||||||
|
Instruction::Label { name } => {
|
||||||
|
if !block_started {
|
||||||
|
last_labels.push(name.clone());
|
||||||
|
} else {
|
||||||
|
nodes_next_label.push(vec![]);
|
||||||
|
last_labels.push(name.clone());
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes: vec![nodes.len() + 1],
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i;
|
||||||
|
last_labels = vec![];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Instruction::ReturnVoid {}
|
||||||
|
| Instruction::Return { .. }
|
||||||
|
| Instruction::ReturnWide { .. }
|
||||||
|
| Instruction::ReturnObject { .. }
|
||||||
|
| Instruction::Throw { .. } => {
|
||||||
|
nodes_next_label.push(vec![]);
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..i + 1],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes: vec![], // Do not continue the execution at next ins
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
start_last_block = i + 1;
|
||||||
|
last_labels = vec![];
|
||||||
|
block_started = false;
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
if !ins.is_pseudo_ins() {
|
||||||
|
block_started = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if start_last_block != nb_insns {
|
||||||
|
nodes_next_label.push(vec![]);
|
||||||
|
nodes.push(MethodCFGNode {
|
||||||
|
code_block: &insns[start_last_block..nb_insns],
|
||||||
|
labels: last_labels,
|
||||||
|
next_nodes: vec![],
|
||||||
|
prev_nodes: vec![],
|
||||||
|
});
|
||||||
|
}
|
||||||
|
let label_to_node: HashMap<String, usize> = nodes
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.flat_map(|(i, node)| node.labels.clone().into_iter().map(move |lab| (lab, i)))
|
||||||
|
.collect();
|
||||||
|
for (node, labels) in nodes.iter_mut().zip(nodes_next_label) {
|
||||||
|
for label in labels {
|
||||||
|
node.next_nodes
|
||||||
|
.push(*label_to_node.get(&label).with_context(|| {
|
||||||
|
format!("found jumb to label '{}' but label not found", label)
|
||||||
|
})?);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in 0..nodes.len() {
|
||||||
|
let next_nodes = nodes[i].next_nodes.clone();
|
||||||
|
for j in &next_nodes {
|
||||||
|
nodes[*j].prev_nodes.push(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(Self {
|
||||||
|
method: &method.descriptor,
|
||||||
|
nodes,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serialize the graph to dot format.
|
||||||
|
pub fn to_dot(&self) -> String {
|
||||||
|
let mut dot_string: String = "digraph {\n".into();
|
||||||
|
dot_string += "overlap=false;\n";
|
||||||
|
dot_string += &self.to_dot_subgraph();
|
||||||
|
dot_string += "}";
|
||||||
|
dot_string
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serialize the graph to dot format.
|
||||||
|
pub fn to_dot_subgraph(&self) -> String {
|
||||||
|
let mut dot_string = format!("subgraph \"cluster_{}\" {{\n", self.method.__str__());
|
||||||
|
dot_string += " style=\"dashed\";\n";
|
||||||
|
dot_string += " color=\"black\";\n";
|
||||||
|
dot_string += &format!(" label=\"{}\";\n", self.method.__str__());
|
||||||
|
for (i, node) in self.nodes.iter().enumerate() {
|
||||||
|
let block_name = if i == 0 {
|
||||||
|
"ENTRY".into()
|
||||||
|
} else if !node.labels.is_empty() {
|
||||||
|
format!("block '{}'", node.labels[0])
|
||||||
|
} else {
|
||||||
|
format!("block {i}")
|
||||||
|
};
|
||||||
|
let label = if node.code_block.is_empty() {
|
||||||
|
format!("{{\\< {block_name} \\>}}")
|
||||||
|
} else {
|
||||||
|
let mut label = format!("{{\\< {block_name} \\>:\\l\\\n");
|
||||||
|
for ins in node.code_block {
|
||||||
|
label += "|";
|
||||||
|
label += ins
|
||||||
|
.__str__()
|
||||||
|
.replace(" ", "\\ ")
|
||||||
|
.replace(">", "\\>")
|
||||||
|
.replace("<", "\\<")
|
||||||
|
.replace("\"", "\\\"")
|
||||||
|
.replace("{", "\\{")
|
||||||
|
.replace("}", "\\}")
|
||||||
|
.as_str();
|
||||||
|
label += "\\l\\\n";
|
||||||
|
}
|
||||||
|
label += "}";
|
||||||
|
label
|
||||||
|
};
|
||||||
|
dot_string += &format!(
|
||||||
|
" node_{i} [shape=record,style=filled,fillcolor=lightgrey,label=\"{label}\"];\n\n"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
dot_string +=
|
||||||
|
" node_end [shape=record,style=filled,fillcolor=lightgrey,label=\"{\\< EXIT \\>}\"];\n\n";
|
||||||
|
|
||||||
|
for (i, node) in self.nodes.iter().enumerate() {
|
||||||
|
for j in &node.next_nodes {
|
||||||
|
if *j == i + 1 {
|
||||||
|
dot_string += &format!(" node_{i}:s -> node_{j}:n [style=\"solid,bold\",color=black,weight=100,constraint=true];\n");
|
||||||
|
} else {
|
||||||
|
dot_string += &format!(" node_{i}:s -> node_{j}:n [style=\"solid,bold\",color=black,weight=10,constraint=true];\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if node.next_nodes.is_empty() {
|
||||||
|
dot_string += &format!(" node_{i}:s -> node_end:n [style=\"solid,bold\",color=black,weight=10,constraint=true];\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dot_string += "}\n";
|
||||||
|
dot_string
|
||||||
|
}
|
||||||
|
}
|
||||||
7
androscalpel/src/code_analysis/mod.rs
Normal file
7
androscalpel/src/code_analysis/mod.rs
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
//! Module for more advanced code analysis.
|
||||||
|
//!
|
||||||
|
//! This module is quite experimental but can be useful.
|
||||||
|
|
||||||
|
pub mod method_cfg;
|
||||||
|
|
||||||
|
pub use method_cfg::*;
|
||||||
|
|
@ -22,6 +22,9 @@ pub mod scalar;
|
||||||
pub mod value;
|
pub mod value;
|
||||||
pub mod visitor;
|
pub mod visitor;
|
||||||
|
|
||||||
|
#[cfg(feature = "code-analysis")]
|
||||||
|
pub mod code_analysis;
|
||||||
|
|
||||||
pub use annotation::*;
|
pub use annotation::*;
|
||||||
pub use apk::*;
|
pub use apk::*;
|
||||||
pub use class::*;
|
pub use class::*;
|
||||||
|
|
@ -39,6 +42,9 @@ pub use scalar::*;
|
||||||
pub use value::*;
|
pub use value::*;
|
||||||
pub use visitor::*;
|
pub use visitor::*;
|
||||||
|
|
||||||
|
#[cfg(feature = "code-analysis")]
|
||||||
|
pub use code_analysis::*;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests;
|
mod tests;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue