add code analysis features to project

This commit is contained in:
Jean-Marie 'Histausse' Mineau 2025-02-21 14:34:10 +01:00
parent e75e6b160e
commit 812640bd4a
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
4 changed files with 303 additions and 1 deletions

View file

@ -26,7 +26,8 @@ zip = {version = "2.2.2", optional = true}
pretty_assertions = "1.4.1" pretty_assertions = "1.4.1"
[features] [features]
default = [] default = ["code-analysis"]
# TODO: need refactoring to https://github.com/PyO3/pyo3/issues/2935#issuecomment-2560930677 or cfg_eval https://github.com/rust-lang/rust/issues/82679 # TODO: need refactoring to https://github.com/PyO3/pyo3/issues/2935#issuecomment-2560930677 or cfg_eval https://github.com/rust-lang/rust/issues/82679
python = ["pyo3", "pyo3-log"] # Currently not supported python = ["pyo3", "pyo3-log"] # Currently not supported
external-zip-reader = ["zip"] external-zip-reader = ["zip"]
code-analysis = []

View file

@ -0,0 +1,288 @@
//! The Control Flow Graph for a method.
use crate::{IdMethod, Instruction, Method, Result};
use anyhow::Context;
use std::collections::HashMap;
const EMPTY_INSNS_SLICE: &[Instruction] = &[];
/// A basic block of code of a method.
///
/// Nodes are stored in `MethodCFG::nodes` and reference each other by their
/// index in that vector.
struct MethodCFGNode<'a> {
/// Code represented by the block.
code_block: &'a [Instruction],
/// Labels at the beginning of the node, if any.
labels: Vec<String>,
/// Indices in `MethodCFG::nodes` of the next nodes.
next_nodes: Vec<usize>,
/// Indices in `MethodCFG::nodes` of the previous nodes.
prev_nodes: Vec<usize>,
}
/// The control flow graph (CFG) of a method, with potentially additional information.
pub struct MethodCFG<'a> {
/// Descriptor of the method the graph was built from.
method: &'a IdMethod,
/// Basic blocks of the method. Index 0 is the entry node, which holds no code.
nodes: Vec<MethodCFGNode<'a>>,
}
impl<'a> MethodCFG<'a> {
pub fn new(method: &'a Method) -> Result<Self> {
let insns: &'a [Instruction] = if let Some(code) = method.code.as_ref() {
&code.insns
} else {
EMPTY_INSNS_SLICE
};
let mut nodes = vec![MethodCFGNode {
code_block: &insns[0..0],
labels: vec![],
next_nodes: vec![],
prev_nodes: vec![],
}];
let mut nodes_next_label = vec![vec![]];
let nb_insns = insns.len();
if nb_insns != 0 {
nodes[0].next_nodes.push(1);
}
let mut start_last_block = 0;
let mut last_labels = vec![];
let mut block_started = false;
let mut try_block: Vec<(String, Vec<String>)> = vec![];
for (i, ins) in insns.iter().enumerate() {
match ins {
// TODO: handle error better: list ins that can throw exceptions better
Instruction::Throw { .. }
| Instruction::InvokeVirtual { .. }
| Instruction::InvokeSuper { .. }
| Instruction::InvokeDirect { .. }
| Instruction::InvokeDirect { .. }
| Instruction::InvokeInterface { .. }
| Instruction::InvokePolymorphic { .. }
| Instruction::InvokeCustom { .. }
if !try_block.is_empty() =>
{
nodes_next_label.push(try_block.last().unwrap().1.clone());
let next_nodes =
if i + 1 < nb_insns && !matches!(ins, Instruction::Throw { .. }) {
vec![nodes.len() + 1] // If no exception, continue to next ins
} else {
vec![]
};
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i + 1],
labels: last_labels,
next_nodes,
prev_nodes: vec![],
});
start_last_block = i + 1;
last_labels = vec![];
block_started = false;
}
Instruction::Goto { label } => {
nodes_next_label.push(vec![label.clone()]);
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i + 1],
labels: last_labels,
next_nodes: vec![], // Do not continue the execution at next ins
prev_nodes: vec![],
});
start_last_block = i + 1;
last_labels = vec![];
block_started = false;
}
Instruction::Switch { branches, .. } => {
nodes_next_label.push(branches.values().cloned().collect());
let next_nodes = if i + 1 < nb_insns {
vec![nodes.len() + 1] // If no branches match, continue execution
} else {
vec![]
};
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i + 1],
labels: last_labels,
next_nodes,
prev_nodes: vec![],
});
start_last_block = i + 1;
last_labels = vec![];
block_started = false;
}
Instruction::IfEq { label, .. }
| Instruction::IfNe { label, .. }
| Instruction::IfLt { label, .. }
| Instruction::IfGe { label, .. }
| Instruction::IfGt { label, .. }
| Instruction::IfLe { label, .. }
| Instruction::IfEqZ { label, .. }
| Instruction::IfNeZ { label, .. }
| Instruction::IfLtZ { label, .. }
| Instruction::IfGeZ { label, .. }
| Instruction::IfGtZ { label, .. }
| Instruction::IfLeZ { label, .. } => {
nodes_next_label.push(vec![label.clone()]);
let next_nodes = if i + 1 < nb_insns {
vec![nodes.len() + 1] // depending on test, continue execution
} else {
vec![]
};
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i + 1],
labels: last_labels,
next_nodes,
prev_nodes: vec![],
});
start_last_block = i + 1;
last_labels = vec![];
block_started = false;
}
Instruction::Try {
end_label,
handlers,
default_handler,
} => {
let mut branches: Vec<_> =
handlers.iter().map(|(_, label)| label.clone()).collect();
if let Some(default_handler) = default_handler.as_ref().cloned() {
branches.push(default_handler);
}
try_block.push((end_label.clone(), branches))
}
Instruction::Label { name } => {
if !block_started {
last_labels.push(name.clone());
} else {
nodes_next_label.push(vec![]);
last_labels.push(name.clone());
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i],
labels: last_labels,
next_nodes: vec![nodes.len() + 1],
prev_nodes: vec![],
});
start_last_block = i;
last_labels = vec![];
}
}
Instruction::ReturnVoid {}
| Instruction::Return { .. }
| Instruction::ReturnWide { .. }
| Instruction::ReturnObject { .. }
| Instruction::Throw { .. } => {
nodes_next_label.push(vec![]);
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..i + 1],
labels: last_labels,
next_nodes: vec![], // Do not continue the execution at next ins
prev_nodes: vec![],
});
start_last_block = i + 1;
last_labels = vec![];
block_started = false;
}
_ => {
if !ins.is_pseudo_ins() {
block_started = true;
}
}
}
}
if start_last_block != nb_insns {
nodes_next_label.push(vec![]);
nodes.push(MethodCFGNode {
code_block: &insns[start_last_block..nb_insns],
labels: last_labels,
next_nodes: vec![],
prev_nodes: vec![],
});
}
let label_to_node: HashMap<String, usize> = nodes
.iter()
.enumerate()
.flat_map(|(i, node)| node.labels.clone().into_iter().map(move |lab| (lab, i)))
.collect();
for (node, labels) in nodes.iter_mut().zip(nodes_next_label) {
for label in labels {
node.next_nodes
.push(*label_to_node.get(&label).with_context(|| {
format!("found jumb to label '{}' but label not found", label)
})?);
}
}
for i in 0..nodes.len() {
let next_nodes = nodes[i].next_nodes.clone();
for j in &next_nodes {
nodes[*j].prev_nodes.push(i);
}
}
Ok(Self {
method: &method.descriptor,
nodes,
})
}
/// Serialize the graph to dot format, as a standalone `digraph`.
///
/// The result wraps the cluster produced by `to_dot_subgraph` in a
/// `digraph` with `overlap=false`.
pub fn to_dot(&self) -> String {
    format!("digraph {{\noverlap=false;\n{}}}", self.to_dot_subgraph())
}
/// Serialize the graph as a dot `subgraph` cluster, without the enclosing `digraph`.
///
/// The cluster is named and labelled after the method. Each basic block is
/// rendered as a record node `node_<index>` listing its instructions, and an
/// extra `node_end` node represents the exit of the method: every block without
/// successor is linked to it. Edges to the block that directly follows get a
/// higher weight than the other edges.
pub fn to_dot_subgraph(&self) -> String {
    // Escape the characters that have a special meaning inside a dot record
    // label. The backslash and the field separator `|` must be escaped too,
    // otherwise an instruction containing them corrupts the record.
    fn escape_record_text(raw: &str) -> String {
        let mut escaped = String::with_capacity(raw.len());
        for c in raw.chars() {
            if matches!(c, '\\' | ' ' | '>' | '<' | '"' | '{' | '}' | '|') {
                escaped.push('\\');
            }
            escaped.push(c);
        }
        escaped
    }

    let mut dot_string = format!("subgraph \"cluster_{}\" {{\n", self.method.__str__());
    dot_string += " style=\"dashed\";\n";
    dot_string += " color=\"black\";\n";
    dot_string += &format!(" label=\"{}\";\n", self.method.__str__());

    // One record node per basic block.
    for (i, node) in self.nodes.iter().enumerate() {
        let block_name = if i == 0 {
            "ENTRY".into()
        } else if let Some(first_label) = node.labels.first() {
            format!("block '{}'", first_label)
        } else {
            format!("block {i}")
        };
        let label = if node.code_block.is_empty() {
            format!("{{\\< {block_name} \\>}}")
        } else {
            let mut label = format!("{{\\< {block_name} \\>:\\l\\\n");
            for ins in node.code_block {
                label.push('|');
                label.push_str(&escape_record_text(&ins.__str__()));
                label.push_str("\\l\\\n");
            }
            label.push('}');
            label
        };
        dot_string += &format!(
            " node_{i} [shape=record,style=filled,fillcolor=lightgrey,label=\"{label}\"];\n\n"
        );
    }
    dot_string +=
        " node_end [shape=record,style=filled,fillcolor=lightgrey,label=\"{\\< EXIT \\>}\"];\n\n";

    // Edges between blocks, and from the blocks without successor to the exit node.
    for (i, node) in self.nodes.iter().enumerate() {
        for &j in &node.next_nodes {
            // Favor a straight layout for the fall-through edge.
            let weight = if j == i + 1 { 100 } else { 10 };
            dot_string += &format!(" node_{i}:s -> node_{j}:n [style=\"solid,bold\",color=black,weight={weight},constraint=true];\n");
        }
        if node.next_nodes.is_empty() {
            dot_string += &format!(" node_{i}:s -> node_end:n [style=\"solid,bold\",color=black,weight=10,constraint=true];\n");
        }
    }
    dot_string += "}\n";
    dot_string
}
}

View file

@ -0,0 +1,7 @@
//! Module for more advanced code analysis.
//!
//! This module is quite experimental but can be useful.
pub mod method_cfg;
pub use method_cfg::*;

View file

@ -22,6 +22,9 @@ pub mod scalar;
pub mod value; pub mod value;
pub mod visitor; pub mod visitor;
#[cfg(feature = "code-analysis")]
pub mod code_analysis;
pub use annotation::*; pub use annotation::*;
pub use apk::*; pub use apk::*;
pub use class::*; pub use class::*;
@ -39,6 +42,9 @@ pub use scalar::*;
pub use value::*; pub use value::*;
pub use visitor::*; pub use visitor::*;
#[cfg(feature = "code-analysis")]
pub use code_analysis::*;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;