implement a label normalization for comparing code

This commit is contained in:
Jean-Marie Mineau 2024-02-07 14:23:11 +01:00
parent e0f348aecc
commit 4755dd995d
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 364 additions and 7 deletions

View file

@ -1,12 +1,13 @@
//! Representation of a method.
use anyhow::anyhow;
use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
use crate::{
ins::Instruction, DexString, IdField, IdMethod, IdMethodType, IdType, MethodHandle, Result,
ins, ins::Instruction, DexString, IdField, IdMethod, IdMethodType, IdType, MethodHandle, Result,
};
// TODO: make this easy to edit/manipulate, maybe move to Method
@ -15,7 +16,7 @@ use crate::{
/// The code run by a method.
#[pyclass]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Code {
// TODO: remove and compute this value from code?
/// The number of registers used by the code
@ -38,6 +39,18 @@ pub struct Code {
pub insns: Vec<Instruction>,
}
/// Label-insensitive equality.
///
/// Two codes are considered equal when their register/ins/outs sizes and debug
/// info are equal and their instructions are equal once the labels have been
/// normalized with `Code::with_normalized_labels`.
impl PartialEq for Code {
    fn eq(&self, other: &Self) -> bool {
        // Label normalization only rewrites `insns`, so the other fields can be
        // compared directly, before paying for the normalized copies.
        if self.registers_size != other.registers_size
            || self.ins_size != other.ins_size
            || self.outs_size != other.outs_size
            || self.debug_info != other.debug_info
        {
            return false;
        }
        match (
            self.with_normalized_labels(),
            other.with_normalized_labels(),
        ) {
            (Ok(comparable_self), Ok(comparable_other)) => {
                comparable_self.insns == comparable_other.insns
            }
            // Normalization fails when an instruction references a label that is
            // not defined in the code. `==` must not panic on such code, so fall
            // back to comparing the instructions as they are.
            _ => self.insns == other.insns,
        }
    }
}
// TODO reimplement PartialEq: label should become address independent
#[pymethods]
@ -134,4 +147,328 @@ impl Code {
pub fn __eq__(&self, other: &Self) -> bool {
    // Delegate to the `PartialEq` implementation of `Code`.
    PartialEq::eq(self, other)
}
/// Return all the labels used by instrutions in the code.
pub fn get_referenced_label(&self) -> HashSet<String> {
let mut used_labels = HashSet::new();
for ins in &self.insns {
match ins {
Instruction::Goto(ins::Goto { label }) => {
used_labels.insert(label.clone());
}
Instruction::IfEq(ins::IfEq { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfNe(ins::IfNe { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfLt(ins::IfLt { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfGe(ins::IfGe { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfGt(ins::IfGt { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfLe(ins::IfLe { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfEqZ(ins::IfEqZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfNeZ(ins::IfNeZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfLtZ(ins::IfLtZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfGeZ(ins::IfGeZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfGtZ(ins::IfGtZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::IfLeZ(ins::IfLeZ { label, .. }) => {
used_labels.insert(label.clone());
}
Instruction::Try(ins::Try {
end_label,
handlers,
default_handler,
}) => {
used_labels.insert(end_label.clone());
for (_, label) in handlers {
used_labels.insert(label.clone());
}
if let Some(label) = default_handler {
used_labels.insert(label.clone());
}
}
Instruction::Switch(ins::Switch { branches, .. }) => {
for label in branches.values() {
used_labels.insert(label.clone());
}
}
_ => (),
};
}
used_labels
}
/// Generate a new code with normalized labels.
/// This allows to compare codes with same semantic but different labels.
/// (ig when the same code was reserialized)
pub fn with_normalized_labels(&self) -> Result<Self> {
let used_labels = self.get_referenced_label();
let mut new_labels = HashMap::new();
let mut label_id = 0;
let mut last_ins_was_a_label = false;
for ins in &self.insns {
match ins {
Instruction::Label(ins::Label { name }) => {
if used_labels.get(name).is_none() {
continue;
}
let new_label_id = if last_ins_was_a_label {
label_id
} else {
label_id += 1;
label_id
};
new_labels.insert(name.clone(), format!("label_{new_label_id}"));
last_ins_was_a_label = true;
}
_ => last_ins_was_a_label = false,
}
}
for label in &used_labels {
if new_labels.get(label).is_none() {
println!("{label} use but not in new_labels");
}
}
let mut new_insns = vec![];
last_ins_was_a_label = false;
for ins in self.insns.iter().cloned() {
match ins {
Instruction::Goto(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::Goto(instr));
}
Instruction::IfEq(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfEq(instr));
}
Instruction::IfNe(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfNe(instr));
}
Instruction::IfLt(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfLt(instr));
}
Instruction::IfGe(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfGe(instr));
}
Instruction::IfGt(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfGt(instr));
}
Instruction::IfLe(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfLe(instr));
}
Instruction::IfEqZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfEqZ(instr));
}
Instruction::IfNeZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfNeZ(instr));
}
Instruction::IfLtZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfLtZ(instr));
}
Instruction::IfGeZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfGeZ(instr));
}
Instruction::IfGtZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfGtZ(instr));
}
Instruction::IfLeZ(mut instr) => {
last_ins_was_a_label = false;
instr.label = new_labels
.get(&instr.label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.label
))?
.clone();
new_insns.push(Instruction::IfLeZ(instr));
}
Instruction::Try(mut instr) => {
last_ins_was_a_label = false;
instr.end_label = new_labels
.get(&instr.end_label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.end_label
))?
.clone();
for i in 0..instr.handlers.len() {
instr.handlers[i].1 = new_labels
.get(&instr.handlers[i].1)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
instr.handlers[i].1
))?
.clone();
}
if let Some(label) = instr.default_handler {
instr.default_handler = Some(
new_labels
.get(&label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
label
))?
.clone(),
);
}
new_insns.push(Instruction::Try(instr));
}
Instruction::Switch(mut instr) => {
last_ins_was_a_label = false;
for label in instr.branches.values_mut() {
*label = new_labels
.get(label)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
label
))?
.clone();
}
new_insns.push(Instruction::Switch(instr));
}
Instruction::Label(ins::Label { name }) => {
if used_labels.get(&name).is_none() {
println!("{name} not used");
continue;
}
if !last_ins_was_a_label {
new_insns.push(Instruction::Label(ins::Label {
name: new_labels
.get(&name)
.ok_or(anyhow!(
"Internal error: {} not found in renamed label",
name
))?
.clone(),
}));
}
last_ins_was_a_label = true;
}
instr => {
last_ins_was_a_label = false;
new_insns.push(instr);
}
};
}
Ok(Self {
insns: new_insns,
..self.clone()
})
}
}

28
test.py
View file

@ -68,12 +68,13 @@ classes = list(
IdType("Landroidx/navigation/NavDeepLink$Builder;"),
IdType("Landroidx/constraintlayout/core/widgets/ConstraintWidget$1;"),
IdType("Landroidx/appcompat/app/ActionBar;"),
IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"),
],
apk.classes.keys(),
)
)
# for cls in classes:
# apk.remove_class(cls)
for cls in classes:
apk.remove_class(cls)
print("[+] Recompile")
@ -144,7 +145,7 @@ def cmp_dict(a, b, req=0):
print(f"{f'{ident}{str(key)}: ':<150}{nice_bool(eq)}")
if not eq:
global last_id
last_id = a.descriptor
last_id = key
cmp(a[key], b[key], req + 1)
@ -177,4 +178,23 @@ if not apk_eq:
# )
# m = apk.classes[mid.class_].direct_methods[mid]
# nm = new_apk.classes[mid.class_].direct_methods[mid]
#
mid = IdMethod(
"setValue",
IdMethodType(
IdType("Z"),
[
IdType("Ljava/lang/String;"),
IdType("Landroidx/constraintlayout/core/parser/CLElement;"),
],
),
IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"),
)
m = apk.classes[mid.class_].virtual_methods[mid]
nm = new_apk.classes[mid.class_].virtual_methods[mid]
c = m.code
nc = nm.code
cc = c.with_normalized_labels()
ncc = nc.with_normalized_labels()