From 4755dd995d7e595aa61d7dbf6f2b9d49160413c5 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Wed, 7 Feb 2024 14:23:11 +0100 Subject: [PATCH] implement a label normalization for comparing code --- androscalpel/src/code.rs | 343 ++++++++++++++++++++++++++++++++++++++- test.py | 28 +++- 2 files changed, 364 insertions(+), 7 deletions(-) diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index cb31ff2..c1358ff 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -1,12 +1,13 @@ //! Representation of a method. +use anyhow::anyhow; use serde::{Deserialize, Serialize}; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use pyo3::prelude::*; use crate::{ - ins::Instruction, DexString, IdField, IdMethod, IdMethodType, IdType, MethodHandle, Result, + ins, ins::Instruction, DexString, IdField, IdMethod, IdMethodType, IdType, MethodHandle, Result, }; // TODO: make this easy to edit/manipulate, maybe move to Method @@ -15,7 +16,7 @@ use crate::{ /// The code run by a method. #[pyclass] -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct Code { // TODO: remove and compute this value from code? 
/// The number of registers used by the code @@ -38,6 +39,18 @@ pub struct Code { pub insns: Vec, } +impl PartialEq for Code { + fn eq(&self, other: &Self) -> bool { + let comparable_self = self.with_normalized_labels().unwrap(); + let comparable_other = other.with_normalized_labels().unwrap(); + (comparable_self.registers_size == comparable_other.registers_size) + && (comparable_self.ins_size == comparable_other.ins_size) + && (comparable_self.outs_size == comparable_other.outs_size) + && (comparable_self.debug_info == comparable_other.debug_info) + && (comparable_self.insns == comparable_other.insns) + } +} + // TODO reimplement PartialEq: label should become address independant #[pymethods] @@ -134,4 +147,328 @@ impl Code { pub fn __eq__(&self, other: &Self) -> bool { self == other } + + /// Return all the labels used by instructions in the code. + pub fn get_referenced_label(&self) -> HashSet { + let mut used_labels = HashSet::new(); + for ins in &self.insns { + match ins { + Instruction::Goto(ins::Goto { label }) => { + used_labels.insert(label.clone()); + } + Instruction::IfEq(ins::IfEq { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfNe(ins::IfNe { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfLt(ins::IfLt { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfGe(ins::IfGe { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfGt(ins::IfGt { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfLe(ins::IfLe { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfEqZ(ins::IfEqZ { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfNeZ(ins::IfNeZ { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfLtZ(ins::IfLtZ { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfGeZ(ins::IfGeZ { label, .. 
}) => { + used_labels.insert(label.clone()); + } + Instruction::IfGtZ(ins::IfGtZ { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::IfLeZ(ins::IfLeZ { label, .. }) => { + used_labels.insert(label.clone()); + } + Instruction::Try(ins::Try { + end_label, + handlers, + default_handler, + }) => { + used_labels.insert(end_label.clone()); + for (_, label) in handlers { + used_labels.insert(label.clone()); + } + if let Some(label) = default_handler { + used_labels.insert(label.clone()); + } + } + Instruction::Switch(ins::Switch { branches, .. }) => { + for label in branches.values() { + used_labels.insert(label.clone()); + } + } + _ => (), + }; + } + used_labels + } + + /// Generate a new code with normalized labels. + /// This allows comparing codes with the same semantics but different labels. + /// (e.g. when the same code was reserialized) + pub fn with_normalized_labels(&self) -> Result { + let used_labels = self.get_referenced_label(); + let mut new_labels = HashMap::new(); + let mut label_id = 0; + let mut last_ins_was_a_label = false; + + for ins in &self.insns { + match ins { + Instruction::Label(ins::Label { name }) => { + if used_labels.get(name).is_none() { + continue; + } + let new_label_id = if last_ins_was_a_label { + label_id + } else { + label_id += 1; + label_id + }; + new_labels.insert(name.clone(), format!("label_{new_label_id}")); + + last_ins_was_a_label = true; + } + _ => last_ins_was_a_label = false, + } + } + + for label in &used_labels { + if new_labels.get(label).is_none() { + println!("{label} use but not in new_labels"); + } + } + + let mut new_insns = vec![]; + last_ins_was_a_label = false; + for ins in self.insns.iter().cloned() { + match ins { + Instruction::Goto(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? 
+ .clone(); + new_insns.push(Instruction::Goto(instr)); + } + Instruction::IfEq(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfEq(instr)); + } + Instruction::IfNe(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfNe(instr)); + } + Instruction::IfLt(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfLt(instr)); + } + Instruction::IfGe(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfGe(instr)); + } + Instruction::IfGt(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfGt(instr)); + } + Instruction::IfLe(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfLe(instr)); + } + Instruction::IfEqZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? 
+ .clone(); + new_insns.push(Instruction::IfEqZ(instr)); + } + Instruction::IfNeZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfNeZ(instr)); + } + Instruction::IfLtZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfLtZ(instr)); + } + Instruction::IfGeZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfGeZ(instr)); + } + Instruction::IfGtZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfGtZ(instr)); + } + Instruction::IfLeZ(mut instr) => { + last_ins_was_a_label = false; + instr.label = new_labels + .get(&instr.label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.label + ))? + .clone(); + new_insns.push(Instruction::IfLeZ(instr)); + } + Instruction::Try(mut instr) => { + last_ins_was_a_label = false; + instr.end_label = new_labels + .get(&instr.end_label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.end_label + ))? + .clone(); + for i in 0..instr.handlers.len() { + instr.handlers[i].1 = new_labels + .get(&instr.handlers[i].1) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + instr.handlers[i].1 + ))? 
+ .clone(); + } + if let Some(label) = instr.default_handler { + instr.default_handler = Some( + new_labels + .get(&label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + label + ))? + .clone(), + ); + } + new_insns.push(Instruction::Try(instr)); + } + Instruction::Switch(mut instr) => { + last_ins_was_a_label = false; + for label in instr.branches.values_mut() { + *label = new_labels + .get(label) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + label + ))? + .clone(); + } + new_insns.push(Instruction::Switch(instr)); + } + Instruction::Label(ins::Label { name }) => { + if used_labels.get(&name).is_none() { + println!("{name} not used"); + continue; + } + if !last_ins_was_a_label { + new_insns.push(Instruction::Label(ins::Label { + name: new_labels + .get(&name) + .ok_or(anyhow!( + "Internal error: {} not found in renamed label", + name + ))? + .clone(), + })); + } + last_ins_was_a_label = true; + } + instr => { + last_ins_was_a_label = false; + new_insns.push(instr); + } + }; + } + Ok(Self { + insns: new_insns, + ..self.clone() + }) + } } diff --git a/test.py b/test.py index 4206ca8..9deb1cf 100644 --- a/test.py +++ b/test.py @@ -68,12 +68,13 @@ classes = list( IdType("Landroidx/navigation/NavDeepLink$Builder;"), IdType("Landroidx/constraintlayout/core/widgets/ConstraintWidget$1;"), IdType("Landroidx/appcompat/app/ActionBar;"), + IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"), ], apk.classes.keys(), ) ) -# for cls in classes: -# apk.remove_class(cls) +for cls in classes: + apk.remove_class(cls) print("[+] Recompile") @@ -144,7 +145,7 @@ def cmp_dict(a, b, req=0): print(f"{f'{ident}{str(key)}: ':<150}{nice_bool(eq)}") if not eq: global last_id - last_id = a.descriptor + last_id = key cmp(a[key], b[key], req + 1) @@ -177,4 +178,23 @@ if not apk_eq: # ) # m = apk.classes[mid.class_].direct_methods[mid] # nm = new_apk.classes[mid.class_].direct_methods[mid] -# + + +mid = IdMethod( + "setValue", + 
IdMethodType( + IdType("Z"), + [ + IdType("Ljava/lang/String;"), + IdType("Landroidx/constraintlayout/core/parser/CLElement;"), + ], + ), + IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"), +) + +m = apk.classes[mid.class_].virtual_methods[mid] +nm = new_apk.classes[mid.class_].virtual_methods[mid] +c = m.code +nc = nm.code +cc = c.with_normalized_labels() +ncc = nc.with_normalized_labels()