From da47521993edb26f1eeeaae531e5a2a872e3a156 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 4 Dec 2023 11:38:08 +0100 Subject: [PATCH] add field ids to generated dex --- androscalpel/src/annotation.rs | 16 +++++++- androscalpel/src/class.rs | 26 ++++++++++++ androscalpel/src/code.rs | 8 +++- androscalpel/src/dex_id.rs | 22 ++++++++++ androscalpel/src/dex_writer.rs | 63 ++++++++++++++++++++++++---- androscalpel/src/field.rs | 13 ++++++ androscalpel/src/method.rs | 19 ++++++++- androscalpel/src/method_handle.rs | 68 +++++++++++++++++++++++++++++++ androscalpel/src/scalar.rs | 11 ++++- androscalpel/src/value.rs | 24 +++++++++++ 10 files changed, 258 insertions(+), 12 deletions(-) diff --git a/androscalpel/src/annotation.rs b/androscalpel/src/annotation.rs index 7160181..c605cfe 100644 --- a/androscalpel/src/annotation.rs +++ b/androscalpel/src/annotation.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use pyo3::prelude::*; -use crate::{dex_id::IdType, value::DexValue, DexString, IdMethodType}; +use crate::{dex_id::IdType, value::DexValue, DexString, IdField, IdMethodType}; /// Annotation with a visibility #[pyclass] @@ -76,6 +76,11 @@ impl DexAnnotationItem { pub fn get_all_protos(&self) -> HashSet { self.annotation.get_all_protos() } + + /// Return all field ids referenced in the annotation. + pub fn get_all_field_ids(&self) -> HashSet { + self.annotation.get_all_field_ids() + } } /// An annotation. @@ -142,4 +147,13 @@ impl DexAnnotation { } protos } + + /// Return all field ids referenced in the annotation. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + for value in self.elements.values() { + fields.extend(value.get_all_field_ids()); + } + fields + } } diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index 6ef5221..284a499 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -215,4 +215,30 @@ impl Class { } protos } + + /// Return all fields referenced in the class. + /// This **not** the concatenation of the static and instances fields variable: + /// this also contains reference to fields in other classes used by methods/values + /// in this class. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + for (id, field) in &self.static_fields { + fields.insert(id.clone()); + fields.extend(field.get_all_field_ids()); + } + for (id, field) in &self.instance_fields { + fields.insert(id.clone()); + fields.extend(field.get_all_field_ids()); + } + for method in self.direct_methods.values() { + fields.extend(method.get_all_field_ids()); + } + for method in self.virtual_methods.values() { + fields.extend(method.get_all_field_ids()); + } + for annot in &self.annotations { + fields.extend(annot.get_all_field_ids()); + } + fields + } } diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index 07649a1..d8aec0d 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{DexString, IdMethodType, IdType}; +use crate::{DexString, IdField, IdMethodType, IdType}; // TODO: make this easy to edit/manipulate, maybe move to Method @@ -90,4 +90,10 @@ impl Code { // TODO HashSet::new() } + + /// Return all field ids referenced in the codes. + pub fn get_all_field_ids(&self) -> HashSet { + // TODO + HashSet::new() + } } diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index c1d5f66..17a3451 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -468,6 +468,28 @@ impl IdField { types.insert(self.class_.clone()); types } + + /// Return all field ids referenced in the Id. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.clone()); + fields + } +} + +impl Ord for IdField { + fn cmp(&self, other: &Self) -> Ordering { + self.class_ + .cmp(&other.class_) + .then(self.name.cmp(&other.name)) + .then(self.type_.cmp(&other.type_)) + } +} + +impl PartialOrd for IdField { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } /// The Id of a method. diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index f9840e3..046784a 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::io::{Cursor, Write}; -use anyhow::{anyhow, Context}; +use anyhow::{anyhow, bail, Context}; use crate::Result; use crate::*; @@ -15,7 +15,7 @@ pub struct DexWriter { strings: HashMap, type_ids: HashMap, proto_ids: HashMap, - _field_ids: HashMap, + field_ids: HashMap, _method_ids: HashMap, // TODO: composite classes need a struct for storing link data // class_defs: HashMap, @@ -61,7 +61,7 @@ impl Default for DexWriter { strings: HashMap::new(), type_ids: HashMap::new(), proto_ids: HashMap::new(), - _field_ids: HashMap::new(), + field_ids: HashMap::new(), _method_ids: HashMap::new(), } } @@ -86,16 +86,24 @@ impl DexWriter { let new_types = class.get_all_types(); /* this means more types than bytes in the file, prbl no gonna append * and len(type) <= len(string) anyway - let mut future_type_set = new_types.clone(); - future_type_set.extend(self.type_ids.keys().cloned()); - // TODO: they are ref to type as u16, check - if future_type_set.len() >= u32::MAX as usize { + let mut futur_type_set = new_types.clone(); + futur_type_set.extend(self.type_ids.keys().cloned()); + // TODO: they are ref to type as u16, so checks? + if futur_type_set.len() >= u32::MAX as usize { // TODO return structured error to handle this case by generating multiple dex files bail!("To many types for one dex file"); } */ let new_protos = class.get_all_protos(); + let mut futur_proto_set = new_protos.clone(); + futur_proto_set.extend(self.proto_ids.keys().cloned()); + if futur_proto_set.len() >= u16::MAX as usize { + // TODO return structured error to handle this case by generating multiple dex files + bail!("To many prototypes for one dex file"); + } + + let new_field_ids = class.get_all_field_ids(); for string in new_strings { self.strings.insert(string, 0); @@ -106,6 +114,9 @@ impl DexWriter { for proto in new_protos { self.proto_ids.insert(proto, 0); } + for field in new_field_ids { + self.field_ids.insert(field, 0); + } Ok(()) } @@ -209,6 +220,39 @@ impl DexWriter { offset += list.size(); } + // Sort and generate FieldIdItem + let mut field_ids_list: Vec = self.field_ids.keys().cloned().collect(); + field_ids_list.sort(); + for (idx, field_id) in field_ids_list.iter().enumerate() { + self.field_ids + .entry(field_id.clone()) + .and_modify(|val| *val = idx); + section_manager.add_elt(Section::FieldIdItem, None); + } + let field_ids_list: Vec = { + let mut field_ids_list_aux = vec![]; + for field in field_ids_list.into_iter() { + field_ids_list_aux.push(FieldIdItem { + class_idx: *self.type_ids.get(&field.class_).ok_or(anyhow!( + "Type {} (class of field {}) not found in dex builder", + field.class_.__repr__(), + field.__repr__() + ))? as u16, + type_idx: *self.type_ids.get(&field.type_).ok_or(anyhow!( + "Type {} (type of field {}) not found in dex builder", + field.type_.__repr__(), + field.__repr__() + ))? as u16, + name_idx: *self.strings.get(&field.name).ok_or(anyhow!( + "String {} (name of field {}) not found in dex builder", + field.name.__repr__(), + field.__repr__() + ))? as u32, + }); + } + field_ids_list_aux + }; + // Populate map_list let map_item_size = MapItem { type_: MapItemType::HeaderItem, @@ -292,7 +336,10 @@ impl DexWriter { for proto in proto_ids_list { proto.serialize(writer)?; } - // TODO: FieldIdItem, + // FieldIdItem section + for field_id in field_ids_list { + field_id.serialize(writer)?; + } // TODO: MethodIdItem, // TODO: ClassDefItem, // TODO: CallSiteIdItem, diff --git a/androscalpel/src/field.rs b/androscalpel/src/field.rs index 8203e34..a08fc13 100644 --- a/androscalpel/src/field.rs +++ b/androscalpel/src/field.rs @@ -157,4 +157,17 @@ impl Field { } protos } + + /// Return all field ids referenced in the field. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.descriptor.clone()); + if let Some(value) = &self.value { + fields.extend(value.get_all_field_ids()); + } + for annot in &self.annotations { + fields.extend(annot.get_all_field_ids()); + } + fields + } } diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index 4ce3f4f..f40a88a 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{Code, DexAnnotationItem, DexString, IdMethod, IdMethodType, IdType}; +use crate::{Code, DexAnnotationItem, DexString, IdField, IdMethod, IdMethodType, IdType}; /// Represent a method. #[pyclass] @@ -160,4 +160,21 @@ impl Method { } protos } + + /// Return all field ids referenced in the method. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + for annot in &self.annotations { + fields.extend(annot.get_all_field_ids()); + } + for param_annots in &self.parameters_annotations { + for annot in param_annots { + fields.extend(annot.get_all_field_ids()); + } + } + if let Some(code) = &self.code { + fields.extend(code.get_all_field_ids()); + } + fields + } } diff --git a/androscalpel/src/method_handle.rs b/androscalpel/src/method_handle.rs index 74e81a1..0df7358 100644 --- a/androscalpel/src/method_handle.rs +++ b/androscalpel/src/method_handle.rs @@ -59,6 +59,13 @@ impl StaticPut { pub fn get_all_protos(&self) -> HashSet { HashSet::new() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.0.clone()); + fields + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -97,6 +104,13 @@ impl StaticGet { pub fn get_all_protos(&self) -> HashSet { HashSet::new() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.0.clone()); + fields + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -135,6 +149,13 @@ impl InstancePut { pub fn get_all_protos(&self) -> HashSet { HashSet::new() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.0.clone()); + fields + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -173,6 +194,13 @@ impl InstanceGet { pub fn get_all_protos(&self) -> HashSet { HashSet::new() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + fields.insert(self.0.clone()); + fields + } } #[pyclass] @@ -212,6 +240,11 @@ impl InvokeStatic { pub fn get_all_protos(&self) -> HashSet { self.0.get_all_protos() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -251,6 +284,11 @@ impl InvokeInstance { pub fn get_all_protos(&self) -> HashSet { self.0.get_all_protos() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -290,6 +328,11 @@ impl InvokeConstructor { pub fn get_all_protos(&self) -> HashSet { self.0.get_all_protos() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -329,6 +372,11 @@ impl InvokeDirect { pub fn get_all_protos(&self) -> HashSet { self.0.get_all_protos() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -368,6 +416,11 @@ impl InvokeInterface { pub fn get_all_protos(&self) -> HashSet { self.0.get_all_protos() } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + HashSet::new() + } } impl<'source> FromPyObject<'source> for MethodHandle { @@ -475,4 +528,19 @@ impl MethodHandle { Self::InvokeInterface(val) => val.get_all_protos(), } } + + /// Return all field ids referenced in the handle. + pub fn get_all_field_ids(&self) -> HashSet { + match self { + Self::StaticPut(val) => val.get_all_field_ids(), + Self::StaticGet(val) => val.get_all_field_ids(), + Self::InstancePut(val) => val.get_all_field_ids(), + Self::InstanceGet(val) => val.get_all_field_ids(), + Self::InvokeStatic(val) => val.get_all_field_ids(), + Self::InvokeInstance(val) => val.get_all_field_ids(), + Self::InvokeConstructor(val) => val.get_all_field_ids(), + Self::InvokeDirect(val) => val.get_all_field_ids(), + Self::InvokeInterface(val) => val.get_all_field_ids(), + } + } } diff --git a/androscalpel/src/scalar.rs b/androscalpel/src/scalar.rs index 883be3d..8dc3ef5 100644 --- a/androscalpel/src/scalar.rs +++ b/androscalpel/src/scalar.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; -use crate::{DexString, DexValue, IdMethodType, IdType}; +use crate::{DexString, DexValue, IdField, IdMethodType, IdType}; use pyo3::prelude::*; #[pyclass] @@ -288,4 +288,13 @@ impl DexArray { } protos } + + /// Return all field ids referenced in the value. + pub fn get_all_field_ids(&self) -> HashSet { + let mut fields = HashSet::new(); + for val in &self.0 { + fields.extend(val.get_all_field_ids()); + } + fields + } } diff --git a/androscalpel/src/value.rs b/androscalpel/src/value.rs index a279975..d9ae007 100644 --- a/androscalpel/src/value.rs +++ b/androscalpel/src/value.rs @@ -172,6 +172,30 @@ impl DexValue { DexValue::Boolean(_) => HashSet::new(), } } + + /// Return all field ids referenced in the value. + pub fn get_all_field_ids(&self) -> HashSet { + match self { + DexValue::Byte(_) => HashSet::new(), + DexValue::Short(_) => HashSet::new(), + DexValue::Char(_) => HashSet::new(), + DexValue::Int(_) => HashSet::new(), + DexValue::Long(_) => HashSet::new(), + DexValue::Float(_) => HashSet::new(), + DexValue::Double(_) => HashSet::new(), + DexValue::MethodType(_) => HashSet::new(), + DexValue::MethodHandle(val) => val.get_all_field_ids(), + DexValue::String(_) => HashSet::new(), + DexValue::Type(_) => HashSet::new(), + DexValue::Field(val) => val.get_all_field_ids(), + DexValue::Method(_) => HashSet::new(), + DexValue::Enum(_) => HashSet::new(), + DexValue::Array(val) => val.get_all_field_ids(), + DexValue::Annotation(val) => val.get_all_field_ids(), + DexValue::Null(_) => HashSet::new(), + DexValue::Boolean(_) => HashSet::new(), + } + } } impl IntoPy for DexValue {