From 57d6b38746e403ca9e69b4e241ef1a57d31f6ae1 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Fri, 1 Dec 2023 18:08:30 +0100 Subject: [PATCH] add type lists to generated dex files --- androscalpel/src/annotation.rs | 16 ++- androscalpel/src/class.rs | 27 +++- androscalpel/src/code.rs | 8 +- androscalpel/src/dex_id.rs | 31 ++++- androscalpel/src/dex_writer.rs | 146 ++++++++++++++++++--- androscalpel/src/field.rs | 14 +- androscalpel/src/method.rs | 20 ++- androscalpel/src/method_handle.rs | 60 +++++++++ androscalpel/src/scalar.rs | 11 +- androscalpel/src/value.rs | 24 ++++ androscalpel_serializer/src/items/class.rs | 6 +- 11 files changed, 338 insertions(+), 25 deletions(-) diff --git a/androscalpel/src/annotation.rs b/androscalpel/src/annotation.rs index 15f3e59..7160181 100644 --- a/androscalpel/src/annotation.rs +++ b/androscalpel/src/annotation.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use pyo3::prelude::*; -use crate::{dex_id::IdType, value::DexValue, DexString}; +use crate::{dex_id::IdType, value::DexValue, DexString, IdMethodType}; /// Annotation with a visibility #[pyclass] @@ -71,6 +71,11 @@ impl DexAnnotationItem { pub fn get_all_types(&self) -> HashSet { self.annotation.get_all_types() } + + /// Return all prototypes referenced in the annotation. + pub fn get_all_protos(&self) -> HashSet { + self.annotation.get_all_protos() + } } /// An annotation. @@ -128,4 +133,13 @@ impl DexAnnotation { } types } + + /// Return all prototypes referenced in the annotation. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + for value in self.elements.values() { + protos.extend(value.get_all_protos()); + } + protos + } } diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index 8f870c2..6ef5221 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -4,7 +4,9 @@ use std::collections::{HashMap, HashSet}; use pyo3::prelude::*; -use crate::{DexAnnotationItem, DexString, Field, IdField, IdMethod, IdType, Method, Result}; +use crate::{ + DexAnnotationItem, DexString, Field, IdField, IdMethod, IdMethodType, IdType, Method, Result, +}; /// Represent an apk #[pyclass] @@ -190,4 +192,27 @@ impl Class { } types } + + /// Return all protoypes referenced in the class. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + for field in self.static_fields.values() { + protos.extend(field.get_all_protos()); + } + for field in self.instance_fields.values() { + protos.extend(field.get_all_protos()); + } + for (id, method) in &self.direct_methods { + protos.extend(id.get_all_protos()); + protos.extend(method.get_all_protos()); + } + for (id, method) in &self.virtual_methods { + protos.extend(id.get_all_protos()); + protos.extend(method.get_all_protos()); + } + for annot in &self.annotations { + protos.extend(annot.get_all_protos()); + } + protos + } } diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index 1582e6e..07649a1 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{DexString, IdType}; +use crate::{DexString, IdMethodType, IdType}; // TODO: make this easy to edit/manipulate, maybe move to Method @@ -84,4 +84,10 @@ impl Code { } types } + + /// Return all prototypes referenced in the codes. + pub fn get_all_protos(&self) -> HashSet { + // TODO + HashSet::new() + } } diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index bd3b076..c1d5f66 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -1,6 +1,6 @@ //! The class identifying dex structure. -use std::cmp::{Ord, PartialOrd}; +use std::cmp::{Ord, Ordering, PartialOrd}; use std::collections::hash_map::DefaultHasher; use std::collections::HashSet; use std::hash::{Hash, Hasher}; @@ -21,6 +21,21 @@ pub struct IdMethodType { pub(crate) parameters: Vec, } +impl Ord for IdMethodType { + fn cmp(&self, other: &Self) -> Ordering { + self.return_type + .cmp(&other.return_type) + .then(self.parameters.cmp(&other.parameters)) + .then(self.shorty.cmp(&other.shorty)) //should not have an influence if generated correctly + } +} + +impl PartialOrd for IdMethodType { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + #[pymethods] /// The type of a method. The shorty is formated as described in /// @@ -90,6 +105,13 @@ impl IdMethodType { types.extend(self.parameters.clone()); types } + + /// Return all prototypes referenced in the Id. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + protos.insert(self.clone()); + protos + } } impl IdMethodType { @@ -523,6 +545,13 @@ impl IdMethod { types.insert(self.class_.clone()); types } + + /// Return all prototypes referenced in the Id. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + protos.insert(self.proto.clone()); + protos + } } #[pyclass] diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 2514947..f9840e3 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -3,7 +3,7 @@ use std::collections::HashMap; use std::io::{Cursor, Write}; -//use anyhow::bail; +use anyhow::{anyhow, Context}; use crate::Result; use crate::*; @@ -14,7 +14,7 @@ pub struct DexWriter { header: HeaderItem, strings: HashMap, type_ids: HashMap, - _proto_ids: HashMap, + proto_ids: HashMap, _field_ids: HashMap, _method_ids: HashMap, // TODO: composite classes need a struct for storing link data @@ -60,7 +60,7 @@ impl Default for DexWriter { }, strings: HashMap::new(), type_ids: HashMap::new(), - _proto_ids: HashMap::new(), + proto_ids: HashMap::new(), _field_ids: HashMap::new(), _method_ids: HashMap::new(), } @@ -88,18 +88,24 @@ impl DexWriter { * and len(type) <= len(string) anyway let mut future_type_set = new_types.clone(); future_type_set.extend(self.type_ids.keys().cloned()); + // TODO: they are ref to type as u16, check if future_type_set.len() >= u32::MAX as usize { // TODO return structured error to handle this case by generating multiple dex files bail!("To many types for one dex file"); } */ + let new_protos = class.get_all_protos(); + for string in new_strings { self.strings.insert(string, 0); } for ty in new_types { self.type_ids.insert(ty, 0); } + for proto in new_protos { + self.proto_ids.insert(proto, 0); + } Ok(()) } @@ -135,11 +141,73 @@ impl DexWriter { self.type_ids.entry(ty.clone()).and_modify(|val| *val = idx); section_manager.add_elt(Section::TypeIdItem, None); } - let type_ids_list: Vec = (0..type_ids_list.len()) - .map(|idx| TypeIdItem { - descriptor_idx: idx as u32, - }) - .collect(); + let type_ids_list: Vec = { + let mut type_ids_list_aux = vec![]; + for ty in type_ids_list.into_iter() { + type_ids_list_aux.push(TypeIdItem { + descriptor_idx: *self.strings.get(&ty.0).ok_or(anyhow!( + "String {} (name of type {}) not found in dex builder", + ty.0.__repr__(), + ty.__repr__() + ))? as u32, + }); + } + type_ids_list_aux + }; + + // Sort prototype and generate ProtoIdItem + let mut proto_ids_list: Vec = self.proto_ids.keys().cloned().collect(); + proto_ids_list.sort(); + for (idx, proto) in proto_ids_list.iter().enumerate() { + self.proto_ids + .entry(proto.clone()) + .and_modify(|val| *val = idx); + section_manager.add_elt(Section::ProtoIdItem, None); + } + let mut proto_ids_list = { + let mut proto_ids_list_aux = vec![]; + for proto in proto_ids_list { + proto_ids_list_aux.push(ProtoIdItem { + shorty_idx: *self.strings.get(&proto.shorty).ok_or(anyhow!( + "String {}, (shorty of prototype {}) not found in dex builder", + proto.shorty.__repr__(), + proto.__repr__() + ))? as u32, + return_type_idx: *self.type_ids.get(&proto.return_type).ok_or(anyhow!( + "Type {}, (return type of prototype {}) not found in dex builder", + proto.shorty.__repr__(), + proto.__repr__() + ))? as u32, + parameters_off: 0, // TO BE LINKED LATTER + }); + } + proto_ids_list_aux + }; + + // Generate TypeLists + let mut type_lists_index = HashMap::new(); + for proto in self.proto_ids.keys() { + if !proto.parameters.is_empty() { + let type_list = self.gen_type_list(&proto.parameters).with_context(|| { + format!("Failed to generate param list for {}", proto.__repr__()) + })?; + type_lists_index.insert(type_list, 0); + } + } + // TODO add class.interface in type list + let mut offset = 0; + let mut type_lists_and_local_offsets = vec![]; + for (i, (list, idx)) in type_lists_index.iter_mut().enumerate() { + while offset % 4 != 0 { + // Alignment + section_manager.incr_section_size(Section::TypeList, 1); + offset += 1; + } + *idx = i; + type_lists_and_local_offsets.push((list.clone(), offset)); + section_manager.add_elt(Section::TypeList, Some(list.size())); + offset += list.size(); + } // Populate map_list let map_item_size = MapItem { @@ -159,6 +227,13 @@ impl DexWriter { let mut map_list = MapList::default(); for section in Section::VARIANT_LIST { if !section.is_data() && section_manager.get_nb_elt(*section) != 0 { + /* + match section { + // Alignment + // Until Section::MapList included, the section are naturally alligned to 4 + _ => (), + } + */ map_list.list.push(MapItem { type_: section.get_map_item_type(), unused: 0, @@ -170,21 +245,34 @@ impl DexWriter { // Link Header section: self.header.map_off = section_manager.get_offset(Section::MapList); - self.header.string_ids_size = string_ids_list.len() as u32; + self.header.string_ids_size = section_manager.get_nb_elt(Section::StringIdItem) as u32; self.header.string_ids_off = section_manager.get_offset(Section::StringIdItem); - self.header.type_ids_size = 0; // TODO + self.header.type_ids_size = section_manager.get_nb_elt(Section::TypeIdItem) as u32; self.header.type_ids_off = section_manager.get_offset(Section::TypeIdItem); - self.header.proto_ids_size = 0; // TODO + self.header.proto_ids_size = section_manager.get_nb_elt(Section::ProtoIdItem) as u32; self.header.proto_ids_off = section_manager.get_offset(Section::ProtoIdItem); - self.header.field_ids_size = 0; // TODO + self.header.field_ids_size = section_manager.get_nb_elt(Section::FieldIdItem) as u32; self.header.field_ids_off = section_manager.get_offset(Section::FieldIdItem); - self.header.method_ids_size = 0; // TODO + self.header.method_ids_size = section_manager.get_nb_elt(Section::MethodIdItem) as u32; self.header.method_ids_off = section_manager.get_offset(Section::MethodIdItem); - self.header.class_defs_size = 0; // TODO + self.header.class_defs_size = section_manager.get_nb_elt(Section::ClassDefItem) as u32; self.header.class_defs_off = section_manager.get_offset(Section::ClassDefItem); self.header.data_size = section_manager.get_size(Section::Data); self.header.data_off = section_manager.get_offset(Section::Data); + // TODO: link TypeList in ProtoIdItems and ClassDefItems + for (proto, idx) in &self.proto_ids { + if !proto.parameters.is_empty() { + let type_list = self.gen_type_list(&proto.parameters).with_context(|| { + format!("Failed to generate param list for {}", proto.__repr__()) + })?; + let offset = section_manager.get_offset(Section::TypeList) + + type_lists_and_local_offsets[*type_lists_index.get(&type_list).unwrap()].1 + as u32; + proto_ids_list[*idx].parameters_off = offset; + } + } + // TODO: compute checksum, hash, ect self.header.serialize(writer)?; // StringIdItem section @@ -196,10 +284,14 @@ impl DexWriter { str_id.serialize(writer)?; string_off += string.size() as u32; } + // TypeId section for ty in type_ids_list { ty.serialize(writer)?; } - // TODO: ProtoIdItem, + // ProtoId section + for proto in proto_ids_list { + proto.serialize(writer)?; + } // TODO: FieldIdItem, // TODO: MethodIdItem, // TODO: ClassDefItem, @@ -208,7 +300,16 @@ impl DexWriter { // TODO: Data, // MapList, map_list.serialize(writer)?; - // TODO: TypeList, + // TypeList, + let mut offset = 0; + for (list, _) in type_lists_and_local_offsets { + while offset % 4 != 0 { + offset += 1; + 0u8.serialize(writer)?; + } + offset += list.size(); + list.serialize(writer)?; + } // TODO: AnnotationSetRefList, // TODO: AnnotationSetItem, // TODO: ClassDataItem, @@ -225,6 +326,19 @@ impl DexWriter { Ok(()) } + + fn gen_type_list(&self, list: &[IdType]) -> Result { + let mut type_list = TypeList { list: vec![] }; + for ty in list { + type_list.list.push(TypeItem { + type_idx: *self.type_ids.get(ty).ok_or(anyhow!( + "Could not found type {} in dex builder", + ty.__repr__() + ))? as u16, + }); + } + Ok(type_list) + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/androscalpel/src/field.rs b/androscalpel/src/field.rs index 2236103..8203e34 100644 --- a/androscalpel/src/field.rs +++ b/androscalpel/src/field.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdType}; +use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethodType, IdType}; /// Represent a field. #[pyclass] @@ -145,4 +145,16 @@ impl Field { } types } + + /// Return all prototypes referenced in the field. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + if let Some(value) = &self.value { + protos.extend(value.get_all_protos()); + } + for annot in &self.annotations { + protos.extend(annot.get_all_protos()); + } + protos + } } diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index 4f2da4d..4ce3f4f 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{Code, DexAnnotationItem, DexString, IdMethod, IdType}; +use crate::{Code, DexAnnotationItem, DexString, IdMethod, IdMethodType, IdType}; /// Represent a method. #[pyclass] @@ -142,4 +142,22 @@ impl Method { types } + + /// Return all prototypes referenced in the method. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + protos.extend(self.descriptor.get_all_protos()); + for annot in &self.annotations { + protos.extend(annot.get_all_protos()); + } + for param_annots in &self.parameters_annotations { + for annot in param_annots { + protos.extend(annot.get_all_protos()); + } + } + if let Some(code) = &self.code { + protos.extend(code.get_all_protos()); + } + protos + } } diff --git a/androscalpel/src/method_handle.rs b/androscalpel/src/method_handle.rs index a292dd9..74e81a1 100644 --- a/androscalpel/src/method_handle.rs +++ b/androscalpel/src/method_handle.rs @@ -54,6 +54,11 @@ impl StaticPut { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -87,6 +92,11 @@ impl StaticGet { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -120,6 +130,11 @@ impl InstancePut { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -153,6 +168,11 @@ impl InstanceGet { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -187,6 +207,11 @@ impl InvokeStatic { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + self.0.get_all_protos() + } } #[pyclass] @@ -221,6 +246,11 @@ impl InvokeInstance { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + self.0.get_all_protos() + } } #[pyclass] @@ -255,6 +285,11 @@ impl InvokeConstructor { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + self.0.get_all_protos() + } } #[pyclass] @@ -289,6 +324,11 @@ impl InvokeDirect { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + self.0.get_all_protos() + } } #[pyclass] @@ -323,6 +363,11 @@ impl InvokeInterface { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + self.0.get_all_protos() + } } impl<'source> FromPyObject<'source> for MethodHandle { @@ -415,4 +460,19 @@ impl MethodHandle { Self::InvokeInterface(val) => val.get_all_types(), } } + + /// Return all prototypes referenced in the handle. + pub fn get_all_protos(&self) -> HashSet { + match self { + Self::StaticPut(val) => val.get_all_protos(), + Self::StaticGet(val) => val.get_all_protos(), + Self::InstancePut(val) => val.get_all_protos(), + Self::InstanceGet(val) => val.get_all_protos(), + Self::InvokeStatic(val) => val.get_all_protos(), + Self::InvokeInstance(val) => val.get_all_protos(), + Self::InvokeConstructor(val) => val.get_all_protos(), + Self::InvokeDirect(val) => val.get_all_protos(), + Self::InvokeInterface(val) => val.get_all_protos(), + } + } } diff --git a/androscalpel/src/scalar.rs b/androscalpel/src/scalar.rs index 8ed7295..883be3d 100644 --- a/androscalpel/src/scalar.rs +++ b/androscalpel/src/scalar.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; -use crate::{DexString, DexValue, IdType}; +use crate::{DexString, DexValue, IdMethodType, IdType}; use pyo3::prelude::*; #[pyclass] @@ -279,4 +279,13 @@ impl DexArray { } types } + + /// Return all prototypes referenced in the value. + pub fn get_all_protos(&self) -> HashSet { + let mut protos = HashSet::new(); + for val in &self.0 { + protos.extend(val.get_all_protos()); + } + protos + } } diff --git a/androscalpel/src/value.rs b/androscalpel/src/value.rs index 0b33046..a279975 100644 --- a/androscalpel/src/value.rs +++ b/androscalpel/src/value.rs @@ -148,6 +148,30 @@ impl DexValue { DexValue::Boolean(_) => HashSet::new(), } } + + /// Return all prototypes referenced in the value. + pub fn get_all_protos(&self) -> HashSet { + match self { + DexValue::Byte(_) => HashSet::new(), + DexValue::Short(_) => HashSet::new(), + DexValue::Char(_) => HashSet::new(), + DexValue::Int(_) => HashSet::new(), + DexValue::Long(_) => HashSet::new(), + DexValue::Float(_) => HashSet::new(), + DexValue::Double(_) => HashSet::new(), + DexValue::MethodType(val) => val.get_all_protos(), + DexValue::MethodHandle(val) => val.get_all_protos(), + DexValue::String(_) => HashSet::new(), + DexValue::Type(_) => HashSet::new(), + DexValue::Field(_) => HashSet::new(), + DexValue::Method(val) => val.get_all_protos(), + DexValue::Enum(_) => HashSet::new(), + DexValue::Array(val) => val.get_all_protos(), + DexValue::Annotation(val) => val.get_all_protos(), + DexValue::Null(_) => HashSet::new(), + DexValue::Boolean(_) => HashSet::new(), + } + } } impl IntoPy for DexValue { diff --git a/androscalpel_serializer/src/items/class.rs b/androscalpel_serializer/src/items/class.rs index 981a13d..c7a932a 100644 --- a/androscalpel_serializer/src/items/class.rs +++ b/androscalpel_serializer/src/items/class.rs @@ -1,5 +1,7 @@ //! Class definitions. +use std::hash::Hash; + use crate as androscalpel_serializer; use crate::{ReadSeek, Result, Serializable, Uleb128}; use std::io::Write; @@ -207,7 +209,7 @@ pub struct EncodedMethod { /// /// alignment: 4 bytes -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct TypeList { // pub size: u32, pub list: Vec, @@ -246,7 +248,7 @@ impl Serializable for TypeList { } /// -#[derive(Serializable, Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Serializable, Debug, Clone, Copy, PartialEq, Eq, Hash)] pub struct TypeItem { /// Index of a [`crate::TypeIdItem`] in the `type_ids` list. pub type_idx: u16,