diff --git a/androscalpel/src/annotation.rs b/androscalpel/src/annotation.rs index c605cfe..12a0c66 100644 --- a/androscalpel/src/annotation.rs +++ b/androscalpel/src/annotation.rs @@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet}; use pyo3::prelude::*; -use crate::{dex_id::IdType, value::DexValue, DexString, IdField, IdMethodType}; +use crate::{dex_id::IdType, value::DexValue, DexString, IdField, IdMethod, IdMethodType}; /// Annotation with a visibility #[pyclass] @@ -81,6 +81,11 @@ impl DexAnnotationItem { pub fn get_all_field_ids(&self) -> HashSet { self.annotation.get_all_field_ids() } + + /// Return all method ids referenced in the annotation. + pub fn get_all_method_ids(&self) -> HashSet { + self.annotation.get_all_method_ids() + } } /// An annotation. @@ -156,4 +161,13 @@ impl DexAnnotation { } fields } + + /// Return all method ids referenced in the annotation. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + for value in self.elements.values() { + methods.extend(value.get_all_method_ids()); + } + methods + } } diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index 284a499..a2ed1c5 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -216,8 +216,8 @@ impl Class { protos } - /// Return all fields referenced in the class. - /// This **not** the concatenation of the static and instances fields variable: + /// Return all fields id referenced in the class. + /// This **not** the concatenation of the static and instances fields attributes: /// this also contains reference to fields in other classes used by methods/values /// in this class. pub fn get_all_field_ids(&self) -> HashSet { @@ -241,4 +241,30 @@ impl Class { } fields } + + /// Return all methods id referenced in the class. + /// This **not** the concatenation of the direct and virtual method attributs: + /// this also contains reference to method in other classes used by methods/values + /// in this class. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + for field in self.static_fields.values() { + methods.extend(field.get_all_method_ids()); + } + for field in self.instance_fields.values() { + methods.extend(field.get_all_method_ids()); + } + for (id, method) in &self.direct_methods { + methods.insert(id.clone()); + methods.extend(method.get_all_method_ids()); + } + for (id, method) in &self.virtual_methods { + methods.insert(id.clone()); + methods.extend(method.get_all_method_ids()); + } + for annot in &self.annotations { + methods.extend(annot.get_all_method_ids()); + } + methods + } } diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index d8aec0d..82c2a71 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{DexString, IdField, IdMethodType, IdType}; +use crate::{DexString, IdField, IdMethod, IdMethodType, IdType}; // TODO: make this easy to edit/manipulate, maybe move to Method @@ -96,4 +96,10 @@ impl Code { // TODO HashSet::new() } + + /// Return all method ids referenced in the codes. + pub fn get_all_method_ids(&self) -> HashSet { + // TODO + HashSet::new() + } } diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index 17a3451..efcb30b 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -574,6 +574,28 @@ impl IdMethod { protos.insert(self.proto.clone()); protos } + + /// Return all method ids referenced in the Id. + pub fn get_all_method_ids(&self) -> HashSet { + let mut method_ids = HashSet::new(); + method_ids.insert(self.clone()); + method_ids + } +} + +impl Ord for IdMethod { + fn cmp(&self, other: &Self) -> Ordering { + self.class_ + .cmp(&other.class_) + .then(self.name.cmp(&other.name)) + .then(self.proto.cmp(&other.proto)) + } +} + +impl PartialOrd for IdMethod { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } } #[pyclass] @@ -607,4 +629,9 @@ impl IdEnum { pub fn get_all_types(&self) -> HashSet { self.0.get_all_types() } + + /// Return all field ids referenced in the Id. + pub fn get_all_field_ids(&self) -> HashSet { + self.0.get_all_field_ids() + } } diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 046784a..8feee60 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; use std::io::{Cursor, Write}; +use log::debug; + use anyhow::{anyhow, bail, Context}; use crate::Result; @@ -16,7 +18,7 @@ pub struct DexWriter { type_ids: HashMap, proto_ids: HashMap, field_ids: HashMap, - _method_ids: HashMap, + method_ids: HashMap, // TODO: composite classes need a struct for storing link data // class_defs: HashMap, // call_site_ids: // TODO: parsing code insns @@ -62,7 +64,7 @@ impl Default for DexWriter { type_ids: HashMap::new(), proto_ids: HashMap::new(), field_ids: HashMap::new(), - _method_ids: HashMap::new(), + method_ids: HashMap::new(), } } } @@ -73,38 +75,37 @@ impl DexWriter { } pub fn add_class(&mut self, class: &Class) -> Result<()> { + debug!("Adding class {} to dex builder", class.descriptor.__str__()); let new_strings = class.get_all_strings(); - /* this means more strings than bytes in the file, prbl no gonna append first - let mut futur_string_set = new_strings.clone(); - futur_string_set.extend(self.strings.keys().cloned()); - if futur_string_set.len() >= u32::MAX as usize { - // TODO return structured error to handle this case by generating multiple dex files - bail!("To many strings for one dex file"); - } - */ let new_types = class.get_all_types(); - /* this means more types than bytes in the file, prbl no gonna append - * and len(type) <= len(string) anyway - let mut futur_type_set = new_types.clone(); - futur_type_set.extend(self.type_ids.keys().cloned()); - // TODO: they are ref to type as u16, so checks? - if futur_type_set.len() >= u32::MAX as usize { + let new_nb_types = self.type_ids.len() + + new_types + .iter() + .filter(|ty| self.type_ids.get(ty).is_none()) + .count(); + if new_nb_types >= u16::MAX as usize { + // type_ids are not always u16, so this may not be a hard limit, + // but it's easier to enforce it to avoid strange bugs. // TODO return structured error to handle this case by generating multiple dex files bail!("To many types for one dex file"); } - */ let new_protos = class.get_all_protos(); - let mut futur_proto_set = new_protos.clone(); - futur_proto_set.extend(self.proto_ids.keys().cloned()); - if futur_proto_set.len() >= u16::MAX as usize { + let new_nb_protos = self.proto_ids.len() + + new_protos + .iter() + .filter(|proto| self.proto_ids.get(proto).is_none()) + .count(); + if new_nb_protos >= u16::MAX as usize { // TODO return structured error to handle this case by generating multiple dex files bail!("To many prototypes for one dex file"); } let new_field_ids = class.get_all_field_ids(); + let new_method_ids = class.get_all_method_ids(); + for string in new_strings { self.strings.insert(string, 0); } @@ -117,6 +118,9 @@ impl DexWriter { for field in new_field_ids { self.field_ids.insert(field, 0); } + for method in new_method_ids { + self.method_ids.insert(method, 0); + } Ok(()) } @@ -130,9 +134,11 @@ impl DexWriter { let mut section_manager = SectionManager::default(); section_manager.incr_section_size(Section::HeaderItem, 0x70); - // Sort Strings and generate StringDataItem + debug!("Sort string and generate string_data_item and string_ids sections"); let mut string_ids_list: Vec = self.strings.keys().cloned().collect(); + debug!("start sort"); string_ids_list.sort(); + debug!("end sort"); for (idx, string) in string_ids_list.iter().enumerate() { self.strings .entry(string.clone()) @@ -145,9 +151,11 @@ impl DexWriter { .map(|string| string.into()) .collect(); - // Sort types and generate TypeIdItem + debug!("Sort types and generate type_id_item section"); let mut type_ids_list: Vec = self.type_ids.keys().cloned().collect(); + debug!("start sort"); type_ids_list.sort(); + debug!("end sort"); for (idx, ty) in type_ids_list.iter().enumerate() { self.type_ids.entry(ty.clone()).and_modify(|val| *val = idx); section_manager.add_elt(Section::TypeIdItem, None); @@ -166,9 +174,11 @@ impl DexWriter { type_ids_list_aux }; - // Sort prototype and generate ProtoIdItem + debug!("Sort prototypes and generate proto_id_item section"); let mut proto_ids_list: Vec = self.proto_ids.keys().cloned().collect(); + debug!("start sort"); proto_ids_list.sort(); + debug!("end sort"); for (idx, proto) in proto_ids_list.iter().enumerate() { self.proto_ids .entry(proto.clone()) @@ -195,7 +205,7 @@ impl DexWriter { proto_ids_list_aux }; - // Generate TypeLists + debug!("Generate the type_list section"); let mut type_lists_index = HashMap::new(); for proto in self.proto_ids.keys() { if !proto.parameters.is_empty() { @@ -220,9 +230,11 @@ impl DexWriter { offset += list.size(); } - // Sort and generate FieldIdItem + debug!("Sort field ids and generate field_ids_item"); let mut field_ids_list: Vec = self.field_ids.keys().cloned().collect(); + debug!("start sort"); field_ids_list.sort(); + debug!("end sort"); for (idx, field_id) in field_ids_list.iter().enumerate() { self.field_ids .entry(field_id.clone()) @@ -253,7 +265,43 @@ impl DexWriter { field_ids_list_aux }; - // Populate map_list + debug!("Sort method ids and generate method_id_item section"); + let mut method_ids_list: Vec = self.method_ids.keys().cloned().collect(); + debug!("start sort"); + method_ids_list.sort(); + debug!("end sort"); + for (idx, method_id) in method_ids_list.iter().enumerate() { + self.method_ids + .entry(method_id.clone()) + .and_modify(|val| *val = idx); + section_manager.add_elt(Section::MethodIdItem, None); + } + let method_ids_list: Vec = { + let mut method_ids_list_aux = vec![]; + for method in method_ids_list.into_iter() { + method_ids_list_aux.push(MethodIdItem { + class_idx: *self.type_ids.get(&method.class_).ok_or(anyhow!( + "Type {} (class of method {}) not found in dex builder", + method.class_.__repr__(), + method.__repr__() + ))? as u16, + proto_idx: *self.proto_ids.get(&method.proto).ok_or(anyhow!( + "Prototype {} (signature of method {}) not found in dex builder", + method.proto.__repr__(), + method.__repr__() + ))? as u16, + name_idx: *self.strings.get(&method.name).ok_or(anyhow!( + "String {} (name of method {}) not found in dex builder", + method.name.__repr__(), + method.__repr__() + ))? as u32, + }); + } + method_ids_list_aux + }; + + debug!("Generate the map_list"); + // Get the size of a map item let map_item_size = MapItem { type_: MapItemType::HeaderItem, unused: 0, @@ -287,7 +335,7 @@ impl DexWriter { } } - // Link Header section: + debug!("Link the header section"); self.header.map_off = section_manager.get_offset(Section::MapList); self.header.string_ids_size = section_manager.get_nb_elt(Section::StringIdItem) as u32; self.header.string_ids_off = section_manager.get_offset(Section::StringIdItem); @@ -304,7 +352,8 @@ impl DexWriter { self.header.data_size = section_manager.get_size(Section::Data); self.header.data_off = section_manager.get_offset(Section::Data); - // TODO: link TypeList in ProtoIdItems and ClassDefItems + debug!("Link the type_list entries in the proto_id_items"); + // TODO: link TypeList in ClassDefItems for (proto, idx) in &self.proto_ids { if !proto.parameters.is_empty() { let type_list = self.gen_type_list(&proto.parameters).with_context(|| { @@ -317,6 +366,7 @@ impl DexWriter { } } + debug!("Serialize the dex file"); // TODO: compute checksum, hash, ect self.header.serialize(writer)?; // StringIdItem section @@ -340,7 +390,10 @@ impl DexWriter { for field_id in field_ids_list { field_id.serialize(writer)?; } - // TODO: MethodIdItem, + // MethodIdItem section + for method_id in method_ids_list { + method_id.serialize(writer)?; + } // TODO: ClassDefItem, // TODO: CallSiteIdItem, // TODO: MethodHandleItem, diff --git a/androscalpel/src/field.rs b/androscalpel/src/field.rs index a08fc13..fdb94ea 100644 --- a/androscalpel/src/field.rs +++ b/androscalpel/src/field.rs @@ -4,7 +4,7 @@ use std::collections::HashSet; use pyo3::prelude::*; -use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethodType, IdType}; +use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethod, IdMethodType, IdType}; /// Represent a field. #[pyclass] @@ -170,4 +170,16 @@ impl Field { } fields } + + /// Return all method ids referenced in the method. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + if let Some(value) = &self.value { + methods.extend(value.get_all_method_ids()); + } + for annot in &self.annotations { + methods.extend(annot.get_all_method_ids()); + } + methods + } } diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index f40a88a..4e8ed9f 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -177,4 +177,23 @@ impl Method { } fields } + + /// Return all method ids referenced in the method. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.descriptor.clone()); + + for annot in &self.annotations { + methods.extend(annot.get_all_method_ids()); + } + for param_annots in &self.parameters_annotations { + for annot in param_annots { + methods.extend(annot.get_all_method_ids()); + } + } + if let Some(code) = &self.code { + methods.extend(code.get_all_method_ids()); + } + methods + } } diff --git a/androscalpel/src/method_handle.rs b/androscalpel/src/method_handle.rs index 0df7358..8143688 100644 --- a/androscalpel/src/method_handle.rs +++ b/androscalpel/src/method_handle.rs @@ -66,6 +66,11 @@ impl StaticPut { fields.insert(self.0.clone()); fields } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -111,6 +116,11 @@ impl StaticGet { fields.insert(self.0.clone()); fields } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -156,6 +166,11 @@ impl InstancePut { fields.insert(self.0.clone()); fields } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] @@ -201,6 +216,11 @@ impl InstanceGet { fields.insert(self.0.clone()); fields } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + HashSet::new() + } } #[pyclass] @@ -245,6 +265,13 @@ impl InvokeStatic { pub fn get_all_field_ids(&self) -> HashSet { HashSet::new() } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.0.clone()); + methods + } } #[pyclass] @@ -289,6 +316,13 @@ impl InvokeInstance { pub fn get_all_field_ids(&self) -> HashSet { HashSet::new() } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.0.clone()); + methods + } } #[pyclass] @@ -333,6 +367,13 @@ impl InvokeConstructor { pub fn get_all_field_ids(&self) -> HashSet { HashSet::new() } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.0.clone()); + methods + } } #[pyclass] @@ -377,6 +418,13 @@ impl InvokeDirect { pub fn get_all_field_ids(&self) -> HashSet { HashSet::new() } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.0.clone()); + methods + } } #[pyclass] @@ -421,6 +469,13 @@ impl InvokeInterface { pub fn get_all_field_ids(&self) -> HashSet { HashSet::new() } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + methods.insert(self.0.clone()); + methods + } } impl<'source> FromPyObject<'source> for MethodHandle { @@ -543,4 +598,19 @@ impl MethodHandle { Self::InvokeInterface(val) => val.get_all_field_ids(), } } + + /// Return all method ids referenced in the handle. + pub fn get_all_method_ids(&self) -> HashSet { + match self { + Self::StaticPut(val) => val.get_all_method_ids(), + Self::StaticGet(val) => val.get_all_method_ids(), + Self::InstancePut(val) => val.get_all_method_ids(), + Self::InstanceGet(val) => val.get_all_method_ids(), + Self::InvokeStatic(val) => val.get_all_method_ids(), + Self::InvokeInstance(val) => val.get_all_method_ids(), + Self::InvokeConstructor(val) => val.get_all_method_ids(), + Self::InvokeDirect(val) => val.get_all_method_ids(), + Self::InvokeInterface(val) => val.get_all_method_ids(), + } + } } diff --git a/androscalpel/src/scalar.rs b/androscalpel/src/scalar.rs index 8dc3ef5..6c6c62c 100644 --- a/androscalpel/src/scalar.rs +++ b/androscalpel/src/scalar.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; -use crate::{DexString, DexValue, IdField, IdMethodType, IdType}; +use crate::{DexString, DexValue, IdField, IdMethod, IdMethodType, IdType}; use pyo3::prelude::*; #[pyclass] @@ -297,4 +297,13 @@ impl DexArray { } fields } + + /// Return all method ids referenced in the value. + pub fn get_all_method_ids(&self) -> HashSet { + let mut methods = HashSet::new(); + for val in &self.0 { + methods.extend(val.get_all_method_ids()); + } + methods + } } diff --git a/androscalpel/src/value.rs b/androscalpel/src/value.rs index d9ae007..46c499c 100644 --- a/androscalpel/src/value.rs +++ b/androscalpel/src/value.rs @@ -189,13 +189,37 @@ impl DexValue { DexValue::Type(_) => HashSet::new(), DexValue::Field(val) => val.get_all_field_ids(), DexValue::Method(_) => HashSet::new(), - DexValue::Enum(_) => HashSet::new(), + DexValue::Enum(val) => val.get_all_field_ids(), DexValue::Array(val) => val.get_all_field_ids(), DexValue::Annotation(val) => val.get_all_field_ids(), DexValue::Null(_) => HashSet::new(), DexValue::Boolean(_) => HashSet::new(), } } + + /// Return all method ids referenced in the value. + pub fn get_all_method_ids(&self) -> HashSet { + match self { + DexValue::Byte(_) => HashSet::new(), + DexValue::Short(_) => HashSet::new(), + DexValue::Char(_) => HashSet::new(), + DexValue::Int(_) => HashSet::new(), + DexValue::Long(_) => HashSet::new(), + DexValue::Float(_) => HashSet::new(), + DexValue::Double(_) => HashSet::new(), + DexValue::MethodType(_) => HashSet::new(), + DexValue::MethodHandle(val) => val.get_all_method_ids(), + DexValue::String(_) => HashSet::new(), + DexValue::Type(_) => HashSet::new(), + DexValue::Field(_) => HashSet::new(), + DexValue::Method(val) => val.get_all_method_ids(), + DexValue::Enum(_) => HashSet::new(), + DexValue::Array(val) => val.get_all_method_ids(), + DexValue::Annotation(val) => val.get_all_method_ids(), + DexValue::Null(_) => HashSet::new(), + DexValue::Boolean(_) => HashSet::new(), + } + } } impl IntoPy for DexValue { diff --git a/test.py b/test.py index a1425f8..084143e 100644 --- a/test.py +++ b/test.py @@ -2,7 +2,7 @@ import logging FORMAT = "[%(levelname)s] %(name)s %(filename)s:%(lineno)d: %(message)s" logging.basicConfig(format=FORMAT) -logging.getLogger().setLevel(logging.INFO) +logging.getLogger().setLevel(logging.DEBUG) import androscalpel as asc import zipfile as z