diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 8f588de..79b7004 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -14,7 +14,7 @@ use androscalpel_serializer::*; #[derive(Debug, Clone)] pub struct Apk { #[pyo3(get, set)] - pub classes: Vec, + pub classes: HashMap, } impl Apk { @@ -22,8 +22,8 @@ impl Apk { pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> { let dex = DexFileReader::new(data)?; for class in dex.get_class_defs() { - self.classes - .push(self.get_class_from_dex_file(class, &dex)?); + let class = self.get_class_from_dex_file(class, &dex)?; + self.classes.insert(class.descriptor.clone(), class); } Ok(()) } @@ -34,17 +34,14 @@ impl Apk { class_item: &ClassDefItem, dex: &DexFileReader, ) -> Result { - let name_idx = dex - .get_type_id(class_item.class_idx as usize)? - .descriptor_idx; - let name: DexString = dex.get_string(name_idx)?.into(); + let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?; let superclass = if class_item.superclass_idx == NO_INDEX.0 { None } else { - let superclass_idx = dex - .get_type_id(class_item.superclass_idx as usize)? - .descriptor_idx; - Some(dex.get_string(superclass_idx)?.into()) + Some(Self::get_id_type_from_idx( + class_item.superclass_idx as usize, + dex, + )?) }; let interfaces = if class_item.interfaces_off == 0 { vec![] @@ -52,9 +49,7 @@ impl Apk { let type_list = dex.get_struct_at_offset::(class_item.interfaces_off)?; let mut list = vec![]; for ty in type_list.list { - let ty = dex.get_type_id(ty.type_idx as usize)?; - let ty = dex.get_string(ty.descriptor_idx)?.into(); - list.push(ty); + list.push(Self::get_id_type_from_idx(ty.type_idx as usize, dex)?); } list }; @@ -82,7 +77,7 @@ impl Apk { { info!( "Unexpected flags found in class_def_item.access_flags for {}: 0x{:x}", - <&DexString as Into>::into(&name), + String::from(descriptor.get_name()), class_item.access_flags ); } @@ -100,43 +95,57 @@ impl Apk { )?; } } - let mut static_fields = vec![]; - let mut instance_fields = vec![]; - let mut direct_methods = vec![]; - let mut virtual_methods = vec![]; + let mut static_fields_list = vec![]; + let mut instance_fields_list = vec![]; + let mut direct_methods = HashMap::new(); + let mut virtual_methods = HashMap::new(); let data_off = class_item.class_data_off; if data_off != 0 { let data = dex.get_struct_at_offset::(data_off)?; - static_fields = Self::get_field_list_from_encoded_field_list(&data.static_fields, dex)?; - instance_fields = + static_fields_list = + Self::get_field_list_from_encoded_field_list(&data.static_fields, dex)?; + instance_fields_list = Self::get_field_list_from_encoded_field_list(&data.instance_fields, dex)?; - direct_methods = - Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)?; - virtual_methods = - Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)?; + for method in Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)? + { + direct_methods.insert(method.descriptor.clone(), method); + } + for method in Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)? + { + virtual_methods.insert(method.descriptor.clone(), method); + } } if class_item.static_values_off != 0 { let values = dex .get_struct_at_offset::(class_item.static_values_off)? .values; - if values.len() > static_fields.len() { + if values.len() > static_fields_list.len() { return Err(anyhow!( "Inconsistant static_values array found in {}: \ |static_values| = {}, |static_fields| = {}, \ |static_values| should be <= |static_fields|", - <&DexString as Into>::into(&name), + String::from(&descriptor.get_name()), values.len(), - static_fields.len() + static_fields_list.len() )); } for (i, value) in values.iter().enumerate() { - static_fields[i].value = Some(Self::encoded_value_to_dex_value(value, dex)?); + static_fields_list[i].value = Some(Self::encoded_value_to_dex_value(value, dex)?); } - for field in static_fields.iter_mut().skip(values.len()) { + for field in static_fields_list.iter_mut().skip(values.len()) { field.value = None; } } + let mut static_fields = HashMap::new(); + let mut instance_fields = HashMap::new(); + for field in static_fields_list { + static_fields.insert(field.descriptor.clone(), field); + } + for field in instance_fields_list { + instance_fields.insert(field.descriptor.clone(), field); + } + if let Some(annotations_directory) = annotations_directory { for field_annotation in annotations_directory.field_annotations { let field_id = @@ -149,29 +158,22 @@ impl Apk { } else { vec![] }; - if field_id.class_.get_name() != name { + if field_id.class_ != descriptor { info!( "Annotation for field {} found in class {}, dropping it", field_id.__str__(), - name.__str__(), + String::from(descriptor.get_name()), ); } - let mut found = false; - for field in &mut instance_fields { - if field.descriptor == field_id { - field.annotations.append(&mut (annotations.clone())); // the clone is prob - // unnecessary - found = true; - } - } - for field in &mut static_fields { - if field.descriptor == field_id { - field.annotations.append(&mut (annotations.clone())); // the clone is prob - // unnecessary - found = true; - } - } - if !found { + instance_fields + .entry(field_id.clone()) + .and_modify(|field| field.annotations = annotations.clone()); + static_fields + .entry(field_id.clone()) + .and_modify(|field| field.annotations = annotations.clone()); + if instance_fields.get(&field_id).is_none() + && static_fields.get(&field_id).is_none() + { info!( "Annotation found for field {} but could not find the field definition, dropping it", field_id.__str__(), @@ -189,29 +191,22 @@ impl Apk { } else { vec![] }; - if method_id.class_.get_name() != name { + if method_id.class_ != descriptor { info!( "Annotation for method {} found in class {}, dropping it", method_id.__str__(), - name.__str__(), + String::from(descriptor.get_name()), ); } - let mut found = false; - for method in &mut direct_methods { - if method.descriptor == method_id { - method.annotations.append(&mut (annotations.clone())); // the clone is prob - // unnecessary - found = true; - } - } - for method in &mut virtual_methods { - if method.descriptor == method_id { - method.annotations.append(&mut (annotations.clone())); // the clone is prob - // unnecessary - found = true; - } - } - if !found { + direct_methods + .entry(method_id.clone()) + .and_modify(|method| method.annotations = annotations.clone()); // TODO = or append? + virtual_methods + .entry(method_id.clone()) + .and_modify(|method| method.annotations = annotations.clone()); // TODO = or append? + if direct_methods.get(&method_id).is_none() + && virtual_methods.get(&method_id).is_none() + { info!( "Annotation found for method {} but could not find the method definition, dropping it", method_id.__str__(), @@ -235,29 +230,22 @@ impl Apk { annotations_list.push(vec![]); } } - if method_id.class_.get_name() != name { + if method_id.class_ != descriptor { info!( "Annotation for parameter of method {} found in class {}, dropping it", method_id.__str__(), - name.__str__(), + String::from(descriptor.get_name()), ); } - let mut found = false; - for method in &mut direct_methods { - if method.descriptor == method_id { - method.parameters_annotations = annotations_list.clone(); // the clone is prob - // unnecessary - found = true; - } - } - for method in &mut virtual_methods { - if method.descriptor == method_id { - method.parameters_annotations = annotations_list.clone(); // the clone is prob - // unnecessary - found = true; - } - } - if !found { + direct_methods + .entry(method_id.clone()) + .and_modify(|method| method.parameters_annotations = annotations_list.clone()); + virtual_methods + .entry(method_id.clone()) + .and_modify(|method| method.parameters_annotations = annotations_list.clone()); + if direct_methods.get(&method_id).is_none() + && virtual_methods.get(&method_id).is_none() + { info!( "Annotation found for parameter of method {} but could not find the method definition, dropping it", method_id.__str__(), @@ -266,7 +254,7 @@ impl Apk { } } Ok(Class { - name, + descriptor, superclass, interfaces, source_file, @@ -712,7 +700,9 @@ impl Apk { impl Apk { #[new] fn new() -> Self { - Self { classes: vec![] } + Self { + classes: HashMap::new(), + } } #[pyo3(name = "add_dex_file")] diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index abc7b8f..0fc2049 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -1,17 +1,19 @@ //! Representation of a class. +use std::collections::HashMap; + use pyo3::prelude::*; -use crate::{DexAnnotationItem, DexString, Field, Method}; +use crate::{DexAnnotationItem, DexString, Field, IdField, IdMethod, IdType, Method, Result}; /// Represent an apk #[pyclass] #[derive(Debug, Clone)] pub struct Class { - /// Type name, format described at + /// Type, format described at /// #[pyo3(get, set)] - pub name: DexString, + pub descriptor: IdType, /// If the class is visible everywhere #[pyo3(get, set)] pub is_public: bool, @@ -36,12 +38,12 @@ pub struct Class { /// Name of the superclass, format described at /// #[pyo3(get, set)] - pub superclass: Option, + pub superclass: Option, /// List of the interfaces that class implement, format of the interfaces /// name is discribed at /// #[pyo3(get, set)] - pub interfaces: Vec, + pub interfaces: Vec, /// Name of the source file where this class is defined. #[pyo3(get, set)] pub source_file: Option, @@ -49,16 +51,16 @@ pub struct Class { // TODO: hash map? /// The static fields #[pyo3(get, set)] - pub static_fields: Vec, + pub static_fields: HashMap, /// The instance fields #[pyo3(get, set)] - pub instance_fields: Vec, + pub instance_fields: HashMap, /// The direct (static, private or constructor) methods of the class #[pyo3(get, set)] - pub direct_methods: Vec, + pub direct_methods: HashMap, /// The virtual (ie non direct) methods of the class #[pyo3(get, set)] - pub virtual_methods: Vec, + pub virtual_methods: HashMap, // Do we need to distinguish direct and virtual (all the other) methods? // Maybe overlapping descriptor (same name, class and proto?) /// The annotation related to this class (note: this does not include the @@ -72,10 +74,10 @@ pub struct Class { #[pymethods] impl Class { #[new] - pub fn new(name: DexString) -> Self { - Self { - name, - superclass: Some("Ljava/lang/Object;".into()), + pub fn new(name: DexString) -> Result { + Ok(Self { + descriptor: IdType::new(name)?, + superclass: Some(IdType::new("Ljava/lang/Object;".into())?), interfaces: vec![], source_file: None, is_public: true, @@ -85,16 +87,16 @@ impl Class { is_synthetic: false, is_annotation: false, is_enum: false, - static_fields: vec![], - instance_fields: vec![], - direct_methods: vec![], - virtual_methods: vec![], + static_fields: HashMap::new(), + instance_fields: HashMap::new(), + direct_methods: HashMap::new(), + virtual_methods: HashMap::new(), annotations: vec![], - } + }) } pub fn __str__(&self) -> String { - let name: String = (&self.name).into(); + let name: String = (&self.descriptor.get_name()).into(); let file = if let Some(file) = &self.source_file { let file: String = file.into(); format!(" defined in {file}\n") @@ -102,7 +104,7 @@ impl Class { "".into() }; let superclass = if let Some(spcl) = &self.superclass { - let spcl: String = spcl.into(); + let spcl: String = spcl.get_name().into(); format!(" extends: {spcl}\n") } else { "".into() @@ -112,7 +114,7 @@ impl Class { } else { let mut interfaces: String = " implements:\n".into(); for it in &self.interfaces { - let it: String = it.into(); + let it: String = it.get_name().into(); interfaces += &format!(" {it}\n"); } interfaces @@ -122,7 +124,7 @@ impl Class { } pub fn __repr__(&self) -> String { - let name: String = (&self.name).into(); + let name: String = (&self.descriptor.get_name()).into(); format!("Class({name})") } } diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index 480229c..165a2de 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -1,5 +1,8 @@ //! The class identifying dex structure. +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + use anyhow::anyhow; use pyo3::prelude::*; @@ -7,10 +10,10 @@ use crate::{DexString, Result}; use androscalpel_serializer::{StringDataItem, Uleb128}; #[pyclass] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IdMethodType { /// Type formated as described by - pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in cas we ever + pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in case we ever // need it pub(crate) return_type: IdType, pub(crate) parameters: Vec, @@ -56,6 +59,12 @@ impl IdMethodType { pub fn get_parameters(&self) -> Vec { self.parameters.clone() } + + fn __hash__(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } } impl IdMethodType { @@ -77,12 +86,14 @@ impl IdMethodType { // Not a clean rust enum because we want to be compatible with python, and maybe support strange // malware edge case? #[pyclass] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IdType(pub(crate) DexString); #[pymethods] impl IdType { #[new] - pub fn _new(ty: DexString) -> Result { + pub fn new( + #[pyo3(from_py_with = "crate::dex_string::as_dex_string")] ty: DexString, + ) -> Result { // TODO: check format let ty = Self(ty); ty.check_format()?; @@ -173,7 +184,7 @@ impl IdType { pub fn __repr__(&self) -> String { let name: String = (&self.0).into(); - format!("DexType({name})") + format!("IdType({name})") } /// Check if the type is void (return type) @@ -312,11 +323,17 @@ impl IdType { } } + fn __hash__(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } + // TODO: TESTS } #[pyclass] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IdField { /// The name of the field, format described at /// @@ -333,7 +350,12 @@ pub struct IdField { #[pymethods] impl IdField { #[new] - pub fn new(name: DexString, type_: IdType, class_: IdType) -> Self { + pub fn new( + #[pyo3(from_py_with = "crate::dex_string::as_dex_string")] name: DexString, + type_: IdType, + class_: IdType, + ) -> Self { + // Todo: check that class_ is a class? Self { name, type_, @@ -352,10 +374,16 @@ impl IdField { let name: String = (&self.name).into(); format!("IdField({class}.{name})") } + + fn __hash__(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } } #[pyclass] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct IdMethod { #[pyo3(get, set)] pub class_: IdType, @@ -394,6 +422,12 @@ impl IdMethod { pub fn __repr__(&self) -> String { format!("DexMethod({})", self.__str__()) } + + fn __hash__(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } } #[pyclass] diff --git a/androscalpel/src/dex_string.rs b/androscalpel/src/dex_string.rs new file mode 100644 index 0000000..f44d53d --- /dev/null +++ b/androscalpel/src/dex_string.rs @@ -0,0 +1,110 @@ +use std::collections::hash_map::DefaultHasher; +use std::hash::{Hash, Hasher}; + +use pyo3::class::basic::CompareOp; +use pyo3::exceptions::PyTypeError; +use pyo3::prelude::*; + +#[pyclass] +#[derive(Clone, PartialEq, Eq, Debug)] +pub struct DexString(pub androscalpel_serializer::StringDataItem); + +impl From for androscalpel_serializer::StringDataItem { + fn from(DexString(string): DexString) -> Self { + string + } +} + +impl From for DexString { + fn from(string: androscalpel_serializer::StringDataItem) -> Self { + Self(string) + } +} + +impl From<&DexString> for String { + fn from(DexString(string): &DexString) -> Self { + string + .try_into() + .unwrap_or(format!("InvalidEncoding:{:x?}", string.data)) + } +} + +impl From for String { + fn from(string: DexString) -> Self { + (&string).into() + } +} + +impl From<&str> for DexString { + fn from(string: &str) -> Self { + Self(string.into()) + } +} + +impl From for DexString { + fn from(string: String) -> Self { + Self(string.as_str().into()) + } +} + +impl Hash for DexString { + fn hash(&self, state: &mut H) { + self.get_utf16_size().hash(state); + self.get_bytes().hash(state); + } +} + +pub fn as_dex_string(obj: &PyAny) -> PyResult { + if let Ok(string) = DexString::extract(obj) { + Ok(string) + } else if let Ok(string) = String::extract(obj) { + Ok(string.into()) + } else { + Err(PyErr::new::(format!( + "{} cannot be converted to a DexString", + obj.repr()? + ))) + } +} + +#[pymethods] +impl DexString { + #[new] + pub fn new(s: &str) -> Self { + s.into() + } + + /// Return the binary mutf-8 encoded string (minus the trailling 0) + pub fn get_bytes(&self) -> &[u8] { + &self.0.data + } + + /// Return the 'utf-16' size of the string (number of unicode code point, ie its lenght in 'java-land') + pub fn get_utf16_size(&self) -> u32 { + self.0.utf16_size.0 + } + + pub fn __str__(&self) -> String { + self.into() + } + + fn __richcmp__(&self, other: &PyAny, op: CompareOp, py: Python<'_>) -> PyResult { + let other: Self = other + .extract() + .or(::extract(other).map(|string| string.into()))?; + match op { + CompareOp::Eq => Ok((self == &other).into_py(py)), + _ => Ok(py.NotImplemented()), + } + } + + pub fn __repr__(&self) -> String { + self.into() + } + + fn __hash__(&self) -> u64 { + let mut hasher = DefaultHasher::new(); + self.hash(&mut hasher); + hasher.finish() + } +} diff --git a/androscalpel/src/lib.rs b/androscalpel/src/lib.rs index 318a5d7..d88508e 100644 --- a/androscalpel/src/lib.rs +++ b/androscalpel/src/lib.rs @@ -1,15 +1,12 @@ -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; - use anyhow::Result; -use pyo3::class::basic::CompareOp; use pyo3::prelude::*; pub mod annotation; pub mod apk; pub mod class; pub mod dex_id; +pub mod dex_string; pub mod field; pub mod method; pub mod method_handle; @@ -20,103 +17,13 @@ pub use annotation::*; pub use apk::*; pub use class::*; pub use dex_id::*; +pub use dex_string::*; pub use field::*; pub use method::*; pub use method_handle::*; pub use scalar::*; pub use value::*; -#[pyclass] -#[derive(Clone, PartialEq, Eq, Debug)] -pub struct DexString(androscalpel_serializer::StringDataItem); - -impl From for androscalpel_serializer::StringDataItem { - fn from(DexString(string): DexString) -> Self { - string - } -} - -impl From for DexString { - fn from(string: androscalpel_serializer::StringDataItem) -> Self { - Self(string) - } -} - -impl From<&DexString> for String { - fn from(DexString(string): &DexString) -> Self { - string - .try_into() - .unwrap_or(format!("InvalidEncoding:{:x?}", string.data)) - } -} - -impl From for String { - fn from(string: DexString) -> Self { - (&string).into() - } -} - -impl From<&str> for DexString { - fn from(string: &str) -> Self { - Self(string.into()) - } -} - -impl From for DexString { - fn from(string: String) -> Self { - Self(string.as_str().into()) - } -} - -impl Hash for DexString { - fn hash(&self, state: &mut H) { - self.get_utf16_size().hash(state); - self.get_bytes().hash(state); - } -} - -#[pymethods] -impl DexString { - #[new] - pub fn new(s: &str) -> Self { - s.into() - } - - /// Return the binary mutf-8 encoded string (minus the trailling 0) - pub fn get_bytes(&self) -> &[u8] { - &self.0.data - } - - /// Return the 'utf-16' size of the string (number of unicode code point, ie its lenght in 'java-land') - pub fn get_utf16_size(&self) -> u32 { - self.0.utf16_size.0 - } - - pub fn __str__(&self) -> String { - self.into() - } - - fn __richcmp__(&self, other: &PyAny, op: CompareOp, py: Python<'_>) -> PyResult { - let other: Self = other - .extract() - .or(::extract(other).map(|string| string.into()))?; - match op { - CompareOp::Eq => Ok((self == &other).into_py(py)), - _ => Ok(py.NotImplemented()), - } - } - - pub fn __repr__(&self) -> String { - self.into() - } - - fn __hash__(&self) -> u64 { - let mut hasher = DefaultHasher::new(); - self.hash(&mut hasher); - hasher.finish() - } -} - /// Androscalpel. #[pymodule] fn androscalpel(_py: Python, m: &PyModule) -> PyResult<()> {