diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 0342c9f..1b868b5 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -88,11 +88,18 @@ impl Apk { } let mut static_fields = vec![]; let mut instance_fields = vec![]; + let mut direct_methods = vec![]; + let mut virtual_methods = vec![]; let data_off = class_item.class_data_off; if data_off != 0 { let data = dex.get_struct_at_offset::(data_off)?; - static_fields = Self::get_field_list_from_dex(&data.static_fields, dex)?; - instance_fields = Self::get_field_list_from_dex(&data.instance_fields, dex)?; + static_fields = Self::get_field_list_from_encoded_field_list(&data.static_fields, dex)?; + instance_fields = + Self::get_field_list_from_encoded_field_list(&data.instance_fields, dex)?; + direct_methods = + Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)?; + virtual_methods = + Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)?; } if class_item.static_values_off != 0 { @@ -130,6 +137,8 @@ impl Apk { is_enum, instance_fields, static_fields, + direct_methods, + virtual_methods, }) } @@ -284,7 +293,87 @@ impl Apk { } } - pub fn get_field_list_from_dex( + /// Return a [`Field`] from its idx (index in `field_ids`) in the dex file and its access flags + /// ([`EncodedField.access_flags`]) + pub fn get_field_from_idx( + idx: usize, + Uleb128(access_flags): Uleb128, + dex: &DexFileReader, + ) -> Result { + let descriptor = Self::get_id_field_from_idx(idx, dex)?; + + let is_public = (access_flags & ACC_PUBLIC) != 0; + let is_private = (access_flags & ACC_PRIVATE) != 0; + let is_protected = (access_flags & ACC_PROTECTED) != 0; + let is_static = (access_flags & ACC_STATIC) != 0; + let is_final = (access_flags & ACC_FINAL) != 0; + let is_volatile = (access_flags & ACC_VOLATILE) != 0; + let is_transient = (access_flags & ACC_TRANSIENT) != 0; + let is_synthetic = (access_flags & ACC_SYNTHETIC) != 0; + let is_enum = (access_flags & ACC_ENUM) != 0; + 
let visibility = match (is_public, is_private, is_protected) { + (true, false, false) => FieldVisibility::Public, + (false, true, false) => FieldVisibility::Private, + (false, false, true) => FieldVisibility::Protected, + (false, false, false) => FieldVisibility::None_, + (pbl, prv, prt) => { + let class: String = descriptor.class_.0.into(); + let name: String = descriptor.name.into(); + return Err(Error::InconsistantStruct(format!( + "Inconsistant visiblity found in {class}.{name}: \ + (public: {pbl}, private: {prv}, protected: {prt})" + ))); + } + }; + if access_flags + & !(ACC_PUBLIC + | ACC_PRIVATE + | ACC_PROTECTED + | ACC_STATIC + | ACC_FINAL + | ACC_VOLATILE + | ACC_TRANSIENT + | ACC_SYNTHETIC + | ACC_ENUM) + != 0 + { + info!( + "Unexpected flags found in {} for : 0x{:x} (unknown flags: 0x{:x})", + descriptor.__repr__(), + access_flags, + access_flags + & !(ACC_PUBLIC + | ACC_PRIVATE + | ACC_PROTECTED + | ACC_STATIC + | ACC_FINAL + | ACC_VOLATILE + | ACC_TRANSIENT + | ACC_SYNTHETIC + | ACC_ENUM) + ); + } + Ok(Field { + descriptor, + visibility, + is_static, + is_final, + is_volatile, + is_transient, + is_synthetic, + is_enum, + value: None, + }) + } + + /// Return a list of fields from a list of [`EncodedField`]. + /// + /// # Warning + /// + /// The index of a field is computed by summing the [`EncodedField.field_idx_diff`] of the + /// previous elements of the list and the diff of the current field, so the list order must be preserved + /// as in the dex file. 
+ pub fn get_field_list_from_encoded_field_list( encoded_fields: &[EncodedField], dex: &DexFileReader, ) -> Result> { @@ -293,55 +382,135 @@ impl Apk { for field in encoded_fields { idx += field.field_idx_diff.0; - let id_item = dex.get_field_id(idx as usize)?; - let class_ty = dex.get_type_id(id_item.class_idx as usize)?; - let class: DexString = dex.get_string(class_ty.descriptor_idx)?.into(); - let ty = dex.get_type_id(id_item.type_idx as usize)?; - let ty = dex.get_string(ty.descriptor_idx)?.into(); - let name: DexString = dex.get_string(id_item.name_idx)?.into(); - - let is_public = (field.access_flags.0 & ACC_PUBLIC) != 0; - let is_private = (field.access_flags.0 & ACC_PRIVATE) != 0; - let is_protected = (field.access_flags.0 & ACC_PROTECTED) != 0; - let is_static = (field.access_flags.0 & ACC_STATIC) != 0; - let is_final = (field.access_flags.0 & ACC_FINAL) != 0; - let is_volatile = (field.access_flags.0 & ACC_VOLATILE) != 0; - let is_transient = (field.access_flags.0 & ACC_TRANSIENT) != 0; - let is_synthetic = (field.access_flags.0 & ACC_SYNTHETIC) != 0; - let is_enum = (field.access_flags.0 & ACC_ENUM) != 0; - let visibility = match (is_public, is_private, is_protected) { - (true, false, false) => FieldVisibility::Public, - (false, true, false) => FieldVisibility::Private, - (false, false, true) => FieldVisibility::Protected, - (false, false, false) => FieldVisibility::None_, - (pbl, prv, prt) => { - let class: String = class.into(); - let name: String = name.into(); - return Err(Error::InconsistantStruct(format!( - "Inconsistant visiblity found in {class}.{name}: \ - (public: {pbl}, private: {prv}, protected: {prt})" - ))); - } - }; - let descriptor = IdField { - name, - type_: IdType(ty), - class_: IdType(class), - }; - fields.push(Field { - descriptor, - visibility, - is_static, - is_final, - is_volatile, - is_transient, - is_synthetic, - is_enum, - value: None, - }) + fields.push(Self::get_field_from_idx( + idx as usize, + field.access_flags, + dex, + 
)?); } Ok(fields) } + + /// Return a [`Method`] from its idx (index in `method_ids`) in the dex file and its access flags + /// ([`EncodedMethod.access_flags`]) and code offset ([`EncodedMethod.code_off`]). + pub fn get_method_from_idx( + idx: usize, + Uleb128(access_flags): Uleb128, + Uleb128(_code_off): Uleb128, + dex: &DexFileReader, + ) -> Result { + let descriptor = Self::get_id_method_from_idx(idx, dex)?; + + let is_public = (access_flags & ACC_PUBLIC) != 0; + let is_private = (access_flags & ACC_PRIVATE) != 0; + let is_protected = (access_flags & ACC_PROTECTED) != 0; + let is_static = (access_flags & ACC_STATIC) != 0; + let is_final = (access_flags & ACC_FINAL) != 0; + let is_synchronized = (access_flags & ACC_SYNCHRONIZED) != 0; + let is_bridge = (access_flags & ACC_BRIDGE) != 0; + let is_varargs = (access_flags & ACC_VARARGS) != 0; + let is_native = (access_flags & ACC_NATIVE) != 0; + let is_abstract = (access_flags & ACC_ABSTRACT) != 0; + let is_strictfp = (access_flags & ACC_STRICT) != 0; + let is_synthetic = (access_flags & ACC_SYNTHETIC) != 0; + let is_constructor = (access_flags & ACC_CONSTRUCTOR) != 0; + let is_declared_syncrhonized = (access_flags & ACC_DECLARED_SYNCHRONIZED) != 0; + let visibility = match (is_public, is_private, is_protected) { + (true, false, false) => MethodVisibility::Public, + (false, true, false) => MethodVisibility::Private, + (false, false, true) => MethodVisibility::Protected, + (false, false, false) => MethodVisibility::None_, + (pbl, prv, prt) => { + return Err(Error::InconsistantStruct(format!( + "Inconsistant visiblity found in {}: \ + (public: {pbl}, private: {prv}, protected: {prt})", + descriptor.__repr__() + ))); + // TODO: replace by public? 
+ } + }; + if access_flags + & !(ACC_PUBLIC + | ACC_PRIVATE + | ACC_PROTECTED + | ACC_STATIC + | ACC_FINAL + | ACC_SYNCHRONIZED + | ACC_BRIDGE + | ACC_VARARGS + | ACC_NATIVE + | ACC_ABSTRACT + | ACC_STRICT + | ACC_SYNTHETIC + | ACC_CONSTRUCTOR + | ACC_DECLARED_SYNCHRONIZED) + != 0 + { + info!( + "Unexpected flags found in {} for : 0x{:x} (unknown flags: 0x{:x})", + descriptor.__repr__(), + access_flags, + access_flags + & !(ACC_PUBLIC + | ACC_PRIVATE + | ACC_PROTECTED + | ACC_STATIC + | ACC_FINAL + | ACC_SYNCHRONIZED + | ACC_BRIDGE + | ACC_VARARGS + | ACC_NATIVE + | ACC_ABSTRACT + | ACC_STRICT + | ACC_SYNTHETIC + | ACC_CONSTRUCTOR + | ACC_DECLARED_SYNCHRONIZED) + ); + } + + Ok(Method { + descriptor, + visibility, + is_static, + is_final, + is_synchronized, + is_bridge, + is_varargs, + is_native, + is_abstract, + is_strictfp, + is_synthetic, + is_constructor, + is_declared_syncrhonized, + code: (), + }) + } + + /// Return a list of methods from a list of [`EncodedMethod`]. + /// + /// # Warning + /// + /// The index of a method is computed by summing the [`EncodedMethod.method_idx_diff`] of the + /// previous elements of the list and the diff of the current method, so the list order must be preserved + /// as in the dex file. 
+ pub fn get_method_list_from_encoded_field_list( + encoded_methods: &[EncodedMethod], + dex: &DexFileReader, + ) -> Result> { + let mut idx = 0; + let mut methods = vec![]; + for method in encoded_methods { + idx += method.method_idx_diff.0; + + methods.push(Self::get_method_from_idx( + idx as usize, + method.access_flags, + method.code_off, + dex, + )?); + } + Ok(methods) + } } #[pymethods] diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index 94c2029..4cd3d59 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -2,7 +2,7 @@ use pyo3::prelude::*; -use crate::{DexString, Field}; +use crate::{DexString, Field, Method}; /// Represent an apk #[pyclass] @@ -46,21 +46,24 @@ pub struct Class { #[pyo3(get, set)] pub source_file: Option, + // TODO: hash map? /// The static fields #[pyo3(get, set)] pub static_fields: Vec, /// The instance fields #[pyo3(get, set)] pub instance_fields: Vec, - // /// The static methods - // #[pyo3(get, set)] - // pub methods: Vec<()>, - // Dont think we need to distinguish direct (static + private) and virtual (all the other) methods - // + /// The direct (static, private or constructor) methods of the class + #[pyo3(get, set)] + pub direct_methods: Vec, + /// The virtual (i.e. non-direct) methods of the class + #[pyo3(get, set)] + pub virtual_methods: Vec, + // Do we need to distinguish direct and virtual (all the other) methods? + // Maybe overlapping descriptor (same name, class and proto?) 
+ // pub annotations: Option<()> // TODO - // pub data: Option<()> // TODO - // pub static_values: Option<()> // TODO - // TODO: mix annotation data and static values to make it more practical + // TODO: mix annotation data to fields / methods / class to make it more practical } #[pymethods] @@ -81,6 +84,8 @@ impl Class { is_enum: false, static_fields: vec![], instance_fields: vec![], + direct_methods: vec![], + virtual_methods: vec![], } } diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index 89ebd7f..f709581 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -22,7 +22,7 @@ impl IdMethodType { #[new] pub fn new(return_type: IdType, parameters: Vec) -> Self { Self { - shorty: Self::get_shorty(&return_type, ¶meters), + shorty: Self::compute_shorty(&return_type, ¶meters), return_type, parameters, } @@ -43,12 +43,24 @@ impl IdMethodType { pub fn __repr__(&self) -> String { format!("DexMethodType({})", self.__str__()) } + + pub fn get_shorty(&self) -> DexString { + self.shorty.clone() + } + + pub fn get_return_type(&self) -> IdType { + self.return_type.clone() + } + + pub fn get_parameters(&self) -> Vec { + self.parameters.clone() + } } impl IdMethodType { /// Compute the format for the shorty as described in /// - pub fn get_shorty(return_type: &IdType, parameters: &[IdType]) -> DexString { + pub fn compute_shorty(return_type: &IdType, parameters: &[IdType]) -> DexString { let mut shorty: String = return_type.get_shorty().into(); for ty in parameters { let ty: String = ty.get_shorty().into(); @@ -346,8 +358,11 @@ impl IdField { #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] pub struct IdMethod { + #[pyo3(get, set)] pub class_: IdType, + #[pyo3(get, set)] pub proto: IdMethodType, + #[pyo3(get, set)] pub name: DexString, } diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index f5a9476..112562a 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -11,39 +11,77 @@ pub struct Method 
{ /// The structure used to reference this method. #[pyo3(get, set)] pub descriptor: IdMethod, - /* TODO: ACCESS FLAG - /// The field visibility + /// The method visibility. #[pyo3(get, set)] - pub visibility: FieldVisibility, - /// If the field is defined for the class globally + pub visibility: MethodVisibility, + /// Static methods do not take `this` as an argument. #[pyo3(get, set)] pub is_static: bool, - /// If the field is immutable after construction + /// Final methods are not overridable. #[pyo3(get, set)] pub is_final: bool, - /// For thread safety + /// Synchronized methods automatically acquire their associated lock around calls. + /// Can only be set on native methods (`[Self::is_native] = true`). #[pyo3(get, set)] - pub is_volatile: bool, - /// If the field should **not** be saved by default serialization + pub is_synchronized: bool, + /// Bridge methods are automatically added by the compiler as a type-safe bridge. #[pyo3(get, set)] - pub is_transient: bool, - /// If the field is not defined in the source code + pub is_bridge: bool, + /// If the last argument should be treated as a "rest" argument by the compiler + /// (for methods with a variable number of arguments). + #[pyo3(get, set)] + pub is_varargs: bool, + /// If the method is a native method. + #[pyo3(get, set)] + pub is_native: bool, + /// Abstract methods are not implemented by the class. + #[pyo3(get, set)] + pub is_abstract: bool, + /// If the method must use strict rules for floating point arithmetic. + #[pyo3(get, set)] + pub is_strictfp: bool, + /// Synthetic methods are not directly defined in the source code. #[pyo3(get, set)] pub is_synthetic: bool, - /// If the field is an enumerated value + /// If the method is a constructor. 
#[pyo3(get, set)] - pub is_enum: bool, - */ + pub is_constructor: bool, + /// If the method is declared as synchronized (purely indicative) + #[pyo3(get, set)] + pub is_declared_syncrhonized: bool, + /// The code of the method pub code: (), } +/// Represent the visibility of a method +#[pyclass] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum MethodVisibility { + Public, + Private, + Protected, + None_, // Actually quite common +} + #[pymethods] impl Method { #[new] pub fn new(descriptor: IdMethod) -> Self { Self { descriptor, + visibility: MethodVisibility::Public, + is_static: false, + is_final: false, + is_synchronized: false, + is_bridge: false, + is_varargs: false, + is_native: false, + is_abstract: false, + is_strictfp: false, + is_synthetic: false, + is_constructor: false, + is_declared_syncrhonized: false, code: (), } }