From cdf68c506a66fae9a32890d61eedc14ae8a267f0 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 2 Oct 2023 15:08:45 +0200 Subject: [PATCH] finish implementing IdMethodType --- androscalpel/src/apk.rs | 26 +++++----- androscalpel/src/dex_id.rs | 103 ++++++++++++++++++++++++++++++------- test.sh | 7 +++ 3 files changed, 105 insertions(+), 31 deletions(-) create mode 100755 test.sh diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 24fea4a..9c73c1a 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -148,7 +148,7 @@ impl Apk { EncodedValue::MethodType(val) => { let proto = dex.get_proto_id(*val as usize)?; let shorty: DexString = dex.get_string(proto.shorty_idx)?.into(); - let return_type: DexType = DexType( + let return_type: IdType = IdType( dex.get_string( dex.get_type_id(proto.return_type_idx as usize)? .descriptor_idx, @@ -159,16 +159,16 @@ impl Apk { vec![] } else { let type_list = dex.get_struct_at_offset::(proto.parameters_off)?; - let mut parameters: Vec<DexType> = vec![]; + let mut parameters: Vec<IdType> = vec![]; for ty in type_list.list { - parameters.push(DexType( + parameters.push(IdType( dex.get_string(dex.get_type_id(ty.type_idx as usize)?.descriptor_idx)? .into(), )); } parameters }; - Ok(DexValue::MethodType(DexMethodType { + Ok(DexValue::MethodType(IdMethodType { shorty, return_type, parameters, @@ -177,29 +177,29 @@ impl Apk { // TODO: need method to be implemented first EncodedValue::MethodHandle(_val) => todo!(), //Ok(DexValue::MethodHandle(DexMethodHandle(*val))), EncodedValue::String(val) => Ok(DexValue::String(dex.get_string(*val)?.into())), - EncodedValue::Type(val) => Ok(DexValue::Type(DexType( + EncodedValue::Type(val) => Ok(DexValue::Type(IdType( dex.get_string(dex.get_type_id(*val as usize)?.descriptor_idx)? 
.into(), ))), EncodedValue::Field(val) => { let field = dex.get_field_id(*val as usize)?; let name = dex.get_string(field.name_idx)?.into(); - let type_ = DexType( + let type_ = IdType( dex.get_string(dex.get_type_id(field.type_idx as usize)?.descriptor_idx)? .into(), ); - let class_ = DexType( + let class_ = IdType( dex.get_string(dex.get_type_id(field.class_idx as usize)?.descriptor_idx)? .into(), ); - Ok(DexValue::Field(DexField { + Ok(DexValue::Field(IdField { name, type_, class_, })) } - EncodedValue::Method(val) => Ok(DexValue::Method(DexMethod(*val))), - EncodedValue::Enum(val) => Ok(DexValue::Enum(DexEnum(*val))), + EncodedValue::Method(val) => Ok(DexValue::Method(IdMethod(*val))), + EncodedValue::Enum(val) => Ok(DexValue::Enum(IdEnum(*val))), EncodedValue::Array(_val) => todo!(), //Ok(DexValue::Array(DexArray(*val))), EncodedValue::Annotation(_val) => todo!(), //Ok(DexValue::Annotation(DexAnnotation(*val))), EncodedValue::Null => Ok(DexValue::Null(DexNull)), @@ -246,10 +246,10 @@ impl Apk { ))); } }; - let descriptor = DexField { + let descriptor = IdField { name, - type_: DexType(ty), - class_: DexType(class), + type_: IdType(ty), + class_: IdType(class), }; fields.push(Field { descriptor, diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs index a88db4d..5d26aaf 100644 --- a/androscalpel/src/dex_id.rs +++ b/androscalpel/src/dex_id.rs @@ -2,14 +2,15 @@ use pyo3::prelude::*; -use crate::DexString; +use crate::{DexString, Error, Result}; use androscalpel_serializer::{StringDataItem, Uleb128}; #[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] pub struct IdMethodType { /// Type formated as described by - pub(crate) shorty: DexString, + pub(crate) shorty: DexString, // Redundant, but same as in the encoding, keep it in case we ever + // need it pub(crate) return_type: IdType, pub(crate) parameters: Vec<IdType>, } @@ -20,7 +21,6 @@ pub struct IdMethodType { impl IdMethodType { #[new] pub fn new(return_type: IdType, parameters: Vec<IdType>) -> Self { - // TODO: 
check format Self { shorty: Self::get_shorty(&return_type, &parameters), return_type, @@ -29,12 +29,19 @@ impl IdMethodType { } pub fn __str__(&self) -> String { - self.__repr__() + format!( + "({}){}", + self.parameters + .iter() + .map(|param| param.__str__()) + .collect::<Vec<String>>() + .join(" "), + self.return_type.__str__() + ) } pub fn __repr__(&self) -> String { - let repr: String = (&self.shorty).into(); - format!("DexMethodType({repr})") + format!("DexMethodType({})", self.__str__()) } } @@ -42,7 +49,12 @@ impl IdMethodType { /// Compute the format for the shorty as described in /// pub fn get_shorty(return_type: &IdType, parameters: &[IdType]) -> DexString { - todo!() + let mut shorty: String = return_type.get_shorty().into(); + for ty in parameters { + let ty: String = ty.get_shorty().into(); + shorty.push_str(&ty); + } + shorty.into() } } @@ -74,14 +86,24 @@ impl DexMethodHandle { /// A type. /// Type represented by [`DexString`] that follow the TypeDescriptor format /// as described here +// Not a clean rust enum because we want to be compatible with python, and maybe support strange +// malware edge cases? 
#[pyclass] #[derive(Debug, Clone, PartialEq, Eq)] pub struct IdType(pub(crate) DexString); #[pymethods] impl IdType { #[new] - pub fn _new(ty: DexString) -> Self { + pub fn _new(ty: DexString) -> Result<Self> { // TODO: check format + let ty = Self(ty); + ty.check_format()?; + Ok(ty) + } + + /// Return a type from its string representation without checking its format + #[staticmethod] + pub fn unchecked_new(ty: DexString) -> Self { Self(ty) } @@ -213,28 +235,28 @@ impl IdType { /// Check if the type is a class pub fn is_class(&self) -> bool { - self.0.get_utf16_size() == 0 - && self.0.get_bytes().is_empty() - && self.0.get_bytes()[0] != 0x76 - // Check if first char is an L + self.0.get_utf16_size() >= 2 + && !self.0.get_bytes().len() > 2 + && self.0.get_bytes()[0] == b'L' + && *self.0.get_bytes().last().unwrap() == b';' } /// Check if the type is an array pub fn is_array(&self) -> bool { - self.0.get_utf16_size() == 0 - && self.0.get_bytes().is_empty() - && self.0.get_bytes()[0] != 0x5b - // Check if first char is an [ + self.0.get_utf16_size() != 0 + && !self.0.get_bytes().is_empty() + && self.0.get_bytes()[0] == b'[' } /// If the type is a class, return the name of the class, /// else None. pub fn get_class_name(&self) -> Option { if self.is_class() { + let bytes = self.0.get_bytes(); Some( StringDataItem { - utf16_size: Uleb128(self.0.get_utf16_size() - 1), - data: self.0.get_bytes()[1..].to_vec(), + utf16_size: Uleb128(self.0.get_utf16_size() - 2), + data: bytes[1..bytes.len() - 1].to_vec(), } .into(), ) @@ -259,6 +281,51 @@ impl IdType { } } + /// Check if the type is a valid representation. + /// There should not be any need to use this method externally? 
+ pub fn check_format(&self) -> Result<()> { + if self.is_void() + || self.is_boolean() + || self.is_byte() + || self.is_short() + || self.is_char() + || self.is_int() + || self.is_long() + || self.is_float() + || self.is_double() + || self.is_class() + || self + .get_element_type() + .map(|elt| elt.check_format().is_ok()) + .unwrap_or(false) + { + Ok(()) + } else { + let format: String = (&self.0).into(); + Err(Error::InconsistantStruct(format!( + "{format} is not a valid type" + ))) + } + } + + /// Return the shorty repr of this type (ID: the type if it's a scalar, 'L' else) + pub fn get_shorty(&self) -> DexString { + if self.is_void() + || self.is_boolean() + || self.is_byte() + || self.is_short() + || self.is_char() + || self.is_int() + || self.is_long() + || self.is_float() + || self.is_double() + { + self.0.clone() + } else { + "L".into() + } + } + // TODO: TESTS } diff --git a/test.sh b/test.sh new file mode 100755 index 0000000..f6e7a28 --- /dev/null +++ b/test.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env sh +source venv_maturin/bin/activate + +cd androscalpel +maturin develop +cd .. +python test.py