store structs in hash map (WIP)

This commit is contained in:
Jean-Marie Mineau 2023-11-29 17:06:57 +01:00
parent 224d1efdba
commit 67efc6365d
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
5 changed files with 254 additions and 211 deletions

View file

@ -14,7 +14,7 @@ use androscalpel_serializer::*;
#[derive(Debug, Clone)]
pub struct Apk {
#[pyo3(get, set)]
pub classes: Vec<Class>,
pub classes: HashMap<IdType, Class>,
}
impl Apk {
@ -22,8 +22,8 @@ impl Apk {
pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> {
let dex = DexFileReader::new(data)?;
for class in dex.get_class_defs() {
self.classes
.push(self.get_class_from_dex_file(class, &dex)?);
let class = self.get_class_from_dex_file(class, &dex)?;
self.classes.insert(class.descriptor.clone(), class);
}
Ok(())
}
@ -34,17 +34,14 @@ impl Apk {
class_item: &ClassDefItem,
dex: &DexFileReader,
) -> Result<Class> {
let name_idx = dex
.get_type_id(class_item.class_idx as usize)?
.descriptor_idx;
let name: DexString = dex.get_string(name_idx)?.into();
let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?;
let superclass = if class_item.superclass_idx == NO_INDEX.0 {
None
} else {
let superclass_idx = dex
.get_type_id(class_item.superclass_idx as usize)?
.descriptor_idx;
Some(dex.get_string(superclass_idx)?.into())
Some(Self::get_id_type_from_idx(
class_item.superclass_idx as usize,
dex,
)?)
};
let interfaces = if class_item.interfaces_off == 0 {
vec![]
@ -52,9 +49,7 @@ impl Apk {
let type_list = dex.get_struct_at_offset::<TypeList>(class_item.interfaces_off)?;
let mut list = vec![];
for ty in type_list.list {
let ty = dex.get_type_id(ty.type_idx as usize)?;
let ty = dex.get_string(ty.descriptor_idx)?.into();
list.push(ty);
list.push(Self::get_id_type_from_idx(ty.type_idx as usize, dex)?);
}
list
};
@ -82,7 +77,7 @@ impl Apk {
{
info!(
"Unexpected flags found in class_def_item.access_flags for {}: 0x{:x}",
<&DexString as Into<String>>::into(&name),
String::from(descriptor.get_name()),
class_item.access_flags
);
}
@ -100,43 +95,57 @@ impl Apk {
)?;
}
}
let mut static_fields = vec![];
let mut instance_fields = vec![];
let mut direct_methods = vec![];
let mut virtual_methods = vec![];
let mut static_fields_list = vec![];
let mut instance_fields_list = vec![];
let mut direct_methods = HashMap::new();
let mut virtual_methods = HashMap::new();
let data_off = class_item.class_data_off;
if data_off != 0 {
let data = dex.get_struct_at_offset::<ClassDataItem>(data_off)?;
static_fields = Self::get_field_list_from_encoded_field_list(&data.static_fields, dex)?;
instance_fields =
static_fields_list =
Self::get_field_list_from_encoded_field_list(&data.static_fields, dex)?;
instance_fields_list =
Self::get_field_list_from_encoded_field_list(&data.instance_fields, dex)?;
direct_methods =
Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)?;
virtual_methods =
Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)?;
for method in Self::get_method_list_from_encoded_field_list(&data.direct_methods, dex)?
{
direct_methods.insert(method.descriptor.clone(), method);
}
for method in Self::get_method_list_from_encoded_field_list(&data.virtual_methods, dex)?
{
virtual_methods.insert(method.descriptor.clone(), method);
}
}
if class_item.static_values_off != 0 {
let values = dex
.get_struct_at_offset::<EncodedArray>(class_item.static_values_off)?
.values;
if values.len() > static_fields.len() {
if values.len() > static_fields_list.len() {
return Err(anyhow!(
"Inconsistant static_values array found in {}: \
|static_values| = {}, |static_fields| = {}, \
|static_values| should be <= |static_fields|",
<&DexString as Into<String>>::into(&name),
String::from(&descriptor.get_name()),
values.len(),
static_fields.len()
static_fields_list.len()
));
}
for (i, value) in values.iter().enumerate() {
static_fields[i].value = Some(Self::encoded_value_to_dex_value(value, dex)?);
static_fields_list[i].value = Some(Self::encoded_value_to_dex_value(value, dex)?);
}
for field in static_fields.iter_mut().skip(values.len()) {
for field in static_fields_list.iter_mut().skip(values.len()) {
field.value = None;
}
}
let mut static_fields = HashMap::new();
let mut instance_fields = HashMap::new();
for field in static_fields_list {
static_fields.insert(field.descriptor.clone(), field);
}
for field in instance_fields_list {
instance_fields.insert(field.descriptor.clone(), field);
}
if let Some(annotations_directory) = annotations_directory {
for field_annotation in annotations_directory.field_annotations {
let field_id =
@ -149,29 +158,22 @@ impl Apk {
} else {
vec![]
};
if field_id.class_.get_name() != name {
if field_id.class_ != descriptor {
info!(
"Annotation for field {} found in class {}, dropping it",
field_id.__str__(),
name.__str__(),
String::from(descriptor.get_name()),
);
}
let mut found = false;
for field in &mut instance_fields {
if field.descriptor == field_id {
field.annotations.append(&mut (annotations.clone())); // the clone is prob
// unnecessary
found = true;
}
}
for field in &mut static_fields {
if field.descriptor == field_id {
field.annotations.append(&mut (annotations.clone())); // the clone is prob
// unnecessary
found = true;
}
}
if !found {
instance_fields
.entry(field_id.clone())
.and_modify(|field| field.annotations = annotations.clone());
static_fields
.entry(field_id.clone())
.and_modify(|field| field.annotations = annotations.clone());
if instance_fields.get(&field_id).is_none()
&& static_fields.get(&field_id).is_none()
{
info!(
"Annotation found for field {} but could not find the field definition, dropping it",
field_id.__str__(),
@ -189,29 +191,22 @@ impl Apk {
} else {
vec![]
};
if method_id.class_.get_name() != name {
if method_id.class_ != descriptor {
info!(
"Annotation for method {} found in class {}, dropping it",
method_id.__str__(),
name.__str__(),
String::from(descriptor.get_name()),
);
}
let mut found = false;
for method in &mut direct_methods {
if method.descriptor == method_id {
method.annotations.append(&mut (annotations.clone())); // the clone is prob
// unnecessary
found = true;
}
}
for method in &mut virtual_methods {
if method.descriptor == method_id {
method.annotations.append(&mut (annotations.clone())); // the clone is prob
// unnecessary
found = true;
}
}
if !found {
direct_methods
.entry(method_id.clone())
.and_modify(|method| method.annotations = annotations.clone()); // TODO = or append?
virtual_methods
.entry(method_id.clone())
.and_modify(|method| method.annotations = annotations.clone()); // TODO = or append?
if direct_methods.get(&method_id).is_none()
&& virtual_methods.get(&method_id).is_none()
{
info!(
"Annotation found for method {} but could not find the method definition, dropping it",
method_id.__str__(),
@ -235,29 +230,22 @@ impl Apk {
annotations_list.push(vec![]);
}
}
if method_id.class_.get_name() != name {
if method_id.class_ != descriptor {
info!(
"Annotation for parameter of method {} found in class {}, dropping it",
method_id.__str__(),
name.__str__(),
String::from(descriptor.get_name()),
);
}
let mut found = false;
for method in &mut direct_methods {
if method.descriptor == method_id {
method.parameters_annotations = annotations_list.clone(); // the clone is prob
// unnecessary
found = true;
}
}
for method in &mut virtual_methods {
if method.descriptor == method_id {
method.parameters_annotations = annotations_list.clone(); // the clone is prob
// unnecessary
found = true;
}
}
if !found {
direct_methods
.entry(method_id.clone())
.and_modify(|method| method.parameters_annotations = annotations_list.clone());
virtual_methods
.entry(method_id.clone())
.and_modify(|method| method.parameters_annotations = annotations_list.clone());
if direct_methods.get(&method_id).is_none()
&& virtual_methods.get(&method_id).is_none()
{
info!(
"Annotation found for parameter of method {} but could not find the method definition, dropping it",
method_id.__str__(),
@ -266,7 +254,7 @@ impl Apk {
}
}
Ok(Class {
name,
descriptor,
superclass,
interfaces,
source_file,
@ -712,7 +700,9 @@ impl Apk {
impl Apk {
#[new]
fn new() -> Self {
Self { classes: vec![] }
Self {
classes: HashMap::new(),
}
}
#[pyo3(name = "add_dex_file")]

View file

@ -1,17 +1,19 @@
//! Representation of a class.
use std::collections::HashMap;
use pyo3::prelude::*;
use crate::{DexAnnotationItem, DexString, Field, Method};
use crate::{DexAnnotationItem, DexString, Field, IdField, IdMethod, IdType, Method, Result};
/// Represent a class
#[pyclass]
#[derive(Debug, Clone)]
pub struct Class {
/// Type name, format described at
/// Type, format described at
/// <https://source.android.com/docs/core/runtime/dex-format#typedescriptor>
#[pyo3(get, set)]
pub name: DexString,
pub descriptor: IdType,
/// If the class is visible everywhere
#[pyo3(get, set)]
pub is_public: bool,
@ -36,12 +38,12 @@ pub struct Class {
/// Name of the superclass, format described at
/// <https://source.android.com/docs/core/runtime/dex-format#typedescriptor>
#[pyo3(get, set)]
pub superclass: Option<DexString>,
pub superclass: Option<IdType>,
/// List of the interfaces that this class implements; the format of the
/// interface names is described at
/// <https://source.android.com/docs/core/runtime/dex-format#typedescriptor>
#[pyo3(get, set)]
pub interfaces: Vec<DexString>,
pub interfaces: Vec<IdType>,
/// Name of the source file where this class is defined.
#[pyo3(get, set)]
pub source_file: Option<DexString>,
@ -49,16 +51,16 @@ pub struct Class {
// TODO: hash map?
/// The static fields
#[pyo3(get, set)]
pub static_fields: Vec<Field>,
pub static_fields: HashMap<IdField, Field>,
/// The instance fields
#[pyo3(get, set)]
pub instance_fields: Vec<Field>,
pub instance_fields: HashMap<IdField, Field>,
/// The direct (static, private or constructor) methods of the class
#[pyo3(get, set)]
pub direct_methods: Vec<Method>,
pub direct_methods: HashMap<IdMethod, Method>,
/// The virtual (ie non direct) methods of the class
#[pyo3(get, set)]
pub virtual_methods: Vec<Method>,
pub virtual_methods: HashMap<IdMethod, Method>,
// Do we need to distinguish direct and virtual (all the other) methods?
// Maybe overlapping descriptor (same name, class and proto?)
/// The annotation related to this class (note: this does not include the
@ -72,10 +74,10 @@ pub struct Class {
#[pymethods]
impl Class {
#[new]
pub fn new(name: DexString) -> Self {
Self {
name,
superclass: Some("Ljava/lang/Object;".into()),
pub fn new(name: DexString) -> Result<Self> {
Ok(Self {
descriptor: IdType::new(name)?,
superclass: Some(IdType::new("Ljava/lang/Object;".into())?),
interfaces: vec![],
source_file: None,
is_public: true,
@ -85,16 +87,16 @@ impl Class {
is_synthetic: false,
is_annotation: false,
is_enum: false,
static_fields: vec![],
instance_fields: vec![],
direct_methods: vec![],
virtual_methods: vec![],
static_fields: HashMap::new(),
instance_fields: HashMap::new(),
direct_methods: HashMap::new(),
virtual_methods: HashMap::new(),
annotations: vec![],
}
})
}
pub fn __str__(&self) -> String {
let name: String = (&self.name).into();
let name: String = (&self.descriptor.get_name()).into();
let file = if let Some(file) = &self.source_file {
let file: String = file.into();
format!(" defined in {file}\n")
@ -102,7 +104,7 @@ impl Class {
"".into()
};
let superclass = if let Some(spcl) = &self.superclass {
let spcl: String = spcl.into();
let spcl: String = spcl.get_name().into();
format!(" extends: {spcl}\n")
} else {
"".into()
@ -112,7 +114,7 @@ impl Class {
} else {
let mut interfaces: String = " implements:\n".into();
for it in &self.interfaces {
let it: String = it.into();
let it: String = it.get_name().into();
interfaces += &format!(" {it}\n");
}
interfaces
@ -122,7 +124,7 @@ impl Class {
}
pub fn __repr__(&self) -> String {
let name: String = (&self.name).into();
let name: String = (&self.descriptor.get_name()).into();
format!("Class({name})")
}
}

View file

@ -1,5 +1,8 @@
//! The class identifying dex structure.
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use anyhow::anyhow;
use pyo3::prelude::*;
@ -7,10 +10,10 @@ use crate::{DexString, Result};
use androscalpel_serializer::{StringDataItem, Uleb128};
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethodType {
/// Type formated as described by <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in cas we ever
pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in case we ever
// need it
pub(crate) return_type: IdType,
pub(crate) parameters: Vec<IdType>,
@ -56,6 +59,12 @@ impl IdMethodType {
/// Return an owned copy of the parameter types of this prototype.
pub fn get_parameters(&self) -> Vec<IdType> {
    self.parameters.iter().cloned().collect()
}
/// Python `__hash__`: runs the `Hash` impl of this type through a fresh
/// `DefaultHasher` and returns the resulting 64 bit digest.
fn __hash__(&self) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(self, &mut state);
    state.finish()
}
}
impl IdMethodType {
@ -77,12 +86,14 @@ impl IdMethodType {
// Not a clean rust enum because we want to be compatible with python, and maybe support strange
// malware edge case?
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdType(pub(crate) DexString);
#[pymethods]
impl IdType {
#[new]
pub fn _new(ty: DexString) -> Result<Self> {
pub fn new(
#[pyo3(from_py_with = "crate::dex_string::as_dex_string")] ty: DexString,
) -> Result<Self> {
// TODO: check format
let ty = Self(ty);
ty.check_format()?;
@ -173,7 +184,7 @@ impl IdType {
pub fn __repr__(&self) -> String {
let name: String = (&self.0).into();
format!("DexType({name})")
format!("IdType({name})")
}
/// Check if the type is void (return type)
@ -312,11 +323,17 @@ impl IdType {
}
}
/// Python `__hash__`: runs the `Hash` impl of this type through a fresh
/// `DefaultHasher` and returns the resulting 64 bit digest.
fn __hash__(&self) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(self, &mut state);
    state.finish()
}
// TODO: TESTS
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdField {
/// The name of the field, format described at
/// <https://source.android.com/docs/core/runtime/dex-format#membername>
@ -333,7 +350,12 @@ pub struct IdField {
#[pymethods]
impl IdField {
#[new]
pub fn new(name: DexString, type_: IdType, class_: IdType) -> Self {
pub fn new(
#[pyo3(from_py_with = "crate::dex_string::as_dex_string")] name: DexString,
type_: IdType,
class_: IdType,
) -> Self {
// Todo: check that class_ is a class?
Self {
name,
type_,
@ -352,10 +374,16 @@ impl IdField {
let name: String = (&self.name).into();
format!("IdField({class}.{name})")
}
/// Python `__hash__`: runs the `Hash` impl of this type through a fresh
/// `DefaultHasher` and returns the resulting 64 bit digest.
fn __hash__(&self) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(self, &mut state);
    state.finish()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethod {
#[pyo3(get, set)]
pub class_: IdType,
@ -394,6 +422,12 @@ impl IdMethod {
/// Python `__repr__`: the `__str__` rendering wrapped in the type name.
///
/// The prefix matches the actual type name (`IdMethod`), like the
/// `IdType(...)` and `IdField(...)` representations of the sibling types.
pub fn __repr__(&self) -> String {
    format!("IdMethod({})", self.__str__())
}
/// Python `__hash__`: runs the `Hash` impl of this type through a fresh
/// `DefaultHasher` and returns the resulting 64 bit digest.
fn __hash__(&self) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(self, &mut state);
    state.finish()
}
}
#[pyclass]

View file

@ -0,0 +1,110 @@
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use pyo3::class::basic::CompareOp;
use pyo3::exceptions::PyTypeError;
use pyo3::prelude::*;
/// A string as stored in a dex file, exposed to Python.
///
/// Thin newtype around `androscalpel_serializer::StringDataItem`; the inner
/// item holds the encoded bytes (`data`) and the `utf16_size` of the string.
#[pyclass]
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DexString(pub androscalpel_serializer::StringDataItem);
/// Unwrap a `DexString` into the serializer item it contains.
impl From<DexString> for androscalpel_serializer::StringDataItem {
    fn from(dex_string: DexString) -> Self {
        dex_string.0
    }
}
impl From<androscalpel_serializer::StringDataItem> for DexString {
fn from(string: androscalpel_serializer::StringDataItem) -> Self {
Self(string)
}
}
/// Decode a `DexString` into a Rust `String`.
///
/// When the inner item cannot be converted, the result is the placeholder
/// `InvalidEncoding:<debug hex dump of the raw bytes>` instead of an error.
impl From<&DexString> for String {
    fn from(DexString(string): &DexString) -> Self {
        string
            .try_into()
            // Build the placeholder lazily: `unwrap_or(format!(..))` would
            // format (and allocate) it even when the conversion succeeds.
            .unwrap_or_else(|_| format!("InvalidEncoding:{:x?}", string.data))
    }
}
/// Owned variant of the `&DexString` -> `String` conversion.
impl From<DexString> for String {
    fn from(string: DexString) -> Self {
        String::from(&string)
    }
}
impl From<&str> for DexString {
fn from(string: &str) -> Self {
Self(string.into())
}
}
impl From<String> for DexString {
fn from(string: String) -> Self {
Self(string.as_str().into())
}
}
/// Hash a `DexString` by feeding its utf-16 size, then its raw bytes, to the
/// hasher (in that order).
impl Hash for DexString {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        let utf16_size = self.get_utf16_size();
        let bytes = self.get_bytes();
        utf16_size.hash(state);
        bytes.hash(state);
    }
}
/// Convert a Python object to a `DexString`.
///
/// Accepts either an existing `DexString` or anything extractable as a
/// `String`; any other object yields a `TypeError` whose message contains the
/// object's `repr`.
pub fn as_dex_string(obj: &PyAny) -> PyResult<DexString> {
    // Already a DexString: use it as is.
    if let Ok(dex_string) = DexString::extract(obj) {
        return Ok(dex_string);
    }
    match String::extract(obj) {
        Ok(text) => Ok(text.into()),
        Err(_) => Err(PyErr::new::<PyTypeError, _>(format!(
            "{} cannot be converted to a DexString",
            obj.repr()?
        ))),
    }
}
#[pymethods]
impl DexString {
#[new]
pub fn new(s: &str) -> Self {
s.into()
}
/// Return the binary mutf-8 encoded string (minus the trailling 0)
pub fn get_bytes(&self) -> &[u8] {
&self.0.data
}
/// Return the 'utf-16' size of the string (number of unicode code point, ie its lenght in 'java-land')
pub fn get_utf16_size(&self) -> u32 {
self.0.utf16_size.0
}
pub fn __str__(&self) -> String {
self.into()
}
fn __richcmp__(&self, other: &PyAny, op: CompareOp, py: Python<'_>) -> PyResult<PyObject> {
let other: Self = other
.extract()
.or(<String as FromPyObject>::extract(other).map(|string| string.into()))?;
match op {
CompareOp::Eq => Ok((self == &other).into_py(py)),
_ => Ok(py.NotImplemented()),
}
}
pub fn __repr__(&self) -> String {
self.into()
}
fn __hash__(&self) -> u64 {
let mut hasher = DefaultHasher::new();
self.hash(&mut hasher);
hasher.finish()
}
}

View file

@ -1,15 +1,12 @@
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use anyhow::Result;
use pyo3::class::basic::CompareOp;
use pyo3::prelude::*;
pub mod annotation;
pub mod apk;
pub mod class;
pub mod dex_id;
pub mod dex_string;
pub mod field;
pub mod method;
pub mod method_handle;
@ -20,103 +17,13 @@ pub use annotation::*;
pub use apk::*;
pub use class::*;
pub use dex_id::*;
pub use dex_string::*;
pub use field::*;
pub use method::*;
pub use method_handle::*;
pub use scalar::*;
pub use value::*;
#[pyclass]
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DexString(androscalpel_serializer::StringDataItem);
impl From<DexString> for androscalpel_serializer::StringDataItem {
fn from(DexString(string): DexString) -> Self {
string
}
}
impl From<androscalpel_serializer::StringDataItem> for DexString {
fn from(string: androscalpel_serializer::StringDataItem) -> Self {
Self(string)
}
}
impl From<&DexString> for String {
fn from(DexString(string): &DexString) -> Self {
string
.try_into()
.unwrap_or(format!("InvalidEncoding:{:x?}", string.data))
}
}
impl From<DexString> for String {
fn from(string: DexString) -> Self {
(&string).into()
}
}
impl From<&str> for DexString {
fn from(string: &str) -> Self {
Self(string.into())
}
}
impl From<String> for DexString {
fn from(string: String) -> Self {
Self(string.as_str().into())
}
}
impl Hash for DexString {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.get_utf16_size().hash(state);
self.get_bytes().hash(state);
}
}
#[pymethods]
impl DexString {
#[new]
pub fn new(s: &str) -> Self {
s.into()
}
/// Return the binary mutf-8 encoded string (minus the trailling 0)
pub fn get_bytes(&self) -> &[u8] {
&self.0.data
}
/// Return the 'utf-16' size of the string (number of unicode code point, ie its lenght in 'java-land')
pub fn get_utf16_size(&self) -> u32 {
self.0.utf16_size.0
}
pub fn __str__(&self) -> String {
self.into()
}
fn __richcmp__(&self, other: &PyAny, op: CompareOp, py: Python<'_>) -> PyResult<PyObject> {
let other: Self = other
.extract()
.or(<String as FromPyObject>::extract(other).map(|string| string.into()))?;
match op {
CompareOp::Eq => Ok((self == &other).into_py(py)),
_ => Ok(py.NotImplemented()),
}
}
pub fn __repr__(&self) -> String {
self.into()
}
fn __hash__(&self) -> u64 {
let mut hasher = DefaultHasher::new();
self.hash(&mut hasher);
hasher.finish()
}
}
/// Androscalpel.
#[pymodule]
fn androscalpel(_py: Python, m: &PyModule) -> PyResult<()> {