clean json format

This commit is contained in:
Jean-Marie Mineau 2024-04-26 13:30:19 +02:00
parent c27a047f89
commit fb2ba61d21
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
6 changed files with 990 additions and 325 deletions

View file

@ -8,7 +8,6 @@ use log::info;
use pyo3::prelude::*; use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use crate::hashmap_vectorize;
use crate::ins::CallSite; use crate::ins::CallSite;
use crate::instructions; use crate::instructions;
use crate::Result; use crate::Result;
@ -22,7 +21,6 @@ use rayon::prelude::*;
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)] #[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)]
pub struct Apk { pub struct Apk {
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub classes: HashMap<IdType, Class>, pub classes: HashMap<IdType, Class>,
#[pyo3(get)] #[pyo3(get)]
pub not_referenced_strings: HashSet<DexString>, pub not_referenced_strings: HashSet<DexString>,

View file

@ -5,7 +5,6 @@ use std::collections::{HashMap, HashSet};
use pyo3::prelude::*; use pyo3::prelude::*;
use crate::hashmap_vectorize;
use crate::{ use crate::{
DexAnnotationItem, DexString, Field, IdField, IdMethod, IdMethodType, IdType, Method, DexAnnotationItem, DexString, Field, IdField, IdMethod, IdMethodType, IdType, Method,
MethodHandle, Result, MethodHandle, Result,
@ -56,19 +55,15 @@ pub struct Class {
/// The static fields /// The static fields
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub static_fields: HashMap<IdField, Field>, pub static_fields: HashMap<IdField, Field>,
/// The instance fields /// The instance fields
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub instance_fields: HashMap<IdField, Field>, pub instance_fields: HashMap<IdField, Field>,
/// The direct (static, private or constructor) methods of the class /// The direct (static, private or constructor) methods of the class
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub direct_methods: HashMap<IdMethod, Method>, pub direct_methods: HashMap<IdMethod, Method>,
/// The virtual (ie non direct) methods of the class /// The virtual (ie non direct) methods of the class
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub virtual_methods: HashMap<IdMethod, Method>, pub virtual_methods: HashMap<IdMethod, Method>,
// Do we need to distinguish direct and virtual (all the other) methods? // Do we need to distinguish direct and virtual (all the other) methods?
// Maybe overlapping descriptor (same name, class and proto?) // Maybe overlapping descriptor (same name, class and proto?)

View file

@ -16,7 +16,7 @@ use androscalpel_serializer::{StringDataItem, Uleb128};
/// The type of a method. The shorty is formatted as described in /// The type of a method. The shorty is formatted as described in
/// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor> /// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
#[pyclass] #[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethodType { pub struct IdMethodType {
/// Type formatted as described by <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor> /// Type formatted as described by <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in case we ever pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in case we ever
@ -180,6 +180,25 @@ impl IdMethodType {
} }
} }
impl SmaliName for IdMethodType {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
Ok(format!(
"({}){}",
self.parameters
.iter()
.map(|param| param.try_to_smali())
.collect::<Result<Vec<String>>>()?
.join(""),
self.return_type.try_to_smali()?
))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
impl IdMethodType { impl IdMethodType {
/// Compute the format for the shorty as described in /// Compute the format for the shorty as described in
/// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor> /// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
@ -199,7 +218,7 @@ impl IdMethodType {
// Not a clean rust enum because we want to be compatible with python, and maybe support strange // Not a clean rust enum because we want to be compatible with python, and maybe support strange
// malware edge case? // malware edge case?
#[pyclass] #[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize)] #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct IdType(pub(crate) DexString); pub struct IdType(pub(crate) DexString);
#[pymethods] #[pymethods]
impl IdType { impl IdType {
@ -591,8 +610,20 @@ impl IdType {
// TODO: TESTS // TODO: TESTS
} }
impl SmaliName for IdType {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let r = (&self.0 .0).try_into()?; // Anyhow conversion stuff
Ok(r)
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
#[pyclass] #[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdField { pub struct IdField {
/// The name of the field, format described at /// The name of the field, format described at
/// <https://source.android.com/docs/core/runtime/dex-format#membername> /// <https://source.android.com/docs/core/runtime/dex-format#membername>
@ -735,9 +766,23 @@ impl PartialOrd for IdField {
} }
} }
impl SmaliName for IdField {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let class: String = self.class_.try_to_smali()?;
let name: String = (&self.name.0).try_into()?;
let ty: String = self.type_.try_to_smali()?;
Ok(format!("{class}->{name}:{ty}"))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
/// The Id of a method. /// The Id of a method.
#[pyclass] #[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethod { pub struct IdMethod {
/// The class containing the method. /// The class containing the method.
#[pyo3(get)] #[pyo3(get)]
@ -906,8 +951,25 @@ impl PartialOrd for IdMethod {
} }
} }
impl SmaliName for IdMethod {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let name: String = (&self.name.0).try_into()?;
Ok(format!(
"{}->{}{}",
self.class_.try_to_smali()?,
name,
self.proto.try_to_smali()?,
))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
#[pyclass] #[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Ord, PartialOrd)] #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
pub struct IdEnum(pub IdField); pub struct IdEnum(pub IdField);
#[pymethods] #[pymethods]
@ -957,3 +1019,61 @@ impl IdEnum {
op.matches(self.cmp(other)) op.matches(self.cmp(other))
} }
} }
// Not too sure about this one: an enum id uses exactly the smali form of the
// field it wraps.
impl SmaliName for IdEnum {
    /// Return the smali form of the wrapped field id.
    fn try_to_smali(&self) -> Result<String> {
        let Self(field) = self;
        field.try_to_smali()
    }

    /// Parse a field id from its smali form and wrap it as an enum id.
    fn try_from_smali(smali: &str) -> Result<Self> {
        IdField::from_smali(smali).map(Self)
    }
}
/// Conversion between a descriptor and its smali text representation.
///
/// Both directions are fallible and report failures through the crate
/// `Result` type. Implementors must be `Sized` because `try_from_smali`
/// returns `Self` by value.
pub trait SmaliName: Sized {
/// Convert a descriptor to its smali representation.
///
/// Returns an error when the descriptor cannot be converted to a string.
fn try_to_smali(&self) -> Result<String>;
/// Convert a smali representation to its descriptor.
///
/// Returns an error when `smali` cannot be parsed as this kind of descriptor.
fn try_from_smali(smali: &str) -> Result<Self>;
}
/// Implement `serde::Serialize` and `serde::Deserialize` for a type by going
/// through its smali string representation.
///
/// The macro takes a single `ident` naming the type. The type must implement
/// `SmaliName` (for `try_to_smali` / `try_from_smali`) and expose a
/// `__str__` method, which is only used to build error messages.
///
/// The value is serialized as a plain string. Conversion failures are
/// reported as serde errors (`serde::ser::Error::custom` /
/// `serde::de::Error::custom`) instead of panicking, so malformed input makes
/// deserialization fail cleanly rather than aborting the caller.
macro_rules! serde_serialize_to_smali {
    ($type_name:ident) => {
        impl serde::Serialize for $type_name {
            fn serialize<S>(&self, serializer: S) -> ::std::result::Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                // The error message is only built on the failure path.
                let smali = self.try_to_smali().map_err(|err| {
                    <S::Error as serde::ser::Error>::custom(format!(
                        "Failed to convert {} to smali: {err}",
                        self.__str__()
                    ))
                })?;
                serializer.serialize_str(&smali)
            }
        }
        impl<'de> serde::Deserialize<'de> for $type_name {
            fn deserialize<D>(deserializer: D) -> ::std::result::Result<$type_name, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                let string = <String as serde::Deserialize>::deserialize(deserializer)?;
                // Serialized data is external input: an invalid smali string
                // must surface as a deserialization error, not as a panic.
                $type_name::try_from_smali(&string).map_err(|err| {
                    <D::Error as serde::de::Error>::custom(format!(
                        "Failed to convert {string} as smali: {err}"
                    ))
                })
            }
        }
    };
}
serde_serialize_to_smali!(IdMethodType);
serde_serialize_to_smali!(IdType);
serde_serialize_to_smali!(IdMethod);
serde_serialize_to_smali!(IdField);
serde_serialize_to_smali!(IdEnum);

View file

@ -4,6 +4,7 @@ use pyo3::prelude::*;
use pyo3::types::PyBytes; use pyo3::types::PyBytes;
use std::collections::HashSet; use std::collections::HashSet;
use std::fs::File;
use std::io::{Cursor, Seek, SeekFrom}; use std::io::{Cursor, Seek, SeekFrom};
use std::path::PathBuf; use std::path::PathBuf;
@ -48,7 +49,7 @@ pub fn sleb128_to_int(b: &[u8]) -> Result<i32> {
Ok(Sleb128::deserialize_from_slice(b)?.0) Ok(Sleb128::deserialize_from_slice(b)?.0)
} }
// TODO: list_defined_classes, is_dex, is_zip take &[u8], but should allow to also read from file // TODO: list_defined_classes, is_dex, is_zip take only &[u8] or file, but should allow to also read from both
/// List all classes defined in a dex file. /// List all classes defined in a dex file.
#[pyfunction] #[pyfunction]
@ -62,8 +63,12 @@ pub fn list_defined_classes(dex: &[u8]) -> Result<HashSet<IdType>> {
/// Test if a file is a .dex file and return the dex version if it is, else return None. /// Test if a file is a .dex file and return the dex version if it is, else return None.
#[pyfunction] #[pyfunction]
pub fn is_dex(file: &[u8]) -> Option<usize> { pub fn is_dex(file: PathBuf) -> Option<usize> {
HeaderItem::deserialize_from_slice(file) let mut file = match File::open(file) {
Ok(file) => file,
Err(_) => return None,
};
HeaderItem::deserialize(&mut file)
.ok() .ok()
.and_then(|header| String::from_utf8(header.magic.version.to_vec()).ok()) .and_then(|header| String::from_utf8(header.magic.version.to_vec()).ok())
.and_then(|version| version.parse::<usize>().ok()) .and_then(|version| version.parse::<usize>().ok())
@ -71,8 +76,11 @@ pub fn is_dex(file: &[u8]) -> Option<usize> {
/// Test if a file is a zip file. /// Test if a file is a zip file.
#[pyfunction] #[pyfunction]
pub fn is_zip(file: &[u8]) -> bool { pub fn is_zip(file: PathBuf) -> bool {
let mut file = Cursor::new(file); let mut file = match File::open(file) {
Ok(file) => file,
Err(_) => return false,
};
let ecd_off = if let Some(off) = ZipFileReader::get_end_of_central_directory_offset(&mut file) { let ecd_off = if let Some(off) = ZipFileReader::get_end_of_central_directory_offset(&mut file) {
off off
} else { } else {
@ -117,6 +125,9 @@ pub(crate) fn export_module(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(uleb128_to_int, m)?)?; m.add_function(wrap_pyfunction!(uleb128_to_int, m)?)?;
m.add_function(wrap_pyfunction!(uleb128p1_to_int, m)?)?; m.add_function(wrap_pyfunction!(uleb128p1_to_int, m)?)?;
m.add_function(wrap_pyfunction!(sleb128_to_int, m)?)?; m.add_function(wrap_pyfunction!(sleb128_to_int, m)?)?;
m.add_function(wrap_pyfunction!(list_defined_classes, m)?)?;
m.add_function(wrap_pyfunction!(is_dex, m)?)?;
m.add_function(wrap_pyfunction!(is_zip, m)?)?;
m.add_function(wrap_pyfunction!(replace_dex, m)?)?; m.add_function(wrap_pyfunction!(replace_dex, m)?)?;
Ok(()) Ok(())
} }

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,5 @@
{ {
"descriptor": { "descriptor": "Lcom/example/testclassloader/TestB;",
"String": "Lcom/example/testclassloader/TestB;"
},
"is_public": true, "is_public": true,
"is_final": false, "is_final": false,
"is_interface": false, "is_interface": false,
@ -9,316 +7,188 @@
"is_synthetic": false, "is_synthetic": false,
"is_annotation": false, "is_annotation": false,
"is_enum": false, "is_enum": false,
"superclass": { "superclass": "Ljava/lang/Object;",
"String": "Ljava/lang/Object;"
},
"interfaces": [], "interfaces": [],
"source_file": { "source_file": {
"String": "TestB.java" "String": "TestB.java"
}, },
"static_fields": [], "static_fields": {},
"instance_fields": [], "instance_fields": {},
"direct_methods": [ "direct_methods": {
[ "Lcom/example/testclassloader/TestB;-><init>()V": {
{ "descriptor": "Lcom/example/testclassloader/TestB;-><init>()V",
"class_": { "visibility": "Public",
"String": "Lcom/example/testclassloader/TestB;" "is_static": false,
}, "is_final": false,
"proto": { "is_synchronized": false,
"shorty": { "is_bridge": false,
"String": "V" "is_varargs": false,
}, "is_native": false,
"return_type": { "is_abstract": false,
"String": "V" "is_strictfp": false,
}, "is_synthetic": false,
"parameters": [] "is_constructor": true,
}, "is_declared_syncrhonized": false,
"name": { "annotations": [],
"String": "<init>" "parameters_annotations": [],
} "code": {
}, "registers_size": 1,
{ "ins_size": 1,
"descriptor": { "outs_size": 1,
"class_": { "debug_info": [
"String": "Lcom/example/testclassloader/TestB;" 3,
}, [
"proto": { 14,
"shorty": { 0
"String": "V"
},
"return_type": {
"String": "V"
},
"parameters": []
},
"name": {
"String": "<init>"
}
},
"visibility": "Public",
"is_static": false,
"is_final": false,
"is_synchronized": false,
"is_bridge": false,
"is_varargs": false,
"is_native": false,
"is_abstract": false,
"is_strictfp": false,
"is_synthetic": false,
"is_constructor": true,
"is_declared_syncrhonized": false,
"annotations": [],
"parameters_annotations": [],
"code": {
"registers_size": 1,
"ins_size": 1,
"outs_size": 1,
"debug_info": [
3,
[
14,
0
]
],
"parameter_names": [],
"insns": [
{
"Label": {
"name": "label_00000000"
}
},
{
"InvokeDirect": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "V"
},
"return_type": {
"String": "V"
},
"parameters": []
},
"name": {
"String": "<init>"
}
},
"args": [
0
]
}
},
{
"Label": {
"name": "label_00000003"
}
},
{
"ReturnVoid": null
}
] ]
} ],
} "parameter_names": [],
] "insns": [
], {
"virtual_methods": [ "Label": {
[ "name": "label_00000000"
{
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "val"
}
},
{
"descriptor": {
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "val"
}
},
"visibility": "Public",
"is_static": false,
"is_final": false,
"is_synchronized": false,
"is_bridge": false,
"is_varargs": false,
"is_native": false,
"is_abstract": false,
"is_strictfp": false,
"is_synthetic": false,
"is_constructor": false,
"is_declared_syncrhonized": false,
"annotations": [],
"parameters_annotations": [],
"code": {
"registers_size": 2,
"ins_size": 1,
"outs_size": 1,
"debug_info": [
6,
[
14,
0
]
],
"parameter_names": [],
"insns": [
{
"Label": {
"name": "label_00000000"
}
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/Class;"
},
"parameters": []
},
"name": {
"String": "getClass"
}
},
"args": [
1
]
}
},
{
"Label": {
"name": "label_00000003"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_00000004"
}
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Class;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/ClassLoader;"
},
"parameters": []
},
"name": {
"String": "getClassLoader"
}
},
"args": [
0
]
}
},
{
"Label": {
"name": "label_00000007"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_00000008"
}
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "toString"
}
},
"args": [
0
]
}
},
{
"Label": {
"name": "label_0000000B"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_0000000C"
}
},
{
"ReturnObject": {
"reg": 0
}
} }
] },
} {
"InvokeDirect": {
"method": "Ljava/lang/Object;-><init>()V",
"args": [
0
]
}
},
{
"Label": {
"name": "label_00000003"
}
},
{
"ReturnVoid": null
}
]
} }
] }
], },
"virtual_methods": {
"Lcom/example/testclassloader/TestB;->val()Ljava/lang/String;": {
"descriptor": "Lcom/example/testclassloader/TestB;->val()Ljava/lang/String;",
"visibility": "Public",
"is_static": false,
"is_final": false,
"is_synchronized": false,
"is_bridge": false,
"is_varargs": false,
"is_native": false,
"is_abstract": false,
"is_strictfp": false,
"is_synthetic": false,
"is_constructor": false,
"is_declared_syncrhonized": false,
"annotations": [],
"parameters_annotations": [],
"code": {
"registers_size": 2,
"ins_size": 1,
"outs_size": 1,
"debug_info": [
6,
[
14,
0
]
],
"parameter_names": [],
"insns": [
{
"Label": {
"name": "label_00000000"
}
},
{
"InvokeVirtual": {
"method": "Ljava/lang/Object;->getClass()Ljava/lang/Class;",
"args": [
1
]
}
},
{
"Label": {
"name": "label_00000003"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_00000004"
}
},
{
"InvokeVirtual": {
"method": "Ljava/lang/Class;->getClassLoader()Ljava/lang/ClassLoader;",
"args": [
0
]
}
},
{
"Label": {
"name": "label_00000007"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_00000008"
}
},
{
"InvokeVirtual": {
"method": "Ljava/lang/Object;->toString()Ljava/lang/String;",
"args": [
0
]
}
},
{
"Label": {
"name": "label_0000000B"
}
},
{
"MoveResultObject": {
"to": 0
}
},
{
"Label": {
"name": "label_0000000C"
}
},
{
"ReturnObject": {
"reg": 0
}
},
{
"Label": {
"name": "label_0000000D"
}
},
{
"Nop": null
}
]
}
}
},
"annotations": [] "annotations": []
} }