clean json format

This commit is contained in:
Jean-Marie Mineau 2024-04-26 13:30:19 +02:00
parent c27a047f89
commit fb2ba61d21
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
6 changed files with 990 additions and 325 deletions

View file

@ -8,7 +8,6 @@ use log::info;
use pyo3::prelude::*;
use pyo3::types::PyBytes;
use crate::hashmap_vectorize;
use crate::ins::CallSite;
use crate::instructions;
use crate::Result;
@ -22,7 +21,6 @@ use rayon::prelude::*;
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)]
pub struct Apk {
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub classes: HashMap<IdType, Class>,
#[pyo3(get)]
pub not_referenced_strings: HashSet<DexString>,

View file

@ -5,7 +5,6 @@ use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
use crate::hashmap_vectorize;
use crate::{
DexAnnotationItem, DexString, Field, IdField, IdMethod, IdMethodType, IdType, Method,
MethodHandle, Result,
@ -56,19 +55,15 @@ pub struct Class {
/// The static fields
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub static_fields: HashMap<IdField, Field>,
/// The instance fields
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub instance_fields: HashMap<IdField, Field>,
/// The direct (static, private or constructor) methods of the class
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub direct_methods: HashMap<IdMethod, Method>,
/// The virtual (ie non direct) methods of the class
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
pub virtual_methods: HashMap<IdMethod, Method>,
// Do we need to distinguish direct and virtual (all the other) methods?
// Maybe overlapping descriptor (same name, class and proto?)

View file

@ -16,7 +16,7 @@ use androscalpel_serializer::{StringDataItem, Uleb128};
/// The type of a method. The shorty is formatted as described in
/// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethodType {
/// Type formatted as described by <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
pub(crate) shorty: DexString, // Redondant, but same as in the encoding, keep it in case we ever
@ -180,6 +180,25 @@ impl IdMethodType {
}
}
impl SmaliName for IdMethodType {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
Ok(format!(
"({}){}",
self.parameters
.iter()
.map(|param| param.try_to_smali())
.collect::<Result<Vec<String>>>()?
.join(""),
self.return_type.try_to_smali()?
))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
impl IdMethodType {
/// Compute the format for the shorty as described in
/// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
@ -199,7 +218,7 @@ impl IdMethodType {
// Not a clean rust enum because we want to be compatible with python, and maybe support strange
// malware edge case?
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct IdType(pub(crate) DexString);
#[pymethods]
impl IdType {
@ -591,8 +610,20 @@ impl IdType {
// TODO: TESTS
}
impl SmaliName for IdType {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let r = (&self.0 .0).try_into()?; // Anyhow conversion stuff
Ok(r)
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdField {
/// The name of the field, format described at
/// <https://source.android.com/docs/core/runtime/dex-format#membername>
@ -735,9 +766,23 @@ impl PartialOrd for IdField {
}
}
impl SmaliName for IdField {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let class: String = self.class_.try_to_smali()?;
let name: String = (&self.name.0).try_into()?;
let ty: String = self.type_.try_to_smali()?;
Ok(format!("{class}->{name}:{ty}"))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
/// The Id of a method.
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Hash, Deserialize, Serialize)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct IdMethod {
/// The class containing the method.
#[pyo3(get)]
@ -906,8 +951,25 @@ impl PartialOrd for IdMethod {
}
}
impl SmaliName for IdMethod {
/// Convert a descriptor to its smali representation.
fn try_to_smali(&self) -> Result<String> {
let name: String = (&self.name.0).try_into()?;
Ok(format!(
"{}->{}{}",
self.class_.try_to_smali()?,
name,
self.proto.try_to_smali()?,
))
}
/// Convert a smali representation to its descriptor.
fn try_from_smali(smali: &str) -> Result<Self> {
Self::from_smali(smali)
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq, Deserialize, Serialize, Ord, PartialOrd)]
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd)]
pub struct IdEnum(pub IdField);
#[pymethods]
@ -957,3 +1019,61 @@ impl IdEnum {
op.matches(self.cmp(other))
}
}
// Not too sure about this one
impl SmaliName for IdEnum {
    /// Render the enum in smali syntax; this is exactly the smali form of the
    /// wrapped `IdField`.
    fn try_to_smali(&self) -> Result<String> {
        let Self(field) = self;
        field.try_to_smali()
    }

    /// Parse the smali syntax of a field with `IdField::from_smali` and wrap
    /// the result.
    fn try_from_smali(smali: &str) -> Result<Self> {
        IdField::from_smali(smali).map(Self)
    }
}
/// Conversion between an identifier type and its smali string representation.
///
/// Implemented in this file for `IdMethodType`, `IdType`, `IdField`, `IdMethod`
/// and `IdEnum`; the `serde_serialize_to_smali!` macro relies on it to
/// (de)serialize those types as plain strings.
pub trait SmaliName: Sized {
/// Convert a descriptor to its smali representation.
///
/// Returns an error if the conversion fails.
fn try_to_smali(&self) -> Result<String>;
/// Convert a smali representation to its descriptor.
///
/// Returns an error if the conversion fails.
fn try_from_smali(smali: &str) -> Result<Self>;
}
/// Implement `serde::Serialize` and `serde::Deserialize` for a type that
/// implements `SmaliName`, using its smali string as the serialized form.
///
/// The macro takes one argument of designator `ident`: the name of the type.
/// The type must also provide a `__str__` method, which is used to describe
/// the value in serialization error messages.
///
/// Conversion failures are reported through the serializer's / deserializer's
/// own error type instead of panicking, so malformed input surfaces as a
/// regular deserialization error.
macro_rules! serde_serialize_to_smali {
    ($type_name:ident) => {
        impl serde::Serialize for $type_name {
            fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                // The message is only built on the failure path.
                let smali = self.try_to_smali().map_err(|err| {
                    <S::Error as serde::ser::Error>::custom(format!(
                        "Failed to convert {} to smali: {err}",
                        self.__str__()
                    ))
                })?;
                serializer.serialize_str(&smali)
            }
        }

        impl<'de> serde::Deserialize<'de> for $type_name {
            fn deserialize<D>(deserializer: D) -> Result<$type_name, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                let string = <String as serde::Deserialize>::deserialize(deserializer)?;
                // The string comes from external data: report bad smali as an error.
                $type_name::try_from_smali(&string).map_err(|err| {
                    <D::Error as serde::de::Error>::custom(format!(
                        "Failed to convert {string} as smali: {err}"
                    ))
                })
            }
        }
    };
}
serde_serialize_to_smali!(IdMethodType);
serde_serialize_to_smali!(IdType);
serde_serialize_to_smali!(IdMethod);
serde_serialize_to_smali!(IdField);
serde_serialize_to_smali!(IdEnum);

View file

@ -4,6 +4,7 @@ use pyo3::prelude::*;
use pyo3::types::PyBytes;
use std::collections::HashSet;
use std::fs::File;
use std::io::{Cursor, Seek, SeekFrom};
use std::path::PathBuf;
@ -48,7 +49,7 @@ pub fn sleb128_to_int(b: &[u8]) -> Result<i32> {
Ok(Sleb128::deserialize_from_slice(b)?.0)
}
// TODO: list_defined_classes, is_dex, is_zip take &[u8], but should allow to also read from file
// TODO: list_defined_classes, is_dex and is_zip each accept only a &[u8] or only a file path, but should accept both
/// List all classes defined in a dex file.
#[pyfunction]
@ -62,8 +63,12 @@ pub fn list_defined_classes(dex: &[u8]) -> Result<HashSet<IdType>> {
/// Test if a file is a .dex file and return the dex version if it is, else return None.
#[pyfunction]
pub fn is_dex(file: &[u8]) -> Option<usize> {
HeaderItem::deserialize_from_slice(file)
pub fn is_dex(file: PathBuf) -> Option<usize> {
let mut file = match File::open(file) {
Ok(file) => file,
Err(_) => return None,
};
HeaderItem::deserialize(&mut file)
.ok()
.and_then(|header| String::from_utf8(header.magic.version.to_vec()).ok())
.and_then(|version| version.parse::<usize>().ok())
@ -71,8 +76,11 @@ pub fn is_dex(file: &[u8]) -> Option<usize> {
/// Test if a file is a zip file.
#[pyfunction]
pub fn is_zip(file: &[u8]) -> bool {
let mut file = Cursor::new(file);
pub fn is_zip(file: PathBuf) -> bool {
let mut file = match File::open(file) {
Ok(file) => file,
Err(_) => return false,
};
let ecd_off = if let Some(off) = ZipFileReader::get_end_of_central_directory_offset(&mut file) {
off
} else {
@ -117,6 +125,9 @@ pub(crate) fn export_module(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(uleb128_to_int, m)?)?;
m.add_function(wrap_pyfunction!(uleb128p1_to_int, m)?)?;
m.add_function(wrap_pyfunction!(sleb128_to_int, m)?)?;
m.add_function(wrap_pyfunction!(list_defined_classes, m)?)?;
m.add_function(wrap_pyfunction!(is_dex, m)?)?;
m.add_function(wrap_pyfunction!(is_zip, m)?)?;
m.add_function(wrap_pyfunction!(replace_dex, m)?)?;
Ok(())
}

File diff suppressed because one or more lines are too long

View file

@ -1,7 +1,5 @@
{
"descriptor": {
"String": "Lcom/example/testclassloader/TestB;"
},
"descriptor": "Lcom/example/testclassloader/TestB;",
"is_public": true,
"is_final": false,
"is_interface": false,
@ -9,52 +7,16 @@
"is_synthetic": false,
"is_annotation": false,
"is_enum": false,
"superclass": {
"String": "Ljava/lang/Object;"
},
"superclass": "Ljava/lang/Object;",
"interfaces": [],
"source_file": {
"String": "TestB.java"
},
"static_fields": [],
"instance_fields": [],
"direct_methods": [
[
{
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "V"
},
"return_type": {
"String": "V"
},
"parameters": []
},
"name": {
"String": "<init>"
}
},
{
"descriptor": {
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "V"
},
"return_type": {
"String": "V"
},
"parameters": []
},
"name": {
"String": "<init>"
}
},
"static_fields": {},
"instance_fields": {},
"direct_methods": {
"Lcom/example/testclassloader/TestB;-><init>()V": {
"descriptor": "Lcom/example/testclassloader/TestB;-><init>()V",
"visibility": "Public",
"is_static": false,
"is_final": false,
@ -89,23 +51,7 @@
},
{
"InvokeDirect": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "V"
},
"return_type": {
"String": "V"
},
"parameters": []
},
"name": {
"String": "<init>"
}
},
"method": "Ljava/lang/Object;-><init>()V",
"args": [
0
]
@ -122,45 +68,10 @@
]
}
}
]
],
"virtual_methods": [
[
{
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "val"
}
},
{
"descriptor": {
"class_": {
"String": "Lcom/example/testclassloader/TestB;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "val"
}
},
"virtual_methods": {
"Lcom/example/testclassloader/TestB;->val()Ljava/lang/String;": {
"descriptor": "Lcom/example/testclassloader/TestB;->val()Ljava/lang/String;",
"visibility": "Public",
"is_static": false,
"is_final": false,
@ -195,23 +106,7 @@
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/Class;"
},
"parameters": []
},
"name": {
"String": "getClass"
}
},
"method": "Ljava/lang/Object;->getClass()Ljava/lang/Class;",
"args": [
1
]
@ -234,23 +129,7 @@
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Class;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/ClassLoader;"
},
"parameters": []
},
"name": {
"String": "getClassLoader"
}
},
"method": "Ljava/lang/Class;->getClassLoader()Ljava/lang/ClassLoader;",
"args": [
0
]
@ -273,23 +152,7 @@
},
{
"InvokeVirtual": {
"method": {
"class_": {
"String": "Ljava/lang/Object;"
},
"proto": {
"shorty": {
"String": "L"
},
"return_type": {
"String": "Ljava/lang/String;"
},
"parameters": []
},
"name": {
"String": "toString"
}
},
"method": "Ljava/lang/Object;->toString()Ljava/lang/String;",
"args": [
0
]
@ -314,11 +177,18 @@
"ReturnObject": {
"reg": 0
}
},
{
"Label": {
"name": "label_0000000D"
}
},
{
"Nop": null
}
]
}
}
]
],
},
"annotations": []
}