diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 737dae7..4677716 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -5,6 +5,7 @@ use std::collections::HashMap; use log::info; use pyo3::prelude::*; +use pyo3::types::PyBytes; use crate::*; use androscalpel_serializer::*; @@ -750,6 +751,14 @@ impl Apk { } Ok(methods) } + + fn gen_raw_dex(&self) -> Result>> { + let mut dex_writer = DexWriter::new(); + for class_ in self.classes.values() { + dex_writer.add_class(class_)?; + } + Ok(vec![dex_writer.gen_dex_file_to_vec()?]) + } } #[pymethods] @@ -765,4 +774,13 @@ impl Apk { fn py_add_dex_file(&mut self, data: &[u8]) -> Result<()> { self.add_dex_file(data) } + + #[pyo3(name = "gen_raw_dex")] //Sad GIL noise + fn py_gen_raw_dex(&self, py: Python<'_>) -> Result> { + Ok(self + .gen_raw_dex()? + .into_iter() + .map(|bytes| PyBytes::new(py, &bytes).into()) + .collect()) + } } diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs index 07ae4fa..6469aff 100644 --- a/androscalpel/src/class.rs +++ b/androscalpel/src/class.rs @@ -143,8 +143,14 @@ impl Class { strings.extend(id.get_all_strings()); strings.extend(field.get_all_strings()); } - //pub instance_fields: HashMap, - //pub direct_methods: HashMap, + for (id, field) in &self.instance_fields { + strings.extend(id.get_all_strings()); + strings.extend(field.get_all_strings()); + } + for (id, method) in &self.direct_methods { + strings.extend(id.get_all_strings()); + strings.extend(method.get_all_strings()); + } //pub virtual_methods: HashMap, //pub annotations: Vec, strings diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs index 57fd7b9..8aea4fb 100644 --- a/androscalpel/src/code.rs +++ b/androscalpel/src/code.rs @@ -1,8 +1,10 @@ //! Representation of a method. +use std::collections::HashSet; + use pyo3::prelude::*; -use crate::IdType; +use crate::{DexString, IdType}; // TODO: make this easy to edit/manipulate, maybe move to Method @@ -60,4 +62,15 @@ impl Code { pub fn __repr__(&self) -> String { "Code()".into() } + + /// Return all strings references in the codes. + pub fn get_all_strings(&self) -> HashSet { + let mut strings = HashSet::new(); + for (list, _) in &self.handlers { + for (ty, _) in list { + strings.extend(ty.get_all_strings()); + } + } + strings + } } diff --git a/androscalpel/src/dex_string.rs b/androscalpel/src/dex_string.rs index df0f720..0980896 100644 --- a/androscalpel/src/dex_string.rs +++ b/androscalpel/src/dex_string.rs @@ -1,4 +1,4 @@ -use std::cmp::{Ord, Ordering, PartialOrd}; +use std::cmp::{Ord, PartialOrd}; use std::collections::hash_map::DefaultHasher; use std::collections::HashSet; use std::hash::{Hash, Hasher}; @@ -8,27 +8,9 @@ use pyo3::exceptions::PyTypeError; use pyo3::prelude::*; #[pyclass] -#[derive(Clone, PartialEq, Eq, Debug)] +#[derive(Clone, PartialEq, Eq, Debug, Ord, PartialOrd)] pub struct DexString(pub androscalpel_serializer::StringDataItem); -impl Ord for DexString { - fn cmp(&self, other: &Self) -> Ordering { - self.0 - .data - .cmp(&other.0.data) - .then(self.0.utf16_size.0.cmp(&other.0.utf16_size.0)) - } -} - -impl PartialOrd for DexString { - fn partial_cmp(&self, other: &Self) -> Option { - self.0 - .data - .partial_cmp(&other.0.data) - .map(|ord| ord.then(self.0.utf16_size.0.cmp(&other.0.utf16_size.0))) - } -} - impl From for androscalpel_serializer::StringDataItem { fn from(DexString(string): DexString) -> Self { string diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 69ec526..05da33c 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -121,6 +121,10 @@ impl DexWriter { section_manager.add_elt(Section::StringIdItem, None); section_manager.add_elt(Section::StringDataItem, Some(string.0.size())); } + let string_ids_list: Vec = string_ids_list + .into_iter() + .map(|string| string.into()) + .collect(); // Populate map_list let map_item_size = MapItem { @@ -170,12 +174,12 @@ impl DexWriter { self.header.serialize(writer)?; // StringIdItem section let mut string_off = section_manager.get_offset(Section::StringDataItem); - for string in string_ids_list { + for string in string_ids_list.iter() { let str_id = StringIdItem { string_data_off: string_off, }; str_id.serialize(writer)?; - string_off += string.0.size() as u32; + string_off += string.size() as u32; } // TODO: TypeIdItem // TODO: ProtoIdItem, @@ -193,8 +197,9 @@ impl DexWriter { // TODO: ClassDataItem, // TODO: CodeItem, // TODO: StringDataItem, - - // TODO + for string in string_ids_list { + string.serialize(writer)?; + } // TODO: DebugInfoItem, // TODO: AnnotationItem, diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index 3278ff1..47b34dd 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -1,8 +1,10 @@ //! Representation of a method. +use std::collections::HashSet; + use pyo3::prelude::*; -use crate::{Code, DexAnnotationItem, IdMethod}; +use crate::{Code, DexAnnotationItem, DexString, IdMethod}; /// Represent a method. #[pyclass] @@ -103,4 +105,22 @@ impl Method { let dscr = self.descriptor.__str__(); format!("Method({dscr})") } + + /// Return all strings references in the method. + pub fn get_all_strings(&self) -> HashSet { + let mut strings = HashSet::new(); + strings.extend(self.descriptor.get_all_strings()); + for annot in &self.annotations { + strings.extend(annot.get_all_strings()); + } + for param_annots in &self.parameters_annotations { + for annot in param_annots { + strings.extend(annot.get_all_strings()); + } + } + if let Some(code) = &self.code { + strings.extend(code.get_all_strings()); + } + strings + } } diff --git a/androscalpel_serializer/src/core/string.rs b/androscalpel_serializer/src/core/string.rs index 13e4008..e15ddf0 100644 --- a/androscalpel_serializer/src/core/string.rs +++ b/androscalpel_serializer/src/core/string.rs @@ -12,6 +12,8 @@ //! | 3 | U+0800 | U+FFFF | 16 | 1110xxxx | 10xxxxxx | 10xxxxxx | | | | //! | 6 | U+10000 | U+FFFFF | 20 | 11101101 | 1010xxxx | 10xxxxxx | 11101101 | 1011xxxx | 10xxxxxx | +use std::cmp::{Ord, Ordering, PartialOrd}; + use crate as androscalpel_serializer; use crate::core::*; pub use androscalpel_serializer_derive::*; @@ -32,6 +34,22 @@ const VALUE_SURROGATED_BYTE_2_PREFIX: u8 = 0b1011_0000; const MASK_TRAYLING_BYTE_PREFIX: u8 = 0b1100_0000; const VALUE_TRAYLING_BYTE_PREFIX: u8 = 0b1000_0000; +impl Ord for StringDataItem { + fn cmp(&self, other: &Self) -> Ordering { + self.data + .cmp(&other.data) + .then(self.utf16_size.cmp(&other.utf16_size)) + } +} + +impl PartialOrd for StringDataItem { + fn partial_cmp(&self, other: &Self) -> Option { + self.data + .partial_cmp(&other.data) + .map(|ord| ord.then(self.utf16_size.cmp(&other.utf16_size))) + } +} + impl TryFrom<&StringDataItem> for String { type Error = Error; fn try_from(item: &StringDataItem) -> Result { diff --git a/test.py b/test.py index 0bf5ba1..a1425f8 100644 --- a/test.py +++ b/test.py @@ -17,3 +17,5 @@ with z.ZipFile(APK_NAME) as zipf: apk = asc.Apk() apk.add_dex_file(dex) + +dex_raw = apk.gen_raw_dex()