From 80968c9bcf711b5ba6b0923940d94252bdd443c9 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Thu, 30 Nov 2023 14:40:49 +0100 Subject: [PATCH] add a rought implem of code --- .gitignore | 1 + TODO.md | 3 +- androscalpel/src/apk.rs | 64 +++++++++++++++++++-- androscalpel/src/code.rs | 63 ++++++++++++++++++++ androscalpel/src/lib.rs | 3 + androscalpel/src/method.rs | 7 ++- androscalpel_serializer/src/items/code.rs | 70 +++++++++++++++++++++-- 7 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 androscalpel/src/code.rs diff --git a/.gitignore b/.gitignore index 3e1d15e..b2c469e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ /target /venv_maturin +/venv_test /test.apk diff --git a/TODO.md b/TODO.md index 5480e61..8b91b9e 100644 --- a/TODO.md +++ b/TODO.md @@ -1,5 +1,4 @@ -- method (what's left to do?) -- annotations (start from annotations_off in class def item) +- method (what's left to do except code?) - generate .dex - code - edditable code format diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 79b7004..737dae7 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -1,6 +1,6 @@ //! Representation of an apk. -use anyhow::{anyhow, Result}; +use anyhow::{anyhow, Context, Result}; use std::collections::HashMap; use log::info; @@ -280,7 +280,7 @@ impl Apk { )) } - /// Return a [`Vec`] for the offset of an [`AnnotationSet`]. + /// Return a [`Vec`] for the offset of an [`AnnotationSetItem`]. 
pub fn get_annotation_items_from_annotation_set_off( annotations_set_off: u32, dex: &DexFileReader, @@ -577,7 +577,7 @@ impl Apk { pub fn get_method_from_idx( idx: usize, Uleb128(access_flags): Uleb128, - Uleb128(_code_off): Uleb128, + Uleb128(code_off): Uleb128, dex: &DexFileReader, ) -> Result { let descriptor = Self::get_id_method_from_idx(idx, dex)?; @@ -648,6 +648,13 @@ impl Apk { | ACC_DECLARED_SYNCHRONIZED) ); } + let code = if code_off == 0 { + None + } else { + Some(Self::get_code_from_off(code_off, dex).with_context(|| { + format!("Failed to parse code of method {}", descriptor.__str__()) + })?) + }; Ok(Method { descriptor, @@ -665,7 +672,56 @@ impl Apk { is_declared_syncrhonized, annotations: vec![], parameters_annotations: vec![], - code: (), + code, + }) + } + + /// Return a [`Code`] from its offset in the dex file. + pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result { + let code_item = dex.get_struct_at_offset::(offset)?; + let debug_info = if code_item.debug_info_off == 0 { + vec![] + } else { + dex.get_struct_at_offset::(code_item.debug_info_off)? + .serialize_to_vec()? 
// no dealing with that right now + }; + let mut tries = vec![]; + for TryItem { + start_addr, + insn_count, + handler_off, + } in code_item.tries + { + tries.push((start_addr, insn_count, handler_off)); + } + let mut handlers_aux = vec![]; + if let Some(EncodedCatchHandlerList { list }) = code_item.handlers { + for EncodedCatchHandler { + handlers, + catch_all_addr, + } in list + { + let mut handlers_ = vec![]; + let catch_all_addr = catch_all_addr.map(|Uleb128(val)| val); + for EncodedTypeAddrPair { + type_idx: Uleb128(type_idx), + addr: Uleb128(addr), + } in handlers + { + handlers_.push((Self::get_id_type_from_idx(type_idx as usize, dex)?, addr)) + } + handlers_aux.push((handlers_, catch_all_addr)); + } + } + let handlers = handlers_aux; + Ok(Code { + registers_size: code_item.registers_size, + ins_size: code_item.ins_size, + outs_size: code_item.outs_size, + debug_info, + insns: code_item.insns, + tries, + handlers, }) } diff --git a/androscalpel/src/code.rs b/androscalpel/src/code.rs new file mode 100644 index 0000000..57fd7b9 --- /dev/null +++ b/androscalpel/src/code.rs @@ -0,0 +1,63 @@ +//! Representation of the code of a method. + +use pyo3::prelude::*; + +use crate::IdType; + +// TODO: make this easy to edit/manipulate, maybe move to Method + +type TmpHandlerType = (Vec<(IdType, u32)>, Option); + +/// The code run by a method. +#[pyclass] +#[derive(Debug, Clone)] +pub struct Code { + // TODO: remove and compute this value from code. + /// The number of registers used by the code + #[pyo3(get, set)] + pub registers_size: u16, + // TODO: what does it mean? is it computable? + /// The number of words of incoming arguments to the method + #[pyo3(get, set)] + pub ins_size: u16, + // TODO: what does it mean? is it computable? + /// The number of words of outgoing argument space + #[pyo3(get, set)] + pub outs_size: u16, + // TODO: implement + /// The debug info + #[pyo3(get, set)] + pub debug_info: Vec, + // TODO: implement OPcode + /// The instructions. 
+ #[pyo3(get, set)] + pub insns: Vec, + // TODO: currently unusable, just a mapping to TryItem + // TODO: maybe implement as custom OPcode to make it easy to modify? + /// Try blocks + #[pyo3(get, set)] + pub tries: Vec<(u32, u16, u16)>, + // TODO: currently unusable, just a mapping to TryItem + // TODO: maybe implement as custom OPcode to make it easy to modify? + /// The handlers associated with the try blocks. + #[pyo3(get, set)] + pub handlers: Vec, +} + +#[pymethods] +impl Code { + /* + #[new] + pub fn new() -> Self { + todo!() + } + */ + + pub fn __str__(&self) -> String { + self.__repr__() + } + + pub fn __repr__(&self) -> String { + "Code()".into() + } +} diff --git a/androscalpel/src/lib.rs b/androscalpel/src/lib.rs index 03b6128..0052c06 100644 --- a/androscalpel/src/lib.rs +++ b/androscalpel/src/lib.rs @@ -5,6 +5,7 @@ use pyo3::prelude::*; pub mod annotation; pub mod apk; pub mod class; +pub mod code; pub mod dex_id; pub mod dex_string; pub mod field; @@ -16,6 +17,7 @@ pub mod value; pub use annotation::*; pub use apk::*; pub use class::*; +pub use code::*; pub use dex_id::*; pub use dex_string::*; pub use field::*; @@ -63,6 +65,7 @@ fn androscalpel(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; Ok(()) } diff --git a/androscalpel/src/method.rs b/androscalpel/src/method.rs index fa465d5..3278ff1 100644 --- a/androscalpel/src/method.rs +++ b/androscalpel/src/method.rs @@ -2,7 +2,7 @@ use pyo3::prelude::*; -use crate::{DexAnnotationItem, IdMethod}; +use crate::{Code, DexAnnotationItem, IdMethod}; /// Represent a method. 
#[pyclass] @@ -57,7 +57,7 @@ pub struct Method { pub parameters_annotations: Vec>, /// The code of the method - pub code: (), + pub code: Option, } /// Represent the visibility of a field @@ -74,6 +74,7 @@ pub enum MethodVisibility { impl Method { #[new] pub fn new(descriptor: IdMethod) -> Self { + // TODO: take code option as arg and set the default flags accordingly Self { descriptor, visibility: MethodVisibility::Public, @@ -90,7 +91,7 @@ impl Method { is_declared_syncrhonized: false, annotations: vec![], parameters_annotations: vec![], - code: (), + code: None, } } diff --git a/androscalpel_serializer/src/items/code.rs b/androscalpel_serializer/src/items/code.rs index 87c697e..c8afef6 100644 --- a/androscalpel_serializer/src/items/code.rs +++ b/androscalpel_serializer/src/items/code.rs @@ -174,8 +174,8 @@ pub struct TryItem { pub start_addr: u32, /// Number of 16-bit code unit covered by the entry. pub insn_count: u16, - /// **Offset in bytes** from the start of the `EncodedCatchHandlerList` to the - /// `EncodedCatchHandler` associated. + /// **Offset in bytes** from the start of the [`crate::EncodedCatchHandlerList`] to the + /// [`crate::EncodedCatchHandler`] associated. pub handler_off: u16, } @@ -193,7 +193,7 @@ impl EncodedCatchHandlerList { /// Return a reference to the [`crate::EncodedCatchHandler`] located at `offset` bytes after /// the begining of the [`crate::EncodedCatchHandlerList`]. Expected to be used to lookup - /// the value refered to by [`crate::TryItem.handler_off`]. + /// the value referred to by [`crate::TryItem`]`.handler_off`. 
pub fn get_handler_at_offset(&self, offset: u16) -> Result<&EncodedCatchHandler> { let offset = offset as usize; let mut current_offset = 0; @@ -222,7 +222,7 @@ impl Serializable for EncodedCatchHandlerList { } fn deserialize(input: &mut dyn ReadSeek) -> Result { - let size = i32::deserialize(input)?; + let Uleb128(size) = Uleb128::deserialize(input)?; let mut list = vec![]; for _ in 0..size { list.push(EncodedCatchHandler::deserialize(input)?); @@ -321,3 +321,65 @@ pub struct EncodedTypeAddrPair { /// Bytecode address of the exception handler pub addr: Uleb128, } + +#[cfg(test)] +mod test { + use super::*; + + const CODE_ITEM_RAW_1: &[u8] = &[ + 0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x3d, 0x25, 0x4f, 0x00, 0x15, 0x00, 0x00, + 0x00, 0x54, 0x30, 0x6c, 0x01, 0x71, 0x10, 0xb1, 0x0c, 0x00, 0x00, 0x28, 0x0e, 0x0d, 0x00, + 0x6e, 0x10, 0x26, 0x85, 0x00, 0x00, 0x0c, 0x01, 0x1a, 0x02, 0xcb, 0x15, 0x71, 0x20, 0xe3, + 0x05, 0x21, 0x00, 0x0a, 0x01, 0x38, 0x01, 0x03, 0x00, 0x0e, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x01, 0x01, 0xe9, 0x46, 0x06, + ]; + + const ENCODED_CATCH_HANDLER_LIST_1: &[u8] = &[0x01, 0x01, 0xe9, 0x46, 0x06]; + + #[test] + fn test_deserialize_code_item() { + assert_eq!( + CodeItem::deserialize_from_slice(CODE_ITEM_RAW_1).unwrap(), + CodeItem { + registers_size: 4, + ins_size: 1, + outs_size: 2, + debug_info_off: 5186877, + insns: vec![ + 0x3054, 0x16c, 0x1071, 0xcb1, 0x0, 0xe28, 0xd, 0x106e, 0x8526, 0x0, 0x10c, + 0x21a, 0x15cb, 0x2071, 0x5e3, 0x21, 0x10a, 0x138, 0x3, 0xe, 0x27 + ], + tries: vec![TryItem { + start_addr: 0, + insn_count: 5, + handler_off: 1, + },], + handlers: Some(EncodedCatchHandlerList { + list: vec![EncodedCatchHandler { + handlers: vec![EncodedTypeAddrPair { + type_idx: Uleb128(9065), + addr: Uleb128(6) + }], + catch_all_addr: None, + }] + }) + } + ); + } + + #[test] + fn test_deserialize_catch_handler_list() { + assert_eq!( + 
EncodedCatchHandlerList::deserialize_from_slice(ENCODED_CATCH_HANDLER_LIST_1).unwrap(), + EncodedCatchHandlerList { + list: vec![EncodedCatchHandler { + handlers: vec![EncodedTypeAddrPair { + type_idx: Uleb128(9065), + addr: Uleb128(6) + }], + catch_all_addr: None, + }] + } + ); + } +}