add a rough implementation of code

This commit is contained in:
Jean-Marie Mineau 2023-11-30 14:40:49 +01:00
parent 026b9ddd41
commit 80968c9bcf
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
7 changed files with 198 additions and 13 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
/target /target
/venv_maturin /venv_maturin
/venv_test
/test.apk /test.apk

View file

@ -1,5 +1,4 @@
- method (what's left to do?) - method (what's left to do except code?)
- annotations (start from annotations_off in class def item)
- generate .dex - generate .dex
- code - code
- edditable code format - edditable code format

View file

@ -1,6 +1,6 @@
//! Representation of an apk. //! Representation of an apk.
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Context, Result};
use std::collections::HashMap; use std::collections::HashMap;
use log::info; use log::info;
@ -280,7 +280,7 @@ impl Apk {
)) ))
} }
/// Return a [`Vec<DexAnnotationItem>`] for the offset of an [`AnnotationSet`]. /// Return a [`Vec<DexAnnotationItem>`] for the offset of an [`AnnotationSetItem`].
pub fn get_annotation_items_from_annotation_set_off( pub fn get_annotation_items_from_annotation_set_off(
annotations_set_off: u32, annotations_set_off: u32,
dex: &DexFileReader, dex: &DexFileReader,
@ -577,7 +577,7 @@ impl Apk {
pub fn get_method_from_idx( pub fn get_method_from_idx(
idx: usize, idx: usize,
Uleb128(access_flags): Uleb128, Uleb128(access_flags): Uleb128,
Uleb128(_code_off): Uleb128, Uleb128(code_off): Uleb128,
dex: &DexFileReader, dex: &DexFileReader,
) -> Result<Method> { ) -> Result<Method> {
let descriptor = Self::get_id_method_from_idx(idx, dex)?; let descriptor = Self::get_id_method_from_idx(idx, dex)?;
@ -648,6 +648,13 @@ impl Apk {
| ACC_DECLARED_SYNCHRONIZED) | ACC_DECLARED_SYNCHRONIZED)
); );
} }
let code = if code_off == 0 {
None
} else {
Some(Self::get_code_from_off(code_off, dex).with_context(|| {
format!("Failed to parse code of method {}", descriptor.__str__())
})?)
};
Ok(Method { Ok(Method {
descriptor, descriptor,
@ -665,7 +672,56 @@ impl Apk {
is_declared_syncrhonized, is_declared_syncrhonized,
annotations: vec![], annotations: vec![],
parameters_annotations: vec![], parameters_annotations: vec![],
code: (), code,
})
}
/// Return a [`Code`] from its offset in the dex file.
///
/// Reads the [`CodeItem`] located at `offset` in `dex` and converts it to a [`Code`]:
/// the debug info is kept as raw bytes, the try items are flattened to tuples, and the
/// type indexes of the catch handlers are resolved with `get_id_type_from_idx`.
///
/// # Errors
///
/// Propagates any error raised while reading the structures from `dex`, while
/// re-serializing the debug info, or while resolving a handler's type index.
pub fn get_code_from_off(offset: u32, dex: &DexFileReader) -> Result<Code> {
let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?;
// A `debug_info_off` of 0 is treated as "no debug info": keep an empty byte vector.
let debug_info = if code_item.debug_info_off == 0 {
vec![]
} else {
dex.get_struct_at_offset::<DebugInfoItem>(code_item.debug_info_off)?
.serialize_to_vec()? // no dealing with that right now
};
// Flatten each `TryItem` into a `(start_addr, insn_count, handler_off)` tuple.
let mut tries = vec![];
for TryItem {
start_addr,
insn_count,
handler_off,
} in code_item.tries
{
tries.push((start_addr, insn_count, handler_off));
}
// One entry per `EncodedCatchHandler`: the typed handlers as `(type, addr)` pairs,
// followed by the optional catch-all address. Stays empty when there is no handler list.
let mut handlers_aux = vec![];
if let Some(EncodedCatchHandlerList { list }) = code_item.handlers {
for EncodedCatchHandler {
handlers,
catch_all_addr,
} in list
{
let mut handlers_ = vec![];
// Unwrap the `Uleb128` newtype, keeping the `Option`.
let catch_all_addr = catch_all_addr.map(|Uleb128(val)| val);
for EncodedTypeAddrPair {
type_idx: Uleb128(type_idx),
addr: Uleb128(addr),
} in handlers
{
// Resolve the type index to the matching type id; fails on an invalid index.
handlers_.push((Self::get_id_type_from_idx(type_idx as usize, dex)?, addr))
}
handlers_aux.push((handlers_, catch_all_addr));
}
}
let handlers = handlers_aux;
Ok(Code {
registers_size: code_item.registers_size,
ins_size: code_item.ins_size,
outs_size: code_item.outs_size,
debug_info,
insns: code_item.insns,
tries,
handlers,
}) })
} }

63
androscalpel/src/code.rs Normal file
View file

@ -0,0 +1,63 @@
//! Representation of the code of a method.
use pyo3::prelude::*;
use crate::IdType;
// TODO: make this easy to edit/manipulate, maybe move to Method
/// Temporary representation of one catch handler: the list of
/// `(caught exception type, bytecode address of the handler)` pairs, followed by the
/// optional address of the catch-all handler.
type TmpHandlerType = (Vec<(IdType, u32)>, Option<u32>);
/// The code run by a method.
#[pyclass]
#[derive(Debug, Clone)]
pub struct Code {
// TODO: remove and compute this value from code.
/// The number of registers used by the code
#[pyo3(get, set)]
pub registers_size: u16,
// TODO: what does it mean? is it computable?
/// The number of words of incoming arguments to the method
#[pyo3(get, set)]
pub ins_size: u16,
// TODO: what does it mean? is it computable?
/// The number of words of outgoing argument space
#[pyo3(get, set)]
pub outs_size: u16,
// TODO: implement
/// The debug info, currently kept as raw serialized bytes (empty when the method has none)
#[pyo3(get, set)]
pub debug_info: Vec<u8>,
// TODO: implement OPcode
/// The instructions, as raw 16-bit code units.
#[pyo3(get, set)]
pub insns: Vec<u16>,
// TODO: currently unusable, just a mapping to TryItem
// TODO: maybe implement as custom OPcode to make it easy to modify?
/// Try blocks, as `(start_addr, insn_count, handler_off)` tuples: the start address of the
/// block, the number of 16-bit code units it covers, and the offset in bytes of the
/// associated handler from the start of the encoded handler list.
#[pyo3(get, set)]
pub tries: Vec<(u32, u16, u16)>,
// TODO: currently unusable, just a mapping to EncodedCatchHandler
// TODO: maybe implement as custom OPcode to make it easy to modify?
/// The handlers associated to the tries blocks. Each entry is
/// `(handlers, catch_all_addr)`, where `handlers` lists `(exception type, handler address)`
/// pairs and `catch_all_addr` is the optional address of the catch-all handler.
#[pyo3(get, set)]
pub handlers: Vec<TmpHandlerType>,
}
#[pymethods]
impl Code {
    // TODO: provide a `#[new]` constructor (`pub fn new() -> Self`), not implemented yet.

    /// Python `repr()` of the object.
    ///
    /// Placeholder: always returns the constant string `Code()`, whatever the content.
    pub fn __repr__(&self) -> String {
        String::from("Code()")
    }

    /// Python `str()` of the object; identical to [`Code::__repr__`].
    pub fn __str__(&self) -> String {
        self.__repr__()
    }
}

View file

@ -5,6 +5,7 @@ use pyo3::prelude::*;
pub mod annotation; pub mod annotation;
pub mod apk; pub mod apk;
pub mod class; pub mod class;
pub mod code;
pub mod dex_id; pub mod dex_id;
pub mod dex_string; pub mod dex_string;
pub mod field; pub mod field;
@ -16,6 +17,7 @@ pub mod value;
pub use annotation::*; pub use annotation::*;
pub use apk::*; pub use apk::*;
pub use class::*; pub use class::*;
pub use code::*;
pub use dex_id::*; pub use dex_id::*;
pub use dex_string::*; pub use dex_string::*;
pub use field::*; pub use field::*;
@ -63,6 +65,7 @@ fn androscalpel(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_class::<Method>()?; m.add_class::<Method>()?;
m.add_class::<Field>()?; m.add_class::<Field>()?;
m.add_class::<Class>()?; m.add_class::<Class>()?;
m.add_class::<Code>()?;
m.add_class::<Apk>()?; m.add_class::<Apk>()?;
Ok(()) Ok(())
} }

View file

@ -2,7 +2,7 @@
use pyo3::prelude::*; use pyo3::prelude::*;
use crate::{DexAnnotationItem, IdMethod}; use crate::{Code, DexAnnotationItem, IdMethod};
/// Represent a method. /// Represent a method.
#[pyclass] #[pyclass]
@ -57,7 +57,7 @@ pub struct Method {
pub parameters_annotations: Vec<Vec<DexAnnotationItem>>, pub parameters_annotations: Vec<Vec<DexAnnotationItem>>,
/// The code of the method /// The code of the method
pub code: (), pub code: Option<Code>,
} }
/// Represent the visibility of a field /// Represent the visibility of a field
@ -74,6 +74,7 @@ pub enum MethodVisibility {
impl Method { impl Method {
#[new] #[new]
pub fn new(descriptor: IdMethod) -> Self { pub fn new(descriptor: IdMethod) -> Self {
// TODO: take code option as arg and set the default flags accordingly
Self { Self {
descriptor, descriptor,
visibility: MethodVisibility::Public, visibility: MethodVisibility::Public,
@ -90,7 +91,7 @@ impl Method {
is_declared_syncrhonized: false, is_declared_syncrhonized: false,
annotations: vec![], annotations: vec![],
parameters_annotations: vec![], parameters_annotations: vec![],
code: (), code: None,
} }
} }

View file

@ -174,8 +174,8 @@ pub struct TryItem {
pub start_addr: u32, pub start_addr: u32,
/// Number of 16-bit code unit covered by the entry. /// Number of 16-bit code unit covered by the entry.
pub insn_count: u16, pub insn_count: u16,
/// **Offset in bytes** from the start of the `EncodedCatchHandlerList` to the /// **Offset in bytes** from the start of the [`crate::EncodedCatchHandlerList`] to the
/// `EncodedCatchHandler` associated. /// [`crate::EncodedCatchHandler`] associated.
pub handler_off: u16, pub handler_off: u16,
} }
@ -193,7 +193,7 @@ impl EncodedCatchHandlerList {
/// Return a reference to the [`crate::EncodedCatchHandler`] located at `offset` bytes after /// Return a reference to the [`crate::EncodedCatchHandler`] located at `offset` bytes after
/// the begining of the [`crate::EncodedCatchHandlerList`]. Expected to be used to lookup /// the begining of the [`crate::EncodedCatchHandlerList`]. Expected to be used to lookup
/// the value refered to by [`crate::TryItem.handler_off`]. /// the value refered to by [`crate::TryItem`]`.handler_off`.
pub fn get_handler_at_offset(&self, offset: u16) -> Result<&EncodedCatchHandler> { pub fn get_handler_at_offset(&self, offset: u16) -> Result<&EncodedCatchHandler> {
let offset = offset as usize; let offset = offset as usize;
let mut current_offset = 0; let mut current_offset = 0;
@ -222,7 +222,7 @@ impl Serializable for EncodedCatchHandlerList {
} }
fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> { fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
let size = i32::deserialize(input)?; let Uleb128(size) = Uleb128::deserialize(input)?;
let mut list = vec![]; let mut list = vec![];
for _ in 0..size { for _ in 0..size {
list.push(EncodedCatchHandler::deserialize(input)?); list.push(EncodedCatchHandler::deserialize(input)?);
@ -321,3 +321,65 @@ pub struct EncodedTypeAddrPair {
/// Bytecode address of the exception handler /// Bytecode address of the exception handler
pub addr: Uleb128, pub addr: Uleb128,
} }
#[cfg(test)]
mod test {
    use super::*;

    /// Raw bytes of a code item with 21 instructions, one try item and one catch handler.
    const CODE_ITEM_RAW_1: &[u8] = &[
        0x04, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, 0x3d, 0x25, 0x4f, 0x00, 0x15, 0x00, 0x00,
        0x00, 0x54, 0x30, 0x6c, 0x01, 0x71, 0x10, 0xb1, 0x0c, 0x00, 0x00, 0x28, 0x0e, 0x0d, 0x00,
        0x6e, 0x10, 0x26, 0x85, 0x00, 0x00, 0x0c, 0x01, 0x1a, 0x02, 0xcb, 0x15, 0x71, 0x20, 0xe3,
        0x05, 0x21, 0x00, 0x0a, 0x01, 0x38, 0x01, 0x03, 0x00, 0x0e, 0x00, 0x27, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x01, 0x00, 0x01, 0x01, 0xe9, 0x46, 0x06,
    ];

    /// Raw bytes of the handler list alone (the last five bytes of `CODE_ITEM_RAW_1`).
    const ENCODED_CATCH_HANDLER_LIST_1: &[u8] = &[0x01, 0x01, 0xe9, 0x46, 0x06];

    /// The handler list both fixtures are expected to decode to: a single handler
    /// catching type index 9065 at address 6, without catch-all.
    fn expected_handler_list_1() -> EncodedCatchHandlerList {
        let pair = EncodedTypeAddrPair {
            type_idx: Uleb128(9065),
            addr: Uleb128(6),
        };
        let handler = EncodedCatchHandler {
            handlers: vec![pair],
            catch_all_addr: None,
        };
        EncodedCatchHandlerList {
            list: vec![handler],
        }
    }

    #[test]
    fn test_deserialize_code_item() {
        let parsed = CodeItem::deserialize_from_slice(CODE_ITEM_RAW_1).unwrap();
        let expected = CodeItem {
            registers_size: 4,
            ins_size: 1,
            outs_size: 2,
            debug_info_off: 5186877,
            insns: vec![
                0x3054, 0x16c, 0x1071, 0xcb1, 0x0, 0xe28, 0xd, 0x106e, 0x8526, 0x0, 0x10c,
                0x21a, 0x15cb, 0x2071, 0x5e3, 0x21, 0x10a, 0x138, 0x3, 0xe, 0x27,
            ],
            tries: vec![TryItem {
                start_addr: 0,
                insn_count: 5,
                handler_off: 1,
            }],
            handlers: Some(expected_handler_list_1()),
        };
        assert_eq!(parsed, expected);
    }

    #[test]
    fn test_deserialize_catch_handler_list() {
        let parsed =
            EncodedCatchHandlerList::deserialize_from_slice(ENCODED_CATCH_HANDLER_LIST_1).unwrap();
        assert_eq!(parsed, expected_handler_list_1());
    }
}