diff --git a/androscalpel_serializer/src/constant.rs b/androscalpel_serializer/src/constant.rs
index ed322fd..ef36c2f 100644
--- a/androscalpel_serializer/src/constant.rs
+++ b/androscalpel_serializer/src/constant.rs
@@ -24,6 +24,8 @@ pub enum EndianConstant {
     EndianConstant,
     #[prefix(0x78563412)]
     ReverseEndianConstant,
+    #[default_variant]
+    Unknown(u32),
 }
 
 /// [no-index](https://source.android.com/docs/core/runtime/dex-format#no-index)
diff --git a/androscalpel_serializer/src/file_reader.rs b/androscalpel_serializer/src/file_reader.rs
new file mode 100644
index 0000000..fd11285
--- /dev/null
+++ b/androscalpel_serializer/src/file_reader.rs
@@ -0,0 +1,270 @@
+//! Parser for a .dex file.
+
+use crate::{
+    CallSiteIdItem, ClassDefItem, EndianConstant, Error, FieldIdItem, HeaderItem, MapItemType,
+    MapList, MethodHandleItem, MethodIdItem, ProtoIdItem, Result, Serializable, StringIdItem,
+    TypeIdItem,
+};
+use std::io::{Cursor, Seek, SeekFrom};
+
+/// Read-only view over the raw bytes of a .dex file.
+///
+/// The header, the map list and the id tables are decoded once by [`DexFileReader::new`];
+/// other structures can be decoded on demand with [`DexFileReader::get_struct_at_offset`].
+pub struct DexFileReader<'a> {
+    /// Raw content of the file.
+    data: &'a [u8],
+    header: HeaderItem,
+    string_ids: Vec<StringIdItem>,
+    type_ids: Vec<TypeIdItem>,
+    proto_ids: Vec<ProtoIdItem>,
+    field_ids: Vec<FieldIdItem>,
+    method_ids: Vec<MethodIdItem>,
+    class_defs: Vec<ClassDefItem>,
+    call_site_ids: Vec<CallSiteIdItem>,
+    method_handles: Vec<MethodHandleItem>,
+    map_list: MapList,
+}
+
+impl<'a> DexFileReader<'a> {
+    /// Parse the header, the map list and the id tables of the .dex file stored in `data`.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if one of those structures cannot be deserialized, or if the map
+    /// list fails the consistency checks (mismatch with the header, duplicated item type).
+    pub fn new(data: &'a [u8]) -> Result<Self> {
+        let mut buffer = Cursor::new(data);
+        let header = HeaderItem::deserialize(&mut buffer)?;
+        let mut tmp_file = Self {
+            data,
+            header,
+            // Default values before population
+            string_ids: vec![],
+            type_ids: vec![],
+            proto_ids: vec![],
+            field_ids: vec![],
+            method_ids: vec![],
+            class_defs: vec![],
+            call_site_ids: vec![],
+            method_handles: vec![],
+            map_list: MapList { list: vec![] },
+        };
+        tmp_file.map_list = tmp_file.get_struct_at_offset(tmp_file.header.map_off)?;
+        tmp_file.string_ids = tmp_file.get_item_list::<StringIdItem>(
+            tmp_file.header.string_ids_off,
+            tmp_file.header.string_ids_size,
+        )?;
+        tmp_file.type_ids = tmp_file.get_item_list::<TypeIdItem>(
+            tmp_file.header.type_ids_off,
+            tmp_file.header.type_ids_size,
+        )?;
+        tmp_file.proto_ids = tmp_file.get_item_list::<ProtoIdItem>(
+            tmp_file.header.proto_ids_off,
+            tmp_file.header.proto_ids_size,
+        )?;
+        tmp_file.field_ids = tmp_file.get_item_list::<FieldIdItem>(
+            tmp_file.header.field_ids_off,
+            tmp_file.header.field_ids_size,
+        )?;
+        tmp_file.method_ids = tmp_file.get_item_list::<MethodIdItem>(
+            tmp_file.header.method_ids_off,
+            tmp_file.header.method_ids_size,
+        )?;
+        tmp_file.class_defs = tmp_file.get_item_list::<ClassDefItem>(
+            tmp_file.header.class_defs_off,
+            tmp_file.header.class_defs_size,
+        )?;
+        // Call sites and method handles have no table in the header: they can only be
+        // located through the map list.
+        if let Some(item) = tmp_file
+            .map_list
+            .list
+            .iter()
+            .find(|item| item.type_ == MapItemType::CallSiteIdItem)
+        {
+            tmp_file.call_site_ids =
+                tmp_file.get_item_list::<CallSiteIdItem>(item.offset, item.size)?;
+        }
+        if let Some(item) = tmp_file
+            .map_list
+            .list
+            .iter()
+            .find(|item| item.type_ == MapItemType::MethodHandleItem)
+        {
+            tmp_file.method_handles =
+                tmp_file.get_item_list::<MethodHandleItem>(item.offset, item.size)?;
+        }
+        tmp_file.sanity_check()?;
+        Ok(tmp_file)
+    }
+
+    /// Return the file [`HeaderItem`].
+    pub fn get_header(&self) -> &HeaderItem {
+        &self.header
+    }
+
+    /// Check that the header and the map list are consistent.
+    ///
+    /// Unexpected header values (version, file size, header size, endian tag, link section)
+    /// are only reported on stdout.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::InconsistantStruct`] if a map list entry disagrees with the header
+    /// or if an item type is listed more than once in the map list.
+    fn sanity_check(&self) -> Result<()> {
+        if self.header.magic.version != [0x30, 0x33, 0x39] {
+            println!(
+                "DEX 039 is the only verion currently supported, found {}",
+                std::str::from_utf8(self.header.magic.version.as_slice())
+                    .unwrap_or(&format!("{:x?}", self.header.magic.version))
+            ); // TODO: use proper logging
+        }
+        // TODO: check checksum
+        // TODO: check signature
+        if self.header.file_size as usize != self.data.len() {
+            println!(
+                "Unexpected file size found: {}, expected {}",
+                self.header.file_size,
+                self.data.len()
+            ); // TODO: use proper logging
+        }
+        if self.header.header_size != 0x70 {
+            println!(
+                "Unexpected header size found: 0x{:x}",
+                self.header.header_size
+            ); // TODO: use proper logging
+        }
+        if self.header.endian_tag != EndianConstant::EndianConstant {
+            println!("Wrong endian_tag found: {:x?}", self.header.endian_tag); // TODO: use proper logging
+        }
+        if self.header.link_off != 0 || self.header.link_size != 0 {
+            println!("Found non empty link section"); // TODO: use proper logging
+        }
+        for item in &self.map_list.list {
+            // Name, offset and size the header implies for this entry, if any.
+            let expected = match item.type_ {
+                MapItemType::HeaderItem => Some(("Header", 0, 1)),
+                MapItemType::StringIdItem => Some((
+                    "StringIdItem",
+                    self.header.string_ids_off,
+                    self.header.string_ids_size,
+                )),
+                MapItemType::TypeIdItem => Some((
+                    "TypeIdItem",
+                    self.header.type_ids_off,
+                    self.header.type_ids_size,
+                )),
+                MapItemType::ProtoIdItem => Some((
+                    "ProtoIdItem",
+                    self.header.proto_ids_off,
+                    self.header.proto_ids_size,
+                )),
+                MapItemType::FieldIdItem => Some((
+                    "FieldIdItem",
+                    self.header.field_ids_off,
+                    self.header.field_ids_size,
+                )),
+                MapItemType::MethodIdItem => Some((
+                    "MethodIdItem",
+                    self.header.method_ids_off,
+                    self.header.method_ids_size,
+                )),
+                MapItemType::ClassDefItem => Some((
+                    "ClassDefItem",
+                    self.header.class_defs_off,
+                    self.header.class_defs_size,
+                )),
+                // The map list entry describes the single map_list structure itself.
+                MapItemType::MapList => Some(("MapList", self.header.map_off, 1)),
+                MapItemType::UnkownType(ty) => {
+                    // TODO: use proper logging
+                    println!("Unknown Type found in map_list: 0x{ty:04x}");
+                    None
+                }
+                // The other sections (call sites and method handles included) are not
+                // described by the header: nothing to cross-check.
+                _ => None,
+            };
+            if let Some((name, offset, size)) = expected {
+                if item.offset != offset || item.size != size {
+                    return Err(Error::InconsistantStruct(format!(
+                        "Inconsistant {name} Mapping info found in map_list: {item:x?}, \
+                         expected offset: 0x{offset:x}, expected size: {size}"
+                    )));
+                }
+            }
+        }
+        // Count the entries per type once, after every entry has been checked individually.
+        let mut occurences = std::collections::HashMap::new();
+        for ty in self.map_list.list.iter().map(|val| val.type_) {
+            *occurences.entry(ty).or_insert(0) += 1;
+        }
+        let mut duplicate = false;
+        for (ty, val) in occurences {
+            if val > 1 {
+                println!("Found multiple {} occurence of {:?} in map_list", val, ty);
+                // TODO: use proper logging
+                duplicate = true;
+            }
+        }
+        if duplicate {
+            return Err(Error::InconsistantStruct(
+                "Found multiple occurence of the same item type in map_list".into(),
+            ));
+        }
+        Ok(())
+    }
+
+    /// Deserialize `size` consecutive items of type `T`, starting at `offset` in the file.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::DeserializationError`] if an item cannot be deserialized.
+    fn get_item_list<T: Serializable>(&self, offset: u32, size: u32) -> Result<Vec<T>> {
+        let mut buffer = Cursor::new(self.data);
+        buffer.seek(SeekFrom::Start(offset as u64)).map_err(|err| {
+            Error::DeserializationError(format!("Failed to seek 0x{offset:x} position: {err}"))
+        })?;
+        // `size` is read from the file: the list is grown item by item, not pre-allocated.
+        let mut list = vec![];
+        for _ in 0..size {
+            let pos = buffer.stream_position().map_err(|err| {
+                Error::DeserializationError(format!("Failled to read buffer position: {err}"))
+            })?;
+            list.push(T::deserialize(&mut buffer).map_err(|err| {
+                Error::DeserializationError(format!(
+                    "Failed to deserialize {} at 0x{:x}: {err}",
+                    std::any::type_name::<T>(),
+                    pos
+                ))
+            })?);
+        }
+        Ok(list)
+    }
+
+    /// Return the structure `T` located at `offset` in the file.
+    ///
+    /// The offset is not validated: this method decodes whatever bytes are found at
+    /// `offset`, so an offset that does not point to a `T` yields either an error or a
+    /// meaningless value.
+    ///
+    /// # Errors
+    ///
+    /// Returns [`Error::DeserializationError`] if seeking to `offset` fails or if `T`
+    /// cannot be deserialized from the bytes found there.
+    pub fn get_struct_at_offset<T: Serializable>(&self, offset: u32) -> Result<T> {
+        let mut buffer = Cursor::new(self.data);
+        buffer.seek(SeekFrom::Start(offset as u64)).map_err(|err| {
+            Error::DeserializationError(format!("Failed to seek 0x{offset:x} position: {err}"))
+        })?;
+        T::deserialize(&mut buffer).map_err(|err| {
+            Error::DeserializationError(format!(
+                "Failed to deserialize {} at 0x{:x}: {err}",
+                std::any::type_name::<T>(),
+                offset
+            ))
+        })
+    }
+}
diff --git a/androscalpel_serializer/src/items/map.rs b/androscalpel_serializer/src/items/map.rs
index ba71ac8..10ea0d6 100644
--- a/androscalpel_serializer/src/items/map.rs
+++ b/androscalpel_serializer/src/items/map.rs
@@ -22,7 +22,7 @@ pub struct MapItem {
 }
 
 /// The type of the items refered by a [`MapItem`]:
-#[derive(Serializable, Clone, Copy, PartialEq, Eq, Debug)]
+#[derive(Serializable, Clone, Copy, PartialEq, Eq, Debug, Hash)]
 #[prefix_type(u16)]
 pub enum MapItemType {
     #[prefix(0x0000)]
@@ -67,6 +67,8 @@ pub enum MapItemType {
     AnnotationsDirectoryItem,
     #[prefix(0xF000)]
     HiddenapiClassDataItem,
+    #[default_variant]
+    UnkownType(u16),
 }
 
 impl MapList {
diff --git a/androscalpel_serializer/src/lib.rs b/androscalpel_serializer/src/lib.rs
index 44a449e..5778f9f 100644
--- a/androscalpel_serializer/src/lib.rs
+++ b/androscalpel_serializer/src/lib.rs
@@ -3,6 +3,7 @@ pub mod array;
 pub mod constant;
 pub mod core;
 pub mod debug;
+pub mod file_reader;
 pub mod items;
 pub mod value;
 
@@ -15,3 +16,5 @@ pub use constant::*;
 pub use debug::*;
 pub use items::*;
 pub use value::*;
+
+pub use file_reader::*;