//! Parser for a .dex file. use crate::{ CallSiteIdItem, ClassDataItem, ClassDefItem, EndianConstant, Error, FieldIdItem, HeaderItem, HiddenApiFlags, HiddenapiClassDataItem, MapItemType, MapList, MethodHandleItem, MethodIdItem, ProtoIdItem, Result, Serializable, StringDataItem, StringIdItem, TypeIdItem, }; use log::{error, info, warn}; use std::io::{Cursor, Seek, SeekFrom}; use std::sync::atomic::{AtomicBool, Ordering}; #[derive(Debug)] pub struct DexFileReader<'a> { // Ideally, this would be a Read+Seek, but Read+Seek is not thread safe, while we can // internally instanciate multiple cursors on the same non mutable slice. data: &'a [u8], header: HeaderItem, string_ids: Vec, /// If `string_was_resolved[string_idx]` is true, the string was resolved at some point. /// This allows us to get the strings that are in a dex file but not used by its /// classes. (Yes, they are some, looking at you `~~D8{"backend":"dex","compilation-mode": /// "release","has-checksums":false,"min-api":24,"version":"8.2.42"}`) /// /// Use AtomicBool to hide this inside &self methods that are easy to run concurrently. 
string_was_resolved: Vec, type_ids: Vec, proto_ids: Vec, field_ids: Vec, method_ids: Vec, class_defs: Vec, call_site_ids: Vec, method_handles: Vec, hiddenapi_class_data: Option, map_list: MapList, } impl<'a> DexFileReader<'a> { pub fn new(data: &'a [u8]) -> Result { let mut buffer = Cursor::new(data); let header = HeaderItem::deserialize(&mut buffer)?; let mut tmp_file = Self { data, header, // Default values before population string_ids: vec![], string_was_resolved: vec![], type_ids: vec![], proto_ids: vec![], field_ids: vec![], method_ids: vec![], class_defs: vec![], call_site_ids: vec![], method_handles: vec![], hiddenapi_class_data: None, map_list: MapList { list: vec![] }, }; if tmp_file.header.map_off != 0 { tmp_file.map_list = tmp_file.get_struct_at_offset(tmp_file.header.map_off)?; } tmp_file.string_ids = tmp_file.get_item_list::( tmp_file.header.string_ids_off, tmp_file.header.string_ids_size, )?; for _ in 0..tmp_file.string_ids.len() { tmp_file.string_was_resolved.push(AtomicBool::new(false)); } tmp_file.type_ids = tmp_file.get_item_list::( tmp_file.header.type_ids_off, tmp_file.header.type_ids_size, )?; tmp_file.proto_ids = tmp_file.get_item_list::( tmp_file.header.proto_ids_off, tmp_file.header.proto_ids_size, )?; tmp_file.field_ids = tmp_file.get_item_list::( tmp_file.header.field_ids_off, tmp_file.header.field_ids_size, )?; tmp_file.method_ids = tmp_file.get_item_list::( tmp_file.header.method_ids_off, tmp_file.header.method_ids_size, )?; tmp_file.class_defs = tmp_file.get_item_list::( tmp_file.header.class_defs_off, tmp_file.header.class_defs_size, )?; if let Some(item) = tmp_file .map_list .list .iter() .find(|item| item.type_ == MapItemType::CallSiteIdItem) { tmp_file.call_site_ids = tmp_file.get_item_list::(item.offset, item.size)? } if let Some(item) = tmp_file .map_list .list .iter() .find(|item| item.type_ == MapItemType::CallSiteIdItem) { tmp_file.method_handles = tmp_file.get_item_list::(item.offset, item.size)? 
} if let Some(item) = tmp_file .map_list .list .iter() .find(|item| item.type_ == MapItemType::HiddenapiClassDataItem) { tmp_file.hiddenapi_class_data = Some(tmp_file.get_struct_at_offset::(item.offset)?); } tmp_file.sanity_check()?; Ok(tmp_file) } /// Return the file [`HeaderItem`]. pub fn get_header(&self) -> &HeaderItem { &self.header } /// Return the file [`StringIdItem`] list. pub fn get_string_ids(&self) -> &[StringIdItem] { &self.string_ids } /// Return the file [`TypeIdItem`] list. pub fn get_type_ids(&self) -> &[TypeIdItem] { &self.type_ids } /// Return the file [`ProtoIdItem`] list. pub fn get_proto_ids(&self) -> &[ProtoIdItem] { &self.proto_ids } /// Return the file [`FieldIdItem`] list. pub fn get_field_ids(&self) -> &[FieldIdItem] { &self.field_ids } /// Return the file [`MethodIdItem`] list. pub fn get_method_ids(&self) -> &[MethodIdItem] { &self.method_ids } /// Return the file [`ClassDefItem`] list. pub fn get_class_defs(&self) -> &[ClassDefItem] { &self.class_defs } /// Return the file [`CallSiteIdItem`] list. pub fn get_call_site_ids(&self) -> &[CallSiteIdItem] { &self.call_site_ids } /// Return the file [`MethodHandleItem`] list. pub fn get_method_handles(&self) -> &[MethodHandleItem] { &self.method_handles } /// Return the file [`MapList`]. pub fn get_map_list(&self) -> &MapList { &self.map_list } /// Return the [`StringDataItem`] of from its idx. pub fn get_string(&self, idx: u32) -> Result { let id = self .string_ids .get(idx as usize) .ok_or(Error::InconsistantStruct(format!( "string idx {idx} is out of bound (|string_ids|={})", self.string_ids.len() )))?; let string = self .get_struct_at_offset::(id.string_data_off) .map_err(|err| { Error::DeserializationError(format!("Failled to parse string {idx}: {err}")) })?; self.string_was_resolved[idx as usize].store(true, Ordering::Relaxed); Ok(string) } /// Return a [`TypeIdItem`] reference from its idx. 
pub fn get_type_id(&self, idx: usize) -> Result<&TypeIdItem> {
    // The error message is only formatted when the lookup actually fails.
    self.type_ids.get(idx).ok_or_else(|| {
        Error::InconsistantStruct(format!(
            "type idx {} out of bound of type_ids (|type_ids| = {})",
            idx,
            self.type_ids.len()
        ))
    })
}

/// Return a [`ProtoIdItem`] reference from its idx.
///
/// # Errors
///
/// Fails if `idx` is out of bound of the prototype id table.
pub fn get_proto_id(&self, idx: usize) -> Result<&ProtoIdItem> {
    self.proto_ids.get(idx).ok_or_else(|| {
        Error::InconsistantStruct(format!(
            "prototype idx {idx} is out of bound (|proto_ids|={})",
            self.proto_ids.len()
        ))
    })
}

/// Return a [`FieldIdItem`] reference from its idx.
///
/// # Errors
///
/// Fails if `idx` is out of bound of the field id table.
pub fn get_field_id(&self, idx: usize) -> Result<&FieldIdItem> {
    self.field_ids.get(idx).ok_or_else(|| {
        Error::InconsistantStruct(format!(
            "field idx {idx} is out of bound (|field_ids|={})",
            self.field_ids.len()
        ))
    })
}

/// Return a [`MethodIdItem`] reference from its idx.
///
/// # Errors
///
/// Fails if `idx` is out of bound of the method id table.
pub fn get_method_id(&self, idx: usize) -> Result<&MethodIdItem> {
    self.method_ids.get(idx).ok_or_else(|| {
        Error::InconsistantStruct(format!(
            "method idx {idx} is out of bound (|method_ids|={})",
            self.method_ids.len()
        ))
    })
}

/// Return a [`MethodHandleItem`] reference from its idx.
///
/// # Errors
///
/// Fails if `idx` is out of bound of the method handle table.
pub fn get_method_handle(&self, idx: usize) -> Result<&MethodHandleItem> {
    self.method_handles.get(idx).ok_or_else(|| {
        Error::InconsistantStruct(format!(
            "method handle {idx} is out of bound (|method_handles|={})",
            self.method_handles.len()
        ))
    })
}

/// Return a [`CallSiteIdItem`] reference from its idx.
pub fn get_call_site_id(&self, idx: usize) -> Result<&CallSiteIdItem> { self.call_site_ids .get(idx) .ok_or(Error::InconsistantStruct(format!( "call site {idx} is out of bound (|call_site_ids|={})", self.call_site_ids.len() ))) } fn sanity_check(&self) -> Result<()> { if self.header.magic.version != [0x30, 0x33, 0x39] { warn!( "DEX 039 is the only version currently supported, found {}", std::str::from_utf8(self.header.magic.version.as_slice()) .unwrap_or(&format!("{:x?}", self.header.magic.version)) ); } // TODO: check checksum // TODO: check signature if self.header.file_size as usize != self.data.len() { info!( "Unexpected file size found: {}, expected {}", self.header.file_size, self.data.len() ); } if self.header.header_size != 0x70 { info!( "Unexpected header size found: 0x{:x}", self.header.header_size ); } if self.header.endian_tag != EndianConstant::EndianConstant { warn!("Wrong endian_tag found: {:x?}", self.header.endian_tag); } if self.header.link_off != 0 || self.header.link_size != 0 { info!("Found non empty link section, the section will be ignored"); } for item in &self.map_list.list { match item.type_ { MapItemType::HeaderItem if item.offset != 0 || item.size != 1 => { return Err(Error::InconsistantStruct(format!( "Inconsistant Header Mapping info found in map_list: {item:x?}" ))); } MapItemType::StringIdItem if item.offset != self.header.string_ids_off || item.size != self.header.string_ids_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.string_ids_off: 0x{:x}, header.string_ids_size: {}", self.header.string_ids_off, self.header.string_ids_size ))); } MapItemType::TypeIdItem if item.offset != self.header.type_ids_off || item.size != self.header.type_ids_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.type_ids_off: 0x{:x}, header.type_ids_size: {}", self.header.type_ids_off, 
self.header.type_ids_size ))); } MapItemType::ProtoIdItem if item.offset != self.header.proto_ids_off || item.size != self.header.proto_ids_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.proto_ids_off: 0x{:x}, header.proto_ids_size: {}", self.header.proto_ids_off, self.header.proto_ids_size ))); } MapItemType::FieldIdItem if item.offset != self.header.field_ids_off || item.size != self.header.field_ids_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.field_ids_off: 0x{:x}, header.field_ids_size: {}", self.header.field_ids_off, self.header.field_ids_size ))); } MapItemType::MethodIdItem if item.offset != self.header.method_ids_off || item.size != self.header.method_ids_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.method_ids_off: 0x{:x}, header.method_ids_size: {}", self.header.method_ids_off, self.header.method_ids_size ))); } MapItemType::ClassDefItem if item.offset != self.header.class_defs_off || item.size != self.header.class_defs_size => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.class_defs_off: 0x{:x}, header.class_defs_size: {}", self.header.class_defs_off, self.header.class_defs_size ))); } MapItemType::MapList if item.offset != self.header.map_off || item.size != 1 => { return Err(Error::InconsistantStruct(format!( "Inconsistant MapList Mapping info found in map_list: {item:x?}, \ header.map_list_off: 0x{:x}", self.header.map_off ))); } /* MapItemType::CallSiteIdItem => todo!(), MapItemType::MethodHandleItem => todo!(), MapItemType::TypeList => todo!(), MapItemType::AnnotationSetRefList => todo!(), MapItemType::AnnotationSetItem => todo!(), MapItemType::ClassDataItem => todo!(), MapItemType::CodeItem => todo!(), 
MapItemType::StringDataItem => todo!(), MapItemType::DebugInfoItem => todo!(), MapItemType::AnnotationItem => todo!(), MapItemType::EncodedArrayItem => todo!(), MapItemType::AnnotationsDirectoryItem => todo!(), MapItemType::HiddenapiClassDataItem => todo!(), */ MapItemType::UnkownType(ty) => { info!("Unknown Type found in map_list: 0x{ty:04x}, it will be ignored"); } _ => (), } let mut occurences = std::collections::HashMap::new(); for ty in self.map_list.list.iter().map(|val| val.type_) { *occurences.entry(ty).or_insert(0) += 1; } let mut duplicate = false; for (ty, val) in occurences { if val > 1 { error!("Found multiple {} occurence of {:?} in map_list", val, ty); duplicate = true; } } if duplicate { return Err(Error::InconsistantStruct( "Found multiple occurence of the same item type in map_list".into(), )); } } Ok(()) } fn get_item_list(&self, offset: u32, size: u32) -> Result> { if offset == 0 { return Ok(vec![]); } let mut buffer = Cursor::new(self.data); buffer.seek(SeekFrom::Start(offset as u64)).map_err(|err| { Error::DeserializationError(format!("Failed to seek 0x{offset:x} position: {err}")) })?; let mut list = vec![]; for _ in 0..size { let pos = buffer.stream_position().map_err(|err| { Error::DeserializationError(format!("Failled to read buffer position: {err}")) })?; list.push(T::deserialize(&mut buffer).map_err(|err| { Error::DeserializationError(format!( "Failed to deserialize {} at 0x{:x}: {err}", std::any::type_name::(), pos )) })?); } Ok(list) } /// Return the structure `T` located at `offset` in the file. /// /// # Warning /// /// If the offset is invalid, UB. 
pub fn get_struct_at_offset(&self, offset: u32) -> Result { let mut buffer = Cursor::new(self.data); buffer.seek(SeekFrom::Start(offset as u64)).unwrap(); let r = T::deserialize(&mut buffer).map_err(|err| { Error::DeserializationError(format!( "Failed to deserialize {} at 0x{:x}: {err}", std::any::type_name::(), offset )) }); if buffer.position() as u32 > self.header.data_off + self.header.data_size { // Return error? Android won't run an apk that does not respect this condition for most // (all?) struct, but a lot a tools don't care. warn!( "Deserialized {} at 0x{:x}, but ended up reading out of the data section \ (0x{:x} to 0x{:x}, current cursor at 0x{:x}", std::any::type_name::(), offset, self.header.data_off, self.header.data_off + self.header.data_size, buffer.position() ); } r } /// Return the hiddenapi flags list for the given class. /// /// The list of flags is composed of one [`HiddenApiFlags`] for each static field, instance /// field, direct method and virtual method of the class, in that order. /// /// `class_def_item_idx` if the idx of the `class_def_item`, **not** the `class_idx` (contrary /// to what /// says) pub fn get_class_hiddenapi_flags( &self, class_def_item_idx: usize, ) -> Result>> { if class_def_item_idx >= self.class_defs.len() { return Err(Error::InconsistantStruct(format!( "idx 0x{class_def_item_idx:x} is out of bound of class_defs" ))); } let class_def = self.class_defs[class_def_item_idx]; if class_def.class_data_off == 0 { if self.hiddenapi_class_data.is_some() { return Ok(Some(vec![])); } else { return Ok(None); } } let class_data = self.get_struct_at_offset::(class_def.class_data_off)?; let nb_flags = class_data.static_fields.len() + class_data.instance_fields.len() + class_data.direct_methods.len() + class_data.virtual_methods.len(); if let Some(hidden_api_data) = &self.hiddenapi_class_data { hidden_api_data .get_flags(nb_flags, class_def_item_idx) .map(Some) } else { Ok(None) } } /// Return the strings that where not referenced. 
pub fn get_not_resolved_strings(&mut self) -> Result> { // use `&mut self` because using this method at the same time as performing // `Self::get_string()` is UB. let idxs: Vec = (0..self.string_was_resolved.len()) .filter(|idx| !self.string_was_resolved[*idx].load(Ordering::Relaxed)) .map(|idx| idx as u32) .collect(); let mut strings = vec![]; for idx in &idxs { strings.push(self.get_string(*idx)?); } for idx in idxs { self.string_was_resolved[idx as usize].store(false, Ordering::Relaxed) } Ok(strings) } }