436 lines
18 KiB
Rust
436 lines
18 KiB
Rust
//! Parser for a .dex file.
|
|
|
|
use crate::{
|
|
CallSiteIdItem, ClassDefItem, EndianConstant, Error, FieldIdItem, HeaderItem, MapItemType,
|
|
MapList, MethodHandleItem, MethodIdItem, ProtoIdItem, Result, Serializable, StringDataItem,
|
|
StringIdItem, TypeIdItem,
|
|
};
|
|
use log::{error, info, warn};
|
|
use std::io::{Cursor, Seek, SeekFrom};
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
|
|
|
#[derive(Debug)]
|
|
pub struct DexFileReader<'a> {
|
|
data: &'a [u8],
|
|
header: HeaderItem,
|
|
string_ids: Vec<StringIdItem>,
|
|
/// If `string_was_resolved[string_idx]` is true, the string was resolved at some point.
|
|
/// This allows us to get the strings that are in a dex file but not used by its
|
|
/// classes. (Yes, they are some, looking at you `~~D8{"backend":"dex","compilation-mode":
|
|
/// "release","has-checksums":false,"min-api":24,"version":"8.2.42"}`)
|
|
///
|
|
/// Use AtomicBool to hide this inside &self methods that are easy to run concurrently.
|
|
string_was_resolved: Vec<AtomicBool>,
|
|
type_ids: Vec<TypeIdItem>,
|
|
proto_ids: Vec<ProtoIdItem>,
|
|
field_ids: Vec<FieldIdItem>,
|
|
method_ids: Vec<MethodIdItem>,
|
|
class_defs: Vec<ClassDefItem>,
|
|
call_site_ids: Vec<CallSiteIdItem>,
|
|
method_handles: Vec<MethodHandleItem>,
|
|
map_list: MapList,
|
|
}
|
|
|
|
impl<'a> DexFileReader<'a> {
|
|
pub fn new(data: &'a [u8]) -> Result<Self> {
|
|
let mut buffer = Cursor::new(data);
|
|
let header = HeaderItem::deserialize(&mut buffer)?;
|
|
let mut tmp_file = Self {
|
|
data,
|
|
header,
|
|
// Default values before population
|
|
string_ids: vec![],
|
|
string_was_resolved: vec![],
|
|
type_ids: vec![],
|
|
proto_ids: vec![],
|
|
field_ids: vec![],
|
|
method_ids: vec![],
|
|
class_defs: vec![],
|
|
call_site_ids: vec![],
|
|
method_handles: vec![],
|
|
map_list: MapList { list: vec![] },
|
|
};
|
|
tmp_file.map_list = tmp_file.get_struct_at_offset(tmp_file.header.map_off)?;
|
|
tmp_file.string_ids = tmp_file.get_item_list::<StringIdItem>(
|
|
tmp_file.header.string_ids_off,
|
|
tmp_file.header.string_ids_size,
|
|
)?;
|
|
for _ in 0..tmp_file.string_ids.len() {
|
|
tmp_file.string_was_resolved.push(AtomicBool::new(false));
|
|
}
|
|
tmp_file.type_ids = tmp_file.get_item_list::<TypeIdItem>(
|
|
tmp_file.header.type_ids_off,
|
|
tmp_file.header.type_ids_size,
|
|
)?;
|
|
tmp_file.proto_ids = tmp_file.get_item_list::<ProtoIdItem>(
|
|
tmp_file.header.proto_ids_off,
|
|
tmp_file.header.proto_ids_size,
|
|
)?;
|
|
tmp_file.field_ids = tmp_file.get_item_list::<FieldIdItem>(
|
|
tmp_file.header.field_ids_off,
|
|
tmp_file.header.field_ids_size,
|
|
)?;
|
|
tmp_file.method_ids = tmp_file.get_item_list::<MethodIdItem>(
|
|
tmp_file.header.method_ids_off,
|
|
tmp_file.header.method_ids_size,
|
|
)?;
|
|
tmp_file.class_defs = tmp_file.get_item_list::<ClassDefItem>(
|
|
tmp_file.header.class_defs_off,
|
|
tmp_file.header.class_defs_size,
|
|
)?;
|
|
|
|
if let Some(item) = tmp_file
|
|
.map_list
|
|
.list
|
|
.iter()
|
|
.find(|item| item.type_ == MapItemType::CallSiteIdItem)
|
|
{
|
|
tmp_file.call_site_ids =
|
|
tmp_file.get_item_list::<CallSiteIdItem>(item.offset, item.size)?
|
|
}
|
|
if let Some(item) = tmp_file
|
|
.map_list
|
|
.list
|
|
.iter()
|
|
.find(|item| item.type_ == MapItemType::CallSiteIdItem)
|
|
{
|
|
tmp_file.method_handles =
|
|
tmp_file.get_item_list::<MethodHandleItem>(item.offset, item.size)?
|
|
}
|
|
tmp_file.sanity_check()?;
|
|
Ok(tmp_file)
|
|
}
|
|
|
|
/// Return the file [`HeaderItem`].
|
|
pub fn get_header(&self) -> &HeaderItem {
|
|
&self.header
|
|
}
|
|
/// Retunr the file [`StringIdItem`] list.
|
|
pub fn get_string_ids(&self) -> &[StringIdItem] {
|
|
&self.string_ids
|
|
}
|
|
/// Retunr the file [`TypeIdItem`] list.
|
|
pub fn get_type_ids(&self) -> &[TypeIdItem] {
|
|
&self.type_ids
|
|
}
|
|
/// Retunr the file [`ProtoIdItem`] list.
|
|
pub fn get_proto_ids(&self) -> &[ProtoIdItem] {
|
|
&self.proto_ids
|
|
}
|
|
/// Retunr the file [`FieldIdItem`] list.
|
|
pub fn get_field_ids(&self) -> &[FieldIdItem] {
|
|
&self.field_ids
|
|
}
|
|
/// Retunr the file [`MethodIdItem`] list.
|
|
pub fn get_method_ids(&self) -> &[MethodIdItem] {
|
|
&self.method_ids
|
|
}
|
|
/// Retunr the file [`ClassDefItem`] list.
|
|
pub fn get_class_defs(&self) -> &[ClassDefItem] {
|
|
&self.class_defs
|
|
}
|
|
/// Retunr the file [`CallSiteIdItem`] list.
|
|
pub fn get_call_site_ids(&self) -> &[CallSiteIdItem] {
|
|
&self.call_site_ids
|
|
}
|
|
/// Retunr the file [`MethodHandleItem`] list.
|
|
pub fn get_method_handles(&self) -> &[MethodHandleItem] {
|
|
&self.method_handles
|
|
}
|
|
/// Retunr the file [`MapList`].
|
|
pub fn get_map_list(&self) -> &MapList {
|
|
&self.map_list
|
|
}
|
|
|
|
/// Return the [`StringDataItem`] of from its idx.
|
|
pub fn get_string(&self, idx: u32) -> Result<StringDataItem> {
|
|
let id = self
|
|
.string_ids
|
|
.get(idx as usize)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"string idx {idx} is out of bound (|string_ids|={})",
|
|
self.string_ids.len()
|
|
)))?;
|
|
let string = self
|
|
.get_struct_at_offset::<StringDataItem>(id.string_data_off)
|
|
.map_err(|err| {
|
|
Error::DeserializationError(format!("Failled to parse string {idx}: {err}"))
|
|
})?;
|
|
self.string_was_resolved[idx as usize].store(true, Ordering::Relaxed);
|
|
Ok(string)
|
|
}
|
|
|
|
/// Return a [`TypeIdItem`] reference from its idx.
|
|
pub fn get_type_id(&self, idx: usize) -> Result<&TypeIdItem> {
|
|
self.type_ids
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"type idx {} out of bound of type_ids (|type_ids| = {})",
|
|
idx,
|
|
self.type_ids.len()
|
|
)))
|
|
}
|
|
|
|
/// Return a [`ProtoIdItem`] reference from its idx.
|
|
pub fn get_proto_id(&self, idx: usize) -> Result<&ProtoIdItem> {
|
|
self.proto_ids
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"prototype idx {idx} is out of bound (|proto_ids|={})",
|
|
self.proto_ids.len()
|
|
)))
|
|
}
|
|
|
|
/// Return a [`FieldIdItem`] reference from its idx.
|
|
pub fn get_field_id(&self, idx: usize) -> Result<&FieldIdItem> {
|
|
self.field_ids
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"field idx {idx} is out of bound (|field_ids|={})",
|
|
self.field_ids.len()
|
|
)))
|
|
}
|
|
|
|
/// Return a [`MethodIdItem`] reference from its idx.
|
|
pub fn get_method_id(&self, idx: usize) -> Result<&MethodIdItem> {
|
|
self.method_ids
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"method idx {idx} is out of bound (|method_ids|={})",
|
|
self.method_ids.len()
|
|
)))
|
|
}
|
|
|
|
/// Return a [`MethodHandleItem`] reference from its idx.
|
|
pub fn get_method_handle(&self, idx: usize) -> Result<&MethodHandleItem> {
|
|
self.method_handles
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"method handle {idx} is out of bound (|method_handles|={})",
|
|
self.method_handles.len()
|
|
)))
|
|
}
|
|
|
|
/// Return a [`CallSiteIdItem`] reference from its idx.
|
|
pub fn get_call_site_id(&self, idx: usize) -> Result<&CallSiteIdItem> {
|
|
self.call_site_ids
|
|
.get(idx)
|
|
.ok_or(Error::InconsistantStruct(format!(
|
|
"call site {idx} is out of bound (|call_site_ids|={})",
|
|
self.call_site_ids.len()
|
|
)))
|
|
}
|
|
|
|
fn sanity_check(&self) -> Result<()> {
|
|
if self.header.magic.version != [0x30, 0x33, 0x39] {
|
|
warn!(
|
|
"DEX 039 is the only version currently supported, found {}",
|
|
std::str::from_utf8(self.header.magic.version.as_slice())
|
|
.unwrap_or(&format!("{:x?}", self.header.magic.version))
|
|
);
|
|
}
|
|
// TODO: check checksum
|
|
// TODO: check signature
|
|
if self.header.file_size as usize != self.data.len() {
|
|
info!(
|
|
"Unexpected file size found: {}, expected {}",
|
|
self.header.file_size,
|
|
self.data.len()
|
|
);
|
|
}
|
|
if self.header.header_size != 0x70 {
|
|
info!(
|
|
"Unexpected header size found: 0x{:x}",
|
|
self.header.header_size
|
|
);
|
|
}
|
|
if self.header.endian_tag != EndianConstant::EndianConstant {
|
|
warn!("Wrong endian_tag found: {:x?}", self.header.endian_tag);
|
|
}
|
|
if self.header.link_off != 0 || self.header.link_size != 0 {
|
|
info!("Found non empty link section, the section will be ignored");
|
|
}
|
|
for item in &self.map_list.list {
|
|
match item.type_ {
|
|
MapItemType::HeaderItem if item.offset != 0 || item.size != 1 => {
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant Header Mapping info found in map_list: {item:x?}"
|
|
)))
|
|
}
|
|
MapItemType::StringIdItem
|
|
if item.offset != self.header.string_ids_off
|
|
|| item.size != self.header.string_ids_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.string_ids_off: 0x{:x}, header.string_ids_size: {}",
|
|
self.header.string_ids_off, self.header.string_ids_size
|
|
)))
|
|
}
|
|
MapItemType::TypeIdItem
|
|
if item.offset != self.header.type_ids_off
|
|
|| item.size != self.header.type_ids_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.type_ids_off: 0x{:x}, header.type_ids_size: {}",
|
|
self.header.type_ids_off, self.header.type_ids_size
|
|
)))
|
|
}
|
|
MapItemType::ProtoIdItem
|
|
if item.offset != self.header.proto_ids_off
|
|
|| item.size != self.header.proto_ids_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.proto_ids_off: 0x{:x}, header.proto_ids_size: {}",
|
|
self.header.proto_ids_off, self.header.proto_ids_size
|
|
)))
|
|
}
|
|
MapItemType::FieldIdItem
|
|
if item.offset != self.header.field_ids_off
|
|
|| item.size != self.header.field_ids_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.field_ids_off: 0x{:x}, header.field_ids_size: {}",
|
|
self.header.field_ids_off, self.header.field_ids_size
|
|
)))
|
|
}
|
|
MapItemType::MethodIdItem
|
|
if item.offset != self.header.method_ids_off
|
|
|| item.size != self.header.method_ids_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.method_ids_off: 0x{:x}, header.method_ids_size: {}",
|
|
self.header.method_ids_off, self.header.method_ids_size
|
|
)))
|
|
}
|
|
MapItemType::ClassDefItem
|
|
if item.offset != self.header.class_defs_off
|
|
|| item.size != self.header.class_defs_size =>
|
|
{
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.class_defs_off: 0x{:x}, header.class_defs_size: {}",
|
|
self.header.class_defs_off, self.header.class_defs_size
|
|
)))
|
|
}
|
|
MapItemType::MapList if item.offset != self.header.map_off || item.size != 1 => {
|
|
return Err(Error::InconsistantStruct(format!(
|
|
"Inconsistant MapList Mapping info found in map_list: {item:x?}, \
|
|
header.map_list_off: 0x{:x}",
|
|
self.header.map_off
|
|
)))
|
|
}
|
|
/*
|
|
MapItemType::CallSiteIdItem => todo!(),
|
|
MapItemType::MethodHandleItem => todo!(),
|
|
MapItemType::TypeList => todo!(),
|
|
MapItemType::AnnotationSetRefList => todo!(),
|
|
MapItemType::AnnotationSetItem => todo!(),
|
|
MapItemType::ClassDataItem => todo!(),
|
|
MapItemType::CodeItem => todo!(),
|
|
MapItemType::StringDataItem => todo!(),
|
|
MapItemType::DebugInfoItem => todo!(),
|
|
MapItemType::AnnotationItem => todo!(),
|
|
MapItemType::EncodedArrayItem => todo!(),
|
|
MapItemType::AnnotationsDirectoryItem => todo!(),
|
|
MapItemType::HiddenapiClassDataItem => todo!(),
|
|
*/
|
|
MapItemType::UnkownType(ty) => {
|
|
info!("Unknown Type found in map_list: 0x{ty:04x}, it will be ignored");
|
|
}
|
|
_ => (),
|
|
}
|
|
let mut occurences = std::collections::HashMap::new();
|
|
for ty in self.map_list.list.iter().map(|val| val.type_) {
|
|
*occurences.entry(ty).or_insert(0) += 1;
|
|
}
|
|
let mut duplicate = false;
|
|
for (ty, val) in occurences {
|
|
if val > 1 {
|
|
error!("Found multiple {} occurence of {:?} in map_list", val, ty);
|
|
duplicate = true;
|
|
}
|
|
}
|
|
if duplicate {
|
|
return Err(Error::InconsistantStruct(
|
|
"Found multiple occurence of the same item type in map_list".into(),
|
|
));
|
|
}
|
|
}
|
|
Ok(())
|
|
}
|
|
|
|
fn get_item_list<T: Serializable>(&self, offset: u32, size: u32) -> Result<Vec<T>> {
|
|
let mut buffer = Cursor::new(self.data);
|
|
buffer.seek(SeekFrom::Start(offset as u64)).map_err(|err| {
|
|
Error::DeserializationError(format!("Failed to seek 0x{offset:x} position: {err}"))
|
|
})?;
|
|
let mut list = vec![];
|
|
for _ in 0..size {
|
|
let pos = buffer.stream_position().map_err(|err| {
|
|
Error::DeserializationError(format!("Failled to read buffer position: {err}"))
|
|
})?;
|
|
list.push(T::deserialize(&mut buffer).map_err(|err| {
|
|
Error::DeserializationError(format!(
|
|
"Failed to deserialize {} at 0x{:x}: {err}",
|
|
std::any::type_name::<T>(),
|
|
pos
|
|
))
|
|
})?);
|
|
}
|
|
Ok(list)
|
|
}
|
|
|
|
/// Return the structure `T` located at `offset` in the file.
|
|
///
|
|
/// # Warning
|
|
///
|
|
/// If the offset is invalid, UB.
|
|
pub fn get_struct_at_offset<T: Serializable>(&self, offset: u32) -> Result<T> {
|
|
let mut buffer = Cursor::new(self.data);
|
|
buffer.seek(SeekFrom::Start(offset as u64)).unwrap();
|
|
let r = T::deserialize(&mut buffer).map_err(|err| {
|
|
Error::DeserializationError(format!(
|
|
"Failed to deserialize {} at 0x{:x}: {err}",
|
|
std::any::type_name::<T>(),
|
|
offset
|
|
))
|
|
});
|
|
if buffer.position() as u32 > self.header.data_off + self.header.data_size {
|
|
// Return error? Android won't run an apk that does not respect this condition for most
|
|
// (all?) struct, but a lot a tools don't care.
|
|
warn!(
|
|
"Deserialized {} at 0x{:x}, but ended up reading out of the data section \
|
|
(0x{:x} to 0x{:x}, current cursor at 0x{:x}",
|
|
std::any::type_name::<T>(),
|
|
offset,
|
|
self.header.data_off,
|
|
self.header.data_off + self.header.data_size,
|
|
buffer.position()
|
|
);
|
|
}
|
|
r
|
|
}
|
|
|
|
/// Return the strings that where not referenced.
|
|
pub fn get_not_resolved_strings(&mut self) -> Result<Vec<StringDataItem>> {
|
|
// use `&mut self` because using this method at the same time as performing
|
|
// `Self::get_string()` is UB.
|
|
let idxs: Vec<u32> = (0..self.string_was_resolved.len())
|
|
.filter(|idx| !self.string_was_resolved[*idx].load(Ordering::Relaxed))
|
|
.map(|idx| idx as u32)
|
|
.collect();
|
|
let mut strings = vec![];
|
|
for idx in &idxs {
|
|
strings.push(self.get_string(*idx)?);
|
|
}
|
|
for idx in idxs {
|
|
self.string_was_resolved[idx as usize].store(false, Ordering::Relaxed)
|
|
}
|
|
Ok(strings)
|
|
}
|
|
}
|