add unreferenced strings

This commit is contained in:
Jean-Marie Mineau 2024-02-15 11:34:43 +01:00
parent b47c9dd666
commit 6637745cdf
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
4 changed files with 114 additions and 68 deletions

View file

@ -7,12 +7,18 @@ use crate::{
};
use log::{error, info, warn};
use std::io::{Cursor, Seek, SeekFrom};
use std::sync::atomic::{AtomicBool, Ordering};
#[derive(Debug)]
pub struct DexFileReader<'a> {
data: &'a [u8],
header: HeaderItem,
string_ids: Vec<StringIdItem>,
/// If `string_was_resolved[string_idx]` is true, the string was resolved at some point.
/// This allows us to get the strings that are in a dex file but not used by its
/// classes. (Yes, there are some, looking at you `~~D8{"backend":"dex","compilation-mode":
/// "release","has-checksums":false,"min-api":24,"version":"8.2.42"}`)
string_was_resolved: Vec<AtomicBool>,
type_ids: Vec<TypeIdItem>,
proto_ids: Vec<ProtoIdItem>,
field_ids: Vec<FieldIdItem>,
@ -32,6 +38,7 @@ impl<'a> DexFileReader<'a> {
header,
// Default values before population
string_ids: vec![],
string_was_resolved: vec![],
type_ids: vec![],
proto_ids: vec![],
field_ids: vec![],
@ -46,6 +53,9 @@ impl<'a> DexFileReader<'a> {
tmp_file.header.string_ids_off,
tmp_file.header.string_ids_size,
)?;
for _ in 0..tmp_file.string_ids.len() {
tmp_file.string_was_resolved.push(AtomicBool::new(false));
}
tmp_file.type_ids = tmp_file.get_item_list::<TypeIdItem>(
tmp_file.header.type_ids_off,
tmp_file.header.type_ids_size,
@ -66,6 +76,7 @@ impl<'a> DexFileReader<'a> {
tmp_file.header.class_defs_off,
tmp_file.header.class_defs_size,
)?;
if let Some(item) = tmp_file
.map_list
.list
@ -138,10 +149,13 @@ impl<'a> DexFileReader<'a> {
"string idx {idx} is out of bound (|string_ids|={})",
self.string_ids.len()
)))?;
self.get_struct_at_offset(id.string_data_off)
let string = self
.get_struct_at_offset::<StringDataItem>(id.string_data_off)
.map_err(|err| {
Error::DeserializationError(format!("Failled to parse string {idx}: {err}"))
})
})?;
self.string_was_resolved[idx as usize].store(true, Ordering::Relaxed);
Ok(string)
}
/// Return a [`TypeIdItem`] reference from its idx.
@ -399,4 +413,22 @@ impl<'a> DexFileReader<'a> {
}
r
}
/// Return the strings that were not referenced.
///
/// A string counts as "not referenced" when its entry in `string_was_resolved`
/// is still `false`, i.e. [`Self::get_string`] has not successfully resolved it.
///
/// The resolution flags are left exactly as they were before the call, whether
/// the call succeeds or fails, so calling this method does not itself mark the
/// returned strings as referenced.
///
/// # Errors
///
/// Returns the first error produced by [`Self::get_string`] while reading one
/// of the unreferenced strings.
pub fn get_not_resolved_strings(&mut self) -> Result<Vec<StringDataItem>> {
    // `&mut self` guarantees exclusive access: nobody can call
    // `Self::get_string()` while the flags are temporarily set below, which
    // would otherwise make the bookkeeping inconsistent.
    let idxs: Vec<u32> = self
        .string_was_resolved
        .iter()
        .enumerate()
        .filter(|(_, resolved)| !resolved.load(Ordering::Relaxed))
        // `get_string` takes a `u32` index; presumably the number of strings
        // always fits, as the original code performed the same cast.
        .map(|(idx, _)| idx as u32)
        .collect();

    // Do not use `?` here: `get_string` flags each string it resolves, and the
    // flags must be restored below even when one of the strings fails to parse.
    let strings: Result<Vec<StringDataItem>> =
        idxs.iter().map(|idx| self.get_string(*idx)).collect();

    // Every index in `idxs` was unresolved before this call: restore that state.
    for idx in idxs {
        self.string_was_resolved[idx as usize].store(false, Ordering::Relaxed);
    }
    strings
}
}