add redistribute_classes() and parallel dex compilation

This commit is contained in:
Jean-Marie Mineau 2025-06-18 16:40:20 +02:00
parent c159e3e599
commit be241e9fd1
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 135 additions and 28 deletions

View file

@ -34,6 +34,68 @@ pub struct DexFile {
pub(crate) bin_cache: Option<Vec<u8>>, // TODO: invalidate the cache !!! pub(crate) bin_cache: Option<Vec<u8>>, // TODO: invalidate the cache !!!
} }
impl DexFile {
    /// Return the set of types gathered by running a `TypeCollector` over this file.
    ///
    /// # Panics
    ///
    /// Panics if the visitor reports an error while walking the file.
    pub fn get_all_types(&self) -> HashSet<IdType> {
        let mut collector = TypeCollector::default();
        collector.visit_dex_file(self).unwrap();
        collector.types
    }

    /// Return the set of method ids gathered by running a `MethodIdCollector` over this file.
    ///
    /// # Panics
    ///
    /// Panics if the visitor reports an error while walking the file.
    pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
        let mut collector = MethodIdCollector::default();
        collector.visit_dex_file(self).unwrap();
        collector.method_ids
    }

    /// Return the set of method prototypes gathered by running a `MethodTypeCollector`
    /// over this file.
    ///
    /// # Panics
    ///
    /// Panics if the visitor reports an error while walking the file.
    pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
        let mut collector = MethodTypeCollector::default();
        collector.visit_dex_file(self).unwrap();
        collector.method_types
    }

    /// Return the set of field ids gathered by running a `FieldIdCollector` over this file.
    ///
    /// # Panics
    ///
    /// Panics if the visitor reports an error while walking the file.
    pub fn get_all_field_ids(&self) -> HashSet<IdField> {
        let mut collector = FieldIdCollector::default();
        collector.visit_dex_file(self).unwrap();
        collector.field_ids
    }

    /// Check if the `DexFile` is exceeding the format limits.
    ///
    /// The file is overflowing as soon as one of its type, prototype, field id or method id
    /// sets holds more than `u16::MAX` entries. The sets are computed lazily, in that order,
    /// and the check stops at the first one that is too large.
    pub fn is_overflowing(&self) -> bool {
        let limit = u16::MAX as usize;
        if self.get_all_types().len() > limit {
            return true;
        }
        if self.get_all_protos().len() > limit {
            return true;
        }
        if self.get_all_field_ids().len() > limit {
            return true;
        }
        self.get_all_method_ids().len() > limit
    }

    /// Check if the class can be added to the dex file without exceeding the format limits.
    ///
    /// For each kind of id (types, prototypes, field ids, method ids, in that order) the
    /// union of the ids of the file and of `class` must hold at most `u16::MAX` entries.
    /// The check stops at the first kind that does not fit.
    pub fn has_space_for_class(&self, class: &Class) -> bool {
        let limit = u16::MAX as usize;

        // `HashSet::union` only yields distinct elements, so counting them gives the size
        // of the merged set without building it.
        let types = self.get_all_types();
        if types.union(&class.get_all_types()).count() > limit {
            return false;
        }

        let protos = self.get_all_protos();
        if protos.union(&class.get_all_protos()).count() > limit {
            return false;
        }

        let field_ids = self.get_all_field_ids();
        if field_ids.union(&class.get_all_field_ids()).count() > limit {
            return false;
        }

        let method_ids = self.get_all_method_ids();
        method_ids.union(&class.get_all_method_ids()).count() <= limit
    }
}
impl<V: Visitor> Visitable<V> for DexFile { impl<V: Visitor> Visitable<V> for DexFile {
fn default_visit(&self, v: &mut V) -> Result<()> { fn default_visit(&self, v: &mut V) -> Result<()> {
for (id, class) in &self.classes { for (id, class) in &self.classes {
@ -2846,36 +2908,33 @@ impl Apk {
.map(|mut dex_writer| dex_writer.gen_dex_file_to_vec()) .map(|mut dex_writer| dex_writer.gen_dex_file_to_vec())
.collect() .collect()
*/ */
let mut bin_dex_files = HashMap::new(); self.dex_files.par_iter().map(|(
// TODO: Multithread
for (
name, name,
DexFile { DexFile {
classes, classes,
not_referenced_strings, not_referenced_strings,
bin_cache, bin_cache,
}, },
) in self.dex_files.iter() )|
{ {
if let Some(bin_cache) = bin_cache { if let Some(bin_cache) = bin_cache {
bin_dex_files.insert(name.clone(), bin_cache.clone()); Ok((name.clone(), bin_cache.clone()))
} else { } else {
let mut writer = DexWriter::new(); let mut writer = DexWriter::new();
for class_ in classes.values() { for class_ in classes.values() {
match writer.add_class(class_) { match writer.add_class(class_) {
Ok(()) => (), Ok(()) => (),
Err(DexWritterError::OutOfSpace(_)) => bail!( Err(DexWritterError::OutOfSpace(err)) => bail!(
"{name} dex file has to many class/method/field to be serialize in dex format" "{name} dex file has to many class/method/field to be serialize in dex format ({err}). You might want to use `Apk::redistribute_classes() before serializing`"
), ),
} }
} }
for string in not_referenced_strings { for string in not_referenced_strings {
writer.add_string(string.clone()); writer.add_string(string.clone());
} }
bin_dex_files.insert(name.clone(), writer.gen_dex_file_to_vec()?); writer.gen_dex_file_to_vec().map(|bin_dex| (name.clone(), bin_dex))
};
} }
Ok(bin_dex_files) }).collect::<Result<HashMap<_, _>>>()
} }
// TODO: check for android platform classes? // TODO: check for android platform classes?
@ -3368,6 +3427,54 @@ impl Apk {
} }
} }
} }
/// Redistribute classes among dex files. This is needed when a file references more than
/// 2**16 types, methods, fields or prototypes (restriction imposed by the dalvik format).
///
/// Classes are taken one at a time out of a file for which `DexFile::is_overflowing()` is
/// true and moved to the first file for which `DexFile::has_space_for_class()` is true. When
/// no file has space, a new file is created under the first unused name of the sequence
/// `classes.dex`, `classes2.dex`, `classes3.dex`, ...
///
/// A file that holds a single class is never split: moving that class elsewhere cannot
/// help, so such a file is left untouched even if it is still overflowing.
///
/// <div class="warning">
///
/// In some edge cases (when several classes in the same application share the same name), this
/// method can change the behavior of the application.
///
/// </div>
pub fn redistribute_classes(&mut self) {
    // TODO: handle cases where several classes have the same name.
    while let Some(fname) = self.dex_files.iter().find_map(|(name, dex)| {
        // Skipping single-class files guarantees termination: a class that overflows on
        // its own would otherwise be moved to a fresh file forever.
        (dex.classes.len() > 1 && dex.is_overflowing()).then(|| name.clone())
    }) {
        let dex = self
            .dex_files
            .get_mut(&fname)
            .expect("the file name was just read from `dex_files`");
        let class_name = dex
            .classes
            .keys()
            .next()
            .expect("the selected dex file holds more than one class")
            .clone();
        let class = dex
            .classes
            .remove(&class_name)
            .expect("the class name was just read from `classes`");
        if let Some(dex) = self
            .dex_files
            .values_mut()
            .find(|dex| dex.has_space_for_class(&class))
        {
            dex.classes.insert(class_name, class);
        } else {
            // If no file has space for the class, create a new file.
            let mut i = 0;
            let new_fname = loop {
                let name = if i == 0 {
                    "classes.dex".into()
                } else {
                    format!("classes{}.dex", i + 1)
                };
                // Only an unused name is acceptable: reusing an existing one would
                // replace that dex file and silently drop all of its classes.
                if !self.dex_files.contains_key(&name) {
                    break name;
                };
                i += 1;
            };
            let mut new_dex = DexFile::default();
            new_dex.classes.insert(class_name, class);
            self.dex_files.insert(new_fname, new_dex);
        }
    }
}
} }
/// Parse a .dex file name, and if it is a valid android file, return the index of the file. /// Parse a .dex file name, and if it is a valid android file, return the index of the file.

View file

@ -5,7 +5,7 @@ use std::io;
use std::io::{Cursor, Seek, SeekFrom, Write}; use std::io::{Cursor, Seek, SeekFrom, Write};
use adler::Adler32; use adler::Adler32;
use anyhow::{Context, anyhow, bail}; use anyhow::{anyhow, bail, Context};
use log::{debug, warn}; use log::{debug, warn};
use sha1::{Digest, Sha1}; use sha1::{Digest, Sha1};
@ -194,7 +194,7 @@ impl DexWriter {
.iter() .iter()
.filter(|ty| !self.type_ids.contains_key(ty)) .filter(|ty| !self.type_ids.contains_key(ty))
.count(); .count();
if new_nb_types >= u16::MAX as usize { if new_nb_types > u16::MAX as usize {
return Err(DexWritterError::OutOfSpace( return Err(DexWritterError::OutOfSpace(
"To many types for one dex file".into(), "To many types for one dex file".into(),
)); ));
@ -206,7 +206,7 @@ impl DexWriter {
.iter() .iter()
.filter(|proto| !self.proto_ids.contains_key(proto)) .filter(|proto| !self.proto_ids.contains_key(proto))
.count(); .count();
if new_nb_protos >= u16::MAX as usize { if new_nb_protos > u16::MAX as usize {
return Err(DexWritterError::OutOfSpace( return Err(DexWritterError::OutOfSpace(
"To many prototypes for one dex file".into(), "To many prototypes for one dex file".into(),
)); ));
@ -218,7 +218,7 @@ impl DexWriter {
.iter() .iter()
.filter(|field| !self.field_ids.contains_key(field)) .filter(|field| !self.field_ids.contains_key(field))
.count(); .count();
if new_nb_field_ids >= u16::MAX as usize { if new_nb_field_ids > u16::MAX as usize {
return Err(DexWritterError::OutOfSpace( return Err(DexWritterError::OutOfSpace(
"To many field ids for one dex file".into(), "To many field ids for one dex file".into(),
)); ));
@ -230,7 +230,7 @@ impl DexWriter {
.iter() .iter()
.filter(|meth| !self.method_ids.contains_key(meth)) .filter(|meth| !self.method_ids.contains_key(meth))
.count(); .count();
if new_nb_method_ids >= u16::MAX as usize { if new_nb_method_ids > u16::MAX as usize {
return Err(DexWritterError::OutOfSpace( return Err(DexWritterError::OutOfSpace(
"To many method ids for one dex file".into(), "To many method ids for one dex file".into(),
)); ));