try (and fail) to improve on class redistribution

This commit is contained in:
Jean-Marie Mineau 2025-06-23 13:40:08 +02:00
parent d5006f591c
commit 22571a39fc
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 161 additions and 21 deletions

View file

@ -2,6 +2,7 @@
use anyhow::{anyhow, bail, Context}; use anyhow::{anyhow, bail, Context};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::cmp::max;
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap, HashSet};
use std::fs::File; use std::fs::File;
use std::io::{Cursor, Read, Seek}; use std::io::{Cursor, Read, Seek};
@ -38,25 +39,25 @@ impl DexFile {
pub fn get_all_types(&self) -> HashSet<IdType> { pub fn get_all_types(&self) -> HashSet<IdType> {
let mut v = TypeCollector::default(); let mut v = TypeCollector::default();
v.visit_dex_file(self).unwrap(); v.visit_dex_file(self).unwrap();
v.types v.result()
} }
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> { pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
let mut v = MethodIdCollector::default(); let mut v = MethodIdCollector::default();
v.visit_dex_file(self).unwrap(); v.visit_dex_file(self).unwrap();
v.method_ids v.result()
} }
pub fn get_all_protos(&self) -> HashSet<IdMethodType> { pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
let mut v = MethodTypeCollector::default(); let mut v = MethodTypeCollector::default();
v.visit_dex_file(self).unwrap(); v.visit_dex_file(self).unwrap();
v.method_types v.result()
} }
pub fn get_all_field_ids(&self) -> HashSet<IdField> { pub fn get_all_field_ids(&self) -> HashSet<IdField> {
let mut v = FieldIdCollector::default(); let mut v = FieldIdCollector::default();
v.visit_dex_file(self).unwrap(); v.visit_dex_file(self).unwrap();
v.field_ids v.result()
} }
/// Check if the `DexFile` is exceeding the format limits. /// Check if the `DexFile` is exceeding the format limits.
@ -94,6 +95,34 @@ impl DexFile {
.len() .len()
<= u16::MAX as usize) <= u16::MAX as usize)
} }
/// Split the dex file into two dex files.
///
/// Classes are dealt out alternately between the two halves, following the
/// iteration order of the class map (a `HashMap`, so which class lands in
/// which half is arbitrary). The first half keeps `not_referenced_strings`,
/// the second starts with an empty set, and neither keeps a `bin_cache`.
pub fn _split(self) -> (Self, Self) {
    let mut even_classes = HashMap::new();
    let mut odd_classes = HashMap::new();
    for (position, (name, class)) in self.classes.into_iter().enumerate() {
        // Positions 0, 2, 4, ... go to the first half, the others to the second.
        let target = if position % 2 == 0 {
            &mut even_classes
        } else {
            &mut odd_classes
        };
        target.insert(name, class);
    }
    let first = Self {
        classes: even_classes,
        not_referenced_strings: self.not_referenced_strings,
        bin_cache: None,
    };
    let second = Self {
        classes: odd_classes,
        not_referenced_strings: HashSet::new(),
        bin_cache: None,
    };
    (first, second)
}
} }
impl<V: Visitor> Visitable<V> for DexFile { impl<V: Visitor> Visitable<V> for DexFile {
@ -3439,6 +3468,18 @@ impl Apk {
/// </div> /// </div>
pub fn redistribute_classes(&mut self) { pub fn redistribute_classes(&mut self) {
// TODO: handle cases where several classes have the same name. // TODO: handle cases where several classes have the same name.
// Find the index of the first dex file name that is NOT already used, so that
// the files created below get a fresh name instead of replacing an existing
// dex file. Names follow the sequence `classes.dex`, `classes2.dex`,
// `classes3.dex`, ... (index 0 has no number, index `i` maps to `i + 1`).
let mut dex_i = 0;
loop {
    let name = if dex_i == 0 {
        "classes.dex".into()
    } else {
        format!("classes{}.dex", dex_i + 1)
    };
    // Stop at the first free name: breaking on a name that *is* present
    // would leave `dex_i` pointing at an existing file.
    if !self.dex_files.contains_key(&name) {
        break;
    }
    dex_i += 1;
}
while let Some(fname) = self.dex_files.iter().find_map(|(name, dex)| { while let Some(fname) = self.dex_files.iter().find_map(|(name, dex)| {
if dex.is_overflowing() { if dex.is_overflowing() {
Some(name.clone()) Some(name.clone())
@ -3447,7 +3488,42 @@ impl Apk {
} }
}) { }) {
let dex = self.dex_files.get_mut(&fname).unwrap(); let dex = self.dex_files.get_mut(&fname).unwrap();
let class_name = dex.classes.keys().find(|_| true).unwrap().clone();
/*
* TODO: check impact on inheritance topology
// If the dex is overflowing by a big margin
if (dex.get_all_types().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
|| (dex.get_all_protos().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
|| (dex.get_all_field_ids().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
|| (dex.get_all_method_ids().len() > u16::MAX as usize + ((u16::MAX / 10) as usize))
{
let dex = self.dex_files.remove(&fname).unwrap();
let (dex1, dex2) = dex.split();
self.dex_files.insert(fname, dex1);
let fname2 = if dex_i == 0 {
"classes.dex".into()
} else {
format!("classes{}.dex", dex_i + 1)
};
dex_i += 1;
self.dex_files.insert(fname2, dex2);
} else {
*/
let class_name = dex
.classes
.iter()
.max_by_key(|(_, class)| {
max(
max(class.get_all_types().len(), class.get_all_protos().len()),
max(
class.get_all_field_ids().len(),
class.get_all_method_ids().len(),
),
)
})
.map(|(name, _)| name)
.unwrap()
.clone();
let class = dex.classes.remove(&class_name).unwrap(); let class = dex.classes.remove(&class_name).unwrap();
if let Some(dex) = self if let Some(dex) = self
.dex_files .dex_files
@ -3457,22 +3533,17 @@ impl Apk {
dex.classes.insert(class_name, class); dex.classes.insert(class_name, class);
} else { } else {
// If no file has space for the class, create a new file // If no file has space for the class, create a new file
let mut i = 0;
let new_fname = loop {
let name = if i == 0 {
"classes.dex".into()
} else {
format!("classes{}.dex", i + 1)
};
if self.dex_files.contains_key(&name) {
break name;
};
i += 1;
};
let mut new_dex = DexFile::default(); let mut new_dex = DexFile::default();
new_dex.classes.insert(class_name, class); new_dex.classes.insert(class_name, class);
let new_fname = if dex_i == 0 {
"classes.dex".into()
} else {
format!("classes{}.dex", dex_i + 1)
};
dex_i += 1;
self.dex_files.insert(new_fname, new_dex); self.dex_files.insert(new_fname, new_dex);
} }
//}
} }
} }
} }

View file

@ -1,11 +1,12 @@
//! The visitor trait and common implementations. //! The visitor trait and common implementations.
use crate::{ use crate::{
Apk, CallSite, Class, Code, DexAnnotation, DexAnnotationItem, DexFile, DexString, DexValue, ins::Instruction, scalar::*, Apk, CallSite, Class, Code, DexAnnotation, DexAnnotationItem,
Field, FieldVisibility, HiddenApiData, HiddenApiDomain, HiddenApiPermission, IdEnum, IdField, DexFile, DexString, DexValue, Field, FieldVisibility, HiddenApiData, HiddenApiDomain,
IdMethod, IdMethodType, IdType, Method, MethodHandle, MethodVisibility, Result, HiddenApiPermission, IdEnum, IdField, IdMethod, IdMethodType, IdType, Method, MethodHandle,
ins::Instruction, scalar::*, MethodVisibility, Result,
}; };
use rayon::prelude::*;
use std::collections::HashSet; use std::collections::HashSet;
pub trait Visitor: Sized { pub trait Visitor: Sized {
@ -275,6 +276,23 @@ impl Visitor for TypeCollector {
self.types.insert(ty.clone()); self.types.insert(ty.clone());
Ok(()) Ok(())
} }
/// Collect the types of every class of `dex`, visiting the classes with
/// rayon's parallel iterator.
///
/// Each `(type, class)` entry is visited by its own fresh collector; the
/// per-class sets are then merged into `self.types`.
///
/// # Errors
///
/// Returns an error if `visit_type` or `visit_class` fails for any class.
fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
    let per_class: Vec<_> = dex
        .classes
        .par_iter()
        .map(|(ty, cls)| -> Result<_> {
            let mut collector = Self::default();
            collector.visit_type(ty)?;
            // Propagate with `?`: chaining through `Result::map` would nest
            // this result and silently drop its error.
            collector.visit_class(cls)?;
            Ok(collector.result())
        })
        .collect::<Result<_, _>>()?;
    for class_types in per_class {
        self.types.extend(class_types);
    }
    Ok(())
}
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -293,6 +311,23 @@ impl Visitor for MethodTypeCollector {
self.method_types.insert(mty.clone()); self.method_types.insert(mty.clone());
Ok(()) Ok(())
} }
/// Collect the method types (prototypes) of every class of `dex`, visiting
/// the classes with rayon's parallel iterator.
///
/// Each `(type, class)` entry is visited by its own fresh collector; the
/// per-class sets are then merged into `self.method_types`.
///
/// # Errors
///
/// Returns an error if `visit_type` or `visit_class` fails for any class.
fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
    let per_class: Vec<_> = dex
        .classes
        .par_iter()
        .map(|(ty, cls)| -> Result<_> {
            let mut collector = Self::default();
            collector.visit_type(ty)?;
            // Propagate with `?`: chaining through `Result::map` would nest
            // this result and silently drop its error.
            collector.visit_class(cls)?;
            Ok(collector.result())
        })
        .collect::<Result<_, _>>()?;
    for class_method_types in per_class {
        self.method_types.extend(class_method_types);
    }
    Ok(())
}
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -311,6 +346,23 @@ impl Visitor for FieldIdCollector {
self.field_ids.insert(id.clone()); self.field_ids.insert(id.clone());
Ok(()) Ok(())
} }
/// Collect the field ids of every class of `dex`, visiting the classes with
/// rayon's parallel iterator.
///
/// Each `(type, class)` entry is visited by its own fresh collector; the
/// per-class sets are then merged into `self.field_ids`.
///
/// # Errors
///
/// Returns an error if `visit_type` or `visit_class` fails for any class.
fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
    let per_class: Vec<_> = dex
        .classes
        .par_iter()
        .map(|(ty, cls)| -> Result<_> {
            let mut collector = Self::default();
            collector.visit_type(ty)?;
            // Propagate with `?`: chaining through `Result::map` would nest
            // this result and silently drop its error.
            collector.visit_class(cls)?;
            Ok(collector.result())
        })
        .collect::<Result<_, _>>()?;
    for class_field_ids in per_class {
        self.field_ids.extend(class_field_ids);
    }
    Ok(())
}
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]
@ -329,6 +381,23 @@ impl Visitor for MethodIdCollector {
self.method_ids.insert(id.clone()); self.method_ids.insert(id.clone());
Ok(()) Ok(())
} }
/// Collect the method ids of every class of `dex`, visiting the classes with
/// rayon's parallel iterator.
///
/// Each `(type, class)` entry is visited by its own fresh collector; the
/// per-class sets are then merged into `self.method_ids`.
///
/// # Errors
///
/// Returns an error if `visit_type` or `visit_class` fails for any class.
fn visit_dex_file(&mut self, dex: &DexFile) -> Result<()> {
    let per_class: Vec<_> = dex
        .classes
        .par_iter()
        .map(|(ty, cls)| -> Result<_> {
            let mut collector = Self::default();
            collector.visit_type(ty)?;
            // Propagate with `?`: chaining through `Result::map` would nest
            // this result and silently drop its error.
            collector.visit_class(cls)?;
            Ok(collector.result())
        })
        .collect::<Result<_, _>>()?;
    for class_method_ids in per_class {
        self.method_ids.extend(class_method_ids);
    }
    Ok(())
}
} }
#[derive(Debug, Default)] #[derive(Debug, Default)]