start separating classes in files

This commit is contained in:
Jean-Marie Mineau 2025-01-14 20:29:17 +01:00
parent 4b28d0a406
commit 91859170c3
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 157 additions and 29 deletions

View file

@ -16,19 +16,44 @@ use androscalpel_serializer::Instruction as InsFormat;
use androscalpel_serializer::*;
use rayon::prelude::*;
/// The content of one dex file of an application: its classes, the strings it contains
/// that no class references, and an optional cached copy of the original binary.
#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)]
pub struct DexFile {
/// The classes in the dex file.
pub classes: HashMap<IdType, Class>,
/// Set of strings found in the dex file that are not referenced in classes.
pub not_referenced_strings: HashSet<DexString>,
/// The binary of the dexfile.
///
/// Left out of the serialized form (`skip_serializing`). When it is `Some`, the cached
/// bytes can be reused as-is instead of regenerating the file from `classes`, so the
/// cache becomes stale as soon as the classes or strings are modified (see the TODO).
#[serde(skip_serializing)]
pub(crate) bin_cache: Option<Vec<u8>>, // TODO: invalidate the cache !!!
}
/// Represent an apk.
#[pyclass]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)]
#[derive(Debug, Clone, PartialEq, Default, Deserialize, Serialize)]
pub struct Apk {
#[pyo3(get)]
pub classes: HashMap<IdType, Class>,
#[pyo3(get)]
pub not_referenced_strings: HashSet<DexString>,
pub dex_files: HashMap<String, DexFile>, // TODO: use accessors for cache invalidation
}
impl Apk {
/// Add the content of a dex file to the apk.
pub fn add_dex_file(&mut self, data: &[u8], label_each_ins: bool) -> Result<()> {
///
/// # Parameters
/// - `name`: the name of the dex file
/// - `data`: the dex file binary
/// - `label_each_ins`: if set to true, insert a label before each instruction
/// indicating the instruction address
/// - `cache`: if set to true, copy and cache the binary data format.
pub fn add_dex_file(
&mut self,
name: &str,
data: &[u8],
label_each_ins: bool,
cache: bool,
) -> Result<()> {
let name: String = name.into();
if self.dex_files.contains_key(&name) {
bail!("{name} already exist in the application")
}
let mut dex = DexFileReader::new(data)?;
let classes = dex
.get_class_defs()
@ -36,10 +61,17 @@ impl Apk {
.enumerate()
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins))
.map(|class| class.map(|class| (class.descriptor.clone(), class)))
.collect::<Result<Vec<_>, _>>()?;
self.classes.par_extend(classes);
self.not_referenced_strings
.extend(dex.get_not_resolved_strings()?.into_iter().map(DexString));
.collect::<Result<HashMap<IdType, Class>, _>>()?;
let dex_file = DexFile {
classes,
not_referenced_strings: dex
.get_not_resolved_strings()?
.into_iter()
.map(DexString)
.collect(),
bin_cache: if cache { Some(data.to_vec()) } else { None },
};
self.dex_files.insert(name, dex_file);
Ok(())
}
@ -2834,7 +2866,8 @@ impl Apk {
Ok(methods)
}
pub fn gen_raw_dex(&self) -> Result<Vec<Vec<u8>>> {
pub fn gen_raw_dex(&self) -> Result<HashMap<String, Vec<u8>>> {
/*
let mut dex_writers = vec![];
let mut dex_writer = DexWriter::new();
for class_ in self.classes.values() {
@ -2865,6 +2898,46 @@ impl Apk {
.into_iter()
.map(|mut dex_writer| dex_writer.gen_dex_file_to_vec())
.collect()
*/
let mut bin_dex_files = HashMap::new();
// TODO: Multithread
for (
name,
DexFile {
classes,
not_referenced_strings,
bin_cache,
},
) in self.dex_files.iter()
{
if let Some(bin_cache) = bin_cache {
bin_dex_files.insert(name.clone(), bin_cache.clone());
} else {
let mut writer = DexWriter::new();
for class_ in classes.values() {
match writer.add_class(class_) {
Ok(()) => (),
Err(DexWritterError::OutOfSpace(_)) => bail!(
"{name} dex file has to many class/method/field to be serialize in dex format"
),
}
}
for string in not_referenced_strings {
writer.add_string(string.clone());
}
bin_dex_files.insert(name.clone(), writer.gen_dex_file_to_vec()?);
};
}
Ok(bin_dex_files)
}
/// Search for the given class. If several classes with the same name are found,
/// return the class used by android (classes.dex over classes2.dex over classes3.dex ...),
/// and if not found in files used by android, look in any other files (classes0.dex,
/// classes1.dex, classes02.dex, assets/stuff.dex, ...), and select the first one in
/// alphabetical order of dex file name.
///
/// # Parameters
/// - `class_id`: the type (descriptor) of the class to look for
///
/// # Returns
/// A mutable reference to the selected class, or `None` if no dex file of the apk
/// contains a class with this id.
pub fn get_class_mut(&mut self, class_id: IdType) -> Option<&mut Class> {
    // The owning file is located with shared borrows only; the single mutable borrow is
    // taken at the very end. Returning a `&mut` from inside the search loops would keep
    // `self.dex_files` mutably borrowed for the whole function.
    let mut owner: Option<String> = None;

    // Files used by android: `classes.dex`, then `classes2.dex`, `classes3.dex`, ...
    // The numbered sequence is taken to end at the first missing file; anything after a
    // gap is handled by the alphabetical fallback below.
    let android_names = std::iter::once("classes.dex".to_string())
        .chain((2usize..).map(|n| format!("classes{n}.dex")));
    for name in android_names {
        match self.dex_files.get(&name) {
            Some(dex_file) if dex_file.classes.contains_key(&class_id) => {
                owner = Some(name);
                break;
            }
            Some(_) => {}
            None => break,
        }
    }

    // Fallback: any file containing the class, first in alphabetical order of file name.
    // The android files scanned above are known not to contain the class at this point,
    // so including them in the scan cannot change the result.
    if owner.is_none() {
        owner = self
            .dex_files
            .iter()
            .filter(|(_, dex_file)| dex_file.classes.contains_key(&class_id))
            .map(|(name, _)| name)
            .min()
            .cloned();
    }

    let owner = owner?;
    self.dex_files.get_mut(&owner)?.classes.get_mut(&class_id)
}
}
@ -2873,30 +2946,75 @@ impl Apk {
#[new]
pub fn new() -> Self {
Self {
classes: HashMap::new(),
not_referenced_strings: HashSet::new(),
dex_files: HashMap::new(),
}
}
#[pyo3(name = "add_dex_file")]
pub fn py_add_dex_file(&mut self, data: &[u8], label_each_ins: Option<bool>) -> Result<()> {
self.add_dex_file(data, label_each_ins.unwrap_or(false))
/// Python-facing wrapper around `add_dex_file`.
///
/// # Parameters
/// - `name`: the name of the dex file
/// - `data`: the dex file binary
/// - `label_each_ins`: optional, treated as `false` when not provided
/// - `cache`: optional, treated as `false` when not provided
pub fn py_add_dex_file(
    &mut self,
    name: &str,
    data: &[u8],
    label_each_ins: Option<bool>,
    cache: Option<bool>,
) -> Result<()> {
    // Resolve the optional flags to their defaults before delegating.
    let with_labels = label_each_ins.unwrap_or(false);
    // TODO: change to true when cache invalidation is setup
    let keep_binary = cache.unwrap_or(false);
    self.add_dex_file(name, data, with_labels, keep_binary)
}
pub fn add_class(&mut self, class: Class) -> Result<()> {
pub fn add_class(&mut self, dex_file: &str, class: Class) -> Result<()> {
let file: String = dex_file.into();
let id = class.descriptor.clone();
if self.classes.get(&id).is_some() {
bail!("class {} already exists in the apk", id.__str__());
if !self.dex_files.contains_key(&file) {
self.dex_files.insert(file.clone(), DexFile::default());
}
self.classes.insert(id, class);
if self
.dex_files
.get(&file)
.unwrap()
.classes
.get(&id)
.is_some()
{
bail!("class {} already exists in {}", id.__str__(), &file);
}
self.dex_files
.get_mut(&file)
.unwrap()
.classes
.insert(id, class);
Ok(())
}
pub fn set_method_code(&mut self, method_id: IdMethod, code: Option<Code>) -> Result<()> {
let class = self
pub fn set_method_code(
&mut self,
method_id: IdMethod,
code: Option<Code>,
dex_file: Option<&str>,
) -> Result<()> {
let ty = &method_id.class_;
let class = if let Some(dex_file) = dex_file {
self.dex_files
.get_mut(&dex_file.to_string())
.with_context(|| format!("file {} not found in apk", dex_file))?
.classes
.get_mut(&method_id.class_)
.ok_or_else(|| anyhow!("Class {} not found", method_id.class_.__repr__()))?;
.ok_or_else(|| {
anyhow!(
"Class {} not found in file {}",
method_id.class_.__repr__(),
dex_file
)
})?
} else {
self.get_class_mut(ty.clone())
.with_context(|| format!("{} not found in apk", method_id.class_.__repr__()))?
};
let method = class
.direct_methods
.get_mut(&method_id)
@ -2911,11 +3029,11 @@ impl Apk {
}
#[pyo3(name = "gen_raw_dex")] //Sad GIL noise
pub fn py_gen_raw_dex(&self, py: Python<'_>) -> Result<Vec<PyObject>> {
pub fn py_gen_raw_dex(&self, py: Python<'_>) -> Result<HashMap<String, PyObject>> {
Ok(self
.gen_raw_dex()?
.into_iter()
.map(|bytes| PyBytes::new_bound(py, &bytes).into())
.map(|(file_name, bytes)| (file_name, PyBytes::new_bound(py, &bytes).into()))
.collect())
}
@ -2928,7 +3046,17 @@ impl Apk {
Ok(serde_json::from_str(json)?)
}
pub fn remove_class(&mut self, class: &IdType) {
self.classes.remove(class);
/// Remove a class from the apk.
///
/// # Parameters
/// - `class`: the id of the class to remove
/// - `dex_file`: if provided, the class is removed from this dex file only; otherwise
///   it is removed from every dex file of the apk
///
/// # Errors
/// Fails if `dex_file` is provided but no file with this name exists in the apk.
/// Removing a class that is not present is not an error.
pub fn remove_class(&mut self, class: &IdType, dex_file: Option<&str>) -> Result<()> {
    match dex_file {
        Some(dex_file) => {
            // A `&str` can be used directly to look up a `String` key.
            self.dex_files
                .get_mut(dex_file)
                .with_context(|| format!("file {} not found in apk", dex_file))?
                .classes
                .remove(class);
        }
        None => {
            // No file specified: drop every definition of the class, whichever file holds it.
            for file in self.dex_files.values_mut() {
                file.classes.remove(class);
            }
        }
    }
    Ok(())
}
}

View file

@ -678,7 +678,7 @@ mod test {
fn test_get_expl_debug() {
const RAW_DEBUG: [u8; 10] = [23, 0, 14, 135, 3, 0, 16, 2, 150, 0];
let debug = DebugInfoItem::deserialize_from_slice(&RAW_DEBUG).unwrap();
let mut reader = DebugInfoReader::new(&debug);
let mut reader = DebugInfoReader::new(debug.clone());
let mut list_info = vec![];
loop {
list_info.push(reader.next_info());