fix the data size problem

This commit is contained in:
Jean-Marie 'Histausse' Mineau 2024-02-13 19:30:26 +01:00
parent 1c012cecf3
commit c41b5f0b0c
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
5 changed files with 114 additions and 20 deletions

View file

@ -18,7 +18,7 @@ use androscalpel_serializer::*;
/// Represent an apk.
#[pyclass]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)]
pub struct Apk {
#[pyo3(get)]
#[serde(with = "hashmap_vectorize")]
@ -2327,7 +2327,7 @@ impl Apk {
Ok(methods)
}
fn gen_raw_dex(&self) -> Result<Vec<Vec<u8>>> {
pub fn gen_raw_dex(&self) -> Result<Vec<Vec<u8>>> {
let mut dex_writer = DexWriter::new();
for class_ in self.classes.values() {
dex_writer.add_class(class_)?;
@ -2339,7 +2339,7 @@ impl Apk {
#[pymethods]
impl Apk {
#[new]
fn new() -> Self {
pub fn new() -> Self {
Self {
classes: HashMap::new(),
}

View file

@ -2304,31 +2304,40 @@ impl DexWriter {
///
/// # Warning
///
/// Linking can only occur once all sections are entirely generated.
/// This is the only link method called before generating the map list and finalizing the
/// section:
///
/// Linking can only occur once all sections are entirely generated, however,
/// `class_data_item.direct|virtual_methods[.].code_off` are Uleb128 encoded, meaning
/// that linking class_data_item modifies the size of the class_data_items, hence the position
/// of the class_data_item and all elements located after, as well as the size of the data
/// section. This is pretty bothersome and means that the sections **are** modified.
fn link_class_data(&mut self) -> Result<()> {
debug!("Link class data items");
let mut unlinked_local_offset = 0;
let mut linked_local_offset = 0;
let code_section_off = self.section_manager.get_code_item_offset_prefinalized();
for data in self.class_data_list.iter_mut() {
let unlinked_size = data.size() as u32;
for method in &mut data.direct_methods {
if method.code_off.0 != 0 {
method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1;
method.code_off.0 += code_section_off - 1;
}
}
for method in &mut data.virtual_methods {
if method.code_off.0 != 0 {
method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1;
method.code_off.0 += code_section_off - 1;
}
}
self.corrected_class_data_offset
.insert(unlinked_local_offset, linked_local_offset);
linked_local_offset += data.size() as u32;
unlinked_local_offset += unlinked_size;
// TODO: update section manager even if read only? seems like a bad idea but
// this invalidates the size of the section and the offset of the hidden api
// section
}
self.section_manager.incr_section_size(
Section::ClassDataItem,
linked_local_offset as usize - unlinked_local_offset as usize,
);
Ok(())
}
@ -2480,15 +2489,22 @@ impl DexWriter {
}
self.gen_type_list_section()?;
self.gen_map_list()?; // TODO TODO TODO not good, the values are not yes set because:
// - alignment
// - F***ing class_data that change size during linking
// start by linking class_data_items to populate self.corrected_class_data_offset
// and update the class_data_item section's size.
// Why before gen_map_list? Because the offsets in class_data_items are F***ing Uleb128
// encoded, so their size changes when linking (see doc of self.corrected_class_data_offset).
let code_offset = self.section_manager.get_code_item_offset_prefinalized();
self.link_class_data()?;
self.gen_map_list()?;
assert_eq!(
code_offset,
self.section_manager.get_offset(Section::CodeItem),
"Prelinking computed value and post linking value for \
the offset of the code_item section don't match"
);
// From now on, all sections are generated and the values in section_manager do not change,
// except for class data items, because F (see doc of self.corrected_class_data_offset).
// start by linking class_data_items to populate self.corrected_class_data_offset
self.link_class_data()?;
self.link_header();
self.link_call_site_ids();
self.link_proto_id()?;
@ -2580,6 +2596,7 @@ impl DexWriter {
set.serialize(&mut buffer)?;
}
// CodeItem section
println!("Actual code offset: 0x{:x}", buffer.position());
self.check_section_offset(&buffer, Section::CodeItem);
for code_item in &self.code_items {
Self::fix_section_alignement(&mut buffer, Section::CodeItem)?;
@ -2628,10 +2645,10 @@ impl DexWriter {
*/
let end_data = buffer.position();
/*assert_eq!(
assert_eq!(
end_data as u32,
self.header.data_off + self.header.data_size
);*/
);
// compute signature
buffer.seek(SeekFrom::Start(8 + 4 + 20))?;
@ -2979,6 +2996,7 @@ impl Section {
/// Bookkeeping of the sections of the dex file being generated: one entry per section
/// for its size, its number of elements and its offset.
struct SectionManager {
/// Size of each section, indexed by `Section::get_index()`
/// (presumably in bytes -- TODO confirm).
sizes: [u32; Self::NB_SECTION],
/// Number of elements of each section (presumably indexed like `sizes` -- TODO confirm).
nb_elt: [usize; Self::NB_SECTION],
/// Offset of each section, indexed by `Section::get_index()`. Filled from the
/// cumulated `sizes` right before the manager stops being editable.
offsets: [u32; Self::NB_SECTION],
/// `true` while the sections can still be modified; asking for a section offset in that
/// state panics.
editable: bool,
}
@ -2988,6 +3006,7 @@ impl SectionManager {
fn reset(&mut self) {
self.sizes = [0; Self::NB_SECTION];
self.nb_elt = [0; Self::NB_SECTION];
self.offsets = [0; Self::NB_SECTION];
self.editable = true;
}
@ -3016,13 +3035,13 @@ impl SectionManager {
if self.editable {
panic!("Try to get section offset before sections are finilized");
}
let size = self.sizes[..section.get_index()].iter().sum();
let size = self.offsets[section.get_index()];
let alignment = section.get_item_alignment();
if size % alignment != 0 {
panic!(
"section {section:?} must be aligned on {alignment} bytes, \
"section {section:?} should be aligned on {alignment} bytes, \
found section offset 0x{size:x}"
);
); // avoid by finilized
}
size
}
@ -3064,9 +3083,51 @@ impl SectionManager {
);
}
}
let mut offset = 0;
for section in Section::VARIANT_LIST {
self.offsets[section.get_index()] = offset;
offset += self.sizes[section.get_index()];
}
self.editable = false;
}
/// Predict the offset of the code item section while the manager is still editable.
///
/// This method exists for the sole purpose of linking the method code offsets stored inside
/// the class data items. That linking needs to be done before finalizing because it changes
/// the size of the class data item section.
///
/// The prediction starts from the size the map list is expected to have (a base of 4 bytes
/// plus 12 bytes per non-data section that holds elements, the map list itself always being
/// counted) and then adds the aligned size of every section listed before
/// `Section::CodeItem` in `Section::VARIANT_LIST`.
///
/// # Panics
///
/// Panics if the manager is no longer editable or if the map list already has elements.
///
/// Seriously, avoid using this.
fn get_code_item_offset_prefinalized(&mut self) -> u32 {
    let usable = self.editable && self.get_nb_elt(Section::MapList) == 0;
    if !usable {
        panic!("Don't use this method for other purpose than linking class_data_items");
    }

    // Serialized size of one map item, i.e. of
    // `MapItem { type_: MapItemType::HeaderItem, unused: 0, size: 0, offset: 0 }.size()`.
    const MAP_ITEM_SIZE: u32 = 12;
    let map_list_size = Section::VARIANT_LIST
        .iter()
        .filter(|section| {
            !section.is_data()
                && (self.get_nb_elt(**section) != 0 || **section == Section::MapList)
        })
        .fold(4u32, |size, _| size + MAP_ITEM_SIZE);

    // The map list size is aligned, so accounting for it up front does not disturb the
    // alignment of the sections added below. At this point the recorded sizes of
    // `Section::Data` and `Section::MapList` are 0.
    Section::VARIANT_LIST[..Section::CodeItem.get_index()]
        .iter()
        .fold(map_list_size, |offset, section| {
            let alignment = section.get_item_alignment();
            let padding = match offset % alignment {
                0 => 0,
                misalignment => alignment - misalignment,
            };
            offset + padding + self.sizes[section.get_index()]
        })
}
/// Display the sections informations.
#[allow(dead_code)]
fn show(&self) {

View file

@ -32,6 +32,9 @@ pub use method_handle::*;
pub use scalar::*;
pub use value::*;
#[cfg(test)]
mod tests;
/// Androscalpel.
#[pymodule]
fn androscalpel(py: Python, m: &PyModule) -> PyResult<()> {

Binary file not shown.

View file

@ -0,0 +1,30 @@
use super::*;
use androscalpel_serializer::*;
use std::fs::File;
use std::io;
/// Load a test fixture stored in the `src/tests/` directory of the crate.
///
/// `filename` is the name of the fixture, relative to that directory. The whole content of
/// the file is returned.
///
/// # Panics
///
/// Panics if the file cannot be opened or read.
fn get_dex(filename: &str) -> Vec<u8> {
    let fixture_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("src")
        .join("tests")
        .join(filename);
    // The message is only formatted on failure, and keeps the shape `expect` would produce.
    let mut file = File::open(&fixture_path)
        .unwrap_or_else(|err| panic!("{} not found: {:?}", filename, err));
    let mut data = vec![];
    io::copy(&mut file, &mut data).unwrap();
    data
}
/// Regenerate a dex file from the `classes_hello_world.dex` fixture and check that the data
/// section declared in the header of the generated file ends exactly at the end of the file.
#[test]
fn test_generated_data_size() {
    let mut apk = Apk::new();
    let source_dex = get_dex("classes_hello_world.dex");
    apk.add_dex_file(&source_dex).unwrap();

    let generated = apk.gen_raw_dex().unwrap();
    assert_eq!(generated.len(), 1);
    let raw = generated.first().unwrap();

    let reader = DexFileReader::new(&raw).unwrap();
    let data_end = reader.get_header().data_off + reader.get_header().data_size;
    let file_len = raw.len() as u32;
    assert_eq!(data_end, file_len)
    // TODO: check for all pool concerned if the pool span outside the data section?
    //for item in dex.get_map_list().list() {}
}