diff --git a/androscalpel/src/apk.rs b/androscalpel/src/apk.rs index 07ef1cc..1f444af 100644 --- a/androscalpel/src/apk.rs +++ b/androscalpel/src/apk.rs @@ -18,7 +18,7 @@ use androscalpel_serializer::*; /// Represent an apk. #[pyclass] -#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Default)] pub struct Apk { #[pyo3(get)] #[serde(with = "hashmap_vectorize")] @@ -2327,7 +2327,7 @@ impl Apk { Ok(methods) } - fn gen_raw_dex(&self) -> Result<Vec<Vec<u8>>> { + pub fn gen_raw_dex(&self) -> Result<Vec<Vec<u8>>> { let mut dex_writer = DexWriter::new(); for class_ in self.classes.values() { dex_writer.add_class(class_)?; @@ -2339,7 +2339,7 @@ impl Apk { #[pymethods] impl Apk { #[new] - fn new() -> Self { + pub fn new() -> Self { Self { classes: HashMap::new(), } diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 0e6fddd..87bbed4 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -2304,31 +2304,40 @@ impl DexWriter { /// /// # Warning /// - /// Linking can only occur once all sections are entirelly generated. + /// This is the only link method called before generating the map list and finalizing the + /// sections: + /// + /// Linking can only occur once all sections are entirely generated; however, + /// `class_data_item.direct|virtual_methods[.].code_off` are Uleb128 encoded, meaning + /// that linking a class_data_item modifies the size of the class_data_items, hence the position + /// of the class_data_item and all elements located after it, as well as the size of the data + /// section. This is pretty bothersome and means that the sections **are** modified. 
fn link_class_data(&mut self) -> Result<()> { debug!("Link class data items"); let mut unlinked_local_offset = 0; let mut linked_local_offset = 0; + let code_section_off = self.section_manager.get_code_item_offset_prefinalized(); for data in self.class_data_list.iter_mut() { let unlinked_size = data.size() as u32; for method in &mut data.direct_methods { if method.code_off.0 != 0 { - method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1; + method.code_off.0 += code_section_off - 1; } } for method in &mut data.virtual_methods { if method.code_off.0 != 0 { - method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1; + method.code_off.0 += code_section_off - 1; } } self.corrected_class_data_offset .insert(unlinked_local_offset, linked_local_offset); linked_local_offset += data.size() as u32; unlinked_local_offset += unlinked_size; - // TODO: update section manager even if read only? seams like a bad idea but - // this invalidate the size of the section and the offset of the hidden api - // section } + self.section_manager.incr_section_size( + Section::ClassDataItem, + linked_local_offset as usize - unlinked_local_offset as usize, + ); Ok(()) } @@ -2480,15 +2489,22 @@ impl DexWriter { } self.gen_type_list_section()?; - self.gen_map_list()?; // TODO TODO TODO not good, the values are not yes set because: - // - alignment - // - F***ing class_data that change size during linking + // start by linking class_data_items to populate self.corrected_class_data_offset + // and update the class_data_item section size. + // Why before gen_map_list? Because the offsets in class_data_items are F***ing Uleb128 + // encoded, so their size changes when linking (see doc of self.corrected_class_data_offset). 
+ let code_offset = self.section_manager.get_code_item_offset_prefinalized(); + self.link_class_data()?; + self.gen_map_list()?; + assert_eq!( + code_offset, + self.section_manager.get_offset(Section::CodeItem), + "Prelinking computed value and post linking value for \ + the offset of the code_item section don't match" + ); // From now on, all section are generated and the value in section_manager do not change, - // except for class data items, because F (see doc of self.corrected_class_data_offset). - // start by linking class_data_items to populate self.corrected_class_data_offset - self.link_class_data()?; self.link_header(); self.link_call_site_ids(); self.link_proto_id()?; @@ -2580,6 +2596,7 @@ impl DexWriter { set.serialize(&mut buffer)?; } // CodeItem section + println!("Actual code offset: 0x{:x}", buffer.position()); self.check_section_offset(&buffer, Section::CodeItem); for code_item in &self.code_items { Self::fix_section_alignement(&mut buffer, Section::CodeItem)?; @@ -2628,10 +2645,10 @@ impl DexWriter { */ let end_data = buffer.position(); - /*assert_eq!( + assert_eq!( end_data as u32, self.header.data_off + self.header.data_size - );*/ + ); // compute signature buffer.seek(SeekFrom::Start(8 + 4 + 20))?; @@ -2979,6 +2996,7 @@ impl Section { struct SectionManager { sizes: [u32; Self::NB_SECTION], nb_elt: [usize; Self::NB_SECTION], + offsets: [u32; Self::NB_SECTION], editable: bool, } @@ -2988,6 +3006,7 @@ impl SectionManager { fn reset(&mut self) { self.sizes = [0; Self::NB_SECTION]; self.nb_elt = [0; Self::NB_SECTION]; + self.offsets = [0; Self::NB_SECTION]; self.editable = true; } @@ -3016,13 +3035,13 @@ impl SectionManager { if self.editable { panic!("Try to get section offset before sections are finilized"); } - let size = self.sizes[..section.get_index()].iter().sum(); + let size = self.offsets[section.get_index()]; let alignment = section.get_item_alignment(); if size % alignment != 0 { panic!( - "section {section:?} must be aligned on 
{alignment} bytes, \ + "section {section:?} should be aligned on {alignment} bytes, \ found section offset 0x{size:x}" - ); + ); // avoided by finalization } size } @@ -3064,9 +3083,51 @@ impl SectionManager { ); } } + let mut offset = 0; + for section in Section::VARIANT_LIST { + self.offsets[section.get_index()] = offset; + offset += self.sizes[section.get_index()]; + } + self.editable = false; } + /// This method exists for the sole purpose of linking the method code offset inside + /// the class data items. This linking needs to be done before finalizing because it changes the + /// size of the class data item section. + /// + /// Seriously, avoid using this. + fn get_code_item_offset_prefinalized(&mut self) -> u32 { + if !self.editable || self.get_nb_elt(Section::MapList) != 0 { + panic!("Don't use this method for other purpose than linking class_data_items"); + } + let mut map_list_size = 4; + let map_item_size = 12; /* = MapItem { + type_: MapItemType::HeaderItem, + unused: 0, + size: 0, + offset: 0, + } + .size(); */ + for section in Section::VARIANT_LIST { + if !section.is_data() + && (self.get_nb_elt(*section) != 0 || section == &Section::MapList) + { + map_list_size += map_item_size; + } + } + let mut offset = map_list_size; // This is aligned so it won't affect alignment + for section in &Section::VARIANT_LIST[..Section::CodeItem.get_index()] { + // size Section::Data and size Section::MapList are 0 + while offset % section.get_item_alignment() != 0 { + offset += 1; + } + offset += self.sizes[section.get_index()]; + } + + offset + } + /// Display the sections informations. #[allow(dead_code)] fn show(&self) { diff --git a/androscalpel/src/lib.rs b/androscalpel/src/lib.rs index c9ad984..3ece94a 100644 --- a/androscalpel/src/lib.rs +++ b/androscalpel/src/lib.rs @@ -32,6 +32,9 @@ pub use method_handle::*; pub use scalar::*; pub use value::*; +#[cfg(test)] +mod tests; + /// Androscalpel. 
#[pymodule] fn androscalpel(py: Python, m: &PyModule) -> PyResult<()> { diff --git a/androscalpel/src/tests/classes_hello_world.dex b/androscalpel/src/tests/classes_hello_world.dex new file mode 100644 index 0000000..a83580a Binary files /dev/null and b/androscalpel/src/tests/classes_hello_world.dex differ diff --git a/androscalpel/src/tests/mod.rs b/androscalpel/src/tests/mod.rs new file mode 100644 index 0000000..6f33c0a --- /dev/null +++ b/androscalpel/src/tests/mod.rs @@ -0,0 +1,30 @@ +use super::*; +use androscalpel_serializer::*; +use std::fs::File; +use std::io; + +fn get_dex(filename: &str) -> Vec<u8> { + let hello_world_dex = format!("{}/src/tests/{}", env!("CARGO_MANIFEST_DIR"), filename); + let mut file = File::open(&hello_world_dex).expect(&format!("{} not found", filename)); + let mut data = vec![]; + io::copy(&mut file, &mut data).unwrap(); + data +} + +#[test] +fn test_generated_data_size() { + let mut apk = Apk::new(); + let dex_data = get_dex("classes_hello_world.dex"); + apk.add_dex_file(&dex_data).unwrap(); + let new_dex = apk.gen_raw_dex().unwrap(); + assert_eq!(new_dex.len(), 1); + let new_dex = new_dex.first().unwrap(); + let dex = DexFileReader::new(&new_dex).unwrap(); + + assert_eq!( + dex.get_header().data_off + dex.get_header().data_size, + new_dex.len() as u32 + ) + // TODO: for every pool concerned, check whether the pool spans outside the data section? + //for item in dex.get_map_list().list() {} +}