diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 1286fcf..bbacb73 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -89,6 +89,22 @@ pub struct DexWriter { debug_info_items: Vec, /// The map_list map_list: MapList, + /// Map the **local** offset of an **unlinked** `class_data_item` to the **local** + /// offset of the same item once **linked**. + /// + /// A local offset is relative to the section: the first data item of the section is at + /// offset 0 (i.e. it is not the offset from the beginning of the file). + /// + /// This hack is necessary (well, maybe not necessary but that would require some refactoring) + /// because someone thought that encoding offsets in Uleb format was a good idea. It was not. + /// The size of an encoded offset depends on the value of the offset, so linking changes the + /// size of the struct, and doing so changes the offset of the next struct, etc. + /// + /// When generating the structs referring to `class_data_item`s, the local offset of the + /// unlinked `class_data_item` is known, but not the linked offset. This map is filled + /// when linking the `class_data_item`s, and can then be used to link the structs referring to + /// the `class_data_item`s. + corrected_class_data_offset: HashMap, } impl Default for DexWriter { @@ -147,6 +163,7 @@ impl Default for DexWriter { annotation_items: vec![], annotation_set_lists: vec![], debug_info_items: vec![], + corrected_class_data_offset: HashMap::new(), } } } @@ -1440,6 +1457,7 @@ impl DexWriter { } self.section_manager .add_elt(Section::ClassDataItem, Some(data.size())); + assert_eq!(data.size(), data.serialize_to_vec().unwrap().len()); self.class_data_list.push(data); Ok(()) } @@ -2171,8 +2189,10 @@ impl DexWriter { /// /// # Warning /// - /// All sections must be generated (but not necessarely linked) before generating the map list. 
- fn get_map_list(&mut self) -> Result<()> { + /// All sections must be generated (but not linked) before generating the map list. + /// + /// This method switches the section manager from edit mode to read only. + fn gen_map_list(&mut self) -> Result<()> { debug!("Generate the map_list"); // Get the size of a map item let map_item_size = 12; /* = MapItem { @@ -2192,6 +2212,8 @@ impl DexWriter { .incr_section_size(Section::MapList, map_item_size); } } + // All sections are known and must not be edited anymore + self.section_manager.finalize_sections(); for section in Section::VARIANT_LIST { if !section.is_data() && self.section_manager.get_nb_elt(*section) != 0 { /* @@ -2292,8 +2314,15 @@ impl DexWriter { for class_def in self.class_defs_list.iter_mut() { // prelink value is set to offset in the section + 1 (to distinguish with 0) if class_def.class_data_off != 0 { - class_def.class_data_off += - self.section_manager.get_offset(Section::ClassDataItem) - 1; + let unlinked_local_offset = class_def.class_data_off - 1; + let linked_local_offset = *self + .corrected_class_data_offset + .get(&unlinked_local_offset) + .expect( + "Unlinked class_data_item offset not found in corrected_class_data_offset", + ); + class_def.class_data_off = + self.section_manager.get_offset(Section::ClassDataItem) + linked_local_offset; } } } @@ -2320,7 +2349,10 @@ impl DexWriter { /// Linking can only occur once all sections are entirelly generated. 
fn link_code_item(&mut self) { debug!("Link the code_item entries in class_data_items"); + let mut unlinked_local_offset = 0; + let mut linked_local_offset = 0; for data in &mut self.class_data_list { + let unlinked_size = data.size() as u32; for method in &mut data.direct_methods { if method.code_off.0 != 0 { method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1; @@ -2331,6 +2363,13 @@ impl DexWriter { method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1; } } + // Record the offsets of this item *before* adding its own size to the accumulators. + self.corrected_class_data_offset + .insert(unlinked_local_offset, linked_local_offset); + linked_local_offset += data.size() as u32; + unlinked_local_offset += unlinked_size; + // TODO: update section manager even if read only? seems like a bad idea but this + // invalidates the size of the section and the offset of the hidden api section } } @@ -2411,114 +2450,152 @@ impl DexWriter { } self.gen_type_list_section()?; - self.get_map_list()?; + self.gen_map_list()?; - // From now on, all section are generated and the value in section_manager do not change. + // From now on, all sections are generated and the values in section_manager do not change, + // except for class data items (see the doc of self.corrected_class_data_offset). + + // Start by linking class_data_items to populate self.corrected_class_data_offset + // TODO: reorganize this: group by referenced structures instead of grouping by referencer. + // This would make it easier to handle the class data structure incident.
+ self.link_code_item(); self.link_header(); self.link_call_site_ids(); self.link_type_list_occurences()?; self.link_class_data_occurences(); self.link_static_values(); - self.link_code_item(); self.link_debug_info(); self.link_annotations(); debug!("Serialize the dex file"); let mut buffer = Cursor::new(Vec::::new()); - // TODO: compute checksum, hash, ect + + self.check_section_offset(&buffer, Section::HeaderItem); + Self::fix_section_alignement(&mut buffer, Section::HeaderItem)?; self.header.serialize(&mut buffer)?; // StringIdItem section let mut string_off = self.section_manager.get_offset(Section::StringDataItem); + self.check_section_offset(&buffer, Section::StringIdItem); for string in self.string_data_list.iter() { let str_id = StringIdItem { string_data_off: string_off, }; + Self::fix_section_alignement(&mut buffer, Section::StringIdItem)?; str_id.serialize(&mut buffer)?; string_off += string.size() as u32; } // TypeId section + self.check_section_offset(&buffer, Section::TypeIdItem); for ty in &self.type_ids_list { + Self::fix_section_alignement(&mut buffer, Section::TypeIdItem)?; ty.serialize(&mut buffer)?; } // ProtoId section + self.check_section_offset(&buffer, Section::ProtoIdItem); for proto in &self.proto_ids_list { + Self::fix_section_alignement(&mut buffer, Section::ProtoIdItem)?; proto.serialize(&mut buffer)?; } // FieldIdItem section + self.check_section_offset(&buffer, Section::FieldIdItem); for field_id in &self.field_ids_list { + Self::fix_section_alignement(&mut buffer, Section::FieldIdItem)?; field_id.serialize(&mut buffer)?; } // MethodIdItem section + self.check_section_offset(&buffer, Section::MethodIdItem); for method_id in &self.method_ids_list { + Self::fix_section_alignement(&mut buffer, Section::MethodIdItem)?; method_id.serialize(&mut buffer)?; } // ClassDefItem section + self.check_section_offset(&buffer, Section::ClassDefItem); for class_def in &self.class_defs_list { + Self::fix_section_alignement(&mut buffer, 
Section::ClassDefItem)?; class_def.serialize(&mut buffer)?; } // CallSiteIdItem, data are inserted as encoded array item later + self.check_section_offset(&buffer, Section::CallSiteIdItem); for call_site_id in &self.call_site_ids { + Self::fix_section_alignement(&mut buffer, Section::CallSiteIdItem)?; call_site_id.serialize(&mut buffer)?; } // MethodHandleItem section + self.check_section_offset(&buffer, Section::MethodHandleItem); for handle in &self.method_handles { + Self::fix_section_alignement(&mut buffer, Section::MethodHandleItem)?; handle.serialize(&mut buffer)?; } // MapList + self.check_section_offset(&buffer, Section::MapList); + Self::fix_section_alignement(&mut buffer, Section::MapList)?; self.map_list.serialize(&mut buffer)?; // TypeList, - let mut offset = 0; // the sections are always aligned until the type_lists + self.check_section_offset(&buffer, Section::TypeList); for (list, _) in &self.type_lists_with_offset { - while offset % 4 != 0 { - offset += 1; - 0u8.serialize(&mut buffer)?; - } - offset += list.size(); + Self::fix_section_alignement(&mut buffer, Section::TypeList)?; list.serialize(&mut buffer)?; } - // The next section requires alignment to 4 - while offset % 4 != 0 { - // Alignment - self.section_manager.incr_section_size(Section::TypeList, 1); - offset += 1; - } // AnnotationSetRefList section + self.check_section_offset(&buffer, Section::AnnotationSetRefList); for list in &self.annotation_set_lists { + Self::fix_section_alignement(&mut buffer, Section::AnnotationSetRefList)?; list.serialize(&mut buffer)?; } // AnnotationSetItem section + self.check_section_offset(&buffer, Section::AnnotationSetItem); for set in &self.annotation_set_items { + Self::fix_section_alignement(&mut buffer, Section::AnnotationSetItem)?; set.serialize(&mut buffer)?; } - // ClassDataItem section - for data in &self.class_data_list { - data.serialize(&mut buffer)?; - } // CodeItem section + self.check_section_offset(&buffer, Section::CodeItem); for code_item in 
&self.code_items { + Self::fix_section_alignement(&mut buffer, Section::CodeItem)?; code_item.serialize(&mut buffer)? } + // StringDataItem section + self.check_section_offset(&buffer, Section::StringDataItem); for string in &self.string_data_list { + Self::fix_section_alignement(&mut buffer, Section::StringDataItem)?; string.serialize(&mut buffer)?; } // DebugInfoItem section + self.check_section_offset(&buffer, Section::DebugInfoItem); for debug_info in &self.debug_info_items { + Self::fix_section_alignement(&mut buffer, Section::DebugInfoItem)?; debug_info.serialize(&mut buffer)?; } // AnnotationItem section + self.check_section_offset(&buffer, Section::AnnotationItem); for annot in &self.annotation_items { + Self::fix_section_alignement(&mut buffer, Section::AnnotationItem)?; annot.serialize(&mut buffer)?; } // EncodedArrayItem section + self.check_section_offset(&buffer, Section::EncodedArrayItem); for array in &self.encoded_array_items { + Self::fix_section_alignement(&mut buffer, Section::EncodedArrayItem)?; array.serialize(&mut buffer)?; } // AnnotationsDirectoryItem section + self.check_section_offset(&buffer, Section::AnnotationsDirectoryItem); for dir in &self.annotations_directory_items { + Self::fix_section_alignement(&mut buffer, Section::AnnotationsDirectoryItem)?; dir.serialize(&mut buffer)?; } + // ClassDataItem section + self.check_section_offset(&buffer, Section::ClassDataItem); + for data in &self.class_data_list { + Self::fix_section_alignement(&mut buffer, Section::ClassDataItem)?; + data.serialize(&mut buffer)?; + } // TODO: HiddenapiClassDataItem, + /* + self.check_section_offset(&buffer, Section::HiddenapiClassDataItem); + Self::fix_section_alignement(&mut buffer, Section::HiddenapiClassDataItem)?; + */ // compute signature buffer.seek(SeekFrom::Start(8 + 4 + 20))?; @@ -2543,6 +2620,29 @@ impl DexWriter { Ok(()) } + /// Insert 0 to a buffer until the right alignment is reached for an element of the + /// given section. 
+ fn fix_section_alignement(buffer: &mut Cursor<Vec<u8>>, section: Section) -> Result<()> { + while buffer.position() % section.get_item_alignment() as u64 != 0 { + 0u8.serialize(buffer)?; + } + Ok(()) + } + + /// Assert that the (aligned) position of the buffer is the computed offset of the section. + fn check_section_offset(&self, buffer: &Cursor<Vec<u8>>, section: Section) { + let mut pos = buffer.position(); + while pos % section.get_item_alignment() as u64 != 0 { + pos += 1; + } + let expected = self.section_manager.get_offset(section) as u64; + assert_eq!( + pos, expected, + "Computed section offset and actual section offset do not match for section \ + {section:?}, expected 0x{expected:x}, found 0x{pos:x}" + ); + } + /// Compute the order of the classes in the section `class_defs`. /// Class definitions must be sorted so that a class's superclass and interfaces /// are before the class. @@ -2641,7 +2741,6 @@ enum Section { TypeList, AnnotationSetRefList, AnnotationSetItem, - ClassDataItem, CodeItem, StringDataItem, DebugInfoItem, @@ -2649,6 +2748,7 @@ enum Section { EncodedArrayItem, AnnotationsDirectoryItem, HiddenapiClassDataItem, + ClassDataItem, } impl Section { @@ -2667,13 +2767,14 @@ impl Section { Self::TypeList, Self::AnnotationSetRefList, Self::AnnotationSetItem, - Self::ClassDataItem, Self::CodeItem, Self::StringDataItem, Self::DebugInfoItem, Self::AnnotationItem, Self::EncodedArrayItem, Self::AnnotationsDirectoryItem, + Self::ClassDataItem, // must be last because it contains offsets in Uleb, + // so its size changes when linking !
Self::HiddenapiClassDataItem, ]; @@ -2693,13 +2794,13 @@ impl Section { Self::TypeList => 11, Self::AnnotationSetRefList => 12, Self::AnnotationSetItem => 13, - Self::ClassDataItem => 14, - Self::CodeItem => 15, - Self::StringDataItem => 16, - Self::DebugInfoItem => 17, - Self::AnnotationItem => 18, - Self::EncodedArrayItem => 19, - Self::AnnotationsDirectoryItem => 20, + Self::CodeItem => 14, + Self::StringDataItem => 15, + Self::DebugInfoItem => 16, + Self::AnnotationItem => 17, + Self::EncodedArrayItem => 18, + Self::AnnotationsDirectoryItem => 19, + Self::ClassDataItem => 20, Self::HiddenapiClassDataItem => 21, } } @@ -2748,6 +2849,34 @@ impl Section { } } + /// Return the section placed just before this one, or `None` for the header section. + fn prev(&self) -> Option<Self> { + match self { + Self::HeaderItem => None, + Self::StringIdItem => Some(Self::HeaderItem), + Self::TypeIdItem => Some(Self::StringIdItem), + Self::ProtoIdItem => Some(Self::TypeIdItem), + Self::FieldIdItem => Some(Self::ProtoIdItem), + Self::MethodIdItem => Some(Self::FieldIdItem), + Self::ClassDefItem => Some(Self::MethodIdItem), + Self::CallSiteIdItem => Some(Self::ClassDefItem), + Self::MethodHandleItem => Some(Self::CallSiteIdItem), + Self::Data => Some(Self::MethodHandleItem), + Self::MapList => Some(Self::MethodHandleItem), // Data is just an indicator + Self::TypeList => Some(Self::MapList), + Self::AnnotationSetRefList => Some(Self::TypeList), + Self::AnnotationSetItem => Some(Self::AnnotationSetRefList), + Self::CodeItem => Some(Self::AnnotationSetItem), + Self::StringDataItem => Some(Self::CodeItem), + Self::DebugInfoItem => Some(Self::StringDataItem), + Self::AnnotationItem => Some(Self::DebugInfoItem), + Self::EncodedArrayItem => Some(Self::AnnotationItem), + Self::AnnotationsDirectoryItem => Some(Self::EncodedArrayItem), + Self::ClassDataItem => Some(Self::AnnotationsDirectoryItem), + Self::HiddenapiClassDataItem => Some(Self::ClassDataItem), + } + } + fn get_map_item_type(&self) -> MapItemType { match self { 
Self::HeaderItem => MapItemType::HeaderItem, @@ -2764,17 +2893,45 @@ impl Section { Self::TypeList => MapItemType::TypeList, Self::AnnotationSetRefList => MapItemType::AnnotationSetRefList, Self::AnnotationSetItem => MapItemType::AnnotationSetItem, - Self::ClassDataItem => MapItemType::ClassDataItem, Self::CodeItem => MapItemType::CodeItem, Self::StringDataItem => MapItemType::StringDataItem, Self::DebugInfoItem => MapItemType::DebugInfoItem, Self::AnnotationItem => MapItemType::AnnotationItem, Self::EncodedArrayItem => MapItemType::EncodedArrayItem, Self::AnnotationsDirectoryItem => MapItemType::AnnotationsDirectoryItem, + Self::ClassDataItem => MapItemType::ClassDataItem, Self::HiddenapiClassDataItem => MapItemType::HiddenapiClassDataItem, } } + /// Return the alignment of the items of the section, in bytes. + fn get_item_alignment(&self) -> u32 { + match self { + Self::HeaderItem => 4, + Self::StringIdItem => 4, + Self::TypeIdItem => 4, + Self::ProtoIdItem => 4, + Self::FieldIdItem => 4, + Self::MethodIdItem => 4, + Self::ClassDefItem => 4, + Self::CallSiteIdItem => 4, + Self::MethodHandleItem => 4, + Self::Data => 1, + Self::MapList => 4, + Self::TypeList => 4, + Self::AnnotationSetRefList => 4, + Self::AnnotationSetItem => 4, + Self::CodeItem => 4, + Self::StringDataItem => 1, + Self::DebugInfoItem => 1, + Self::AnnotationItem => 1, + Self::EncodedArrayItem => 1, + Self::AnnotationsDirectoryItem => 4, + Self::ClassDataItem => 1, + Self::HiddenapiClassDataItem => 1, + } + } + fn is_data(&self) -> bool { matches!(self, Self::Data) } @@ -2784,6 +2941,7 @@ impl Section { struct SectionManager { sizes: [u32; Self::NB_SECTION], nb_elt: [usize; Self::NB_SECTION], + editable: bool, } impl SectionManager { @@ -2791,28 +2949,47 @@ impl SectionManager { fn reset(&mut self) { self.sizes = [0; Self::NB_SECTION]; - self.nb_elt = [0; Self::NB_SECTION] + self.nb_elt = [0; Self::NB_SECTION]; + self.editable = true; } fn add_elt(&mut self, section: Section, size: Option) { + if !self.editable { 
+ panic!("Try to modify a section when the sections are set to read only"); + } if section.is_data() { panic!("Cannot add element directly in section data"); } + while self.sizes[section.get_index()] % section.get_item_alignment() != 0 { + self.sizes[section.get_index()] += 1; + } self.sizes[section.get_index()] += section.get_elt_size(size) as u32; self.nb_elt[section.get_index()] += 1; } fn incr_section_size(&mut self, section: Section, size: usize) { + if !self.editable { + panic!("Try to modify a section when the sections are set to read only"); + } self.sizes[section.get_index()] += size as u32; } fn get_offset(&self, section: Section) -> u32 { - // TODO: check alignment - self.sizes[..section.get_index()].iter().sum() + if self.editable { + panic!("Try to get section offset before sections are finilized"); + } + let size: u32 = self.sizes[..section.get_index()].iter().sum(); + let alignment = section.get_item_alignment(); + if size % alignment != 0 { + panic!( + "section {section:?} must be aligned on {alignment} bytes, \ + found section offset 0x{size:x}" + ); + } + size } fn get_size(&self, section: Section) -> u32 { - // TODO: check alignment if section.is_data() { self.sizes[section.get_index()..].iter().sum() } else { @@ -2823,4 +3000,41 @@ impl SectionManager { fn get_nb_elt(&self, section: Section) -> usize { self.nb_elt[section.get_index()] } + + /// Finalize the sections: switch to read only and fix the section alignment. + fn finalize_sections(&mut self) { + for section in Section::VARIANT_LIST { + while self.sizes[..section.get_index()].iter().sum::<u32>() + % section.get_item_alignment() + != 0 + { + self.incr_section_size( + section.prev().expect( + "First section (Header) should alway be aligned but \ + found unaligned section without predecessor", + ), + 1, + ); + } + } + self.editable = false; + } + + /// Display the sections informations. 
+ #[allow(dead_code)] /* debugging helper: presumably not called in regular builds, hence the allow */ + fn show(&self) { + let mut offset = 0; /* start offset of the section being printed */ + for section in Section::VARIANT_LIST { + let size = self.get_size(*section); + let new_offset = offset + size; + let nb_elt = self.get_nb_elt(*section); + println!( + "{section:?}: 0x{offset:x} -> 0x{new_offset:x} (size: 0x{size:x}, \ + nb elt: {nb_elt})" + ); + if !section.is_data() { /* the size of `Data` is the sum of the sections from `Data` onward, so it must not advance the offset */ + offset = new_offset; + } + } + } }