fix offsets

This commit is contained in:
Jean-Marie Mineau 2024-01-05 17:06:35 +01:00
parent e1f1d01e2c
commit 92d4ecaa1c
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2

View file

@ -89,6 +89,22 @@ pub struct DexWriter {
debug_info_items: Vec<DebugInfoItem>,
/// The map_list
map_list: MapList,
/// Map the **local** offset of each **unlinked** `class_data_item` to the **local**
/// offset of the same item once **linked**.
///
/// A local offset is relative to the start of the section: the first data item of the
/// section has offset 0 (i.e. it is not the offset from the beginning of the file).
///
/// This hack is necessary (well, maybe not necessary, but avoiding it would require some
/// refactoring) because someone thought that encoding offsets in Uleb format was a good idea.
/// It was not. The size of an encoded offset depends on the value of the offset, so linking
/// changes the size of the struct, which in turn changes the offset of the next struct, etc.
///
/// When generating the structs referring to `class_data_item`s, the local offset of the
/// unlinked `class_data_item` is known, but not the linked offset. This map is filled
/// when linking the `class_data_item`s, and can then be used to link the structs referring to
/// the `class_data_item`s.
corrected_class_data_offset: HashMap<u32, u32>,
}
impl Default for DexWriter {
@ -147,6 +163,7 @@ impl Default for DexWriter {
annotation_items: vec![],
annotation_set_lists: vec![],
debug_info_items: vec![],
corrected_class_data_offset: HashMap::new(),
}
}
}
@ -1440,6 +1457,7 @@ impl DexWriter {
}
self.section_manager
.add_elt(Section::ClassDataItem, Some(data.size()));
assert_eq!(data.size(), data.serialize_to_vec().unwrap().len());
self.class_data_list.push(data);
Ok(())
}
@ -2171,8 +2189,10 @@ impl DexWriter {
///
/// # Warning
///
/// All sections must be generated (but not necessarely linked) before generating the map list.
fn get_map_list(&mut self) -> Result<()> {
/// All sections must be generated (but not linked) before generating the map list.
///
/// This method switches the section manager from edit mode to read only.
fn gen_map_list(&mut self) -> Result<()> {
debug!("Generate the map_list");
// Get the size of a map item
let map_item_size = 12; /* = MapItem {
@ -2192,6 +2212,8 @@ impl DexWriter {
.incr_section_size(Section::MapList, map_item_size);
}
}
// All sections are known and should not be edited anymore
self.section_manager.finalize_sections();
for section in Section::VARIANT_LIST {
if !section.is_data() && self.section_manager.get_nb_elt(*section) != 0 {
/*
@ -2292,8 +2314,15 @@ impl DexWriter {
for class_def in self.class_defs_list.iter_mut() {
// prelink value is set to offset in the section + 1 (to distinguish with 0)
if class_def.class_data_off != 0 {
class_def.class_data_off +=
self.section_manager.get_offset(Section::ClassDataItem) - 1;
let unlinked_local_offset = class_def.class_data_off - 1;
let linked_local_offset = *self
.corrected_class_data_offset
.get(&unlinked_local_offset)
.expect(
"Unlinked class_data_item offset not found in corrected_class_data_offset",
);
class_def.class_data_off =
self.section_manager.get_offset(Section::ClassDataItem) + linked_local_offset;
}
}
}
@ -2320,7 +2349,10 @@ impl DexWriter {
/// Linking can only occur once all sections are entirelly generated.
fn link_code_item(&mut self) {
debug!("Link the code_item entries in class_data_items");
let mut unlinked_local_offset = 0;
let mut linked_local_offset = 0;
for data in &mut self.class_data_list {
let unlinked_size = data.size() as u32;
for method in &mut data.direct_methods {
if method.code_off.0 != 0 {
method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1;
@ -2331,6 +2363,13 @@ impl DexWriter {
method.code_off.0 += self.section_manager.get_offset(Section::CodeItem) - 1;
}
}
linked_local_offset += data.size() as u32;
self.corrected_class_data_offset
.insert(unlinked_local_offset, linked_local_offset);
unlinked_local_offset += unlinked_size;
// TODO: update the section manager even if it is read only? Seems like a bad idea, but
// linking invalidates the size of this section and the offset of the hidden api
// section.
}
}
@ -2411,114 +2450,152 @@ impl DexWriter {
}
self.gen_type_list_section()?;
self.get_map_list()?;
self.gen_map_list()?;
// From now on, all section are generated and the value in section_manager do not change.
// From now on, all section are generated and the value in section_manager do not change,
// except for class data items, because F (see doc of self.corrected_class_data_offset).
// start by linking class_data_items to populate self.corrected_class_data_offset
// TODO: reorganize this: group by referenced structures instead of grouping by referencer.
// this would make it easier handle the class data structure incident.
self.link_code_item();
self.link_header();
self.link_call_site_ids();
self.link_type_list_occurences()?;
self.link_class_data_occurences();
self.link_static_values();
self.link_code_item();
self.link_debug_info();
self.link_annotations();
debug!("Serialize the dex file");
let mut buffer = Cursor::new(Vec::<u8>::new());
// TODO: compute checksum, hash, ect
self.check_section_offset(&buffer, Section::HeaderItem);
Self::fix_section_alignement(&mut buffer, Section::HeaderItem)?;
self.header.serialize(&mut buffer)?;
// StringIdItem section
let mut string_off = self.section_manager.get_offset(Section::StringDataItem);
self.check_section_offset(&buffer, Section::StringIdItem);
for string in self.string_data_list.iter() {
let str_id = StringIdItem {
string_data_off: string_off,
};
Self::fix_section_alignement(&mut buffer, Section::StringIdItem)?;
str_id.serialize(&mut buffer)?;
string_off += string.size() as u32;
}
// TypeId section
self.check_section_offset(&buffer, Section::TypeIdItem);
for ty in &self.type_ids_list {
Self::fix_section_alignement(&mut buffer, Section::TypeIdItem)?;
ty.serialize(&mut buffer)?;
}
// ProtoId section
self.check_section_offset(&buffer, Section::ProtoIdItem);
for proto in &self.proto_ids_list {
Self::fix_section_alignement(&mut buffer, Section::ProtoIdItem)?;
proto.serialize(&mut buffer)?;
}
// FieldIdItem section
self.check_section_offset(&buffer, Section::FieldIdItem);
for field_id in &self.field_ids_list {
Self::fix_section_alignement(&mut buffer, Section::FieldIdItem)?;
field_id.serialize(&mut buffer)?;
}
// MethodIdItem section
self.check_section_offset(&buffer, Section::MethodIdItem);
for method_id in &self.method_ids_list {
Self::fix_section_alignement(&mut buffer, Section::MethodIdItem)?;
method_id.serialize(&mut buffer)?;
}
// ClassDefItem section
self.check_section_offset(&buffer, Section::ClassDefItem);
for class_def in &self.class_defs_list {
Self::fix_section_alignement(&mut buffer, Section::ClassDefItem)?;
class_def.serialize(&mut buffer)?;
}
// CallSiteIdItem, data are inserted as encoded array item later
self.check_section_offset(&buffer, Section::CallSiteIdItem);
for call_site_id in &self.call_site_ids {
Self::fix_section_alignement(&mut buffer, Section::CallSiteIdItem)?;
call_site_id.serialize(&mut buffer)?;
}
// MethodHandleItem section
self.check_section_offset(&buffer, Section::MethodHandleItem);
for handle in &self.method_handles {
Self::fix_section_alignement(&mut buffer, Section::MethodHandleItem)?;
handle.serialize(&mut buffer)?;
}
// MapList
self.check_section_offset(&buffer, Section::MapList);
Self::fix_section_alignement(&mut buffer, Section::MapList)?;
self.map_list.serialize(&mut buffer)?;
// TypeList,
let mut offset = 0; // the sections are always aligned until the type_lists
self.check_section_offset(&buffer, Section::TypeList);
for (list, _) in &self.type_lists_with_offset {
while offset % 4 != 0 {
offset += 1;
0u8.serialize(&mut buffer)?;
}
offset += list.size();
Self::fix_section_alignement(&mut buffer, Section::TypeList)?;
list.serialize(&mut buffer)?;
}
// The next section requires alignment to 4
while offset % 4 != 0 {
// Alignment
self.section_manager.incr_section_size(Section::TypeList, 1);
offset += 1;
}
// AnnotationSetRefList section
self.check_section_offset(&buffer, Section::AnnotationSetRefList);
for list in &self.annotation_set_lists {
Self::fix_section_alignement(&mut buffer, Section::AnnotationSetRefList)?;
list.serialize(&mut buffer)?;
}
// AnnotationSetItem section
self.check_section_offset(&buffer, Section::AnnotationSetItem);
for set in &self.annotation_set_items {
Self::fix_section_alignement(&mut buffer, Section::AnnotationSetItem)?;
set.serialize(&mut buffer)?;
}
// ClassDataItem section
for data in &self.class_data_list {
data.serialize(&mut buffer)?;
}
// CodeItem section
self.check_section_offset(&buffer, Section::CodeItem);
for code_item in &self.code_items {
Self::fix_section_alignement(&mut buffer, Section::CodeItem)?;
code_item.serialize(&mut buffer)?
}
// StringDataItem section
self.check_section_offset(&buffer, Section::StringDataItem);
for string in &self.string_data_list {
Self::fix_section_alignement(&mut buffer, Section::StringDataItem)?;
string.serialize(&mut buffer)?;
}
// DebugInfoItem section
self.check_section_offset(&buffer, Section::DebugInfoItem);
for debug_info in &self.debug_info_items {
Self::fix_section_alignement(&mut buffer, Section::DebugInfoItem)?;
debug_info.serialize(&mut buffer)?;
}
// AnnotationItem section
self.check_section_offset(&buffer, Section::AnnotationItem);
for annot in &self.annotation_items {
Self::fix_section_alignement(&mut buffer, Section::AnnotationItem)?;
annot.serialize(&mut buffer)?;
}
// EncodedArrayItem section
self.check_section_offset(&buffer, Section::EncodedArrayItem);
for array in &self.encoded_array_items {
Self::fix_section_alignement(&mut buffer, Section::EncodedArrayItem)?;
array.serialize(&mut buffer)?;
}
// AnnotationsDirectoryItem section
self.check_section_offset(&buffer, Section::AnnotationsDirectoryItem);
for dir in &self.annotations_directory_items {
Self::fix_section_alignement(&mut buffer, Section::AnnotationsDirectoryItem)?;
dir.serialize(&mut buffer)?;
}
// ClassDataItem section
self.check_section_offset(&buffer, Section::ClassDataItem);
for data in &self.class_data_list {
Self::fix_section_alignement(&mut buffer, Section::ClassDataItem)?;
data.serialize(&mut buffer)?;
}
// TODO: HiddenapiClassDataItem,
/*
self.check_section_offset(&buffer, Section::HiddenapiClassDataItem);
Self::fix_section_alignement(&mut buffer, Section::HiddenapiClassDataItem)?;
*/
// compute signature
buffer.seek(SeekFrom::Start(8 + 4 + 20))?;
@ -2543,6 +2620,29 @@ impl DexWriter {
Ok(())
}
/// Pad `buffer` with zero bytes until its position is a multiple of the item alignment
/// of `section`.
///
/// Nothing is written when the buffer is already aligned.
///
/// # Errors
///
/// Returns the error reported by the serialization of a padding byte, if any.
fn fix_section_alignement(buffer: &mut Cursor<Vec<u8>>, section: Section) -> Result<()> {
    let alignment = u64::from(section.get_item_alignment());
    loop {
        // Re-read the position after every write instead of assuming how far a
        // padding byte moved the cursor.
        if buffer.position() % alignment == 0 {
            return Ok(());
        }
        0u8.serialize(buffer)?;
    }
}
/// Check that `section` is about to be written at the offset computed by the section
/// manager.
///
/// The current position of `buffer` is first rounded up to the item alignment of
/// `section` (the padding is not written here), then compared with the offset the
/// section manager reports for that section.
///
/// # Panics
///
/// Panics if the aligned position and the computed section offset differ.
fn check_section_offset<T>(&self, buffer: &Cursor<T>, section: Section) {
    let alignment = u64::from(section.get_item_alignment());
    let raw_position = buffer.position();
    // Round the position up to the next multiple of the alignment.
    let pos = match raw_position % alignment {
        0 => raw_position,
        overshoot => raw_position + (alignment - overshoot),
    };
    let expected = u64::from(self.section_manager.get_offset(section));
    assert_eq!(
        pos, expected,
        "Computed section offset and actual section offset do not match for section \
         {section:?}, expected 0x{expected:x}, found 0x{pos:x}"
    );
}
/// Compute the order of the classes in the section `class_defs`.
/// Class definitions must be sorted so that a class's superclass and interfaces
/// are before the class.
@ -2641,7 +2741,6 @@ enum Section {
TypeList,
AnnotationSetRefList,
AnnotationSetItem,
ClassDataItem,
CodeItem,
StringDataItem,
DebugInfoItem,
@ -2649,6 +2748,7 @@ enum Section {
EncodedArrayItem,
AnnotationsDirectoryItem,
HiddenapiClassDataItem,
ClassDataItem,
}
impl Section {
@ -2667,13 +2767,14 @@ impl Section {
Self::TypeList,
Self::AnnotationSetRefList,
Self::AnnotationSetItem,
Self::ClassDataItem,
Self::CodeItem,
Self::StringDataItem,
Self::DebugInfoItem,
Self::AnnotationItem,
Self::EncodedArrayItem,
Self::AnnotationsDirectoryItem,
Self::ClassDataItem, // must be last because contains offsets in Uleb,
// so size change when linking !
Self::HiddenapiClassDataItem,
];
@ -2693,13 +2794,13 @@ impl Section {
Self::TypeList => 11,
Self::AnnotationSetRefList => 12,
Self::AnnotationSetItem => 13,
Self::ClassDataItem => 14,
Self::CodeItem => 15,
Self::StringDataItem => 16,
Self::DebugInfoItem => 17,
Self::AnnotationItem => 18,
Self::EncodedArrayItem => 19,
Self::AnnotationsDirectoryItem => 20,
Self::CodeItem => 14,
Self::StringDataItem => 15,
Self::DebugInfoItem => 16,
Self::AnnotationItem => 17,
Self::EncodedArrayItem => 18,
Self::AnnotationsDirectoryItem => 19,
Self::ClassDataItem => 20,
Self::HiddenapiClassDataItem => 21,
}
}
@ -2748,6 +2849,34 @@ impl Section {
}
}
/// Return the section that comes right before this one, or `None` for the header,
/// which has no predecessor.
fn prev(&self) -> Option<Self> {
    let predecessor = match self {
        // The header has nothing before it.
        Self::HeaderItem => return None,
        Self::StringIdItem => Self::HeaderItem,
        Self::TypeIdItem => Self::StringIdItem,
        Self::ProtoIdItem => Self::TypeIdItem,
        Self::FieldIdItem => Self::ProtoIdItem,
        Self::MethodIdItem => Self::FieldIdItem,
        Self::ClassDefItem => Self::MethodIdItem,
        Self::CallSiteIdItem => Self::ClassDefItem,
        Self::MethodHandleItem => Self::CallSiteIdItem,
        // `Data` is just an indicator, so it is skipped: both `Data` and `MapList`
        // report `MethodHandleItem` as their predecessor.
        Self::Data | Self::MapList => Self::MethodHandleItem,
        Self::TypeList => Self::MapList,
        Self::AnnotationSetRefList => Self::TypeList,
        Self::AnnotationSetItem => Self::AnnotationSetRefList,
        Self::CodeItem => Self::AnnotationSetItem,
        Self::StringDataItem => Self::CodeItem,
        Self::DebugInfoItem => Self::StringDataItem,
        Self::AnnotationItem => Self::DebugInfoItem,
        Self::EncodedArrayItem => Self::AnnotationItem,
        Self::AnnotationsDirectoryItem => Self::EncodedArrayItem,
        Self::ClassDataItem => Self::AnnotationsDirectoryItem,
        Self::HiddenapiClassDataItem => Self::ClassDataItem,
    };
    Some(predecessor)
}
fn get_map_item_type(&self) -> MapItemType {
match self {
Self::HeaderItem => MapItemType::HeaderItem,
@ -2764,17 +2893,45 @@ impl Section {
Self::TypeList => MapItemType::TypeList,
Self::AnnotationSetRefList => MapItemType::AnnotationSetRefList,
Self::AnnotationSetItem => MapItemType::AnnotationSetItem,
Self::ClassDataItem => MapItemType::ClassDataItem,
Self::CodeItem => MapItemType::CodeItem,
Self::StringDataItem => MapItemType::StringDataItem,
Self::DebugInfoItem => MapItemType::DebugInfoItem,
Self::AnnotationItem => MapItemType::AnnotationItem,
Self::EncodedArrayItem => MapItemType::EncodedArrayItem,
Self::AnnotationsDirectoryItem => MapItemType::AnnotationsDirectoryItem,
Self::ClassDataItem => MapItemType::ClassDataItem,
Self::HiddenapiClassDataItem => MapItemType::HiddenapiClassDataItem,
}
}
/// Return the alignment, in bytes, required for the items of this section.
///
/// The values follow the alignment given for each item type in the Dalvik executable
/// format: items listed with a 4 byte alignment return 4, items without alignment
/// constraint (and the `Data` indicator) return 1.
fn get_item_alignment(&self) -> u32 {
    match self {
        Self::HeaderItem
        | Self::StringIdItem
        | Self::TypeIdItem
        | Self::ProtoIdItem
        | Self::FieldIdItem
        | Self::MethodIdItem
        | Self::ClassDefItem
        // `call_site_id_item` is specified with a 4 byte alignment, like the other
        // fixed-size id items.
        | Self::CallSiteIdItem
        | Self::MethodHandleItem
        | Self::MapList
        | Self::TypeList
        | Self::AnnotationSetRefList
        | Self::AnnotationSetItem
        | Self::CodeItem
        | Self::AnnotationsDirectoryItem => 4,
        Self::Data
        | Self::StringDataItem
        | Self::DebugInfoItem
        | Self::AnnotationItem
        | Self::EncodedArrayItem
        | Self::ClassDataItem
        | Self::HiddenapiClassDataItem => 1,
    }
}
/// Return `true` if this section is the `Data` variant, `false` for every other section.
fn is_data(&self) -> bool {
matches!(self, Self::Data)
}
@ -2784,6 +2941,7 @@ impl Section {
struct SectionManager {
/// Size in bytes of each section (alignment padding included), indexed by the value
/// returned by `Section::get_index()`.
sizes: [u32; Self::NB_SECTION],
/// Number of elements registered in each section, indexed like `sizes`.
nb_elt: [usize; Self::NB_SECTION],
/// `true` while the sections can still be modified. Set to `true` by `reset()` and to
/// `false` by `finalize_sections()`; the methods modifying the sections panic when it
/// is `false`, and `get_offset()` panics when it is `true`.
editable: bool,
}
impl SectionManager {
@ -2791,28 +2949,47 @@ impl SectionManager {
/// Reset the manager: every section becomes empty (size 0, no element) and the
/// sections are editable again.
fn reset(&mut self) {
    self.sizes = [0; Self::NB_SECTION];
    self.nb_elt = [0; Self::NB_SECTION];
    self.editable = true;
}
/// Register one more element in `section`.
///
/// The section is first padded so that its size is a multiple of the item alignment of
/// the section, then grown by the value of `section.get_elt_size(size)`, and its
/// element count is incremented.
///
/// # Panics
///
/// Panics if the sections are read only, or if `section` is the `Data` section.
fn add_elt(&mut self, section: Section, size: Option<usize>) {
    assert!(
        self.editable,
        "Try to modify a section when the sections are set to read only"
    );
    assert!(
        !section.is_data(),
        "Cannot add element directly in section data"
    );
    let alignment = section.get_item_alignment();
    let section_size = &mut self.sizes[section.get_index()];
    // Pad the section so that the new element starts on an aligned position.
    let misalignment = *section_size % alignment;
    if misalignment != 0 {
        *section_size += alignment - misalignment;
    }
    *section_size += section.get_elt_size(size) as u32;
    self.nb_elt[section.get_index()] += 1;
}
/// Grow `section` by `size` bytes without changing its number of elements.
///
/// # Panics
///
/// Panics if the sections are read only.
fn incr_section_size(&mut self, section: Section, size: usize) {
    assert!(
        self.editable,
        "Try to modify a section when the sections are set to read only"
    );
    let index = section.get_index();
    self.sizes[index] += size as u32;
}
/// Return the offset of `section`, computed as the sum of the sizes of all the
/// sections stored before it.
///
/// # Panics
///
/// Panics if the sections are still editable (offsets are only meaningful once the
/// sections are finalized), or if the computed offset is not a multiple of the item
/// alignment of `section`.
fn get_offset(&self, section: Section) -> u32 {
    if self.editable {
        panic!("Try to get section offset before sections are finilized");
    }
    let size: u32 = self.sizes[..section.get_index()].iter().sum();
    let alignment = section.get_item_alignment();
    if size % alignment != 0 {
        panic!(
            "section {section:?} must be aligned on {alignment} bytes, \
             found section offset 0x{size:x}"
        );
    }
    size
}
fn get_size(&self, section: Section) -> u32 {
// TODO: check alignment
if section.is_data() {
self.sizes[section.get_index()..].iter().sum()
} else {
@ -2823,4 +3000,41 @@ impl SectionManager {
/// Return the number of elements registered in `section`.
fn get_nb_elt(&self, section: Section) -> usize {
    let index = section.get_index();
    self.nb_elt[index]
}
/// Finalize the sections: fix the alignment of every section, then switch the manager
/// to read only.
///
/// For each section whose start (the sum of the sizes of the sections before it) is not
/// a multiple of its item alignment, the missing padding is added to the size of the
/// previous section.
///
/// # Panics
///
/// Panics if a misaligned section has no previous section, or if padding is needed
/// while the sections are already read only.
fn finalize_sections(&mut self) {
    for section in Section::VARIANT_LIST {
        let alignment = section.get_item_alignment();
        let start: u32 = self.sizes[..section.get_index()].iter().sum();
        let misalignment = start % alignment;
        if misalignment == 0 {
            continue;
        }
        let previous = section.prev().expect(
            "First section (Header) should alway be aligned but \
             found unaligned section without predecessor",
        );
        // The padding belongs to the previous section, so that this one starts aligned.
        self.incr_section_size(previous, (alignment - misalignment) as usize);
    }
    self.editable = false;
}
/// Print, for each section, its start and end offsets, its size and its number of
/// elements on the standard output.
#[allow(dead_code)]
fn show(&self) {
    let mut offset = 0;
    for &section in Section::VARIANT_LIST {
        let (size, nb_elt) = (self.get_size(section), self.get_nb_elt(section));
        let new_offset = offset + size;
        println!(
            "{section:?}: 0x{offset:x} -> 0x{new_offset:x} (size: 0x{size:x}, \
             nb elt: {nb_elt})"
        );
        // The data section does not move the running offset: the sections listed after
        // it are displayed starting from the same position.
        if section.is_data() {
            continue;
        }
        offset = new_offset;
    }
}
}