From 71fc0d2398ff7c55fa97848efabb8a4a707eef6b Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 11 Mar 2024 16:31:03 +0100 Subject: [PATCH] wip: start building fragment --- androscalpel/src/dex_fragment.rs | 2912 ++++++++++++++++++++++++++++++ androscalpel/src/lib.rs | 1 + 2 files changed, 2913 insertions(+) create mode 100644 androscalpel/src/dex_fragment.rs diff --git a/androscalpel/src/dex_fragment.rs b/androscalpel/src/dex_fragment.rs new file mode 100644 index 0000000..ed45a38 --- /dev/null +++ b/androscalpel/src/dex_fragment.rs @@ -0,0 +1,2912 @@ +//! The structure that 'precompiles' a class into a fragment, to be linked before serialization. + +use std::collections::{HashMap, HashSet, VecDeque}; +use std::io; +use std::io::{Cursor, Seek, SeekFrom, Write}; + +use anyhow::{anyhow, bail, Context}; +use log::debug; + +use crate::Result; +use crate::*; +use androscalpel_serializer::*; + +use crate::ins::{CallSite, Instruction}; +use crate::instructions::*; +use androscalpel_serializer::Instruction as InsFormat; + +/// A [`DexFragment`] is a 'precompiled' class. It is not compiled per se, +/// but most structures are converted to [`Serializable`] structs close to +/// the dalvik format. The structs still need to be linked before serialization. +/// +/// Id structs ([`IdType`], [`IdMethodType`], [`IdField`], etc.) are almost entirely +/// recomputed at the link stage and need to be sorted, so they are kept in the high +/// level format. +#[derive(Debug, Clone)] +pub struct DexFragment { + /// The id of the class represented by this fragment. + class_id: IdType, + /// The strings collected from the class, sorted. + strings: Vec, + /// The types collected from the class, sorted. + type_ids: Vec, + /// The prototypes collected from the class, sorted. + proto_ids: Vec, + /// The field ids collected from the class, sorted. + field_ids: Vec, + /// The method ids collected from the class, sorted. + method_ids: Vec, + /// The call sites referred to in the bytecode. 
+ call_site_ids: Vec, + /// A struct that keeps track of the size, number of elements and offsets of the sections. + section_manager: FragSectionManager, + /// The class_def. The `annotations_off`, `class_data_off` and `static_values_off` fields take + /// the value of the offset from the start of their respective section +1 (or zero if the class + /// does not have a value associated to the field). + class_def: ClassDefItem, + /// The class_data. + class_data: Option, + // TODO: type lists should be handled like other ids + // TODO: type lists inside of proto ids are not handled here + /// The type lists found in the class, associated to their index in the type_lists section. + type_lists_index: HashMap, + /// The type_lists section and the offset of the lists inside the section. + type_lists_with_offset: Vec<(TypeList, u32)>, + /// The encoded_array_items section. + encoded_array_items: Vec, + /// The annotations_directory_item section. + annotations_directory_items: Vec, + /// The annotation_set_item section. + annotation_set_items: Vec, + /// The annotation_item section. + annotation_items: Vec, + /// The annotation_set_ref_list section. + annotation_set_lists: Vec, + /// The method_handles section. + method_handles: Vec, + /// The code_items section. + code_items: Vec, + /// The debug info items section. 
+ debug_info_items: Vec, +} + +impl DexFragment { + /// Build the fragment of `class`. + /// + /// The strings, types, prototypes, field ids and method ids of the class are collected + /// and stored sorted in the fragment, then `FragSection::ClassDefItem` is registered in + /// the section manager with `add_elt`. + /// + /// NOTE(review): in this work-in-progress version `class_def` is left zeroed and the + /// `*_index` maps built below are not read before the fragment is returned. + pub fn new(class: &Class) -> Result { + debug!( + "Building dex fragment for class {}", + class.descriptor.__str__() + ); + let mut frag = Self { + class_id: class.descriptor.clone(), + strings: vec![], + type_ids: vec![], + proto_ids: vec![], + field_ids: vec![], + method_ids: vec![], + // Placeholder: every field is zero and none is updated in this constructor. + class_def: ClassDefItem { + class_idx: 0, + access_flags: 0, + superclass_idx: 0, + interfaces_off: 0, + source_file_idx: 0, + annotations_off: 0, + class_data_off: 0, + static_values_off: 0, + }, + call_site_ids: vec![], + section_manager: FragSectionManager::default(), + class_data: None, + type_lists_index: HashMap::new(), + type_lists_with_offset: vec![], + encoded_array_items: vec![], + method_handles: vec![], + code_items: vec![], + annotations_directory_items: vec![], + annotation_set_items: vec![], + annotation_items: vec![], + annotation_set_lists: vec![], + debug_info_items: vec![], + }; + // For each kind of id: collect the values from the class, sort them, then map each + // value to its position in the sorted list. + frag.strings = class.get_all_strings().into_iter().collect(); + frag.strings.sort(); + let strings_index = frag + .strings + .iter() + .enumerate() + .map(|(idx, string)| (string.clone(), idx)) + .collect::>(); + frag.type_ids = class.get_all_types().into_iter().collect(); + frag.type_ids.sort(); + let types_index = frag + .type_ids + .iter() + .enumerate() + .map(|(idx, ty)| (ty.clone(), idx)) + .collect::>(); + frag.proto_ids = class.get_all_protos().into_iter().collect(); + frag.proto_ids.sort(); + let protos_index = frag + .proto_ids + .iter() + .enumerate() + .map(|(idx, proto)| (proto.clone(), idx)) + .collect::>(); + frag.field_ids = class.get_all_field_ids().into_iter().collect(); + frag.field_ids.sort(); + let fields_index = frag + .field_ids + .iter() + .enumerate() + .map(|(idx, field)| (field.clone(), idx)) + .collect::>(); + frag.method_ids = class.get_all_method_ids().into_iter().collect(); + frag.method_ids.sort(); + let methods_index = frag + .method_ids + .iter() + .enumerate() + .map(|(idx, method)| (method.clone(), 
idx)) + .collect::>(); + + frag.section_manager + .add_elt(FragSection::ClassDefItem, None); + Ok(frag) + } + + /// Insert a code_item. + /// + /// # Warning + /// + /// This is currently a stub that probably serialize invalid references to data. + fn insert_code_item(&mut self, method_id: IdMethod, direct_methods: bool) -> Result<()> { + let code = if direct_methods { + self.class_defs + .get(&method_id.class_) + .unwrap() + .0 + .direct_methods + .get(&method_id) + .unwrap() + .code + .as_ref() + .unwrap() + .clone() + } else { + self.class_defs + .get(&method_id.class_) + .unwrap() + .0 + .virtual_methods + .get(&method_id) + .unwrap() + .code + .as_ref() + .unwrap() + .clone() + }; + // Estimate instructions addresses + let mut min_addr = 0; + let mut max_addr = 0; + let mut label_min_max_addrs: HashMap = HashMap::new(); + + for ins in &code.insns { + match ins { + Instruction::Label(Label { name }) => { + label_min_max_addrs.insert(name.clone(), (min_addr, max_addr)); + min_addr += ins.min_ins_size() / 2; + max_addr += ins.max_ins_size() / 2; + } + Instruction::ConstString(ins) => { + let string_idx = self.strings.get(&ins.lit).ok_or(anyhow!( + "String {} (found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let size = ins.get_raw_ins(*string_idx).size() / 2; + min_addr += size; + max_addr += size; + } + _ => { + min_addr += ins.min_ins_size() / 2; + max_addr += ins.max_ins_size() / 2; + } + } + } + // Compute instruction size and precise addresses + let mut addr = 0; + let mut label_addrs = HashMap::new(); + let mut goto_sizes = vec![]; + for ins in &code.insns { + match ins { + Instruction::Label(Label { name }) => { + label_addrs.insert(name.clone(), addr); + addr += ins.max_ins_size() / 2; + } + Instruction::ConstString(ins) => { + let string_idx = self.strings.get(&ins.lit).ok_or(anyhow!( + "String {} (found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + addr 
+= ins.get_raw_ins(*string_idx).size() / 2; + } + Instruction::Goto(Goto { label }) => { + let (min_addr, max_addr) = label_min_max_addrs + .get(label) + .ok_or(anyhow!("Label {label} not found in label estimation map"))?; + let size = Goto::size_from_branch_offset_interval(addr, *min_addr, *max_addr)?; + goto_sizes.push(size); + addr += size / 2; + } + _ => addr += ins.ins_size()? / 2, + } + } + // Serialize instructions + let mut tries = vec![]; + let mut handlers = EncodedCatchHandlerList { list: vec![] }; + let mut handler_off = 0; + let mut insns = vec![]; + let mut payloads = vec![]; + let mut goto_idx = 0; + let mut payload_addr = addr; + if payload_addr % 2 != 0 { + payload_addr += 1; // For switch and array table alignment + } + addr = 0; + for ins in &code.insns { + match ins { + Instruction::ConstString(ins) => { + let string_idx = self.strings.get(&ins.lit).ok_or(anyhow!( + "String {} (found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*string_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::ConstClass(ins) => { + let class_idx = *self.type_ids.get(&ins.lit).ok_or(anyhow!( + "Class {} (type of class found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::CheckCast(ins) => { + let class_idx = *self.type_ids.get(&ins.lit).ok_or(anyhow!( + "Class {} (type of class found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InstanceOf(ins) => { + let class_idx = *self.type_ids.get(&ins.lit).ok_or(anyhow!( + "Class {} (type of class found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + 
addr += ins.size() / 2; + insns.push(ins); + } + Instruction::NewInstance(ins) => { + let class_idx = *self.type_ids.get(&ins.lit).ok_or(anyhow!( + "Class {} (type of class found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::NewArray(ins) => { + let class_idx = *self.type_ids.get(&ins.lit).ok_or(anyhow!( + "Type {} (type found in code of {}) not found in dex builder", + ins.lit.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::FilledNewArray(ins) => { + let class_idx = *self.type_ids.get(&ins.type_).ok_or(anyhow!( + "Type {} (type found in code of {}) not found in dex builder", + ins.type_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(class_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::FillArrayData(ins) => { + let payload = InsFormat::FormatFillArrayDataPayload { + elt_width: ins.elt_width, + data: ins.data.clone(), + }; + if payload_addr % 2 != 0 { + // https://cs.android.com/android/platform/superproject/main/+/main:art/runtime/verifier/method_verifier.cc;drc=e8c3e7be783937a340cd4f3280b69962d6f1ea0c;l=1347 + // The ART check if the array data table is 4 bytes aligned (= 2 ins alligned) + // TODO: check how it is donne in android and other dex generation code. 
+ let nop = Instruction::Nop(Nop).get_raw_ins()?; + payload_addr += nop.size() / 2; + payloads.push(nop); + } + let data_offset = payload_addr as i32 - addr as i32; + payload_addr += payload.size() / 2; + payloads.push(payload); + let ins = ins.get_raw_ins(data_offset); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::Goto(ins) => { + let goto_size = goto_sizes[goto_idx]; + goto_idx += 1; + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found goto with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + let ins = ins.get_raw_ins(branch_offset, goto_size); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::Switch(ins) => { + let mut key_targets = vec![]; + for (key, label) in &ins.branches { + let label_addr = label_addrs.get(label).ok_or(anyhow!( + "Label {} not found in code of {}, but found goto with this label", + label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + key_targets.push((*key, branch_offset)); + } + key_targets.sort_by_key(|(key, _)| *key); + let payload = if ins.is_packed() { + let (first_key, _) = *key_targets.first().ok_or(anyhow!( + "Found empty swith in code of {}", + method_id.__repr__() + ))?; + let targets: Vec<_> = + key_targets.into_iter().map(|(_, target)| target).collect(); + InsFormat::FormatPackedSwitchPayload { first_key, targets } + } else { + InsFormat::FormatSparseSwitchPayload { key_targets } + }; + if payload_addr % 2 != 0 { + // https://cs.android.com/android/platform/superproject/main/+/main:art/runtime/verifier/method_verifier.cc;drc=e8c3e7be783937a340cd4f3280b69962d6f1ea0c;l=1464 + // The ART check if the switch table is 4 bytes aligned (= 2 ins alligned) + // TODO: check how it is donne in android and other dex generation code. 
+ let nop = Instruction::Nop(Nop).get_raw_ins()?; + payload_addr += nop.size() / 2; + payloads.push(nop); + } + let data_offset = payload_addr as i32 - addr as i32; + payload_addr += payload.size() / 2; + payloads.push(payload); + let ins = ins.get_raw_ins(data_offset); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfEq(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfNe(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfLt(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfGe(ins) => { + let label_addr = 
label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfGt(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfLe(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfEqZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + 
method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfNeZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfLtZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfGeZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfGtZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr 
as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IfLeZ(ins) => { + let label_addr = label_addrs.get(&ins.label).ok_or(anyhow!( + "Label {} not found in code of {}, but found if with this label", + ins.label, + method_id.__repr__() + ))?; + let branch_offset = *label_addr as i32 - addr as i32; + if branch_offset > i16::MAX as i32 || branch_offset < i16::MIN as i32 { + bail!( + "Found an if that jump to far from the instruction in code of {}", + method_id.__repr__() + ); + } + let ins = ins.get_raw_ins(branch_offset as i16); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGet(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetWide(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetObject(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetBoolean(ins) => { + let field = 
&ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetByte(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetChar(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IGetShort(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPut(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutWide(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + 
field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutObject(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutBoolean(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutByte(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutChar(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::IPutShort(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + 
Instruction::SGet(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetWide(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetObject(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetBoolean(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetByte(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetChar(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of 
{}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SGetShort(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPut(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPutWide(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPutObject(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPutBoolean(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += 
ins.size() / 2; + insns.push(ins); + } + Instruction::SPutByte(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPutChar(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::SPutShort(ins) => { + let field = &ins.field; + let field_idx = self.field_ids.get(field).ok_or(anyhow!( + "Field {} (field of class {}, found in code of {}) not found in dex builder", + field.__repr__(), + field.class_.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*field_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeVirtual(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let ins = ins.get_raw_ins(meth_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeSuper(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for 
invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let ins = ins.get_raw_ins(meth_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeDirect(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let ins = ins.get_raw_ins(meth_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeStatic(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let ins = ins.get_raw_ins(meth_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeInterface(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let ins = ins.get_raw_ins(meth_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokePolymorphic(ins) => { + let meth = &ins.method; + let meth_idx = self.method_ids.get(meth).ok_or(anyhow!( + "Method {} (method of class {}, found in code of {}) not found in dex builder", + meth.__repr__(), + meth.class_.__repr__(), + method_id.__repr__() + ))?; + let proto_idx = 
self.proto_ids.get(&ins.proto).ok_or(anyhow!( + "Prototype {} (found in code of {}) not found in dex builder", + ins.proto.__repr__(), + method_id.__repr__() + ))?; + debug_assert!( + *meth_idx <= u16::MAX as usize, + "methode id too big for invoke instruction" + ); + debug_assert!( + *proto_idx <= u16::MAX as usize, + "proto id too big for invoke instruction" + ); + let meth_idx = *meth_idx as u16; + let proto_idx = *proto_idx as u16; + let ins = ins.get_raw_ins(meth_idx, proto_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::InvokeCustom(ins) => { + let call_site_idx = self.call_site_ids.len(); + self.insert_call_site_item(&ins.call_site)?; + let ins = ins.get_raw_ins(call_site_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::ConstMethodHandle(ins) => { + let method_handle_idx = self.method_handles.len(); + self.insert_method_handle(&ins.handle)?; + let ins = ins.get_raw_ins(method_handle_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::ConstMethodType(ins) => { + let proto_idx = self.proto_ids.get(&ins.proto).ok_or(anyhow!( + "Prototype {} (found in code of {}) not found in dex builder", + ins.proto.__repr__(), + method_id.__repr__() + ))?; + let ins = ins.get_raw_ins(*proto_idx); + addr += ins.size() / 2; + insns.push(ins); + } + Instruction::Try(try_) => { + let end_block_addr = *label_addrs.get(&try_.end_label).ok_or(anyhow!( + "Label {} not found in code of {}, but found try with this label", + &try_.end_label, + method_id.__repr__() + ))?; + if end_block_addr < addr { + bail!( + "Found end label of a try block before the try instruction in code of {}", + method_id.__repr__() + ) + } + let try_item = TryItem { + start_addr: addr as u32, + insn_count: (end_block_addr - addr) as u16, + handler_off: handler_off as u16, // will be ajusted once the size of the + // handler list object is known + }; + tries.push(try_item); + let mut catches = EncodedCatchHandler { + handlers: vec![], + catch_all_addr: 
None, + }; + for (ty, label) in &try_.handlers { + let type_idx = Uleb128(*self.type_ids.get(ty).ok_or(anyhow!( + "Could not found type {} captured by a try block in {}\ + in the dex builder", + ty.__repr__(), + method_id.__repr__() + ))? as u32); + let addr = Uleb128(*label_addrs.get(label).ok_or(anyhow!( + "Label {} not found in code of {}, but found try \ + with this label as catch for type {}", + &try_.end_label, + method_id.__repr__(), + ty.__repr__(), + ))? as u32); + catches + .handlers + .push(EncodedTypeAddrPair { type_idx, addr }); + } + if let Some(ref label) = try_.default_handler { + let catch_all_addr = *label_addrs.get(label).ok_or(anyhow!( + "Label {} not found in code of {}, but found try \ + with this label as catch all", + &try_.end_label, + method_id.__repr__() + ))?; + catches.catch_all_addr = Some(Uleb128(catch_all_addr as u32)); + } + handler_off += catches.size(); + handlers.list.push(catches); + } + Instruction::Label(_) => (), + _ => { + let ins = ins.get_raw_ins().with_context(|| { + format!( + "Failed to convert instruction {} (found in code of {}) to raw instruction", + ins.__repr__(), + method_id.__repr__() + ) + })?; + addr += ins.size() / 2; + insns.push(ins); + } + } + } + if addr % 2 != 0 { + // make sure the payload section is 4 bytes aligned + let nop = Instruction::Nop(Nop).get_raw_ins()?; + //addr += nop.size() / 2; + insns.push(nop); + } + insns.extend(payloads); + + for try_ in &mut tries { + try_.handler_off += handlers.size_field().size() as u16; + } + + let debug_info_off = if code.debug_info.1.is_empty() && code.parameter_names.is_none() { + 0 + } else { + let debug_info_off = self + .section_manager + .get_aligned_size(Section::DebugInfoItem); + let mut cursor = Cursor::new(code.debug_info.1); + let mut item = DebugInfoItem { + line_start: Uleb128(code.debug_info.0), + parameter_names: vec![], + bytecode: Vec::::deserialize(&mut cursor, DbgBytecode::EndSequence)?, + }; + if let Some(parameter_names) = 
code.parameter_names { + for name in ¶meter_names { + if let Some(name) = name { + item.parameter_names + .push(Uleb128p1(*self.strings.get(name).ok_or(anyhow!( + "String {} (name of param of {}) not found", + name.__str__(), + method_id.__repr__() + ))? as u32)); + } else { + item.parameter_names.push(NO_INDEX); + } + } + } + self.section_manager + .add_elt(Section::DebugInfoItem, Some(item.size())); + self.debug_info_items.push(item); + debug_info_off + 1 + }; + let handlers = if handlers.list.is_empty() { + None + } else { + Some(handlers) + }; + let item = CodeItem { + registers_size: code.registers_size, + ins_size: code.ins_size, + outs_size: code.outs_size, + debug_info_off, // linked in link_debug_info() + insns, + tries, + handlers, + }; + self.section_manager + .add_elt(Section::CodeItem, Some(item.size())); + self.code_items.push(item); + Ok(()) + } + + /// Insert annotation associated to a class. + /// + + /// Insert a class_data_item in the class_data section (in data). + /// + /// # Note + /// + /// code_item objects are 4 bytes aligns, so their offset cannot be odd. + /// + /// To distinguish prelinked value (offset inside the code_item section) to actual values (offset + /// in the whole file or 0), their value is set to the actual value prelink value + 1. This allow + /// to distinguish the offset of the first item (equal to zero before linking) and the value + /// 0 used to indicate an abscence of item. 
+ fn insert_class_data_item(&mut self, class_id: &IdType) -> Result<()> { + let mut data = ClassDataItem::default(); + let (class, _) = self.class_defs.get(class_id).unwrap(); + + let mut static_fields: Vec = class.static_fields.keys().cloned().collect(); + static_fields.sort(); + let mut last_field_id = 0; + for id in &static_fields { + let idx = self.field_ids.get(id).ok_or(anyhow!( + "Field {} (field of class {}) not found in dex builder", + id.__repr__(), + class.__repr__() + ))?; + let field_idx_diff = Uleb128((idx - last_field_id) as u32); + last_field_id = *idx; + let access_flags = Uleb128(class.static_fields.get(id).unwrap().get_raw_access_flags()); + data.static_fields.push(EncodedField { + field_idx_diff, + access_flags, + }); + } + + let mut instance_fields: Vec = class.instance_fields.keys().cloned().collect(); + instance_fields.sort(); + let mut last_field_id = 0; + for id in &instance_fields { + let idx = self.field_ids.get(id).ok_or(anyhow!( + "Field {} (field of class {}) not found in dex builder", + id.__repr__(), + class.__repr__() + ))?; + let field_idx_diff = Uleb128((idx - last_field_id) as u32); + last_field_id = *idx; + let access_flags = Uleb128( + class + .instance_fields + .get(id) + .unwrap() + .get_raw_access_flags(), + ); + data.instance_fields.push(EncodedField { + field_idx_diff, + access_flags, + }); + } + + let mut direct_methods: Vec = class.direct_methods.keys().cloned().collect(); + direct_methods.sort(); + let mut last_method_id = 0; + for id in &direct_methods { + // &mut vs & of self and class make things difficult... 
+ let (class, _) = self.class_defs.get(class_id).unwrap(); + let idx = self.method_ids.get(id).ok_or(anyhow!( + "Method {} (method of class {}) not found in dex builder", + id.__repr__(), + class.__repr__() + ))?; + let method_idx_diff = Uleb128((idx - last_method_id) as u32); + last_method_id = *idx; + let access_flags = + Uleb128(class.direct_methods.get(id).unwrap().get_raw_access_flags()); + // No if let because ownership gunfooterie + let code_off = if class.direct_methods.get(id).unwrap().code.is_some() { + let code_off = self.section_manager.get_aligned_size(Section::CodeItem); + self.insert_code_item(id.clone(), true)?; + Uleb128(code_off + 1) + } else { + Uleb128(0) + }; + data.direct_methods.push(EncodedMethod { + method_idx_diff, + access_flags, + code_off, // Will be relinked once the offset of the code item section is known + }); + } + + let (class, _) = self.class_defs.get(class_id).unwrap(); + let mut virtual_methods: Vec = class.virtual_methods.keys().cloned().collect(); + virtual_methods.sort(); + let mut last_method_id = 0; + for id in &virtual_methods { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let idx = self.method_ids.get(id).ok_or(anyhow!( + "Method {} (method of class {}) not found in dex builder", + id.__repr__(), + class.__repr__() + ))?; + let method_idx_diff = Uleb128((idx - last_method_id) as u32); + last_method_id = *idx; + let access_flags = Uleb128( + class + .virtual_methods + .get(id) + .unwrap() + .get_raw_access_flags(), + ); + // No if let because ownership gunfooterie + let code_off = if class.virtual_methods.get(id).unwrap().code.is_some() { + let code_off = self.section_manager.get_aligned_size(Section::CodeItem); + self.insert_code_item(id.clone(), false)?; + Uleb128(code_off + 1) + } else { + Uleb128(0) + }; + data.virtual_methods.push(EncodedMethod { + method_idx_diff, + access_flags, + code_off, // Will be relinked once the offset of the code item section is known + }); + } + self.section_manager + 
.add_elt(Section::ClassDataItem, Some(data.size())); + //assert_eq!(data.size(), data.serialize_to_vec().unwrap().len()); + self.class_data_list.push(data); + Ok(()) + } + + /// Insert a [`MethodHandle`]. + pub fn insert_method_handle(&mut self, handle: &MethodHandle) -> Result<()> { + let (field_or_method_id, method_handle_type) = match handle { + MethodHandle::StaticPut(StaticPut(field)) => ( + *self.field_ids.get(field).ok_or(anyhow!( + "Field {} not found in dex writer", + field.__repr__() + ))? as u16, + MethodHandleType::StaticPut, + ), + MethodHandle::StaticGet(StaticGet(field)) => ( + *self.field_ids.get(field).ok_or(anyhow!( + "Field {} not found in dex writer", + field.__repr__() + ))? as u16, + MethodHandleType::StaticGet, + ), + MethodHandle::InstancePut(InstancePut(field)) => ( + *self.field_ids.get(field).ok_or(anyhow!( + "Field {} not found in dex writer", + field.__repr__() + ))? as u16, + MethodHandleType::InstancePut, + ), + MethodHandle::InstanceGet(InstanceGet(field)) => ( + *self.field_ids.get(field).ok_or(anyhow!( + "Field {} not found in dex writer", + field.__repr__() + ))? as u16, + MethodHandleType::InstanceGet, + ), + MethodHandle::InvokeStatic(InvokeStatic(meth)) => ( + *self.method_ids.get(meth).ok_or(anyhow!( + "Method {} not found in dex writer", + meth.__repr__() + ))? as u16, + MethodHandleType::InvokeStatic, + ), + MethodHandle::InvokeInstance(InvokeInstance(meth)) => ( + *self.method_ids.get(meth).ok_or(anyhow!( + "Method {} not found in dex writer", + meth.__repr__() + ))? as u16, + MethodHandleType::InvokeInstance, + ), + MethodHandle::InvokeConstructor(InvokeConstructor(meth)) => ( + *self.method_ids.get(meth).ok_or(anyhow!( + "Method {} not found in dex writer", + meth.__repr__() + ))? as u16, + MethodHandleType::InvokeConstructor, + ), + MethodHandle::InvokeDirect(InvokeDirect(meth)) => ( + *self.method_ids.get(meth).ok_or(anyhow!( + "Method {} not found in dex writer", + meth.__repr__() + ))? 
as u16, + MethodHandleType::InvokeDirect, + ), + MethodHandle::InvokeInterface(InvokeInterface(meth)) => ( + *self.method_ids.get(meth).ok_or(anyhow!( + "Method {} not found in dex writer", + meth.__repr__() + ))? as u16, + MethodHandleType::InvokeInterface, + ), + }; + self.method_handles.push(MethodHandleItem { + method_handle_type, + field_or_method_id, + unused1: 0, + unused2: 0, + }); + Ok(()) + } + + /// Convert a [`DexValue`] to an [`EncodedValue`]. + /// + /// # Warning + /// + /// This method can insert element in the dex file like method_handles. + pub fn dex_value_to_encoded_value(&mut self, value: &DexValue) -> Result { + match value { + DexValue::Byte(DexByte(val)) => Ok(EncodedValue::Byte(*val)), + DexValue::Short(DexShort(val)) => Ok(EncodedValue::Short(*val)), + DexValue::Char(DexChar(val)) => Ok(EncodedValue::Char(*val)), + DexValue::Int(DexInt(val)) => Ok(EncodedValue::Int(*val)), + DexValue::Long(DexLong(val)) => Ok(EncodedValue::Long(*val)), + DexValue::Float(DexFloat(val)) => Ok(EncodedValue::Float(*val)), + DexValue::Double(DexDouble(val)) => Ok(EncodedValue::Double(*val)), + DexValue::MethodType(val) => Ok(EncodedValue::MethodType( + *self.proto_ids.get(val).ok_or(anyhow!( + "Prototype {} not found in dex writer", + val.__repr__() + ))? as u32, + )), + DexValue::MethodHandle(val) => { + // TODO: move to a method + let idx = self.method_handles.len() as u32; + self.insert_method_handle(val)?; + Ok(EncodedValue::MethodHandle(idx)) + } + DexValue::String(val) => Ok(EncodedValue::String( + *self + .strings + .get(val) + .ok_or(anyhow!("String {} not found in dex writer", val.__repr__()))? + as u32, + )), + DexValue::Type(val) => Ok(EncodedValue::Type( + *self + .type_ids + .get(val) + .ok_or(anyhow!("Type {} not found in dex writer", val.__repr__()))? + as u32, + )), + DexValue::Field(val) => Ok(EncodedValue::Field( + *self + .field_ids + .get(val) + .ok_or(anyhow!("Field {} not found in dex writer", val.__repr__()))? 
+ as u32, + )), + DexValue::Method(val) => Ok(EncodedValue::Method( + *self + .method_ids + .get(val) + .ok_or(anyhow!("Method {} not found in dex writer", val.__repr__()))? + as u32, + )), + DexValue::Enum(IdEnum(val)) => Ok(EncodedValue::Enum( + *self + .field_ids + .get(val) + .ok_or(anyhow!("Field {} not found in dex writer", val.__repr__()))? + as u32, + )), + DexValue::Array(DexArray(arr)) => { + let mut values = vec![]; + for val in arr { + values.push( + self.dex_value_to_encoded_value(val) + .context("Error while serializing a array")?, + ); + } + Ok(EncodedValue::Array(EncodedArray { values })) + } + DexValue::Annotation(val) => Ok(EncodedValue::Annotation( + self.dex_annotation_to_encoded_annotation(val.clone())?, + )), + DexValue::Null(DexNull) => Ok(EncodedValue::Null), + DexValue::Boolean(DexBoolean(val)) => Ok(EncodedValue::Boolean(*val)), + } + } + + /// Insert an encoded_array in the encoded_array_item section. + fn insert_encoded_array_item(&mut self, DexArray(array): DexArray) -> Result<()> { + let mut values = vec![]; + for value in array { + values.push(self.dex_value_to_encoded_value(&value)?); + } + let item = EncodedArrayItem { + value: EncodedArray { values }, + }; + self.section_manager + .add_elt(Section::EncodedArrayItem, Some(item.size())); + self.encoded_array_items.push(item); + Ok(()) + } + + /// Insert a [`CallSite`] to the encoded array items + /// + /// # Warning + /// + /// This method can insert element in the dex file like method_handles. 
+ pub fn insert_call_site_item(&mut self, call_site: &CallSite) -> Result<()> { + let mut values = vec![]; + values.push(DexValue::MethodHandle(call_site.method_handle.clone())); + values.push(DexValue::String(call_site.name.clone())); + values.push(DexValue::MethodType(call_site.type_.clone())); + values.extend(call_site.args.iter().cloned()); + self.call_site_ids.push(CallSiteIdItem { + call_site_off: self + .section_manager + .get_aligned_size(Section::EncodedArrayItem), + }); // linked in link_call_site_ids() + self.section_manager.add_elt(Section::CallSiteIdItem, None); + self.insert_encoded_array_item(DexArray(values)) + } + + /// Insert the encoded_array_item encoding the static_values of a class. + fn insert_class_static_values(&mut self, class_id: &IdType) -> Result<()> { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let mut static_fields: Vec = class.static_fields.keys().cloned().collect(); + static_fields.sort(); + let mut array = vec![]; + let mut last_defined_field_index = 0; + for (idx, f) in static_fields.iter().enumerate() { + if class.static_fields.get(f).unwrap().value.is_some() { + last_defined_field_index = idx; + } + } + for f in &static_fields[..=last_defined_field_index] { + let field = class.static_fields.get(f).unwrap(); + if let Some(val) = field.value.as_ref() { + array.push(val.clone()); + } else { + array.push(field.descriptor.type_.get_default_value().ok_or(anyhow!( + "The type {} (for field {} in class {}) does not have a default value", + field.descriptor.type_.__repr__(), + field.descriptor.__repr__(), + class_id.__repr__() + ))?); + } + } + self.insert_encoded_array_item(DexArray(array)) + .with_context(|| { + format!( + "Failed to serialize static values of class {}", + class_id.__repr__() + ) + }) + } + + fn dex_annotation_to_encoded_annotation( + &mut self, + DexAnnotation { type_, elements }: DexAnnotation, + ) -> Result { + let mut encoded_elements = vec![]; + + let mut elements_names: Vec<_> = 
elements.keys().collect(); + elements_names.sort(); + + for name in elements_names { + let elt = elements.get(name).unwrap(); + encoded_elements.push(AnnotationElement { + name_idx: Uleb128(*self.strings.get(name).ok_or(anyhow!( + "{} (annotation element name) not found in dex builder", + name.__str__() + ))? as u32), + value: self.dex_value_to_encoded_value(elt)?, + }); + } + Ok(EncodedAnnotation { + type_idx: Uleb128(*self.type_ids.get(&type_).ok_or(anyhow!( + "Annotation type {} not found in dex builder", + type_.__repr__(), + ))? as u32), + elements: encoded_elements, + }) + } + + /// Insert the annnotations set for a class. + fn insert_class_annotation_set(&mut self, class_id: &IdType) -> Result<()> { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let mut annotations = class.annotations.clone(); + + let mut annotation_set = AnnotationSetItem { entries: vec![] }; + annotations.sort_by_key(|annot| annot.annotation.type_.clone()); + for annot in annotations { + annotation_set.entries.push(AnnotationOffItem { + annotation_off: self + .section_manager + .get_aligned_size(Section::AnnotationItem), + }); // linked in link_annotations() + + let item = AnnotationItem { + visibility: match ( + annot.visibility_build, + annot.visibility_runtime, + annot.visibility_system, + ) { + (true, false, false) => AnnotationVisibility::Build, + (false, true, false) => AnnotationVisibility::Runtime, + (false, false, true) => AnnotationVisibility::System, + _ => bail!( + "Annotation need visibility set to one and only one of build, \ + runtime or system" + ), // TODO: check if this is true + }, + annotation: self.dex_annotation_to_encoded_annotation(annot.annotation)?, + }; + self.section_manager + .add_elt(Section::AnnotationItem, Some(item.size())); + self.annotation_items.push(item); + } + self.section_manager + .add_elt(Section::AnnotationSetItem, Some(annotation_set.size())); + self.annotation_set_items.push(annotation_set); + Ok(()) + } + + /// Insert the 
annnotations set for a class. + fn insert_field_annotation_set( + &mut self, + field_id: &IdField, + is_static_field: bool, + ) -> Result<()> { + let (class, _) = self.class_defs.get(&field_id.class_).unwrap(); + let field = if is_static_field { + class.static_fields.get(field_id).unwrap() + } else { + class.instance_fields.get(field_id).unwrap() + }; + let mut annotations = field.annotations.clone(); + + let mut annotation_set = AnnotationSetItem { entries: vec![] }; + annotations.sort_by_key(|annot| annot.annotation.type_.clone()); + for annot in annotations { + annotation_set.entries.push(AnnotationOffItem { + annotation_off: self + .section_manager + .get_aligned_size(Section::AnnotationItem), + }); // linked in link_annotations() + + let item = AnnotationItem { + visibility: match ( + annot.visibility_build, + annot.visibility_runtime, + annot.visibility_system, + ) { + (true, false, false) => AnnotationVisibility::Build, + (false, true, false) => AnnotationVisibility::Runtime, + (false, false, true) => AnnotationVisibility::System, + _ => bail!( + "Annotation need visibility set to one and only one of build, \ + runtime or system" + ), // TODO: check if this is true + }, + annotation: self.dex_annotation_to_encoded_annotation(annot.annotation)?, + }; + self.section_manager + .add_elt(Section::AnnotationItem, Some(item.size())); + self.annotation_items.push(item); + } + self.section_manager + .add_elt(Section::AnnotationSetItem, Some(annotation_set.size())); + self.annotation_set_items.push(annotation_set); + Ok(()) + } + + /// Insert the annnotations set for a method (but not the parameters annotations). 
+ fn insert_method_annotation_set( + &mut self, + method_id: &IdMethod, + is_direct_method: bool, + ) -> Result<()> { + let (class, _) = self.class_defs.get(&method_id.class_).unwrap(); + let method = if is_direct_method { + class.direct_methods.get(method_id).unwrap() + } else { + class.virtual_methods.get(method_id).unwrap() + }; + let mut annotations = method.annotations.clone(); + + let mut annotation_set = AnnotationSetItem { entries: vec![] }; + annotations.sort_by_key(|annot| annot.annotation.type_.clone()); + for annot in annotations { + annotation_set.entries.push(AnnotationOffItem { + annotation_off: self + .section_manager + .get_aligned_size(Section::AnnotationItem), + }); // linked in link_annotations() + + let item = AnnotationItem { + visibility: match ( + annot.visibility_build, + annot.visibility_runtime, + annot.visibility_system, + ) { + (true, false, false) => AnnotationVisibility::Build, + (false, true, false) => AnnotationVisibility::Runtime, + (false, false, true) => AnnotationVisibility::System, + _ => bail!( + "Annotation need visibility set to one and only one of build, \ + runtime or system" + ), // TODO: check if this is true + }, + annotation: self.dex_annotation_to_encoded_annotation(annot.annotation)?, + }; + self.section_manager + .add_elt(Section::AnnotationItem, Some(item.size())); + self.annotation_items.push(item); + } + self.section_manager + .add_elt(Section::AnnotationSetItem, Some(annotation_set.size())); + self.annotation_set_items.push(annotation_set); + Ok(()) + } + + /// Insert the annotations set for a method parameter. 
+ fn insert_parameters_annotation_set( + &mut self, + method_id: &IdMethod, + is_direct_method: bool, + parameter_idx: usize, + ) -> Result<()> { + let (class, _) = self.class_defs.get(&method_id.class_).unwrap(); + let method = if is_direct_method { + class.direct_methods.get(method_id).unwrap() + } else { + class.virtual_methods.get(method_id).unwrap() + }; + let mut annotations = method.parameters_annotations[parameter_idx].clone(); + + let mut annotation_set = AnnotationSetItem { entries: vec![] }; + annotations.sort_by_key(|annot| annot.annotation.type_.clone()); + for annot in annotations { + annotation_set.entries.push(AnnotationOffItem { + annotation_off: self + .section_manager + .get_aligned_size(Section::AnnotationItem), + }); // linked in link_annotations() + + let item = AnnotationItem { + visibility: match ( + annot.visibility_build, + annot.visibility_runtime, + annot.visibility_system, + ) { + (true, false, false) => AnnotationVisibility::Build, + (false, true, false) => AnnotationVisibility::Runtime, + (false, false, true) => AnnotationVisibility::System, + _ => bail!( + "Annotation need visibility set to one and only one of build, \ + runtime or system" + ), // TODO: check if this is true + }, + annotation: self.dex_annotation_to_encoded_annotation(annot.annotation)?, + }; + self.section_manager + .add_elt(Section::AnnotationItem, Some(item.size())); + self.annotation_items.push(item); + } + self.section_manager + .add_elt(Section::AnnotationSetItem, Some(annotation_set.size())); + self.annotation_set_items.push(annotation_set); + Ok(()) + } + + /// Insert the annotations set list for a method parameters. 
+ fn insert_parameters_annotation_set_list( + &mut self, + method_id: &IdMethod, + is_direct_method: bool, + ) -> Result<()> { + let mut list = AnnotationSetRefList { list: vec![] }; + let (class, _) = self.class_defs.get(&method_id.class_).unwrap(); + let method = if is_direct_method { + class.direct_methods.get(method_id).unwrap() + } else { + class.virtual_methods.get(method_id).unwrap() + }; + let param_has_annotation: Vec<_> = method + .parameters_annotations + .iter() + .map(|annots| !annots.is_empty()) + .collect(); + for (param_idx, has_annotation) in param_has_annotation.into_iter().enumerate() { + list.list.push(AnnotationSetRefItem { + annotations_off: if has_annotation { + let annotation_off = self + .section_manager + .get_aligned_size(Section::AnnotationSetItem); + self.insert_parameters_annotation_set(method_id, is_direct_method, param_idx)?; + annotation_off + 1 + } else { + 0 + }, // linked in link_annotations() + }); + } + + self.section_manager + .add_elt(Section::AnnotationSetRefList, Some(list.size())); + self.annotation_set_lists.push(list); + Ok(()) + } + + /// Insert a class annotations (including field, methods and parameters annotations). + /// + /// # Note + /// + /// annotation_set_item objects are 4 bytes aligns, so their offset cannot be odd. + /// + /// To distinguish prelinked value (offset inside the code_item section) to actual values (offset + /// in the whole file or 0), their value is set to the actual value prelink value + 1. This allow + /// to distinguish the offset of the first item (equal to zero before linking) and the value + /// 0 used to indicate an abscence of item. 
+ fn insert_annotations(&mut self, class_id: &IdType) -> Result<()> { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let class_annotations_off = if !class.annotations.is_empty() { + let class_annotations_off = self + .section_manager + .get_aligned_size(Section::AnnotationSetItem); + self.insert_class_annotation_set(class_id) + .with_context(|| { + format!( + "Failed to insert class annotation for class {}", + class_id.__repr__() + ) + })?; + class_annotations_off + 1 + } else { + 0 + }; + + let mut field_ids = vec![]; + let (class, _) = self.class_defs.get(class_id).unwrap(); + field_ids.extend(class.static_fields.keys().cloned()); + field_ids.extend(class.instance_fields.keys().cloned()); + field_ids.sort(); + let mut field_annotations = vec![]; + for field_id in field_ids { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let static_field = class.static_fields.get(&field_id); + let instance_field = class.instance_fields.get(&field_id); + let (is_static, field) = match (static_field, instance_field) { + (Some(field), None) => (true, field), + (None, Some(field)) => (false, field), + _ => bail!( + "Unexpected configuration: field {} is both a static and a instance field in {}", + field_id.__repr__(), class_id.__repr__()), + }; + if !field.annotations.is_empty() { + let annotations_off = self + .section_manager + .get_aligned_size(Section::AnnotationSetItem) + + 1; + self.insert_field_annotation_set(&field_id, is_static)?; + field_annotations.push(FieldAnnotation { + field_idx: *self.field_ids.get(&field_id).ok_or(anyhow!( + "Field {} in {} not found in dex builder", + field_id.__repr__(), + class_id.__repr__(), + ))? 
as u32, + annotations_off, // linked in link_annotations() + }); + } + } + + let mut method_ids = vec![]; + let (class, _) = self.class_defs.get(class_id).unwrap(); + method_ids.extend(class.direct_methods.keys().cloned()); + method_ids.extend(class.virtual_methods.keys().cloned()); + method_ids.sort(); + let mut method_annotations = vec![]; + for method_id in &method_ids { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let direct_method = class.direct_methods.get(method_id); + let virtual_method = class.virtual_methods.get(method_id); + let (is_direct, method) = match (direct_method, virtual_method) { + (Some(method), None) => (true, method), + (None, Some(method)) => (false, method), + _ => bail!( + "Unexpected configuration: method {} is both a direct and a virtual method in {}", + method_id.__repr__(), class_id.__repr__()), + }; + if !method.annotations.is_empty() { + let annotations_off = self + .section_manager + .get_aligned_size(Section::AnnotationSetItem) + + 1; + self.insert_method_annotation_set(method_id, is_direct)?; + method_annotations.push(MethodAnnotation { + method_idx: *self.method_ids.get(method_id).ok_or(anyhow!( + "Method {} in {} not found in dex builder", + method_id.__repr__(), + class_id.__repr__(), + ))? 
as u32, + annotations_off, // linked in link_annotations() + }); + } + } + + let mut parameter_annotations = vec![]; + for method_id in method_ids { + let (class, _) = self.class_defs.get(class_id).unwrap(); + let direct_method = class.direct_methods.get(&method_id); + let virtual_method = class.virtual_methods.get(&method_id); + let (is_direct, method) = match (direct_method, virtual_method) { + (Some(method), None) => (true, method), + (None, Some(method)) => (false, method), + _ => bail!( + "Unexpected configuration: method {} is both a direct and a virtual method in {}", + method_id.__repr__(), class_id.__repr__()), + }; + if !method.parameters_annotations.is_empty() { + let annotations_off = self + .section_manager + .get_aligned_size(Section::AnnotationSetRefList) + + 1; + self.insert_parameters_annotation_set_list(&method_id, is_direct)?; + parameter_annotations.push(ParameterAnnotation { + method_idx: *self.method_ids.get(&method_id).ok_or(anyhow!( + "Method {} in {} not found in dex builder", + method_id.__repr__(), + class_id.__repr__(), + ))? as u32, + annotations_off, // linked in link_annotations() + }); + } + } + + let item = AnnotationsDirectoryItem { + class_annotations_off, // linked in link_annotations() + field_annotations, + method_annotations, + parameter_annotations, + }; + self.section_manager + .add_elt(Section::AnnotationsDirectoryItem, Some(item.size())); + self.annotations_directory_items.push(item); + Ok(()) + } + + /// Insert a class_def_item in the class_defs section **and** the other struct that needs to be + /// generated on the fly. + /// + /// # Warning + /// + /// The class_defs section **MUST** be sorted by inheritance dependencies (parents classes and + /// interfaces must appear **before** child classes). Accordingly, this method must be invoked + /// in the right order. 
+    ///
+    /// # Note
+    ///
+    /// The `class_data_off`, `static_values_off` and `annotations_off` fields of the generated
+    /// class_def_item hold *prelink* values: the offset of the item inside its own section
+    /// (class_data_item, encoded_array_item and annotations_directory_item respectively),
+    /// plus 1.
+    ///
+    /// The `+ 1` makes it possible to distinguish the first item of a section (local offset 0,
+    /// stored as 1) from the value 0, which indicates the absence of item. The actual offsets
+    /// in the whole file are only known once the class_def_items are linked.
+    ///
+    /// NOTE(review): this note used to justify the scheme by a 4 bytes alignment of the three
+    /// kinds of item. The alignment table at the end of this file gives a 1 byte alignment to
+    /// encoded_array_item and class_data_item, and the `+ 1` encoding does not rely on the
+    /// parity of the offsets, so the claim was dropped. To be confirmed.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the class, its type, its superclass or its source file string
+    /// is not known by the builder, or when the generation of an associated item fails.
+    fn insert_class_def_item(&mut self, class_id: &IdType) -> Result<()> {
+        // The class is still expected to be registered after each `insert_*` call below.
+        const STILL_REGISTERED: &str =
+            "class was registered at the start of insert_class_def_item";
+
+        let idx = self.class_defs_list.len();
+        // Record the position of the class in `class_defs_list`. An unknown class is reported
+        // as an error instead of being silently skipped here and panicking just after.
+        let (_, def_idx) = self.class_defs.get_mut(class_id).ok_or_else(|| {
+            anyhow!("Class {} not found in dex builder", class_id.__repr__())
+        })?;
+        *def_idx = idx;
+
+        let (class, _) = self.class_defs.get(class_id).expect(STILL_REGISTERED);
+        let class_data_off = if class.has_data_item() {
+            // The size of the section must be read *before* inserting the new item:
+            // it is the local offset of the item about to be inserted.
+            let class_data_off = self
+                .section_manager
+                .get_aligned_size(Section::ClassDataItem);
+            self.insert_class_data_item(class_id)?;
+            class_data_off + 1
+        } else {
+            0
+        };
+        // `class` borrows `self`, so the reference must be dropped before each `insert_*`
+        // call (which mutates `self`) and a new one is taken afterward.
+        let (class, _) = self.class_defs.get(class_id).expect(STILL_REGISTERED);
+        let static_values_off = if class.has_static_values_array() {
+            let static_values_off = self
+                .section_manager
+                .get_aligned_size(Section::EncodedArrayItem);
+            self.insert_class_static_values(class_id)?;
+            static_values_off + 1
+        } else {
+            0
+        };
+        let (class, _) = self.class_defs.get(class_id).expect(STILL_REGISTERED);
+        let annotations_off = if class.has_annotations() {
+            let annotations_off = self
+                .section_manager
+                .get_aligned_size(Section::AnnotationsDirectoryItem);
+            self.insert_annotations(class_id)?;
+            annotations_off + 1
+        } else {
+            0
+        };
+        let (class, _) = self.class_defs.get(class_id).expect(STILL_REGISTERED);
+        // The error values are built lazily (`ok_or_else`): the `__repr__()` strings are only
+        // computed when a lookup actually fails.
+        self.class_defs_list.push(ClassDefItem {
+            class_idx: *self.type_ids.get(class_id).ok_or_else(|| {
+                anyhow!(
+                    "Type {} (type of class {}) not found in dex builder",
+                    class_id.__repr__(),
+                    class.__repr__()
+                )
+            })? as u32,
+            access_flags: class.get_raw_access_flags(),
+            superclass_idx: if let Some(sup) = &class.superclass {
+                *self.type_ids.get(sup).ok_or_else(|| {
+                    anyhow!(
+                        "Type {} (superclass of class {}) not found in dex builder",
+                        sup.__repr__(),
+                        class.__repr__()
+                    )
+                })? as u32
+            } else {
+                NO_INDEX.0
+            },
+            // Set when the class_def_items are linked.
+            interfaces_off: 0,
+            source_file_idx: if let Some(file) = &class.source_file {
+                *self.strings.get(file).ok_or_else(|| {
+                    anyhow!(
+                        "String {} (source file of class {}) not found in dex builder",
+                        file.__repr__(),
+                        class.__repr__()
+                    )
+                })? as u32
+            } else {
+                NO_INDEX.0
+            },
+
+            // The three following values are prelink values (local offset + 1, or 0):
+            // need relinking once offset of annotations_directory_item section is known
+            annotations_off,
+            // need relinking once offset of class_data section is known
+            class_data_off,
+            // need relinking once offset of encoded_array section is known
+            static_values_off,
+        });
+        self.section_manager.add_elt(Section::ClassDefItem, None);
+        Ok(())
+    }
+
+    /// Generate the type_list section.
+    ///
+    /// The type lists are the non empty parameter lists of the prototypes and the non empty
+    /// interface lists of the classes. Each distinct list is stored once, with its offset
+    /// from the start of the section, each list being aligned on 4 bytes.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when a type of a list is not known by the builder.
+    fn gen_type_list_section(&mut self) -> Result<()> {
+        debug!("Generate the type_list section");
+        // Collect all type lists. The index (0 here) is set in the next step.
+        for proto in self.proto_ids.keys() {
+            if !proto.parameters.is_empty() {
+                let type_list = self.gen_type_list(&proto.parameters).with_context(|| {
+                    format!("Failed to generate param list for {}", proto.__repr__())
+                })?;
+                self.type_lists_index.insert(type_list, 0);
+            }
+        }
+        for (class, _) in self.class_defs.values() {
+            if !class.interfaces.is_empty() {
+                let type_list = self.gen_type_list(&class.interfaces).with_context(|| {
+                    format!("Failed to generate interface list for {}", class.__repr__())
+                })?;
+                self.type_lists_index.insert(type_list, 0);
+            }
+        }
+
+        // Save the type lists with their offset in the section
+        let mut offset = 0;
+        for (i, (list, idx)) in self.type_lists_index.iter_mut().enumerate() {
+            while offset % 4 != 0 {
+                // Alignment
+                self.section_manager.incr_section_size(Section::TypeList, 1);
+                offset += 1;
+            }
+            *idx = i;
+            self.type_lists_with_offset.push((list.clone(), offset));
+            self.section_manager
+                .add_elt(Section::TypeList, Some(list.size()));
+            offset += list.size() as u32;
+        }
+        // The next section requires alignment to 4
+        while offset % 4 != 0 {
+            // Alignment
+            self.section_manager.incr_section_size(Section::TypeList, 1);
+            offset += 1;
+        }
+        Ok(())
+    }
+
+    /// Generate the map list.
+    ///
+    /// # Warning
+    ///
+    /// All sections must be generated (but not linked) before generating the map list.
+    ///
+    /// This method switches the section manager from edit mode to read only.
+    fn gen_map_list(&mut self) -> Result<()> {
+        debug!("Generate the map_list");
+        // Size of a map item, i.e. the value of
+        // `MapItem { type_: MapItemType::HeaderItem, unused: 0, size: 0, offset: 0 }.size()`
+        let map_item_size = 12;
+        // An empty map has a size of 4, then we add the size of a MapItem for each element.
+        // The size of the map_list must be computed before generating the map list,
+        // as it affects the offset of some sections.
+        self.section_manager.add_elt(Section::MapList, Some(4));
+        for section in Section::VARIANT_LIST {
+            if !section.is_data() && self.section_manager.get_nb_elt(*section) != 0 {
+                self.section_manager
+                    .incr_section_size(Section::MapList, map_item_size);
+            }
+        }
+        // All sections are known and should not be edited anymore
+        self.section_manager.finalize_sections();
+        for section in Section::VARIANT_LIST {
+            if !section.is_data() && self.section_manager.get_nb_elt(*section) != 0 {
+                // No alignment to fix here: until Section::MapList included, the sections
+                // are naturally aligned to 4.
+                self.map_list.list.push(MapItem {
+                    type_: section.get_map_item_type(),
+                    unused: 0,
+                    size: self.section_manager.get_nb_elt(*section) as u32,
+                    offset: self.section_manager.get_offset(*section),
+                });
+            }
+        }
+        Ok(())
+    }
+
+    /// Link the offsets in the header.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_header(&mut self) {
+        debug!("Link the header section");
+        // Every value is read from the section manager: the offset of a section and, for
+        // the id sections, its number of elements.
+        self.header.map_off = self.section_manager.get_offset(Section::MapList);
+        self.header.string_ids_size = self.section_manager.get_nb_elt(Section::StringIdItem) as u32;
+        self.header.string_ids_off = self.section_manager.get_offset(Section::StringIdItem);
+        self.header.type_ids_size = self.section_manager.get_nb_elt(Section::TypeIdItem) as u32;
+        self.header.type_ids_off = self.section_manager.get_offset(Section::TypeIdItem);
+        self.header.proto_ids_size = self.section_manager.get_nb_elt(Section::ProtoIdItem) as u32;
+        self.header.proto_ids_off = self.section_manager.get_offset(Section::ProtoIdItem);
+        self.header.field_ids_size = self.section_manager.get_nb_elt(Section::FieldIdItem) as u32;
+        self.header.field_ids_off = self.section_manager.get_offset(Section::FieldIdItem);
+        self.header.method_ids_size = self.section_manager.get_nb_elt(Section::MethodIdItem) as u32;
+        self.header.method_ids_off = self.section_manager.get_offset(Section::MethodIdItem);
+        self.header.class_defs_size = self.section_manager.get_nb_elt(Section::ClassDefItem) as u32;
+        self.header.class_defs_off = self.section_manager.get_offset(Section::ClassDefItem);
+        // The data size is the unaligned size of the section, not a number of elements.
+        self.header.data_size = self.section_manager.get_unaligned_size(Section::Data);
+        self.header.data_off = self.section_manager.get_offset(Section::Data);
+    }
+
+    /// Link the offsets in the call site id items.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_call_site_ids(&mut self) {
+        debug!("Link call site id items");
+        for id in &mut self.call_site_ids {
+            // The offset of the encoded_array_item section is added as is: no `- 1` and no
+            // check for 0 here, so the prelink value is presumably the plain offset of the
+            // call site inside the section -- TODO confirm where `call_site_off` is set.
+            id.call_site_off += self.section_manager.get_offset(Section::EncodedArrayItem);
+        }
+    }
+
+    /// Link the code offsets in class_data_items.
+    ///
+    /// # Warning
+    ///
+    /// This is the only link method called before generating the map list and finalizing the
+    /// sections:
+    ///
+    /// Linking can only occur once all sections are entirely generated, however,
+    /// `class_data_item.direct|virtual_methods[.].code_off` are Uleb128 encoded, meaning
+    /// that linking a class_data_item modifies the size of the class_data_item, hence the
+    /// position of the class_data_items and all elements located after, as well as the size of
+    /// the data section. This is pretty bothersome and means that the sections **are** modified.
+    ///
+    /// For each class_data_item, the offset in the section before linking is mapped to the
+    /// offset after linking in `corrected_class_data_offset`.
+    fn link_class_data(&mut self) -> Result<()> {
+        debug!("Link class data items");
+        // Offsets of the current item in the section, before and after linking.
+        let mut unlinked_local_offset = 0;
+        let mut linked_local_offset = 0;
+        let code_section_off = self.section_manager.get_code_item_offset_prefinalized();
+        for data in self.class_data_list.iter_mut() {
+            // Must be measured before the `code_off` values are modified.
+            let unlinked_size = data.size() as u32;
+            for method in &mut data.direct_methods {
+                // 0 is kept as is; other values have 1 subtracted when the section offset
+                // is added.
+                if method.code_off.0 != 0 {
+                    method.code_off.0 += code_section_off - 1;
+                }
+            }
+            for method in &mut data.virtual_methods {
+                if method.code_off.0 != 0 {
+                    method.code_off.0 += code_section_off - 1;
+                }
+            }
+            self.corrected_class_data_offset
+                .insert(unlinked_local_offset, linked_local_offset);
+            // `data.size()` is now the size of the linked item.
+            linked_local_offset += data.size() as u32;
+            unlinked_local_offset += unlinked_size;
+        }
+        // Grow the section by the difference between the linked and unlinked sizes.
+        self.section_manager.incr_section_size(
+            Section::ClassDataItem,
+            linked_local_offset as usize - unlinked_local_offset as usize,
+        );
+        Ok(())
+    }
+
+    /// Link the offsets in proto_id_items.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_proto_id(&mut self) -> Result<()> {
+        debug!("Link proto id items");
+        for (proto, idx) in &self.proto_ids {
+            // `parameters_off` is only set for the prototypes with at least one parameter.
+            if !proto.parameters.is_empty() {
+                let type_list = self.gen_type_list(&proto.parameters).with_context(|| {
+                    format!("Failed to generate param list for {}", proto.__repr__())
+                })?;
+                // Offset of the type_list section + offset of the list inside the section
+                let offset = self.section_manager.get_offset(Section::TypeList)
+                    + self.type_lists_with_offset[*self.type_lists_index.get(&type_list).unwrap()]
+                        .1;
+                self.proto_ids_list[*idx].parameters_off = offset;
+            }
+        }
+        Ok(())
+    }
+
+    /// Link the offsets in class_def_items: the class_data_item, annotations_directory_item,
+    /// static values and interface list offsets.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_class_def(&mut self) -> Result<()> {
+        debug!("Link class_def_items");
+        for class_def in self.class_defs_list.iter_mut() {
+            // Link the class_data_item entries
+            // prelink value is set to offset in the section + 1 (to distinguish with 0)
+            if class_def.class_data_off != 0 {
+                let unlinked_local_offset = class_def.class_data_off - 1;
+                // Linking the class_data_items changed their size, hence their offset
+                let linked_local_offset = *self
+                    .corrected_class_data_offset
+                    .get(&unlinked_local_offset)
+                    .expect(
+                        "Unlinked class_data_item offset not found in corrected_class_data_offset",
+                    );
+                class_def.class_data_off =
+                    self.section_manager.get_offset(Section::ClassDataItem) + linked_local_offset;
+            }
+            // Link the annotations_directory_item entries
+            // prelink value is set to offset in the section + 1 (to distinguish with 0)
+            if class_def.annotations_off != 0 {
+                class_def.annotations_off += self
+                    .section_manager
+                    .get_offset(Section::AnnotationsDirectoryItem)
+                    - 1;
+            }
+
+            // Link the static_values entries
+            if class_def.static_values_off != 0 {
+                class_def.static_values_off +=
+                    self.section_manager.get_offset(Section::EncodedArrayItem) - 1;
+            }
+        }
+        for (cls, idx) in self.class_defs.values() {
+            if !cls.interfaces.is_empty() {
+                let type_list = self.gen_type_list(&cls.interfaces).with_context(|| {
+                    format!("Failed to generate interface list for {}", cls.__repr__())
+                })?;
+                let offset = self.section_manager.get_offset(Section::TypeList)
+                    + self.type_lists_with_offset[*self.type_lists_index.get(&type_list).unwrap()]
+                        .1;
+                self.class_defs_list[*idx].interfaces_off = offset;
+            }
+        }
+        Ok(())
+    }
+
+    /// Link the offset of debug info item in code items.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_code(&mut self) {
+        debug!("Link the debug_info_off entries in code_items");
+        for code in self.code_items.iter_mut() {
+            // 0 means no debug info, other values are offsets in the section + 1
+            if code.debug_info_off != 0 {
+                code.debug_info_off += self.section_manager.get_offset(Section::DebugInfoItem) - 1;
+            }
+        }
+    }
+
+    /// Link all annotations objects.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_annotations(&mut self) {
+        for annotation in self.annotations_directory_items.iter_mut() {
+            if annotation.class_annotations_off != 0 {
+                annotation.class_annotations_off +=
+                    self.section_manager.get_offset(Section::AnnotationSetItem) - 1;
+            }
+            for field_annotation in annotation.field_annotations.iter_mut() {
+                if field_annotation.annotations_off != 0 {
+                    field_annotation.annotations_off +=
+                        self.section_manager.get_offset(Section::AnnotationSetItem) - 1;
+                }
+            }
+            for method_annotation in annotation.method_annotations.iter_mut() {
+                if method_annotation.annotations_off != 0 {
+                    method_annotation.annotations_off +=
+                        self.section_manager.get_offset(Section::AnnotationSetItem) - 1;
+                }
+            }
+            // Parameter annotations refer to the annotation_set_ref_list section
+            for parameter_annotation in annotation.parameter_annotations.iter_mut() {
+                if parameter_annotation.annotations_off != 0 {
+                    parameter_annotation.annotations_off += self
+                        .section_manager
+                        .get_offset(Section::AnnotationSetRefList)
+                        - 1;
+                }
+            }
+        }
+        for annotation_set in self.annotation_set_items.iter_mut() {
+            for entry in annotation_set.entries.iter_mut() {
+                // No `- 1` and no check for 0 here, unlike the other annotation offsets
+                entry.annotation_off += self.section_manager.get_offset(Section::AnnotationItem);
+            }
+        }
+        for list in self.annotation_set_lists.iter_mut() {
+            for annotation in list.list.iter_mut() {
+                if annotation.annotations_off != 0 {
+                    annotation.annotations_off +=
+                        self.section_manager.get_offset(Section::AnnotationSetItem) - 1;
+                }
+            }
+        }
+    }
+
+    /// Generate, link and serialize the dex file, then write it to `writer`.
+    ///
+    /// The file is first serialized in memory, because the signature and the checksum of
+    /// the header are computed from the serialized content.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when the generation of a section, the serialization of an item or
+    /// the write to `writer` fails.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the offsets computed by the section manager do not match the positions
+    /// of the serialized sections.
+    fn write_dex_file(&mut self, writer: &mut dyn Write) -> Result<()> {
+        self.section_manager.reset();
+        self.section_manager.add_elt(Section::HeaderItem, None);
+
+        self.gen_string_data_section()?;
+        self.gen_type_ids_section()?;
+        self.gen_proto_ids_section()?;
+        self.gen_field_ids_section()?;
+        self.gen_method_ids_section()?;
+
+        debug!("Sort classes and generate the class_defs and class_data section");
+        for class_id in self.get_sorted_class_def()? {
+            self.insert_class_def_item(&class_id)?;
+        }
+        self.gen_type_list_section()?;
+
+        // Start by linking class_data_items to populate self.corrected_class_data_offset
+        // and update the class_data_item section size.
+        // Why before gen_map_list? Because the offsets in class_data_items are Uleb128
+        // encoded, so their size changes when linking (see the doc of `link_class_data`).
+        let code_offset = self.section_manager.get_code_item_offset_prefinalized();
+        self.link_class_data()?;
+        self.gen_map_list()?;
+        assert_eq!(
+            code_offset,
+            self.section_manager.get_offset(Section::CodeItem),
+            "Prelinking computed value and post linking value for \
+            the offset of the code_item section don't match"
+        );
+
+        // From now on, all sections are generated and the values in section_manager do not
+        // change.
+
+        self.link_header();
+        self.link_call_site_ids();
+        self.link_proto_id()?;
+        self.link_class_def()?;
+        self.link_code();
+        self.link_annotations();
+
+        debug!("Serialize the dex file");
+        let mut buffer = Cursor::new(Vec::<u8>::new());
+
+        self.check_section_offset(&buffer, Section::HeaderItem);
+        Self::fix_section_alignement(&mut buffer, Section::HeaderItem)?;
+        // The signature, checksum and file size are not known yet: the header is
+        // serialized again at the end.
+        self.header.serialize(&mut buffer)?;
+        // StringIdItem section
+        let mut string_off = self.section_manager.get_offset(Section::StringDataItem);
+        self.check_section_offset(&buffer, Section::StringIdItem);
+        for string in self.string_data_list.iter() {
+            let str_id = StringIdItem {
+                string_data_off: string_off,
+            };
+            Self::fix_section_alignement(&mut buffer, Section::StringIdItem)?;
+            str_id.serialize(&mut buffer)?;
+            string_off += string.size() as u32;
+        }
+        // TypeId section
+        self.check_section_offset(&buffer, Section::TypeIdItem);
+        for ty in &self.type_ids_list {
+            Self::fix_section_alignement(&mut buffer, Section::TypeIdItem)?;
+            ty.serialize(&mut buffer)?;
+        }
+        // ProtoId section
+        self.check_section_offset(&buffer, Section::ProtoIdItem);
+        for proto in &self.proto_ids_list {
+            Self::fix_section_alignement(&mut buffer, Section::ProtoIdItem)?;
+            proto.serialize(&mut buffer)?;
+        }
+        // FieldIdItem section
+        self.check_section_offset(&buffer, Section::FieldIdItem);
+        for field_id in &self.field_ids_list {
+            Self::fix_section_alignement(&mut buffer, Section::FieldIdItem)?;
+            field_id.serialize(&mut buffer)?;
+        }
+        // MethodIdItem section
+        self.check_section_offset(&buffer, Section::MethodIdItem);
+        for method_id in &self.method_ids_list {
+            Self::fix_section_alignement(&mut buffer, Section::MethodIdItem)?;
+            method_id.serialize(&mut buffer)?;
+        }
+        // ClassDefItem section
+        self.check_section_offset(&buffer, Section::ClassDefItem);
+        for class_def in &self.class_defs_list {
+            Self::fix_section_alignement(&mut buffer, Section::ClassDefItem)?;
+            class_def.serialize(&mut buffer)?;
+        }
+        // CallSiteIdItem, data are inserted as encoded array item later
+        self.check_section_offset(&buffer, Section::CallSiteIdItem);
+        for call_site_id in &self.call_site_ids {
+            Self::fix_section_alignement(&mut buffer, Section::CallSiteIdItem)?;
+            call_site_id.serialize(&mut buffer)?;
+        }
+
+        // MethodHandleItem section
+        self.check_section_offset(&buffer, Section::MethodHandleItem);
+        for handle in &self.method_handles {
+            Self::fix_section_alignement(&mut buffer, Section::MethodHandleItem)?;
+            handle.serialize(&mut buffer)?;
+        }
+        // MapList, which is also the start of the data section
+        self.check_section_offset(&buffer, Section::Data);
+        self.check_section_offset(&buffer, Section::MapList);
+        Self::fix_section_alignement(&mut buffer, Section::MapList)?;
+        self.map_list.serialize(&mut buffer)?;
+        // TypeList,
+        self.check_section_offset(&buffer, Section::TypeList);
+        for (list, _) in &self.type_lists_with_offset {
+            Self::fix_section_alignement(&mut buffer, Section::TypeList)?;
+            list.serialize(&mut buffer)?;
+        }
+        // AnnotationSetRefList section
+        self.check_section_offset(&buffer, Section::AnnotationSetRefList);
+        for list in &self.annotation_set_lists {
+            Self::fix_section_alignement(&mut buffer, Section::AnnotationSetRefList)?;
+            list.serialize(&mut buffer)?;
+        }
+        // AnnotationSetItem section
+        self.check_section_offset(&buffer, Section::AnnotationSetItem);
+        for set in &self.annotation_set_items {
+            Self::fix_section_alignement(&mut buffer, Section::AnnotationSetItem)?;
+            set.serialize(&mut buffer)?;
+        }
+        // CodeItem section
+        self.check_section_offset(&buffer, Section::CodeItem);
+        for code_item in &self.code_items {
+            Self::fix_section_alignement(&mut buffer, Section::CodeItem)?;
+            code_item.serialize(&mut buffer)?;
+        }
+        // StringDataItem section
+        self.check_section_offset(&buffer, Section::StringDataItem);
+        for string in &self.string_data_list {
+            Self::fix_section_alignement(&mut buffer, Section::StringDataItem)?;
+            string.serialize(&mut buffer)?;
+        }
+        // DebugInfoItem section
+        self.check_section_offset(&buffer, Section::DebugInfoItem);
+        for debug_info in &self.debug_info_items {
+            Self::fix_section_alignement(&mut buffer, Section::DebugInfoItem)?;
+            debug_info.serialize(&mut buffer)?;
+        }
+        // AnnotationItem section
+        self.check_section_offset(&buffer, Section::AnnotationItem);
+        for annot in &self.annotation_items {
+            Self::fix_section_alignement(&mut buffer, Section::AnnotationItem)?;
+            annot.serialize(&mut buffer)?;
+        }
+        // EncodedArrayItem section
+        self.check_section_offset(&buffer, Section::EncodedArrayItem);
+        for array in &self.encoded_array_items {
+            Self::fix_section_alignement(&mut buffer, Section::EncodedArrayItem)?;
+            array.serialize(&mut buffer)?;
+        }
+        // AnnotationsDirectoryItem section
+        self.check_section_offset(&buffer, Section::AnnotationsDirectoryItem);
+        for dir in &self.annotations_directory_items {
+            Self::fix_section_alignement(&mut buffer, Section::AnnotationsDirectoryItem)?;
+            dir.serialize(&mut buffer)?;
+        }
+        // ClassDataItem section
+        self.check_section_offset(&buffer, Section::ClassDataItem);
+        for data in &self.class_data_list {
+            Self::fix_section_alignement(&mut buffer, Section::ClassDataItem)?;
+            data.serialize(&mut buffer)?;
+        }
+        // TODO: HiddenapiClassDataItem,
+        /*
+        self.check_section_offset(&buffer, Section::HiddenapiClassDataItem);
+        Self::fix_section_alignement(&mut buffer, Section::HiddenapiClassDataItem)?;
+        */
+
+        let end_data = buffer.position();
+        assert_eq!(
+            end_data as u32,
+            self.header.data_off + self.header.data_size
+        );
+
+        // Set the file size first: in a dex header, `file_size` is located right after the
+        // signature, so it is part of the data covered by both the signature and the
+        // checksum. The header in the buffer must contain the final value before anything
+        // is hashed, or the signature is computed on a stale `file_size`.
+        let size = buffer.seek(SeekFrom::End(0))? as u32;
+        self.header.file_size = size;
+        buffer.rewind()?;
+        self.header.serialize(&mut buffer)?;
+
+        // Compute signature: SHA-1 of everything after the magic (8 bytes), the checksum
+        // (4 bytes) and the signature itself (20 bytes).
+        buffer.seek(SeekFrom::Start(8 + 4 + 20))?;
+        let mut hasher = Sha1::new();
+        io::copy(&mut buffer, &mut hasher)?;
+        self.header.signature = hasher.finalize().into();
+        buffer.rewind()?;
+        self.header.serialize(&mut buffer)?;
+
+        // Compute checksum: Adler32 of everything after the magic and the checksum itself,
+        // signature included, hence computed last.
+        let mut adler = Adler32::new();
+        adler.write_slice(&buffer.get_ref()[8 + 4..]);
+        self.header.checksum = adler.checksum();
+        buffer.rewind()?;
+        self.header.serialize(&mut buffer)?;
+
+        // copy buffer to output
+        buffer.rewind()?;
+        io::copy(&mut buffer, writer)?;
+
+        Ok(())
+    }
+
+    /// Insert 0 to a buffer until the right alignment is reached for an element of the
+    /// given section.
+    fn fix_section_alignement(buffer: &mut Cursor<Vec<u8>>, section: Section) -> Result<()> {
+        while buffer.position() % section.get_item_alignment() as u64 != 0 {
+            Serializable::serialize(&0u8, buffer)?;
+        }
+        Ok(())
+    }
+
+    /// Check that a section is about to be serialized at the offset computed by the section
+    /// manager.
+    ///
+    /// The current position of the buffer is rounded up to the alignment of the items of the
+    /// section before the comparison. The buffer is not modified.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the two offsets do not match.
+    fn check_section_offset<T>(&self, buffer: &Cursor<T>, section: Section) {
+        let mut pos = buffer.position();
+        while pos % section.get_item_alignment() as u64 != 0 {
+            pos += 1;
+        }
+        let expected = self.section_manager.get_offset(section) as u64;
+        assert_eq!(
+            pos, expected,
+            "Computed section offset and actual section offset do not match for section \
+            {section:?}, expected 0x{expected:x}, found 0x{pos:x}"
+        );
+    }
+
+    /// Compute the order of the classes in the section `class_defs`.
+    /// Class definitions must be sorted so that a class's superclass and interfaces
+    /// are before the class.
+    fn get_sorted_class_def(&self) -> Result<Vec<IdType>> {
+        // Use Kahn's algorithm.
+        // For each class, the graph stores (classes that depend on it, classes it depends on).
+        // Only the superclass and interfaces defined in this builder are taken into account.
+        let mut graph: HashMap<&IdType, (HashSet<&IdType>, HashSet<&IdType>)> = HashMap::new();
+        for (ty, (def, _)) in &self.class_defs {
+            let mut edges_to = HashSet::new();
+            if let Some(sup) = def.superclass.as_ref() {
+                if self.class_defs.get(sup).is_some() {
+                    edges_to.insert(sup);
+                }
+            }
+            for sup in &def.interfaces {
+                if self.class_defs.get(sup).is_some() {
+                    edges_to.insert(sup);
+                }
+            }
+            for n_to in &edges_to {
+                let (from, _) = graph
+                    .entry(n_to)
+                    .or_insert((HashSet::new(), HashSet::new()));
+                from.insert(ty);
+            }
+            let (_, to) = graph.entry(ty).or_insert((HashSet::new(), HashSet::new()));
+            to.extend(edges_to);
+        }
+
+        let mut sorted = vec![];
+        // Classes whose dependencies are all already sorted
+        let mut no_outgoing: VecDeque<&IdType> = VecDeque::new();
+        no_outgoing.extend(
+            graph
+                .iter()
+                .filter(|(_, (_, to))| to.is_empty())
+                .map(|(ty, _)| ty),
+        );
+        if no_outgoing.is_empty() {
+            bail!("The class inheritance topoloy is either empty or cyclic");
+        }
+
+        while let Some(n) = no_outgoing.pop_front() {
+            sorted.push(n.clone());
+            let (from, _) = graph.get(n).cloned().unwrap();
+            for n_from in from {
+                // `n` is sorted: remove it from the dependencies of the classes using it
+                graph.entry(n_from).and_modify(|(_, to)| _ = to.remove(n));
+                let (_, to) = graph.get(n_from).unwrap();
+                if to.is_empty() {
+                    no_outgoing.push_back(n_from);
+                }
+            }
+            graph
+                .entry(n)
+                .and_modify(|(from, _)| *from = HashSet::new());
+        }
+        // Any remaining edge belongs to a cycle
+        for (_, (from, to)) in graph {
+            if !from.is_empty() || !to.is_empty() {
+                bail!("The class inheritance topology is cyclic");
+            }
+        }
+        Ok(sorted)
+    }
+
+    /// Convert a list of types to a [`TypeList`] that refers to the types by their index.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error when a type is not known by the builder, or when its index does not
+    /// fit in the 16 bits of a type_item (the index used to be silently truncated).
+    fn gen_type_list(&self, list: &[IdType]) -> Result<TypeList> {
+        let mut type_list = TypeList { list: vec![] };
+        for ty in list {
+            let idx = *self.type_ids.get(ty).ok_or_else(|| {
+                anyhow!("Could not found type {} in dex builder", ty.__repr__())
+            })?;
+            type_list.list.push(TypeItem {
+                type_idx: u16::try_from(idx).with_context(|| {
+                    format!(
+                        "Index {} of type {} does not fit in a type_item",
+                        idx,
+                        ty.__repr__()
+                    )
+                })?,
+            });
+        }
+        Ok(type_list)
+    }
+
+    /// Generate the dex file and return its content.
+    ///
+    /// # Errors
+    ///
+    /// Returns the error of `write_dex_file` when the generation fails.
+    pub fn gen_dex_file_to_vec(&mut self) -> Result<Vec<u8>> {
+        let mut output = Cursor::new(Vec::<u8>::new());
+        self.write_dex_file(&mut output)?;
+        Ok(output.into_inner())
+    }
+}
+
+/// The sections of a dex file that can appear in a fragment.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum FragSection {
+    ClassDefItem,
+    CallSiteIdItem,
+    MethodHandleItem,
+    TypeList,
+    AnnotationSetRefList,
+    AnnotationSetItem,
+    CodeItem,
+    StringDataItem,
+    DebugInfoItem,
+    AnnotationItem,
+    EncodedArrayItem,
+    AnnotationsDirectoryItem,
+    ClassDataItem,
+    // HiddenapiClassDataItem,
+}
+
+impl FragSection {
+    /// All the sections, in the same order as the indexes returned by `get_index`.
+    const VARIANT_LIST: &'static [Self] = &[
+        Self::ClassDefItem,
+        Self::CallSiteIdItem,
+        Self::MethodHandleItem,
+        Self::TypeList,
+        Self::AnnotationSetRefList,
+        Self::AnnotationSetItem,
+        Self::CodeItem,
+        Self::StringDataItem,
+        Self::DebugInfoItem,
+        Self::AnnotationItem,
+        Self::EncodedArrayItem,
+        Self::AnnotationsDirectoryItem,
+        // must be last because contains offsets in Uleb,
+        // so size change when linking !
+        Self::ClassDataItem,
+        // Self::HiddenapiClassDataItem,
+    ];
+
+    /// Return the index of the section in the arrays of [`FragSectionManager`].
+    fn get_index(&self) -> usize {
+        match self {
+            Self::ClassDefItem => 0,
+            Self::CallSiteIdItem => 1,
+            Self::MethodHandleItem => 2,
+            Self::TypeList => 3,
+            Self::AnnotationSetRefList => 4,
+            Self::AnnotationSetItem => 5,
+            Self::CodeItem => 6,
+            Self::StringDataItem => 7,
+            Self::DebugInfoItem => 8,
+            Self::AnnotationItem => 9,
+            Self::EncodedArrayItem => 10,
+            Self::AnnotationsDirectoryItem => 11,
+            Self::ClassDataItem => 12,
+            // Self::HiddenapiClassDataItem => 13,
+        }
+    }
+
+    /// Return the size in bytes of an element of the section.
+    ///
+    /// `default_size` is the size of the element according to the caller. It is required
+    /// for the sections whose elements do not have a fixed size.
+    ///
+    /// # Panics
+    ///
+    /// Panics when `default_size` does not match the fixed size of the elements of the
+    /// section, or when the elements have no fixed size and `default_size` is `None`.
+    fn get_elt_size(&self, default_size: Option<usize>) -> usize {
+        let fixed_size = match self {
+            Self::ClassDefItem => Some(0x20),
+            Self::CallSiteIdItem => Some(4),
+            Self::MethodHandleItem => Some(8),
+            Self::TypeList => None,
+            Self::AnnotationSetRefList => None,
+            Self::AnnotationSetItem => None,
+            Self::CodeItem => None,
+            Self::StringDataItem => None,
+            Self::DebugInfoItem => None,
+            Self::AnnotationItem => None,
+            Self::EncodedArrayItem => None,
+            Self::AnnotationsDirectoryItem => None,
+            Self::ClassDataItem => None,
+            //Self::HiddenapiClassDataItem => None,
+        };
+        if let (Some(fixed_size), Some(default_size)) = (fixed_size, default_size) {
+            if fixed_size == default_size {
+                default_size
+            } else {
+                panic!(
+                    "Element in {:?} have a size of {}, not {}",
+                    self, fixed_size, default_size
+                )
+            }
+        } else {
+            fixed_size.or(default_size).unwrap_or_else(|| {
+                panic!(
+                    "Element of {:?} don't have a fixed size, you need to provide one",
+                    self
+                )
+            })
+        }
+    }
+
+    /// Return the alignment of the item in byte.
+    ///
+    /// NOTE(review): the dex format documentation gives a 4 bytes alignment to
+    /// call_site_id_item, not 1. The elements of this section have a fixed size of 4, so
+    /// only the start of the section could be affected. To be confirmed.
+    fn get_item_alignment(&self) -> u32 {
+        match self {
+            Self::ClassDefItem => 4,
+            Self::CallSiteIdItem => 1,
+            Self::MethodHandleItem => 4,
+            Self::TypeList => 4,
+            Self::AnnotationSetRefList => 4,
+            Self::AnnotationSetItem => 4,
+            Self::CodeItem => 4,
+            Self::StringDataItem => 1,
+            Self::DebugInfoItem => 1,
+            Self::AnnotationItem => 1,
+            Self::EncodedArrayItem => 1,
+            Self::AnnotationsDirectoryItem => 4,
+            Self::ClassDataItem => 1,
+            //Self::HiddenapiClassDataItem => 1,
+        }
+    }
+}
+
+/// Keep track of the size and number of elements of the sections of a fragment.
+///
+/// The default value is read only (`editable` is `false`): `reset` must be called before
+/// adding elements.
+#[derive(Debug, Default, Clone)]
+struct FragSectionManager {
+    /// Size in bytes of each section, indexed by `FragSection::get_index`.
+    sizes: [u32; Self::NB_SECTION],
+    /// Number of elements of each section, indexed by `FragSection::get_index`.
+    nb_elt: [usize; Self::NB_SECTION],
+    /// Offset of each section, indexed by `FragSection::get_index`.
+    offsets: [u32; Self::NB_SECTION],
+    /// Whether the sections can still be modified.
+    editable: bool,
+}
+
+impl FragSectionManager {
+    /// Number of sections. Computed from the list of sections: the previous hard coded
+    /// value (12) was one less than the number of variants of `FragSection`, so any access
+    /// to the last section (`ClassDataItem`, index 12) was out of bounds and panicked.
+    const NB_SECTION: usize = FragSection::VARIANT_LIST.len();
+
+    /// Empty all the sections and switch to edit mode.
+    fn reset(&mut self) {
+        self.sizes = [0; Self::NB_SECTION];
+        self.nb_elt = [0; Self::NB_SECTION];
+        self.offsets = [0; Self::NB_SECTION];
+        self.editable = true;
+    }
+
+    /// Add an element to a section, after padding the section to the alignment of its
+    /// items. See `FragSection::get_elt_size` for the meaning of `size`.
+    ///
+    /// # Panics
+    ///
+    /// Panics when the sections are read only, when a second element is added to the
+    /// class_def_item or class_data_item section, or when `size` is not valid for the
+    /// section.
+    fn add_elt(&mut self, section: FragSection, size: Option<usize>) {
+        if !self.editable {
+            panic!("Try to modify a section when the sections are set to read only");
+        }
+        if (section == FragSection::ClassDefItem || section == FragSection::ClassDataItem)
+            && (self.nb_elt[section.get_index()] >= 1)
+        {
+            panic!("{section:#?} cannot contain more than one element in a dex fragment");
+        }
+        while self.sizes[section.get_index()] % section.get_item_alignment() != 0 {
+            self.sizes[section.get_index()] += 1;
+        }
+        self.sizes[section.get_index()] += section.get_elt_size(size) as u32;
+        self.nb_elt[section.get_index()] += 1;
+    }
+    /*
+    fn incr_section_size(&mut self, section: FragSection, size: usize) {
+        if !self.editable {
+            panic!("Try to modify a section when the sections are set to read only");
+        }
+        self.sizes[section.get_index()] += size as u32;
+    }
+    fn get_offset(&self, section: FragSection) -> u32 {
+        if self.editable {
+            panic!("Try to get section offset before sections are finilized");
+        }
+        let size = self.offsets[section.get_index()];
+        let alignment = section.get_item_alignment();
+        if size % alignment != 0 {
+            panic!(
+                "section {section:?} should be aligned on {alignment} bytes, \
+                found section offset 0x{size:x}"
+            ); // avoid by finilized
+        }
+        size
+    }
+    */
+
+    /// The current size of the section in bytes, without padding at the end.
+    fn get_unaligned_size(&self, section: FragSection) -> u32 {
+        self.sizes[section.get_index()]
+    }
+
+    /// The position of a potential new item in the section considering alignment.
+    fn get_aligned_size(&self, section: FragSection) -> u32 {
+        let mut size = self.get_unaligned_size(section);
+        while size % section.get_item_alignment() != 0 {
+            size += 1;
+        }
+        size
+    }
+
+    /// The number of elements in the section.
+    fn get_nb_elt(&self, section: FragSection) -> usize {
+        self.nb_elt[section.get_index()]
+    }
+
+    /*
+    /// Finialize the sections: switch to read only and fix the section alignment.
+    fn finalize_sections(&mut self) {
+        for section in Section::VARIANT_LIST {
+            while self.sizes[..section.get_index()].iter().sum::<u32>()
+                % section.get_item_alignment()
+                != 0
+            {
+                self.incr_section_size(
+                    section.prev().expect(
+                        "First section (Header) should alway be aligned but \
+                        found unaligned section without predecessor",
+                    ),
+                    1,
+                );
+            }
+        }
+        let mut offset = 0;
+        for section in Section::VARIANT_LIST {
+            self.offsets[section.get_index()] = offset;
+            offset += self.sizes[section.get_index()];
+        }
+
+        self.editable = false;
+    }
+
+    /// This method exist for the only purpose of linking the method code offset inside
+    /// the class data items. This linking needs to be done before finilizing because it change the
+    /// size of the class data item section.
+    ///
+    /// Seriously, avoid using this.
+    fn get_code_item_offset_prefinalized(&mut self) -> u32 {
+        if !self.editable || self.get_nb_elt(Section::MapList) != 0 {
+            panic!("Don't use this method for other purpose than linking class_data_items");
+        }
+        let mut map_list_size = 4;
+        let map_item_size = 12; /* = MapItem {
+                                    type_: MapItemType::HeaderItem,
+                                    unused: 0,
+                                    size: 0,
+                                    offset: 0,
+                                }
+                                .size(); */
+        for section in Section::VARIANT_LIST {
+            if !section.is_data()
+                && (self.get_nb_elt(*section) != 0 || section == &Section::MapList)
+            {
+                map_list_size += map_item_size;
+            }
+        }
+        let mut offset = map_list_size; // This is aligned so it wont affect alignment
+        for section in &Section::VARIANT_LIST[..Section::CodeItem.get_index()] {
+            // size Section::Data and size Section::MapList are 0
+            while offset % section.get_item_alignment() != 0 {
+                offset += 1;
+            }
+            offset += self.sizes[section.get_index()];
+        }
+
+        offset
+    }
+    */
+
+    /// Display the sections informations.
+    ///
+    /// The displayed offsets are the cumulated unaligned sizes of the sections, in the
+    /// order of `FragSection::VARIANT_LIST`, without any padding between the sections.
+    // Only used for debugging.
+    #[allow(dead_code)]
+    fn show(&self) {
+        let mut offset = 0;
+        for section in FragSection::VARIANT_LIST {
+            let size = self.get_unaligned_size(*section);
+            let new_offset = offset + size;
+            let nb_elt = self.get_nb_elt(*section);
+            println!(
+                "{section:?}: 0x{offset:x} -> 0x{new_offset:x} (size: 0x{size:x}, \
+                nb elt: {nb_elt})"
+            );
+            // The next section starts where this one ends (this update was missing, so
+            // every section was displayed as starting at 0).
+            offset = new_offset;
+        }
+    }
+}
diff --git a/androscalpel/src/lib.rs b/androscalpel/src/lib.rs
index 3ece94a..e45a1c2 100644
--- a/androscalpel/src/lib.rs
+++ b/androscalpel/src/lib.rs
@@ -6,6 +6,7 @@ pub mod annotation;
 pub mod apk;
 pub mod class;
 pub mod code;
+pub mod dex_fragment;
 pub mod dex_id;
 pub mod dex_string;
 pub mod dex_writer;