From 409663ca196d7c6a3ec93e4d809e09bbb438d07e Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 4 Dec 2023 16:56:20 +0100 Subject: [PATCH] add class defs to generated dex --- androscalpel/src/dex_writer.rs | 224 +++++++++++++++++++++++++++------ 1 file changed, 183 insertions(+), 41 deletions(-) diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 8feee60..e3bd407 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -1,6 +1,6 @@ //! The structure that generate a .dex from classes. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet, VecDeque}; use std::io::{Cursor, Write}; use log::debug; @@ -14,13 +14,30 @@ use androscalpel_serializer::*; #[derive(Debug, Clone)] pub struct DexWriter { header: HeaderItem, + /// The strings in the dex file. Initially values are set to zero. + /// Once the strings are sorted, the values of the map are set to the + /// strings idx strings: HashMap, + /// The types in the dex file. Initially values are set to zero. + /// Once the types are sorted, the values of the map are set to the + /// types idx type_ids: HashMap, + /// The prototypes in the dex file. Initially values are set to zero. + /// Once the prototypes are sorted, the values of the map are set to the + /// prototypes idx proto_ids: HashMap, + /// The field ids in the dex file. Initially values are set to zero. + /// Once the fields are sorted, the values of the map are set to the + /// fields idx field_ids: HashMap, + /// The methods ids in the dex file. Initially values are set to zero. + /// Once the methods are sorted, the values of the map are set to the + /// method idx method_ids: HashMap, - // TODO: composite classes need a struct for storing link data - // class_defs: HashMap, + /// The classes defined in the dex file. Initially values are set to the classes and zero. 
+ /// Once the class definitions are sorted, the values are set to the classes and their + /// index in the `class_defs` section. + class_defs: HashMap, // call_site_ids: // TODO: parsing code insns // method_handles: // TODO: other structs in data: @@ -65,6 +82,7 @@ impl Default for DexWriter { proto_ids: HashMap::new(), field_ids: HashMap::new(), method_ids: HashMap::new(), + class_defs: HashMap::new(), } } } @@ -121,6 +139,8 @@ impl DexWriter { for method in new_method_ids { self.method_ids.insert(method, 0); } + self.class_defs + .insert(class.descriptor.clone(), (class.clone(), 0)); Ok(()) } @@ -136,9 +156,7 @@ impl DexWriter { debug!("Sort string and generate string_data_item and string_ids sections"); let mut string_ids_list: Vec = self.strings.keys().cloned().collect(); - debug!("start sort"); string_ids_list.sort(); - debug!("end sort"); for (idx, string) in string_ids_list.iter().enumerate() { self.strings .entry(string.clone()) @@ -153,9 +171,7 @@ impl DexWriter { debug!("Sort types and generate type_id_item section"); let mut type_ids_list: Vec = self.type_ids.keys().cloned().collect(); - debug!("start sort"); type_ids_list.sort(); - debug!("end sort"); for (idx, ty) in type_ids_list.iter().enumerate() { self.type_ids.entry(ty.clone()).and_modify(|val| *val = idx); section_manager.add_elt(Section::TypeIdItem, None); @@ -176,9 +192,7 @@ impl DexWriter { debug!("Sort prototypes and generate proto_id_item section"); let mut proto_ids_list: Vec = self.proto_ids.keys().cloned().collect(); - debug!("start sort"); proto_ids_list.sort(); - debug!("end sort"); for (idx, proto) in proto_ids_list.iter().enumerate() { self.proto_ids .entry(proto.clone()) @@ -205,36 +219,9 @@ impl DexWriter { proto_ids_list_aux }; - debug!("Generate the type_list section"); - let mut type_lists_index = HashMap::new(); - for proto in self.proto_ids.keys() { - if !proto.parameters.is_empty() { - let type_list = self.gen_type_list(&proto.parameters).with_context(|| { - format!("Failed
to generate param list for {}", proto.__repr__()) - })?; - type_lists_index.insert(type_list, 0); - } - } - // TODO add class.interface in type list - let mut offset = 0; - let mut type_lists_and_local_offsets = vec![]; - for (i, (list, idx)) in type_lists_index.iter_mut().enumerate() { - while offset % 4 != 0 { - // Alignment - section_manager.incr_section_size(Section::TypeList, 1); - offset += 1; - } - *idx = i; - type_lists_and_local_offsets.push((list.clone(), offset)); - section_manager.add_elt(Section::TypeList, Some(list.size())); - offset += list.size(); - } - debug!("Sort field ids and generate field_ids_item"); let mut field_ids_list: Vec = self.field_ids.keys().cloned().collect(); - debug!("start sort"); field_ids_list.sort(); - debug!("end sort"); for (idx, field_id) in field_ids_list.iter().enumerate() { self.field_ids .entry(field_id.clone()) @@ -267,9 +254,7 @@ impl DexWriter { debug!("Sort method ids and generate method_id_item section"); let mut method_ids_list: Vec = self.method_ids.keys().cloned().collect(); - debug!("start sort"); method_ids_list.sort(); - debug!("end sort"); for (idx, method_id) in method_ids_list.iter().enumerate() { self.method_ids .entry(method_id.clone()) @@ -300,6 +285,88 @@ impl DexWriter { method_ids_list_aux }; + debug!("Sort classes and generate the class_defs section"); + let mut class_defs_list = vec![]; + for (idx, class_id) in self.get_sorted_class_def()?.into_iter().enumerate() { + self.class_defs + .entry(class_id.clone()) + .and_modify(|(_, i)| *i = idx); + let (class, _) = self.class_defs.get(&class_id).unwrap(); + class_defs_list.push(ClassDefItem { + class_idx: *self.type_ids.get(&class.descriptor).ok_or(anyhow!( + "Type {} (type of class {}) not found in dex builder", + class.descriptor.__repr__(), + class.__repr__() + ))? 
as u32, + access_flags: if class.is_public { ACC_PUBLIC } else { 0 } + | if class.is_final { ACC_FINAL } else { 0 } + | if class.is_interface { ACC_INTERFACE } else { 0 } + | if class.is_abstract { ACC_ABSTRACT } else { 0 } + | if class.is_synthetic { ACC_SYNTHETIC } else { 0 } + | if class.is_annotation { + ACC_ANNOTATION + } else { + 0 + } + | if class.is_enum { ACC_ENUM } else { 0 }, + superclass_idx: if let Some(sup) = &class.superclass { + *self.type_ids.get(sup).ok_or(anyhow!( + "Type {} (superclass of class {}) not found in dex builder", + sup.__repr__(), + class.__repr__() + ))? as u32 + } else { + NO_INDEX.0 + }, + interfaces_off: 0, // TODO + source_file_idx: if let Some(file) = &class.source_file { + *self.strings.get(file).ok_or(anyhow!( + "String {} (source file of class {}) not found in dex builder", + file.__repr__(), + class.__repr__() + ))? as u32 + } else { + NO_INDEX.0 + }, + annotations_off: 0, // TODO + class_data_off: 0, // TODO + static_values_off: 0, // TODO + }); + section_manager.add_elt(Section::ClassDefItem, None); + } + + debug!("Generate the type_list section"); + let mut type_lists_index = HashMap::new(); + for proto in self.proto_ids.keys() { + if !proto.parameters.is_empty() { + let type_list = self.gen_type_list(&proto.parameters).with_context(|| { + format!("Failed to generate param list for {}", proto.__repr__()) + })?; + type_lists_index.insert(type_list, 0); + } + } + for (class, _) in self.class_defs.values() { + if !class.interfaces.is_empty() { + let type_list = self.gen_type_list(&class.interfaces).with_context(|| { + format!("Failed to generate interface list for {}", class.__repr__()) + })?; + type_lists_index.insert(type_list, 0); + } + } + let mut offset = 0; + let mut type_lists_and_local_offsets = vec![]; + for (i, (list, idx)) in type_lists_index.iter_mut().enumerate() { + while offset % 4 != 0 { + // Alignment + section_manager.incr_section_size(Section::TypeList, 1); + offset += 1; + } + *idx = i; + 
type_lists_and_local_offsets.push((list.clone(), offset)); + section_manager.add_elt(Section::TypeList, Some(list.size())); + offset += list.size(); + } + debug!("Generate the map_list"); // Get the size of a map item let map_item_size = MapItem { @@ -352,8 +419,7 @@ impl DexWriter { self.header.data_size = section_manager.get_size(Section::Data); self.header.data_off = section_manager.get_offset(Section::Data); - debug!("Link the type_list entries in the proto_id_items"); - // TODO: link TypeList in ClassDefItems + debug!("Link the type_list entries in the proto_id_items and class_def_items"); for (proto, idx) in &self.proto_ids { if !proto.parameters.is_empty() { let type_list = self.gen_type_list(&proto.parameters).with_context(|| { @@ -365,6 +431,17 @@ impl DexWriter { proto_ids_list[*idx].parameters_off = offset; } } + for (cls, idx) in self.class_defs.values() { + if !cls.interfaces.is_empty() { + let type_list = self.gen_type_list(&cls.interfaces).with_context(|| { + format!("Failed to generate interface list for {}", cls.__repr__()) + })?; + let offset = section_manager.get_offset(Section::TypeList) + + type_lists_and_local_offsets[*type_lists_index.get(&type_list).unwrap()].1 + as u32; + class_defs_list[*idx].interfaces_off = offset; + } + } debug!("Serialize the dex file"); // TODO: compute checksum, hash, ect @@ -394,7 +471,10 @@ impl DexWriter { for method_id in method_ids_list { method_id.serialize(writer)?; } - // TODO: ClassDefItem, + // ClassDefItem section + for class_def in class_defs_list { + class_def.serialize(writer)?; + } // TODO: CallSiteIdItem, // TODO: MethodHandleItem, // TODO: Data, @@ -427,6 +507,68 @@ impl DexWriter { Ok(()) } + /// Compute the order of the classes in the section `class_defs`. + /// Class definitions must be sorted so that a class's superclass and interfaces + /// are before the class. 
+ fn get_sorted_class_def(&self) -> Result<Vec<IdType>> { + // Use Kahn's algorithm + let mut graph: HashMap<&IdType, (HashSet<&IdType>, HashSet<&IdType>)> = HashMap::new(); + for (ty, (def, _)) in &self.class_defs { + let mut edges_to = HashSet::new(); + if let Some(sup) = def.superclass.as_ref() { + if self.class_defs.get(sup).is_some() { + edges_to.insert(sup); + } + } + for sup in &def.interfaces { + if self.class_defs.get(sup).is_some() { + edges_to.insert(sup); + } + } + for n_to in &edges_to { + let (from, _) = graph + .entry(n_to) + .or_insert((HashSet::new(), HashSet::new())); + from.insert(ty); + } + let (_, to) = graph.entry(ty).or_insert((HashSet::new(), HashSet::new())); + to.extend(edges_to); + } + + let mut sorted = vec![]; + let mut no_outgoing: VecDeque<&IdType> = VecDeque::new(); + no_outgoing.extend( + graph + .iter() + .filter(|(_, (_, to))| to.is_empty()) + .map(|(ty, _)| ty), + ); + if no_outgoing.is_empty() { + bail!("The class inheritance topology is either empty or cyclic"); + } + + while let Some(n) = no_outgoing.pop_front() { + sorted.push(n.clone()); + let (from, _) = graph.get(n).cloned().unwrap(); + for n_from in from { + graph.entry(n_from).and_modify(|(_, to)| _ = to.remove(n)); + let (_, to) = graph.get(n_from).unwrap(); + if to.is_empty() { + no_outgoing.push_back(n_from); + } + } + graph + .entry(n) + .and_modify(|(from, _)| *from = HashSet::new()); + } + for (_, (from, to)) in graph { + if !from.is_empty() || !to.is_empty() { + bail!("The class inheritance topology is cyclic"); + } + } + Ok(sorted) + } + fn gen_type_list(&self, list: &[IdType]) -> Result { let mut type_list = TypeList { list: vec![] }; for ty in list {