diff --git a/androscalpel/src/dex_fragment.rs b/androscalpel/src/dex_fragment.rs index aaea8db..e369844 100644 --- a/androscalpel/src/dex_fragment.rs +++ b/androscalpel/src/dex_fragment.rs @@ -10,6 +10,7 @@ use crate::Result; use crate::*; use androscalpel_serializer::*; +use crate::dex_writer::DexIndex; use crate::ins::{CallSite, Instruction}; use crate::instructions::*; use androscalpel_serializer::Instruction as InsFormat; @@ -572,12 +573,13 @@ impl DexFragment { Ok(()) } + // TODO: find if there is a way to efficiently link code item. /// Insert a code_item. /// - /// # Warning - /// - /// This is currently a stub that probably serialize invalid references to data. - fn insert_code_item(&mut self, code: &Code, index: &FragIndex) -> Result<()> { + /// This item cannot be cached, because the jump instructions depend on the size of + /// instructions that depend on the size of the descriptor ids that depend on the + /// list of all descriptors in the dex file. + fn insert_code_item(&mut self, code: &Code, index: &DexIndex) -> Result<()> { // Estimate instructions addresses let mut min_addr = 0; let mut max_addr = 0; @@ -1563,11 +1565,14 @@ impl DexFragment { let access_flags = Uleb128(class.direct_methods.get(id).unwrap().get_raw_access_flags()); let code_off = if let Some(code) = &class.direct_methods.get(id).unwrap().code { - let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); - self.insert_code_item(code, index).with_context(|| { - format!("Failed to convert serialize code of {}", id.__str__()) - })?; - Uleb128(code_off + 1) + // CodeItems depend too much on the complete descriptor list to be generated + // prior to linking + //let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); + //self.insert_code_item(code, index).with_context(|| { + // format!("Failed to convert serialize code of {}", id.__str__()) + //})?; + //Uleb128(code_off + 1) + Uleb128(1) } else { Uleb128(0) }; @@ -1597,11 +1602,14 @@ impl DexFragment { 
.get_raw_access_flags(), ); let code_off = if let Some(code) = &class.virtual_methods.get(id).unwrap().code { - let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); - self.insert_code_item(code, index).with_context(|| { - format!("Failed to convert serialize code of {}", id.__str__()) - })?; - Uleb128(code_off + 1) + // CodeItems depend too much on the complete descriptor list to be generated + // prior to linking + // let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); + // self.insert_code_item(code, index).with_context(|| { + // format!("Failed to convert serialize code of {}", id.__str__()) + // })?; + // Uleb128(code_off + 1) + Uleb128(1) } else { Uleb128(0) }; @@ -1884,21 +1892,19 @@ impl DexFragment { /// linked before. pub fn link_global_ids( &mut self, - global_strings: &[DexString], - global_type_ids: &[IdType], - global_proto_ids: &[IdMethodType], - global_field_ids: &[IdField], - global_method_ids: &[IdMethod], + class: &Class, + index: &DexIndex, nb_method_handle_before_fragment: usize, ) -> Result<()> { self.link_state.start_linking_idx()?; let string_reindex = Vec::with_capacity(self.strings.len()); + // TODO: considering we have the map, this can be simplified a lot let mut global_idx = 0; for s in self.strings { - while global_idx < global_strings.len() && global_strings[global_idx] != s { + while global_idx < index.strings_list.len() && index.strings_list[global_idx] != s { global_idx += 1; } - if global_idx == global_strings.len() { + if global_idx == index.strings_list.len() { bail!("String {} not found in global index", s.__str__()); } string_reindex.push(global_idx as u32); @@ -1906,10 +1912,10 @@ impl DexFragment { let type_reindex = Vec::with_capacity(self.type_ids.len()); let mut global_idx = 0; for ty in self.type_ids { - while global_idx < global_type_ids.len() && global_type_ids[global_idx] != ty { + while global_idx < index.type_ids_list.len() && index.type_ids_list[global_idx] != ty { 
global_idx += 1; } - if global_idx == global_type_ids.len() { + if global_idx == index.type_ids_list.len() { bail!("Type {} not found in global index", ty.__str__()); } type_reindex.push(global_idx as u32); @@ -1917,10 +1923,12 @@ impl DexFragment { let proto_reindex = Vec::with_capacity(self.proto_ids.len()); let mut global_idx = 0; for proto in self.proto_ids { - while global_idx < global_proto_ids.len() && global_proto_ids[global_idx] != proto { + while global_idx < index.proto_ids_list.len() + && index.proto_ids_list[global_idx] != proto + { global_idx += 1; } - if global_idx == global_proto_ids.len() { + if global_idx == index.proto_ids_list.len() { bail!("Prototype {} not found in global index", proto.__str__()); } proto_reindex.push(global_idx as u32); @@ -1928,10 +1936,12 @@ impl DexFragment { let field_reindex = Vec::with_capacity(self.field_ids.len()); let mut global_idx = 0; for field in self.field_ids { - while global_idx < global_field_ids.len() && global_field_ids[global_idx] != field { + while global_idx < index.field_ids_list.len() + && index.field_ids_list[global_idx] != field + { global_idx += 1; } - if global_idx == global_field_ids.len() { + if global_idx == index.field_ids_list.len() { bail!("Field {} not found in global index", field.__str__()); } field_reindex.push(global_idx as u16); @@ -1939,18 +1949,23 @@ impl DexFragment { let method_reindex = Vec::with_capacity(self.method_ids.len()); let mut global_idx = 0; for meth in self.method_ids { - while global_idx < global_method_ids.len() && global_method_ids[global_idx] != meth { + while global_idx < index.method_ids_list.len() + && index.method_ids_list[global_idx] != meth + { global_idx += 1; } - if global_idx == global_method_ids.len() { + if global_idx == index.method_ids_list.len() { bail!("Method {} not found in global index", meth.__str__()); } method_reindex.push(global_idx as u16); } + self.link_id_class_data_and_gen_code(class, &field_reindex, &method_reindex, index)?; 
self.link_id_class_def(&string_reindex, &type_reindex); self.link_id_method_handle(&field_reindex, &method_reindex); + // GEN CODE + todo!() } @@ -1977,51 +1992,6 @@ impl DexFragment { } } - fn link_id_code(&mut self, string_reindex: &[u32], type_reindex: &[u32]) { - let mut total_size = 0; - let mut code_item_relocation = if let FragLinkState::LinkedIdx { - code_item_relocation, - .. - } = self.link_state - { - code_item_relocation - } else { - // link_global_ids() should prevent that - panic!("link_id_code should not be run outside of fn link_global_ids(..)"); - }; - for code in self.code_items { - let current_size = code.size(); - for ins in &mut code.insns { - Self::link_id_ins(ins, string_reindex); - } - // TODO: TryItem recompute handler_off - if let Some(handlers) = code.handlers { - let mut handler_off_reindex = HashMap::new(); - let mut current_offset = handlers.size_field().size(); - let mut old_offset = handlers.size_field().size(); - for handlers in handlers.list { - handler_off_reindex.insert(old_offset as u16, current_offset as u16); - old_offset += handlers.size(); - for handler in handlers.handlers { - handler.type_idx.0 = type_reindex[handler.type_idx.0 as usize]; - } - current_offset += handlers.size(); - } - for try_ in code.tries { - try_.handler_off = *handler_off_reindex - .get(&try_.handler_off) - .expect("Something whent wrong with the handle reindexing"); - } - } - } - } - fn link_id_ins(ins: &mut InsFormat, string_reindex: &[u32]) { - match ins { - InsFormat::Format31C { op: 0x1b, b, .. } => *b = string_reindex[b as usize], - InsFormat::Format21C { op: 0x1a, b, .. } => todo!(), // TODO FUCK this - _ => todo!(), - } - } fn link_id_string_data(&mut self) { todo!() } @@ -2037,8 +2007,103 @@ impl DexFragment { fn link_id_annotation_dir(&mut self) { todo!() } - fn link_id_class_data(&mut self) { - todo!() + + /// Link ids in [`ClassDataItem`] *and* generate the [`CodeItem`]. 
+ fn link_id_class_data_and_gen_code( + &mut self, + class: &Class, + field_reindex: &[u16], + method_reindex: &[u16], + index: &DexIndex, + ) -> Result<()> { + if let Some(data) = self.class_data { + let mut last_local_id = 0; + let mut last_global_id = 0; + for field in data.static_fields { + let new_local_id = last_local_id + field.field_idx_diff.0; + let new_global_id = field_reindex[new_local_id as usize]; + field.field_idx_diff.0 = (new_global_id - last_global_id) as u32; + last_local_id = new_local_id; + last_global_id = new_global_id; + } + let mut last_local_id = 0; + let mut last_global_id = 0; + for field in data.instance_fields { + let new_local_id = last_local_id + field.field_idx_diff.0; + let new_global_id = field_reindex[new_local_id as usize]; + field.field_idx_diff.0 = (new_global_id - last_global_id) as u32; + last_local_id = new_local_id; + last_global_id = new_global_id; + } + + let mut last_local_id = 0; + let mut last_global_id = 0; + for meth in data.direct_methods { + let new_local_id = last_local_id + meth.method_idx_diff.0; + let new_global_id = method_reindex[new_local_id as usize]; + meth.method_idx_diff.0 = (new_global_id - last_global_id) as u32; + if meth.code_off.0 != 0 { + let meth_id = index.method_ids_list[new_global_id as usize]; + let code = class + .direct_methods + .get(&meth_id) + .ok_or(anyhow!( + "direct method {} expected from fragment but not found in {}", + meth_id.__str__(), + class.__str__() + ))? 
+ .code; + if let Some(code) = code { + let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); + self.insert_code_item(&code, index)?; + meth.code_off.0 = code_off + 1; + } else { + bail!( + "Inconsistant fragment: fragment expect a code item for {} \ + but none was found in {}", + meth_id.__str__(), + class.__str__() + ); + } + } + last_local_id = new_local_id; + last_global_id = new_global_id; + } + let mut last_local_id = 0; + let mut last_global_id = 0; + for meth in data.virtual_methods { + let new_local_id = last_local_id + meth.method_idx_diff.0; + let new_global_id = method_reindex[new_local_id as usize]; + meth.method_idx_diff.0 = (new_global_id - last_global_id) as u32; + if meth.code_off.0 != 0 { + let meth_id = index.method_ids_list[new_global_id as usize]; + let code = class + .virtual_methods + .get(&meth_id) + .ok_or(anyhow!( + "virtual method {} expected from fragment but not found in {}", + meth_id.__str__(), + class.__str__() + ))? + .code; + if let Some(code) = code { + let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); + self.insert_code_item(&code, index)?; + meth.code_off.0 = code_off + 1; + } else { + bail!( + "Inconsistant fragment: fragment expect a code item for {} \ + but none was found in {}", + meth_id.__str__(), + class.__str__() + ); + } + } + last_local_id = new_local_id; + last_global_id = new_global_id; + } + } + Ok(()) } } @@ -2305,7 +2370,7 @@ impl FragSectionManager { } } -/// Index that associate a type to its local id in a fragment. +/// Index that associate descriptors to their local id in a fragment. #[derive(Debug, Clone)] struct FragIndex { pub strings: HashMap, diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs index 840872c..854bb73 100644 --- a/androscalpel/src/dex_writer.rs +++ b/androscalpel/src/dex_writer.rs @@ -1,34 +1,38 @@ //! The structure that generate a .dex from classes. 
-use std::collections::{HashSet, VecDeque}; +use std::collections::{HashMap, HashSet, VecDeque}; use crate::dex_fragment::DexFragment; -use crate::{DexString, IdField, IdMethod, IdMethodType, IdType}; +use crate::{Class, DexString, IdField, IdMethod, IdMethodType, IdType, Result}; #[derive(Debug, Clone)] -pub struct DexWriter { - fragments: VecDeque, +pub struct DexWriter<'a> { + classes: VecDeque<&'a Class>, } -impl Default for DexWriter { +impl<'a> Default for DexWriter<'a> { fn default() -> Self { Self { - fragments: VecDeque::new(), + classes: VecDeque::new(), } } } -impl DexWriter { +impl<'a> DexWriter<'a> { pub fn new() -> Self { Self::default() } pub fn empty(&self) -> bool { - self.fragments.is_empty() + self.classes.is_empty() + } + + pub fn add_class(&mut self, class: &'a Class) { + self.classes.push_back(class) } /// Take as many fragments as possible and convert them to a dex file. - pub fn generate_next_dex_file(&mut self) -> Vec { - let mut fragments = vec![]; + pub fn generate_next_dex_file(&mut self) -> Result> { + let mut fragments_in_file = vec![]; let mut string_set: HashSet = HashSet::new(); let mut type_set: HashSet = HashSet::new(); let mut proto_set: HashSet = HashSet::new(); @@ -37,8 +41,16 @@ impl DexWriter { let mut type_list_set: HashSet> = HashSet::new(); let mut nb_method_handle = 0; let mut nb_method_handle_before = vec![]; + let fragments: VecDeque<(&'a Class, DexFragment)> = self + .classes + .into_iter() + .map(|class| match DexFragment::new(class) { + Ok(frag) => Ok((class, frag)), + Err(err) => Err(err), + }) + .collect()?; loop { - let new_fragment = if let Some(new_fragment) = self.fragments.pop_front() { + let (class, new_fragment) = if let Some(new_fragment) = fragments.pop_front() { new_fragment } else { break; @@ -52,7 +64,7 @@ impl DexWriter { .count() > u16::MAX as usize { - self.fragments.push_front(new_fragment); + fragments.push_front((class, new_fragment)); break; } if proto_set.len() + new_fragment.proto_ids().len() > 
u16::MAX as usize @@ -64,7 +76,7 @@ impl DexWriter { .count() > u16::MAX as usize { - self.fragments.push_front(new_fragment); + fragments.push_front((class, new_fragment)); break; } if field_set.len() + new_fragment.field_ids().len() > u16::MAX as usize @@ -76,7 +88,7 @@ impl DexWriter { .count() > u16::MAX as usize { - self.fragments.push_front(new_fragment); + fragments.push_front((class, new_fragment)); break; } if method_set.len() + new_fragment.method_ids().len() > u16::MAX as usize @@ -88,7 +100,7 @@ impl DexWriter { .count() > u16::MAX as usize { - self.fragments.push_front(new_fragment); + fragments.push_front((class, new_fragment)); break; } string_set.extend(new_fragment.strings().iter().cloned()); @@ -99,32 +111,89 @@ impl DexWriter { type_list_set.insert(new_fragment.interfaces().to_vec()); nb_method_handle_before.push(nb_method_handle); nb_method_handle += new_fragment.method_handles().len(); - fragments.push(new_fragment); + fragments_in_file.push((class, new_fragment)); } type_list_set.extend(proto_set.iter().map(|proto| proto.parameters.clone())); - let mut strings: Vec = string_set.into_iter().collect(); + let mut strings: Vec = string_set.iter().cloned().collect(); strings.sort(); - let mut type_ids: Vec = type_set.into_iter().collect(); + let mut type_ids: Vec = type_set.iter().cloned().collect(); type_ids.sort(); - let mut proto_ids: Vec = proto_set.into_iter().collect(); + let mut proto_ids: Vec = proto_set.iter().cloned().collect(); proto_ids.sort(); - let mut field_ids: Vec = field_set.into_iter().collect(); + let mut field_ids: Vec = field_set.iter().cloned().collect(); field_ids.sort(); - let mut method_ids: Vec = method_set.into_iter().collect(); + let mut method_ids: Vec = method_set.iter().cloned().collect(); method_ids.sort(); - let mut type_lists: Vec> = type_list_set.into_iter().collect(); + let mut type_lists: Vec> = type_list_set.iter().cloned().collect(); + let index = DexIndex::new(&strings, &type_ids, &proto_ids, &field_ids, 
&method_ids); - for (i, fragment) in fragments.iter().enumerate() { - fragment.link_global_ids( - &strings, - &type_ids, - &proto_ids, - &field_ids, - &method_ids, - nb_method_handle_before[i], - ); + for (i, (class, fragment)) in fragments_in_file.iter_mut().enumerate() { + fragment.link_global_ids(class, &index, nb_method_handle_before[i])?; } - vec![] + Ok(vec![]) + } +} + +/// Index that associates descriptors to their id in the dex file. +#[derive(Debug, Clone)] +pub(crate) struct DexIndex<'a> { + pub strings: HashMap, + pub types: HashMap, + pub protos: HashMap, + pub fields: HashMap, + pub methods: HashMap, + pub strings_list: &'a [DexString], + pub type_ids_list: &'a [IdType], + pub proto_ids_list: &'a [IdMethodType], + pub field_ids_list: &'a [IdField], + pub method_ids_list: &'a [IdMethod], +} + +impl<'a> DexIndex<'a> { + fn new( + strings_list: &'a [DexString], + type_ids_list: &'a [IdType], + proto_ids_list: &'a [IdMethodType], + field_ids_list: &'a [IdField], + method_ids_list: &'a [IdMethod], + ) -> Self { + Self { + strings_list, + type_ids_list, + proto_ids_list, + field_ids_list, + method_ids_list, + strings: strings_list + .iter() + .cloned() + .enumerate() + .map(|(x, y)| (y, x)) + .collect(), + types: type_ids_list + .iter() + .cloned() + .enumerate() + .map(|(x, y)| (y, x)) + .collect(), + protos: proto_ids_list + .iter() + .cloned() + .enumerate() + .map(|(x, y)| (y, x)) + .collect(), + fields: field_ids_list + .iter() + .cloned() + .enumerate() + .map(|(x, y)| (y, x)) + .collect(), + methods: method_ids_list + .iter() + .cloned() + .enumerate() + .map(|(x, y)| (y, x)) + .collect(), + } } }