generate code when linking class data item

This commit is contained in:
Jean-Marie 'Histausse' Mineau 2024-03-22 18:50:43 +01:00
parent 232b906db1
commit e78a67d1d1
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
2 changed files with 243 additions and 109 deletions

View file

@ -10,6 +10,7 @@ use crate::Result;
use crate::*; use crate::*;
use androscalpel_serializer::*; use androscalpel_serializer::*;
use crate::dex_writer::DexIndex;
use crate::ins::{CallSite, Instruction}; use crate::ins::{CallSite, Instruction};
use crate::instructions::*; use crate::instructions::*;
use androscalpel_serializer::Instruction as InsFormat; use androscalpel_serializer::Instruction as InsFormat;
@ -572,12 +573,13 @@ impl DexFragment {
Ok(()) Ok(())
} }
// TODO: find if there is a way to efficiently link code item.
/// Insert a code_item. /// Insert a code_item.
/// ///
/// # Warning /// This item cannot be cached, because the jump instructions depend on the size of
/// /// instructions that depend on the size of the descriptor ids that depend on the
/// This is currently a stub that probably serialize invalid references to data. /// list of all descriptors in the dex file.
fn insert_code_item(&mut self, code: &Code, index: &FragIndex) -> Result<()> { fn insert_code_item(&mut self, code: &Code, index: &DexIndex) -> Result<()> {
// Estimate instructions addresses // Estimate instructions addresses
let mut min_addr = 0; let mut min_addr = 0;
let mut max_addr = 0; let mut max_addr = 0;
@ -1563,11 +1565,14 @@ impl DexFragment {
let access_flags = let access_flags =
Uleb128(class.direct_methods.get(id).unwrap().get_raw_access_flags()); Uleb128(class.direct_methods.get(id).unwrap().get_raw_access_flags());
let code_off = if let Some(code) = &class.direct_methods.get(id).unwrap().code { let code_off = if let Some(code) = &class.direct_methods.get(id).unwrap().code {
let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); // CodeItems depend too much on the complete descriptor list to be generated
self.insert_code_item(code, index).with_context(|| { // prior to linking
format!("Failed to convert serialize code of {}", id.__str__()) //let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem);
})?; //self.insert_code_item(code, index).with_context(|| {
Uleb128(code_off + 1) // format!("Failed to convert serialize code of {}", id.__str__())
//})?;
//Uleb128(code_off + 1)
Uleb128(1)
} else { } else {
Uleb128(0) Uleb128(0)
}; };
@ -1597,11 +1602,14 @@ impl DexFragment {
.get_raw_access_flags(), .get_raw_access_flags(),
); );
let code_off = if let Some(code) = &class.virtual_methods.get(id).unwrap().code { let code_off = if let Some(code) = &class.virtual_methods.get(id).unwrap().code {
let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem); // CodeItems depend too much on the complete descriptor list to be generated
self.insert_code_item(code, index).with_context(|| { // prior to linking
format!("Failed to convert serialize code of {}", id.__str__()) // let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem);
})?; // self.insert_code_item(code, index).with_context(|| {
Uleb128(code_off + 1) // format!("Failed to convert serialize code of {}", id.__str__())
// })?;
// Uleb128(code_off + 1)
Uleb128(1)
} else { } else {
Uleb128(0) Uleb128(0)
}; };
@ -1884,21 +1892,19 @@ impl DexFragment {
/// linked before. /// linked before.
pub fn link_global_ids( pub fn link_global_ids(
&mut self, &mut self,
global_strings: &[DexString], class: &Class,
global_type_ids: &[IdType], index: &DexIndex,
global_proto_ids: &[IdMethodType],
global_field_ids: &[IdField],
global_method_ids: &[IdMethod],
nb_method_handle_before_fragment: usize, nb_method_handle_before_fragment: usize,
) -> Result<()> { ) -> Result<()> {
self.link_state.start_linking_idx()?; self.link_state.start_linking_idx()?;
let string_reindex = Vec::with_capacity(self.strings.len()); let string_reindex = Vec::with_capacity(self.strings.len());
// TODO: considering we have the map, this can be simplified a lot
let mut global_idx = 0; let mut global_idx = 0;
for s in self.strings { for s in self.strings {
while global_idx < global_strings.len() && global_strings[global_idx] != s { while global_idx < index.strings_list.len() && index.strings_list[global_idx] != s {
global_idx += 1; global_idx += 1;
} }
if global_idx == global_strings.len() { if global_idx == index.strings_list.len() {
bail!("String {} not found in global index", s.__str__()); bail!("String {} not found in global index", s.__str__());
} }
string_reindex.push(global_idx as u32); string_reindex.push(global_idx as u32);
@ -1906,10 +1912,10 @@ impl DexFragment {
let type_reindex = Vec::with_capacity(self.type_ids.len()); let type_reindex = Vec::with_capacity(self.type_ids.len());
let mut global_idx = 0; let mut global_idx = 0;
for ty in self.type_ids { for ty in self.type_ids {
while global_idx < global_type_ids.len() && global_type_ids[global_idx] != ty { while global_idx < index.type_ids_list.len() && index.type_ids_list[global_idx] != ty {
global_idx += 1; global_idx += 1;
} }
if global_idx == global_type_ids.len() { if global_idx == index.type_ids_list.len() {
bail!("Type {} not found in global index", ty.__str__()); bail!("Type {} not found in global index", ty.__str__());
} }
type_reindex.push(global_idx as u32); type_reindex.push(global_idx as u32);
@ -1917,10 +1923,12 @@ impl DexFragment {
let proto_reindex = Vec::with_capacity(self.proto_ids.len()); let proto_reindex = Vec::with_capacity(self.proto_ids.len());
let mut global_idx = 0; let mut global_idx = 0;
for proto in self.proto_ids { for proto in self.proto_ids {
while global_idx < global_proto_ids.len() && global_proto_ids[global_idx] != proto { while global_idx < index.proto_ids_list.len()
&& index.proto_ids_list[global_idx] != proto
{
global_idx += 1; global_idx += 1;
} }
if global_idx == global_proto_ids.len() { if global_idx == index.proto_ids_list.len() {
bail!("Prototype {} not found in global index", proto.__str__()); bail!("Prototype {} not found in global index", proto.__str__());
} }
proto_reindex.push(global_idx as u32); proto_reindex.push(global_idx as u32);
@ -1928,10 +1936,12 @@ impl DexFragment {
let field_reindex = Vec::with_capacity(self.field_ids.len()); let field_reindex = Vec::with_capacity(self.field_ids.len());
let mut global_idx = 0; let mut global_idx = 0;
for field in self.field_ids { for field in self.field_ids {
while global_idx < global_field_ids.len() && global_field_ids[global_idx] != field { while global_idx < index.field_ids_list.len()
&& index.field_ids_list[global_idx] != field
{
global_idx += 1; global_idx += 1;
} }
if global_idx == global_field_ids.len() { if global_idx == index.field_ids_list.len() {
bail!("Field {} not found in global index", field.__str__()); bail!("Field {} not found in global index", field.__str__());
} }
field_reindex.push(global_idx as u16); field_reindex.push(global_idx as u16);
@ -1939,18 +1949,23 @@ impl DexFragment {
let method_reindex = Vec::with_capacity(self.method_ids.len()); let method_reindex = Vec::with_capacity(self.method_ids.len());
let mut global_idx = 0; let mut global_idx = 0;
for meth in self.method_ids { for meth in self.method_ids {
while global_idx < global_method_ids.len() && global_method_ids[global_idx] != meth { while global_idx < index.method_ids_list.len()
&& index.method_ids_list[global_idx] != meth
{
global_idx += 1; global_idx += 1;
} }
if global_idx == global_method_ids.len() { if global_idx == index.method_ids_list.len() {
bail!("Method {} not found in global index", meth.__str__()); bail!("Method {} not found in global index", meth.__str__());
} }
method_reindex.push(global_idx as u16); method_reindex.push(global_idx as u16);
} }
self.link_id_class_data_and_gen_code(class, &field_reindex, &method_reindex, index)?;
self.link_id_class_def(&string_reindex, &type_reindex); self.link_id_class_def(&string_reindex, &type_reindex);
self.link_id_method_handle(&field_reindex, &method_reindex); self.link_id_method_handle(&field_reindex, &method_reindex);
// GEN CODE
todo!() todo!()
} }
@ -1977,51 +1992,6 @@ impl DexFragment {
} }
} }
/// Rewrite the fragment-local ids stored in the code items of this fragment.
///
/// For every code item, each instruction is handed to [`Self::link_id_ins`] together
/// with `string_reindex`, and the `type_idx` of every catch handler is replaced by
/// `type_reindex[type_idx]`. The `handler_off` of each try item is then remapped from
/// the offset its handler list had before the rewrite to the offset it has after.
///
/// # Panics
///
/// Panics when `self.link_state` is not `FragLinkState::LinkedIdx`, when a handler
/// `type_idx` is out of bounds for `type_reindex`, or when a try item refers to a
/// handler offset that was not recorded while walking the handler lists.
fn link_id_code(&mut self, string_reindex: &[u32], type_reindex: &[u32]) {
// NOTE(review): `total_size` is never read in this function.
let mut total_size = 0;
// NOTE(review): `code_item_relocation` is extracted from the link state but is not
// used afterwards in this function.
let mut code_item_relocation = if let FragLinkState::LinkedIdx {
code_item_relocation,
..
} = self.link_state
{
code_item_relocation
} else {
// link_global_ids() is expected to set the state before calling this function.
panic!("link_id_code should not be run outside of fn link_global_ids(..)");
};
for code in self.code_items {
// NOTE(review): `current_size` is never read in this function.
let current_size = code.size();
for ins in &mut code.insns {
Self::link_id_ins(ins, string_reindex);
}
// TODO: TryItem recompute handler_off
if let Some(handlers) = code.handlers {
// Maps the offset of a handler list before relinking to its offset after.
let mut handler_off_reindex = HashMap::new();
// Both offsets start right after the size field of the handler list.
let mut current_offset = handlers.size_field().size();
let mut old_offset = handlers.size_field().size();
for handlers in handlers.list {
handler_off_reindex.insert(old_offset as u16, current_offset as u16);
// The old offset advances by the size measured *before* the type ids are
// rewritten, the new one by the size measured *after*; presumably the
// serialized size depends on the id values (TODO confirm).
old_offset += handlers.size();
for handler in handlers.handlers {
handler.type_idx.0 = type_reindex[handler.type_idx.0 as usize];
}
current_offset += handlers.size();
}
// Point each try item at the new offset of its handler list.
for try_ in code.tries {
try_.handler_off = *handler_off_reindex
.get(&try_.handler_off)
.expect("Something whent wrong with the handle reindexing");
}
}
}
}
/// Rewrite the string index carried by `ins` from a fragment-local id to a global id.
///
/// Only `Format31C` instructions with opcode `0x1b` are handled: their `b` operand is
/// replaced by `string_reindex[b]`. Every other instruction, including `Format21C`
/// with opcode `0x1a`, currently reaches `todo!()` and therefore panics.
fn link_id_ins(ins: &mut InsFormat, string_reindex: &[u32]) {
match ins {
// Presumably `const-string/jumbo` -- confirm against the Dalvik opcode table.
InsFormat::Format31C { op: 0x1b, b, .. } => *b = string_reindex[b as usize],
// Presumably `const-string`, whose index operand is narrower than the jumbo
// variant, so a remapped id may not fit in place -- TODO confirm.
InsFormat::Format21C { op: 0x1a, b, .. } => todo!(), // TODO: not implemented yet
_ => todo!(),
}
}
fn link_id_string_data(&mut self) { fn link_id_string_data(&mut self) {
todo!() todo!()
} }
@ -2037,8 +2007,103 @@ impl DexFragment {
fn link_id_annotation_dir(&mut self) { fn link_id_annotation_dir(&mut self) {
todo!() todo!()
} }
fn link_id_class_data(&mut self) {
todo!() /// Link ids in [`ClassDataItem`] *and* generate the [`CodeItem`].
fn link_id_class_data_and_gen_code(
&mut self,
class: &Class,
field_reindex: &[u16],
method_reindex: &[u16],
index: &DexIndex,
) -> Result<()> {
if let Some(data) = self.class_data {
let mut last_local_id = 0;
let mut last_global_id = 0;
for field in data.static_fields {
let new_local_id = last_local_id + field.field_idx_diff.0;
let new_global_id = field_reindex[new_local_id as usize];
field.field_idx_diff.0 = (new_global_id - last_global_id) as u32;
last_local_id = new_local_id;
last_global_id = new_global_id;
}
let mut last_local_id = 0;
let mut last_global_id = 0;
for field in data.instance_fields {
let new_local_id = last_local_id + field.field_idx_diff.0;
let new_global_id = field_reindex[new_local_id as usize];
field.field_idx_diff.0 = (new_global_id - last_global_id) as u32;
last_local_id = new_local_id;
last_global_id = new_global_id;
}
let mut last_local_id = 0;
let mut last_global_id = 0;
for meth in data.direct_methods {
let new_local_id = last_local_id + meth.method_idx_diff.0;
let new_global_id = field_reindex[new_local_id as usize];
meth.method_idx_diff.0 = (new_global_id - last_global_id) as u32;
if meth.code_off.0 != 0 {
let meth_id = index.method_ids_list[new_global_id as usize];
let code = class
.direct_methods
.get(&meth_id)
.ok_or(anyhow!(
"direct method {} expected from fragment but not found in {}",
meth_id.__str__(),
class.__str__()
))?
.code;
if let Some(code) = code {
let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem);
self.insert_code_item(&code, index)?;
meth.code_off.0 = code_off + 1;
} else {
bail!(
"Inconsistant fragment: fragment expect a code item for {}\
but none was found in {}",
meth_id.__str__(),
class.__str__()
);
}
}
last_local_id = new_local_id;
last_global_id = new_global_id;
}
let mut last_local_id = 0;
let mut last_global_id = 0;
for meth in data.virtual_methods {
let new_local_id = last_local_id + meth.method_idx_diff.0;
let new_global_id = field_reindex[new_local_id as usize];
meth.method_idx_diff.0 = (new_global_id - last_global_id) as u32;
if meth.code_off.0 != 0 {
let meth_id = index.method_ids_list[new_global_id as usize];
let code = class
.virtual_methods
.get(&meth_id)
.ok_or(anyhow!(
"virtual method {} expected from fragment but not found in {}",
meth_id.__str__(),
class.__str__()
))?
.code;
if let Some(code) = code {
let code_off = self.section_manager.get_aligned_size(FragSection::CodeItem);
self.insert_code_item(&code, index)?;
meth.code_off.0 = code_off + 1;
} else {
bail!(
"Inconsistant fragment: fragment expect a code item for {}\
but none was found in {}",
meth_id.__str__(),
class.__str__()
);
}
}
last_local_id = new_local_id;
last_global_id = new_global_id;
}
}
Ok(())
} }
} }
@ -2305,7 +2370,7 @@ impl FragSectionManager {
} }
} }
/// Index that associate a type to its local id in a fragment. /// Index that associate descriptors to their local id in a fragment.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct FragIndex { struct FragIndex {
pub strings: HashMap<DexString, usize>, pub strings: HashMap<DexString, usize>,

View file

@ -1,34 +1,38 @@
//! The structure that generate a .dex from classes. //! The structure that generate a .dex from classes.
use std::collections::{HashSet, VecDeque}; use std::collections::{HashMap, HashSet, VecDeque};
use crate::dex_fragment::DexFragment; use crate::dex_fragment::DexFragment;
use crate::{DexString, IdField, IdMethod, IdMethodType, IdType}; use crate::{Class, DexString, IdField, IdMethod, IdMethodType, IdType, Result};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct DexWriter { pub struct DexWriter<'a> {
fragments: VecDeque<DexFragment>, classes: VecDeque<&'a Class>,
} }
impl Default for DexWriter { impl<'a> Default for DexWriter<'a> {
fn default() -> Self { fn default() -> Self {
Self { Self {
fragments: VecDeque::new(), classes: VecDeque::new(),
} }
} }
} }
impl DexWriter { impl<'a> DexWriter<'a> {
pub fn new() -> Self { pub fn new() -> Self {
Self::default() Self::default()
} }
pub fn empty(&self) -> bool { pub fn empty(&self) -> bool {
self.fragments.is_empty() self.classes.is_empty()
}
pub fn add_class(&mut self, class: &'a Class) {
self.classes.push_back(class)
} }
/// Take as many fragments as possible and convert them to a dex file. /// Take as many fragments as possible and convert them to a dex file.
pub fn generate_next_dex_file(&mut self) -> Vec<u8> { pub fn generate_next_dex_file(&mut self) -> Result<Vec<u8>> {
let mut fragments = vec![]; let mut fragments_in_file = vec![];
let mut string_set: HashSet<DexString> = HashSet::new(); let mut string_set: HashSet<DexString> = HashSet::new();
let mut type_set: HashSet<IdType> = HashSet::new(); let mut type_set: HashSet<IdType> = HashSet::new();
let mut proto_set: HashSet<IdMethodType> = HashSet::new(); let mut proto_set: HashSet<IdMethodType> = HashSet::new();
@ -37,8 +41,16 @@ impl DexWriter {
let mut type_list_set: HashSet<Vec<IdType>> = HashSet::new(); let mut type_list_set: HashSet<Vec<IdType>> = HashSet::new();
let mut nb_method_handle = 0; let mut nb_method_handle = 0;
let mut nb_method_handle_before = vec![]; let mut nb_method_handle_before = vec![];
let fragments: VecDeque<(&'a Class, DexFragment)> = self
.classes
.into_iter()
.map(|class| match DexFragment::new(class) {
Ok(frag) => Ok((class, frag)),
Err(err) => Err(err),
})
.collect()?;
loop { loop {
let new_fragment = if let Some(new_fragment) = self.fragments.pop_front() { let (class, new_fragment) = if let Some(new_fragment) = fragments.pop_front() {
new_fragment new_fragment
} else { } else {
break; break;
@ -52,7 +64,7 @@ impl DexWriter {
.count() .count()
> u16::MAX as usize > u16::MAX as usize
{ {
self.fragments.push_front(new_fragment); fragments.push_front((class, new_fragment));
break; break;
} }
if proto_set.len() + new_fragment.proto_ids().len() > u16::MAX as usize if proto_set.len() + new_fragment.proto_ids().len() > u16::MAX as usize
@ -64,7 +76,7 @@ impl DexWriter {
.count() .count()
> u16::MAX as usize > u16::MAX as usize
{ {
self.fragments.push_front(new_fragment); fragments.push_front((class, new_fragment));
break; break;
} }
if field_set.len() + new_fragment.field_ids().len() > u16::MAX as usize if field_set.len() + new_fragment.field_ids().len() > u16::MAX as usize
@ -76,7 +88,7 @@ impl DexWriter {
.count() .count()
> u16::MAX as usize > u16::MAX as usize
{ {
self.fragments.push_front(new_fragment); fragments.push_front((class, new_fragment));
break; break;
} }
if method_set.len() + new_fragment.method_ids().len() > u16::MAX as usize if method_set.len() + new_fragment.method_ids().len() > u16::MAX as usize
@ -88,7 +100,7 @@ impl DexWriter {
.count() .count()
> u16::MAX as usize > u16::MAX as usize
{ {
self.fragments.push_front(new_fragment); fragments.push_front((class, new_fragment));
break; break;
} }
string_set.extend(new_fragment.strings().iter().cloned()); string_set.extend(new_fragment.strings().iter().cloned());
@ -99,32 +111,89 @@ impl DexWriter {
type_list_set.insert(new_fragment.interfaces().to_vec()); type_list_set.insert(new_fragment.interfaces().to_vec());
nb_method_handle_before.push(nb_method_handle); nb_method_handle_before.push(nb_method_handle);
nb_method_handle += new_fragment.method_handles().len(); nb_method_handle += new_fragment.method_handles().len();
fragments.push(new_fragment); fragments_in_file.push((class, new_fragment));
} }
type_list_set.extend(proto_set.iter().map(|proto| proto.parameters.clone())); type_list_set.extend(proto_set.iter().map(|proto| proto.parameters.clone()));
let mut strings: Vec<DexString> = string_set.into_iter().collect(); let mut strings: Vec<DexString> = string_set.iter().cloned().collect();
strings.sort(); strings.sort();
let mut type_ids: Vec<IdType> = type_set.into_iter().collect(); let mut type_ids: Vec<IdType> = type_set.iter().cloned().collect();
type_ids.sort(); type_ids.sort();
let mut proto_ids: Vec<IdMethodType> = proto_set.into_iter().collect(); let mut proto_ids: Vec<IdMethodType> = proto_set.iter().cloned().collect();
proto_ids.sort(); proto_ids.sort();
let mut field_ids: Vec<IdField> = field_set.into_iter().collect(); let mut field_ids: Vec<IdField> = field_set.iter().cloned().collect();
field_ids.sort(); field_ids.sort();
let mut method_ids: Vec<IdMethod> = method_set.into_iter().collect(); let mut method_ids: Vec<IdMethod> = method_set.iter().cloned().collect();
method_ids.sort(); method_ids.sort();
let mut type_lists: Vec<Vec<IdType>> = type_list_set.into_iter().collect(); let mut type_lists: Vec<Vec<IdType>> = type_list_set.iter().cloned().collect();
let index = DexIndex::new(&strings, &type_ids, &proto_ids, &field_ids, &method_ids);
for (i, fragment) in fragments.iter().enumerate() { for (i, (class, fragment)) in fragments_in_file.iter().enumerate() {
fragment.link_global_ids( fragment.link_global_ids(&index, nb_method_handle_before[i]);
&strings,
&type_ids,
&proto_ids,
&field_ids,
&method_ids,
nb_method_handle_before[i],
);
} }
vec![] Ok(vec![])
}
}
/// Index that associates descriptors with their id in the dex file.
///
/// The `*_list` slices are the descriptor lists borrowed from the caller; each map
/// stores, for every descriptor, its position in the matching list (both are filled
/// by `DexIndex::new`).
#[derive(Debug, Clone)]
pub(crate) struct DexIndex<'a> {
/// Position of each string in `strings_list`.
pub strings: HashMap<DexString, usize>,
/// Position of each type in `type_ids_list`.
pub types: HashMap<IdType, usize>,
/// Position of each prototype in `proto_ids_list`.
pub protos: HashMap<IdMethodType, usize>,
/// Position of each field in `field_ids_list`.
pub fields: HashMap<IdField, usize>,
/// Position of each method in `method_ids_list`.
pub methods: HashMap<IdMethod, usize>,
/// Strings of the dex file, indexed by id.
pub strings_list: &'a [DexString],
/// Types of the dex file, indexed by id.
pub type_ids_list: &'a [IdType],
/// Prototypes of the dex file, indexed by id.
pub proto_ids_list: &'a [IdMethodType],
/// Fields of the dex file, indexed by id.
pub field_ids_list: &'a [IdField],
/// Methods of the dex file, indexed by id.
pub method_ids_list: &'a [IdMethod],
}
impl<'a> DexIndex<'a> {
fn new(
strings_list: &'a [DexString],
type_ids_list: &'a [IdType],
proto_ids_list: &'a [IdMethodType],
field_ids_list: &'a [IdField],
method_ids_list: &'a [IdMethod],
) -> Self {
Self {
strings_list,
type_ids_list,
proto_ids_list,
field_ids_list,
method_ids_list,
strings: strings_list
.iter()
.cloned()
.enumerate()
.map(|(x, y)| (y, x))
.collect(),
types: type_ids_list
.iter()
.cloned()
.enumerate()
.map(|(x, y)| (y, x))
.collect(),
protos: proto_ids_list
.iter()
.cloned()
.enumerate()
.map(|(x, y)| (y, x))
.collect(),
fields: field_ids_list
.iter()
.cloned()
.enumerate()
.map(|(x, y)| (y, x))
.collect(),
methods: method_ids_list
.iter()
.cloned()
.enumerate()
.map(|(x, y)| (y, x))
.collect(),
}
} }
} }