diff --git a/androscalpel/src/class.rs b/androscalpel/src/class.rs
index dc9bec4..3e4adb6 100644
--- a/androscalpel/src/class.rs
+++ b/androscalpel/src/class.rs
@@ -291,14 +291,21 @@ impl Class {
         methods
     }
 
-    /// If a data section is needed for this class.
-    pub fn has_data_section(&self) -> bool {
+    /// If a data item is needed for this class.
+    pub fn has_data_item(&self) -> bool {
         !self.static_fields.is_empty()
             || !self.instance_fields.is_empty()
             || !self.direct_methods.is_empty()
             || !self.virtual_methods.is_empty()
     }
 
+    /// If a static_values array is needed for this class.
+    pub fn has_static_values_array(&self) -> bool {
+        self.static_fields
+            .values()
+            .any(|field| field.value.is_some())
+    }
+
     /// Return the binary representation of access flags.
     pub fn get_raw_access_flags(&self) -> u32 {
         let mut flags = 0u32;
diff --git a/androscalpel/src/dex_id.rs b/androscalpel/src/dex_id.rs
index efcb30b..6911d83 100644
--- a/androscalpel/src/dex_id.rs
+++ b/androscalpel/src/dex_id.rs
@@ -8,7 +8,7 @@ use std::hash::{Hash, Hasher};
 use anyhow::anyhow;
 use pyo3::prelude::*;
 
-use crate::{DexString, Result};
+use crate::{scalar::*, DexString, DexValue, Result};
 use androscalpel_serializer::{StringDataItem, Uleb128};
 
 #[pyclass]
@@ -352,6 +352,34 @@ impl IdType {
         }
     }
 
+    /// Return the default value of the described type. (The value used for uninitialized field).
+    /// It can be either `0` or `null` depending on the type.
+    pub fn get_default_value(&self) -> Option<DexValue> {
+        if self.is_void() {
+            None // void has no value, hence no default
+        } else if self.is_boolean() {
+            Some(DexValue::Boolean(DexBoolean(false)))
+        } else if self.is_byte() {
+            Some(DexValue::Byte(DexByte(0)))
+        } else if self.is_short() {
+            Some(DexValue::Short(DexShort(0)))
+        } else if self.is_char() {
+            Some(DexValue::Char(DexChar(0)))
+        } else if self.is_int() {
+            Some(DexValue::Int(DexInt(0)))
+        } else if self.is_long() {
+            Some(DexValue::Long(DexLong(0)))
+        } else if self.is_float() {
+            Some(DexValue::Float(DexFloat(0.)))
+        } else if self.is_double() {
+            Some(DexValue::Double(DexDouble(0.)))
+        } else if self.is_class() || self.is_array() {
+            Some(DexValue::Null(DexNull))
+        } else {
+            None // none of the known type kinds matched
+        }
+    }
+
     /// Return the shorty repr of this type (ID: the type if it's a scalar, 'L' else)
     pub fn get_shorty(&self) -> DexString {
         if self.is_void()
diff --git a/androscalpel/src/dex_writer.rs b/androscalpel/src/dex_writer.rs
index fc81392..f99b3f8 100644
--- a/androscalpel/src/dex_writer.rs
+++ b/androscalpel/src/dex_writer.rs
@@ -70,6 +70,10 @@ pub struct DexWriter {
     type_lists_index: HashMap,
     /// The type_lists section and the offset of the lists inside the section.
     type_lists_with_offset: Vec<(TypeList, u32)>,
+    /// The encoded_array_items section.
+    encoded_array_items: Vec<EncodedArrayItem>,
+    /// The method_handles section.
+    method_handles: Vec<MethodHandleItem>,
     /// The map_list
     map_list: MapList,
 }
@@ -121,6 +125,8 @@ impl Default for DexWriter {
             type_lists_index: HashMap::new(),
             type_lists_with_offset: vec![],
             map_list: MapList::default(),
+            encoded_array_items: vec![],
+            method_handles: vec![],
         }
     }
 }
@@ -419,6 +425,202 @@ impl DexWriter {
         Ok(())
     }
 
+    /// Convert a [`DexValue`] to an [`EncodedValue`].
+    ///
+    /// # Warning
+    ///
+    /// This method can insert element in the dex file like method_handles.
+    pub fn dex_value_to_encoded_value(&mut self, value: &DexValue) -> Result<EncodedValue> {
+        match value {
+            DexValue::Byte(DexByte(val)) => Ok(EncodedValue::Byte(*val)),
+            DexValue::Short(DexShort(val)) => Ok(EncodedValue::Short(*val)),
+            DexValue::Char(DexChar(val)) => Ok(EncodedValue::Char(*val)),
+            DexValue::Int(DexInt(val)) => Ok(EncodedValue::Int(*val)),
+            DexValue::Long(DexLong(val)) => Ok(EncodedValue::Long(*val)),
+            DexValue::Float(DexFloat(val)) => Ok(EncodedValue::Float(*val)),
+            DexValue::Double(DexDouble(val)) => Ok(EncodedValue::Double(*val)),
+            DexValue::MethodType(val) => Ok(EncodedValue::MethodType(
+                *self.proto_ids.get(val).ok_or(anyhow!(
+                    "Prototype {} not found in dex writer",
+                    val.__repr__()
+                ))? as u32,
+            )),
+            DexValue::MethodHandle(val) => {
+                // TODO: move to a method
+                let idx = self.method_handles.len() as u32; // index the new handle will get
+                // The id is stored as a u16: convert with a check instead of truncating.
+                let (field_or_method_id, method_handle_type) = match val {
+                    MethodHandle::StaticPut(StaticPut(field)) => (
+                        u16::try_from(*self.field_ids.get(field).ok_or(anyhow!(
+                            "Field {} not found in dex writer",
+                            field.__repr__()
+                        ))?)?,
+                        MethodHandleType::StaticPut,
+                    ),
+                    MethodHandle::StaticGet(StaticGet(field)) => (
+                        u16::try_from(*self.field_ids.get(field).ok_or(anyhow!(
+                            "Field {} not found in dex writer",
+                            field.__repr__()
+                        ))?)?,
+                        MethodHandleType::StaticGet,
+                    ),
+                    MethodHandle::InstancePut(InstancePut(field)) => (
+                        u16::try_from(*self.field_ids.get(field).ok_or(anyhow!(
+                            "Field {} not found in dex writer",
+                            field.__repr__()
+                        ))?)?,
+                        MethodHandleType::InstancePut,
+                    ),
+                    MethodHandle::InstanceGet(InstanceGet(field)) => (
+                        u16::try_from(*self.field_ids.get(field).ok_or(anyhow!(
+                            "Field {} not found in dex writer",
+                            field.__repr__()
+                        ))?)?,
+                        MethodHandleType::InstanceGet,
+                    ),
+                    MethodHandle::InvokeStatic(InvokeStatic(meth)) => (
+                        u16::try_from(*self.method_ids.get(meth).ok_or(anyhow!(
+                            "Method {} not found in dex writer",
+                            meth.__repr__()
+                        ))?)?,
+                        MethodHandleType::InvokeStatic,
+                    ),
+                    MethodHandle::InvokeInstance(InvokeInstance(meth)) => (
+                        u16::try_from(*self.method_ids.get(meth).ok_or(anyhow!(
+                            "Method {} not found in dex writer",
+                            meth.__repr__()
+                        ))?)?,
+                        MethodHandleType::InvokeInstance,
+                    ),
+                    MethodHandle::InvokeConstructor(InvokeConstructor(meth)) => (
+                        u16::try_from(*self.method_ids.get(meth).ok_or(anyhow!(
+                            "Method {} not found in dex writer",
+                            meth.__repr__()
+                        ))?)?,
+                        MethodHandleType::InvokeConstructor,
+                    ),
+                    MethodHandle::InvokeDirect(InvokeDirect(meth)) => (
+                        u16::try_from(*self.method_ids.get(meth).ok_or(anyhow!(
+                            "Method {} not found in dex writer",
+                            meth.__repr__()
+                        ))?)?,
+                        MethodHandleType::InvokeDirect,
+                    ),
+                    MethodHandle::InvokeInterface(InvokeInterface(meth)) => (
+                        u16::try_from(*self.method_ids.get(meth).ok_or(anyhow!(
+                            "Method {} not found in dex writer",
+                            meth.__repr__()
+                        ))?)?,
+                        MethodHandleType::InvokeInterface,
+                    ),
+                };
+                self.method_handles.push(MethodHandleItem {
+                    method_handle_type,
+                    field_or_method_id,
+                    unused1: 0,
+                    unused2: 0,
+                });
+                Ok(EncodedValue::MethodHandle(idx))
+            }
+            DexValue::String(val) => Ok(EncodedValue::String(
+                *self
+                    .strings
+                    .get(val)
+                    .ok_or(anyhow!("String {} not found in dex writer", val.__repr__()))?
+                    as u32,
+            )),
+            DexValue::Type(val) => Ok(EncodedValue::Type(
+                *self
+                    .type_ids
+                    .get(val)
+                    .ok_or(anyhow!("Type {} not found in dex writer", val.__repr__()))?
+                    as u32,
+            )),
+            DexValue::Field(val) => Ok(EncodedValue::Field(
+                *self
+                    .field_ids
+                    .get(val)
+                    .ok_or(anyhow!("Field {} not found in dex writer", val.__repr__()))?
+                    as u32,
+            )),
+            DexValue::Method(val) => Ok(EncodedValue::Method(
+                *self
+                    .method_ids
+                    .get(val)
+                    .ok_or(anyhow!("Method {} not found in dex writer", val.__repr__()))?
+                    as u32,
+            )),
+            DexValue::Enum(IdEnum(val)) => Ok(EncodedValue::Enum(
+                *self
+                    .field_ids
+                    .get(val)
+                    .ok_or(anyhow!("Field {} not found in dex writer", val.__repr__()))?
+                    as u32,
+            )),
+            DexValue::Array(DexArray(arr)) => {
+                let mut values = vec![];
+                for val in arr {
+                    values.push(
+                        self.dex_value_to_encoded_value(val)
+                            .context("Error while serializing a array")?,
+                    );
+                }
+                Ok(EncodedValue::Array(EncodedArray { values }))
+            }
+            DexValue::Annotation(_) => todo!(), // Ok(EncodedValue::Annotation(todo!())),
+            DexValue::Null(DexNull) => Ok(EncodedValue::Null),
+            DexValue::Boolean(DexBoolean(val)) => Ok(EncodedValue::Boolean(*val)),
+        }
+    }
+
+    /// Insert an encoded_array in the encoded_array_item section.
+    fn insert_encoded_array_item(&mut self, DexArray(array): DexArray) -> Result<()> {
+        let mut values = vec![];
+        for value in array {
+            values.push(self.dex_value_to_encoded_value(&value)?);
+        }
+        let item = EncodedArrayItem {
+            value: EncodedArray { values },
+        };
+        self.section_manager
+            .add_elt(Section::EncodedArrayItem, Some(item.size()));
+        self.encoded_array_items.push(item);
+        Ok(())
+    }
+
+    /// Insert the encoded_array_item encoding the static_values of a class.
+    fn insert_class_static_values(&mut self, class_id: &IdType) -> Result<()> {
+        let (class, _) = self.class_defs.get(class_id).unwrap();
+        let mut static_fields: Vec<_> = class.static_fields.keys().cloned().collect();
+        static_fields.sort(); // the array is emitted in sorted field order
+        let mut array = vec![];
+        let mut last_defined_field_idx = 0;
+        for (idx, f) in static_fields.iter().enumerate() {
+            if class.static_fields.get(f).unwrap().value.is_some() {
+                last_defined_field_idx = idx;
+            }
+        }
+        for f in &static_fields[..=last_defined_field_idx] { // inclusive: keep the last valued field
+            let field = class.static_fields.get(f).unwrap();
+            if let Some(val) = field.value.as_ref() {
+                array.push(val.clone());
+            } else {
+                array.push(field.descriptor.type_.get_default_value().ok_or(anyhow!(
+                    "The type {} (for field {} in class {}) does not have a default value",
+                    field.descriptor.type_.__repr__(),
+                    field.descriptor.__repr__(),
+                    class_id.__repr__()
+                ))?);
+            }
+        }
+        self.insert_encoded_array_item(DexArray(array))
+            .with_context(|| {
+                format!(
+                    "Failed to serialize static values of class {}",
+                    class_id.__repr__()
+                )
+            })
+    }
+
     /// Insert a class_def_item in the class_defs section **and** the other struct that needs to be
     /// generated on the fly.
     ///
@@ -433,7 +635,7 @@ impl DexWriter {
             .entry(class_id.clone())
             .and_modify(|(_, i)| *i = idx);
         let (class, _) = self.class_defs.get(class_id).unwrap();
-        let class_data_off = if class.has_data_section() {
+        let class_data_off = if class.has_data_item() {
             let class_data_off = self.section_manager.get_size(Section::ClassDataItem);
             self.insert_class_data_item(class_id)?;
             class_data_off
@@ -443,6 +645,14 @@ impl DexWriter {
         // & vs &mut cluster-f, this make rust drop the ref so self hold by `class` before
         // mutating self with `insert_class_data_item`, and get a new ref afterward
         let (class, _) = self.class_defs.get(class_id).unwrap();
+        let static_values_off = if class.has_static_values_array() {
+            let static_values_off = self.section_manager.get_size(Section::EncodedArrayItem);
+            self.insert_class_static_values(class_id)?;
+            static_values_off
+        } else {
+            0
+        };
+        let (class, _) = self.class_defs.get(class_id).unwrap();
         self.class_defs_list.push(ClassDefItem {
             class_idx: *self.type_ids.get(class_id).ok_or(anyhow!(
                 "Type {} (type of class {}) not found in dex builder",
@@ -470,9 +680,9 @@ impl DexWriter {
                 NO_INDEX.0
             },
 
-            annotations_off: 0, // TODO
-            class_data_off,     // need relinking once offset of class_data section is known
-            static_values_off: 0, // TODO
+            annotations_off: 0, // TODO
+            class_data_off,     // need relinking once offset of class_data section is known
+            static_values_off,  // need relinking once offset of encoded_array section is known
         });
         self.section_manager.add_elt(Section::ClassDefItem, None);
         Ok(())
@@ -512,6 +722,12 @@ impl DexWriter {
                 .add_elt(Section::TypeList, Some(list.size()));
             offset += list.size() as u32;
         }
+        // The next section requires alignment to 4
+        while offset % 4 != 0 {
+            // Alignment
+            self.section_manager.incr_section_size(Section::TypeList, 1);
+            offset += 1;
+        }
         Ok(())
     }
 
@@ -626,7 +842,7 @@ impl DexWriter {
     fn link_class_data_occurences(&mut self) -> Result<()> {
         debug!("Link the class_data_item entries in class_def_items");
         for (class, idx) in self.class_defs.values() {
-            if class.has_data_section() {
+            if class.has_data_item() {
                 self.class_defs_list[*idx].class_data_off +=
                     self.section_manager.get_offset(Section::ClassDataItem);
             }
@@ -634,6 +850,22 @@ impl DexWriter {
         Ok(())
     }
 
+    /// Link the offsets of static_values_off in class_def_items.
+    ///
+    /// # Warning
+    ///
+    /// Linking can only occur once all sections are entirely generated.
+    fn link_static_values(&mut self) -> Result<()> {
+        debug!("Link the static_values entries in class_def_items");
+        for (class, idx) in self.class_defs.values() {
+            if class.has_static_values_array() {
+                self.class_defs_list[*idx].static_values_off +=
+                    self.section_manager.get_offset(Section::EncodedArrayItem);
+            }
+        }
+        Ok(())
+    }
+
     fn write_dex_file(&mut self, writer: &mut dyn Write) -> Result<()> {
         // TODO: SPLIT THIS IN METHODS !!!
         self.section_manager.reset();
@@ -651,15 +883,12 @@ impl DexWriter {
         }
         self.gen_type_list_section()?;
 
-        // TODO: move to attributes
-        // Method handles are not ordered, nor deduplicated, so they are generated on the fly
-        let mut _method_handles: Vec<MethodHandleItem> = vec![];
-
         self.get_map_list()?;
 
         self.link_header();
         self.link_type_list_occurences()?;
         self.link_class_data_occurences()?;
+        self.link_static_values()?;
 
         debug!("Serialize the dex file");
         // TODO: compute checksum, hash, ect
@@ -695,13 +924,13 @@ impl DexWriter {
         }
         // TODO: CallSiteIdItem,
         // MethodHandleItem section
-        for handle in _method_handles {
+        for handle in &self.method_handles {
             handle.serialize(writer)?;
         }
         // MapList
         self.map_list.serialize(writer)?;
         // TypeList,
-        let mut offset = 0;
+        let mut offset = 0; // the sections are always aligned until the type_lists
         for (list, _) in &self.type_lists_with_offset {
             while offset % 4 != 0 {
                 offset += 1;
@@ -710,6 +939,12 @@ impl DexWriter {
             offset += list.size();
             list.serialize(writer)?;
         }
+        // The next section requires alignment to 4
+        while offset % 4 != 0 {
+            // Alignment
+            writer.write_all(&[0u8])?; // pad byte; its size was counted when the section was generated
+            offset += 1;
+        }
         // TODO: AnnotationSetRefList,
         // TODO: AnnotationSetItem,
         // ClassDataItem section
@@ -723,7 +958,10 @@ impl DexWriter {
 
         // TODO: DebugInfoItem,
         // TODO: AnnotationItem,
-        // TODO: EncodedArrayItem,
+        // TODO: EncodedArrayItem: partially done
+        for array in &self.encoded_array_items {
+            array.serialize(writer)?;
+        }
         // TODO: AnnotationsDirectoryItem,
         // TODO: HiddenapiClassDataItem,
 