add type lists to generated dex files

This commit is contained in:
Jean-Marie Mineau 2023-12-01 18:08:30 +01:00
parent 211402ae9d
commit 57d6b38746
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
11 changed files with 338 additions and 25 deletions

View file

@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
use crate::{dex_id::IdType, value::DexValue, DexString};
use crate::{dex_id::IdType, value::DexValue, DexString, IdMethodType};
/// Annotation with a visibility
#[pyclass]
@ -71,6 +71,11 @@ impl DexAnnotationItem {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.annotation.get_all_types()
}
/// Collect every method prototype referenced by the annotation item.
///
/// The lookup is forwarded to the `get_all_protos` of the inner `annotation`.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.annotation.get_all_protos()
}
}
/// An annotation.
@ -128,4 +133,13 @@ impl DexAnnotation {
}
types
}
/// Collect every method prototype referenced by the values of the annotation's elements.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.elements
        .values()
        .flat_map(|element| element.get_all_protos())
        .collect()
}
}

View file

@ -4,7 +4,9 @@ use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
use crate::{DexAnnotationItem, DexString, Field, IdField, IdMethod, IdType, Method, Result};
use crate::{
DexAnnotationItem, DexString, Field, IdField, IdMethod, IdMethodType, IdType, Method, Result,
};
/// Represent an apk
#[pyclass]
@ -190,4 +192,27 @@ impl Class {
}
types
}
/// Return all prototypes referenced in the class.
///
/// The result is the union of the prototypes found in the static and instance
/// fields, in the direct and virtual methods (both the method id and the method
/// itself are inspected) and in the annotations of the class.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    let mut collected = HashSet::new();

    // Fields.
    collected.extend(
        self.static_fields
            .values()
            .flat_map(|field| field.get_all_protos()),
    );
    collected.extend(
        self.instance_fields
            .values()
            .flat_map(|field| field.get_all_protos()),
    );

    // Methods: the id used as key and the method definition are both visited.
    collected.extend(self.direct_methods.iter().flat_map(|(id, method)| {
        id.get_all_protos()
            .into_iter()
            .chain(method.get_all_protos())
    }));
    collected.extend(self.virtual_methods.iter().flat_map(|(id, method)| {
        id.get_all_protos()
            .into_iter()
            .chain(method.get_all_protos())
    }));

    // Class level annotations.
    collected.extend(
        self.annotations
            .iter()
            .flat_map(|annot| annot.get_all_protos()),
    );

    collected
}
}

View file

@ -4,7 +4,7 @@ use std::collections::HashSet;
use pyo3::prelude::*;
use crate::{DexString, IdType};
use crate::{DexString, IdMethodType, IdType};
// TODO: make this easy to edit/manipulate, maybe move to Method
@ -84,4 +84,10 @@ impl Code {
}
types
}
/// Return all prototypes referenced in the code.
///
/// Not implemented yet: an empty set is returned for now.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    // TODO
    HashSet::new()
}
}

View file

@ -1,6 +1,6 @@
//! The class identifying dex structure.
use std::cmp::{Ord, PartialOrd};
use std::cmp::{Ord, Ordering, PartialOrd};
use std::collections::hash_map::DefaultHasher;
use std::collections::HashSet;
use std::hash::{Hash, Hasher};
@ -21,6 +21,21 @@ pub struct IdMethodType {
pub(crate) parameters: Vec<IdType>,
}
impl Ord for IdMethodType {
    /// Order prototypes by return type first, then by parameter list, then by shorty.
    fn cmp(&self, other: &Self) -> Ordering {
        let by_return_type = self.return_type.cmp(&other.return_type);
        by_return_type
            .then_with(|| self.parameters.cmp(&other.parameters))
            // The shorty should not have an influence if generated correctly.
            .then_with(|| self.shorty.cmp(&other.shorty))
    }
}
impl PartialOrd for IdMethodType {
    /// Prototypes are totally ordered: this always returns `Some` of the [`Ord`] result.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
#[pymethods]
/// The type of a method. The shorty is formatted as described in
/// <https://source.android.com/docs/core/runtime/dex-format#shortydescriptor>
@ -90,6 +105,13 @@ impl IdMethodType {
types.extend(self.parameters.clone());
types
}
/// Return all prototypes referenced in the Id.
///
/// A prototype only references itself, so the set holds a single clone of `self`.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    std::iter::once(self.clone()).collect()
}
}
impl IdMethodType {
@ -523,6 +545,13 @@ impl IdMethod {
types.insert(self.class_.clone());
types
}
/// Return all prototypes referenced in the Id.
///
/// The set holds a single element: a clone of the `proto` of the method.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    std::iter::once(self.proto.clone()).collect()
}
}
#[pyclass]

View file

@ -3,7 +3,7 @@
use std::collections::HashMap;
use std::io::{Cursor, Write};
//use anyhow::bail;
use anyhow::{anyhow, Context};
use crate::Result;
use crate::*;
@ -14,7 +14,7 @@ pub struct DexWriter {
header: HeaderItem,
strings: HashMap<DexString, usize>,
type_ids: HashMap<IdType, usize>,
_proto_ids: HashMap<IdMethodType, usize>,
proto_ids: HashMap<IdMethodType, usize>,
_field_ids: HashMap<IdField, usize>,
_method_ids: HashMap<IdMethod, usize>,
// TODO: composite classes need a struct for storing link data
@ -60,7 +60,7 @@ impl Default for DexWriter {
},
strings: HashMap::new(),
type_ids: HashMap::new(),
_proto_ids: HashMap::new(),
proto_ids: HashMap::new(),
_field_ids: HashMap::new(),
_method_ids: HashMap::new(),
}
@ -88,18 +88,24 @@ impl DexWriter {
* and len(type) <= len(string) anyway
let mut future_type_set = new_types.clone();
future_type_set.extend(self.type_ids.keys().cloned());
// TODO: they are ref to type as u16, check
if future_type_set.len() >= u32::MAX as usize {
// TODO return structured error to handle this case by generating multiple dex files
bail!("To many types for one dex file");
}
*/
let new_protos = class.get_all_protos();
for string in new_strings {
self.strings.insert(string, 0);
}
for ty in new_types {
self.type_ids.insert(ty, 0);
}
for proto in new_protos {
self.proto_ids.insert(proto, 0);
}
Ok(())
}
@ -135,11 +141,73 @@ impl DexWriter {
self.type_ids.entry(ty.clone()).and_modify(|val| *val = idx);
section_manager.add_elt(Section::TypeIdItem, None);
}
let type_ids_list: Vec<TypeIdItem> = (0..type_ids_list.len())
.map(|idx| TypeIdItem {
descriptor_idx: idx as u32,
})
.collect();
let type_ids_list: Vec<TypeIdItem> = {
let mut type_ids_list_aux = vec![];
for ty in type_ids_list.into_iter() {
type_ids_list_aux.push(TypeIdItem {
descriptor_idx: *self.strings.get(&ty.0).ok_or(anyhow!(
"String {} (name of type {}) not found in dex builder",
ty.0.__repr__(),
ty.__repr__()
))? as u32,
});
}
type_ids_list_aux
};
// Sort prototype and generate ProtoIdItem
let mut proto_ids_list: Vec<IdMethodType> = self.proto_ids.keys().cloned().collect();
proto_ids_list.sort();
for (idx, proto) in proto_ids_list.iter().enumerate() {
self.proto_ids
.entry(proto.clone())
.and_modify(|val| *val = idx);
section_manager.add_elt(Section::ProtoIdItem, None);
}
// Convert the sorted prototypes to `ProtoIdItem`s. The string and type indexes are
// resolved from the maps of the writer; a missing entry aborts with an error naming
// the string or type that could not be found.
let mut proto_ids_list = {
    let mut proto_ids_list_aux = vec![];
    for proto in proto_ids_list {
        proto_ids_list_aux.push(ProtoIdItem {
            // `ok_or_else` builds the error message only when the lookup fails.
            shorty_idx: *self.strings.get(&proto.shorty).ok_or_else(|| {
                anyhow!(
                    "String {}, (shorty of prototype {}) not found in dex builder",
                    proto.shorty.__repr__(),
                    proto.__repr__()
                )
            })? as u32,
            return_type_idx: *self.type_ids.get(&proto.return_type).ok_or_else(|| {
                anyhow!(
                    "Type {}, (return type of prototype {}) not found in dex builder",
                    // Report the missing type itself, not the shorty of the prototype.
                    proto.return_type.__repr__(),
                    proto.__repr__()
                )
            })? as u32,
            parameters_off: 0, // to be linked later
        });
    }
    proto_ids_list_aux
};
// Generate TypeLists
let mut type_lists_index = HashMap::new();
for proto in self.proto_ids.keys() {
if !proto.parameters.is_empty() {
let type_list = self.gen_type_list(&proto.parameters).with_context(|| {
format!("Failed to generate param list for {}", proto.__repr__())
})?;
type_lists_index.insert(type_list, 0);
}
}
// TODO add class.interface in type list
let mut offset = 0;
let mut type_lists_and_local_offsets = vec![];
for (i, (list, idx)) in type_lists_index.iter_mut().enumerate() {
while offset % 4 != 0 {
// Alignment
section_manager.incr_section_size(Section::TypeList, 1);
offset += 1;
}
*idx = i;
type_lists_and_local_offsets.push((list.clone(), offset));
section_manager.add_elt(Section::TypeList, Some(list.size()));
offset += list.size();
}
// Populate map_list
let map_item_size = MapItem {
@ -159,6 +227,13 @@ impl DexWriter {
let mut map_list = MapList::default();
for section in Section::VARIANT_LIST {
if !section.is_data() && section_manager.get_nb_elt(*section) != 0 {
/*
match section {
// Alignment
// Up to and including Section::MapList, the sections are naturally aligned to 4
_ => (),
}
*/
map_list.list.push(MapItem {
type_: section.get_map_item_type(),
unused: 0,
@ -170,21 +245,34 @@ impl DexWriter {
// Link Header section:
self.header.map_off = section_manager.get_offset(Section::MapList);
self.header.string_ids_size = string_ids_list.len() as u32;
self.header.string_ids_size = section_manager.get_nb_elt(Section::StringIdItem) as u32;
self.header.string_ids_off = section_manager.get_offset(Section::StringIdItem);
self.header.type_ids_size = 0; // TODO
self.header.type_ids_size = section_manager.get_nb_elt(Section::TypeIdItem) as u32;
self.header.type_ids_off = section_manager.get_offset(Section::TypeIdItem);
self.header.proto_ids_size = 0; // TODO
self.header.proto_ids_size = section_manager.get_nb_elt(Section::ProtoIdItem) as u32;
self.header.proto_ids_off = section_manager.get_offset(Section::ProtoIdItem);
self.header.field_ids_size = 0; // TODO
self.header.field_ids_size = section_manager.get_nb_elt(Section::FieldIdItem) as u32;
self.header.field_ids_off = section_manager.get_offset(Section::FieldIdItem);
self.header.method_ids_size = 0; // TODO
self.header.method_ids_size = section_manager.get_nb_elt(Section::MethodIdItem) as u32;
self.header.method_ids_off = section_manager.get_offset(Section::MethodIdItem);
self.header.class_defs_size = 0; // TODO
self.header.class_defs_size = section_manager.get_nb_elt(Section::ClassDefItem) as u32;
self.header.class_defs_off = section_manager.get_offset(Section::ClassDefItem);
self.header.data_size = section_manager.get_size(Section::Data);
self.header.data_off = section_manager.get_offset(Section::Data);
// TODO: link TypeList in ProtoIdItems and ClassDefItems
for (proto, idx) in &self.proto_ids {
if !proto.parameters.is_empty() {
let type_list = self.gen_type_list(&proto.parameters).with_context(|| {
format!("Failed to generate param list for {}", proto.__repr__())
})?;
let offset = section_manager.get_offset(Section::TypeList)
+ type_lists_and_local_offsets[*type_lists_index.get(&type_list).unwrap()].1
as u32;
proto_ids_list[*idx].parameters_off = offset;
}
}
// TODO: compute checksum, hash, etc.
self.header.serialize(writer)?;
// StringIdItem section
@ -196,10 +284,14 @@ impl DexWriter {
str_id.serialize(writer)?;
string_off += string.size() as u32;
}
// TypeId section
for ty in type_ids_list {
ty.serialize(writer)?;
}
// TODO: ProtoIdItem,
// ProtoId section
for proto in proto_ids_list {
proto.serialize(writer)?;
}
// TODO: FieldIdItem,
// TODO: MethodIdItem,
// TODO: ClassDefItem,
@ -208,7 +300,16 @@ impl DexWriter {
// TODO: Data,
// MapList,
map_list.serialize(writer)?;
// TODO: TypeList,
// TypeList,
let mut offset = 0;
for (list, _) in type_lists_and_local_offsets {
while offset % 4 != 0 {
offset += 1;
0u8.serialize(writer)?;
}
offset += list.size();
list.serialize(writer)?;
}
// TODO: AnnotationSetRefList,
// TODO: AnnotationSetItem,
// TODO: ClassDataItem,
@ -225,6 +326,19 @@ impl DexWriter {
Ok(())
}
/// Build the [`TypeList`] matching a list of types.
///
/// Each type of `list` is replaced, in order, by the index registered for it in
/// `self.type_ids`.
///
/// # Errors
///
/// Returns an error if a type is not registered in `self.type_ids`, or if its index
/// does not fit in the `u16` used by [`TypeItem::type_idx`] (a plain `as u16` cast would
/// silently truncate the index and reference the wrong type).
fn gen_type_list(&self, list: &[IdType]) -> Result<TypeList> {
    let mut type_list = TypeList {
        list: Vec::with_capacity(list.len()),
    };
    for ty in list {
        // The error is built lazily: formatting only happens when the lookup fails.
        let idx = *self
            .type_ids
            .get(ty)
            .ok_or_else(|| anyhow!("Could not found type {} in dex builder", ty.__repr__()))?;
        let type_idx = u16::try_from(idx).map_err(|_| {
            anyhow!(
                "Index {} of type {} is too large to be stored in a type list",
                idx,
                ty.__repr__()
            )
        })?;
        type_list.list.push(TypeItem { type_idx });
    }
    Ok(type_list)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View file

@ -4,7 +4,7 @@ use std::collections::HashSet;
use pyo3::prelude::*;
use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdType};
use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethodType, IdType};
/// Represent a field.
#[pyclass]
@ -145,4 +145,16 @@ impl Field {
}
types
}
/// Collect every method prototype referenced by the field.
///
/// Prototypes can come from the value of the field (when there is one) and from
/// the annotations of the field.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    let from_value = self.value.iter().flat_map(|value| value.get_all_protos());
    let from_annotations = self
        .annotations
        .iter()
        .flat_map(|annot| annot.get_all_protos());
    from_value.chain(from_annotations).collect()
}
}

View file

@ -4,7 +4,7 @@ use std::collections::HashSet;
use pyo3::prelude::*;
use crate::{Code, DexAnnotationItem, DexString, IdMethod, IdType};
use crate::{Code, DexAnnotationItem, DexString, IdMethod, IdMethodType, IdType};
/// Represent a method.
#[pyclass]
@ -142,4 +142,22 @@ impl Method {
types
}
/// Collect every method prototype referenced by the method.
///
/// The descriptor, the method annotations, the annotations of each parameter and
/// the code (when present) are all inspected.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    let mut collected = HashSet::new();
    collected.extend(self.descriptor.get_all_protos());
    collected.extend(
        self.annotations
            .iter()
            .flat_map(|annot| annot.get_all_protos()),
    );
    // One list of annotations per parameter: flatten them before visiting.
    collected.extend(
        self.parameters_annotations
            .iter()
            .flatten()
            .flat_map(|annot| annot.get_all_protos()),
    );
    collected.extend(self.code.iter().flat_map(|code| code.get_all_protos()));
    collected
}
}

View file

@ -54,6 +54,11 @@ impl StaticPut {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// This handle contributes no prototype: the returned set is always empty.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -87,6 +92,11 @@ impl StaticGet {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// This handle contributes no prototype: the returned set is always empty.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -120,6 +130,11 @@ impl InstancePut {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// This handle contributes no prototype: the returned set is always empty.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -153,6 +168,11 @@ impl InstanceGet {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// This handle contributes no prototype: the returned set is always empty.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    HashSet::new()
}
}
#[pyclass]
@ -187,6 +207,11 @@ impl InvokeStatic {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// The lookup is forwarded to the `get_all_protos` of the wrapped value (`self.0`).
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0.get_all_protos()
}
}
#[pyclass]
@ -221,6 +246,11 @@ impl InvokeInstance {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// The lookup is forwarded to the `get_all_protos` of the wrapped value (`self.0`).
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0.get_all_protos()
}
}
#[pyclass]
@ -255,6 +285,11 @@ impl InvokeConstructor {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// The lookup is forwarded to the `get_all_protos` of the wrapped value (`self.0`).
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0.get_all_protos()
}
}
#[pyclass]
@ -289,6 +324,11 @@ impl InvokeDirect {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// The lookup is forwarded to the `get_all_protos` of the wrapped value (`self.0`).
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0.get_all_protos()
}
}
#[pyclass]
@ -323,6 +363,11 @@ impl InvokeInterface {
pub fn get_all_types(&self) -> HashSet<IdType> {
self.0.get_all_types()
}
/// Return all prototypes referenced in the handle.
///
/// The lookup is forwarded to the `get_all_protos` of the wrapped value (`self.0`).
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0.get_all_protos()
}
}
impl<'source> FromPyObject<'source> for MethodHandle {
@ -415,4 +460,19 @@ impl MethodHandle {
Self::InvokeInterface(val) => val.get_all_types(),
}
}
/// Return all prototypes referenced in the handle.
///
/// Dispatches to the `get_all_protos` of whichever kind of handle is stored.
/// Every variant is listed explicitly so that adding a new one is a compile error here.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    match self {
        // Handles accessing a field.
        Self::StaticPut(handle) => handle.get_all_protos(),
        Self::StaticGet(handle) => handle.get_all_protos(),
        Self::InstancePut(handle) => handle.get_all_protos(),
        Self::InstanceGet(handle) => handle.get_all_protos(),
        // Handles invoking a method.
        Self::InvokeStatic(handle) => handle.get_all_protos(),
        Self::InvokeInstance(handle) => handle.get_all_protos(),
        Self::InvokeConstructor(handle) => handle.get_all_protos(),
        Self::InvokeDirect(handle) => handle.get_all_protos(),
        Self::InvokeInterface(handle) => handle.get_all_protos(),
    }
}
}

View file

@ -2,7 +2,7 @@
use std::collections::HashSet;
use crate::{DexString, DexValue, IdType};
use crate::{DexString, DexValue, IdMethodType, IdType};
use pyo3::prelude::*;
#[pyclass]
@ -279,4 +279,13 @@ impl DexArray {
}
types
}
/// Collect every method prototype referenced by the values stored in the array.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    self.0
        .iter()
        .flat_map(|item| item.get_all_protos())
        .collect()
}
}

View file

@ -148,6 +148,30 @@ impl DexValue {
DexValue::Boolean(_) => HashSet::new(),
}
}
/// Return all prototypes referenced in the value.
///
/// Only the variants forwarding to an inner `get_all_protos` can yield prototypes;
/// all the other variants give an empty set. The match stays exhaustive (no `_` arm)
/// so that a new variant has to be classified explicitly.
pub fn get_all_protos(&self) -> HashSet<IdMethodType> {
    match self {
        DexValue::MethodType(inner) => inner.get_all_protos(),
        DexValue::MethodHandle(inner) => inner.get_all_protos(),
        DexValue::Method(inner) => inner.get_all_protos(),
        DexValue::Array(inner) => inner.get_all_protos(),
        DexValue::Annotation(inner) => inner.get_all_protos(),
        DexValue::Byte(_)
        | DexValue::Short(_)
        | DexValue::Char(_)
        | DexValue::Int(_)
        | DexValue::Long(_)
        | DexValue::Float(_)
        | DexValue::Double(_)
        | DexValue::String(_)
        | DexValue::Type(_)
        | DexValue::Field(_)
        | DexValue::Enum(_)
        | DexValue::Null(_)
        | DexValue::Boolean(_) => HashSet::new(),
    }
}
}
impl IntoPy<PyObject> for DexValue {

View file

@ -1,5 +1,7 @@
//! Class definitions.
use std::hash::Hash;
use crate as androscalpel_serializer;
use crate::{ReadSeek, Result, Serializable, Uleb128};
use std::io::Write;
@ -207,7 +209,7 @@ pub struct EncodedMethod {
/// <https://source.android.com/docs/core/runtime/dex-format#type-list>
/// alignment: 4 bytes
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TypeList {
// pub size: u32,
pub list: Vec<TypeItem>,
@ -246,7 +248,7 @@ impl Serializable for TypeList {
}
/// <https://source.android.com/docs/core/runtime/dex-format#type-item-format>
#[derive(Serializable, Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Serializable, Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct TypeItem {
/// Index of a [`crate::TypeIdItem`] in the `type_ids` list.
pub type_idx: u16,