add method ids to generated dex

This commit is contained in:
Jean-Marie Mineau 2023-12-04 14:09:19 +01:00
parent da47521993
commit 67f47a332e
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
11 changed files with 297 additions and 37 deletions

View file

@ -3,7 +3,7 @@ use std::collections::{HashMap, HashSet};
use pyo3::prelude::*;
use crate::{dex_id::IdType, value::DexValue, DexString, IdField, IdMethodType};
use crate::{dex_id::IdType, value::DexValue, DexString, IdField, IdMethod, IdMethodType};
/// Annotation with a visibility
#[pyclass]
@ -81,6 +81,11 @@ impl DexAnnotationItem {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// Pure delegation: the item itself adds no field id, everything comes from
// the wrapped `annotation`.
self.annotation.get_all_field_ids()
}
/// Return all method ids referenced in the annotation.
///
/// This is a pure delegation to `get_all_method_ids` of the wrapped `annotation`.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
self.annotation.get_all_method_ids()
}
}
/// An annotation.
@ -156,4 +161,13 @@ impl DexAnnotation {
}
fields
}
/// Collect every method id referenced by this annotation.
///
/// The result is the union of `get_all_method_ids` over the values stored in
/// `elements`.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    self.elements
        .values()
        .flat_map(|element| element.get_all_method_ids())
        .collect()
}
}

View file

@ -216,8 +216,8 @@ impl Class {
protos
}
/// Return all fields referenced in the class.
/// This **not** the concatenation of the static and instances fields variable:
/// Return all field ids referenced in the class.
/// This is **not** the concatenation of the static and instance fields attributes:
/// it also contains references to fields of other classes used by methods/values
/// in this class.
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
@ -241,4 +241,30 @@ impl Class {
}
fields
}
/// Return all method ids referenced in the class.
///
/// This is **not** just the concatenation of the keys of `direct_methods` and
/// `virtual_methods`: the set also contains the method ids reported by the
/// static and instance fields, by the methods themselves and by the class
/// annotations, which may reference methods of other classes.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    // Start with the ids reported by the fields.
    let mut ids: HashSet<IdMethod> = self
        .static_fields
        .values()
        .flat_map(|field| field.get_all_method_ids())
        .collect();
    ids.extend(
        self.instance_fields
            .values()
            .flat_map(|field| field.get_all_method_ids()),
    );

    // Methods of the class: the id used as key, plus everything the method reports.
    for (method_id, method) in self.direct_methods.iter() {
        ids.extend(std::iter::once(method_id.clone()).chain(method.get_all_method_ids()));
    }
    for (method_id, method) in self.virtual_methods.iter() {
        ids.extend(std::iter::once(method_id.clone()).chain(method.get_all_method_ids()));
    }

    // Finally, the annotations attached to the class itself.
    ids.extend(
        self.annotations
            .iter()
            .flat_map(|annot| annot.get_all_method_ids()),
    );
    ids
}
}

View file

@ -4,7 +4,7 @@ use std::collections::HashSet;
use pyo3::prelude::*;
use crate::{DexString, IdField, IdMethodType, IdType};
use crate::{DexString, IdField, IdMethod, IdMethodType, IdType};
// TODO: make this easy to edit/manipulate, maybe move to Method
@ -96,4 +96,10 @@ impl Code {
// TODO
HashSet::new()
}
/// Return all method ids referenced in the codes.
///
/// Not implemented yet: the returned set is currently always empty.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
// TODO
HashSet::new()
}
}

View file

@ -574,6 +574,28 @@ impl IdMethod {
protos.insert(self.proto.clone());
protos
}
/// Return all method ids referenced in the Id.
///
/// The returned set holds exactly one element: a clone of this id.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.clone()])
}
}
/// Total order on method ids: by `class_` first, then by `name`, then by `proto`.
///
/// This is the order used when a list of `IdMethod` is sorted with `sort()`.
impl Ord for IdMethod {
    fn cmp(&self, other: &Self) -> Ordering {
        self.class_
            .cmp(&other.class_)
            // `then_with` is lazy: the name (resp. the prototype) is only compared
            // when the previous keys are equal, whereas `then` would evaluate every
            // comparison up front on each call made by the sort.
            .then_with(|| self.name.cmp(&other.name))
            .then_with(|| self.proto.cmp(&other.proto))
    }
}
// `IdMethod` is totally ordered, so the partial order simply defers to `Ord::cmp`:
// this keeps both traits consistent and the result is never `None`.
impl PartialOrd for IdMethod {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
#[pyclass]
@ -607,4 +629,9 @@ impl IdEnum {
pub fn get_all_types(&self) -> HashSet<IdType> {
// Pure delegation to the wrapped value (`self.0`).
self.0.get_all_types()
}
/// Return all field ids referenced in the Id.
///
/// This is a pure delegation to `get_all_field_ids` of the wrapped value (`self.0`).
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
self.0.get_all_field_ids()
}
}

View file

@ -3,6 +3,8 @@
use std::collections::HashMap;
use std::io::{Cursor, Write};
use log::debug;
use anyhow::{anyhow, bail, Context};
use crate::Result;
@ -16,7 +18,7 @@ pub struct DexWriter {
type_ids: HashMap<IdType, usize>,
proto_ids: HashMap<IdMethodType, usize>,
field_ids: HashMap<IdField, usize>,
_method_ids: HashMap<IdMethod, usize>,
method_ids: HashMap<IdMethod, usize>,
// TODO: composite classes need a struct for storing link data
// class_defs: HashMap<IdMethod, (Class, u32)>,
// call_site_ids: // TODO: parsing code insns
@ -62,7 +64,7 @@ impl Default for DexWriter {
type_ids: HashMap::new(),
proto_ids: HashMap::new(),
field_ids: HashMap::new(),
_method_ids: HashMap::new(),
method_ids: HashMap::new(),
}
}
}
@ -73,38 +75,37 @@ impl DexWriter {
}
pub fn add_class(&mut self, class: &Class) -> Result<()> {
debug!("Adding class {} to dex builder", class.descriptor.__str__());
let new_strings = class.get_all_strings();
/* this means more strings than bytes in the file, prbl no gonna append first
let mut futur_string_set = new_strings.clone();
futur_string_set.extend(self.strings.keys().cloned());
if futur_string_set.len() >= u32::MAX as usize {
// TODO return structured error to handle this case by generating multiple dex files
bail!("To many strings for one dex file");
}
*/
let new_types = class.get_all_types();
/* this means more types than bytes in the file, prbl no gonna append
* and len(type) <= len(string) anyway
let mut futur_type_set = new_types.clone();
futur_type_set.extend(self.type_ids.keys().cloned());
// TODO: they are ref to type as u16, so checks?
if futur_type_set.len() >= u32::MAX as usize {
let new_nb_types = self.type_ids.len()
+ new_types
.iter()
.filter(|ty| self.type_ids.get(ty).is_none())
.count();
if new_nb_types >= u16::MAX as usize {
// type_ids are not always u16, so this may not be a hard limit,
// but it's easier to enforce it to avoid strange bugs.
// TODO return structured error to handle this case by generating multiple dex files
bail!("To many types for one dex file");
}
*/
let new_protos = class.get_all_protos();
let mut futur_proto_set = new_protos.clone();
futur_proto_set.extend(self.proto_ids.keys().cloned());
if futur_proto_set.len() >= u16::MAX as usize {
let new_nb_protos = self.proto_ids.len()
+ new_protos
.iter()
.filter(|proto| self.proto_ids.get(proto).is_none())
.count();
if new_nb_protos >= u16::MAX as usize {
// TODO return structured error to handle this case by generating multiple dex files
bail!("To many prototypes for one dex file");
}
let new_field_ids = class.get_all_field_ids();
let new_method_ids = class.get_all_method_ids();
for string in new_strings {
self.strings.insert(string, 0);
}
@ -117,6 +118,9 @@ impl DexWriter {
for field in new_field_ids {
self.field_ids.insert(field, 0);
}
for method in new_method_ids {
self.method_ids.insert(method, 0);
}
Ok(())
}
@ -130,9 +134,11 @@ impl DexWriter {
let mut section_manager = SectionManager::default();
section_manager.incr_section_size(Section::HeaderItem, 0x70);
// Sort Strings and generate StringDataItem
debug!("Sort string and generate string_data_item and string_ids sections");
let mut string_ids_list: Vec<DexString> = self.strings.keys().cloned().collect();
debug!("start sort");
string_ids_list.sort();
debug!("end sort");
for (idx, string) in string_ids_list.iter().enumerate() {
self.strings
.entry(string.clone())
@ -145,9 +151,11 @@ impl DexWriter {
.map(|string| string.into())
.collect();
// Sort types and generate TypeIdItem
debug!("Sort types and generate type_id_item section");
let mut type_ids_list: Vec<IdType> = self.type_ids.keys().cloned().collect();
debug!("start sort");
type_ids_list.sort();
debug!("end sort");
for (idx, ty) in type_ids_list.iter().enumerate() {
self.type_ids.entry(ty.clone()).and_modify(|val| *val = idx);
section_manager.add_elt(Section::TypeIdItem, None);
@ -166,9 +174,11 @@ impl DexWriter {
type_ids_list_aux
};
// Sort prototype and generate ProtoIdItem
debug!("Sort prototypes and generate proto_id_item section");
let mut proto_ids_list: Vec<IdMethodType> = self.proto_ids.keys().cloned().collect();
debug!("start sort");
proto_ids_list.sort();
debug!("end sort");
for (idx, proto) in proto_ids_list.iter().enumerate() {
self.proto_ids
.entry(proto.clone())
@ -195,7 +205,7 @@ impl DexWriter {
proto_ids_list_aux
};
// Generate TypeLists
debug!("Generate the type_list section");
let mut type_lists_index = HashMap::new();
for proto in self.proto_ids.keys() {
if !proto.parameters.is_empty() {
@ -220,9 +230,11 @@ impl DexWriter {
offset += list.size();
}
// Sort and generate FieldIdItem
debug!("Sort field ids and generate field_ids_item");
let mut field_ids_list: Vec<IdField> = self.field_ids.keys().cloned().collect();
debug!("start sort");
field_ids_list.sort();
debug!("end sort");
for (idx, field_id) in field_ids_list.iter().enumerate() {
self.field_ids
.entry(field_id.clone())
@ -253,7 +265,43 @@ impl DexWriter {
field_ids_list_aux
};
// Populate map_list
debug!("Sort method ids and generate method_id_item section");
let mut method_ids_list: Vec<IdMethod> = self.method_ids.keys().cloned().collect();
debug!("start sort");
method_ids_list.sort();
debug!("end sort");
for (idx, method_id) in method_ids_list.iter().enumerate() {
self.method_ids
.entry(method_id.clone())
.and_modify(|val| *val = idx);
section_manager.add_elt(Section::MethodIdItem, None);
}
// Turn the sorted method ids into `MethodIdItem`s by resolving each component
// (class, prototype, name) to the index stored in the matching id table.
// Any component missing from its table aborts the generation with an error.
let method_ids_list: Vec<MethodIdItem> = {
    let mut method_ids_list_aux = Vec::with_capacity(method_ids_list.len());
    for method in method_ids_list.into_iter() {
        // The errors are built lazily with `ok_or_else`: with `ok_or`, the message
        // and the `__repr__()` strings were computed for every lookup, including
        // the successful ones.
        let class_idx = *self.type_ids.get(&method.class_).ok_or_else(|| {
            anyhow!(
                "Type {} (class of method {}) not found in dex builder",
                method.class_.__repr__(),
                method.__repr__()
            )
        })?;
        let proto_idx = *self.proto_ids.get(&method.proto).ok_or_else(|| {
            anyhow!(
                "Prototype {} (signature of method {}) not found in dex builder",
                method.proto.__repr__(),
                method.__repr__()
            )
        })?;
        let name_idx = *self.strings.get(&method.name).ok_or_else(|| {
            anyhow!(
                "String {} (name of method {}) not found in dex builder",
                method.name.__repr__(),
                method.__repr__()
            )
        })?;
        method_ids_list_aux.push(MethodIdItem {
            // NOTE(review): these narrowing casts silently truncate on overflow.
            // `add_class` appears to bail before the number of types or prototypes
            // reaches `u16::MAX`; no such check is visible for strings -- confirm.
            class_idx: class_idx as u16,
            proto_idx: proto_idx as u16,
            name_idx: name_idx as u32,
        });
    }
    method_ids_list_aux
};
debug!("Generate the map_list");
// Get the size of a map item
let map_item_size = MapItem {
type_: MapItemType::HeaderItem,
unused: 0,
@ -287,7 +335,7 @@ impl DexWriter {
}
}
// Link Header section:
debug!("Link the header section");
self.header.map_off = section_manager.get_offset(Section::MapList);
self.header.string_ids_size = section_manager.get_nb_elt(Section::StringIdItem) as u32;
self.header.string_ids_off = section_manager.get_offset(Section::StringIdItem);
@ -304,7 +352,8 @@ impl DexWriter {
self.header.data_size = section_manager.get_size(Section::Data);
self.header.data_off = section_manager.get_offset(Section::Data);
// TODO: link TypeList in ProtoIdItems and ClassDefItems
debug!("Link the type_list entries in the proto_id_items");
// TODO: link TypeList in ClassDefItems
for (proto, idx) in &self.proto_ids {
if !proto.parameters.is_empty() {
let type_list = self.gen_type_list(&proto.parameters).with_context(|| {
@ -317,6 +366,7 @@ impl DexWriter {
}
}
debug!("Serialize the dex file");
// TODO: compute checksum, hash, etc.
self.header.serialize(writer)?;
// StringIdItem section
@ -340,7 +390,10 @@ impl DexWriter {
for field_id in field_ids_list {
field_id.serialize(writer)?;
}
// TODO: MethodIdItem,
// MethodIdItem section
for method_id in method_ids_list {
method_id.serialize(writer)?;
}
// TODO: ClassDefItem,
// TODO: CallSiteIdItem,
// TODO: MethodHandleItem,

View file

@ -4,7 +4,7 @@ use std::collections::HashSet;
use pyo3::prelude::*;
use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethodType, IdType};
use crate::{DexAnnotationItem, DexString, DexValue, IdField, IdMethod, IdMethodType, IdType};
/// Represent a field.
#[pyclass]
@ -170,4 +170,16 @@ impl Field {
}
fields
}
/// Return all method ids referenced in the field.
///
/// The set is the union of the ids found in the value of the field (when it has
/// one) and of the ids found in its annotations.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    let mut ids = self
        .value
        .as_ref()
        .map(|value| value.get_all_method_ids())
        .unwrap_or_default();
    ids.extend(
        self.annotations
            .iter()
            .flat_map(|annot| annot.get_all_method_ids()),
    );
    ids
}
}

View file

@ -177,4 +177,23 @@ impl Method {
}
fields
}
/// Return all method ids referenced in the method.
///
/// The set contains the descriptor of the method itself, plus the ids found in
/// its annotations, in the annotations of its parameters and in its code
/// (when the method has some).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    let mut ids = HashSet::from([self.descriptor.clone()]);
    ids.extend(
        self.annotations
            .iter()
            .flat_map(|annot| annot.get_all_method_ids()),
    );
    // One list of annotations per parameter: flatten them before collecting.
    ids.extend(
        self.parameters_annotations
            .iter()
            .flatten()
            .flat_map(|annot| annot.get_all_method_ids()),
    );
    if let Some(code) = self.code.as_ref() {
        ids.extend(code.get_all_method_ids());
    }
    ids
}
}

View file

@ -66,6 +66,11 @@ impl StaticPut {
fields.insert(self.0.clone());
fields
}
/// Return all method ids referenced in the handle.
///
/// Always empty: this handle wraps a field id (`self.0`), not a method id.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -111,6 +116,11 @@ impl StaticGet {
fields.insert(self.0.clone());
fields
}
/// Return all method ids referenced in the handle.
///
/// Always empty: this handle wraps a field id (`self.0`), not a method id.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -156,6 +166,11 @@ impl InstancePut {
fields.insert(self.0.clone());
fields
}
/// Return all method ids referenced in the handle.
///
/// Always empty: this handle wraps a field id (`self.0`), not a method id.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
HashSet::new()
}
}
#[pyclass]
#[derive(Debug, Clone, PartialEq, Eq)]
@ -201,6 +216,11 @@ impl InstanceGet {
fields.insert(self.0.clone());
fields
}
/// Return all method ids referenced in the handle.
///
/// Always empty: this handle wraps a field id (`self.0`), not a method id.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
HashSet::new()
}
}
#[pyclass]
@ -245,6 +265,13 @@ impl InvokeStatic {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// This handle wraps a method id (`self.0`), so there is no field id to report.
HashSet::new()
}
/// Return all method ids referenced in the handle.
///
/// The set holds exactly one element: a clone of the invoked method id (`self.0`).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.0.clone()])
}
}
#[pyclass]
@ -289,6 +316,13 @@ impl InvokeInstance {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// This handle wraps a method id (`self.0`), so there is no field id to report.
HashSet::new()
}
/// Return all method ids referenced in the handle.
///
/// The set holds exactly one element: a clone of the invoked method id (`self.0`).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.0.clone()])
}
}
#[pyclass]
@ -333,6 +367,13 @@ impl InvokeConstructor {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// This handle wraps a method id (`self.0`), so there is no field id to report.
HashSet::new()
}
/// Return all method ids referenced in the handle.
///
/// The set holds exactly one element: a clone of the invoked method id (`self.0`).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.0.clone()])
}
}
#[pyclass]
@ -377,6 +418,13 @@ impl InvokeDirect {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// This handle wraps a method id (`self.0`), so there is no field id to report.
HashSet::new()
}
/// Return all method ids referenced in the handle.
///
/// The set holds exactly one element: a clone of the invoked method id (`self.0`).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.0.clone()])
}
}
#[pyclass]
@ -421,6 +469,13 @@ impl InvokeInterface {
pub fn get_all_field_ids(&self) -> HashSet<IdField> {
// This handle wraps a method id (`self.0`), so there is no field id to report.
HashSet::new()
}
/// Return all method ids referenced in the handle.
///
/// The set holds exactly one element: a clone of the invoked method id (`self.0`).
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    HashSet::from([self.0.clone()])
}
}
impl<'source> FromPyObject<'source> for MethodHandle {
@ -543,4 +598,19 @@ impl MethodHandle {
Self::InvokeInterface(val) => val.get_all_field_ids(),
}
}
/// Return all method ids referenced in the handle.
///
/// Dispatches to `get_all_method_ids` of the wrapped handle, whatever its kind.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
// One arm per variant and no `_` arm, so adding a variant to the enum will
// not compile until it is handled here.
match self {
Self::StaticPut(val) => val.get_all_method_ids(),
Self::StaticGet(val) => val.get_all_method_ids(),
Self::InstancePut(val) => val.get_all_method_ids(),
Self::InstanceGet(val) => val.get_all_method_ids(),
Self::InvokeStatic(val) => val.get_all_method_ids(),
Self::InvokeInstance(val) => val.get_all_method_ids(),
Self::InvokeConstructor(val) => val.get_all_method_ids(),
Self::InvokeDirect(val) => val.get_all_method_ids(),
Self::InvokeInterface(val) => val.get_all_method_ids(),
}
}
}

View file

@ -2,7 +2,7 @@
use std::collections::HashSet;
use crate::{DexString, DexValue, IdField, IdMethodType, IdType};
use crate::{DexString, DexValue, IdField, IdMethod, IdMethodType, IdType};
use pyo3::prelude::*;
#[pyclass]
@ -297,4 +297,13 @@ impl DexArray {
}
fields
}
/// Return all method ids referenced in the value.
///
/// The result is the union of `get_all_method_ids` over the elements of the array.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    self.0
        .iter()
        .flat_map(|element| element.get_all_method_ids())
        .collect()
}
}

View file

@ -189,13 +189,37 @@ impl DexValue {
DexValue::Type(_) => HashSet::new(),
DexValue::Field(val) => val.get_all_field_ids(),
DexValue::Method(_) => HashSet::new(),
DexValue::Enum(_) => HashSet::new(),
DexValue::Enum(val) => val.get_all_field_ids(),
DexValue::Array(val) => val.get_all_field_ids(),
DexValue::Annotation(val) => val.get_all_field_ids(),
DexValue::Null(_) => HashSet::new(),
DexValue::Boolean(_) => HashSet::new(),
}
}
/// Return all method ids referenced in the value.
///
/// Method handles, methods, arrays and annotations report the ids of their
/// payload; every other kind of value yields an empty set.
pub fn get_all_method_ids(&self) -> HashSet<IdMethod> {
    match self {
        // Variants whose payload can reference methods.
        DexValue::MethodHandle(handle) => handle.get_all_method_ids(),
        DexValue::Method(method) => method.get_all_method_ids(),
        DexValue::Array(array) => array.get_all_method_ids(),
        DexValue::Annotation(annotation) => annotation.get_all_method_ids(),
        // Everything else is listed explicitly (no `_` arm) so that a new
        // variant has to be classified here before the code compiles again.
        DexValue::Byte(_)
        | DexValue::Short(_)
        | DexValue::Char(_)
        | DexValue::Int(_)
        | DexValue::Long(_)
        | DexValue::Float(_)
        | DexValue::Double(_)
        | DexValue::MethodType(_)
        | DexValue::String(_)
        | DexValue::Type(_)
        | DexValue::Field(_)
        | DexValue::Enum(_)
        | DexValue::Null(_)
        | DexValue::Boolean(_) => HashSet::new(),
    }
}
}
impl IntoPy<PyObject> for DexValue {

View file

@ -2,7 +2,7 @@ import logging
FORMAT = "[%(levelname)s] %(name)s %(filename)s:%(lineno)d: %(message)s"
logging.basicConfig(format=FORMAT)
logging.getLogger().setLevel(logging.INFO)
logging.getLogger().setLevel(logging.DEBUG)
import androscalpel as asc
import zipfile as z