use a closure to add custom labels

This commit is contained in:
Jean-Marie Mineau 2025-02-03 14:52:37 +01:00
parent 85a0e8557b
commit 68bbd3ecee
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
3 changed files with 155 additions and 67 deletions

View file

@ -101,13 +101,16 @@ impl Apk {
/// Extract a class from a dex file reader. /// Extract a class from a dex file reader.
/// `class_item_idx` if the index of the `class_def_item` of the class, **not** the /// `class_item_idx` if the index of the `class_def_item` of the class, **not** the
/// `class_idx`. /// `class_idx`.
fn get_class_from_dex_file( fn get_class_from_dex_file<F>(
&self, &self,
class_item: &ClassDefItem, class_item: &ClassDefItem,
class_item_idx: usize, class_item_idx: usize,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool, label_ins: F,
) -> Result<Class> { ) -> Result<Class>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String> + Clone,
{
let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?; let descriptor = Self::get_id_type_from_idx(class_item.class_idx as usize, dex)?;
let superclass = if class_item.superclass_idx == NO_INDEX.0 { let superclass = if class_item.superclass_idx == NO_INDEX.0 {
None None
@ -196,7 +199,7 @@ impl Apk {
for mut method in Self::get_method_list_from_encoded_field_list( for mut method in Self::get_method_list_from_encoded_field_list(
&data.direct_methods, &data.direct_methods,
dex, dex,
label_each_ins, label_ins.clone(),
)? { )? {
if let Some(hiddenapi) = &hiddenapi { if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
@ -207,7 +210,7 @@ impl Apk {
for mut method in Self::get_method_list_from_encoded_field_list( for mut method in Self::get_method_list_from_encoded_field_list(
&data.virtual_methods, &data.virtual_methods,
dex, dex,
label_each_ins, label_ins.clone(),
)? { )? {
if let Some(hiddenapi) = &hiddenapi { if let Some(hiddenapi) = &hiddenapi {
method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into()); method.hiddenapi = Some((&hiddenapi[hiddenapi_i]).into());
@ -729,13 +732,16 @@ impl Apk {
/// Return a [`Method`] from it's idx (index in `method_ids`) in the dex file and its access flags /// Return a [`Method`] from it's idx (index in `method_ids`) in the dex file and its access flags
/// ([`EncodedMethod.access_flags`]) and code offset ([`EncodedMethod.code_off`]). /// ([`EncodedMethod.access_flags`]) and code offset ([`EncodedMethod.code_off`]).
pub fn get_method_from_idx( pub fn get_method_from_idx<F>(
idx: usize, idx: usize,
Uleb128(access_flags): Uleb128, Uleb128(access_flags): Uleb128,
Uleb128(code_off): Uleb128, Uleb128(code_off): Uleb128,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool, mut label_ins: F,
) -> Result<Method> { ) -> Result<Method>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String> + Clone,
{
let descriptor = Self::get_id_method_from_idx(idx, dex)?; let descriptor = Self::get_id_method_from_idx(idx, dex)?;
let is_public = (access_flags & ACC_PUBLIC) != 0; let is_public = (access_flags & ACC_PUBLIC) != 0;
@ -807,8 +813,10 @@ impl Apk {
let code = if code_off == 0 { let code = if code_off == 0 {
None None
} else { } else {
match Self::get_code_from_off(code_off, dex, label_each_ins) match Self::get_code_from_off(code_off, dex, |ins, addr| {
.with_context(|| format!("Failed to parse code of method {}", descriptor.__str__())) label_ins(&descriptor, ins, addr)
})
.with_context(|| format!("Failed to parse code of method {}", descriptor.__str__()))
{ {
Err(e) => { Err(e) => {
error!( error!(
@ -844,20 +852,19 @@ impl Apk {
} }
/// Convert an instruction format to an instruction. /// Convert an instruction format to an instruction.
fn instruction_format_to_instruction( fn instruction_format_to_instruction<F>(
format: &InsFormat, format: &InsFormat,
addr: usize, addr: usize,
insns_ref: &HashMap<usize, &InsFormat>, insns_ref: &HashMap<usize, &InsFormat>,
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool, label_ins: &mut F,
) -> Result<Option<(instructions::Instruction, HashMap<usize, String>)>> { ) -> Result<Option<(instructions::Instruction, HashMap<usize, String>)>>
where
F: FnMut(&instructions::Instruction, usize) -> Option<String>,
{
use crate::instructions::*; use crate::instructions::*;
use InsFormat::*; use InsFormat::*;
let mut labels = HashMap::new(); let mut labels = HashMap::new();
if label_each_ins {
let label = format!("label_{addr:08X}");
labels.insert(addr, label.clone());
}
let ins = match format.clone() { let ins = match format.clone() {
Format10X { op: 0x00 } => Instruction::Nop {}, Format10X { op: 0x00 } => Instruction::Nop {},
Format12X { op: 0x01, va, vb } => Instruction::Move { Format12X { op: 0x01, va, vb } => Instruction::Move {
@ -2657,15 +2664,19 @@ impl Apk {
}; };
ins.sanity_check() ins.sanity_check()
.with_context(|| anyhow!("Invalid instruction {ins:?} found at {addr}: {format:?}"))?; .with_context(|| anyhow!("Invalid instruction {ins:?} found at {addr}: {format:?}"))?;
if let Some(label) = label_ins(&ins, addr) {
//let label = format!("label_{addr:08X}");
labels.insert(addr, label);
}
Ok(Some((ins, labels))) Ok(Some((ins, labels)))
} }
/// Return a [`Code`] from it's offset in the dex file. /// Return a [`Code`] from it's offset in the dex file.
pub fn get_code_from_off( pub fn get_code_from_off<F>(offset: u32, dex: &DexFileReader, mut label_ins: F) -> Result<Code>
offset: u32, where
dex: &DexFileReader, F: FnMut(&instructions::Instruction, usize) -> Option<String>,
label_each_ins: bool, {
) -> Result<Code> {
use crate::instructions::Instruction; use crate::instructions::Instruction;
let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?; let code_item = dex.get_struct_at_offset::<CodeItem>(offset)?;
@ -2774,7 +2785,7 @@ impl Apk {
addr, addr,
&instructions_raw, &instructions_raw,
dex, dex,
label_each_ins, &mut label_ins,
)? { )? {
instructions.push((addr, ins)); instructions.push((addr, ins));
addr += ins_f.size() / 2; addr += ins_f.size() / 2;
@ -2876,11 +2887,14 @@ impl Apk {
/// The index of the fields is computed by summing the [`EncodedMethod.field_idx_diff`] of the /// The index of the fields is computed by summing the [`EncodedMethod.field_idx_diff`] of the
/// previous element of the list the diff of the current field, so the list must be preserved /// previous element of the list the diff of the current field, so the list must be preserved
/// as in the dex file. /// as in the dex file.
pub fn get_method_list_from_encoded_field_list( pub fn get_method_list_from_encoded_field_list<F>(
encoded_methods: &[EncodedMethod], encoded_methods: &[EncodedMethod],
dex: &DexFileReader, dex: &DexFileReader,
label_each_ins: bool, label_ins: F,
) -> Result<Vec<Method>> { ) -> Result<Vec<Method>>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String> + Clone,
{
let mut idx = 0; let mut idx = 0;
let mut methods = vec![]; let mut methods = vec![];
for method in encoded_methods { for method in encoded_methods {
@ -2891,7 +2905,7 @@ impl Apk {
method.access_flags, method.access_flags,
method.code_off, method.code_off,
dex, dex,
label_each_ins, label_ins.clone(),
)?); )?);
} }
Ok(methods) Ok(methods)
@ -3049,8 +3063,17 @@ impl Apk {
/// Load all android files in an application. /// Load all android files in an application.
/// This **does not include any .dex file that android would not load. /// This **does not include any .dex file that android would not load.
///
/// - `label_ins`: Function that takes a method id, an instruction and its address and returns
/// a label, if a label needs to be inserted before the instruction.
#[cfg(feature = "external-zip-reader")] #[cfg(feature = "external-zip-reader")]
pub fn load_apk(apk: impl Read + Seek, label_each_ins: bool, cache: bool) -> Result<Self> { pub fn load_apk<F>(apk: impl Read + Seek, label_ins: F, cache: bool) -> Result<Self>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String>
+ Clone
+ Send
+ Sync,
{
let mut zip = ZipArchive::new(apk)?; let mut zip = ZipArchive::new(apk)?;
let mut apk = Self::default(); let mut apk = Self::default();
let file_names: HashSet<String> = zip.file_names().map(|s| s.into()).collect(); let file_names: HashSet<String> = zip.file_names().map(|s| s.into()).collect();
@ -3073,15 +3096,25 @@ impl Apk {
let mut data = vec![]; let mut data = vec![];
// TODO: It sould not be necessary to clone data, but right now DexFileReader takes a &[u8] // TODO: It sould not be necessary to clone data, but right now DexFileReader takes a &[u8]
std::io::copy(&mut zip.by_name(&name)?, &mut data)?; std::io::copy(&mut zip.by_name(&name)?, &mut data)?;
apk.add_dex_file(&name, &data, label_each_ins, cache)?; apk.add_dex_file(&name, &data, label_ins.clone(), cache)?;
} }
Ok(apk) Ok(apk)
} }
/// Load all android files in an application. /// Load all android files in an application.
/// This **does not include any .dex file that android would not load. /// This **does not include any .dex file that android would not load.
///
/// - `label_ins`: Function that takes a method id, an instruction and its address and returns
/// a label, if a label needs to be inserted before the instruction.
/// - `cache`: if set to true, copy and cache the binary data format.
#[cfg(not(feature = "external-zip-reader"))] #[cfg(not(feature = "external-zip-reader"))]
pub fn load_apk(apk: impl Read + Seek, label_each_ins: bool, cache: bool) -> Result<Self> { pub fn load_apk<F>(apk: impl Read + Seek, label_ins: F, cache: bool) -> Result<Self>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String>
+ Clone
+ Send
+ Sync,
{
let mut apk_z = ZipFileReader::new(apk); let mut apk_z = ZipFileReader::new(apk);
let mut apk = Self::default(); let mut apk = Self::default();
let dex_names = apk_z let dex_names = apk_z
@ -3091,10 +3124,56 @@ impl Apk {
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for name in dex_names { for name in dex_names {
let data = apk_z.read_file_as_vec(&name); let data = apk_z.read_file_as_vec(&name);
apk.add_dex_file(&name, &data, label_each_ins, cache)?; apk.add_dex_file(&name, &data, label_ins.clone(), cache)?;
} }
Ok(apk) Ok(apk)
} }
/// Add the content of a dex file to the apk.
///
/// The dex file is parsed into a `DexFile` and stored in `self.dex_files` under `name`.
/// `self.dex_files` is only modified by the final `insert`, so nothing is stored when
/// an error is returned.
///
/// # Parameters
/// - `name`: the name of the dex file, used as the key in `self.dex_files`
/// - `data`: the dex file binary
/// - `label_ins`: Function that takes a method id, an instruction and its address and returns
/// a label, if a label needs to be inserted before the instruction. The function is
/// cloned for each class handed to `get_class_from_dex_file`, so state mutated by one
/// clone is not visible to the clones used for the other classes.
/// - `cache`: if set to true, copy and cache the binary data format (stored in the
/// `bin_cache` field of the new `DexFile`).
///
/// # Errors
/// - a dex file named `name` already exists in the application
/// - `DexFileReader::new`, the parsing of any class, or `get_not_resolved_strings` fails
pub fn add_dex_file<F>(
&mut self,
name: &str,
data: &[u8],
label_ins: F,
cache: bool,
) -> Result<()>
where
F: FnMut(&IdMethod, &instructions::Instruction, usize) -> Option<String>
+ Clone
+ Send
+ Sync,
{
let name: String = name.into();
// Refuse to silently replace a dex file that was already loaded.
if self.dex_files.contains_key(&name) {
bail!("{name} already exist in the application")
}
let mut dex = DexFileReader::new(data)?;
// Each class definition is converted through `par_iter()`; every call receives its
// own clone of `label_ins`. The classes are then indexed by their descriptor, and a
// failure on any class turns the whole collection into an `Err` propagated by `?`.
let classes = dex
.get_class_defs()
.par_iter()
.enumerate()
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_ins.clone()))
.map(|class| class.map(|class| (class.descriptor.clone(), class)))
.collect::<Result<HashMap<IdType, Class>, _>>()?;
let dex_file = DexFile {
classes,
// NOTE(review): this is queried after the classes have been parsed; presumably the
// reader tracks which strings were resolved during parsing, so the order of these
// two steps looks significant -- confirm before reordering.
not_referenced_strings: dex
.get_not_resolved_strings()?
.into_iter()
.map(DexString)
.collect(),
// Keep a private copy of the raw bytes only when the caller asked for caching.
bin_cache: if cache { Some(data.to_vec()) } else { None },
};
self.dex_files.insert(name, dex_file);
Ok(())
}
} }
#[cfg_attr(feature = "python", pymethods)] #[cfg_attr(feature = "python", pymethods)]
@ -3113,7 +3192,17 @@ impl Apk {
#[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))] #[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))]
pub fn load_apk_path(apk: PathBuf, label_each_ins: bool, cache: bool) -> Result<Self> { pub fn load_apk_path(apk: PathBuf, label_each_ins: bool, cache: bool) -> Result<Self> {
let file = File::open(apk)?; let file = File::open(apk)?;
Self::load_apk(file, label_each_ins, cache) Self::load_apk(
file,
|_, _, addr| {
if label_each_ins {
Some(format!("label_{addr:08X}"))
} else {
None
}
},
cache,
)
} }
/// Load all android files in an application. /// Load all android files in an application.
@ -3121,7 +3210,17 @@ impl Apk {
#[cfg_attr(feature = "python", staticmethod)] #[cfg_attr(feature = "python", staticmethod)]
#[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))] #[cfg_attr(feature = "python", pyo3(signature = (apk, label_each_ins=false, cache=false)))]
pub fn load_apk_bin(apk: &[u8], label_each_ins: bool, cache: bool) -> Result<Self> { pub fn load_apk_bin(apk: &[u8], label_each_ins: bool, cache: bool) -> Result<Self> {
Self::load_apk(Cursor::new(apk), label_each_ins, cache) Self::load_apk(
Cursor::new(apk),
|_, _, addr| {
if label_each_ins {
Some(format!("label_{addr:08X}"))
} else {
None
}
},
cache,
)
} }
// TODO: change cache to true when cache invalidation is setup // TODO: change cache to true when cache invalidation is setup
@ -3133,37 +3232,26 @@ impl Apk {
/// - `label_each_ins`: if set to true, insert a label before each instruction /// - `label_each_ins`: if set to true, insert a label before each instruction
/// indicating the instruction address /// indicating the instruction address
/// - `cache`: if set to true, copy and cache the binary data format. /// - `cache`: if set to true, copy and cache the binary data format.
#[cfg_attr(feature = "python", pyo3(signature = (name, data, label_each_ins=false, cache=false)))] #[cfg_attr(feature = "python", pyo3(name = "add_dex_file", signature = (name, data, label_each_ins=false, cache=false)))]
pub fn add_dex_file( pub fn add_dex_file_py(
&mut self, &mut self,
name: &str, name: &str,
data: &[u8], data: &[u8],
label_each_ins: bool, label_each_ins: bool,
cache: bool, cache: bool,
) -> Result<()> { ) -> Result<()> {
let name: String = name.into(); self.add_dex_file(
if self.dex_files.contains_key(&name) { name,
bail!("{name} already exist in the application") data,
} |_, _, addr| {
let mut dex = DexFileReader::new(data)?; if label_each_ins {
let classes = dex Some(format!("label_{addr:08X}"))
.get_class_defs() } else {
.par_iter() None
.enumerate() }
.map(|(idx, class)| self.get_class_from_dex_file(class, idx, &dex, label_each_ins)) },
.map(|class| class.map(|class| (class.descriptor.clone(), class))) cache,
.collect::<Result<HashMap<IdType, Class>, _>>()?; )
let dex_file = DexFile {
classes,
not_referenced_strings: dex
.get_not_resolved_strings()?
.into_iter()
.map(DexString)
.collect(),
bin_cache: if cache { Some(data.to_vec()) } else { None },
};
self.dex_files.insert(name, dex_file);
Ok(())
} }
/// List the classes implemented in the application. /// List the classes implemented in the application.

View file

@ -86,12 +86,12 @@ impl IdMethodType {
/// Try to parse a smali representation of a prototype into a IdMethodType. /// Try to parse a smali representation of a prototype into a IdMethodType.
/// ///
/// ``` /// ```
/// use androscalpel::IdMethodType; /// use androscalpel::{IdMethodType, IdType};
/// ///
/// let proto = IdMethodType::from_smali("(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z").unwrap(); /// let proto = IdMethodType::from_smali("(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z").unwrap();
/// assert_eq!( /// assert_eq!(
/// proto, /// proto,
/// IdMethodType( /// IdMethodType::new(
/// IdType::boolean(), /// IdType::boolean(),
/// vec![ /// vec![
/// IdType::class("androidx/core/util/Predicate"), /// IdType::class("androidx/core/util/Predicate"),
@ -282,7 +282,7 @@ impl IdType {
/// id types. /// id types.
/// ///
/// ``` /// ```
/// use androscalpel::IdType /// use androscalpel::IdType;
/// ///
/// let id_type = IdType::from_smali( /// let id_type = IdType::from_smali(
/// "Landroidx/core/util/Predicate;" /// "Landroidx/core/util/Predicate;"
@ -706,7 +706,7 @@ impl IdField {
/// Try to parse a smali representation of a field into a IdField. /// Try to parse a smali representation of a field into a IdField.
/// ///
/// ``` /// ```
/// use androscalpel::IdField; /// use androscalpel::{IdField, IdType};
/// ///
/// let proto = IdField::from_smali("Ljava/lang/annotation/ElementType;->FIELD:Ljava/lang/annotation/ElementType;").unwrap(); /// let proto = IdField::from_smali("Ljava/lang/annotation/ElementType;->FIELD:Ljava/lang/annotation/ElementType;").unwrap();
/// assert_eq!( /// assert_eq!(
@ -875,7 +875,7 @@ impl IdMethod {
/// Try to parse a smali representation of method into a IdMethod. /// Try to parse a smali representation of method into a IdMethod.
/// ///
/// ``` /// ```
/// use androscalpel::IdMethod; /// use androscalpel::{IdType, IdMethod, IdMethodType};
/// ///
/// let id_method = IdMethod::from_smali( /// let id_method = IdMethod::from_smali(
/// "Landroidx/core/util/Predicate;->lambda$and$0(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z" /// "Landroidx/core/util/Predicate;->lambda$and$0(Landroidx/core/util/Predicate;Landroidx/core/util/Predicate;Ljava/lang/Object;)Z"

View file

@ -42,7 +42,7 @@ fn get_hello_world_apk() -> &'static Apk {
HELLO_WORLD_APK.get_or_init(|| { HELLO_WORLD_APK.get_or_init(|| {
let mut apk = Apk::new(); let mut apk = Apk::new();
let start = Instant::now(); let start = Instant::now();
apk.add_dex_file("classes.dex", get_hello_world_dex(), false, false) apk.add_dex_file("classes.dex", get_hello_world_dex(), |_, _, _| None, false)
.unwrap(); .unwrap();
let duration = start.elapsed(); let duration = start.elapsed();
write_to_report(&format!("Parsing classes_hello_world.dex: {duration:?}")); write_to_report(&format!("Parsing classes_hello_world.dex: {duration:?}"));
@ -142,7 +142,7 @@ fn test_generated_apk_equivalence() {
let new_dex = get_hello_world_recompilled(); let new_dex = get_hello_world_recompilled();
let mut new_apk = Apk::new(); let mut new_apk = Apk::new();
new_apk new_apk
.add_dex_file("classes.dex", &new_dex, false, false) .add_dex_file("classes.dex", &new_dex, |_, _, _| None, false)
.unwrap(); .unwrap();
/* /*
@ -575,7 +575,7 @@ fn test_2_from_json() {
let dex = apk.gen_raw_dex().unwrap().remove("classes.dex").unwrap(); let dex = apk.gen_raw_dex().unwrap().remove("classes.dex").unwrap();
let mut new_apk = Apk::new(); let mut new_apk = Apk::new();
new_apk new_apk
.add_dex_file("classes.dex", &dex, false, false) .add_dex_file("classes.dex", &dex, |_, _, _| None, false)
.unwrap(); .unwrap();
assert_eq!(apk, new_apk); assert_eq!(apk, new_apk);
} }
@ -638,7 +638,7 @@ fn test_hidden_api() {
let apktool_result = std::io::BufReader::new(apktool_result); let apktool_result = std::io::BufReader::new(apktool_result);
let apktool_result: sj::Value = sj::from_reader(apktool_result).unwrap(); let apktool_result: sj::Value = sj::from_reader(apktool_result).unwrap();
let mut apk = Apk::new(); let mut apk = Apk::new();
apk.add_dex_file("classes.dex", &dex_raw, false, false) apk.add_dex_file("classes.dex", &dex_raw, |_, _, _| None, false)
.unwrap(); .unwrap();
for cls in apktool_result.as_object().unwrap().keys() { for cls in apktool_result.as_object().unwrap().keys() {
assert!( assert!(