add unreferenced strings

This commit is contained in:
Jean-Marie Mineau 2024-02-15 11:34:43 +01:00
parent b47c9dd666
commit 6637745cdf
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
4 changed files with 114 additions and 68 deletions

View file

@ -2,7 +2,7 @@
use anyhow::{anyhow, bail, Context}; use anyhow::{anyhow, bail, Context};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::{HashMap, HashSet};
use log::info; use log::info;
use pyo3::prelude::*; use pyo3::prelude::*;
@ -23,16 +23,23 @@ pub struct Apk {
#[pyo3(get)] #[pyo3(get)]
#[serde(with = "hashmap_vectorize")] #[serde(with = "hashmap_vectorize")]
pub classes: HashMap<IdType, Class>, pub classes: HashMap<IdType, Class>,
#[pyo3(get)]
pub not_referenced_strings: HashSet<DexString>,
} }
impl Apk { impl Apk {
/// Add the content of a dex file to the apk. /// Add the content of a dex file to the apk.
pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> { pub fn add_dex_file(&mut self, data: &[u8]) -> Result<()> {
let dex = DexFileReader::new(data)?; let mut dex = DexFileReader::new(data)?;
for class in dex.get_class_defs() { for class in dex.get_class_defs() {
let class = self.get_class_from_dex_file(class, &dex)?; let class = self.get_class_from_dex_file(class, &dex)?;
self.classes.insert(class.descriptor.clone(), class); self.classes.insert(class.descriptor.clone(), class);
} }
self.not_referenced_strings.extend(
dex.get_not_resolved_strings()?
.into_iter()
.map(|string| DexString(string)),
);
Ok(()) Ok(())
} }
@ -2352,6 +2359,9 @@ impl Apk {
for class_ in self.classes.values() { for class_ in self.classes.values() {
dex_writer.add_class(class_)?; dex_writer.add_class(class_)?;
} }
for string in &self.not_referenced_strings {
dex_writer.add_string(string.clone());
}
Ok(vec![dex_writer.gen_dex_file_to_vec()?]) Ok(vec![dex_writer.gen_dex_file_to_vec()?])
} }
} }
@ -2362,6 +2372,7 @@ impl Apk {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
classes: HashMap::new(), classes: HashMap::new(),
not_referenced_strings: HashSet::new(),
} }
} }

View file

@ -226,6 +226,10 @@ impl DexWriter {
Ok(()) Ok(())
} }
/// Register `string` in the writer's string table, even when no class added through
/// `add_class` references it.
///
/// The entry is inserted in `self.strings` with the value `0`.
// NOTE(review): `0` is presumably a placeholder index that is recomputed when the
// string sections are generated (see `gen_string_data_section`) -- confirm.
pub fn add_string(&mut self, string: DexString) {
self.strings.insert(string, 0);
}
fn gen_string_data_section(&mut self) -> Result<()> { fn gen_string_data_section(&mut self) -> Result<()> {
debug!("Sort string and generate string_data_item and string_ids sections"); debug!("Sort string and generate string_data_item and string_ids sections");
let mut string_ids_list: Vec<DexString> = self.strings.keys().cloned().collect(); let mut string_ids_list: Vec<DexString> = self.strings.keys().cloned().collect();

View file

@ -7,12 +7,18 @@ use crate::{
}; };
use log::{error, info, warn}; use log::{error, info, warn};
use std::io::{Cursor, Seek, SeekFrom}; use std::io::{Cursor, Seek, SeekFrom};
use std::sync::atomic::{AtomicBool, Ordering};
#[derive(Debug)] #[derive(Debug)]
pub struct DexFileReader<'a> { pub struct DexFileReader<'a> {
data: &'a [u8], data: &'a [u8],
header: HeaderItem, header: HeaderItem,
string_ids: Vec<StringIdItem>, string_ids: Vec<StringIdItem>,
/// If `string_was_resolved[string_idx]` is true, the string was resolved at some point.
/// This allows us to get the strings that are in a dex file but not used by its
/// classes. (Yes, there are some, looking at you `~~D8{"backend":"dex","compilation-mode":
/// "release","has-checksums":false,"min-api":24,"version":"8.2.42"}`)
string_was_resolved: Vec<AtomicBool>,
type_ids: Vec<TypeIdItem>, type_ids: Vec<TypeIdItem>,
proto_ids: Vec<ProtoIdItem>, proto_ids: Vec<ProtoIdItem>,
field_ids: Vec<FieldIdItem>, field_ids: Vec<FieldIdItem>,
@ -32,6 +38,7 @@ impl<'a> DexFileReader<'a> {
header, header,
// Default values before population // Default values before population
string_ids: vec![], string_ids: vec![],
string_was_resolved: vec![],
type_ids: vec![], type_ids: vec![],
proto_ids: vec![], proto_ids: vec![],
field_ids: vec![], field_ids: vec![],
@ -46,6 +53,9 @@ impl<'a> DexFileReader<'a> {
tmp_file.header.string_ids_off, tmp_file.header.string_ids_off,
tmp_file.header.string_ids_size, tmp_file.header.string_ids_size,
)?; )?;
for _ in 0..tmp_file.string_ids.len() {
tmp_file.string_was_resolved.push(AtomicBool::new(false));
}
tmp_file.type_ids = tmp_file.get_item_list::<TypeIdItem>( tmp_file.type_ids = tmp_file.get_item_list::<TypeIdItem>(
tmp_file.header.type_ids_off, tmp_file.header.type_ids_off,
tmp_file.header.type_ids_size, tmp_file.header.type_ids_size,
@ -66,6 +76,7 @@ impl<'a> DexFileReader<'a> {
tmp_file.header.class_defs_off, tmp_file.header.class_defs_off,
tmp_file.header.class_defs_size, tmp_file.header.class_defs_size,
)?; )?;
if let Some(item) = tmp_file if let Some(item) = tmp_file
.map_list .map_list
.list .list
@ -138,10 +149,13 @@ impl<'a> DexFileReader<'a> {
"string idx {idx} is out of bound (|string_ids|={})", "string idx {idx} is out of bound (|string_ids|={})",
self.string_ids.len() self.string_ids.len()
)))?; )))?;
self.get_struct_at_offset(id.string_data_off) let string = self
.get_struct_at_offset::<StringDataItem>(id.string_data_off)
.map_err(|err| { .map_err(|err| {
Error::DeserializationError(format!("Failled to parse string {idx}: {err}")) Error::DeserializationError(format!("Failled to parse string {idx}: {err}"))
}) })?;
self.string_was_resolved[idx as usize].store(true, Ordering::Relaxed);
Ok(string)
} }
/// Return a [`TypeIdItem`] reference from its idx. /// Return a [`TypeIdItem`] reference from its idx.
@ -399,4 +413,22 @@ impl<'a> DexFileReader<'a> {
} }
r r
} }
/// Return the strings that were never resolved through [`Self::get_string`], i.e. the
/// strings whose `string_was_resolved` flag is still unset when this method is called.
///
/// Reading a string with [`Self::get_string`] sets its flag, so the flags of the
/// strings read here are cleared again before returning: calling this method does
/// not change which strings are considered resolved.
///
/// # Errors
///
/// Returns the first error raised by [`Self::get_string`]. The flags are restored
/// in that case too, so a failed call does not leave strings wrongly marked as
/// resolved.
pub fn get_not_resolved_strings(&mut self) -> Result<Vec<StringDataItem>> {
    // Take `&mut self` so that no `Self::get_string()` call can run while the flags
    // are scanned and restored. Interleaving the two would not be undefined behavior
    // (the flags are atomics), but it would make the result inconsistent.
    let idxs: Vec<u32> = self
        .string_was_resolved
        .iter()
        .enumerate()
        .filter(|(_, was_resolved)| !was_resolved.load(Ordering::Relaxed))
        // `get_string()` takes `u32` indexes, hence the narrowing from `usize`.
        .map(|(idx, _)| idx as u32)
        .collect();
    // Stops at the first error, like the `?` in a loop would, but without returning
    // before the flags are restored below.
    let strings = idxs
        .iter()
        .map(|&idx| self.get_string(idx))
        .collect::<Result<Vec<_>>>();
    // `get_string()` marked every string it read as resolved: undo that, whether or
    // not all the reads succeeded.
    for idx in idxs {
        self.string_was_resolved[idx as usize].store(false, Ordering::Relaxed);
    }
    strings
}
} }

127
test.py
View file

@ -21,61 +21,60 @@ with z.ZipFile(APK_NAME) as zipf:
apk = Apk() apk = Apk()
apk.add_dex_file(dex) apk.add_dex_file(dex)
exit()
clazz_id = IdType("Lcom/example/testapplication/ui/home/HomeViewModel;")
proto_id = IdMethodType(IdType("Ljava/lang/String;"), [])
method_id = IdMethod("text_gen", proto_id, clazz_id)
clazz = apk.classes[clazz_id]
method = clazz.virtual_methods[method_id]
code = method.code
# clazz_id = IdType("Lcom/example/testapplication/ui/home/HomeViewModel;")
# proto_id = IdMethodType(IdType("Ljava/lang/String;"), [])
# method_id = IdMethod("text_gen", proto_id, clazz_id)
#
# clazz = apk.classes[clazz_id]
# method = clazz.virtual_methods[method_id]
# code = method.code
#
logging.getLogger().setLevel(logging.WARNING) logging.getLogger().setLevel(logging.WARNING)
#
print(f"[+] Code of {method_id} ") # print(f"[+] Code of {method_id} ")
for i in code.insns: # for i in code.insns:
print(f" {i}") # print(f" {i}")
print("[+] Modify code") # print("[+] Modify code")
#
new_insns = [] # new_insns = []
for i in code.insns: # for i in code.insns:
if isinstance(i, ins.ConstString): # if isinstance(i, ins.ConstString):
if i.lit == "Hello": # if i.lit == "Hello":
i = ins.ConstString(i.reg, DexString("Degemer Mat")) # i = ins.ConstString(i.reg, DexString("Degemer Mat"))
elif i.lit == "Bye": # elif i.lit == "Bye":
i = ins.ConstString(i.reg, DexString("Kenavo")) # i = ins.ConstString(i.reg, DexString("Kenavo"))
new_insns.append(i) # new_insns.append(i)
#
# This need improving! # # This need improving!
code = Code(code.registers_size, code.ins_size, code.outs_size, new_insns) # code = Code(code.registers_size, code.ins_size, code.outs_size, new_insns)
apk.set_method_code(method_id, code) # apk.set_method_code(method_id, code)
# apk.set_method_code(method.descriptor, code) # # apk.set_method_code(method.descriptor, code)
#
#
clazz = apk.classes[clazz_id] # clazz = apk.classes[clazz_id]
method = clazz.virtual_methods[method_id] # method = clazz.virtual_methods[method_id]
code = method.code # code = method.code
print(f"[+] New code of {method_id} ") # print(f"[+] New code of {method_id} ")
for i in code.insns: # for i in code.insns:
print(f" {i}") # print(f" {i}")
#
# Strip class for debugging # # Strip class for debugging
classes = list( # classes = list(
filter( # filter(
lambda x: x # lambda x: x
not in [ # not in [
IdType("Lcom/example/testapplication/ui/home/HomeViewModel;"), # IdType("Lcom/example/testapplication/ui/home/HomeViewModel;"),
IdType("Landroidx/navigation/NavDeepLink$Builder;"), # IdType("Landroidx/navigation/NavDeepLink$Builder;"),
IdType("Landroidx/constraintlayout/core/widgets/ConstraintWidget$1;"), # IdType("Landroidx/constraintlayout/core/widgets/ConstraintWidget$1;"),
IdType("Landroidx/appcompat/app/ActionBar;"), # IdType("Landroidx/appcompat/app/ActionBar;"),
IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"), # IdType("Landroidx/constraintlayout/core/state/WidgetFrame;"),
], # ],
apk.classes.keys(), # apk.classes.keys(),
) # )
) # )
# for cls in classes: # # for cls in classes:
# apk.remove_class(cls) # # apk.remove_class(cls)
print("[+] Recompile") print("[+] Recompile")
@ -86,17 +85,17 @@ for dex in dex_raw:
new_apk.add_dex_file(dex) new_apk.add_dex_file(dex)
print("[+] Repackage") # print("[+] Repackage")
#
utils.replace_dex( # utils.replace_dex(
APK_NAME, # APK_NAME,
APK_NAME.parent / (APK_NAME.name.removesuffix(".apk") + "-instrumented.apk"), # APK_NAME.parent / (APK_NAME.name.removesuffix(".apk") + "-instrumented.apk"),
dex_raw, # dex_raw,
Path().parent / "my-release-key.jks", # Path().parent / "my-release-key.jks",
zipalign=Path.home() / "Android" / "Sdk" / "build-tools" / "34.0.0" / "zipalign", # zipalign=Path.home() / "Android" / "Sdk" / "build-tools" / "34.0.0" / "zipalign",
apksigner=Path.home() / "Android" / "Sdk" / "build-tools" / "34.0.0" / "apksigner", # apksigner=Path.home() / "Android" / "Sdk" / "build-tools" / "34.0.0" / "apksigner",
) # )
#
last_id = None last_id = None
MAX_REQ = 1 MAX_REQ = 1