Review notes (text before the first "diff --git" header is ignored by git apply):

  * frida/theseus_frida/__init__.py, collect_runtime(): the class loader
    filtering loop now iterates over a snapshot (list(cls)) because deleting
    from a dict while iterating its key view raises RuntimeError.
  * frida/theseus_frida/__init__.py, on_message(): the dispatch key for class
    loader messages is now "classloader", the type hook.js actually sends;
    with the previous key the messages fell through to the generic branch
    and "initial_classloaders" was never filled.
  * frida/theseus_frida/__init__.py, setup_frida(): the spinner is advanced
    with next(t) instead of calling t.__next__() directly.
  * Whitespace of this patch was reconstructed; verify context lines against
    the target files before applying.

diff --git a/frida/theseus_frida/__init__.py b/frida/theseus_frida/__init__.py
index 76b401c..b1a1332 100644
--- a/frida/theseus_frida/__init__.py
+++ b/frida/theseus_frida/__init__.py
@@ -6,8 +6,11 @@ import subprocess
 import time
 import json
 import sys
+import tempfile
+import shutil
+import lzma
 from pathlib import Path
-from typing import TextIO
+from typing import TextIO, Any
 
 import frida  # type: ignore
 from androguard.core.apk import get_apkid  # type: ignore
@@ -23,6 +26,12 @@ STACK_CONSUMER_B64 = Path(__file__).parent / "StackConsumer.dex.b64"
 HASH_NB_BYTES = 4
 
 
+def spinner(symbs: str = "-\\|/"):
+    while True:
+        for s in symbs:
+            yield s
+
+
 # Define handler to event generated by the scripts
 def on_message(message, data, data_storage: dict, file_storage: Path):
     if message["type"] == "error":
@@ -36,6 +45,8 @@ def on_message(message, data, data_storage: dict, file_storage: Path):
         handle_cnstr_new_inst_data(message["payload"]["data"], data_storage)
     elif message["type"] == "send" and message["payload"]["type"] == "load-dex":
         handle_load_dex(message["payload"]["data"], data_storage, file_storage)
+    elif message["type"] == "send" and message["payload"]["type"] == "classloader":
+        handle_classloader_data(message["payload"]["data"], data_storage)
     else:
         print("[-] message:", message)
 
@@ -48,8 +59,13 @@ def print_stack(stack, prefix: str):
         print(f" {prefix}{frame['method']}:{frame['bytecode_index']}{native}")
 
 
+def handle_classloader_data(data: dict, data_storage: dict):
+    data_storage["initial_classloaders"].append(data)
+
+
 def handle_invoke_data(data, data_storage: dict):
     method = data["method"]
+    method_cl_id = data["method_cl_id"]
     # TODO: good idea?
     if method in [
         "Landroid/view/View;->getTranslationZ()F",
@@ -59,6 +75,7 @@ def handle_invoke_data(data, data_storage: dict):
     if len(data["stack"]) == 0:
         return
     caller_method = data["stack"][0]["method"]
+    caller_cl_id = data["stack"][0]["cl_id"]
     addr = data["stack"][0]["bytecode_index"]
     is_static = data["is_static"]
     if is_static:
@@ -66,8 +83,8 @@ def handle_invoke_data(data, data_storage: dict):
     else:
         is_static_str = ""
     print("[+] Method.Invoke:")
-    print(f" called: {method}{is_static_str}")
-    print(f" by: {caller_method}")
+    print(f" called: [{method_cl_id}]{method}{is_static_str}")
+    print(f" by: [{caller_cl_id}]{caller_method}")
     print(f" at: 0x{addr:08x}")
     # print(f" stack:")
     # print_stack(data["stack"], " ")
@@ -76,7 +93,11 @@ def handle_invoke_data(data, data_storage: dict):
     data_storage["invoke_data"].append(
         {
             "method": method,
+            "method_cl_id": method_cl_id,
+            "renamed_method": None,
             "caller_method": caller_method,
+            "caller_cl_id": caller_cl_id,
+            "renamed_caller_method": None,
             "addr": addr,
             "is_static": is_static,
         }
@@ -85,6 +106,7 @@ def handle_invoke_data(data, data_storage: dict):
 
 def handle_class_new_inst_data(data, data_storage: dict):
     constructor = data["constructor"]
+    constructor_cl_id = data["constructor_cl_id"]
     if len(data["stack"]) == 0:
         return
     if (
@@ -97,10 +119,11 @@ def handle_class_new_inst_data(data, data_storage: dict):
     else:
         return
     caller_method = frame["method"]
+    caller_cl_id = frame["cl_id"]
     addr = frame["bytecode_index"]
     print("[+] Class.NewInstance:")
-    print(f" called: {constructor}")
-    print(f" by: {caller_method}")
+    print(f" called: [{constructor_cl_id}]{constructor}")
+    print(f" by: [{caller_cl_id}]{caller_method}")
     print(f" at: 0x{addr:08x}")
     # print(f" stack:")
     # print_stack(data["stack"], " ")
@@ -109,7 +132,11 @@ def handle_class_new_inst_data(data, data_storage: dict):
     data_storage["class_new_inst_data"].append(
         {
             "constructor": constructor,
+            "constructor_cl_id": constructor_cl_id,
+            "renamed_constructor": None,
             "caller_method": caller_method,
+            "caller_cl_id": caller_cl_id,
+            "renamed_caller_method": None,
             "addr": addr,
         }
     )
@@ -117,15 +144,17 @@ def handle_class_new_inst_data(data, data_storage: dict):
 
 def handle_cnstr_new_inst_data(data, data_storage: dict):
     constructor = data["constructor"]
+    constructor_cl_id = data["constructor_cl_id"]
     if not constructor.startswith("Lcom/example/theseus"):
         return
     if len(data["stack"]) == 0:
         return
     caller_method = data["stack"][0]["method"]
+    caller_cl_id = data["stack"][0]["cl_id"]
     addr = data["stack"][0]["bytecode_index"]
     print("[+] Constructor.newInstance:")
-    print(f" called: {constructor}")
-    print(f" by: {caller_method}")
+    print(f" called: [{constructor_cl_id}]{constructor}")
+    print(f" by: [{caller_cl_id}]{caller_method}")
     print(f" at: 0x{addr:08x}")
     # print(f" stack:")
     # print_stack(data["stack"], " ")
@@ -134,7 +163,11 @@ def handle_cnstr_new_inst_data(data, data_storage: dict):
     data_storage["cnstr_new_inst_data"].append(
         {
             "constructor": constructor,
+            "constructor_cl_id": constructor_cl_id,
+            "renamed_constructor": None,
             "caller_method": caller_method,
+            "caller_cl_id": caller_cl_id,
+            "renamed_caller_method": None,
             "addr": addr,
         }
     )
@@ -183,10 +216,10 @@ FRIDA_SERVER_BIN = Path(__file__).parent / "frida-server-16.7.0-android-x86_64.x
 FRIDA_SERVER_ANDROID_PATH = "/data/local/tmp/frida-server"
 
 
-def setup_frida(device: str, env: dict[str, str]) -> frida.core.Device:
-    if device != "":
-        device = frida.get_device(args.device)
-        env["ANDROID_SERIAL"] = args.device
+def setup_frida(device_name: str, env: dict[str, str]) -> frida.core.Device:
+    if device_name != "":
+        device = frida.get_device(device_name)
+        env["ANDROID_SERIAL"] = device_name
     else:
         device = frida.get_usb_device()
 
@@ -197,14 +230,19 @@ def setup_frida(device: str, env: dict[str, str]) -> frida.core.Device:
     except frida.ServerNotRunningError:
         pass
     # Start server
-    proc = subprocess.run(
-        ["adb", "shell", "whoami"], encoding="utf-8", stdout=subprocess.PIPE, env=env
+    proc: subprocess.CompletedProcess[str] | subprocess.CompletedProcess[bytes] = (
+        subprocess.run(
+            ["adb", "shell", "whoami"],
+            encoding="utf-8",
+            stdout=subprocess.PIPE,
+            env=env,
+        )
     )
     if proc.stdout.strip() != "root":
         proc = subprocess.run(["adb", "root"], env=env)
         # Rooting adb will disconnect the device
-        if device != "":
-            device = frida.get_device(device)
+        if device_name != "":
+            device = frida.get_device(device_name)
         else:
             device = frida.get_usb_device()
     perm = subprocess.run(
@@ -221,20 +259,31 @@ def setup_frida(device: str, env: dict[str, str]) -> frida.core.Device:
         "7",
     ]  # int(perm[0]) & 1 == 1
     if perm == "":
-        subprocess.run(
-            [
-                "adb",
-                "push",
-                str(FRIDA_SERVER_BIN.absolute()),
-                FRIDA_SERVER_ANDROID_PATH,
-            ],
-            env=env,
-        )
+        with tempfile.TemporaryDirectory() as tmpdname:
+            tmpd = Path(tmpdname)
+            with (
+                lzma.open(str(FRIDA_SERVER_BIN.absolute())) as fin,
+                (tmpd / "frida-server").open("wb") as fout,
+            ):
+                shutil.copyfileobj(fin, fout)
+
+            subprocess.run(
+                [
+                    "adb",
+                    "push",
+                    str((tmpd / "frida-server").absolute()),
+                    FRIDA_SERVER_ANDROID_PATH,
+                ],
+                env=env,
+            )
     if need_perm_resset:
-        subprocess.run(["adb", "chmod", "755", FRIDA_SERVER_ANDROID_PATH], env=env)
+        subprocess.run(
+            ["adb", "shell", "chmod", "755", FRIDA_SERVER_ANDROID_PATH], env=env
+        )
     subprocess.Popen(["adb", "shell", FRIDA_SERVER_ANDROID_PATH], env=env)
     # The server take some time to start
     # time.sleep(3)
+    t = spinner()
     while True:
         try:
             s = device.attach(0)
@@ -242,11 +291,11 @@ def setup_frida(device: str, env: dict[str, str]) -> frida.core.Device:
             print("[*] Server started: begin analysis ")
             return device
         except frida.ServerNotRunningError:
-            print("[-] Waiting for frida server to start", end="\r")
+            print(f"[{next(t)}] Waiting for frida server to start", end="\r")
             time.sleep(0.3)
 
 
-def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
+def collect_runtime(apk: Path, device_name: str, file_storage: Path, output: TextIO):
     env = dict(os.environ)
 
     if not file_storage.exists():
@@ -255,7 +304,7 @@ def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
         print("[!] file_storage must be a directory")
         exit()
 
-    device = setup_frida(device, env)
+    device = setup_frida(device_name, env)
 
     app = get_apkid(apk)[0]
 
@@ -265,9 +314,9 @@ def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
     subprocess.run(["adb", "install", str(apk.absolute())], env=env)
 
     with FRIDA_SCRIPT.open("r") as file:
-        script = file.read()
+        jsscript = file.read()
     with STACK_CONSUMER_B64.open("r") as file:
-        script = script.replace(
+        jsscript = jsscript.replace(
             "",
             file.read().replace("\n", "").strip(),
         )
@@ -275,7 +324,7 @@ def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
     pid = device.spawn([app])
     session = device.attach(pid)
     try:
-        script = session.create_script(script)
+        script = session.create_script(jsscript)
     except frida.InvalidArgumentError as e:
         print("[!] Error:")
         print(
@@ -286,11 +335,12 @@ def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
         )
         raise e
 
-    data_storage = {
+    data_storage: dict[str, Any] = {
         "invoke_data": [],
         "class_new_inst_data": [],
         "cnstr_new_inst_data": [],
         "dyn_code_load": [],
+        "initial_classloaders": [],
     }
 
     script.on(
@@ -305,6 +355,40 @@ def collect_runtime(apk: Path, device: str, file_storage: Path, output: TextIO):
     print("==> Press ENTER to finish the analysis <==")
     input()
 
+    main_class_loader: str | None = None
+    cls = {d["id"]: d for d in data_storage["initial_classloaders"]}
+    for load_data in data_storage["dyn_code_load"]:
+        if load_data["classloader"] in cls:
+            del cls[load_data["classloader"]]
+    for id_ in list(cls):
+        if (
+            'dalvik.system.PathClassLoader[DexPathList[[directory "."],'
+            in cls[id_]["str"]
+        ):
+            del cls[id_]
+        elif cls[id_]["cname"] == "java.lang.BootClassLoader":
+            del cls[id_]
+    if len(cls) == 0:
+        print("[!] No classloader found for the main APK")
+    elif len(cls) > 1:
+        print(
+            "[!] Multiple classloader found that could be the main APK, try to guess the right one"
+        )
+        nb_occ = {k: 0 for k in cls.keys()}
+        for data in data_storage["class_new_inst_data"]:
+            if data["caller_cl_id"] in nb_occ:
+                nb_occ[data["caller_cl_id"]] += 1
+        for data in data_storage["invoke_data"]:
+            if data["caller_cl_id"] in nb_occ:
+                nb_occ[data["caller_cl_id"]] += 1
+        for data in data_storage["cnstr_new_inst_data"]:
+            if data["caller_cl_id"] in nb_occ:
+                nb_occ[data["caller_cl_id"]] += 1
+        main_class_loader = max(cls.keys(), key=lambda x: nb_occ[x])
+    else:
+        main_class_loader = list(cls.keys())[0]
+    data_storage["apk_cl_id"] = main_class_loader
+
     json.dump(data_storage, output, indent=" ")
 
 
@@ -340,7 +424,7 @@ def main():
     if args.output is None:
         collect_runtime(
             apk=args.apk,
-            device=args.device,
+            device_name=args.device,
             file_storage=args.dex_dir,
             output=sys.stdout,
         )
@@ -348,7 +432,7 @@ def main():
         with args.output.open("w") as fp:
             collect_runtime(
                 apk=args.apk,
-                device=args.device,
+                device_name=args.device,
                 file_storage=args.dex_dir,
                 output=fp,
             )
diff --git a/frida/theseus_frida/hook.js b/frida/theseus_frida/hook.js
index 51654df..b9ea4c9 100644
--- a/frida/theseus_frida/hook.js
+++ b/frida/theseus_frida/hook.js
@@ -46,6 +46,7 @@ Java.perform(() => {
         "bytecode_index": frame.getByteCodeIndex(),
         "is_native": frame.isNativeMethod(),
         "method": frame.getDeclaringClass().descriptorString() + "->" + frame.getMethodName() + frame.getDescriptor(),
+        "cl_id": System.identityHashCode(frame.getDeclaringClass().getClassLoader()),
         //{
         //"descriptor": frame.getDescriptor(),
         //"name": frame.getMethodName(),
@@ -101,6 +102,7 @@ Java.perform(() => {
       "type": "invoke",
       "data": {
         "method": get_method_dsc(this),
+        "method_cl_id": System.identityHashCode(this.getDeclaringClass().getClassLoader()),
         /*{
           "name": this.getName(),
           "class": this.getDeclaringClass().getName(),
@@ -123,6 +125,7 @@ Java.perform(() => {
       "type": "class-new-inst",
       "data": {
         "constructor": this.descriptorString() + "->()V",
+        "constructor_cl_id": System.identityHashCode(this.getClassLoader()),
         /*{
           "name": "",
           "class": this.getName(),
@@ -144,6 +147,7 @@ Java.perform(() => {
       "type": "cnstr-new-isnt",
       "data": {
         "constructor": get_constr_dsc(this),
+        "constructor_cl_id": System.identityHashCode(this.getDeclaringClass().getClassLoader()),
         /*
         {
           "name": "",
@@ -275,5 +279,22 @@ Java.perform(() => {
       elements,
     );
   };
+
+  // Find the main APK class loader:
+  // Not so easy, just send all class loader and sort this out later:
+  var class_loader = Java.enumerateClassLoadersSync();
+  for (var cl of class_loader) {
+    //if (cl.toString().includes("dalvik.system.PathClassLoader[DexPathList[[directory \".\"],")) {
+    // continue;
+    //}
+    //if (cl.$className == "java.lang.BootClassLoader") {
+    // continue;
+    //}
+    send({"type": "classloader", "data": {
+      "id": System.identityHashCode(cl),
+      "str": cl.toString(),
+      "cname": cl.$className
+    }});
+  }
 });
 
diff --git a/patcher/src/bin/patcher.rs b/patcher/src/bin/patcher.rs
index 9aa974b..149a226 100644
--- a/patcher/src/bin/patcher.rs
+++ b/patcher/src/bin/patcher.rs
@@ -43,10 +43,11 @@ fn main() {
         .unwrap()
         .read_to_string(&mut json)
         .unwrap();
-    let rt_data: RuntimeData = serde_json::from_str(&json).unwrap();
+    let mut rt_data: RuntimeData = serde_json::from_str(&json).unwrap();
 
     // Dynamic Loading
-    insert_code(cli.code_loading_patch_strategy, &mut apk, &rt_data).unwrap();
+    insert_code(cli.code_loading_patch_strategy, &mut apk, &mut rt_data).unwrap();
+    let rt_data = rt_data; // not mut anymore
 
     // Reflection
     let mut test_methods = HashMap::new();
diff --git a/patcher/src/code_loading_patcher.rs b/patcher/src/code_loading_patcher.rs
index b02de2b..aacf350 100644
--- a/patcher/src/code_loading_patcher.rs
+++ b/patcher/src/code_loading_patcher.rs
@@ -7,6 +7,12 @@ use clap::ValueEnum;
 
 use crate::runtime_data::RuntimeData;
 
+// TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
+//
+// INSERT EMPTY CLASS LOADERS WHEN ID REFERS TO UNKNOWN CLASS LOADER
+//
+// TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO TODO
+
 #[derive(ValueEnum, Debug, PartialEq, Clone, Copy, Default)]
 pub enum CodePatchingStrategy {
     #[default]
@@ -41,7 +47,10 @@ fn insert_code_model_class_loaders(apk: &mut Apk, runtime_data: &mut RuntimeData
     let mut class_defined = apk.list_classes();
     let mut class_redefined = HashSet::new();
     let mut class_loaders = HashMap::new();
-    let main_cl_id = runtime_data.apk_cl_id.clone();
+    let main_cl_id = runtime_data
+        .apk_cl_id
+        .clone()
+        .unwrap_or_else(|| "MAIN".to_string());
     class_loaders.insert(
         main_cl_id.clone(),
         ClassLoader {
diff --git a/patcher/src/runtime_data.rs b/patcher/src/runtime_data.rs
index dae03ec..aa32f09 100644
--- a/patcher/src/runtime_data.rs
+++ b/patcher/src/runtime_data.rs
@@ -11,7 +11,7 @@ pub struct RuntimeData {
     pub cnstr_new_inst_data: Vec,
     pub dyn_code_load: Vec,
     /// The id of the class loader of the apk (the main classloader)
-    pub apk_cl_id: String,
+    pub apk_cl_id: Option,
 }
 
 impl RuntimeData {
diff --git a/theseus_autopatcher/src/theseus_autopatcher/__init__.py b/theseus_autopatcher/src/theseus_autopatcher/__init__.py
index dd9b443..1a67523 100644
--- a/theseus_autopatcher/src/theseus_autopatcher/__init__.py
+++ b/theseus_autopatcher/src/theseus_autopatcher/__init__.py
@@ -8,6 +8,12 @@ from shutil import which
 from theseus_frida import collect_runtime
 
 
+def spinner(symbs: str = "◜◠◝◞◡◟"):
+    while True:
+        for s in symbs:
+            yield s
+
+
 def get_android_sdk_path() -> Path | None:
     if "ANDROID_HOME" in os.environ:
         return Path(os.environ["ANDROID_HOME"])
@@ -216,7 +222,10 @@ def main():
         (tmpd / "dex").mkdir()
         with (tmpd / "runtime.json").open("w") as fp:
             collect_runtime(
-                apk=args.apk, device=args.device, file_storage=tmpd / "dex", output=fp
+                apk=args.apk,
+                device_name=args.device,
+                file_storage=tmpd / "dex",
+                output=fp,
             )
         patch_apk(
             runtime_data=tmpd / "runtime.json",