# PEP 723 inline deps (https://peps.python.org/pep-0723/):
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "androguard==4.1.3",
# ]
# ///
"""Compare the call graph of an application with the one of its patched version.

The script counts the call-graph edges of the original APK, of the patched
APK and of any dynamically loaded bytecode, then estimates how many edges the
patch added once the "glue" edges (calls from generated classes or to the
reflection/boxing helpers listed in ``GLUE_METHODS``) are discounted.
"""

import hashlib
import zipfile

from argparse import ArgumentParser
from pathlib import Path

from androguard.misc import AnalyzeAPK
from androguard.core.analysis.analysis import Analysis
from androguard.core import dex
from androguard.util import set_log

from networkx.classes.digraph import DiGraph

set_log("CRITICAL")


# Fully qualified signatures ("<class>-><name><descriptor>") of the methods
# treated as glue by is_glue_method().
#
# Every entry MUST end with a comma: adjacent string literals are implicitly
# concatenated by Python, which silently merges two signatures into one
# string that can never match.
GLUE_METHODS: set[str] = {
    "Ljava/lang/reflect/Method;->invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;",
    "Ljava/lang/reflect/Method;->getName()Ljava/lang/String;",
    "Ljava/lang/reflect/Method;->getParameterTypes()[Ljava/lang/Class;",
    "Ljava/lang/reflect/Method;->getReturnType()Ljava/lang/Class;",
    "Ljava/lang/reflect/Method;->getDeclaringClass()Ljava/lang/Class;",
    "Ljava/lang/String;->equals(Ljava/lang/Object;)Z",
    "Ljava/lang/Class;->newInstance()Ljava/lang/Object;",
    "Ljava/lang/reflect/Constructor;->newInstance([Ljava/lang/Object;)Ljava/lang/Object;",
    "Ljava/lang/reflect/Constructor;->getParameterTypes()[Ljava/lang/Class;",
    "Ljava/lang/reflect/Constructor;->getDeclaringClass()Ljava/lang/Class;",
    "Ljava/lang/Class;->descriptorString()Ljava/lang/String;",
    "Ljava/lang/Boolean;->booleanValue()Z",
    "Ljava/lang/Byte;->byteValue()B",
    "Ljava/lang/Short;->shortValue()S",
    "Ljava/lang/Character;->charValue()C",
    "Ljava/lang/Integer;->intValue()I",
    "Ljava/lang/Long;->longValue()J",
    "Ljava/lang/Float;->floatValue()F",
    "Ljava/lang/Double;->doubleValue()D",
    "Ljava/lang/Boolean;->valueOf(Z)Ljava/lang/Boolean;",
    "Ljava/lang/Byte;->valueOf(B)Ljava/lang/Byte;",
    "Ljava/lang/Short;->valueOf(S)Ljava/lang/Short;",
    "Ljava/lang/Character;->valueOf(C)Ljava/lang/Character;",
    "Ljava/lang/Integer;->valueOf(I)Ljava/lang/Integer;",
    "Ljava/lang/Long;->valueOf(J)Ljava/lang/Long;",
    "Ljava/lang/Float;->valueOf(F)Ljava/lang/Float;",
    "Ljava/lang/Double;->valueOf(D)Ljava/lang/Double;",
    "Ljava/lang/Class;->getClassLoader()Ljava/lang/ClassLoader;",
    "Ljava/lang/ClassLoader;->getParent()Ljava/lang/ClassLoader;",
    "Ljava/lang/Object;->getClass()Ljava/lang/Class;",
    "Ljava/lang/Object;->toString()Ljava/lang/String;",
    # Classes used:
    #
    # "Ljava/lang/BootClassLoader;",
    # "Ljava/lang/Object;",
    # "Ldalvik/system/DelegateLastClassLoader;",
    # "Ljava/lang/Boolean;",
    # "Ljava/lang/Byte;",
    # "Ljava/lang/Short;",
    # "Ljava/lang/Character;",
    # "Ljava/lang/Integer;",
    # "Ljava/lang/Long;",
    # "Ljava/lang/Float;",
    # "Ljava/lang/Double;",
}


def is_generated_method(method) -> bool:
    """Return True when *method* belongs to a generated class.

    A class is considered generated when its name starts with ``Ltheseus/``
    and ends with ``/T;``. *method* only needs a ``get_class_name()`` method.
    """
    class_name = method.get_class_name()
    return class_name.startswith("Ltheseus/") and class_name.endswith("/T;")


def is_glue_method(method) -> bool:
    """Return True when *method* is generated or is listed in ``GLUE_METHODS``.

    The signature looked up in ``GLUE_METHODS`` is built from
    ``get_class_name()``, ``get_name()`` and ``get_descriptor()``.
    """
    if is_generated_method(method):
        return True
    full_name = (
        f"{method.get_class_name()}->{method.get_name()}{method.get_descriptor()}"
    )
    return full_name in GLUE_METHODS


def count_edges(cg: DiGraph) -> tuple[int, int]:
    """Count method calls and method calls that we may have added (glue methods).

    Comparing the number of glue edges between graphs makes it possible to
    compute how many actual edges were added.

    Returns:
        A ``(total_edges, glue_edges)`` tuple. An edge ``u -> v`` is counted
        as glue when the caller ``u`` is a generated method or the callee
        ``v`` is a glue method.
    """
    n = 0
    glue = 0
    for u, v in cg.edges():
        n += 1
        if is_generated_method(u) or is_glue_method(v):
            glue += 1

    return n, glue


def main():
    """Parse the command line, compare the call graphs and print the result.

    Returns:
        The call graph of the patched application, so that it can be
        inspected when the script is run interactively.
    """
    parser = ArgumentParser(
        description="Compare the call graph of an application and its patched version"
    )
    parser.add_argument("app", help="The original application", type=Path)
    parser.add_argument("patched_app", help="The patched apk", type=Path)
    parser.add_argument(
        "--show-new-methods", action="store_true", help="Show added methods edges"
    )
    parser.add_argument(
        "--csv-format",
        action="store_true",
        help="Show the results in a CSV format (apk sha256, nb edge before, nb edges after, added without glue, added ref only)",
    )
    parser.add_argument(
        "--dyn-bytecode", action="extend", nargs="+", type=Path, default=[]
    )

    args = parser.parse_args()

    # NOTE(review): this banner is printed even with --csv-format.
    print(f"app: {args.app}\npatched: {args.patched_app}")

    # Explicit validation instead of `assert`, which is stripped under -O.
    for label, path in (("app", args.app), ("patched_app", args.patched_app)):
        if not path.exists():
            parser.error(f"{label}: no such file: {path}")

    apk, _, dx = AnalyzeAPK(args.app)
    cg = dx.get_call_graph()
    _, _, dx_patched = AnalyzeAPK(args.patched_app)
    cg_patched = dx_patched.get_call_graph()

    dyn_cgs = []
    for dyn in args.dyn_bytecode:
        if zipfile.is_zipfile(dyn):
            _, _, dyn_dx = AnalyzeAPK(dyn)
        else:
            # Not a zip archive: treat the file as a raw dex file and build
            # the analysis by hand.
            dyn_dx = Analysis()
            with dyn.open("rb") as fp:
                raw = fp.read()
            d = dex.DEX(raw, using_api=apk.get_target_sdk_version())
            dyn_dx.add(d)
            dyn_dx.create_xref()

        dyn_cgs.append(dyn_dx.get_call_graph())

    nb_methods_app = cg.number_of_nodes()
    nb_methods_pch = cg_patched.number_of_nodes()
    nb_methods_dyn = sum(dyn_cg.number_of_nodes() for dyn_cg in dyn_cgs)

    nb_edges_app, nb_glue_app = count_edges(cg)
    nb_edges_pch, nb_glue_pch = count_edges(cg_patched)
    nb_edges_dyn, nb_glue_dyn = 0, 0
    for dyn_cg in dyn_cgs:
        nb_e, nb_g = count_edges(dyn_cg)
        nb_edges_dyn += nb_e
        nb_glue_dyn += nb_g

    # Glue edges introduced by the patch itself: the ones already present in
    # the original app or in the dynamically loaded bytecode are discounted.
    added_glue = nb_glue_pch - nb_glue_dyn - nb_glue_app
    added_edges = nb_edges_pch - nb_edges_app - added_glue

    if args.csv_format:
        with args.app.open("rb") as fp:
            apk_sha256 = hashlib.file_digest(fp, "sha256").hexdigest()
        print(
            f"{apk_sha256},{nb_edges_app},{nb_edges_pch},{added_edges},{added_edges - nb_edges_dyn}"
        )
        # apk sha256, nb edge before, nb edges after, added without glue, added ref only
    else:
        print("app:")
        print(f" nodes: {nb_methods_app}")
        print(f" nb edges {nb_edges_app}")
        print(f" glue edges {nb_glue_app}")
        print("dyn loaded:")
        print(f" nodes: {nb_methods_dyn}")
        print(f" nb edges {nb_edges_dyn}")
        print(f" glue edges {nb_glue_dyn}")
        print("patched:")
        print(f" nb node: {nb_methods_pch}")
        print(f" nb edges {nb_edges_pch}")
        print(f" glue edges {nb_glue_pch}")
        print("")
        print(
            f"Total edges added: {added_edges} ({added_edges - nb_edges_dyn} ref only)"
        )

    if args.show_new_methods:
        # Build the lookup set once; rebuilding it for every patched edge
        # made this loop quadratic in the number of edges.
        original_edges = {(u.full_name, v.full_name) for u, v in cg.edges()}
        for u, v in cg_patched.edges():
            if is_generated_method(u) or is_glue_method(v):
                continue
            if (u.full_name, v.full_name) in original_edges:
                continue
            print(
                f"{u.get_class_name()}->{u.get_name()} ==> {v.get_class_name()}->{v.get_name()}"
            )

    return cg_patched


if __name__ == "__main__":
    cg = main()