compare call graphs

This commit is contained in:
Jean-Marie 'Histausse' Mineau 2025-09-16 18:33:05 +02:00
parent c0a33536b9
commit 4bf84361a4
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2

View file

@ -0,0 +1,205 @@
# PEP 723 inline deps (https://peps.python.org/pep-0723/):
#
# /// script
# requires-python = ">=3.13"
# dependencies = [
# "androguard==4.1.3",
# ]
# ///
import zipfile
from argparse import ArgumentParser
from pathlib import Path
from androguard.misc import AnalyzeAPK
from androguard.core.analysis.analysis import Analysis
from androguard.core import dex
from androguard.util import set_log
from networkx.classes.digraph import DiGraph
# Set androguard's log level to CRITICAL at import time — presumably to keep the
# analysis library's messages out of this script's output (confirm against
# androguard.util.set_log).
set_log("CRITICAL")
# Signatures, formatted as `<class>-><name><descriptor>`, of the methods that
# `is_glue_method` treats as glue. Calls to them are counted separately because
# they may have been added by the patching rather than by the application.
#
# Every entry MUST end with a comma: two adjacent string literals are silently
# concatenated by Python into a single (never matching) entry.
GLUE_METHODS: set[str] = {
    # Reflection: java.lang.reflect.Method
    "Ljava/lang/reflect/Method;->invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;",
    "Ljava/lang/reflect/Method;->getName()Ljava/lang/String;",
    "Ljava/lang/reflect/Method;->getParameterTypes()[Ljava/lang/Class;",
    "Ljava/lang/reflect/Method;->getReturnType()Ljava/lang/Class;",
    "Ljava/lang/reflect/Method;->getDeclaringClass()Ljava/lang/Class;",
    "Ljava/lang/String;->equals(Ljava/lang/Object;)Z",
    # Reflection: instantiation
    "Ljava/lang/Class;->newInstance()Ljava/lang/Object;",
    "Ljava/lang/reflect/Constructor;->newInstance([Ljava/lang/Object;)Ljava/lang/Object;",
    "Ljava/lang/reflect/Constructor;->getParameterTypes()[Ljava/lang/Class;",
    "Ljava/lang/reflect/Constructor;->getDeclaringClass()Ljava/lang/Class;",
    "Ljava/lang/Class;->descriptorString()Ljava/lang/String;",
    # Unboxing of primitive wrappers
    "Ljava/lang/Boolean;->booleanValue()Z",
    "Ljava/lang/Byte;->byteValue()B",
    "Ljava/lang/Short;->shortValue()S",
    "Ljava/lang/Character;->charValue()C",
    "Ljava/lang/Integer;->intValue()I",
    "Ljava/lang/Long;->longValue()J",
    "Ljava/lang/Float;->floatValue()F",
    "Ljava/lang/Double;->doubleValue()D",
    # Boxing of primitives
    "Ljava/lang/Boolean;->valueOf(Z)Ljava/lang/Boolean;",
    "Ljava/lang/Byte;->valueOf(B)Ljava/lang/Byte;",
    "Ljava/lang/Short;->valueOf(S)Ljava/lang/Short;",
    "Ljava/lang/Character;->valueOf(C)Ljava/lang/Character;",
    "Ljava/lang/Integer;->valueOf(I)Ljava/lang/Integer;",
    "Ljava/lang/Long;->valueOf(J)Ljava/lang/Long;",
    "Ljava/lang/Float;->valueOf(F)Ljava/lang/Float;",
    "Ljava/lang/Double;->valueOf(D)Ljava/lang/Double;",
    # Class loader and generic object helpers
    "Ljava/lang/Class;->getClassLoader()Ljava/lang/ClassLoader;",
    "Ljava/lang/ClassLoader;->getParent()Ljava/lang/ClassLoader;",
    "Ljava/lang/Object;->getClass()Ljava/lang/Class;",
    "Ljava/lang/Object;->toString()Ljava/lang/String;",
    # Classes used:
    #
    # "Ljava/lang/BootClassLoader;",
    # "Ljava/lang/Object;",
    # "Ldalvik/system/DelegateLastClassLoader;",
    # "Ljava/lang/Boolean;",
    # "Ljava/lang/Byte;",
    # "Ljava/lang/Short;",
    # "Ljava/lang/Character;",
    # "Ljava/lang/Integer;",
    # "Ljava/lang/Long;",
    # "Ljava/lang/Float;",
    # "Ljava/lang/Double;",
}
def is_generated_method(method) -> bool:
    """Return True when `method` is defined in a generated class.

    A class counts as generated when its name starts with ``Ltheseus/`` and
    ends with ``/T;``. `method` only needs to expose `get_class_name()`.
    """
    owner = method.get_class_name()
    return owner.startswith("Ltheseus/") and owner.endswith("/T;")
def is_glue_method(method) -> bool:
    """Return True when `method` is a glue method.

    A method is glue when it is a generated method (see
    `is_generated_method`) or when its full signature, formatted as
    `<class>-><name><descriptor>`, is listed in `GLUE_METHODS`.
    """
    if not is_generated_method(method):
        class_name = method.get_class_name()
        name = method.get_name()
        descriptor = method.get_descriptor()
        signature = f"{class_name}->{name}{descriptor}"
        return signature in GLUE_METHODS
    return True
def count_edges(cg: DiGraph) -> tuple[int, int]:
    """Count the method calls of a call graph, and those that may be glue.

    An edge is counted as glue when its caller is a generated method or its
    callee is a glue method, i.e. when it is a call we may have added
    ourselves. Comparing the glue counts of two graphs allows computing how
    many actual edges were added.

    Returns:
        A `(total, glue)` pair: the number of edges in `cg`, and the number
        of those edges counted as glue.
    """
    calls = list(cg.edges())
    glue_calls = sum(
        1
        for caller, callee in calls
        if is_generated_method(caller) or is_glue_method(callee)
    )
    return len(calls), glue_calls
def main():
    """Compare the call graph of an application with the one of its patched version.

    Parses the command line, builds the call graph of the original APK, of the
    patched APK and of every `--dyn-bytecode` file (APK/zip or raw DEX), then
    prints node/edge statistics: a human readable report by default, or a
    single CSV line with `--csv-format`. With `--show-new-methods`, the
    non-glue edges of the patched call graph that do not exist in the original
    call graph are listed as well.

    Exits through `ArgumentParser.error` when one of the two APK paths does
    not exist.

    Returns:
        The call graph of the patched application.
    """
    parser = ArgumentParser(
        description="Compare the call graph of an application and its patched version"
    )
    parser.add_argument("app", help="The original application", type=Path)
    parser.add_argument("patched_app", help="The patched apk", type=Path)
    parser.add_argument(
        "--show-new-methods", action="store_true", help="Show added methods edges"
    )
    parser.add_argument(
        "--csv-format",
        action="store_true",
        help="Show the results in a CSV format (apk sha256, nb edge before, nb edges after, added without glue, added ref only)",
    )
    parser.add_argument(
        "--dyn-bytecode", action="extend", nargs="+", type=Path, default=[]
    )
    args = parser.parse_args()

    # NOTE(review): this line is printed in CSV mode too, before the CSV row.
    print(f"app: {args.app}\npatched: {args.patched_app}")

    # Validate explicitly: `assert` statements are stripped under `python -O`.
    if not args.app.exists():
        parser.error(f"{args.app} does not exist")
    if not args.patched_app.exists():
        parser.error(f"{args.patched_app} does not exist")

    apk, _, dx_app = AnalyzeAPK(args.app)
    cg = dx_app.get_call_graph()
    _, _, dx_patched = AnalyzeAPK(args.patched_app)
    cg_patched = dx_patched.get_call_graph()

    # One call graph per dynamically loaded bytecode file.
    dyn_cgs = []
    for dyn in args.dyn_bytecode:
        if zipfile.is_zipfile(dyn):
            _, _, dx_dyn = AnalyzeAPK(dyn)
        else:
            # Not a zip: load the file as a raw DEX, analysed with the target
            # SDK version of the original application.
            dx_dyn = Analysis()
            with dyn.open("rb") as fp:
                raw = fp.read()
            d = dex.DEX(raw, using_api=apk.get_target_sdk_version())
            dx_dyn.add(d)
            dx_dyn.create_xref()
        dyn_cgs.append(dx_dyn.get_call_graph())

    nb_methods_app = cg.number_of_nodes()
    nb_methods_pch = cg_patched.number_of_nodes()
    nb_methods_dyn = sum(dyn_cg.number_of_nodes() for dyn_cg in dyn_cgs)

    nb_edges_app, nb_glue_app = count_edges(cg)
    nb_edges_pch, nb_glue_pch = count_edges(cg_patched)
    nb_edges_dyn, nb_glue_dyn = 0, 0
    for dyn_cg in dyn_cgs:
        nb_e, nb_g = count_edges(dyn_cg)
        nb_edges_dyn += nb_e
        nb_glue_dyn += nb_g

    # Glue edges found in the patched app that come neither from the original
    # app nor from the dynamically loaded bytecode.
    added_glue = nb_glue_pch - nb_glue_dyn - nb_glue_app
    # New edges of the patched app, without that added glue.
    added_edges = nb_edges_pch - nb_edges_app - added_glue

    if args.csv_format:
        import hashlib

        with args.app.open("rb") as fp:
            apk_sha256 = hashlib.file_digest(fp, "sha256").hexdigest()
        # apk sha256, nb edge before, nb edges after, added without glue, added ref only
        print(
            f"{apk_sha256},{nb_edges_app},{nb_edges_pch},{added_edges},{added_edges - nb_edges_dyn}"
        )
    else:
        print("app:")
        print(f" nodes: {nb_methods_app}")
        print(f" nb edges {nb_edges_app}")
        print(f" glue edges {nb_glue_app}")
        print("dyn loaded:")
        print(f" nodes: {nb_methods_dyn}")
        print(f" nb edges {nb_edges_dyn}")
        print(f" glue edges {nb_glue_dyn}")
        print("patched:")
        print(f" nb node: {nb_methods_pch}")
        print(f" nb edges {nb_edges_pch}")
        print(f" glue edges {nb_glue_pch}")
        print("")
        print(
            f"Total edges added: {added_edges} ({added_edges - nb_edges_dyn} ref only)"
        )

    if args.show_new_methods:
        # Build the lookup set once: rebuilding it for every edge of the
        # patched graph made this loop quadratic in the number of edges.
        original_edges = {(u.full_name, v.full_name) for u, v in cg.edges()}
        for u, v in cg_patched.edges():
            if is_generated_method(u) or is_glue_method(v):
                continue
            if (u.full_name, v.full_name) in original_edges:
                continue
            print(
                f"{u.get_class_name()}->{u.get_name()} ==> {v.get_class_name()}->{v.get_name()}"
            )
    return cg_patched
# Script entry point: run the comparison only when executed directly.
if __name__ == "__main__":
    # main() returns the patched call graph; it is kept in a module-level name,
    # presumably so it can be inspected afterwards (e.g. with `python -i`) —
    # confirm before removing the binding.
    cg = main()