compare call graphs
This commit is contained in:
parent
c0a33536b9
commit
4bf84361a4
1 changed files with 205 additions and 0 deletions
205
experiment/compare_callgraphs.py
Normal file
205
experiment/compare_callgraphs.py
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
# PEP 723 inline deps (https://peps.python.org/pep-0723/):
|
||||
#
|
||||
# /// script
|
||||
# requires-python = ">=3.13"
|
||||
# dependencies = [
|
||||
# "androguard==4.1.3",
|
||||
# ]
|
||||
# ///
|
||||
|
||||
import zipfile
|
||||
|
||||
from argparse import ArgumentParser
|
||||
from pathlib import Path
|
||||
|
||||
from androguard.misc import AnalyzeAPK
|
||||
from androguard.core.analysis.analysis import Analysis
|
||||
from androguard.core import dex
|
||||
from androguard.util import set_log
|
||||
|
||||
from networkx.classes.digraph import DiGraph
|
||||
|
||||
set_log("CRITICAL")
|
||||
|
||||
|
||||
GLUE_METHODS: set[str] = {
|
||||
"Ljava/lang/reflect/Method;->invoke(Ljava/lang/Object;[Ljava/lang/Object;)Ljava/lang/Object;",
|
||||
"Ljava/lang/reflect/Method;->getName()Ljava/lang/String;",
|
||||
"Ljava/lang/reflect/Method;->getParameterTypes()[Ljava/lang/Class;",
|
||||
"Ljava/lang/reflect/Method;->getReturnType()Ljava/lang/Class;",
|
||||
"Ljava/lang/reflect/Method;->getDeclaringClass()Ljava/lang/Class;",
|
||||
"Ljava/lang/String;->equals(Ljava/lang/Object;)Z"
|
||||
"Ljava/lang/Class;->newInstance()Ljava/lang/Object;",
|
||||
"Ljava/lang/reflect/Constructor;->newInstance([Ljava/lang/Object;)Ljava/lang/Object;",
|
||||
"Ljava/lang/reflect/Constructor;->getParameterTypes()[Ljava/lang/Class;",
|
||||
"Ljava/lang/reflect/Constructor;->getDeclaringClass()Ljava/lang/Class;",
|
||||
"Ljava/lang/Class;->descriptorString()Ljava/lang/String;",
|
||||
"Ljava/lang/Boolean;->booleanValue()Z"
|
||||
"Ljava/lang/Byte;->byteValue()B"
|
||||
"Ljava/lang/Short;->shortValue()S"
|
||||
"Ljava/lang/Character;->charValue()C"
|
||||
"Ljava/lang/Integer;->intValue()I"
|
||||
"Ljava/lang/Long;->longValue()J"
|
||||
"Ljava/lang/Float;->floatValue()F"
|
||||
"Ljava/lang/Double;->doubleValue()D"
|
||||
"Ljava/lang/Boolean;->valueOf(Z)Ljava/lang/Boolean;",
|
||||
"Ljava/lang/Byte;->valueOf(B)Ljava/lang/Byte;",
|
||||
"Ljava/lang/Short;->valueOf(S)Ljava/lang/Short;",
|
||||
"Ljava/lang/Character;->valueOf(C)Ljava/lang/Character;",
|
||||
"Ljava/lang/Integer;->valueOf(I)Ljava/lang/Integer;",
|
||||
"Ljava/lang/Long;->valueOf(J)Ljava/lang/Long;",
|
||||
"Ljava/lang/Float;->valueOf(F)Ljava/lang/Float;",
|
||||
"Ljava/lang/Double;->valueOf(D)Ljava/lang/Double;",
|
||||
"Ljava/lang/Class;->getClassLoader()Ljava/lang/ClassLoader;",
|
||||
"Ljava/lang/ClassLoader;->getParent()Ljava/lang/ClassLoader;",
|
||||
"Ljava/lang/Object;->getClass()Ljava/lang/Class;",
|
||||
"Ljava/lang/Object;->toString()Ljava/lang/String;",
|
||||
# Classes used:
|
||||
#
|
||||
# "Ljava/lang/BootClassLoader;",
|
||||
# "Ljava/lang/Object;",
|
||||
# "Ldalvik/system/DelegateLastClassLoader;",
|
||||
# "Ljava/lang/Boolean;",
|
||||
# "Ljava/lang/Byte;",
|
||||
# "Ljava/lang/Short;",
|
||||
# "Ljava/lang/Character;",
|
||||
# "Ljava/lang/Integer;",
|
||||
# "Ljava/lang/Long;",
|
||||
# "Ljava/lang/Float;",
|
||||
# "Ljava/lang/Double;",
|
||||
}
|
||||
|
||||
|
||||
def is_generated_method(method) -> bool:
|
||||
class_def = method.get_class_name()
|
||||
if class_def.startswith("Ltheseus/") and class_def.endswith("/T;"):
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def is_glue_method(method) -> bool:
|
||||
if is_generated_method(method):
|
||||
return True
|
||||
full_name = (
|
||||
f"{method.get_class_name()}->{method.get_name()}{method.get_descriptor()}"
|
||||
)
|
||||
return full_name in GLUE_METHODS
|
||||
|
||||
|
||||
def count_edges(cg: DiGraph) -> tuple[int, int]:
|
||||
"""Count method calls and method calls that we may have added (glue methods).
|
||||
Comparing this number of glue edges allows to compute how many actuall edges we added.
|
||||
"""
|
||||
n = 0
|
||||
glue = 0
|
||||
for u, v in cg.edges():
|
||||
n += 1
|
||||
if is_generated_method(u) or is_glue_method(v):
|
||||
glue += 1
|
||||
# print(f"{u.get_name()} -> {v.get_name()}")
|
||||
|
||||
return n, glue
|
||||
|
||||
|
||||
def main():
|
||||
parser = ArgumentParser(
|
||||
description="Compare the call graph of an application and its patched version"
|
||||
)
|
||||
parser.add_argument("app", help="The original application", type=Path)
|
||||
parser.add_argument("patched_app", help="The patched apk", type=Path)
|
||||
parser.add_argument(
|
||||
"--show-new-methods", action="store_true", help="Show added methods edges"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--csv-format",
|
||||
action="store_true",
|
||||
help="Show the results in a CSV format (apk sha256, nb edge before, nb edges after, added without glue, added ref only)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dyn-bytecode", action="extend", nargs="+", type=Path, default=[]
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
print(f"app: {args.app}\npatched: {args.patched_app}")
|
||||
|
||||
assert args.app.exists()
|
||||
assert args.patched_app.exists()
|
||||
|
||||
apk, _, dx = AnalyzeAPK(args.app)
|
||||
cg = dx.get_call_graph()
|
||||
_, _, dx2 = AnalyzeAPK(args.patched_app)
|
||||
cg_patched = dx2.get_call_graph()
|
||||
|
||||
dyn_cgs = []
|
||||
for dyn in args.dyn_bytecode:
|
||||
if zipfile.is_zipfile(dyn):
|
||||
_, _, dx = AnalyzeAPK(dyn)
|
||||
else:
|
||||
dx = Analysis()
|
||||
with dyn.open("rb") as fp:
|
||||
raw = fp.read()
|
||||
d = dex.DEX(raw, using_api=apk.get_target_sdk_version())
|
||||
dx.add(d)
|
||||
dx.create_xref()
|
||||
|
||||
dyn_cgs.append(dx.get_call_graph())
|
||||
nb_methods_app = cg.number_of_nodes()
|
||||
nb_methods_pch = cg_patched.number_of_nodes()
|
||||
nb_methods_dyn = sum(map(lambda x: x.number_of_nodes(), dyn_cgs))
|
||||
|
||||
nb_edges_app, nb_glue_app = count_edges(cg)
|
||||
nb_edges_pch, nb_glue_pch = count_edges(cg_patched)
|
||||
nb_edges_dyn, nb_glue_dyn = 0, 0
|
||||
for cgd in dyn_cgs:
|
||||
nb_e, nb_g = count_edges(cgd)
|
||||
nb_edges_dyn += nb_e
|
||||
nb_glue_dyn += nb_g
|
||||
|
||||
added_glue = nb_glue_pch - nb_glue_dyn - nb_glue_app
|
||||
added_edges = nb_edges_pch - nb_edges_app - added_glue
|
||||
|
||||
if args.csv_format:
|
||||
import hashlib
|
||||
|
||||
with args.app.open("rb") as fp:
|
||||
hash = hashlib.file_digest(fp, "sha256").hexdigest()
|
||||
print(
|
||||
f"{hash},{nb_edges_app},{nb_edges_pch},{added_edges},{added_edges - nb_edges_dyn}"
|
||||
)
|
||||
# apk sha256, nb edge before, nb edges after, added without glue, added ref only
|
||||
else:
|
||||
print("app:")
|
||||
print(f" nodes: {nb_methods_app}")
|
||||
print(f" nb edges {nb_edges_app}")
|
||||
print(f" glue edges {nb_glue_app}")
|
||||
print("dyn loaded:")
|
||||
print(f" nodes: {nb_methods_dyn}")
|
||||
print(f" nb edges {nb_edges_dyn}")
|
||||
print(f" glue edges {nb_glue_dyn}")
|
||||
print("patched:")
|
||||
print(f" nb node: {nb_methods_pch}")
|
||||
print(f" nb edges {nb_edges_pch}")
|
||||
print(f" glue edges {nb_glue_pch}")
|
||||
print("")
|
||||
print(
|
||||
f"Total edges added: {added_edges} ({added_edges - nb_edges_dyn} ref only)"
|
||||
)
|
||||
|
||||
if args.show_new_methods:
|
||||
for u, v in cg_patched.edges():
|
||||
if is_generated_method(u) or is_glue_method(v):
|
||||
continue
|
||||
if (u.full_name, v.full_name) in set(
|
||||
map(lambda x: (x[0].full_name, x[1].full_name), cg.edges())
|
||||
):
|
||||
continue
|
||||
print(
|
||||
f"{u.get_class_name()}->{u.get_name()} ==> {v.get_class_name()}->{v.get_name()}"
|
||||
)
|
||||
|
||||
return cg_patched
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
cg = main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue