diff --git a/experiment/compare.sh b/experiment/compare.sh new file mode 100644 index 0000000..9175713 --- /dev/null +++ b/experiment/compare.sh @@ -0,0 +1,14 @@ + +# use `uv run --script` instead of python because neither python nor pip supports PEP 723 metadata, and a whole package for one dep is overkill + +echo "sha256,edges_before,edges_after,added,added_ref_only" +uv run --script compare_callgraphs.py 0019D7FB6ADDA0619C0BEFC8DE53E2E59139B3BC0DE62E30BB0E2AB5B2C6D79D/{original.apk,patched.apk} --dyn-bytecode 0019D7FB6ADDA0619C0BEFC8DE53E2E59139B3BC0DE62E30BB0E2AB5B2C6D79D/DexClassLoader_06e0f355_7f2bf9d6e8990548.bytecode --csv-format +uv run --script compare_callgraphs.py 274B677449ACB313396C833475183E384D69C611F5FCA0DFCA4E415FB057C012/{original.apk,patched.apk} --dyn-bytecode 274B677449ACB313396C833475183E384D69C611F5FCA0DFCA4E415FB057C012/InMemoryDexClassLoader_02660f87_9a71e65fab380fdd.bytecode 274B677449ACB313396C833475183E384D69C611F5FCA0DFCA4E415FB057C012/InMemoryDexClassLoader_071ee3ab_ffd17def1b374966.bytecode --csv-format +uv run --script compare_callgraphs.py 34599C24994658C0FE3D40A67E655584AF657408C803595B771DCAC58A6A7F02/{original.apk,patched.apk} --dyn-bytecode 34599C24994658C0FE3D40A67E655584AF657408C803595B771DCAC58A6A7F02/InMemoryDexClassLoader_00b67e31_191f4ebf5f4f6abe.bytecode 34599C24994658C0FE3D40A67E655584AF657408C803595B771DCAC58A6A7F02/InMemoryDexClassLoader_047570a2_3a0f974f46226e85.bytecode --csv-format +uv run --script compare_callgraphs.py 35065C683441E62C59C0DA0D86E6793256E33E54834E22AD0F70F44C99419E2F/{original.apk,patched.apk} --dyn-bytecode 35065C683441E62C59C0DA0D86E6793256E33E54834E22AD0F70F44C99419E2F/InMemoryDexClassLoader_05374c50_24b5d062d94bf20c.bytecode 35065C683441E62C59C0DA0D86E6793256E33E54834E22AD0F70F44C99419E2F/InMemoryDexClassLoader_07ccdd7c_2b17b4115bf3cb31.bytecode --csv-format +uv run --script compare_callgraphs.py E7B2FB02FF14706D989BE662CEE89954FD49CFBAB3CEEE449CD215188EECA433/{original.apk,patched.apk} 
--dyn-bytecode E7B2FB02FF14706D989BE662CEE89954FD49CFBAB3CEEE449CD215188EECA433/DexClassLoader_0ace6346_8ef3c21dda54fce8.bytecode --csv-format +uv run --script compare_callgraphs.py EFECECC03CBD7EE7B73F80CCB2ABD6A5F59C7E33150D336AD7BF8601CFB9A4EF/{original.apk,patched.apk} --dyn-bytecode EFECECC03CBD7EE7B73F80CCB2ABD6A5F59C7E33150D336AD7BF8601CFB9A4EF/InMemoryDexClassLoader_06fbc21a_b44f6f6227ec96df.bytecode --csv-format +uv run --script compare_callgraphs.py F34CE1E7A81F935A5BB2D0B2B3FE81E62C1C8B906C92253C9CA467DA9BB3C9D1/{original.apk,patched.apk} --dyn-bytecode F34CE1E7A81F935A5BB2D0B2B3FE81E62C1C8B906C92253C9CA467DA9BB3C9D1/InMemoryDexClassLoader_021c35b6_c8de2e8473aa0901.bytecode F34CE1E7A81F935A5BB2D0B2B3FE81E62C1C8B906C92253C9CA467DA9BB3C9D1/InMemoryDexClassLoader_0f7fb4ea_0b2bfce46dac73c7.bytecode --csv-format + +# 5D2CD1D10ABE9B1E8D93C4C339A6B4E3D75895DE1FC49E248248B5F0B05EF1CE failed to patch +echo 5D2CD1D10ABE9B1E8D93C4C339A6B4E3D75895DE1FC49E248248B5F0B05EF1CE,,,, diff --git a/experiment/compare_callgraphs.py b/experiment/compare_callgraphs.py index fa64020..bc06773 100644 --- a/experiment/compare_callgraphs.py +++ b/experiment/compare_callgraphs.py @@ -113,7 +113,7 @@ def main(): parser.add_argument( "--csv-format", action="store_true", - help="Show the results in a CSV format (apk sha256, nb edge before, nb edges after, added without glue, added ref only)", + help="Show the results in a CSV format (apk sha256, nb edge before, nb edges after, added, added ref only)", ) parser.add_argument( "--dyn-bytecode", action="extend", nargs="+", type=Path, default=[] @@ -121,11 +121,6 @@ def main(): args = parser.parse_args() - print(f"app: {args.app}\npatched: {args.patched_app}") - - assert args.app.exists() - assert args.patched_app.exists() - apk, _, dx = AnalyzeAPK(args.app) cg = dx.get_call_graph() _, _, dx2 = AnalyzeAPK(args.patched_app) @@ -136,6 +131,7 @@ def main(): if zipfile.is_zipfile(dyn): _, _, dx = AnalyzeAPK(dyn) else: + print(dyn) dx = 
Analysis() with dyn.open("rb") as fp: raw = fp.read() @@ -144,6 +140,7 @@ def main(): dx.create_xref() dyn_cgs.append(dx.get_call_graph()) + nb_methods_app = cg.number_of_nodes() nb_methods_pch = cg_patched.number_of_nodes() nb_methods_dyn = sum(map(lambda x: x.number_of_nodes(), dyn_cgs)) @@ -157,18 +154,34 @@ def main(): nb_glue_dyn += nb_g added_glue = nb_glue_pch - nb_glue_dyn - nb_glue_app - added_edges = nb_edges_pch - nb_edges_app - added_glue + # added_edges = nb_edges_pch - nb_edges_app - added_glue # meh, doesn't work for 35065C683441E62C59C0DA0D86E6793256E33E54834E22AD0F70F44C99419E2F? + added_edges = nb_edges_pch - nb_edges_app + added_ref_only = 0 + + all_original_edges = set() + for u, v in cg.edges(): + all_original_edges.add((u.full_name, v.full_name)) + for cgd in dyn_cgs: + for u, v in cgd.edges(): + all_original_edges.add((u.full_name, v.full_name)) + + for u, v in cg_patched.edges(): + if is_generated_method(u) or is_glue_method(v): + continue + if (u.full_name, v.full_name) in all_original_edges: + continue + added_ref_only += 1 if args.csv_format: import hashlib with args.app.open("rb") as fp: - hash = hashlib.file_digest(fp, "sha256").hexdigest() - print( - f"{hash},{nb_edges_app},{nb_edges_pch},{added_edges},{added_edges - nb_edges_dyn}" - ) - # apk sha256, nb edge before, nb edges after, added without glue, added ref only + hash = hashlib.file_digest(fp, "sha256").hexdigest().upper() + print(f"{hash},{nb_edges_app},{nb_edges_pch},{added_edges},{added_ref_only}") + # apk sha256, nb edge before, nb edges after, added, added ref only else: + print(f"app: {args.app}\npatched: {args.patched_app}") + print("app:") print(f" nodes: {nb_methods_app}") print(f" nb edges {nb_edges_app}") @@ -182,21 +195,18 @@ def main(): print(f" nb edges {nb_edges_pch}") print(f" glue edges {nb_glue_pch}") print("") - print( - f"Total edges added: {added_edges} ({added_edges - nb_edges_dyn} ref only)" - ) + print(f"Total edges added: {added_edges} ({added_ref_only} 
ref only)") if args.show_new_methods: for u, v in cg_patched.edges(): if is_generated_method(u) or is_glue_method(v): continue - if (u.full_name, v.full_name) in set( - map(lambda x: (x[0].full_name, x[1].full_name), cg.edges()) - ): + if (u.full_name, v.full_name) in all_original_edges: continue - print( - f"{u.get_class_name()}->{u.get_name()} ==> {v.get_class_name()}->{v.get_name()}" - ) + # print( + # f"{u.get_class_name()}->{u.get_name()} ==> {v.get_class_name()}->{v.get_name()}" + # ) + print(f"{u.full_name} ==> {v.full_name}") return cg_patched