def _collect_smali_dirs(jars_root: Path) -> list[Path]:
    """Return the ``smali*`` directories found inside the ``*.out`` folders of *jars_root*.

    An empty list is returned when *jars_root* is not a directory. Entries are
    visited in sorted order so that the "first match" picked by the caller is
    the same from one run to the next.
    """
    smali_dirs: list[Path] = []
    if not jars_root.is_dir():
        return smali_dirs
    for jar_dir in sorted(jars_root.iterdir()):
        if not jar_dir.name.endswith(".out") or not jar_dir.is_dir():
            continue
        smali_dirs.extend(
            smali_dir
            for smali_dir in sorted(jar_dir.iterdir())
            if smali_dir.name.startswith("smali")
        )
    return smali_dirs


def _find_differing_classes(
    classes, reference_dirs: list[Path], app_dirs: list[Path], reference_root: Path
) -> set:
    """Return the classes whose smali in the app differs from the reference smali.

    *classes* is an iterable of class descriptors of the form ``Lsome/pkg/Name;``.
    Each one is looked up as ``some/pkg/Name.smali`` in *reference_dirs*; the
    first hit is the reference text. *reference_root* is only used in the
    diagnostic messages.
    """
    differing = set()
    for cl in classes:
        cl_f = cl.removesuffix(";").removeprefix("L") + ".smali"
        candidates = [
            ref_dir / cl_f for ref_dir in reference_dirs if (ref_dir / cl_f).exists()
        ]
        if not candidates:
            print(f"{cl} not found in {reference_root}, strange")
            continue
        if len(candidates) > 1:
            print(
                f"Multiple {cl} found in {reference_root}, "
                f"strange, {candidates[0]} selected"
            )
        reference_smali = candidates[0].read_text(encoding="utf-8")

        # NOTE(review): the class is reported as soon as one copy found in the
        # app differs from the reference; confirm this is the intended rule
        # when a class is present in several dex files of the same app.
        for app_dir in app_dirs:
            app_file = app_dir / cl_f
            if (
                app_file.exists()
                and app_file.read_text(encoding="utf-8") != reference_smali
            ):
                differing.add(cl)
                break
    return differing


def check_smali_platform():
    """Compare the platform/SDK classes redefined by one APK with the real ones.

    Command line entry point. The APK identified by ``--sha256`` is downloaded,
    analysed with ``analyze`` and disassembled with apktool in a temporary
    directory. For every ``android-<version>`` folder of
    ``--path-platform-smali``, the classes listed by the analysis under
    ``platform_<version>_classes`` and ``sdk_<version>_classes`` are compared,
    as smali text, with the disassembled framework jars. The classes whose
    smali differs are stored under ``platform_<version>_diff_smalli`` and
    ``sdk_<version>_diff_smalli``.

    The resulting JSON is written to ``<output-dir>/<sha256>``, or printed to
    stdout when ``--output-dir`` is not given. Nothing is written when the APK
    cannot be downloaded.

    Raises:
        RuntimeError: if ``--output-dir`` exists and is not a directory.
    """
    parser = ArgumentParser(
        prog="Smalli Check",
        description="Check if duplicated classes are distinct from the actual sources",
    )
    parser.add_argument(
        "--sha256",
        help="sha256 of the apk to test",
        type=str,
        required=True,
    )
    parser.add_argument(
        "--output-dir",
        help="The directory where to output results, when no set results are printed to stdout",
        type=Path,
    )
    parser.add_argument(
        "--apktool-jar",
        help="Path to the apktool jar file",
        type=Path,
        required=True,
    )
    parser.add_argument(
        "--path-platform-smali",
        help=(
            "Path to the folder containing the framework jars dissassembled by apktool.\n"
            "This folder need to be of the form:\n"
            " android-/\n"
            " platform/\n"
            " .jar.out/\n"
            " smali\n"
            " smali_classes\n"
            " sdk/\n"
            " android.jar.out/\n"
            " smali\n"
            " smali_classes\n"
        ),
        type=Path,
        required=True,
    )
    key_parser = parser.add_mutually_exclusive_group(required=False)
    key_parser.add_argument(
        "--api-key-file",
        help="The path to a file containing the Androzoo API key",
        type=Path,
    )
    key_parser.add_argument(
        "--api-key", help="The Androzoo API key (Usage NOT recommanded)", type=str
    )
    # The keyring option is only offered when the optional dependency is there.
    try:
        import secretstorage
    except ModuleNotFoundError:
        secretstorage = None
    else:
        key_parser.add_argument(
            "--api-key-keyring-id",
            help="The ID of the Androzoo API key in the secret service storage",
            type=str,
        )
    args = parser.parse_args()

    if args.output_dir:
        if args.output_dir.exists() and not args.output_dir.is_dir():
            raise RuntimeError("--output-dir must be a directory")
        # exist_ok avoids a crash when several invocations started in parallel
        # try to create the same directory.
        args.output_dir.mkdir(parents=True, exist_ok=True)

    apktool = args.apktool_jar.resolve()

    api_key = ""
    if args.api_key:
        api_key = args.api_key
    if args.api_key_file:
        with args.api_key_file.open("r") as file:
            api_key = file.read().strip()
    if secretstorage is not None and not api_key:
        key_id = args.api_key_keyring_id or "androzoo"
        try:
            with secretstorage.dbus_init() as connection:
                collection = secretstorage.get_default_collection(connection)
                item = next(collection.search_items({"Title": key_id}))
                item.unlock()
                api_key = item.get_secret().decode("utf-8").strip()
        except Exception:
            # Best effort: any keyring failure falls back to the prompt below.
            pass
    if not api_key:
        api_key = getpass(prompt="Androzoo API key: ").strip()

    # One APK per invocation: selecting the hashes to test is left to the caller.
    sha256 = args.sha256
    data = {}
    with tempfile.TemporaryDirectory() as tmpdirname:
        d = Path(tmpdirname)
        apk_bin = download_apk(sha256, api_key, logfile=None)
        if apk_bin is None:
            return
        (d / "app.apk").write_bytes(apk_bin)
        androguard_apk = APK(str(d / "app.apk"))
        with zipfile.ZipFile(io.BytesIO(apk_bin)) as apk:
            # The analysis results are stored directly in `data`.
            analyze(apk, androguard_apk, sha256, json_out=data)

        r = subprocess.run(
            [
                "java",
                "-Xmx8G",
                "-jar",
                str(apktool),
                "d",
                "app.apk",
                "-o",
                "apktool_out",
            ],
            cwd=d,
        )
        data["apktool-finished"] = (r.returncode == 0) and (
            d / "apktool_out" / "apktool.yml"
        ).exists()

        # apktool puts classes.dex in `smali` and classesN.dex in `smali_classesN`.
        smalli_dirs = [
            d
            / "apktool_out"
            / ("smali" if dex == "classes.dex" else "smali_" + dex.removesuffix(".dex"))
            for dex in data["class_dex"]
        ]

        for a_sdk_dir in sorted(args.path_platform_smali.iterdir()):
            if not a_sdk_dir.is_dir():
                continue
            sdk_v = a_sdk_dir.name.removeprefix("android-")
            platform_key = f"platform_{sdk_v}_classes"
            sdk_key = f"sdk_{sdk_v}_classes"
            if platform_key not in data or sdk_key not in data:
                print(f"No analysis result for {a_sdk_dir.name}, skipped")
                continue

            plat_diff_smalli = _find_differing_classes(
                data[platform_key],
                _collect_smali_dirs(a_sdk_dir / "platform"),
                smalli_dirs,
                a_sdk_dir / "platform",
            )
            # SDK classes are compared with the SDK jars, not the platform ones.
            sdk_diff_smalli = _find_differing_classes(
                data[sdk_key],
                _collect_smali_dirs(a_sdk_dir / "sdk"),
                smalli_dirs,
                a_sdk_dir / "sdk",
            )

            data[f"sdk_{sdk_v}_diff_smalli"] = sorted(sdk_diff_smalli)
            data[f"platform_{sdk_v}_diff_smalli"] = sorted(plat_diff_smalli)

    if args.output_dir:
        with (args.output_dir / sha256).open("w") as file:
            json.dump(data, file)
    else:
        print(json.dumps(data, indent=2))
#!/usr/bin/env bash
#
# Experiment 5: for every application of the database that redefines at least
# one platform class (API 32, 33 or 34), run `check-platf-reder` to compare the
# redefined classes with the ones of the framework jars.
#
# Steps:
#   1. unzip platforms.zip in a temporary directory and disassemble every jar
#      of the platform/ and sdk/ folders with apktool,
#   2. select the applications to test from the sqlite database,
#   3. split the list into chunks and start one background worker per chunk.
#
# The results are written in ${OUT_DIR} (default: the current directory).
# The script returns as soon as the workers are started.

WD=$(pwd)
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
PLATFORM_DIR=$(mktemp -d)
DB="${SCRIPT_DIR}/data/app-2023-xp4.db"
LIST=$(mktemp)
APKTOOL="${SCRIPT_DIR}/apktool.jar"
ANDROZOO_KEY="${SCRIPT_DIR}/.ZOO_KEY"
CHUNK_DIR="./app-2023-exp4"
N_WORKERS=20
# OUT_DIR can be set from the environment; by default results go to the
# directory the script was started from.
OUT_DIR="${OUT_DIR:-${WD}}"

unzip platforms.zip -d "${PLATFORM_DIR}"

# apktool writes `<name>.jar.out` in the current directory, so it is run from
# the folder of the jar. The subshell keeps the working directory of the
# script unchanged.
for ad in "${PLATFORM_DIR}"/*/{platform,sdk}; do
    [ -d "${ad}" ] || continue
    for jar in "${ad}"/*.jar; do
        [ -f "${jar}" ] || continue
        (cd "${ad}" && java -Xmx8G -jar "${APKTOOL}" d "${jar}")
    done
done

sqlite3 "${DB}" 'SELECT sha256 FROM data WHERE nb_def_platform_32_classes >= 1 OR nb_def_platform_33_classes >= 1 OR nb_def_platform_34_classes >= 1;' > "${LIST}"

# Chunk size chosen so that at most ${N_WORKERS} chunks are created.
N_CHUNK=$(( $(wc -l < "${LIST}") / N_WORKERS + 1 ))
rm -rf "${CHUNK_DIR}"
mkdir "${CHUNK_DIR}"
split -a 2 -d -l "${N_CHUNK}" "${LIST}" "${CHUNK_DIR}/"

# Test every application listed (one sha256 per line) in the file ${1}.
worker () {
    while IFS= read -r sha; do
        [ -n "${sha}" ] || continue
        # stdin is detached so the command cannot consume the list being read.
        "${SCRIPT_DIR}"/venv/bin/check-platf-reder --api-key-file "${ANDROZOO_KEY}" --sha256 "${sha}" --path-platform-smali "${PLATFORM_DIR}" --apktool-jar "${APKTOOL}" --output-dir "${OUT_DIR}" < /dev/null
    done < "${1}"
    echo "Finished ${1}"
}

# One worker per chunk actually created by split (there may be fewer than
# ${N_WORKERS} when the list is short).
for lst in "${CHUNK_DIR}"/*; do
    [ -f "${lst}" ] || continue
    worker "${lst}" &
    echo "Launched worker for ${lst}"
done

echo 'PROCESS LAUNCHED'