diff --git a/android_class_shadowing_scanner/__init__.py b/android_class_shadowing_scanner/__init__.py index bb90c44..bc57026 100644 --- a/android_class_shadowing_scanner/__init__.py +++ b/android_class_shadowing_scanner/__init__.py @@ -1,5 +1,7 @@ import zipfile import io +import hashlib +import pprint from argparse import ArgumentParser from pathlib import Path @@ -20,6 +22,11 @@ def main(): "--sha256", help="The sha256 hash of the APK to download", type=str ) apk_parser.add_argument("--apk", help="The APK to use", type=Path) + apk_parser.add_argument( + "--apk-list", + help="A file containing a list of path to application (one by line)", + type=Path, + ) apk_parser.add_argument( "--sha256-list", help="A file containing a list of application sha256s (one by line)", @@ -39,6 +46,11 @@ def main(): help="The directory where to output results, when no set results are printed to stdout", type=Path, ) + parser.add_argument( + "--pprint", + help="Print the result with pprint and add more information when available", + action="store_true", + ) SECRET_STORAGE_IMPORTED = False try: @@ -60,19 +72,49 @@ def main(): raise RuntimeError("--output-dir must be a directory") args.output_dir.mkdir(parents=True, exist_ok=True) + # Case 1: apk from file + apks = [] if args.apk: - with args.apk.open("rb") as file: - with zipfile.ZipFile(file) as apk: - entry = analyze(apk) - pprint.pprint(entry) - exit() + apks.append(args.apk) + if args.apk_list: + with args.apk_list.open("r") as file: + for line in file: + if not line.strip(): + continue + apks.append(Path(line.strip())) + for apk_path in apks: + with apk_path.open("rb") as file: + digest = hashlib.file_digest(file, "sha256") + sha256 = digest.hexdigest().upper() + if args.output_dir and (args.output_dir / sha256).exists(): + continue + + if args.pprint: + print(f"APK: {str(apk_path)}") + with apk_path.open("rb") as file: + with zipfile.ZipFile(file) as apk: + entry = analyze(apk, sha256, verbose=args.pprint) + if args.pprint: + pprint.pprint(entry) + if not args.output_dir: + print(entry.to_string()) + else: + with (args.output_dir / sha256).open("w") as file: + file.write(entry) + + if apks: + exit() + + # Case 2: apk from SHA256 sha256s = [] if args.sha256: sha256s.append(args.sha256) if args.sha256_list: with args.sha256_list.open("r") as file: for line in file: + if not line.strip(): + continue sha256s.append(line.strip()) api_key = "" @@ -101,9 +143,11 @@ def main(): if args.output_dir and (args.output_dir / sha256).exists(): continue with zipfile.ZipFile(io.BytesIO(download_apk(sha256, api_key))) as apk: - entry = analyze(apk, sha256) - if not args.output_dir: - print(entry.to_string()) - else: - with (args.output_dir / sha256).open("w") as file: - file.write(entry) + entry = analyze(apk, sha256, verbose=args.pprint) + if args.pprint: + pprint.pprint(entry) + if not args.output_dir: + print(entry.to_string()) + else: + with (args.output_dir / sha256).open("w") as file: + file.write(entry) diff --git a/android_class_shadowing_scanner/analysis.py b/android_class_shadowing_scanner/analysis.py index 7286b7b..910af51 100644 --- a/android_class_shadowing_scanner/analysis.py +++ b/android_class_shadowing_scanner/analysis.py @@ -17,6 +17,7 @@ from .platform_classes import ( SDK_33_CLASSES, PLATFORM_34_CLASSES, SDK_34_CLASSES, + D8_CLASSES, ) from .data import ApkData @@ -62,29 +63,37 @@ androguard.core.dex.HiddenApiClassDataItem.RestrictionApiFlag = ( @dataclass class PlatformClassesData: nb_duplicate_classes: int - nb_platform_32_classes: int - nb_platform_non_sdk_32_classes: int - nb_sdk_32_classes: int - nb_platform_33_classes: int - nb_platform_non_sdk_33_classes: int - nb_sdk_33_classes: int - nb_platform_34_classes: int - nb_platform_non_sdk_34_classes: int - nb_sdk_34_classes: int + nb_def_platform_32_classes: int + nb_def_platform_non_sdk_32_classes: int + nb_def_sdk_32_classes: int + nb_ref_platform_non_sdk_32_classes: int + nb_def_platform_33_classes: int + nb_def_platform_non_sdk_33_classes: int + nb_def_sdk_33_classes: int + nb_ref_platform_non_sdk_33_classes: int + nb_def_platform_34_classes: int + nb_def_platform_non_sdk_34_classes: int + nb_def_sdk_34_classes: int + nb_ref_platform_non_sdk_34_classes: int -def scan_classes(apk: zipfile.ZipFile, file_names: set[str]) -> PlatformClassesData: +def scan_classes( + apk: zipfile.ZipFile, file_names: set[str], verbose: bool = False +) -> PlatformClassesData: all_classes = set() duplicated_classes = set() platform_32_classes = set() sdk_32_classes = set() platform_non_sdk_32_classes = set() + ref_platform_non_sdk_32_classes = set() platform_33_classes = set() sdk_33_classes = set() platform_non_sdk_33_classes = set() + ref_platform_non_sdk_33_classes = set() platform_34_classes = set() sdk_34_classes = set() platform_non_sdk_34_classes = set() + ref_platform_non_sdk_34_classes = set() for name in file_names: with apk.open(name) as dex_f: dex = DEX(dex_f.read()) @@ -110,21 +119,103 @@ def scan_classes(apk: zipfile.ZipFile, file_names: set[str]) -> PlatformClassesD if clazz in PLATFORM_34_CLASSES and clazz not in SDK_34_CLASSES: platform_non_sdk_34_classes.add(clazz) all_classes.add(clazz) + types = dex.map_list.get_item_type( + androguard.core.dex.TypeMapItem.TYPE_ID_ITEM + ).type + for ty in types: + ty_name = ty.descriptor_idx_value + if len(ty_name) < 2: + continue + if ( + ty_name in PLATFORM_32_CLASSES + and not ty_name in SDK_32_CLASSES + and ty_name not in D8_CLASSES + ): + ref_platform_non_sdk_32_classes.add(ty_name) + if ( + ty_name in PLATFORM_33_CLASSES + and not ty_name in SDK_33_CLASSES + and ty_name not in D8_CLASSES + ): + ref_platform_non_sdk_33_classes.add(ty_name) + if ( + ty_name in PLATFORM_34_CLASSES + and not ty_name in SDK_34_CLASSES + and ty_name not in D8_CLASSES + ): + ref_platform_non_sdk_34_classes.add(ty_name) + if verbose: + if duplicated_classes: + print("Duplicated classes:") + for cl in duplicated_classes: + print(f" {cl}") + if platform_32_classes: + print("Redefined Platform Classes (v32):") + for cl in platform_32_classes: + print(f" {cl}") + if sdk_32_classes: + print("Redefined SDK Classes (v32):") + for cl in sdk_32_classes: + print(f" {cl}") + if platform_non_sdk_32_classes: + print("Redefined non-SDK Platform Classes (v32):") + for cl in platform_non_sdk_32_classes: + print(f" {cl}") + if ref_platform_non_sdk_32_classes: + print("Reference to non-SDK Platform Classes (v32):") + for cl in ref_platform_non_sdk_32_classes: + print(f" {cl}") + if platform_33_classes: + print("Redefined Platform Classes (v33):") + for cl in platform_33_classes: + print(f" {cl}") + if sdk_33_classes: + print("Redefined SDK Classes (v33):") + for cl in sdk_33_classes: + print(f" {cl}") + if platform_non_sdk_33_classes: + print("Redefined non-SDK Platform Classes (v33):") + for cl in platform_non_sdk_33_classes: + print(f" {cl}") + if ref_platform_non_sdk_33_classes: + print("Reference to non-SDK Platform Classes (v33):") + for cl in ref_platform_non_sdk_33_classes: + print(f" {cl}") + if platform_34_classes: + print("Redefined Platform Classes (v34):") + for cl in platform_34_classes: + print(f" {cl}") + if sdk_34_classes: + print("Redefined SDK Classes (v34):") + for cl in sdk_34_classes: + print(f" {cl}") + if platform_non_sdk_34_classes: + print("Redefined non-SDK Platform Classes (v34):") + for cl in platform_non_sdk_34_classes: + print(f" {cl}") + if ref_platform_non_sdk_34_classes: + print("Reference to non-SDK Platform Classes (v34):") + for cl in ref_platform_non_sdk_34_classes: + print(f" {cl}") + return PlatformClassesData( nb_duplicate_classes=len(duplicated_classes), - nb_platform_32_classes=len(platform_32_classes), - nb_platform_non_sdk_32_classes=len(platform_non_sdk_32_classes), - nb_sdk_32_classes=len(sdk_32_classes), - nb_platform_33_classes=len(platform_33_classes), - nb_platform_non_sdk_33_classes=len(platform_non_sdk_33_classes), - nb_sdk_33_classes=len(sdk_33_classes), - nb_platform_34_classes=len(platform_34_classes), - nb_platform_non_sdk_34_classes=len(platform_non_sdk_34_classes), - nb_sdk_34_classes=len(sdk_34_classes), + nb_def_platform_32_classes=len(platform_32_classes), + nb_def_platform_non_sdk_32_classes=len(platform_non_sdk_32_classes), + nb_def_sdk_32_classes=len(sdk_32_classes), + nb_ref_platform_non_sdk_32_classes=len(ref_platform_non_sdk_32_classes), + nb_def_platform_33_classes=len(platform_33_classes), + nb_def_platform_non_sdk_33_classes=len(platform_non_sdk_33_classes), + nb_def_sdk_33_classes=len(sdk_33_classes), + nb_ref_platform_non_sdk_33_classes=len(ref_platform_non_sdk_33_classes), + nb_def_platform_34_classes=len(platform_34_classes), + nb_def_platform_non_sdk_34_classes=len(platform_non_sdk_34_classes), + nb_def_sdk_34_classes=len(sdk_34_classes), + nb_ref_platform_non_sdk_34_classes=len(ref_platform_non_sdk_34_classes), ) -def analyze(apk: zipfile.ZipFile, sha256: str) -> ApkData: +def analyze(apk: zipfile.ZipFile, sha256: str, verbose: bool = False) -> ApkData: classes_dex = set( filter( lambda name: name.startswith("classes") and name.endswith(".dex"), @@ -166,7 +257,7 @@ def analyze(apk: zipfile.ZipFile, sha256: str) -> ApkData: has_non_consecutive_classes_dex = True break - platform_classes_data = scan_classes(apk, classes_dex) + platform_classes_data = scan_classes(apk, classes_dex, verbose=verbose) return ApkData( sha256=sha256, diff --git a/android_class_shadowing_scanner/data.py b/android_class_shadowing_scanner/data.py index 781e780..2272302 100644 --- a/android_class_shadowing_scanner/data.py +++ b/android_class_shadowing_scanner/data.py @@ -6,15 +6,18 @@ from typing import Self class ApkData: sha256: str nb_duplicate_classes: int - nb_platform_32_classes: int - nb_platform_non_sdk_32_classes: int - nb_sdk_32_classes: int - nb_platform_33_classes: int - nb_platform_non_sdk_33_classes: int - nb_sdk_33_classes: int - nb_platform_34_classes: int - nb_platform_non_sdk_34_classes: int - nb_sdk_34_classes: int + nb_def_platform_32_classes: int + nb_def_platform_non_sdk_32_classes: int + nb_def_sdk_32_classes: int + nb_ref_platform_non_sdk_32_classes: int + nb_def_platform_33_classes: int + nb_def_platform_non_sdk_33_classes: int + nb_def_sdk_33_classes: int + nb_ref_platform_non_sdk_33_classes: int + nb_def_platform_34_classes: int + nb_def_platform_non_sdk_34_classes: int + nb_def_sdk_34_classes: int + nb_ref_platform_non_sdk_34_classes: int has_classes0_dex: bool has_classes1_dex: bool has_classes0X_dex: bool diff --git a/android_class_shadowing_scanner/platform_classes.py b/android_class_shadowing_scanner/platform_classes.py index b4445a8..e6d87bf 100644 --- a/android_class_shadowing_scanner/platform_classes.py +++ b/android_class_shadowing_scanner/platform_classes.py @@ -9,6 +9,13 @@ SDK_33_CLASSES = set() PLATFORM_34_CLASSES = set() SDK_34_CLASSES = set() +# Classes added by D8 for internal stuff +D8_CLASSES = { + "Ldalvik/annotation/InnerClass;", + "Ldalvik/annotation/Signature;", + "Ldalvik/annotation/EnclosingMethod;", +} + with (local_dir / "android-32" / "classes.txt").open() as file: for line in file: class_name = line.strip()