add better local testing capabilities

This commit is contained in:
Jean-Marie Mineau 2024-10-18 15:47:02 +02:00
parent fe3c16de35
commit decac18a0d
4 changed files with 186 additions and 41 deletions

View file

@ -1,5 +1,7 @@
import zipfile
import io
import hashlib
import pprint
from argparse import ArgumentParser
from pathlib import Path
@ -20,6 +22,11 @@ def main():
"--sha256", help="The sha256 hash of the APK to download", type=str
)
apk_parser.add_argument("--apk", help="The APK to use", type=Path)
apk_parser.add_argument(
"--apk-list",
help="A file containing a list of path to application (one by line)",
type=Path,
)
apk_parser.add_argument(
"--sha256-list",
help="A file containing a list of application sha256s (one by line)",
@ -39,6 +46,11 @@ def main():
help="The directory where to output results, when no set results are printed to stdout",
type=Path,
)
parser.add_argument(
"--pprint",
help="Print the result with pprint and add more information when available",
action="store_true",
)
SECRET_STORAGE_IMPORTED = False
try:
@ -60,19 +72,49 @@ def main():
raise RuntimeError("--output-dir must be a directory")
args.output_dir.mkdir(parents=True, exist_ok=True)
# Case 1: apk from file
apks = []
if args.apk:
with args.apk.open("rb") as file:
with zipfile.ZipFile(file) as apk:
entry = analyze(apk)
pprint.pprint(entry)
exit()
apks.append(args.apk)
if args.apk_list:
with args.apk_list.open("r") as file:
for line in file:
if not line.strip():
continue
apks.append(Path(line.strip()))
# Analyze every local APK gathered from --apk / --apk-list.
for apk_path in apks:
    # The uppercase SHA-256 of the file identifies the APK and names its
    # result file inside --output-dir.
    with apk_path.open("rb") as file:
        digest = hashlib.file_digest(file, "sha256")
    sha256 = digest.hexdigest().upper()
    # Skip APKs whose result file already exists in the output directory.
    if args.output_dir and (args.output_dir / sha256).exists():
        continue
    if args.pprint:
        print(f"APK: {apk_path}")
    with apk_path.open("rb") as file, zipfile.ZipFile(file) as apk:
        entry = analyze(apk, sha256, verbose=args.pprint)
    if args.pprint:
        pprint.pprint(entry)
    if not args.output_dir:
        print(entry.to_string())
    else:
        with (args.output_dir / sha256).open("w") as file:
            # A text-mode file only accepts str: write the serialized entry,
            # the same representation the stdout branch prints.
            file.write(entry.to_string())
if apks:
exit()
# Case 2: apk from SHA256
sha256s = []
if args.sha256:
sha256s.append(args.sha256)
if args.sha256_list:
with args.sha256_list.open("r") as file:
for line in file:
if not line.strip():
continue
sha256s.append(line.strip())
api_key = ""
@ -101,9 +143,11 @@ def main():
if args.output_dir and (args.output_dir / sha256).exists():
continue
with zipfile.ZipFile(io.BytesIO(download_apk(sha256, api_key))) as apk:
entry = analyze(apk, sha256)
if not args.output_dir:
print(entry.to_string())
else:
with (args.output_dir / sha256).open("w") as file:
file.write(entry)
entry = analyze(apk, sha256, verbose=args.pprint)
if args.pprint:
    pprint.pprint(entry)
if not args.output_dir:
    print(entry.to_string())
else:
    with (args.output_dir / sha256).open("w") as file:
        # A text-mode file only accepts str: write the serialized entry,
        # the same representation the stdout branch prints.
        file.write(entry.to_string())

View file

@ -17,6 +17,7 @@ from .platform_classes import (
SDK_33_CLASSES,
PLATFORM_34_CLASSES,
SDK_34_CLASSES,
D8_CLASSES,
)
from .data import ApkData
@ -62,29 +63,37 @@ androguard.core.dex.HiddenApiClassDataItem.RestrictionApiFlag = (
@dataclass
class PlatformClassesData:
nb_duplicate_classes: int
nb_platform_32_classes: int
nb_platform_non_sdk_32_classes: int
nb_sdk_32_classes: int
nb_platform_33_classes: int
nb_platform_non_sdk_33_classes: int
nb_sdk_33_classes: int
nb_platform_34_classes: int
nb_platform_non_sdk_34_classes: int
nb_sdk_34_classes: int
nb_def_platform_32_classes: int
nb_def_platform_non_sdk_32_classes: int
nb_def_sdk_32_classes: int
nb_ref_platform_non_sdk_32_classes: int
nb_def_platform_33_classes: int
nb_def_platform_non_sdk_33_classes: int
nb_def_sdk_33_classes: int
nb_ref_platform_non_sdk_33_classes: int
nb_def_platform_34_classes: int
nb_def_platform_non_sdk_34_classes: int
nb_def_sdk_34_classes: int
nb_ref_platform_non_sdk_34_classes: int
def scan_classes(apk: zipfile.ZipFile, file_names: set[str]) -> PlatformClassesData:
def scan_classes(
apk: zipfile.ZipFile, file_names: set[str], verbose: bool = False
) -> PlatformClassesData:
all_classes = set()
duplicated_classes = set()
platform_32_classes = set()
sdk_32_classes = set()
platform_non_sdk_32_classes = set()
ref_platform_non_sdk_32_classes = set()
platform_33_classes = set()
sdk_33_classes = set()
platform_non_sdk_33_classes = set()
ref_platform_non_sdk_33_classes = set()
platform_34_classes = set()
sdk_34_classes = set()
platform_non_sdk_34_classes = set()
ref_platform_non_sdk_34_classes = set()
for name in file_names:
with apk.open(name) as dex_f:
dex = DEX(dex_f.read())
@ -110,21 +119,103 @@ def scan_classes(apk: zipfile.ZipFile, file_names: set[str]) -> PlatformClassesD
if clazz in PLATFORM_34_CLASSES and clazz not in SDK_34_CLASSES:
platform_non_sdk_34_classes.add(clazz)
all_classes.add(clazz)
# Go through the TYPE_ID_ITEM entries of this DEX file and record the ones
# that name a platform class outside the SDK, for each API level.
type_id_items = dex.map_list.get_item_type(
    androguard.core.dex.TypeMapItem.TYPE_ID_ITEM
).type
# (platform classes, SDK classes, accumulator set) per API level.
reference_buckets = (
    (PLATFORM_32_CLASSES, SDK_32_CLASSES, ref_platform_non_sdk_32_classes),
    (PLATFORM_33_CLASSES, SDK_33_CLASSES, ref_platform_non_sdk_33_classes),
    (PLATFORM_34_CLASSES, SDK_34_CLASSES, ref_platform_non_sdk_34_classes),
)
for type_id in type_id_items:
    descriptor = type_id.descriptor_idx_value
    # Descriptors shorter than two characters and the D8-added classes are
    # never counted, whatever the API level.
    if len(descriptor) < 2 or descriptor in D8_CLASSES:
        continue
    for platform_classes, sdk_classes, referenced in reference_buckets:
        if descriptor in platform_classes and descriptor not in sdk_classes:
            referenced.add(descriptor)
# In verbose mode, list the members of every non-empty class set under its
# own heading.
if verbose:
    # (heading, class set) pairs, in the order they are printed.
    report_sections = (
        ("Duplicated classes:", duplicated_classes),
        ("Redefined Platform Classes (v32):", platform_32_classes),
        ("Redefined SDK Classes (v32):", sdk_32_classes),
        ("Redefined non-SDK Platform Classes (v32):", platform_non_sdk_32_classes),
        (
            "Reference to non-SDK Platform Classes (v32):",
            ref_platform_non_sdk_32_classes,
        ),
        ("Redefined Platform Classes (v33):", platform_33_classes),
        ("Redefined SDK Classes (v33):", sdk_33_classes),
        ("Redefined non-SDK Platform Classes (v33):", platform_non_sdk_33_classes),
        (
            "Reference to non-SDK Platform Classes (v33):",
            ref_platform_non_sdk_33_classes,
        ),
        ("Redefined Platform Classes (v34):", platform_34_classes),
        ("Redefined SDK Classes (v34):", sdk_34_classes),
        ("Redefined non-SDK Platform Classes (v34):", platform_non_sdk_34_classes),
        (
            "Reference to non-SDK Platform Classes (v34):",
            ref_platform_non_sdk_34_classes,
        ),
    )
    for heading, class_names in report_sections:
        # Empty sets produce no output at all, not even the heading.
        if not class_names:
            continue
        print(heading)
        for class_name in class_names:
            print(f" {class_name}")
return PlatformClassesData(
nb_duplicate_classes=len(duplicated_classes),
nb_platform_32_classes=len(platform_32_classes),
nb_platform_non_sdk_32_classes=len(platform_non_sdk_32_classes),
nb_sdk_32_classes=len(sdk_32_classes),
nb_platform_33_classes=len(platform_33_classes),
nb_platform_non_sdk_33_classes=len(platform_non_sdk_33_classes),
nb_sdk_33_classes=len(sdk_33_classes),
nb_platform_34_classes=len(platform_34_classes),
nb_platform_non_sdk_34_classes=len(platform_non_sdk_34_classes),
nb_sdk_34_classes=len(sdk_34_classes),
nb_def_platform_32_classes=len(platform_32_classes),
nb_def_platform_non_sdk_32_classes=len(platform_non_sdk_32_classes),
nb_def_sdk_32_classes=len(sdk_32_classes),
nb_ref_platform_non_sdk_32_classes=len(ref_platform_non_sdk_32_classes),
nb_def_platform_33_classes=len(platform_33_classes),
nb_def_platform_non_sdk_33_classes=len(platform_non_sdk_33_classes),
nb_def_sdk_33_classes=len(sdk_33_classes),
nb_ref_platform_non_sdk_33_classes=len(ref_platform_non_sdk_33_classes),
nb_def_platform_34_classes=len(platform_34_classes),
nb_def_platform_non_sdk_34_classes=len(platform_non_sdk_34_classes),
nb_def_sdk_34_classes=len(sdk_34_classes),
nb_ref_platform_non_sdk_34_classes=len(ref_platform_non_sdk_34_classes),
)
def analyze(apk: zipfile.ZipFile, sha256: str) -> ApkData:
def analyze(apk: zipfile.ZipFile, sha256: str, verbose: bool = False) -> ApkData:
classes_dex = set(
filter(
lambda name: name.startswith("classes") and name.endswith(".dex"),
@ -166,7 +257,7 @@ def analyze(apk: zipfile.ZipFile, sha256: str) -> ApkData:
has_non_consecutive_classes_dex = True
break
platform_classes_data = scan_classes(apk, classes_dex)
platform_classes_data = scan_classes(apk, classes_dex, verbose=verbose)
return ApkData(
sha256=sha256,

View file

@ -6,15 +6,18 @@ from typing import Self
class ApkData:
sha256: str
nb_duplicate_classes: int
nb_platform_32_classes: int
nb_platform_non_sdk_32_classes: int
nb_sdk_32_classes: int
nb_platform_33_classes: int
nb_platform_non_sdk_33_classes: int
nb_sdk_33_classes: int
nb_platform_34_classes: int
nb_platform_non_sdk_34_classes: int
nb_sdk_34_classes: int
nb_def_platform_32_classes: int
nb_def_platform_non_sdk_32_classes: int
nb_def_sdk_32_classes: int
nb_ref_platform_non_sdk_32_classes: int
nb_def_platform_33_classes: int
nb_def_platform_non_sdk_33_classes: int
nb_def_sdk_33_classes: int
nb_ref_platform_non_sdk_33_classes: int
nb_def_platform_34_classes: int
nb_def_platform_non_sdk_34_classes: int
nb_def_sdk_34_classes: int
nb_ref_platform_non_sdk_34_classes: int
has_classes0_dex: bool
has_classes1_dex: bool
has_classes0X_dex: bool

View file

@ -9,6 +9,13 @@ SDK_33_CLASSES = set()
PLATFORM_34_CLASSES = set()
SDK_34_CLASSES = set()
# Classes added by D8 for internal stuff.
# Kept as a set of class descriptors so callers can exclude them with a plain
# membership test.
# NOTE(review): presumably these are the dalvik annotation types the D8 dexer
# emits by itself rather than classes the application code refers to --
# confirm against the D8 documentation, and check whether the list is complete.
D8_CLASSES = {
"Ldalvik/annotation/InnerClass;",
"Ldalvik/annotation/Signature;",
"Ldalvik/annotation/EnclosingMethod;",
}
with (local_dir / "android-32" / "classes.txt").open() as file:
for line in file:
class_name = line.strip()