android_class_shadowing_sca.../android_class_shadowing_scanner/analysis.py
2024-11-14 11:43:42 +01:00

295 lines
10 KiB
Python

import zipfile
import io
from dataclasses import dataclass, asdict
from enum import IntEnum
from pathlib import Path
import androguard.core.dex # type: ignore
from androguard.core.dex import DEX # type: ignore
from androguard.core.apk import APK # type: ignore
# loguru is the logging framework used by Androguard
from loguru import logger # type: ignore
from .platform_classes import (
PLATFORM_32_CLASSES,
SDK_32_CLASSES,
PLATFORM_33_CLASSES,
SDK_33_CLASSES,
PLATFORM_34_CLASSES,
SDK_34_CLASSES,
D8_CLASSES,
)
from .data import ApkData
# Remove Androguard logs: Androguard logs through loguru, so dropping the
# handlers registered on loguru's logger silences its output.
logger.remove()
# Patch Androguard
class PatchedDomapiApiFlag(IntEnum):
    """Replacement for ``HiddenApiClassDataItem.DomapiApiFlag`` in Androguard.

    Defines a member for every integer from 0 to 10. Only the first three
    values carry a descriptive name; the others are ``UNKN_<value>``
    placeholders (presumably so that flag values missing from the stock
    enum can still be converted - confirm against Androguard).
    """

    NONE = 0
    CORE_PLATFORM_API = 1
    TEST_API = 2
    UNKN_3 = 3
    UNKN_4 = 4
    UNKN_5 = 5
    UNKN_6 = 6
    UNKN_7 = 7
    UNKN_8 = 8
    UNKN_9 = 9
    UNKN_10 = 10
    # Historical misspelling of UNKN_10, kept as an alias of the same member
    # so that existing references keep working.
    INKN_10 = 10
class PatchedRestrictionApiFlag(IntEnum):
    """Restriction flag enum assigned to ``HiddenApiClassDataItem.RestrictionApiFlag``.

    One member exists for each integer from 0 to 10, so any value in that
    range converts to a member.
    """

    # Plain list levels.
    WHITELIST = 0
    GREYLIST = 1
    BLACKLIST = 2

    # Greylist variants suffixed with a letter.
    GREYLIST_MAX_O = 3
    GREYLIST_MAX_P = 4
    GREYLIST_MAX_Q = 5
    GREYLIST_MAX_R = 6

    # Remaining values, named after the number itself.
    GREYLIST_MAX_7 = 7
    GREYLIST_MAX_8 = 8
    GREYLIST_MAX_9 = 9
    GREYLIST_MAX_10 = 10
# Install the patched enums: from here on, both attributes of Androguard's
# HiddenApiClassDataItem point at the classes defined in this module.
androguard.core.dex.HiddenApiClassDataItem.DomapiApiFlag = PatchedDomapiApiFlag
androguard.core.dex.HiddenApiClassDataItem.RestrictionApiFlag = (
    PatchedRestrictionApiFlag
)
@dataclass
class PlatformClassesData:
    """Counters produced by `scan_classes` for one set of DEX files.

    Apart from ``nb_duplicate_classes``, the same four counters are kept for
    each of the reference sets 32, 33 and 34 (``PLATFORM_<N>_CLASSES`` and
    ``SDK_<N>_CLASSES``; N is presumably the Android API level):

    * ``nb_def_platform_<N>_classes``: defined classes found in the platform set.
    * ``nb_def_platform_non_sdk_<N>_classes``: defined classes found in the
      platform set but not in the SDK set.
    * ``nb_def_sdk_<N>_classes``: defined classes found in the SDK set.
    * ``nb_ref_platform_non_sdk_<N>_classes``: referenced types found in the
      platform set but neither in the SDK set nor in ``D8_CLASSES``.
    """

    # Class names encountered more than once while scanning.
    nb_duplicate_classes: int

    # Reference set 32.
    nb_def_platform_32_classes: int
    nb_def_platform_non_sdk_32_classes: int
    nb_def_sdk_32_classes: int
    nb_ref_platform_non_sdk_32_classes: int

    # Reference set 33.
    nb_def_platform_33_classes: int
    nb_def_platform_non_sdk_33_classes: int
    nb_def_sdk_33_classes: int
    nb_ref_platform_non_sdk_33_classes: int

    # Reference set 34.
    nb_def_platform_34_classes: int
    nb_def_platform_non_sdk_34_classes: int
    nb_def_sdk_34_classes: int
    nb_ref_platform_non_sdk_34_classes: int
# Reference class sets, as (label used in field and key names, platform
# classes, SDK classes). Every per-set result below is computed once per row.
_API_LEVELS = (
    ("32", PLATFORM_32_CLASSES, SDK_32_CLASSES),
    ("33", PLATFORM_33_CLASSES, SDK_33_CLASSES),
    ("34", PLATFORM_34_CLASSES, SDK_34_CLASSES),
)


def _write_class_list(path: Path | None, classes: set[str]) -> None:
    """Write `classes` to `path`, sorted, one name per line (no-op if `path` is None)."""
    if path is None:
        return
    # Explicit UTF-8 so the result does not depend on the locale's default
    # encoding, which may be unable to represent some class names.
    with path.open("w", encoding="utf-8") as file:
        file.writelines(f"{name}\n" for name in sorted(classes))


def scan_classes(
    apk: zipfile.ZipFile,
    file_names: set[str],
    json_out: dict | None = None,
    sdk34_classes_file: Path | None = None,
    ref_hidden_file: Path | None = None,
    def_hidden_file: Path | None = None,
) -> PlatformClassesData:
    """Compare the classes of several DEX files with the reference class sets.

    Each member of `apk` listed in `file_names` is parsed as a DEX file. The
    classes it defines and the types it references are matched against the
    platform and SDK sets 32, 33 and 34.

    Args:
        apk: opened archive containing the DEX files.
        file_names: names of the archive members to parse.
        json_out: optional dict that receives the sorted class lists behind
            every counter (``duplicated_classes``, ``platform_<N>_classes``,
            ``sdk_<N>_classes``, ``platform_non_sdk_<N>_classes`` and
            ``ref_platform_non_sdk_<N>_classes``).
        sdk34_classes_file: optional file receiving the defined classes that
            are in the SDK 34 set.
        ref_hidden_file: optional file receiving the referenced types that are
            in the platform 34 set but not in the SDK 34 set nor in
            ``D8_CLASSES``.
        def_hidden_file: optional file receiving the defined classes that are
            in the platform 34 set but not in the SDK 34 set.

    Returns:
        The sizes of all collected sets.
    """
    all_classes: set[str] = set()
    duplicated_classes: set[str] = set()
    # One result set per reference label, for each category.
    def_platform: dict[str, set[str]] = {label: set() for label, _, _ in _API_LEVELS}
    def_sdk: dict[str, set[str]] = {label: set() for label, _, _ in _API_LEVELS}
    def_non_sdk: dict[str, set[str]] = {label: set() for label, _, _ in _API_LEVELS}
    ref_non_sdk: dict[str, set[str]] = {label: set() for label, _, _ in _API_LEVELS}

    for name in file_names:
        with apk.open(name) as dex_f:
            dex = DEX(dex_f.read())

        # Classes defined by this DEX file.
        for clazz in (c.name for c in dex.get_classes()):
            # Already seen earlier (in this or a previously scanned file).
            if clazz in all_classes:
                duplicated_classes.add(clazz)
            for label, platform, sdk in _API_LEVELS:
                in_platform = clazz in platform
                in_sdk = clazz in sdk
                if in_platform:
                    def_platform[label].add(clazz)
                if in_sdk:
                    def_sdk[label].add(clazz)
                if in_platform and not in_sdk:
                    def_non_sdk[label].add(clazz)
            all_classes.add(clazz)

        # Types listed in the type id section of this DEX file.
        types = dex.map_list.get_item_type(
            androguard.core.dex.TypeMapItem.TYPE_ID_ITEM
        ).type
        for ty in types:
            ty_name = ty.descriptor_idx_value
            # Skip descriptors shorter than two characters (presumably the
            # primitive types) and anything listed in D8_CLASSES.
            if len(ty_name) < 2 or ty_name in D8_CLASSES:
                continue
            for label, platform, sdk in _API_LEVELS:
                if ty_name in platform and ty_name not in sdk:
                    ref_non_sdk[label].add(ty_name)

    counts = {"nb_duplicate_classes": len(duplicated_classes)}
    for label, _, _ in _API_LEVELS:
        counts[f"nb_def_platform_{label}_classes"] = len(def_platform[label])
        counts[f"nb_def_platform_non_sdk_{label}_classes"] = len(def_non_sdk[label])
        counts[f"nb_def_sdk_{label}_classes"] = len(def_sdk[label])
        counts[f"nb_ref_platform_non_sdk_{label}_classes"] = len(ref_non_sdk[label])
    entry = PlatformClassesData(**counts)

    if json_out is not None:
        # Sorted so that the output does not depend on set iteration order.
        json_out["duplicated_classes"] = sorted(duplicated_classes)
        for label, _, _ in _API_LEVELS:
            json_out[f"platform_{label}_classes"] = sorted(def_platform[label])
            json_out[f"sdk_{label}_classes"] = sorted(def_sdk[label])
            json_out[f"platform_non_sdk_{label}_classes"] = sorted(def_non_sdk[label])
            json_out[f"ref_platform_non_sdk_{label}_classes"] = sorted(
                ref_non_sdk[label]
            )

    # The optional text files only cover the "34" reference sets.
    _write_class_list(sdk34_classes_file, def_sdk["34"])
    _write_class_list(ref_hidden_file, ref_non_sdk["34"])
    _write_class_list(def_hidden_file, def_non_sdk["34"])
    return entry
def _is_ascii_decimal(text: str) -> bool:
    """Return True when `text` is a non-empty string of ASCII digits ``0``-``9``."""
    return text.isascii() and text.isdigit()


def analyze(
    apk: zipfile.ZipFile,
    androguard_apk: APK,
    sha256: str,
    json_out: dict | None = None,
    sdk34_dir: Path | None = None,
    ref_hidden_dir: Path | None = None,
    def_hidden_dir: Path | None = None,
) -> ApkData:
    """Inspect the ``classes*.dex`` members of an APK and build its `ApkData`.

    Every archive member whose name starts with ``classes`` and ends with
    ``.dex`` is selected. Their names are checked for anomalies
    (``classes0.dex``, ``classes1.dex``, names starting with ``classes0``,
    numbers >= 10, non-numeric names, gaps in the numbering), their content
    is passed to `scan_classes`, and the SDK versions reported by
    `androguard_apk` are added to the result.

    Args:
        apk: the APK opened as a zip archive.
        androguard_apk: the same APK loaded by Androguard; only its min, max
            and target SDK versions are read.
        sha256: identifier stored in the result and used as the file name
            inside the optional output directories.
        json_out: optional dict, forwarded to `scan_classes`, that also
            receives ``"entry"`` (the result as a dict) and ``"class_dex"``
            (the selected member names).
        sdk34_dir: optional directory for the ``sdk34_classes_file`` output
            of `scan_classes`.
        ref_hidden_dir: optional directory for its ``ref_hidden_file`` output.
        def_hidden_dir: optional directory for its ``def_hidden_file`` output.

    Returns:
        The `ApkData` record for this APK. A missing SDK version is stored
        as -1.

    Raises:
        ValueError: if an SDK version is a string that ``int()`` cannot parse.
    """
    classes_dex = {
        name
        for name in apk.namelist()
        if name.startswith("classes") and name.endswith(".dex")
    }
    # Text between "classes" and ".dex" for each name ("" for classes.dex).
    suffixes = {
        name: name.removeprefix("classes").removesuffix(".dex")
        for name in classes_dex
    }

    # Numbers of the members named classes<N>.dex, leaving out "1" and any
    # zero-prefixed suffix (both are reported through dedicated flags).
    # Only ASCII digits count as a number: str.isnumeric() also accepts
    # characters such as "²", which int() rejects with a ValueError.
    dex_numbers = {
        int(suffix)
        for suffix in suffixes.values()
        if suffix != "1" and suffix[:1] != "0" and _is_ascii_decimal(suffix)
    }

    has_non_numeric_classes_dex = any(
        name != "classes.dex" and not _is_ascii_decimal(suffix)
        for name, suffix in suffixes.items()
    )

    if dex_numbers:
        max_dex_num = max(dex_numbers)
    elif "classes.dex" in classes_dex:
        max_dex_num = 1
    else:
        max_dex_num = 0
    # Expected sequence: classes.dex, classes2.dex, ..., classes<max>.dex.
    expected_names = (
        "classes.dex" if number == 1 else f"classes{number}.dex"
        for number in range(1, max_dex_num + 1)
    )
    has_non_consecutive_classes_dex = any(
        name not in classes_dex for name in expected_names
    )

    # Per-APK output files are named after the hash inside each directory.
    sdk34_classes_file = sdk34_dir / sha256 if sdk34_dir else None
    ref_hidden_file = ref_hidden_dir / sha256 if ref_hidden_dir else None
    def_hidden_file = def_hidden_dir / sha256 if def_hidden_dir else None

    platform_classes_data = scan_classes(
        apk,
        classes_dex,
        json_out=json_out,
        sdk34_classes_file=sdk34_classes_file,
        ref_hidden_file=ref_hidden_file,
        def_hidden_file=def_hidden_file,
    )

    entry = ApkData(
        sha256=sha256,
        **asdict(platform_classes_data),
        has_classes0_dex="classes0.dex" in classes_dex,
        has_classes1_dex="classes1.dex" in classes_dex,
        has_classes0X_dex=any(name.startswith("classes0") for name in classes_dex),
        has_classes_dex_over_10=any(number >= 10 for number in dex_numbers),
        has_non_numeric_classes_dex=has_non_numeric_classes_dex,
        has_non_consecutive_classes_dex=has_non_consecutive_classes_dex,
        min_sdk_version=int(androguard_apk.get_min_sdk_version() or "-1"),
        max_sdk_version=int(androguard_apk.get_max_sdk_version() or "-1"),
        target_sdk_version=int(androguard_apk.get_target_sdk_version() or "-1"),
    )
    if json_out is not None:
        json_out["entry"] = asdict(entry)
        # Sorted so that the output does not depend on set iteration order.
        json_out["class_dex"] = sorted(classes_dex)
    return entry