analyse result

This commit is contained in:
Jean-Marie Mineau 2024-11-13 18:10:53 +01:00
parent 5a6a4fdca6
commit cbac24b0f7
8 changed files with 115 additions and 25 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
__pycache__
test_apks.txt
dist
data

View file

@ -15,6 +15,7 @@ from datetime import datetime
from .androzoo import download_apk
from .data import ApkData, load_from_directory
from .analysis import analyze
from .data_mining import analyse_sdk_redef
def main():
@ -57,12 +58,17 @@ def main():
type=Path,
)
parser.add_argument(
"--output-dir-sdk34-classes",
"--output-dir-def-sdk34-classes",
help="SDK 34 redefinition",
type=Path,
)
parser.add_argument(
"--output-dir-hidden-api",
"--output-dir-ref-hidden-api-34",
help="Reference to hidden api",
type=Path,
)
parser.add_argument(
    "--output-dir-def-hidden-api-34",
    # Counterpart of --output-dir-ref-hidden-api-34: this value is forwarded
    # as `def_hidden_dir`, so the help must not describe it as "reference".
    help="Definition of hidden api",
    type=Path,
)
@ -105,17 +111,22 @@ def main():
raise RuntimeError("--output-dir must be a directory")
args.output_dir.mkdir(parents=True, exist_ok=True)
if args.output_dir_sdk34_classes:
if not args.output_dir_sdk34_classes.exists():
args.output_dir_sdk34_classes.mkdir(parents=True)
if not args.output_dir_sdk34_classes.is_dir():
raise RuntimeError("--output-dir-sdk34-classes must be a directory")
if args.output_dir_def_sdk34_classes:
if not args.output_dir_def_sdk34_classes.exists():
args.output_dir_def_sdk34_classes.mkdir(parents=True, exist_ok=True)
if not args.output_dir_def_sdk34_classes.is_dir():
raise RuntimeError("--output-dir-def-sdk34-classes must be a directory")
if args.output_dir_hidden_api:
if not args.output_dir_hidden_api.exists():
args.output_dir_hidden_api.mkdir(parents=True)
if not args.output_dir_hidden_api.is_dir():
raise RuntimeError("--output-dir-hidden-api must be a directory")
if args.output_dir_ref_hidden_api_34:
if not args.output_dir_ref_hidden_api_34.exists():
args.output_dir_ref_hidden_api_34.mkdir(parents=True, exist_ok=True)
if not args.output_dir_ref_hidden_api_34.is_dir():
raise RuntimeError("--output-dir-ref-hidden-api-34 must be a directory")
if args.output_dir_def_hidden_api_34:
if not args.output_dir_def_hidden_api_34.exists():
args.output_dir_def_hidden_api_34.mkdir(parents=True, exist_ok=True)
if not args.output_dir_def_hidden_api_34.is_dir():
raise RuntimeError("--output-dir-def-hidden-api-34 must be a directory")
# Case 1: apk from file
apks = []
@ -147,8 +158,9 @@ def main():
apk,
sha256,
json_out=json_out,
sdk34_dir=args.output_dir_sdk34_classes,
hidden_dir=args.output_dir_hidden_api,
sdk34_dir=args.output_dir_def_sdk34_classes,
ref_hidden_dir=args.output_dir_ref_hidden_api_34,
def_hidden_dir=args.output_dir_def_hidden_api_34,
)
except Exception as e:
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
@ -218,8 +230,9 @@ def main():
apk,
sha256,
json_out=json_out,
sdk34_dir=args.output_dir_sdk34_classes,
hidden_dir=args.output_dir_hidden_api,
sdk34_dir=args.output_dir_def_sdk34_classes,
ref_hidden_dir=args.output_dir_ref_hidden_api_34,
def_hidden_dir=args.output_dir_def_hidden_api_34,
)
except Exception as e:
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
@ -408,3 +421,26 @@ def check_smali():
with args.out.open("w") as f:
json.dump(data, f)
def data_mining():
    """Command-line entry point that analyses results collected by the scan.

    Options:
        --db: path to the database storing the results. It is parsed but no
            analysis is attached to it yet.
        --output-dir-def-sdk34-classes: directory storing the SDK 34 classes
            redefined by apks; when given, it is handed to
            ``analyse_sdk_redef``.

    Raises:
        RuntimeError: if ``--output-dir-def-sdk34-classes`` is given but does
            not point to an existing directory.
    """
    parser = ArgumentParser(
        prog="Data Mining",
        description="Analyze result collected from the scan",
    )
    parser.add_argument(
        "--db",
        help="Path to the database storing the results",
        type=Path,
    )
    parser.add_argument(
        "--output-dir-def-sdk34-classes",
        help="The directory storing the classes already in SDK 34 redefined by apks",
        type=Path,
    )
    args = parser.parse_args()

    if args.db is not None:
        # No database analysis is implemented yet.
        pass

    sdk34_dir = args.output_dir_def_sdk34_classes
    if sdk34_dir is not None:
        # Reject a bad path up front, with the same kind of error the scan
        # entry point uses, instead of an OSError from the directory walk.
        if not sdk34_dir.is_dir():
            raise RuntimeError("--output-dir-def-sdk34-classes must be a directory")
        analyse_sdk_redef(sdk34_dir)

View file

@ -83,7 +83,8 @@ def scan_classes(
file_names: set[str],
json_out: dict | None = None,
sdk34_classes_file: Path | None = None,
hidden_file: Path | None = None,
ref_hidden_file: Path | None = None,
def_hidden_file: Path | None = None,
) -> PlatformClassesData:
all_classes = set()
duplicated_classes = set()
@ -184,10 +185,14 @@ def scan_classes(
with sdk34_classes_file.open("w") as file:
for l in sorted(sdk_34_classes):
file.write(f"{l}\n")
if hidden_file is not None:
with hidden_file.open("w") as file:
if ref_hidden_file is not None:
with ref_hidden_file.open("w") as file:
for l in sorted(ref_platform_non_sdk_34_classes):
file.write(f"{l}\n")
if def_hidden_file is not None:
with def_hidden_file.open("w") as file:
for l in sorted(platform_non_sdk_34_classes):
file.write(f"{l}\n")
return entry
@ -196,7 +201,8 @@ def analyze(
sha256: str,
json_out: dict | None = None,
sdk34_dir: Path | None = None,
hidden_dir: Path | None = None,
ref_hidden_dir: Path | None = None,
def_hidden_dir: Path | None = None,
) -> ApkData:
classes_dex = set(
filter(
@ -249,16 +255,21 @@ def analyze(
sdk34_classes_file = sdk34_dir / sha256
else:
sdk34_classes_file = None
if hidden_dir:
hidden_file = hidden_dir / sha256
if ref_hidden_dir:
ref_hidden_file = ref_hidden_dir / sha256
else:
hidden_file = None
ref_hidden_file = None
if def_hidden_dir:
def_hidden_file = def_hidden_dir / sha256
else:
def_hidden_file = None
platform_classes_data = scan_classes(
apk,
classes_dex,
json_out=json_out,
sdk34_classes_file=sdk34_classes_file,
hidden_file=hidden_file,
ref_hidden_file=ref_hidden_file,
def_hidden_file=def_hidden_file,
)
entry = ApkData(

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,35 @@
from pathlib import Path
from .platform_classes import MIN_MAX_SDK
from matplotlib import pyplot as plt
def analyse_sdk_redef(folder: Path):
    """Summarise how often each class listed under ``folder`` occurs.

    Every regular file in ``folder`` is read as one app: the file name is the
    key and each non-blank line (stripped) is a class name. The function then
    prints the ten most frequent classes with their occurrence count and the
    first element of their ``MIN_MAX_SDK`` entry (printed under the
    "min sdk" header), and finally shows a bar chart of the occurrences
    grouped by that value, for values 7 to 34.

    Args:
        folder: directory containing one class-list file per app.

    Raises:
        KeyError: if a listed class has no entry in ``MIN_MAX_SDK``
            (presumably a plain dict loaded from JSON; confirm).
        IndexError: if the first element of an entry is 35 or more.
    """
    from collections import Counter

    classes_by_app = {}
    for file in folder.iterdir():
        # Only regular files can be read as a class list; a stray
        # sub-directory would otherwise make open() fail.
        if not file.is_file():
            continue
        with file.open("r") as fp:
            stripped_lines = (line.strip() for line in fp)
            classes_by_app[file.name] = {name for name in stripped_lines if name}

    # Each app stores its classes in a set, so a class is counted at most
    # once per app.
    classes_occ = Counter()
    for classes in classes_by_app.values():
        classes_occ.update(classes)

    print()
    print("redefined class occurences | min sdk")
    print()
    for cl, occurrences in classes_occ.most_common(10):
        print(f"{cl:<50} {occurrences: >5} | {MIN_MAX_SDK[cl][0]: >2}")

    # One bucket per value 0..34; only buckets 7..34 are plotted.
    cls_by_sdk = [0] * 35
    for cl, n in classes_occ.items():
        cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
    plt.bar(list(range(7, 35)), cls_by_sdk[7:], bottom=0)
    plt.show()

View file

@ -1,4 +1,5 @@
from pathlib import Path
import json
local_dir = Path(__file__).parent
@ -51,3 +52,6 @@ with (local_dir / "android-34" / "sdk_classes.txt").open() as file:
class_name = line.strip()
if class_name:
SDK_34_CLASSES.add(class_name)
# Load the bundled per-class SDK table once, at import time.
# NOTE(review): presumably maps a class name to its [min, max] SDK levels;
# confirm against classes_min_max_sdk.json.
with open(local_dir / "classes_min_max_sdk.json") as file:
    MIN_MAX_SDK = json.loads(file.read())

2
poetry.lock generated
View file

@ -1712,4 +1712,4 @@ secretstorage = ["SecretStorage"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "17b8e803d4fecdffce0e19b557cedc226cc67d501c8f86b8228af513d9dfc3e3"
content-hash = "e1a0492abb5c7c774aa2cb67019441477990fd914cfb49c4c7530f8dfb8fb8ed"

View file

@ -11,6 +11,7 @@ androguard = "^4.1.2"
SecretStorage = { version = "^3.3.3", optional = true }
matplotlib = "^3.9.2"
[tool.poetry.extras]
secretstorage = ["SecretStorage"]
@ -22,3 +23,4 @@ build-backend = "poetry.core.masonry.api"
scan = 'android_class_shadowing_scanner.__init__:main'
collect-scan = 'android_class_shadowing_scanner.__init__:collect_to_db'
check-class-redef = 'android_class_shadowing_scanner.__init__:check_smali'
data-mining = 'android_class_shadowing_scanner.__init__:data_mining'