analyse result
This commit is contained in:
parent
5a6a4fdca6
commit
cbac24b0f7
8 changed files with 115 additions and 25 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -1,3 +1,4 @@
|
|||
__pycache__
|
||||
test_apks.txt
|
||||
dist
|
||||
data
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from datetime import datetime
|
|||
from .androzoo import download_apk
|
||||
from .data import ApkData, load_from_directory
|
||||
from .analysis import analyze
|
||||
from .data_mining import analyse_sdk_redef
|
||||
|
||||
|
||||
def main():
|
||||
|
|
@ -57,12 +58,17 @@ def main():
|
|||
type=Path,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir-sdk34-classes",
|
||||
"--output-dir-def-sdk34-classes",
|
||||
help="SDK 34 redefinition",
|
||||
type=Path,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir-hidden-api",
|
||||
"--output-dir-ref-hidden-api-34",
|
||||
help="Reference to hidden api",
|
||||
type=Path,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir-def-hidden-api-34",
|
||||
help="Reference to hidden api",
|
||||
type=Path,
|
||||
)
|
||||
|
|
@ -105,17 +111,22 @@ def main():
|
|||
raise RuntimeError("--output-dir must be a directory")
|
||||
args.output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
if args.output_dir_sdk34_classes:
|
||||
if not args.output_dir_sdk34_classes.exists():
|
||||
args.output_dir_sdk34_classes.mkdir(parents=True)
|
||||
if not args.output_dir_sdk34_classes.is_dir():
|
||||
raise RuntimeError("--output-dir-sdk34-classes must be a directory")
|
||||
if args.output_dir_def_sdk34_classes:
|
||||
if not args.output_dir_def_sdk34_classes.exists():
|
||||
args.output_dir_def_sdk34_classes.mkdir(parents=True, exist_ok=True)
|
||||
if not args.output_dir_def_sdk34_classes.is_dir():
|
||||
raise RuntimeError("--output-dir-def-sdk34-classes must be a directory")
|
||||
|
||||
if args.output_dir_hidden_api:
|
||||
if not args.output_dir_hidden_api.exists():
|
||||
args.output_dir_hidden_api.mkdir(parents=True)
|
||||
if not args.output_dir_hidden_api.is_dir():
|
||||
raise RuntimeError("--output-dir-hidden-api must be a directory")
|
||||
if args.output_dir_ref_hidden_api_34:
|
||||
if not args.output_dir_ref_hidden_api_34.exists():
|
||||
args.output_dir_ref_hidden_api_34.mkdir(parents=True, exist_ok=True)
|
||||
if not args.output_dir_ref_hidden_api_34.is_dir():
|
||||
raise RuntimeError("--output-dir-ref-hidden-api-34 must be a directory")
|
||||
if args.output_dir_def_hidden_api_34:
|
||||
if not args.output_dir_def_hidden_api_34.exists():
|
||||
args.output_dir_def_hidden_api_34.mkdir(parents=True, exist_ok=True)
|
||||
if not args.output_dir_def_hidden_api_34.is_dir():
|
||||
raise RuntimeError("--output-dir-def-hidden-api-34 must be a directory")
|
||||
|
||||
# Case 1: apk from file
|
||||
apks = []
|
||||
|
|
@ -147,8 +158,9 @@ def main():
|
|||
apk,
|
||||
sha256,
|
||||
json_out=json_out,
|
||||
sdk34_dir=args.output_dir_sdk34_classes,
|
||||
hidden_dir=args.output_dir_hidden_api,
|
||||
sdk34_dir=args.output_dir_def_sdk34_classes,
|
||||
ref_hidden_dir=args.output_dir_ref_hidden_api_34,
|
||||
def_hidden_dir=args.output_dir_def_hidden_api_34,
|
||||
)
|
||||
except Exception as e:
|
||||
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
|
||||
|
|
@ -218,8 +230,9 @@ def main():
|
|||
apk,
|
||||
sha256,
|
||||
json_out=json_out,
|
||||
sdk34_dir=args.output_dir_sdk34_classes,
|
||||
hidden_dir=args.output_dir_hidden_api,
|
||||
sdk34_dir=args.output_dir_def_sdk34_classes,
|
||||
ref_hidden_dir=args.output_dir_ref_hidden_api_34,
|
||||
def_hidden_dir=args.output_dir_def_hidden_api_34,
|
||||
)
|
||||
except Exception as e:
|
||||
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
|
||||
|
|
@ -408,3 +421,26 @@ def check_smali():
|
|||
|
||||
with args.out.open("w") as f:
|
||||
json.dump(data, f)
|
||||
|
||||
|
||||
def data_mining():
    """Command-line entry point for analysing results collected by the scan.

    Recognised options:

    * ``--db``: path to the database storing the results. The option is
      parsed but nothing is done with it yet.
    * ``--output-dir-def-sdk34-classes``: directory handed to
      ``analyse_sdk_redef`` when the option is supplied.
    """
    cli = ArgumentParser(
        prog="Data Mining",
        description="Analyze result collected from the scan",
    )
    cli.add_argument(
        "--db",
        type=Path,
        help="Path to the database storing the results",
    )
    cli.add_argument(
        "--output-dir-def-sdk34-classes",
        type=Path,
        help="The directory storing the classes already in SDK 34 redefined by apks",
    )
    options = cli.parse_args()

    # NOTE: ``options.db`` is accepted on the command line but not used yet.

    redefinition_dir = options.output_dir_def_sdk34_classes
    if redefinition_dir is None:
        return
    analyse_sdk_redef(redefinition_dir)
|
||||
|
|
|
|||
|
|
@ -83,7 +83,8 @@ def scan_classes(
|
|||
file_names: set[str],
|
||||
json_out: dict | None = None,
|
||||
sdk34_classes_file: Path | None = None,
|
||||
hidden_file: Path | None = None,
|
||||
ref_hidden_file: Path | None = None,
|
||||
def_hidden_file: Path | None = None,
|
||||
) -> PlatformClassesData:
|
||||
all_classes = set()
|
||||
duplicated_classes = set()
|
||||
|
|
@ -184,10 +185,14 @@ def scan_classes(
|
|||
with sdk34_classes_file.open("w") as file:
|
||||
for l in sorted(sdk_34_classes):
|
||||
file.write(f"{l}\n")
|
||||
if hidden_file is not None:
|
||||
with hidden_file.open("w") as file:
|
||||
if ref_hidden_file is not None:
|
||||
with ref_hidden_file.open("w") as file:
|
||||
for l in sorted(ref_platform_non_sdk_34_classes):
|
||||
file.write(f"{l}\n")
|
||||
if def_hidden_file is not None:
|
||||
with def_hidden_file.open("w") as file:
|
||||
for l in sorted(platform_non_sdk_34_classes):
|
||||
file.write(f"{l}\n")
|
||||
return entry
|
||||
|
||||
|
||||
|
|
@ -196,7 +201,8 @@ def analyze(
|
|||
sha256: str,
|
||||
json_out: dict | None = None,
|
||||
sdk34_dir: Path | None = None,
|
||||
hidden_dir: Path | None = None,
|
||||
ref_hidden_dir: Path | None = None,
|
||||
def_hidden_dir: Path | None = None,
|
||||
) -> ApkData:
|
||||
classes_dex = set(
|
||||
filter(
|
||||
|
|
@ -249,16 +255,21 @@ def analyze(
|
|||
sdk34_classes_file = sdk34_dir / sha256
|
||||
else:
|
||||
sdk34_classes_file = None
|
||||
if hidden_dir:
|
||||
hidden_file = hidden_dir / sha256
|
||||
if ref_hidden_dir:
|
||||
ref_hidden_file = ref_hidden_dir / sha256
|
||||
else:
|
||||
hidden_file = None
|
||||
ref_hidden_file = None
|
||||
if def_hidden_dir:
|
||||
def_hidden_file = def_hidden_dir / sha256
|
||||
else:
|
||||
def_hidden_file = None
|
||||
platform_classes_data = scan_classes(
|
||||
apk,
|
||||
classes_dex,
|
||||
json_out=json_out,
|
||||
sdk34_classes_file=sdk34_classes_file,
|
||||
hidden_file=hidden_file,
|
||||
ref_hidden_file=ref_hidden_file,
|
||||
def_hidden_file=def_hidden_file,
|
||||
)
|
||||
|
||||
entry = ApkData(
|
||||
|
|
|
|||
1
android_class_shadowing_scanner/classes_min_max_sdk.json
Normal file
1
android_class_shadowing_scanner/classes_min_max_sdk.json
Normal file
File diff suppressed because one or more lines are too long
35
android_class_shadowing_scanner/data_mining.py
Normal file
35
android_class_shadowing_scanner/data_mining.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
from pathlib import Path
|
||||
from .platform_classes import MIN_MAX_SDK
|
||||
|
||||
from matplotlib import pyplot as plt
|
||||
|
||||
|
||||
def analyse_sdk_redef(folder: Path):
    """Summarise which classes are most often listed across the files of *folder*.

    Every regular file directly inside *folder* is read as a list of class
    names, one per line; blank lines are ignored and a name repeated inside
    one file is counted once for that file. The function then:

    * prints the ten classes that appear in the most files, together with
      the first element of their ``MIN_MAX_SDK`` entry (shown in the
      "min sdk" column);
    * shows a matplotlib bar chart, for indices 7 to 34, of the occurrences
      summed per first ``MIN_MAX_SDK`` element.

    Classes missing from ``MIN_MAX_SDK`` are shown with ``?`` in the table
    and are left out of the chart instead of aborting the report.

    Args:
        folder: directory containing one class-list file per application.
    """
    # Local import so this block does not depend on a file-level import.
    from collections import Counter

    classes_occ = Counter()
    for file in folder.iterdir():
        # Sub-directories (or other non-regular entries) cannot be opened
        # as text files; skip them rather than crash.
        if not file.is_file():
            continue
        with file.open("r") as fp:
            # A set makes each class count at most once per file.
            classes_occ.update({line.strip() for line in fp if line.strip()})

    print()
    print("redefined class occurences | min sdk")
    print()
    for cl, occ in classes_occ.most_common(10):
        min_sdk = MIN_MAX_SDK[cl][0] if cl in MIN_MAX_SDK else "?"
        print(f"{cl:<50} {occ: >5} | {min_sdk: >2}")

    # One bucket per index 0..34; only buckets 7..34 are plotted below.
    cls_by_sdk = [0] * 35
    for cl, n in classes_occ.items():
        if cl not in MIN_MAX_SDK:
            # No bucket can be chosen for a class absent from the table.
            continue
        cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
    plt.bar(list(range(7, 35)), cls_by_sdk[7:], bottom=0)
    plt.show()
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
from pathlib import Path
|
||||
import json
|
||||
|
||||
local_dir = Path(__file__).parent
|
||||
|
||||
|
|
@ -51,3 +52,6 @@ with (local_dir / "android-34" / "sdk_classes.txt").open() as file:
|
|||
class_name = line.strip()
|
||||
if class_name:
|
||||
SDK_34_CLASSES.add(class_name)
|
||||
|
||||
# Load the table stored in classes_min_max_sdk.json, next to this module.
MIN_MAX_SDK = json.loads((local_dir / "classes_min_max_sdk.json").read_text())
|
||||
|
|
|
|||
2
poetry.lock
generated
2
poetry.lock
generated
|
|
@ -1712,4 +1712,4 @@ secretstorage = ["SecretStorage"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.12"
|
||||
content-hash = "17b8e803d4fecdffce0e19b557cedc226cc67d501c8f86b8228af513d9dfc3e3"
|
||||
content-hash = "e1a0492abb5c7c774aa2cb67019441477990fd914cfb49c4c7530f8dfb8fb8ed"
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ androguard = "^4.1.2"
|
|||
|
||||
SecretStorage = { version = "^3.3.3", optional = true }
|
||||
|
||||
matplotlib = "^3.9.2"
|
||||
[tool.poetry.extras]
|
||||
secretstorage = ["SecretStorage"]
|
||||
|
||||
|
|
@ -22,3 +23,4 @@ build-backend = "poetry.core.masonry.api"
|
|||
scan = 'android_class_shadowing_scanner.__init__:main'
|
||||
collect-scan = 'android_class_shadowing_scanner.__init__:collect_to_db'
|
||||
check-class-redef = 'android_class_shadowing_scanner.__init__:check_smali'
|
||||
data-mining = 'android_class_shadowing_scanner.__init__:data_mining'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue