dump data in json

This commit is contained in:
Jean-Marie Mineau 2024-10-30 11:50:40 +01:00
parent 0d524d6a3e
commit 3d7764d958
2 changed files with 45 additions and 69 deletions

View file

@ -1,7 +1,7 @@
import zipfile
import io
import hashlib
import pprint
import json
from argparse import ArgumentParser
from pathlib import Path
@ -53,8 +53,8 @@ def main():
type=Path,
)
parser.add_argument(
"--pprint",
help="Print the result with pprint and add more information when available",
"--json",
help="Print the results in json format with additionnal data",
action="store_true",
)
@ -82,6 +82,8 @@ def main():
else:
logfile = None
json_data = {}
if args.output_dir:
if not args.output_dir.exists():
args.output_dir.mkdir(parents=True)
@ -107,12 +109,15 @@ def main():
if args.output_dir and (args.output_dir / sha256).exists():
continue
if args.pprint:
print(f"APK: {str(apk_path)}")
with apk_path.open("rb") as file:
with zipfile.ZipFile(file) as apk:
try:
entry = analyze(apk, sha256, verbose=args.pprint)
if args.json:
json_data[sha256] = {"file": str(apk_path)}
json_out = json_data[sha256]
else:
json_out = None
entry = analyze(apk, sha256, json_out=json_out)
except Exception as e:
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
if logfile:
@ -121,8 +126,6 @@ def main():
else:
print(log)
continue
if args.pprint:
pprint.pprint(entry)
if not args.output_dir:
print(entry.to_string())
else:
@ -130,6 +133,8 @@ def main():
file.write(entry)
if apks:
if args.json:
print(json.dumps(json_data))
exit()
# Case 2: apk from SHA256
@ -173,7 +178,11 @@ def main():
continue
with zipfile.ZipFile(io.BytesIO(apk_bin)) as apk:
try:
entry = analyze(apk, sha256, verbose=args.pprint)
if args.json:
    # Only the local-file branch pre-populates json_data[sha256]; on this
    # SHA256 path the key does not exist yet. Create the per-APK dict on
    # first use instead of indexing, which would raise KeyError and be
    # reported by the handler below as an analysis failure.
    json_out = json_data.setdefault(sha256, {})
else:
    json_out = None
entry = analyze(apk, sha256, json_out=json_out)
except Exception as e:
log = f"[{datetime.today().strftime('%Y-%m-%d %H:%M:%S')}] Failed to analyzed {sha256}: {e}, abort"
if logfile:
@ -183,8 +192,6 @@ def main():
print(log)
continue
if args.pprint:
pprint.pprint(entry)
if not args.output_dir:
print(entry.to_string())
else:
@ -195,6 +202,8 @@ def main():
print(f"finished {args.sha256_list}")
else:
print(f"finished {args.sha256}")
if args.json:
print(json.dumps(json_data))
def collect_to_db():

View file

@ -78,7 +78,7 @@ class PlatformClassesData:
def scan_classes(
apk: zipfile.ZipFile, file_names: set[str], verbose: bool = False
apk: zipfile.ZipFile, file_names: set[str], json_out: dict | None = None
) -> PlatformClassesData:
all_classes = set()
duplicated_classes = set()
@ -144,61 +144,8 @@ def scan_classes(
and ty_name not in D8_CLASSES
):
ref_platform_non_sdk_34_classes.add(ty_name)
if verbose:
if duplicated_classes:
print("Duplicated classes:")
for cl in duplicated_classes:
print(f" {cl}")
if platform_32_classes:
print("Redefined Platform Classes (v32):")
for cl in platform_32_classes:
print(f" {cl}")
if sdk_32_classes:
print("Redefined SDK Classes (v32):")
for cl in sdk_32_classes:
print(f" {cl}")
if platform_non_sdk_32_classes:
print("Redefined non-SDK Platform Classes (v32):")
for cl in platform_non_sdk_32_classes:
print(f" {cl}")
if ref_platform_non_sdk_32_classes:
print("Reference to non-SDK Platform Classes (v32):")
for cl in ref_platform_non_sdk_32_classes:
print(f" {cl}")
if platform_33_classes:
print("Redefined Platform Classes (v33):")
for cl in platform_33_classes:
print(f" {cl}")
if sdk_33_classes:
print("Redefined SDK Classes (v33):")
for cl in sdk_33_classes:
print(f" {cl}")
if platform_non_sdk_33_classes:
print("Redefined non-SDK Platform Classes (v33):")
for cl in platform_non_sdk_33_classes:
print(f" {cl}")
if ref_platform_non_sdk_33_classes:
print("Reference to non-SDK Platform Classes (v33):")
for cl in ref_platform_non_sdk_33_classes:
print(f" {cl}")
if platform_34_classes:
print("Redefined Platform Classes (v34):")
for cl in platform_34_classes:
print(f" {cl}")
if sdk_34_classes:
print("Redefined SDK Classes (v34):")
for cl in sdk_34_classes:
print(f" {cl}")
if platform_non_sdk_34_classes:
print("Redefined non-SDK Platform Classes (v34):")
for cl in platform_non_sdk_34_classes:
print(f" {cl}")
if ref_platform_non_sdk_34_classes:
print("Reference to non-SDK Platform Classes (v34):")
for cl in ref_platform_non_sdk_34_classes:
print(f" {cl}")
return PlatformClassesData(
entry = PlatformClassesData(
nb_duplicate_classes=len(duplicated_classes),
nb_def_platform_32_classes=len(platform_32_classes),
nb_def_platform_non_sdk_32_classes=len(platform_non_sdk_32_classes),
@ -213,9 +160,25 @@ def scan_classes(
nb_def_sdk_34_classes=len(sdk_34_classes),
nb_ref_platform_non_sdk_34_classes=len(ref_platform_non_sdk_34_classes),
)
if json_out is not None:
data = json_out
data["duplicated_classes"] = list(duplicated_classes)
data["platform_32_classes"] = list(platform_32_classes)
data["sdk_32_classes"] = list(sdk_32_classes)
data["platform_non_sdk_32_classes"] = list(platform_non_sdk_32_classes)
data["ref_platform_non_sdk_32_classes"] = list(ref_platform_non_sdk_32_classes)
data["platform_33_classes"] = list(platform_33_classes)
data["sdk_33_classes"] = list(sdk_33_classes)
data["platform_non_sdk_33_classes"] = list(platform_non_sdk_33_classes)
data["ref_platform_non_sdk_33_classes"] = list(ref_platform_non_sdk_33_classes)
data["platform_34_classes"] = list(platform_34_classes)
data["sdk_34_classes"] = list(sdk_34_classes)
data["platform_non_sdk_34_classes"] = list(platform_non_sdk_34_classes)
data["ref_platform_non_sdk_34_classes"] = list(ref_platform_non_sdk_34_classes)
return entry
def analyze(apk: zipfile.ZipFile, sha256: str, verbose: bool = False) -> ApkData:
def analyze(apk: zipfile.ZipFile, sha256: str, json_out: dict | None = None) -> ApkData:
classes_dex = set(
filter(
lambda name: name.startswith("classes") and name.endswith(".dex"),
@ -263,9 +226,9 @@ def analyze(apk: zipfile.ZipFile, sha256: str, verbose: bool = False) -> ApkData
has_non_consecutive_classes_dex = True
break
platform_classes_data = scan_classes(apk, classes_dex, verbose=verbose)
platform_classes_data = scan_classes(apk, classes_dex, json_out=json_out)
return ApkData(
entry = ApkData(
sha256=sha256,
**asdict(platform_classes_data),
has_classes0_dex="classes0.dex" in classes_dex,
@ -277,3 +240,7 @@ def analyze(apk: zipfile.ZipFile, sha256: str, verbose: bool = False) -> ApkData
has_non_numeric_classes_dex=has_non_numeric_classes_dex,
has_non_consecutive_classes_dex=has_non_consecutive_classes_dex,
)
if json_out is not None:
json_out["entry"] = asdict(entry)
json_out["class_dex"] = list(classes_dex)
return entry