update
This commit is contained in:
parent
e51f8e57a9
commit
7a83e118a3
5 changed files with 76 additions and 227 deletions
|
|
@ -292,145 +292,6 @@ def collect_to_db():
|
|||
|
||||
|
||||
def check_smali():
    """Check whether classes duplicated inside an APK have distinct smali.

    Command-line entry point. For every APK in the database whose
    ``nb_duplicate_classes`` is at least 1, the APK is downloaded, analysed
    with ``analyze`` and decoded with apktool; the smali files of each
    duplicated class are then compared across the per-dex smali directories.
    Classes whose smali differs between two dex files are reported under
    the ``redef_classes`` key. The collected results are written as JSON to
    the path given by ``--out``.

    The Androzoo API key is taken, in order, from ``--api-key``,
    ``--api-key-file``, the secret service keyring (only when the optional
    ``secretstorage`` module is importable) and finally an interactive
    prompt.
    """
    parser = ArgumentParser(
        prog="Smalli Check",
        description="Check if duplicated classes have distinct smali",
    )
    parser.add_argument(
        "--db",
        help="Path to the database storing the results",
        type=Path,
        required=True,
    )
    parser.add_argument(
        "--out",
        help="Path to the file where to store the results",
        type=Path,
        required=True,
    )
    parser.add_argument(
        "--apktool-jar",
        help="Path to the apktool jar file",
        type=Path,
        required=True,
    )
    key_parser = parser.add_mutually_exclusive_group(required=False)
    key_parser.add_argument(
        "--api-key-file",
        help="The path to a file containing the Androzoo API key",
        type=Path,
    )
    key_parser.add_argument(
        "--api-key", help="The Androzoo API key (Usage NOT recommanded)", type=str
    )
    # The keyring option is only offered when the optional dependency exists.
    SECRET_STORAGE_IMPORTED = False
    try:
        import secretstorage

        SECRET_STORAGE_IMPORTED = True

        key_parser.add_argument(
            "--api-key-keyring-id",
            help="The ID of the Androzoo API key in the secret service storage",
            type=str,
        )
    except ModuleNotFoundError:
        pass
    args = parser.parse_args()

    apktool = args.apktool_jar.resolve()
    api_key = ""
    if args.api_key:
        api_key = args.api_key
    if args.api_key_file:
        with args.api_key_file.open("r") as file:
            api_key = file.read().strip()
    if SECRET_STORAGE_IMPORTED and not api_key:
        key_id = args.api_key_keyring_id or "androzoo"
        try:
            with secretstorage.dbus_init() as connection:
                collection = secretstorage.get_default_collection(connection)
                item = next(collection.search_items({"Title": key_id}))
                item.unlock()
                api_key = item.get_secret().decode("utf-8").strip()
        except Exception:
            # Best effort: any keyring failure (no D-Bus session, no matching
            # item, locked collection, ...) falls through to the prompt
            # below. `Exception` rather than a bare `except` so that Ctrl-C
            # and SystemExit are not swallowed.
            pass
    if not api_key:
        api_key = getpass(prompt="Androzoo API key: ").strip()

    with sqlite3.connect(args.db) as conn:
        apks = [
            row[0]
            for row in conn.execute(
                "SELECT sha256 FROM data WHERE nb_duplicate_classes >= 1"
            )
        ]

    data = {}
    for sha256 in apks:
        with tempfile.TemporaryDirectory() as tmpdirname:
            d = Path(tmpdirname)
            apk_bin = download_apk(sha256, api_key, logfile=None)
            if apk_bin is None:
                # No binary was returned for this APK: no entry is recorded.
                continue
            with (d / "app.apk").open("wb") as fp:
                fp.write(apk_bin)
            androguard_apk = APK(str(d / "app.apk"))
            with zipfile.ZipFile(io.BytesIO(apk_bin)) as apk:
                data[sha256] = {}
                # The per-APK dict is passed as `json_out`; the keys read
                # below ("class_dex", "duplicated_classes") are expected to
                # be set there by `analyze`. Its return value is not used.
                analyze(apk, androguard_apk, sha256, json_out=data[sha256])
            r = subprocess.run(
                [
                    "java",
                    "-Xmx8G",
                    "-jar",
                    str(apktool),
                    "d",
                    "app.apk",
                    "-o",
                    "apktool_out",
                ],
                cwd=d,
            )
            # apktool is considered finished only if it exited cleanly AND
            # produced its `apktool.yml` file.
            data[sha256]["apktool-finished"] = (r.returncode == 0) and (
                d / "apktool_out" / "apktool.yml"
            ).exists()

            # `classes.dex` maps to `smali/`, any other `<name>.dex` maps to
            # `smali_<name>/` in the apktool output directory.
            smalli_dirs = []
            for dex in data[sha256]["class_dex"]:
                if dex == "classes.dex":
                    smalli_dirs.append(d / "apktool_out" / "smali")
                else:
                    smalli_dirs.append(
                        d / "apktool_out" / ("smali_" + dex.removesuffix(".dex"))
                    )

            dist_dup_classes = set()
            for cl in data[sha256]["duplicated_classes"]:
                # Turn a `Lpkg/Name;` descriptor into `pkg/Name.smali`.
                cl_f = cl.removesuffix(";").removeprefix("L") + ".smali"
                smali = None
                for cdir in smalli_dirs:
                    smali_path = cdir / cl_f
                    if smali_path.exists():
                        print(smali_path)
                        with smali_path.open() as file:
                            smali_new = file.read()
                        if smali is None:
                            # First definition found: use it as reference.
                            smali = smali_new
                        elif smali != smali_new:
                            dist_dup_classes.add(cl)
            data[sha256]["redef_classes"] = list(dist_dup_classes)

            if data[sha256]["redef_classes"]:
                print(f"{sha256}:")
                for c in data[sha256]["redef_classes"]:
                    print(f" {c}")
            else:
                print(f"{sha256}: No true redefinition")

    with args.out.open("w") as f:
        json.dump(data, f)
|
||||
|
||||
|
||||
def check_smali_platform():
|
||||
parser = ArgumentParser(
|
||||
prog="Smalli Check",
|
||||
description="Check if duplicated classes are distinct from the actual sources",
|
||||
|
|
@ -563,6 +424,7 @@ def check_smali_platform():
|
|||
"apktool_out",
|
||||
],
|
||||
cwd=d,
|
||||
capture_output=True, # just avoid spamming
|
||||
)
|
||||
data["apktool-finished"] = (r.returncode == 0) and (
|
||||
d / "apktool_out" / "apktool.yml"
|
||||
|
|
@ -698,27 +560,13 @@ def data_mining():
|
|||
type=Path,
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir-def-sdk34-classes",
|
||||
help="The directory storing the classes already in SDK 34 redefined by apks",
|
||||
type=Path,
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--detail-class-redef",
|
||||
help="Path to json file outputed by `check-class-redef`",
|
||||
type=Path,
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-check-platform-redef",
|
||||
help="The directory storing the result of smali comparision between platform classes and classes defined in apk (--output-dir of `check-platf-reder`)",
|
||||
help="The directory storing the result of smali comparision between platform classes and classes defined in apk (--output-dir of `check-smali`)",
|
||||
type=Path,
|
||||
required=True,
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
stats(args.db, args.out, args.output_check_platform_redef, args.detail_class_redef)
|
||||
|
||||
if args.output_dir_def_sdk34_classes is not None:
|
||||
analyse_sdk_redef(args.output_dir_def_sdk34_classes, args.db, args.out)
|
||||
stats(args.db, args.out, args.output_check_platform_redef)
|
||||
# analyse_sdk_redef(args.output_check_platform_redef, args.db, args.out)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@ def cmp_smali(sm1: str, sm2: str, sha256_1: str = "", sha256_2: str = "") -> boo
|
|||
for m in meths_1.keys():
|
||||
s1 = meths_1[m]
|
||||
s2 = meths_2[m]
|
||||
if len(s1) > 1:
|
||||
print(f"method {m} in {sha256_1} has multiple implementation")
|
||||
if len(s2) > 1:
|
||||
print(f"method {m} in {sha256_2} has multiple implementation")
|
||||
for b1 in s1:
|
||||
match = False
|
||||
for b2 in s2:
|
||||
|
|
|
|||
|
|
@ -9,12 +9,14 @@ from matplotlib import pyplot as plt
|
|||
import matplotlib
|
||||
|
||||
|
||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef: Path):
|
||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||
|
||||
occ_sdk34 = {}
|
||||
occ_hid34 = {}
|
||||
occ_self_redef = {}
|
||||
occ_self = {}
|
||||
occ_sdk34_non_id = {}
|
||||
occ_hid34_non_id = {}
|
||||
occ_self_non_id = {}
|
||||
|
||||
nb_sdk_cl_redef = 0
|
||||
nb_sdk_cl_id = 0
|
||||
|
|
@ -24,9 +26,32 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
nb_hid_cl_id = 0
|
||||
nb_app_hid_cl_redef = 0
|
||||
nb_app_hid_cl_redef_false_pos = 0
|
||||
nb_class_self_shadow = 0
|
||||
nb_class_self_shadow_id = 0
|
||||
nb_app_self_shadow = 0
|
||||
nb_app_self_shadow_false_pos = 0
|
||||
|
||||
for file in folder_plat_diff_smali.iterdir():
|
||||
with file.open("r") as fd:
|
||||
data = json.load(fd)
|
||||
if not data["apktool-finished"]:
|
||||
continue
|
||||
for cl in data["redef_classes"]:
|
||||
if cl not in occ_self_non_id:
|
||||
occ_self_non_id[cl] = 0
|
||||
occ_self_non_id[cl] += 1
|
||||
for cl in data["duplicated_classes"]:
|
||||
if cl not in occ_self:
|
||||
occ_self[cl] = 0
|
||||
occ_self[cl] += 1
|
||||
if data["duplicated_classes"]:
|
||||
nb_app_self_shadow += 1
|
||||
if data["duplicated_classes"] and not data["redef_classes"]:
|
||||
nb_app_self_shadow_false_pos += 1
|
||||
nb_class_self_shadow += len(data["duplicated_classes"])
|
||||
nb_class_self_shadow_id += len(data["duplicated_classes"]) - len(
|
||||
data["redef_classes"]
|
||||
)
|
||||
l_nb_sdk_cl_id = 0
|
||||
for cl in data["sdk_34_classes"]:
|
||||
nb_sdk_cl_redef += 1
|
||||
|
|
@ -47,6 +72,10 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
]
|
||||
):
|
||||
l_nb_sdk_cl_id += 1
|
||||
else:
|
||||
if cl not in occ_sdk34_non_id:
|
||||
occ_sdk34_non_id[cl] = 0
|
||||
occ_sdk34_non_id[cl] += 1
|
||||
nb_sdk_cl_id += l_nb_sdk_cl_id
|
||||
if data["sdk_34_classes"]:
|
||||
nb_app_sdk_cl_redef += 1
|
||||
|
|
@ -70,6 +99,10 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
]
|
||||
):
|
||||
l_nb_hid_cl_id += 1
|
||||
else:
|
||||
if cl not in occ_hid34_non_id:
|
||||
occ_hid34_non_id[cl] = 0
|
||||
occ_hid34_non_id[cl] += 1
|
||||
nb_hid_cl_id += l_nb_hid_cl_id
|
||||
if data["platform_non_sdk_34_classes"]:
|
||||
nb_app_hid_cl_redef += 1
|
||||
|
|
@ -78,29 +111,6 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
and len(data["platform_non_sdk_34_classes"]) == l_nb_hid_cl_id
|
||||
):
|
||||
nb_app_hid_cl_redef_false_pos += 1
|
||||
nb_class_self_shadow = 0
|
||||
nb_class_self_shadow_id = 0
|
||||
nb_app_self_shadow = 0
|
||||
nb_app_self_shadow_false_pos = 0
|
||||
with detail_class_redef.open("r") as fd:
|
||||
data = json.load(fd)
|
||||
for v in data.values():
|
||||
for cl in v["redef_classes"]:
|
||||
if cl not in occ_self_redef:
|
||||
occ_self_redef[cl] = 0
|
||||
occ_self_redef[cl] += 1
|
||||
for cl in v["duplicated_classes"]:
|
||||
if cl not in occ_self:
|
||||
occ_self[cl] = 0
|
||||
occ_self[cl] += 1
|
||||
if v["duplicated_classes"]:
|
||||
nb_app_self_shadow += 1
|
||||
if v["duplicated_classes"] and not v["redef_classes"]:
|
||||
nb_app_self_shadow_false_pos += 1
|
||||
nb_class_self_shadow += len(v["duplicated_classes"])
|
||||
nb_class_self_shadow_id += len(v["duplicated_classes"]) - len(
|
||||
v["redef_classes"]
|
||||
)
|
||||
|
||||
prop_id_self_cl = round(100 * nb_class_self_shadow_id / nb_class_self_shadow, 2)
|
||||
prop_id_self_ap = round(100 * nb_app_self_shadow_false_pos / nb_app_self_shadow, 2)
|
||||
|
|
@ -110,10 +120,11 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
prop_id_hid_ap = round(100 * nb_app_hid_cl_redef_false_pos / nb_app_hid_cl_redef, 2)
|
||||
tot_prop_id = round(
|
||||
100
|
||||
* (nb_class_self_shadow_id + nb_sdk_cl_id + 100 * nb_hid_cl_id)
|
||||
* (nb_class_self_shadow_id + nb_sdk_cl_id + nb_hid_cl_id)
|
||||
/ (nb_class_self_shadow + nb_sdk_cl_redef + nb_hid_cl_redef),
|
||||
2,
|
||||
)
|
||||
|
||||
print(
|
||||
f"Self classes: {nb_class_self_shadow_id}/{nb_class_self_shadow}: {prop_id_self_cl}%"
|
||||
)
|
||||
|
|
@ -260,80 +271,66 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
# occ_self = {}
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK occurences"
|
||||
)
|
||||
print(f"redefined class SDK {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(occ_sdk34.keys(), key=lambda x: occ_sdk34[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class Hidden occurences"
|
||||
)
|
||||
print(f"redefined class Hidden {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(occ_hid34.keys(), key=lambda x: occ_hid34[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_hid34[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_hid34[cl]: >5} {occ_hid34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"collision class Self occurences"
|
||||
)
|
||||
print(f"collision class Self {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(occ_self.keys(), key=lambda x: occ_self[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_self[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_self[cl]: >5} {occ_self_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class Self occurences"
|
||||
)
|
||||
print(f"redefined class Self {' '*40} occurences")
|
||||
print()
|
||||
for cl in sorted(
|
||||
occ_self_redef.keys(), key=lambda x: occ_self_redef[x], reverse=True
|
||||
occ_self_non_id.keys(), key=lambda x: occ_self_non_id[x], reverse=True
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_self_redef[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_self_non_id[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK <= 7 occurences"
|
||||
)
|
||||
print(f"redefined class SDK <= 7 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK = 8 occurences"
|
||||
)
|
||||
print(f"redefined class SDK = 8 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK = 16 occurences"
|
||||
)
|
||||
print(f"redefined class SDK = 16 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
|
||||
|
||||
|
|
@ -360,10 +357,11 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
|||
apk_min_sdk = min_sdks[file.name]
|
||||
apk_targ_sdk = targ_sdks[file.name]
|
||||
with file.open("r") as fp:
|
||||
for cl in fp:
|
||||
if cl.strip():
|
||||
cls.add(cl.strip())
|
||||
cl_min_sdk, _ = MIN_MAX_SDK[cl.strip()]
|
||||
data = json.load(fp)
|
||||
|
||||
for cl in data["sdk_34_classes"]:
|
||||
cls.add(cl)
|
||||
cl_min_sdk, _ = MIN_MAX_SDK[cl]
|
||||
if cl_min_sdk < apk_min_sdk:
|
||||
cls_by_sdk_under_min[cl_min_sdk] += 1
|
||||
else:
|
||||
|
|
|
|||
BIN
platforms.zip
(Stored with Git LFS)
BIN
platforms.zip
(Stored with Git LFS)
Binary file not shown.
|
|
@ -22,6 +22,5 @@ build-backend = "poetry.core.masonry.api"
|
|||
[tool.poetry.scripts]
|
||||
scan = 'android_class_shadowing_scanner.__init__:main'
|
||||
collect-scan = 'android_class_shadowing_scanner.__init__:collect_to_db'
|
||||
check-class-redef = 'android_class_shadowing_scanner.__init__:check_smali'
|
||||
check-platf-reder = 'android_class_shadowing_scanner.__init__:check_smali_platform'
|
||||
check-smali = 'android_class_shadowing_scanner.__init__:check_smali'
|
||||
data-mining = 'android_class_shadowing_scanner.__init__:data_mining'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue