fuzzy class comp
This commit is contained in:
parent
ad789abc7b
commit
0b572e1885
4 changed files with 252 additions and 2 deletions
|
|
@ -17,6 +17,7 @@ from androguard.core.apk import APK # type: ignore
|
|||
|
||||
from .androzoo import download_apk
|
||||
from .data import ApkData, load_from_directory
|
||||
from .cmp_smali import cmp_smali
|
||||
from .analysis import analyze
|
||||
|
||||
|
||||
|
|
@ -618,7 +619,12 @@ def check_smali_platform():
|
|||
for smalli_dir in smalli_dirs:
|
||||
if (smalli_dir / cl_f).exists():
|
||||
with (smalli_dir / cl_f).open("r") as file:
|
||||
if file.read() != plt_smali:
|
||||
if not cmp_smali(
|
||||
file.read(),
|
||||
plt_smali,
|
||||
sha256,
|
||||
f"{a_sdk_dir / 'platform'}",
|
||||
):
|
||||
plat_diff_smalli.add(cl)
|
||||
break
|
||||
|
||||
|
|
@ -641,13 +647,28 @@ def check_smali_platform():
|
|||
for smalli_dir in smalli_dirs:
|
||||
if (smalli_dir / cl_f).exists():
|
||||
with (smalli_dir / cl_f).open("r") as file:
|
||||
if file.read() != sdk_smali:
|
||||
if not cmp_smali(
|
||||
file.read(), sdk_smali, sha256, f"{a_sdk_dir / 'sdk'}"
|
||||
):
|
||||
sdk_diff_smalli.add(cl)
|
||||
break
|
||||
|
||||
data[f"sdk_{sdk_v}_diff_smalli"] = list(sdk_diff_smalli)
|
||||
data[f"platform_{sdk_v}_diff_smalli"] = list(plat_diff_smalli)
|
||||
|
||||
for cl in data["duplicated_classes"]:
|
||||
cl_f = cl.removesuffix(";").removeprefix("L") + ".smali"
|
||||
smali = None
|
||||
for cdir in smalli_dirs:
|
||||
if (cdir / cl_f).exists():
|
||||
with (cdir / cl_f).open() as file:
|
||||
smali_new = file.read()
|
||||
if smali is None:
|
||||
smali = smali_new
|
||||
elif not cmp_smali(smali, smali_new, sha256, sha256):
|
||||
dist_dup_classes.add(cl)
|
||||
data["redef_classes"] = list(dist_dup_classes)
|
||||
|
||||
if args.output_dir:
|
||||
with (args.output_dir / sha256).open("w") as file:
|
||||
json.dump(data, file)
|
||||
|
|
|
|||
53
android_class_shadowing_scanner/cmp_smali.py
Normal file
53
android_class_shadowing_scanner/cmp_smali.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
def cmp_smali(sm1: str, sm2: str, sha256_1: str = "", sha256_2: str = "") -> bool:
    """Return True when two smali sources define the same methods.

    This is a "fuzzy" comparison: only what `get_methods` extracts from each
    source is compared, so anything outside of method bodies is ignored.

    Two sources are considered equal when:
      * they declare exactly the same set of method keys, and
      * for every method key, the *set* of bodies found in `sm1` equals the
        set of bodies found in `sm2` (the order of the bodies and the number
        of times an identical body is repeated do not matter).

    Args:
        sm1: Text of the first smali file.
        sm2: Text of the second smali file.
        sha256_1: Identifier forwarded to `get_methods` for `sm1`.
        sha256_2: Identifier forwarded to `get_methods` for `sm2`.

    Returns:
        True if both sources match according to the rules above, else False.
    """
    meths_1 = get_methods(sm1, sha256_1)
    meths_2 = get_methods(sm2, sha256_2)

    # Dict key views are set-like, so this compares the method names as sets.
    if meths_1.keys() != meths_2.keys():
        return False

    # Bodies are lists of strings (unhashable); freezing them into tuples lets
    # a plain set comparison replace the pair of nested "is there a match"
    # loops while keeping the exact same semantics.
    return all(
        {tuple(body) for body in bodies_1}
        == {tuple(body) for body in meths_2[name]}
        for name, bodies_1 in meths_1.items()
    )
|
||||
|
||||
|
||||
def get_methods(sm: str, sha256: str = "") -> dict[str, list[list[str]]]:
    """Extract the method bodies of a smali source.

    The text is scanned line by line (split on "\\n", each line stripped):
      * a line starting with ".method " opens a method; its key is the last
        space-separated token of that line,
      * a line equal to ".end method" closes the current method and stores
        its body,
      * while a method is open, every non-empty line that does not start
        with ".line " is appended to the body.

    Several methods sharing the same key are all kept, in order of
    appearance. Malformed nesting (".end method" without an open method, or
    ".method " while another one is still open) is reported with `print`
    and the unterminated body is dropped; no exception is raised.

    Args:
        sm: Text of the smali file.
        sha256: Identifier only used in the error messages.

    Returns:
        Mapping from method key to the list of bodies found for that key,
        each body being the list of its retained (stripped) lines.
    """
    class_name = "UNINITIALIZED"
    open_method: None | str = None
    body: list[str] = []
    methods: dict[str, list[list[str]]] = {}

    for raw_line in sm.split("\n"):
        text = raw_line.strip()

        if text == ".end method":
            if open_method is None:
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            else:
                methods.setdefault(open_method, []).append(body)
            open_method = None
            body = []
            continue

        if text.startswith(".method "):
            if open_method is not None:
                # Previous method was never closed: its body is discarded.
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            open_method = text.split(" ")[-1]
            body = []
            continue

        if text.startswith(".class "):
            # Remembered only to make the error messages more useful.
            class_name = text.split(" ")[-1]

        is_line_directive = text.startswith(".line ")
        if open_method is not None and text and not is_line_directive:
            body.append(text)

    return methods
|
||||
|
|
@ -6,9 +6,16 @@ from pathlib import Path
|
|||
from .platform_classes import MIN_MAX_SDK
|
||||
|
||||
from matplotlib import pyplot as plt
|
||||
import matplotlib
|
||||
|
||||
|
||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef: Path):
|
||||
|
||||
occ_sdk34 = {}
|
||||
occ_hid34 = {}
|
||||
occ_self_redef = {}
|
||||
occ_self = {}
|
||||
|
||||
nb_sdk_cl_redef = 0
|
||||
nb_sdk_cl_id = 0
|
||||
nb_app_sdk_cl_redef = 0
|
||||
|
|
@ -23,6 +30,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
l_nb_sdk_cl_id = 0
|
||||
for cl in data["sdk_34_classes"]:
|
||||
nb_sdk_cl_redef += 1
|
||||
if cl not in occ_sdk34:
|
||||
occ_sdk34[cl] = 0
|
||||
occ_sdk34[cl] += 1
|
||||
if any(
|
||||
[
|
||||
cl not in data[lst]
|
||||
|
|
@ -45,6 +55,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
|
||||
l_nb_hid_cl_id = 0
|
||||
for cl in data["platform_non_sdk_34_classes"]:
|
||||
if cl not in occ_hid34:
|
||||
occ_hid34[cl] = 0
|
||||
occ_hid34[cl] += 1
|
||||
nb_hid_cl_redef += 1
|
||||
if any(
|
||||
[
|
||||
|
|
@ -72,6 +85,14 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
with detail_class_redef.open("r") as fd:
|
||||
data = json.load(fd)
|
||||
for v in data.values():
|
||||
for cl in v["redef_classes"]:
|
||||
if cl not in occ_self_redef:
|
||||
occ_self_redef[cl] = 0
|
||||
occ_self_redef[cl] += 1
|
||||
for cl in v["duplicated_classes"]:
|
||||
if cl not in occ_self:
|
||||
occ_self[cl] = 0
|
||||
occ_self[cl] += 1
|
||||
if v["duplicated_classes"]:
|
||||
nb_app_self_shadow += 1
|
||||
if v["duplicated_classes"] and not v["redef_classes"]:
|
||||
|
|
@ -233,6 +254,88 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
|||
for row in data_only:
|
||||
writer.writerow(row)
|
||||
|
||||
# occ_sdk34 = {}
|
||||
# occ_hid34 = {}
|
||||
# occ_self_redef = {}
|
||||
# occ_self = {}
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(occ_sdk34.keys(), key=lambda x: occ_sdk34[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class Hidden occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(occ_hid34.keys(), key=lambda x: occ_hid34[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_hid34[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"collision class Self occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(occ_self.keys(), key=lambda x: occ_self[x], reverse=True)[:10]:
|
||||
print(f"{cl:<70} {occ_self[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class Self occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
occ_self_redef.keys(), key=lambda x: occ_self_redef[x], reverse=True
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_self_redef[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK <= 7 occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK = 8 occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(
|
||||
"redefined class SDK = 16 occurences"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||
print()
|
||||
|
||||
|
||||
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||
with sqlite3.connect(db) as con:
|
||||
|
|
@ -289,6 +392,7 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
|||
for cl, n in classes_occ.items():
|
||||
cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
|
||||
|
||||
matplotlib.rcParams.update({"font.size": 22})
|
||||
plt.figure(figsize=(20, 9), dpi=80)
|
||||
plt.bar(
|
||||
["<=7" if i == 7 else str(i) for i in range(7, 35)],
|
||||
|
|
@ -308,6 +412,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
|||
edgecolor="black",
|
||||
)
|
||||
plt.legend(loc="upper left")
|
||||
plt.ylabel("Nb Classes")
|
||||
plt.xlabel("First SDK containing the class")
|
||||
plt.savefig(out / "redef_sdk_relative_min_sdk.pdf", format="pdf")
|
||||
plt.savefig(out / "redef_sdk_relative_min_sdk.svg", format="svg")
|
||||
plt.show()
|
||||
|
|
@ -332,6 +438,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
|||
edgecolor="black",
|
||||
)
|
||||
plt.legend(loc="upper left")
|
||||
plt.ylabel("Nb Classes")
|
||||
plt.xlabel("First SDK containing the class")
|
||||
plt.savefig(out / "redef_sdk_relative_targ_sdk.pdf", format="pdf")
|
||||
plt.savefig(out / "redef_sdk_relative_targ_sdk.svg", format="svg")
|
||||
plt.show()
|
||||
|
|
|
|||
68
run_exp_6.sh
Normal file
68
run_exp_6.sh
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
#!/usr/bin/bash

# Experiment 6 driver:
#   1. unpack platforms.zip and disassemble every platform/sdk jar with apktool,
#   2. select from the database the apps that redefine platform classes or
#      contain duplicated classes,
#   3. split that list into chunks and run `check-platf-reder` on each chunk
#      in a background worker.
# The script returns once the workers are launched; it does not wait for them.

WD=$(pwd)
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
PLATFORM_DIR=$(mktemp -d)
DB="${SCRIPT_DIR}/data/app-2023-xp4.db"
LIST=$(mktemp)
CHUNK_FOLDER="./app-2023-exp6"
APKTOOL="${SCRIPT_DIR}/apktool.jar"
ANDROZOO_KEY="${SCRIPT_DIR}/.ZOO_KEY"
OUT_DIR="app-2023-xp6.out"

# Names of the chunk files produced by `split -a 2 -d` (one worker per chunk).
app_lst=(
    '00'
    '01'
    '02'
    '03'
    '04'
    '05'
    '06'
    '07'
    '08'
    '09'
    '10'
    '11'
    '12'
    '13'
    '14'
    '15'
    '16'
    '17'
    '18'
    '19'
)

mkdir -p "${OUT_DIR}"
unzip platforms.zip -d "${PLATFORM_DIR}"

# NOTE(review): globstar is not enabled, so `**` behaves like `*` here (exactly
# one directory level between PLATFORM_DIR and platform/sdk) -- confirm this
# matches the layout of platforms.zip.
for ad in "${PLATFORM_DIR}"/**/{platform,sdk}; do
    # apktool writes its output in the current directory: never run it from
    # the wrong place if the directory does not exist.
    cd "${ad}" || continue
    for jar in "${ad}"/*.jar; do
        java -Xmx8G -jar "${APKTOOL}" d "${jar}"
    done
done

cd "${WD}"

sqlite3 "${DB}" 'SELECT sha256 FROM data WHERE nb_def_platform_32_classes >= 1 OR nb_def_platform_33_classes >= 1 OR nb_def_platform_34_classes >= 1 OR nb_duplicate_classes>=1;' > "${LIST}"

# Number of lines per chunk, chosen so that at most 20 chunks are produced.
N_CHUNK=$(( $(wc -l < "${LIST}") / 20 + 1 ))
rm -rf "${CHUNK_FOLDER}"
mkdir -p "${CHUNK_FOLDER}"
# The trailing slash matters: the last argument of split is a file name
# *prefix*, so without it the chunks would be written next to the folder
# (./app-2023-exp600, ...) instead of inside it where the workers look.
split -a 2 -d -l "${N_CHUNK}" "${LIST}" "${CHUNK_FOLDER}/"

# Process every sha256 listed in the chunk file given as first argument.
worker () {
    for sha in $(cat "${1}"); do
        "${SCRIPT_DIR}"/venv/bin/check-platf-reder --api-key-file "${ANDROZOO_KEY}" --sha256 "${sha}" --path-platform-smali "${PLATFORM_DIR}" --apktool-jar "${APKTOOL}" --output-dir "${OUT_DIR}"
    done
    echo "Finished ${1}"
}

for lst in "${app_lst[@]}"; do
    # Fewer than 20 chunks can be produced when the list is short.
    [ -f "${CHUNK_FOLDER}/${lst}" ] || continue
    worker "${CHUNK_FOLDER}/${lst}" &
    sleep 1
done

echo 'PROCESS LAUNCHED'
|
||||
Loading…
Add table
Add a link
Reference in a new issue