fuzzy class comp

This commit is contained in:
Jean-Marie Mineau 2024-11-19 21:09:08 +01:00
parent ad789abc7b
commit 0b572e1885
4 changed files with 252 additions and 2 deletions

View file

@ -17,6 +17,7 @@ from androguard.core.apk import APK # type: ignore
from .androzoo import download_apk
from .data import ApkData, load_from_directory
from .cmp_smali import cmp_smali
from .analysis import analyze
@ -618,7 +619,12 @@ def check_smali_platform():
for smalli_dir in smalli_dirs:
if (smalli_dir / cl_f).exists():
with (smalli_dir / cl_f).open("r") as file:
if file.read() != plt_smali:
if not cmp_smali(
file.read(),
plt_smali,
sha256,
f"{a_sdk_dir / 'platform'}",
):
plat_diff_smalli.add(cl)
break
@ -641,13 +647,28 @@ def check_smali_platform():
for smalli_dir in smalli_dirs:
if (smalli_dir / cl_f).exists():
with (smalli_dir / cl_f).open("r") as file:
if file.read() != sdk_smali:
if not cmp_smali(
file.read(), sdk_smali, sha256, f"{a_sdk_dir / 'sdk'}"
):
sdk_diff_smalli.add(cl)
break
data[f"sdk_{sdk_v}_diff_smalli"] = list(sdk_diff_smalli)
data[f"platform_{sdk_v}_diff_smalli"] = list(plat_diff_smalli)
for cl in data["duplicated_classes"]:
cl_f = cl.removesuffix(";").removeprefix("L") + ".smali"
smali = None
for cdir in smalli_dirs:
if (cdir / cl_f).exists():
with (cdir / cl_f).open() as file:
smali_new = file.read()
if smali is None:
smali = smali_new
elif not cmp_smali(smali, smali_new, sha256, sha256):
dist_dup_classes.add(cl)
data["redef_classes"] = list(dist_dup_classes)
if args.output_dir:
with (args.output_dir / sha256).open("w") as file:
json.dump(data, file)

View file

@ -0,0 +1,53 @@
def cmp_smali(sm1: str, sm2: str, sha256_1: str = "", sha256_2: str = "") -> bool:
    """Return True when two smali sources define the same methods.

    The comparison is deliberately loose: both sources are reduced with
    `get_methods`, so only method bodies are compared, blank lines and
    `.line` directives are ignored, and anything outside a
    `.method` / `.end method` pair plays no role.

    Two sources are considered equal when:
      * they expose exactly the same method keys, and
      * for every key, each body recorded on one side has an identical body
        on the other side (how many times a body is repeated is ignored).

    Args:
        sm1: Text of the first smali file.
        sm2: Text of the second smali file.
        sha256_1: Label forwarded to `get_methods` for `sm1`; only used in
            its parse-error messages.
        sha256_2: Same as `sha256_1`, for `sm2`.

    Returns:
        True if the two sources match under the rules above, else False.
    """
    meths_1 = get_methods(sm1, sha256_1)
    meths_2 = get_methods(sm2, sha256_2)

    # Dict key views compare like sets, so no intermediate set() is needed.
    if meths_1.keys() != meths_2.keys():
        return False

    # "Every body of one side exists on the other side, and vice versa" is
    # exactly set equality; bodies are lists, so make them hashable first.
    return all(
        {tuple(body) for body in bodies_1}
        == {tuple(body) for body in meths_2[name]}
        for name, bodies_1 in meths_1.items()
    )
def get_methods(sm: str, sha256: str = "") -> dict[str, list[list[str]]]:
    """Extract the method bodies of a smali source.

    The text is scanned line by line. Each `.method ...` line opens a method
    whose key is the last space-separated token of that line, and the
    matching `.end method` line closes it. Between the two, every stripped,
    non-empty line that is not a `.line ` directive is kept as part of the
    body.

    Args:
        sm: Text of the smali file.
        sha256: Label printed alongside the class name when the
            `.method` / `.end method` pairing is inconsistent.

    Returns:
        A mapping from method key to the list of bodies found for that key
        (one entry per closed method), each body being a list of stripped
        lines. A method that is never closed is not reported.
    """
    class_name = "UNINITIALIZED"
    open_method: None | str = None
    body: list[str] = []
    found: dict[str, list[list[str]]] = {}

    for raw_line in sm.split("\n"):
        text = raw_line.strip()

        if text.startswith(".class "):
            # Remember the class name for the diagnostics below.
            class_name = text.rsplit(" ", 1)[-1]
        elif text == ".end method":
            if open_method is None:
                # Closing marker without a matching `.method`.
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            else:
                found.setdefault(open_method, []).append(body)
            body = []
            open_method = None
            # The closing marker is neither body content nor a method start.
            continue

        if open_method is not None and text and not text.startswith(".line "):
            body.append(text)

        if text.startswith(".method "):
            if open_method is not None:
                # A new method starts while the previous one is still open.
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            open_method = text.rsplit(" ", 1)[-1]
            body = []

    return found

View file

@ -6,9 +6,16 @@ from pathlib import Path
from .platform_classes import MIN_MAX_SDK
from matplotlib import pyplot as plt
import matplotlib
def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef: Path):
occ_sdk34 = {}
occ_hid34 = {}
occ_self_redef = {}
occ_self = {}
nb_sdk_cl_redef = 0
nb_sdk_cl_id = 0
nb_app_sdk_cl_redef = 0
@ -23,6 +30,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
l_nb_sdk_cl_id = 0
for cl in data["sdk_34_classes"]:
nb_sdk_cl_redef += 1
if cl not in occ_sdk34:
occ_sdk34[cl] = 0
occ_sdk34[cl] += 1
if any(
[
cl not in data[lst]
@ -45,6 +55,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
l_nb_hid_cl_id = 0
for cl in data["platform_non_sdk_34_classes"]:
if cl not in occ_hid34:
occ_hid34[cl] = 0
occ_hid34[cl] += 1
nb_hid_cl_redef += 1
if any(
[
@ -72,6 +85,14 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
with detail_class_redef.open("r") as fd:
data = json.load(fd)
for v in data.values():
for cl in v["redef_classes"]:
if cl not in occ_self_redef:
occ_self_redef[cl] = 0
occ_self_redef[cl] += 1
for cl in v["duplicated_classes"]:
if cl not in occ_self:
occ_self[cl] = 0
occ_self[cl] += 1
if v["duplicated_classes"]:
nb_app_self_shadow += 1
if v["duplicated_classes"] and not v["redef_classes"]:
@ -233,6 +254,88 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
for row in data_only:
writer.writerow(row)
# occ_sdk34 = {}
# occ_hid34 = {}
# occ_self_redef = {}
# occ_self = {}
print()
print(
"redefined class SDK occurences"
)
print()
for cl in sorted(occ_sdk34.keys(), key=lambda x: occ_sdk34[x], reverse=True)[:10]:
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
print()
print()
print(
"redefined class Hidden occurences"
)
print()
for cl in sorted(occ_hid34.keys(), key=lambda x: occ_hid34[x], reverse=True)[:10]:
print(f"{cl:<70} {occ_hid34[cl]: >5}")
print()
print()
print(
"collision class Self occurences"
)
print()
for cl in sorted(occ_self.keys(), key=lambda x: occ_self[x], reverse=True)[:10]:
print(f"{cl:<70} {occ_self[cl]: >5}")
print()
print()
print(
"redefined class Self occurences"
)
print()
for cl in sorted(
occ_self_redef.keys(), key=lambda x: occ_self_redef[x], reverse=True
)[:10]:
print(f"{cl:<70} {occ_self_redef[cl]: >5}")
print()
print()
print(
"redefined class SDK <= 7 occurences"
)
print()
for cl in sorted(
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
key=lambda x: occ_sdk34[x],
reverse=True,
)[:10]:
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
print()
print()
print(
"redefined class SDK = 8 occurences"
)
print()
for cl in sorted(
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
key=lambda x: occ_sdk34[x],
reverse=True,
)[:10]:
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
print()
print()
print(
"redefined class SDK = 16 occurences"
)
print()
for cl in sorted(
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
key=lambda x: occ_sdk34[x],
reverse=True,
)[:10]:
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
print()
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
with sqlite3.connect(db) as con:
@ -289,6 +392,7 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
for cl, n in classes_occ.items():
cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
matplotlib.rcParams.update({"font.size": 22})
plt.figure(figsize=(20, 9), dpi=80)
plt.bar(
["<=7" if i == 7 else str(i) for i in range(7, 35)],
@ -308,6 +412,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
edgecolor="black",
)
plt.legend(loc="upper left")
plt.ylabel("Nb Classes")
plt.xlabel("First SDK containing the class")
plt.savefig(out / "redef_sdk_relative_min_sdk.pdf", format="pdf")
plt.savefig(out / "redef_sdk_relative_min_sdk.svg", format="svg")
plt.show()
@ -332,6 +438,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
edgecolor="black",
)
plt.legend(loc="upper left")
plt.ylabel("Nb Classes")
plt.xlabel("First SDK containing the class")
plt.savefig(out / "redef_sdk_relative_targ_sdk.pdf", format="pdf")
plt.savefig(out / "redef_sdk_relative_targ_sdk.svg", format="svg")
plt.show()

68
run_exp_6.sh Normal file
View file

@ -0,0 +1,68 @@
#!/usr/bin/bash
# Experiment 6 set-up: decode the platform/SDK jars with apktool, select the
# applications to analyse from the database, and split that list into one
# chunk file per worker.

WD=$(pwd)
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
PLATFORM_DIR=$(mktemp -d)
DB="${SCRIPT_DIR}/data/app-2023-xp4.db"
LIST=$(mktemp)
CHUNK_FOLDER="./app-2023-exp6"
APKTOOL="${SCRIPT_DIR}/apktool.jar"
ANDROZOO_KEY="${SCRIPT_DIR}/.ZOO_KEY"
OUT_DIR="app-2023-xp6.out"

# Chunk file names, one per worker. They must match the two-digit numeric
# suffixes generated by `split -a 2 -d` below.
app_lst=(
    '00'
    '01'
    '02'
    '03'
    '04'
    '05'
    '06'
    '07'
    '08'
    '09'
    '10'
    '11'
    '12'
    '13'
    '14'
    '15'
    '16'
    '17'
    '18'
    '19'
)

mkdir -p "${OUT_DIR}"

# Decode every jar next to where it was unpacked: apktool writes its output
# in the current directory, hence the `cd` into each platform/sdk folder.
unzip platforms.zip -d "${PLATFORM_DIR}"
for ad in "${PLATFORM_DIR}"/**/{platform,sdk}; do
    # Skip folders that do not exist instead of decoding into the wrong place.
    cd "${ad}" || continue
    for jar in "${ad}"/*.jar; do
        java -Xmx8G -jar "${APKTOOL}" d "${jar}"
    done
done
cd "${WD}"

# Applications that define at least one platform class (SDK 32 to 34) or
# that contain duplicated classes.
sqlite3 "${DB}" 'SELECT sha256 FROM data WHERE nb_def_platform_32_classes >= 1 OR nb_def_platform_33_classes >= 1 OR nb_def_platform_34_classes >= 1 OR nb_duplicate_classes>=1;' > "${LIST}"

# Number of lines per chunk, chosen so that the list fits in at most 20 files.
N_CHUNK=$(python3 -c "print($(wc -l < "${LIST}")//20 + 1)")

# -f: the folder does not exist on the first run.
rm -rf "${CHUNK_FOLDER}"
mkdir "${CHUNK_FOLDER}"

# The trailing slash matters: split appends the suffix directly to the
# prefix, and the chunks must be created inside the folder (00, 01, ...).
split -a 2 -d -l "${N_CHUNK}" "${LIST}" "${CHUNK_FOLDER}/"
# Run check-platf-reder on every sha256 listed in the chunk file given as $1,
# one application after the other, then report that the chunk is done.
worker () {
    local chunk_file="${1}"
    local sha
    for sha in $(cat "${chunk_file}"); do
        "${SCRIPT_DIR}"/venv/bin/check-platf-reder \
            --api-key-file "${ANDROZOO_KEY}" \
            --sha256 "${sha}" \
            --path-platform-smali "${PLATFORM_DIR}" \
            --apktool-jar "${APKTOOL}" \
            --output-dir "${OUT_DIR}"
    done
    echo "Finished ${chunk_file}"
}
# Start one background worker per chunk file, one second apart.
for chunk_id in "${app_lst[@]}"; do
    worker "${CHUNK_FOLDER}/${chunk_id}" &
    sleep 1
done
# The workers keep running in the background after this message.
echo 'PROCESS LAUNCHED'