fuzzy class comp
This commit is contained in:
parent
ad789abc7b
commit
0b572e1885
4 changed files with 252 additions and 2 deletions
|
|
@ -17,6 +17,7 @@ from androguard.core.apk import APK # type: ignore
|
||||||
|
|
||||||
from .androzoo import download_apk
|
from .androzoo import download_apk
|
||||||
from .data import ApkData, load_from_directory
|
from .data import ApkData, load_from_directory
|
||||||
|
from .cmp_smali import cmp_smali
|
||||||
from .analysis import analyze
|
from .analysis import analyze
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -618,7 +619,12 @@ def check_smali_platform():
|
||||||
for smalli_dir in smalli_dirs:
|
for smalli_dir in smalli_dirs:
|
||||||
if (smalli_dir / cl_f).exists():
|
if (smalli_dir / cl_f).exists():
|
||||||
with (smalli_dir / cl_f).open("r") as file:
|
with (smalli_dir / cl_f).open("r") as file:
|
||||||
if file.read() != plt_smali:
|
if not cmp_smali(
|
||||||
|
file.read(),
|
||||||
|
plt_smali,
|
||||||
|
sha256,
|
||||||
|
f"{a_sdk_dir / 'platform'}",
|
||||||
|
):
|
||||||
plat_diff_smalli.add(cl)
|
plat_diff_smalli.add(cl)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
@ -641,13 +647,28 @@ def check_smali_platform():
|
||||||
for smalli_dir in smalli_dirs:
|
for smalli_dir in smalli_dirs:
|
||||||
if (smalli_dir / cl_f).exists():
|
if (smalli_dir / cl_f).exists():
|
||||||
with (smalli_dir / cl_f).open("r") as file:
|
with (smalli_dir / cl_f).open("r") as file:
|
||||||
if file.read() != sdk_smali:
|
if not cmp_smali(
|
||||||
|
file.read(), sdk_smali, sha256, f"{a_sdk_dir / 'sdk'}"
|
||||||
|
):
|
||||||
sdk_diff_smalli.add(cl)
|
sdk_diff_smalli.add(cl)
|
||||||
break
|
break
|
||||||
|
|
||||||
data[f"sdk_{sdk_v}_diff_smalli"] = list(sdk_diff_smalli)
|
data[f"sdk_{sdk_v}_diff_smalli"] = list(sdk_diff_smalli)
|
||||||
data[f"platform_{sdk_v}_diff_smalli"] = list(plat_diff_smalli)
|
data[f"platform_{sdk_v}_diff_smalli"] = list(plat_diff_smalli)
|
||||||
|
|
||||||
|
for cl in data["duplicated_classes"]:
|
||||||
|
cl_f = cl.removesuffix(";").removeprefix("L") + ".smali"
|
||||||
|
smali = None
|
||||||
|
for cdir in smalli_dirs:
|
||||||
|
if (cdir / cl_f).exists():
|
||||||
|
with (cdir / cl_f).open() as file:
|
||||||
|
smali_new = file.read()
|
||||||
|
if smali is None:
|
||||||
|
smali = smali_new
|
||||||
|
elif not cmp_smali(smali, smali_new, sha256, sha256):
|
||||||
|
dist_dup_classes.add(cl)
|
||||||
|
data["redef_classes"] = list(dist_dup_classes)
|
||||||
|
|
||||||
if args.output_dir:
|
if args.output_dir:
|
||||||
with (args.output_dir / sha256).open("w") as file:
|
with (args.output_dir / sha256).open("w") as file:
|
||||||
json.dump(data, file)
|
json.dump(data, file)
|
||||||
|
|
|
||||||
53
android_class_shadowing_scanner/cmp_smali.py
Normal file
53
android_class_shadowing_scanner/cmp_smali.py
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
def cmp_smali(sm1: str, sm2: str, sha256_1: str = "", sha256_2: str = "") -> bool:
    """Tell whether two smali class texts define the same methods.

    Both texts are parsed with ``get_methods``. The classes are considered
    equal when they declare the same set of method keys and when, for each
    key, both sides hold the same set of bodies. The order of the bodies
    and how many times an identical body is repeated are ignored.

    Args:
        sm1: Text of the first smali file.
        sm2: Text of the second smali file.
        sha256_1: Label forwarded to ``get_methods`` for ``sm1``.
        sha256_2: Label forwarded to ``get_methods`` for ``sm2``.

    Returns:
        True when the method keys and their bodies match, False otherwise.
    """
    meths_1 = get_methods(sm1, sha256_1)
    meths_2 = get_methods(sm2, sha256_2)
    # Dict key views compare like sets, so no explicit set() is needed.
    if meths_1.keys() != meths_2.keys():
        return False
    # Bodies are lists of lines; turning them into tuples makes them
    # hashable, so "each body of one side exists on the other side, both
    # ways" becomes a plain set equality instead of two nested scans.
    return all(
        {tuple(body) for body in bodies_1}
        == {tuple(body) for body in meths_2[name]}
        for name, bodies_1 in meths_1.items()
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_methods(sm: str, sha256: str = "") -> dict[str, list[list[str]]]:
    """Collect the method bodies found in the text of a smali class.

    Every line is stripped of surrounding whitespace. Inside a method,
    empty lines and ``.line`` directives are left out of the body. A
    method that is never closed by ``.end method`` is not reported.

    Args:
        sm: Text of a smali file.
        sha256: Label that is only used in the parsing error message.

    Returns:
        A dict keyed by the last space-separated token of each ``.method``
        line. Each value lists the bodies (lists of stripped lines) closed
        under that key, in order of appearance.
    """
    class_name = "UNINITIALIZED"
    open_method: str | None = None
    body: list[str] = []
    methods: dict[str, list[list[str]]] = {}

    for raw_line in sm.split("\n"):
        text = raw_line.strip()

        if text == ".end method":
            if open_method is None:
                # Closing directive without a matching ".method".
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            else:
                methods.setdefault(open_method, []).append(body)
            open_method, body = None, []
            continue

        if text.startswith(".method "):
            if open_method is not None:
                # Previous method was never closed; its body is dropped.
                print(f"ERROR PARSING SMALI of {class_name} {sha256}")
            open_method, body = text.split(" ")[-1], []
            continue

        if text.startswith(".class "):
            class_name = text.split(" ")[-1]

        if open_method is not None and text != "" and not text.startswith(".line "):
            body.append(text)

    return methods
|
||||||
|
|
@ -6,9 +6,16 @@ from pathlib import Path
|
||||||
from .platform_classes import MIN_MAX_SDK
|
from .platform_classes import MIN_MAX_SDK
|
||||||
|
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
import matplotlib
|
||||||
|
|
||||||
|
|
||||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef: Path):
|
def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef: Path):
|
||||||
|
|
||||||
|
occ_sdk34 = {}
|
||||||
|
occ_hid34 = {}
|
||||||
|
occ_self_redef = {}
|
||||||
|
occ_self = {}
|
||||||
|
|
||||||
nb_sdk_cl_redef = 0
|
nb_sdk_cl_redef = 0
|
||||||
nb_sdk_cl_id = 0
|
nb_sdk_cl_id = 0
|
||||||
nb_app_sdk_cl_redef = 0
|
nb_app_sdk_cl_redef = 0
|
||||||
|
|
@ -23,6 +30,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
||||||
l_nb_sdk_cl_id = 0
|
l_nb_sdk_cl_id = 0
|
||||||
for cl in data["sdk_34_classes"]:
|
for cl in data["sdk_34_classes"]:
|
||||||
nb_sdk_cl_redef += 1
|
nb_sdk_cl_redef += 1
|
||||||
|
if cl not in occ_sdk34:
|
||||||
|
occ_sdk34[cl] = 0
|
||||||
|
occ_sdk34[cl] += 1
|
||||||
if any(
|
if any(
|
||||||
[
|
[
|
||||||
cl not in data[lst]
|
cl not in data[lst]
|
||||||
|
|
@ -45,6 +55,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
||||||
|
|
||||||
l_nb_hid_cl_id = 0
|
l_nb_hid_cl_id = 0
|
||||||
for cl in data["platform_non_sdk_34_classes"]:
|
for cl in data["platform_non_sdk_34_classes"]:
|
||||||
|
if cl not in occ_hid34:
|
||||||
|
occ_hid34[cl] = 0
|
||||||
|
occ_hid34[cl] += 1
|
||||||
nb_hid_cl_redef += 1
|
nb_hid_cl_redef += 1
|
||||||
if any(
|
if any(
|
||||||
[
|
[
|
||||||
|
|
@ -72,6 +85,14 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
||||||
with detail_class_redef.open("r") as fd:
|
with detail_class_redef.open("r") as fd:
|
||||||
data = json.load(fd)
|
data = json.load(fd)
|
||||||
for v in data.values():
|
for v in data.values():
|
||||||
|
for cl in v["redef_classes"]:
|
||||||
|
if cl not in occ_self_redef:
|
||||||
|
occ_self_redef[cl] = 0
|
||||||
|
occ_self_redef[cl] += 1
|
||||||
|
for cl in v["duplicated_classes"]:
|
||||||
|
if cl not in occ_self:
|
||||||
|
occ_self[cl] = 0
|
||||||
|
occ_self[cl] += 1
|
||||||
if v["duplicated_classes"]:
|
if v["duplicated_classes"]:
|
||||||
nb_app_self_shadow += 1
|
nb_app_self_shadow += 1
|
||||||
if v["duplicated_classes"] and not v["redef_classes"]:
|
if v["duplicated_classes"] and not v["redef_classes"]:
|
||||||
|
|
@ -233,6 +254,88 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path, detail_class_redef:
|
||||||
for row in data_only:
|
for row in data_only:
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
# occ_sdk34 = {}
|
||||||
|
# occ_hid34 = {}
|
||||||
|
# occ_self_redef = {}
|
||||||
|
# occ_self = {}
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class SDK occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(occ_sdk34.keys(), key=lambda x: occ_sdk34[x], reverse=True)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class Hidden occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(occ_hid34.keys(), key=lambda x: occ_hid34[x], reverse=True)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_hid34[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"collision class Self occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(occ_self.keys(), key=lambda x: occ_self[x], reverse=True)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_self[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class Self occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
occ_self_redef.keys(), key=lambda x: occ_self_redef[x], reverse=True
|
||||||
|
)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_self_redef[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class SDK <= 7 occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class SDK = 8 occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
"redefined class SDK = 16 occurences"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
print(f"{cl:<70} {occ_sdk34[cl]: >5}")
|
||||||
|
print()
|
||||||
|
|
||||||
|
|
||||||
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||||
with sqlite3.connect(db) as con:
|
with sqlite3.connect(db) as con:
|
||||||
|
|
@ -289,6 +392,7 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||||
for cl, n in classes_occ.items():
|
for cl, n in classes_occ.items():
|
||||||
cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
|
cls_by_sdk[MIN_MAX_SDK[cl][0]] += n
|
||||||
|
|
||||||
|
matplotlib.rcParams.update({"font.size": 22})
|
||||||
plt.figure(figsize=(20, 9), dpi=80)
|
plt.figure(figsize=(20, 9), dpi=80)
|
||||||
plt.bar(
|
plt.bar(
|
||||||
["<=7" if i == 7 else str(i) for i in range(7, 35)],
|
["<=7" if i == 7 else str(i) for i in range(7, 35)],
|
||||||
|
|
@ -308,6 +412,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||||
edgecolor="black",
|
edgecolor="black",
|
||||||
)
|
)
|
||||||
plt.legend(loc="upper left")
|
plt.legend(loc="upper left")
|
||||||
|
plt.ylabel("Nb Classes")
|
||||||
|
plt.xlabel("First SDK containing the class")
|
||||||
plt.savefig(out / "redef_sdk_relative_min_sdk.pdf", format="pdf")
|
plt.savefig(out / "redef_sdk_relative_min_sdk.pdf", format="pdf")
|
||||||
plt.savefig(out / "redef_sdk_relative_min_sdk.svg", format="svg")
|
plt.savefig(out / "redef_sdk_relative_min_sdk.svg", format="svg")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
@ -332,6 +438,8 @@ def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||||
edgecolor="black",
|
edgecolor="black",
|
||||||
)
|
)
|
||||||
plt.legend(loc="upper left")
|
plt.legend(loc="upper left")
|
||||||
|
plt.ylabel("Nb Classes")
|
||||||
|
plt.xlabel("First SDK containing the class")
|
||||||
plt.savefig(out / "redef_sdk_relative_targ_sdk.pdf", format="pdf")
|
plt.savefig(out / "redef_sdk_relative_targ_sdk.pdf", format="pdf")
|
||||||
plt.savefig(out / "redef_sdk_relative_targ_sdk.svg", format="svg")
|
plt.savefig(out / "redef_sdk_relative_targ_sdk.svg", format="svg")
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
|
||||||
68
run_exp_6.sh
Normal file
68
run_exp_6.sh
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
#!/usr/bin/bash
#
# Experiment 6: decode the platform/sdk jars with apktool, select the
# applications of interest from the sqlite database, split the list of
# sha256 into chunks and start one background worker per chunk.
# The script prints 'PROCESS LAUNCHED' and returns without waiting for the
# workers; the temporary platform directory is left in place for them.

WD=$(pwd)
SCRIPT_DIR="$(dirname "$(readlink -f "$0")")"
PLATFORM_DIR=$(mktemp -d)
DB="${SCRIPT_DIR}/data/app-2023-xp4.db"
LIST=$(mktemp)
CHUNK_FOLDER="./app-2023-exp6"
APKTOOL="${SCRIPT_DIR}/apktool.jar"
ANDROZOO_KEY="${SCRIPT_DIR}/.ZOO_KEY"
OUT_DIR="app-2023-xp6.out"

# Names of the chunk files produced by `split -a 2 -d` (one worker each).
app_lst=(
    '00'
    '01'
    '02'
    '03'
    '04'
    '05'
    '06'
    '07'
    '08'
    '09'
    '10'
    '11'
    '12'
    '13'
    '14'
    '15'
    '16'
    '17'
    '18'
    '19'
)

mkdir -p "${OUT_DIR}"
unzip platforms.zip -d "${PLATFORM_DIR}"

# globstar is not enabled here, so `**` matches one directory level.
for ad in "${PLATFORM_DIR}"/**/{platform,sdk}; do
    # apktool writes its output in the current directory: never run it
    # from the wrong place if the directory is missing.
    cd "${ad}" || continue
    for jar in "${ad}"/*.jar; do
        # Unmatched glob stays literal; skip it.
        [ -e "${jar}" ] || continue
        java -Xmx8G -jar "${APKTOOL}" d "${jar}"
    done
done

cd "${WD}"

sqlite3 "${DB}" 'SELECT sha256 FROM data WHERE nb_def_platform_32_classes >= 1 OR nb_def_platform_33_classes >= 1 OR nb_def_platform_34_classes >= 1 OR nb_duplicate_classes>=1;' > "${LIST}"

# Number of lines per chunk so that at most 20 chunks are produced.
N_CHUNK=$(( $(wc -l < "${LIST}") / 20 + 1 ))
# -f: the folder does not exist on the first run.
rm -rf "${CHUNK_FOLDER}"
mkdir "${CHUNK_FOLDER}"
# The trailing slash matters: the last argument of split is a file name
# PREFIX, and the workers read "${CHUNK_FOLDER}/00", "${CHUNK_FOLDER}/01", ...
split -a 2 -d -l "${N_CHUNK}" "${LIST}" "${CHUNK_FOLDER}/"

# Analyse every application listed (one sha256 per line) in the file "$1".
worker () {
    for sha in $(cat "${1}"); do
        "${SCRIPT_DIR}"/venv/bin/check-platf-reder --api-key-file "${ANDROZOO_KEY}" --sha256 "${sha}" --path-platform-smali "${PLATFORM_DIR}" --apktool-jar "${APKTOOL}" --output-dir "${OUT_DIR}"
    done
    echo "Finished ${1}"
}

for lst in "${app_lst[@]}"; do
    worker "${CHUNK_FOLDER}/${lst}" &
    sleep 1
done

echo 'PROCESS LAUNCHED'
|
||||||
Loading…
Add table
Add a link
Reference in a new issue