keep tweaking
This commit is contained in:
parent
fc38321d09
commit
cea77c2fc3
3 changed files with 175 additions and 27 deletions
|
|
@ -571,4 +571,4 @@ def data_mining():
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
stats(args.db, args.out, args.output_check_platform_redef)
|
stats(args.db, args.out, args.output_check_platform_redef)
|
||||||
# analyse_sdk_redef(args.output_check_platform_redef, args.db, args.out)
|
analyse_sdk_redef(args.output_check_platform_redef, args.db, args.out)
|
||||||
|
|
|
||||||
|
|
@ -50,8 +50,9 @@ def get_methods(sm: str, sha256: str = "") -> dict[str, list[list[str]]]:
|
||||||
if (
|
if (
|
||||||
current_meth is not None
|
current_meth is not None
|
||||||
and striped
|
and striped
|
||||||
and not striped.startswith(".line ")
|
# and not striped.startswith(".line ")
|
||||||
and not striped.startswith(".param ")
|
# and not striped.startswith(".param ")
|
||||||
|
and not striped.startswith(".")
|
||||||
):
|
):
|
||||||
current_body.append(striped)
|
current_body.append(striped)
|
||||||
if striped.startswith(".method "):
|
if striped.startswith(".method "):
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import matplotlib
|
||||||
|
|
||||||
|
|
||||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
|
VT_THRESH = 3
|
||||||
|
|
||||||
occ_sdk34 = {}
|
occ_sdk34 = {}
|
||||||
occ_hid34 = {}
|
occ_hid34 = {}
|
||||||
|
|
@ -143,6 +144,13 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
with sqlite3.connect(db) as con:
|
with sqlite3.connect(db) as con:
|
||||||
cur = con.cursor()
|
cur = con.cursor()
|
||||||
nb_apk_all = cur.execute("SELECT COUNT(sha256) FROM data;").fetchone()[0]
|
nb_apk_all = cur.execute("SELECT COUNT(sha256) FROM data;").fetchone()[0]
|
||||||
|
nb_mal_any = cur.execute(
|
||||||
|
f"SELECT COUNT(sha256) FROM data WHERE vt_detection>={VT_THRESH} AND "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||||
|
).fetchone()[0]
|
||||||
|
nb_mal_all = cur.execute(
|
||||||
|
f"SELECT COUNT(sha256) FROM data WHERE vt_detection>={VT_THRESH};"
|
||||||
|
).fetchone()[0]
|
||||||
avg_target_sdk_tot = cur.execute(
|
avg_target_sdk_tot = cur.execute(
|
||||||
"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0;"
|
"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0;"
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
|
|
@ -162,6 +170,37 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"LIMIT 1 "
|
"LIMIT 1 "
|
||||||
"OFFSET (SELECT COUNT(*) FROM data) / 2;"
|
"OFFSET (SELECT COUNT(*) FROM data) / 2;"
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
|
nb_apk_any = cur.execute(
|
||||||
|
"SELECT COUNT(sha256) FROM data WHERE "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||||
|
).fetchone()[0]
|
||||||
|
avg_target_sdk_any = cur.execute(
|
||||||
|
"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0 AND"
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||||
|
).fetchone()[0]
|
||||||
|
avg_min_sdk_any = cur.execute(
|
||||||
|
"SELECT AVG(min_sdk_version) FROM data WHERE min_sdk_version>=0 AND "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||||
|
).fetchone()[0]
|
||||||
|
tot_avg_any = cur.execute(
|
||||||
|
"SELECT AVG("
|
||||||
|
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes"
|
||||||
|
") FROM data WHERE "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||||
|
).fetchone()[0]
|
||||||
|
tot_median_any = cur.execute(
|
||||||
|
"SELECT "
|
||||||
|
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes "
|
||||||
|
"FROM data WHERE "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1) "
|
||||||
|
"ORDER BY "
|
||||||
|
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes "
|
||||||
|
"LIMIT 1 "
|
||||||
|
"OFFSET ("
|
||||||
|
" SELECT COUNT(*) FROM data WHERE "
|
||||||
|
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1) "
|
||||||
|
") / 2;"
|
||||||
|
).fetchone()[0]
|
||||||
data_all = []
|
data_all = []
|
||||||
data_only = []
|
data_only = []
|
||||||
for name, field in [
|
for name, field in [
|
||||||
|
|
@ -172,6 +211,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
nb_app, avg, avg_min_sdk = cur.execute(
|
nb_app, avg, avg_min_sdk = cur.execute(
|
||||||
f"SELECT COUNT(sha256), AVG({field}), AVG(min_sdk_version) FROM data WHERE {field}>=1;"
|
f"SELECT COUNT(sha256), AVG({field}), AVG(min_sdk_version) FROM data WHERE {field}>=1;"
|
||||||
).fetchone()
|
).fetchone()
|
||||||
|
nb_mal = cur.execute(
|
||||||
|
f"SELECT COUNT(sha256) FROM data WHERE {field}>=1 AND vt_detection>={VT_THRESH};"
|
||||||
|
).fetchone()[0]
|
||||||
avg_target_sdk = cur.execute(
|
avg_target_sdk = cur.execute(
|
||||||
f"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0 AND {field}>=1;"
|
f"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0 AND {field}>=1;"
|
||||||
).fetchone()[0]
|
).fetchone()[0]
|
||||||
|
|
@ -202,6 +244,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"avgtargetsdk": round(avg_target_sdk, 1),
|
"avgtargetsdk": round(avg_target_sdk, 1),
|
||||||
"avgminsdk": round(avg_min_sdk, 1),
|
"avgminsdk": round(avg_min_sdk, 1),
|
||||||
"ratioapp": round(100 * nb_app / nb_apk_all, 2),
|
"ratioapp": round(100 * nb_app / nb_apk_all, 2),
|
||||||
|
"ratiomal": round(100 * nb_mal / nb_app, 2),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
data_all.append(
|
data_all.append(
|
||||||
|
|
@ -214,9 +257,23 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
||||||
"avgminsdk": round(avg_min_sdk_tot, 1),
|
"avgminsdk": round(avg_min_sdk_tot, 1),
|
||||||
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
||||||
|
"ratiomal": round(100 * nb_mal_all / nb_apk_all, 2),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
data_only.append(
|
||||||
|
{
|
||||||
|
"method": "Total",
|
||||||
|
"nbapp": nb_apk_any,
|
||||||
|
"avgshadow": round(tot_avg_any, 1),
|
||||||
|
"median": tot_median_any,
|
||||||
|
"id": tot_prop_id,
|
||||||
|
"avgtargetsdk": round(avg_target_sdk_any, 1),
|
||||||
|
"avgminsdk": round(avg_min_sdk_any, 1),
|
||||||
|
"ratioapp": round(100 * nb_apk_any / nb_apk_all, 2),
|
||||||
|
"ratiomal": round(100 * nb_mal_any / nb_apk_any, 2),
|
||||||
|
}
|
||||||
|
)
|
||||||
data_all.append(
|
data_all.append(
|
||||||
{
|
{
|
||||||
"method": "Total",
|
"method": "Total",
|
||||||
|
|
@ -227,9 +284,15 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
||||||
"avgminsdk": round(avg_min_sdk_tot, 1),
|
"avgminsdk": round(avg_min_sdk_tot, 1),
|
||||||
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
||||||
|
"ratiomal": round(100 * nb_mal_all / nb_apk_all, 2),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
print(f"NB MALWARE: {nb_mal_all} ({round(100*nb_mal_all/nb_apk_all, 2)}%)")
|
||||||
|
print(
|
||||||
|
f"NB MALWARE USING SHADOWING: {nb_mal_any} ({round(100*nb_mal_any/nb_apk_any, 2)}%)"
|
||||||
|
)
|
||||||
|
|
||||||
with (out / "results_50k.csv").open("w") as fd:
|
with (out / "results_50k.csv").open("w") as fd:
|
||||||
writer = csv.DictWriter(
|
writer = csv.DictWriter(
|
||||||
fd,
|
fd,
|
||||||
|
|
@ -242,6 +305,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"avgtargetsdk",
|
"avgtargetsdk",
|
||||||
"avgminsdk",
|
"avgminsdk",
|
||||||
"ratioapp",
|
"ratioapp",
|
||||||
|
"ratiomal",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|
@ -259,6 +323,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
"avgtargetsdk",
|
"avgtargetsdk",
|
||||||
"avgminsdk",
|
"avgminsdk",
|
||||||
"ratioapp",
|
"ratioapp",
|
||||||
|
"ratiomal",
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
|
|
@ -300,38 +365,120 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||||
print(f"{cl:<70} {occ_self_non_id[cl]: >5}")
|
print(f"{cl:<70} {occ_self_non_id[cl]: >5}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print()
|
with (out / "redef_sdk_7minus.csv").open("w") as fd:
|
||||||
print(f"redefined class SDK <= 7 {' '*40} occurences disctinct")
|
writer = csv.DictWriter(
|
||||||
print()
|
fd,
|
||||||
for cl in sorted(
|
fieldnames=[
|
||||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
"class",
|
||||||
key=lambda x: occ_sdk34[x],
|
"occ",
|
||||||
reverse=True,
|
"id",
|
||||||
)[:10]:
|
"idper",
|
||||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
],
|
||||||
print()
|
)
|
||||||
|
writer.writeheader()
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
f"redefined class SDK <= 7 {' '*40} occurences identique identique%"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
occ = occ_sdk34[cl]
|
||||||
|
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||||
|
id_per = round((100 * id) / occ, 2)
|
||||||
|
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||||
|
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||||
|
print()
|
||||||
|
|
||||||
|
with (out / "redef_sdk_8.csv").open("w") as fd:
|
||||||
|
writer = csv.DictWriter(
|
||||||
|
fd,
|
||||||
|
fieldnames=[
|
||||||
|
"class",
|
||||||
|
"occ",
|
||||||
|
"id",
|
||||||
|
"idper",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
writer.writeheader()
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
f"redefined class SDK = 8 {' '*40} occurences identique identique%"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
occ = occ_sdk34[cl]
|
||||||
|
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||||
|
id_per = round((100 * id) / occ, 2)
|
||||||
|
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||||
|
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||||
|
print()
|
||||||
|
|
||||||
|
with (out / "redef_sdk_16.csv").open("w") as fd:
|
||||||
|
writer = csv.DictWriter(
|
||||||
|
fd,
|
||||||
|
fieldnames=[
|
||||||
|
"class",
|
||||||
|
"occ",
|
||||||
|
"id",
|
||||||
|
"idper",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
writer.writeheader()
|
||||||
|
print()
|
||||||
|
print(
|
||||||
|
f"redefined class SDK = 16 {' '*40} occurences identique identique%"
|
||||||
|
)
|
||||||
|
print()
|
||||||
|
for cl in sorted(
|
||||||
|
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||||
|
key=lambda x: occ_sdk34[x],
|
||||||
|
reverse=True,
|
||||||
|
)[:10]:
|
||||||
|
occ = occ_sdk34[cl]
|
||||||
|
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||||
|
id_per = round((100 * id) / occ, 2)
|
||||||
|
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||||
|
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||||
|
print()
|
||||||
|
|
||||||
|
occ_package_hid34 = {}
|
||||||
|
for cl, occ in occ_hid34.items():
|
||||||
|
package = ".".join(cl.removeprefix("L").removesuffix(";").split("/")[:-1])
|
||||||
|
if package not in occ_package_hid34:
|
||||||
|
occ_package_hid34[package] = 0
|
||||||
|
occ_package_hid34[package] += occ
|
||||||
|
occ_package_hid34_non_id = {}
|
||||||
|
for cl, occ in occ_hid34_non_id.items():
|
||||||
|
package = ".".join(cl.removeprefix("L").removesuffix(";").split("/")[:-1])
|
||||||
|
if package not in occ_package_hid34_non_id:
|
||||||
|
occ_package_hid34_non_id[package] = 0
|
||||||
|
occ_package_hid34_non_id[package] += occ
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print(f"redefined class SDK = 8 {' '*40} occurences disctinct")
|
print("top 10 hidden package")
|
||||||
print()
|
for pk in sorted(
|
||||||
for cl in sorted(
|
occ_package_hid34.keys(),
|
||||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
key=lambda x: occ_package_hid34[x],
|
||||||
key=lambda x: occ_sdk34[x],
|
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)[:10]:
|
)[:10]:
|
||||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
print(f" {pk:<70} {occ_package_hid34[pk]}")
|
||||||
print()
|
print()
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print(f"redefined class SDK = 16 {' '*40} occurences disctinct")
|
print("top 10 hidden package non id")
|
||||||
print()
|
for pk in sorted(
|
||||||
for cl in sorted(
|
occ_package_hid34_non_id.keys(),
|
||||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
key=lambda x: occ_package_hid34_non_id[x],
|
||||||
key=lambda x: occ_sdk34[x],
|
|
||||||
reverse=True,
|
reverse=True,
|
||||||
)[:10]:
|
)[:10]:
|
||||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
print(f" {pk:<70} {occ_package_hid34_non_id[pk]}")
|
||||||
print()
|
|
||||||
|
|
||||||
|
|
||||||
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue