keep tweaking
This commit is contained in:
parent
fc38321d09
commit
cea77c2fc3
3 changed files with 175 additions and 27 deletions
|
|
@ -571,4 +571,4 @@ def data_mining():
|
|||
args = parser.parse_args()
|
||||
|
||||
stats(args.db, args.out, args.output_check_platform_redef)
|
||||
# analyse_sdk_redef(args.output_check_platform_redef, args.db, args.out)
|
||||
analyse_sdk_redef(args.output_check_platform_redef, args.db, args.out)
|
||||
|
|
|
|||
|
|
@ -50,8 +50,9 @@ def get_methods(sm: str, sha256: str = "") -> dict[str, list[list[str]]]:
|
|||
if (
|
||||
current_meth is not None
|
||||
and striped
|
||||
and not striped.startswith(".line ")
|
||||
and not striped.startswith(".param ")
|
||||
# and not striped.startswith(".line ")
|
||||
# and not striped.startswith(".param ")
|
||||
and not striped.startswith(".")
|
||||
):
|
||||
current_body.append(striped)
|
||||
if striped.startswith(".method "):
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import matplotlib
|
|||
|
||||
|
||||
def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
||||
VT_THRESH = 3
|
||||
|
||||
occ_sdk34 = {}
|
||||
occ_hid34 = {}
|
||||
|
|
@ -143,6 +144,13 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
with sqlite3.connect(db) as con:
|
||||
cur = con.cursor()
|
||||
nb_apk_all = cur.execute("SELECT COUNT(sha256) FROM data;").fetchone()[0]
|
||||
nb_mal_any = cur.execute(
|
||||
f"SELECT COUNT(sha256) FROM data WHERE vt_detection>={VT_THRESH} AND "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||
).fetchone()[0]
|
||||
nb_mal_all = cur.execute(
|
||||
f"SELECT COUNT(sha256) FROM data WHERE vt_detection>={VT_THRESH};"
|
||||
).fetchone()[0]
|
||||
avg_target_sdk_tot = cur.execute(
|
||||
"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0;"
|
||||
).fetchone()[0]
|
||||
|
|
@ -162,6 +170,37 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"LIMIT 1 "
|
||||
"OFFSET (SELECT COUNT(*) FROM data) / 2;"
|
||||
).fetchone()[0]
|
||||
nb_apk_any = cur.execute(
|
||||
"SELECT COUNT(sha256) FROM data WHERE "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||
).fetchone()[0]
|
||||
avg_target_sdk_any = cur.execute(
|
||||
"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0 AND"
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||
).fetchone()[0]
|
||||
avg_min_sdk_any = cur.execute(
|
||||
"SELECT AVG(min_sdk_version) FROM data WHERE min_sdk_version>=0 AND "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||
).fetchone()[0]
|
||||
tot_avg_any = cur.execute(
|
||||
"SELECT AVG("
|
||||
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes"
|
||||
") FROM data WHERE "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1);"
|
||||
).fetchone()[0]
|
||||
tot_median_any = cur.execute(
|
||||
"SELECT "
|
||||
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes "
|
||||
"FROM data WHERE "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1) "
|
||||
"ORDER BY "
|
||||
" nb_duplicate_classes+nb_def_sdk_34_classes+nb_def_platform_non_sdk_34_classes "
|
||||
"LIMIT 1 "
|
||||
"OFFSET ("
|
||||
" SELECT COUNT(*) FROM data WHERE "
|
||||
" (nb_duplicate_classes>=1 OR nb_def_sdk_34_classes>=1 OR nb_def_platform_non_sdk_34_classes>=1) "
|
||||
") / 2;"
|
||||
).fetchone()[0]
|
||||
data_all = []
|
||||
data_only = []
|
||||
for name, field in [
|
||||
|
|
@ -172,6 +211,9 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
nb_app, avg, avg_min_sdk = cur.execute(
|
||||
f"SELECT COUNT(sha256), AVG({field}), AVG(min_sdk_version) FROM data WHERE {field}>=1;"
|
||||
).fetchone()
|
||||
nb_mal = cur.execute(
|
||||
f"SELECT COUNT(sha256) FROM data WHERE {field}>=1 AND vt_detection>={VT_THRESH};"
|
||||
).fetchone()[0]
|
||||
avg_target_sdk = cur.execute(
|
||||
f"SELECT AVG(target_sdk_version) FROM data WHERE target_sdk_version>=0 AND {field}>=1;"
|
||||
).fetchone()[0]
|
||||
|
|
@ -202,6 +244,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"avgtargetsdk": round(avg_target_sdk, 1),
|
||||
"avgminsdk": round(avg_min_sdk, 1),
|
||||
"ratioapp": round(100 * nb_app / nb_apk_all, 2),
|
||||
"ratiomal": round(100 * nb_mal / nb_app, 2),
|
||||
}
|
||||
)
|
||||
data_all.append(
|
||||
|
|
@ -214,9 +257,23 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
||||
"avgminsdk": round(avg_min_sdk_tot, 1),
|
||||
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
||||
"ratiomal": round(100 * nb_mal_all / nb_apk_all, 2),
|
||||
}
|
||||
)
|
||||
|
||||
data_only.append(
|
||||
{
|
||||
"method": "Total",
|
||||
"nbapp": nb_apk_any,
|
||||
"avgshadow": round(tot_avg_any, 1),
|
||||
"median": tot_median_any,
|
||||
"id": tot_prop_id,
|
||||
"avgtargetsdk": round(avg_target_sdk_any, 1),
|
||||
"avgminsdk": round(avg_min_sdk_any, 1),
|
||||
"ratioapp": round(100 * nb_apk_any / nb_apk_all, 2),
|
||||
"ratiomal": round(100 * nb_mal_any / nb_apk_any, 2),
|
||||
}
|
||||
)
|
||||
data_all.append(
|
||||
{
|
||||
"method": "Total",
|
||||
|
|
@ -227,9 +284,15 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"avgtargetsdk": round(avg_target_sdk_tot, 1),
|
||||
"avgminsdk": round(avg_min_sdk_tot, 1),
|
||||
"ratioapp": round(100 * nb_apk_all / nb_apk_all, 2),
|
||||
"ratiomal": round(100 * nb_mal_all / nb_apk_all, 2),
|
||||
}
|
||||
)
|
||||
|
||||
print(f"NB MALWARE: {nb_mal_all} ({round(100*nb_mal_all/nb_apk_all, 2)}%)")
|
||||
print(
|
||||
f"NB MALWARE USING SHADOWING: {nb_mal_any} ({round(100*nb_mal_any/nb_apk_any, 2)}%)"
|
||||
)
|
||||
|
||||
with (out / "results_50k.csv").open("w") as fd:
|
||||
writer = csv.DictWriter(
|
||||
fd,
|
||||
|
|
@ -242,6 +305,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"avgtargetsdk",
|
||||
"avgminsdk",
|
||||
"ratioapp",
|
||||
"ratiomal",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
|
|
@ -259,6 +323,7 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
"avgtargetsdk",
|
||||
"avgminsdk",
|
||||
"ratioapp",
|
||||
"ratiomal",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
|
|
@ -300,38 +365,120 @@ def stats(db: Path, out: Path, folder_plat_diff_smali: Path):
|
|||
print(f"{cl:<70} {occ_self_non_id[cl]: >5}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(f"redefined class SDK <= 7 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
with (out / "redef_sdk_7minus.csv").open("w") as fd:
|
||||
writer = csv.DictWriter(
|
||||
fd,
|
||||
fieldnames=[
|
||||
"class",
|
||||
"occ",
|
||||
"id",
|
||||
"idper",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
print()
|
||||
print(
|
||||
f"redefined class SDK <= 7 {' '*40} occurences identique identique%"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 7, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
occ = occ_sdk34[cl]
|
||||
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||
id_per = round((100 * id) / occ, 2)
|
||||
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||
print()
|
||||
|
||||
with (out / "redef_sdk_8.csv").open("w") as fd:
|
||||
writer = csv.DictWriter(
|
||||
fd,
|
||||
fieldnames=[
|
||||
"class",
|
||||
"occ",
|
||||
"id",
|
||||
"idper",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
print()
|
||||
print(
|
||||
f"redefined class SDK = 8 {' '*40} occurences identique identique%"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
occ = occ_sdk34[cl]
|
||||
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||
id_per = round((100 * id) / occ, 2)
|
||||
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||
print()
|
||||
|
||||
with (out / "redef_sdk_16.csv").open("w") as fd:
|
||||
writer = csv.DictWriter(
|
||||
fd,
|
||||
fieldnames=[
|
||||
"class",
|
||||
"occ",
|
||||
"id",
|
||||
"idper",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
print()
|
||||
print(
|
||||
f"redefined class SDK = 16 {' '*40} occurences identique identique%"
|
||||
)
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
occ = occ_sdk34[cl]
|
||||
id = occ - occ_sdk34_non_id.get(cl, 0)
|
||||
id_per = round((100 * id) / occ, 2)
|
||||
print(f"{cl:<70} {occ: >5} {id: >5} {id_per: >5}")
|
||||
writer.writerow({"class": cl, "occ": occ, "id": id, "idper": id_per})
|
||||
print()
|
||||
|
||||
occ_package_hid34 = {}
|
||||
for cl, occ in occ_hid34.items():
|
||||
package = ".".join(cl.removeprefix("L").removesuffix(";").split("/")[:-1])
|
||||
if package not in occ_package_hid34:
|
||||
occ_package_hid34[package] = 0
|
||||
occ_package_hid34[package] += occ
|
||||
occ_package_hid34_non_id = {}
|
||||
for cl, occ in occ_hid34_non_id.items():
|
||||
package = ".".join(cl.removeprefix("L").removesuffix(";").split("/")[:-1])
|
||||
if package not in occ_package_hid34_non_id:
|
||||
occ_package_hid34_non_id[package] = 0
|
||||
occ_package_hid34_non_id[package] += occ
|
||||
|
||||
print()
|
||||
print(f"redefined class SDK = 8 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 8, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
print("top 10 hidden package")
|
||||
for pk in sorted(
|
||||
occ_package_hid34.keys(),
|
||||
key=lambda x: occ_package_hid34[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print(f" {pk:<70} {occ_package_hid34[pk]}")
|
||||
print()
|
||||
|
||||
print()
|
||||
print(f"redefined class SDK = 16 {' '*40} occurences disctinct")
|
||||
print()
|
||||
for cl in sorted(
|
||||
filter(lambda cl: MIN_MAX_SDK[cl][0] == 16, occ_sdk34.keys()),
|
||||
key=lambda x: occ_sdk34[x],
|
||||
print("top 10 hidden package non id")
|
||||
for pk in sorted(
|
||||
occ_package_hid34_non_id.keys(),
|
||||
key=lambda x: occ_package_hid34_non_id[x],
|
||||
reverse=True,
|
||||
)[:10]:
|
||||
print(f"{cl:<70} {occ_sdk34[cl]: >5} {occ_sdk34_non_id.get(cl, 0): >5}")
|
||||
print()
|
||||
print(f" {pk:<70} {occ_package_hid34_non_id[pk]}")
|
||||
|
||||
|
||||
def analyse_sdk_redef(folder: Path, db: Path, out: Path):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue