android_class_shadowing_sca.../android_class_shadowing_scanner/data.py
Jean-Marie Mineau dabf42fe9c
fix
2024-10-28 14:58:27 +01:00

86 lines
2.8 KiB
Python

import sqlite3
import gzip
import csv
from pathlib import Path
from dataclasses import dataclass, astuple, fields
from typing import Self, Iterator
@dataclass
class ApkData:
    """Flat record of the values stored for a single APK.

    The declaration order of the fields is significant: it is the column
    order used by :meth:`to_string` and :meth:`from_string`.
    """

    # File-level identifier of the APK.
    sha256: str
    nb_duplicate_classes: int
    # Per-platform counters; the 32/33/34 suffix presumably refers to the
    # Android API level the count was computed against -- TODO confirm.
    nb_def_platform_32_classes: int
    nb_def_platform_non_sdk_32_classes: int
    nb_def_sdk_32_classes: int
    nb_ref_platform_non_sdk_32_classes: int
    nb_def_platform_33_classes: int
    nb_def_platform_non_sdk_33_classes: int
    nb_def_sdk_33_classes: int
    nb_ref_platform_non_sdk_33_classes: int
    nb_def_platform_34_classes: int
    nb_def_platform_non_sdk_34_classes: int
    nb_def_sdk_34_classes: int
    nb_ref_platform_non_sdk_34_classes: int
    # Boolean flags about the dex file names found in the APK.
    has_classes0_dex: bool
    has_classes1_dex: bool
    has_classes0X_dex: bool
    has_classes_dex_over_10: bool
    has_non_numeric_classes_dex: bool
    has_non_consecutive_classes_dex: bool
    # Optional metadata; -1 is the placeholder used when no value is known.
    year: int = -1
    vt_detection: int = -1

    def to_string(self) -> str:
        """Serialize every field, in declaration order, joined by ``|``."""
        return "|".join(str(value) for value in astuple(self))

    @staticmethod
    def from_string(val: str) -> "ApkData":
        """Rebuild an :class:`ApkData` from the output of :meth:`to_string`.

        Surrounding whitespace is stripped, then the string is split on
        ``|`` and each piece is converted with the type of the field at the
        same position. Pieces beyond the number of fields are ignored, and
        trailing fields that have a default may be omitted.
        """
        raw_values = val.strip().split("|")
        converted = []
        for spec, raw in zip(fields(ApkData), raw_values):
            if spec.type is bool:
                # bool("False") would be True, so compare the text instead.
                converted.append(raw == "True")
            else:
                converted.append(spec.type(raw))  # type: ignore
        return ApkData(*converted)
def load_from_directory(
    directory: Path, database: Path, androzoo_csv: Path | None = None
) -> None:
    """Load every result file of ``directory`` into the SQLite ``database``.

    Each entry of ``directory`` is expected to be a text file whose name is
    the key of the APK and whose content is a serialized ``ApkData`` (see
    ``ApkData.from_string``). When ``androzoo_csv`` is given, the ``year`` and
    ``vt_detection`` of each record are filled from the row whose ``sha256``
    column equals the file name.

    Args:
        directory: Directory holding one result file per APK.
        database: Path of the SQLite database to write to.
        androzoo_csv: Optional CSV file (plain, or gzip-compressed when the
            name ends with ``.gz``) with at least the columns ``sha256``,
            ``added`` and ``vt_detection``.
    """
    # Seed the mapping with every file name so the CSV pass only keeps the
    # rows that are actually needed.
    androzoo_data: dict[str, None | tuple[int, int]] = {
        path.name: None for path in directory.glob("*")
    }

    def open_zoofile(androzoo_csv):
        # Both branches use UTF-8 and newline="" so a compressed and an
        # uncompressed copy of the same CSV are parsed identically; the csv
        # module requires newline="" to handle embedded newlines correctly.
        if androzoo_csv.name.endswith(".gz"):
            return gzip.open(androzoo_csv, mode="rt", encoding="utf-8", newline="")
        return androzoo_csv.open("r", encoding="utf-8", newline="")

    if androzoo_csv is not None:
        with open_zoofile(androzoo_csv) as file:
            for row in csv.DictReader(file):
                if row["sha256"] in androzoo_data:
                    androzoo_data[row["sha256"]] = (
                        # Not worth parsing the date: assumes "added" starts
                        # with the year followed by "-" -- TODO confirm.
                        int(row["added"].split("-")[0]),
                        # An empty cell is stored as -1.
                        int(row["vt_detection"]) if row["vt_detection"] else -1,
                    )

    def data_it():
        # Lazily parse the result files so they are streamed to the database
        # instead of being all held in memory.
        for sha256, zoo_data in androzoo_data.items():
            with (directory / sha256).open("r", encoding="utf-8") as file:
                data = ApkData.from_string(file.read().strip())
            if zoo_data is not None:
                data.year, data.vt_detection = zoo_data
            yield data

    save_data_in_db(database, data_it())
def save_data_in_db(database: Path, data: Iterator[ApkData]) -> None:
    """Insert every record of ``data`` into the ``data`` table of ``database``.

    The table is created if it does not exist yet, with one untyped column
    per field of ``ApkData`` in declaration order. Rows are appended; existing
    rows are left untouched.

    Args:
        database: Path of the SQLite database file.
        data: Records to insert; consumed once.
    """
    column_names = [field.name for field in fields(ApkData)]
    placeholders = ", ".join("?" for _ in column_names)

    conn = sqlite3.connect(database)
    try:
        # The connection context manager only commits (or rolls back on
        # error); it does not close the connection, hence the try/finally.
        with conn:
            conn.execute(
                f"CREATE TABLE IF NOT EXISTS data({', '.join(column_names)})"
            )
            conn.executemany(
                f"INSERT INTO data VALUES({placeholders})",
                map(astuple, data),
            )
    finally:
        conn.close()