86 lines
2.8 KiB
Python
86 lines
2.8 KiB
Python
import sqlite3
|
|
import gzip
|
|
import csv
|
|
|
|
from pathlib import Path
|
|
from dataclasses import dataclass, astuple, fields
|
|
from typing import Self, Iterator
|
|
|
|
|
|
@dataclass
|
|
class ApkData:
|
|
sha256: str
|
|
nb_duplicate_classes: int
|
|
nb_def_platform_32_classes: int
|
|
nb_def_platform_non_sdk_32_classes: int
|
|
nb_def_sdk_32_classes: int
|
|
nb_ref_platform_non_sdk_32_classes: int
|
|
nb_def_platform_33_classes: int
|
|
nb_def_platform_non_sdk_33_classes: int
|
|
nb_def_sdk_33_classes: int
|
|
nb_ref_platform_non_sdk_33_classes: int
|
|
nb_def_platform_34_classes: int
|
|
nb_def_platform_non_sdk_34_classes: int
|
|
nb_def_sdk_34_classes: int
|
|
nb_ref_platform_non_sdk_34_classes: int
|
|
has_classes0_dex: bool
|
|
has_classes1_dex: bool
|
|
has_classes0X_dex: bool
|
|
has_classes_dex_over_10: bool
|
|
has_non_numeric_classes_dex: bool
|
|
has_non_consecutive_classes_dex: bool
|
|
year: int = -1
|
|
vt_detection: int = -1
|
|
|
|
def to_string(self) -> str:
|
|
return "|".join(map(str, astuple(self)))
|
|
|
|
@staticmethod
|
|
def from_string(val: str) -> "ApkData":
|
|
return ApkData(
|
|
*(map(lambda f_v: f_v[1] == "True" if f_v[0].type is bool else f_v[0].type(f_v[1]), zip(fields(ApkData), val.strip().split("|")))) # type: ignore
|
|
)
|
|
|
|
|
|
def load_from_directory(
    directory: Path, database: Path, androzoo_csv: Path | None = None
) -> None:
    """Parse every result file of ``directory`` and store it in ``database``.

    Each regular file directly inside ``directory`` is read and parsed with
    ``ApkData.from_string``. The file name is used as the key looked up in
    the ``sha256`` column of ``androzoo_csv``.

    Args:
        directory: Folder holding one result file per APK.
        database: SQLite file handed to ``save_data_in_db``.
        androzoo_csv: Optional CSV (plain or ``.gz``) with at least the
            columns ``sha256``, ``added`` and ``vt_detection``. For matching
            rows, the integer before the first ``-`` of ``added`` becomes
            ``year``, and ``vt_detection`` is stored as an int (``-1`` when
            the cell is empty).

    Raises:
        KeyError: the CSV lacks one of the expected columns.
        ValueError: a CSV cell or a result file cannot be parsed.
    """
    # Sub-directories cannot be read as result files, so only regular files
    # are listed. The value stays None unless the CSV has a matching row.
    androzoo_data: dict[str, None | tuple[int, int]] = {
        path.name: None for path in directory.glob("*") if path.is_file()
    }

    if androzoo_csv is not None:
        # gzip.open and the builtin open accept the same text-mode arguments,
        # so both kinds of file are decoded identically (UTF-8). newline=""
        # is what the csv module requires for correct handling of quoted
        # line breaks.
        opener = gzip.open if androzoo_csv.name.endswith(".gz") else open
        with opener(androzoo_csv, mode="rt", encoding="utf-8", newline="") as file:
            for row in csv.DictReader(file):
                sha256 = row["sha256"]
                if sha256 in androzoo_data:
                    androzoo_data[sha256] = (
                        int(row["added"].split("-")[0]),  # not worth parsing the date
                        int(row["vt_detection"]) if row["vt_detection"] else -1,
                    )

    def data_it() -> Iterator[ApkData]:
        # Lazily yield one ApkData per result file so that the whole data
        # set never has to be held in memory at once.
        for sha256, zoo_data in androzoo_data.items():
            content = (directory / sha256).read_text(encoding="utf-8")
            data = ApkData.from_string(content.strip())
            if zoo_data is not None:
                data.year, data.vt_detection = zoo_data
            yield data

    save_data_in_db(database, data_it())
|
|
|
|
|
|
def save_data_in_db(database: Path, data: Iterator[ApkData]) -> None:
    """Append ``data`` to the ``data`` table of the SQLite file ``database``.

    The table is created on first use with one untyped column per
    ``ApkData`` field, in declaration order. All rows are inserted in a
    single transaction: it is committed if every insert succeeds and rolled
    back if an exception is raised (including one raised while iterating
    ``data``).

    Args:
        database: Path of the SQLite database file.
        data: Records to insert; consumed lazily by ``executemany``.
    """
    columns = ", ".join(field.name for field in fields(ApkData))
    placeholders = ", ".join("?" for _ in fields(ApkData))

    conn = sqlite3.connect(database)
    try:
        # ``with conn`` only scopes the transaction (commit / rollback); it
        # does not close the connection, hence the explicit close below.
        with conn:
            conn.execute(f"CREATE TABLE IF NOT EXISTS data({columns})")
            conn.executemany(
                f"INSERT INTO data VALUES({placeholders})",
                map(astuple, data),
            )
    finally:
        conn.close()
|