From 48ec30204d3c8683ac0badbe90510065abf5b031 Mon Sep 17 00:00:00 2001
From: Jean-Marie Mineau
Date: Tue, 22 Oct 2024 11:48:42 +0200
Subject: [PATCH] add scan collection

---
 android_class_shadowing_scanner/__init__.py | 42 +++++++++++-
 android_class_shadowing_scanner/data.py     | 72 ++++++++++++++++++++-
 pyproject.toml                              |  1 +
 3 files changed, 112 insertions(+), 3 deletions(-)

diff --git a/android_class_shadowing_scanner/__init__.py b/android_class_shadowing_scanner/__init__.py
index bc57026..f824b3c 100644
--- a/android_class_shadowing_scanner/__init__.py
+++ b/android_class_shadowing_scanner/__init__.py
@@ -109,13 +109,13 @@ def main():
     # Case 2: apk from SHA256
     sha256s = []
     if args.sha256:
-        sha256s.append(args.sha256)
+        sha256s.append(args.sha256.upper())
     if args.sha256_list:
         with args.sha256_list.open("r") as file:
             for line in file:
                 if not line.strip():
                     continue
-                sha256s.append(line.strip())
+                sha256s.append(line.strip().upper())
 
     api_key = ""
     if args.api_key:
@@ -151,3 +151,41 @@ def main():
         else:
             with (args.output_dir / sha256).open("w") as file:
                 file.write(entry)
+
+
+def collect_to_db():
+    """Collect the scan results of a directory into a database.
+
+    Command line entry point registered as `collect-scan` in pyproject.toml:
+    parses `--dir`, `--db` and the optional `--androzoo-list`, then forwards
+    them to `load_from_directory`.
+    """
+    # Imported here because this patch does not touch the module imports.
+    from .data import load_from_directory
+
+    parser = ArgumentParser(
+        prog="Android Class Shadowing Scan Collector",
+        description="Collect Scan results into a database",
+    )
+    # No mutually exclusive group is declared: an empty required group would
+    # make argparse reject every command line.
+    parser.add_argument(
+        "--dir",
+        help="The directory where the scan results are",
+        type=Path,
+        required=True,
+    )
+    parser.add_argument(
+        "--db",
+        help="Path to the database",
+        type=Path,
+        required=True,
+    )
+    parser.add_argument(
+        "--androzoo-list",
+        help="The file 'latest.csv' or 'latest.csv.gz' from androzoo",
+        type=Path,
+        required=False,
+    )
+    args = parser.parse_args()
+    load_from_directory(args.dir, args.db, args.androzoo_list)
diff --git a/android_class_shadowing_scanner/data.py b/android_class_shadowing_scanner/data.py
index 2272302..946b8b5 100644
--- a/android_class_shadowing_scanner/data.py
+++ b/android_class_shadowing_scanner/data.py
@@ -1,5 +1,11 @@
+import sqlite3
+import gzip
+import csv
+
+from pathlib import Path
+from contextlib import closing
 from dataclasses import dataclass, astuple, fields
-from typing import Self
+from typing import Self, Iterator
 
 
 @dataclass
@@ -24,6 +30,8 @@ class ApkData:
     has_classes_dex_over_10: bool
     has_non_numeric_classes_dex: bool
     has_non_consecutive_classes_dex: bool
+    year: int = -1
+    vt_detection: int = -1
 
     def to_string(self) -> str:
         return "|".join(map(str, astuple(self)))
@@ -33,3 +41,65 @@ class ApkData:
         return ApkData(
             *(map(lambda f_v: f_v[1] == "True" if f_v[0].type is bool else f_v[0].type(f_v[1]), zip(fields(ApkData), val.strip().split("|"))))  # type: ignore
         )
+
+
+def load_from_directory(
+    directory: Path, database: Path, androzoo_csv: Path | None = None
+):
+    """Load every scan result of `directory` into the `database`.
+
+    Each regular file of `directory` is named after the SHA256 of an apk and
+    holds one record readable by `ApkData.from_string`. When `androzoo_csv`
+    (plain or gzipped) is given, its `first_seen_year` and `vt_detection`
+    columns fill `year` and `vt_detection` of the apks it lists; blank cells
+    and unlisted apks keep the -1 default.
+    """
+    # Sub-directories are skipped: they cannot be read as scan results.
+    androzoo_data: dict[str, None | tuple[int, int]] = {
+        path.name: None for path in directory.glob("*") if path.is_file()
+    }
+
+    def open_zoofile(androzoo_csv):
+        # The csv module expects its input to be opened with newline="".
+        if androzoo_csv.name.endswith(".gz"):
+            return gzip.open(androzoo_csv, mode="rt", encoding="utf-8", newline="")
+        return androzoo_csv.open("r", encoding="utf-8", newline="")
+
+    def to_int(value):
+        # A blank or missing cell becomes -1, the "unknown" default of ApkData.
+        return int(value) if value and value.strip() else -1
+
+    if androzoo_csv is not None:
+        with open_zoofile(androzoo_csv) as file:
+            for row in csv.DictReader(file):
+                if row["sha256"] in androzoo_data:
+                    androzoo_data[row["sha256"]] = (
+                        to_int(row["first_seen_year"]),
+                        to_int(row["vt_detection"]),
+                    )
+
+    def data_it():
+        for sha256, zoo_data in androzoo_data.items():
+            with (directory / sha256).open("r") as file:
+                data = ApkData.from_string(file.read().strip())
+            if zoo_data is not None:
+                data.year, data.vt_detection = zoo_data
+            yield data
+
+    save_data_in_db(database, data_it())
+
+
+def save_data_in_db(database: Path, data: Iterator[ApkData]):
+    """Insert every `ApkData` of `data` into the `data` table of `database`.
+
+    The table is created if needed, with one column per `ApkData` field.
+    """
+    columns = [field.name for field in fields(ApkData)]
+    # The sqlite3 connection context manager only ends the transaction, so
+    # `closing` is what actually releases the connection.
+    with closing(sqlite3.connect(database)) as conn, conn:
+        conn.execute(f"CREATE TABLE IF NOT EXISTS data({', '.join(columns)})")
+        conn.executemany(
+            f"INSERT INTO data VALUES({', '.join('?' for _ in columns)})",
+            map(astuple, data),
+        )
diff --git a/pyproject.toml b/pyproject.toml
index d1693d9..4a14ffd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,3 +20,4 @@ build-backend = "poetry.core.masonry.api"
 
 [tool.poetry.scripts]
 scan = 'android_class_shadowing_scanner.__init__:main'
+collect-scan = 'android_class_shadowing_scanner.__init__:collect_to_db'