first commit
This commit is contained in:
commit
cd1e91bb99
287 changed files with 86425 additions and 0 deletions
193
rasta_exp/apk.py
Normal file
193
rasta_exp/apk.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
import requests
|
||||
import logging
|
||||
import shutil
|
||||
import hashlib
|
||||
import json
|
||||
from utils import sha256_sum
|
||||
from enum import Enum
|
||||
from pathlib import Path
|
||||
from typing import Any, Optional
|
||||
from androguard.core.bytecodes import apk as androguard_apk # type: ignore
|
||||
|
||||
APK_INFO_FOLDER = Path(__file__).parent / "apk_info"
|
||||
if not APK_INFO_FOLDER.exists():
|
||||
APK_INFO_FOLDER.mkdir()
|
||||
|
||||
|
||||
class ApkRef:
|
||||
"""The reference to an apk. The apk it referes to can be in the androzoo repository or
|
||||
on the local file system.
|
||||
- If the app is in androzoon the app is refered to by its sha256
|
||||
- If the app is on the local file system, the app is refered to by its path
|
||||
"""
|
||||
|
||||
RefType = Enum("RefType", ["ANDROZOO", "LOCAL"])
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
type_: "ApkRef.RefType",
|
||||
sha256: Optional[str] = None,
|
||||
path: Optional[Path] = None,
|
||||
):
|
||||
self.type = type_
|
||||
self.sha256 = sha256
|
||||
if self.sha256 is not None:
|
||||
self.sha256 = self.sha256.strip().upper()
|
||||
self.path = path
|
||||
self.integrity_check()
|
||||
|
||||
def __str__(self):
|
||||
return f"APK<{str(self.type)}: sha256={self.sha256}, path={str(self.path)}>"
|
||||
|
||||
def integrity_check(self):
|
||||
"""Check if the ApkRef is coherent."""
|
||||
if self.type == ApkRef.RefType.ANDROZOO and self.sha256 is None:
|
||||
raise RuntimeError(f"Androzoo ApkRef must have a sha256: {str(self)}")
|
||||
if self.type == ApkRef.RefType.LOCAL and self.path is None:
|
||||
raise RuntimeError(f"Local APkRef must have a path: {str(self)}")
|
||||
|
||||
def get_path(self) -> Path:
|
||||
"""Return the path to the apk."""
|
||||
if self.path is None:
|
||||
raise RuntimeError(f"{str(self)} don't have a path")
|
||||
return self.path
|
||||
|
||||
def get_sha256(self) -> str:
|
||||
"""Return the sha256 of the apk."""
|
||||
if self.sha256 is None:
|
||||
if self.path is None:
|
||||
raise RuntimeError(f"Could not compute hash for {str(self)}")
|
||||
self.sha256 = sha256_sum(self.path).upper()
|
||||
return self.sha256
|
||||
|
||||
|
||||
def get_apk(apk_ref: ApkRef, path: Path, api_key: bytes):
|
||||
"""Retrieve and apk from its reference and put it at `path`.
|
||||
`api_key` is always ask because it's easier that way."""
|
||||
if apk_ref.type == ApkRef.RefType.ANDROZOO:
|
||||
downlaod_apk(apk_ref.get_sha256(), api_key, path)
|
||||
elif apk_ref.type == ApkRef.RefType.LOCAL:
|
||||
shutil.copy(apk_ref.get_path(), path)
|
||||
|
||||
|
||||
def downlaod_apk(apk_sha256: str, api_key: bytes, path: Path):
|
||||
"""Download an apk from androzoo and store it at the given location"""
|
||||
logging.debug(f"Start downloading apk {apk_sha256}")
|
||||
resp = requests.get(
|
||||
"https://androzoo.uni.lu/api/download",
|
||||
params={
|
||||
b"apikey": api_key,
|
||||
b"sha256": apk_sha256.encode("utf-8"),
|
||||
},
|
||||
)
|
||||
with path.open("bw") as file:
|
||||
file.write(resp.content)
|
||||
logging.debug(f"Finished downloading apk {apk_sha256}")
|
||||
|
||||
|
||||
def get_apk_info(apk_ref: ApkRef, api_key: bytes) -> dict[str, Any]:
|
||||
"""Return the information availables about an application"""
|
||||
apk_path = APK_INFO_FOLDER / (apk_ref.get_sha256() + ".json")
|
||||
get_apk(apk_ref, apk_path, api_key)
|
||||
info: dict[str, Any] = {}
|
||||
info["apk_size"] = apk_path.stat().st_size
|
||||
info["sha256"] = apk_ref.get_sha256()
|
||||
if apk_ref.path is not None:
|
||||
info["file"] = apk_ref.path.name
|
||||
else:
|
||||
info["file"] = None
|
||||
|
||||
try:
|
||||
apk = androguard_apk.APK(apk_path)
|
||||
info["name"] = apk.get_app_name() # redundant with pkg_name ?
|
||||
info["min_sdk"] = apk.get_min_sdk_version()
|
||||
if info["min_sdk"] is not None:
|
||||
info["min_sdk"] = int(info["min_sdk"])
|
||||
info["max_sdk"] = apk.get_max_sdk_version()
|
||||
if info["max_sdk"] is not None:
|
||||
info["max_sdk"] = int(info["max_sdk"])
|
||||
info["target_sdk"] = apk.get_target_sdk_version()
|
||||
if info["target_sdk"] is not None:
|
||||
info["target_sdk"] = int(info["target_sdk"])
|
||||
info["total_dex_size"] = sum(
|
||||
map(lambda x: len(x), apk.get_all_dex())
|
||||
) # TODO: faster to open the zip and use st_size?
|
||||
except:
|
||||
info["name"] = ""
|
||||
info["min_sdk"] = None
|
||||
info["max_sdk"] = None
|
||||
info["target_sdk"] = None
|
||||
info["total_dex_size"] = None
|
||||
apk_path.unlink()
|
||||
return info
|
||||
|
||||
|
||||
def load_apk_info(apks: list[ApkRef], androzoo_list: Path, api_key: bytes):
|
||||
"""Load the information for the provided apks (`apks` must contain the sha256 of the apk to load)
|
||||
from the androzoo_list. The information are then stored in json files"""
|
||||
logging.debug("Start extracting data from the androzoo list")
|
||||
apks_dict = {a.get_sha256().strip().upper(): a for a in apks}
|
||||
for apk in apks:
|
||||
apk_info_path = APK_INFO_FOLDER / (apk.get_sha256() + ".json")
|
||||
if apk_info_path.exists():
|
||||
del apks_dict[apk.get_sha256()]
|
||||
|
||||
with androzoo_list.open("r") as list_file:
|
||||
first_line = list_file.readline()
|
||||
entrie_names = list(map(lambda x: x.strip(), first_line.split(",")))
|
||||
sha256_index = entrie_names.index(
|
||||
"sha256"
|
||||
) # TODO: if 'sha256' is not found in the first line, we have the wrong file...
|
||||
while line := list_file.readline():
|
||||
if not apks_dict:
|
||||
break
|
||||
entries = list(map(lambda x: x.strip(), line.split(",")))
|
||||
# TODO: don't parse the entries manually...
|
||||
if len(entries) != len(entrie_names):
|
||||
entries_set = set(map(lambda x: x.upper(), entries))
|
||||
inter = entries_set.intersection(apks_dict.keys())
|
||||
if inter:
|
||||
logging.warning(
|
||||
f"The information for the apk {inter} may not be retreived from the list due to malformated line: {line}"
|
||||
)
|
||||
continue
|
||||
info: dict[str, Any] = {}
|
||||
sha256 = entries[sha256_index].upper()
|
||||
if sha256 in apks_dict:
|
||||
for (k, v) in zip(entrie_names, entries):
|
||||
info[k] = v
|
||||
if "markets" in info:
|
||||
info["markets"] = list(
|
||||
map(lambda x: x.strip(), info["markets"].split("|"))
|
||||
)
|
||||
if "apk_size" in info:
|
||||
info["apk_size"] = int(info["apk_size"])
|
||||
if "vt_detection" in info:
|
||||
info["vt_detection"] = int(info["vt_detection"])
|
||||
if "dex_size" in info:
|
||||
info["dex_size"] = int(info["dex_size"])
|
||||
if "pkg_name" in info:
|
||||
info["pkg_name"] = (
|
||||
info["pkg_name"].removeprefix('"').removesuffix('"')
|
||||
)
|
||||
apk_info_path = APK_INFO_FOLDER / (sha256 + ".json")
|
||||
info |= get_apk_info(apks_dict[sha256], api_key)
|
||||
with apk_info_path.open("w") as file:
|
||||
json.dump(info, file)
|
||||
del apks_dict[sha256]
|
||||
for apk_hash in apks_dict:
|
||||
logging.warning(
|
||||
f"The information for the apk {apk_hash} was not found in the androzoo list"
|
||||
)
|
||||
info = get_apk_info(apks_dict[apk_hash], api_key)
|
||||
for key in entrie_names:
|
||||
if key not in info:
|
||||
info[key] = None
|
||||
apk_info_path = APK_INFO_FOLDER / (apk_hash + ".json")
|
||||
info |= get_apk_info(apks_dict[apk_hash], api_key)
|
||||
with apk_info_path.open("w") as file:
|
||||
json.dump(info, file)
|
||||
|
||||
logging.debug(
|
||||
"Finished extracting the information about the apks from androzoo list"
|
||||
)
|
Loading…
Add table
Add a link
Reference in a new issue