rasta/rasta_data_manipulation/rasta_triturage/apk.py
Jean-Marie Mineau cd1e91bb99
first commit
2023-11-16 14:30:24 +01:00

115 lines
3 KiB
Python

"""
Collect data about apks.
"""
import dateutil.parser as dp # type: ignore
import datetime
import numpy as np
import matplotlib.pyplot as plt # type: ignore
from typing import Any, IO, Callable
from pathlib import Path
from .utils import render
def plot_apk_info_by_generic_x(
data: list[Any],
x: str,
title: str,
extract_propertie: Callable,
y_label: str,
x_label: str | None = None,
reductions: dict[str, Callable] | None = None,
xscale: str = "linear",
interactive: bool = True,
image_path: Path | None = None,
):
"""`extract_propertie` is a founction that take a list of element and return
a value representing the value of the list, like a median or a mean.
"""
raise NotImplementedError("TODO: update function to use sqlite3")
# groupped = group_by(x, data, reductions=reductions)
# properties = {k: extract_propertie(v) for k, v in groupped.items()}
# if x_label is None:
# x_label = x
# x_values = list(set(filter(lambda x: x is not None, properties.keys())))
# x_values.sort()
# y_values = [properties[x] for x in x_values]
#
# plt.figure(figsize=(16, 9), dpi=80)
# plt.plot(x_values, y_values)
# plt.xscale(xscale)
# # plt.ylim([-5, 105])
# # plt.legend()
# plt.xlabel(x_label)
# plt.ylabel(y_label)
# render(title, interactive, image_path)
#
def plot_apk_size(
    apk_data: list[Any],
    interactive: bool = True,
    image_path: Path | None = None,
):
    """Draw a bar chart of the bytecode size of the apks, smallest first.

    Every entry of `apk_data` is indexed with the "total_dex_size" key. The
    value is scaled down by 1024 * 1024 and labelled as MiB, so it is
    presumably a size in bytes (TODO confirm against the data source).

    Dotted red horizontal lines are drawn at the powers of four from 4**7 to
    4**12, scaled the same way as the bars.

    `interactive` and `image_path` are handed over as-is to `render` (from
    `.utils`) together with the figure title; presumably they control whether
    the figure is shown and/or saved.
    """
    bytes_per_mib = 1024 * 1024
    # Dividing by a power of two is exact, so a single division gives the
    # same values as dividing by 1024 twice.
    dex_sizes_mib = np.sort(
        np.array([apk["total_dex_size"] for apk in apk_data]) / bytes_per_mib
    )
    bar_positions = np.arange(len(dex_sizes_mib))

    plt.figure(figsize=(16, 9), dpi=80)
    plt.bar(bar_positions, dex_sizes_mib)
    plt.ylabel("Bytecode size (MiB)")

    # The x axis only carries the rank of each apk: hide ticks and labels.
    hidden_x_ticks = {
        "axis": "x",
        "which": "both",
        "bottom": False,
        "top": False,
        "labelbottom": False,
    }
    plt.tick_params(**hidden_x_ticks)

    for exponent in range(7, 13):
        plt.axhline(y=4**exponent / bytes_per_mib, color="r", linestyle=":")

    render("Bytecode size of the apks", interactive, image_path)
def plot_apk_size_hl_subset(
    apk_data: list[Any],
    subset_sha: list[str],
    title: str,
    interactive: bool = True,
    image_path: Path | None = None,
):
    """Draw the bytecode size of the apks and highlight a subset of them.

    The apks are drawn as bars ordered by increasing "total_dex_size". Bars
    whose "sha256" is listed in `subset_sha` are drawn in a distinct colour
    with a hatch pattern; the others use the default bar colour.

    Sizes are divided by 1024 twice and labelled as MiB, so "total_dex_size"
    is presumably a size in bytes (TODO confirm against the data source).
    Dotted red horizontal lines are drawn at the powers of four from 4**7 to
    4**12, scaled the same way as the bars.

    `apk_data` is left untouched: the ordering is done on a copy.

    `title`, `interactive` and `image_path` are handed over as-is to `render`
    (from `.utils`); presumably they control how the figure is shown and/or
    saved.
    """
    # Sort a copy so that the caller's list is not reordered as a side effect.
    ordered_apks = sorted(apk_data, key=lambda apk: apk["total_dex_size"])

    # Build the set once: membership tests against a list would make the
    # mask below quadratic in the worst case.
    highlighted_shas = set(subset_sha)
    is_highlighted = np.array(
        [apk["sha256"] in highlighted_shas for apk in ordered_apks], dtype=bool
    )

    sizes_mib = (
        np.array([apk["total_dex_size"] for apk in ordered_apks]) / 1024 / 1024
    )
    # Two complementary series over the same x positions: each apk has its
    # real size in exactly one of them and 0 in the other.
    sizes = np.where(is_highlighted, 0, sizes_mib)
    sizes_hl = np.where(is_highlighted, sizes_mib, 0)
    bar_positions = np.arange(len(ordered_apks))

    plt.figure(figsize=(16, 9), dpi=80)
    plt.bar(bar_positions, sizes, edgecolor="black")
    plt.bar(
        bar_positions, sizes_hl, color="#D55E00", hatch="x", edgecolor="black"
    )
    plt.ylabel("Bytecode size (MiB)")
    # The x axis only carries the rank of each apk: hide ticks and labels.
    plt.tick_params(
        axis="x",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
    )
    for exponent in range(7, 13):
        plt.axhline(y=(4**exponent) / 1024 / 1024, color="r", linestyle=":")
    render(title, interactive, image_path)