first commit
This commit is contained in:
commit
cd1e91bb99
287 changed files with 86425 additions and 0 deletions
115
rasta_data_manipulation/rasta_triturage/apk.py
Normal file
115
rasta_data_manipulation/rasta_triturage/apk.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
"""
|
||||
Collect data about apks.
|
||||
"""
|
||||
|
||||
import dateutil.parser as dp # type: ignore
|
||||
import datetime
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt # type: ignore
|
||||
|
||||
from typing import Any, IO, Callable
|
||||
from pathlib import Path
|
||||
|
||||
from .utils import render
|
||||
|
||||
|
||||
def plot_apk_info_by_generic_x(
|
||||
data: list[Any],
|
||||
x: str,
|
||||
title: str,
|
||||
extract_propertie: Callable,
|
||||
y_label: str,
|
||||
x_label: str | None = None,
|
||||
reductions: dict[str, Callable] | None = None,
|
||||
xscale: str = "linear",
|
||||
interactive: bool = True,
|
||||
image_path: Path | None = None,
|
||||
):
|
||||
"""`extract_propertie` is a founction that take a list of element and return
|
||||
a value representing the value of the list, like a median or a mean.
|
||||
"""
|
||||
raise NotImplementedError("TODO: update function to use sqlite3")
|
||||
|
||||
|
||||
# groupped = group_by(x, data, reductions=reductions)
|
||||
# properties = {k: extract_propertie(v) for k, v in groupped.items()}
|
||||
# if x_label is None:
|
||||
# x_label = x
|
||||
# x_values = list(set(filter(lambda x: x is not None, properties.keys())))
|
||||
# x_values.sort()
|
||||
# y_values = [properties[x] for x in x_values]
|
||||
#
|
||||
# plt.figure(figsize=(16, 9), dpi=80)
|
||||
# plt.plot(x_values, y_values)
|
||||
# plt.xscale(xscale)
|
||||
# # plt.ylim([-5, 105])
|
||||
# # plt.legend()
|
||||
# plt.xlabel(x_label)
|
||||
# plt.ylabel(y_label)
|
||||
# render(title, interactive, image_path)
|
||||
#
|
||||
|
||||
|
||||
def plot_apk_size(
|
||||
apk_data: list[Any],
|
||||
interactive: bool = True,
|
||||
image_path: Path | None = None,
|
||||
):
|
||||
sizes = np.array([e["total_dex_size"] for e in apk_data]) / 1024 / 1024
|
||||
sizes.sort()
|
||||
plt.figure(figsize=(16, 9), dpi=80)
|
||||
plt.bar(np.arange(len(sizes)), sizes)
|
||||
plt.ylabel("Bytecode size (MiB)")
|
||||
plt.tick_params(
|
||||
axis="x",
|
||||
which="both",
|
||||
bottom=False,
|
||||
top=False,
|
||||
labelbottom=False,
|
||||
)
|
||||
for s in range(7, 13):
|
||||
plt.axhline(y=(4**s) / 1024 / 1024, color="r", linestyle=":")
|
||||
render("Bytecode size of the apks", interactive, image_path)
|
||||
|
||||
|
||||
def plot_apk_size_hl_subset(
|
||||
apk_data: list[Any],
|
||||
subset_sha: list[str],
|
||||
title: str,
|
||||
interactive: bool = True,
|
||||
image_path: Path | None = None,
|
||||
):
|
||||
apk_data.sort(key=lambda x: x["total_dex_size"])
|
||||
sizes = (
|
||||
np.array(
|
||||
[
|
||||
e["total_dex_size"] if e["sha256"] not in subset_sha else 0
|
||||
for e in apk_data
|
||||
]
|
||||
)
|
||||
/ 1024
|
||||
/ 1024
|
||||
)
|
||||
sizes_hl = (
|
||||
np.array(
|
||||
[e["total_dex_size"] if e["sha256"] in subset_sha else 0 for e in apk_data]
|
||||
)
|
||||
/ 1024
|
||||
/ 1024
|
||||
)
|
||||
plt.figure(figsize=(16, 9), dpi=80)
|
||||
plt.bar(np.arange(len(sizes)), sizes, edgecolor="black")
|
||||
plt.bar(
|
||||
np.arange(len(sizes)), sizes_hl, color="#D55E00", hatch="x", edgecolor="black"
|
||||
)
|
||||
plt.ylabel("Bytecode size (MiB)")
|
||||
plt.tick_params(
|
||||
axis="x",
|
||||
which="both",
|
||||
bottom=False,
|
||||
top=False,
|
||||
labelbottom=False,
|
||||
)
|
||||
for s in range(7, 13):
|
||||
plt.axhline(y=(4**s) / 1024 / 1024, color="r", linestyle=":")
|
||||
render(title, interactive, image_path)
|
Loading…
Add table
Add a link
Reference in a new issue