Add function to compare the results of two experiments

This commit is contained in:
Jean-Marie Mineau 2025-09-07 17:08:42 +02:00
parent 1309d7ea24
commit 0d8ad49c94
Signed by: histausse
GPG key ID: B66AEEDA9B645AD2
5 changed files with 255 additions and 11 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
data
*.db

View file

@ -1,15 +1,15 @@
[tool.poetry]
name = "rasta_triturage"
version = "0.2.0"
version = "0.2.1"
description = "'Triturage de donnée' for the Rasta Project"
authors = ["anon"]
authors = ["Jean-Marie Mineau <rasta-github@jean-marie.mineau.eu>"]
readme = "README.md"
#homepage = ""
#repository = ""
license = "Proprietary"
homepage = "https://github.com/histausse/rasta/tree/main"
repository = "https://github.com/histausse/rasta/tree/main"
license = "GPLv3"
[tool.poetry.urls]
#"Bug Tracker" = ""
"Bug Tracker" = "https://github.com/histausse/rasta/issues"
[tool.poetry.dependencies]
python = "^3.10"
@ -50,6 +50,7 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
rasta-size-malware = "rasta_triturage.cli:size_malware"
rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
[tool.poetry.group.dev.dependencies]
pytest = "*"

View file

@ -17,6 +17,7 @@ from .status import (
plot_status_by_tool_and_malware,
plot_all_status_by_generic_x,
plot_status_by_generic_x,
plot_compare_status,
)
from .apk import (
plot_apk_info_by_generic_x,
@ -577,9 +578,9 @@ def ic3():
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
ic3_errors(
args.data,
file=args.figures_file / "ic3_err.csv"
if args.figures_file is not None
else None,
file=(
args.figures_file / "ic3_err.csv" if args.figures_file is not None else None
),
)
@ -1127,3 +1128,64 @@ def size_malware():
print(
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
)
def compare_status_by_tool():
    """Compare the repartition of status by tool from two result dbs.

    Command line entry point: parses the options below from ``sys.argv`` and
    forwards them to ``plot_compare_status``.

    Options:
        -d1/--data1: sqlite3 database of the first experiment (required).
        -d2/--data2: sqlite3 database of the second experiment (required).
        -f/--figures-file: folder in which the figure is stored (optional).
        --display: display the figure interactively.
        -t/--tools: tools to analyse; when omitted, ``None`` is forwarded and
            the choice of tools is left to ``plot_compare_status``.
        --title: title of the graph.
        --same-apks: the two databases are expected to contain the same apks.
    """
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description="Compare the repartition of status by tool from two result dbs",
    )
    parser.add_argument(
        "-d1",
        "--data1",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the first experiment",
    )
    parser.add_argument(
        "-d2",
        "--data2",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the second experiment",
    )
    parser.add_argument(
        "-f",
        "--figures-file",
        type=Path,
        help="The folder in which the figures must be stored",
    )
    parser.add_argument(
        "--display",
        action="store_true",
        help="If the figures must be displayed",
    )
    parser.add_argument(
        "-t",
        "--tools",
        nargs="+",
        default=None,
        help="The tools to analyse",
    )
    parser.add_argument(
        "--title",
        default="Comparision of Exit Status",
        help="The title of the graph",
    )
    parser.add_argument(
        "--same-apks",
        action="store_true",
        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
    )
    args = parser.parse_args()
    plot_compare_status(
        args.data1,
        args.data2,
        interactive=args.display,
        image_path=args.figures_file,
        tools=args.tools,
        # Forward the title: it was parsed but previously dropped, so the
        # --title option had no effect.
        title=args.title,
        same_apks=args.same_apks,
    )

View file

@ -444,3 +444,179 @@ def plot_all_status_by_generic_x(
interactive=args.display,
image_path=args.figures_file,
)
def _load_status_counts(db, tools):
    """Read the per-(tool, status) counts and the set of apks from one db.

    Returns a tuple ``(counts, apks)`` where ``counts`` maps
    ``(tool_name, tool_status)`` to ``COUNT(sha256)`` for the requested tools,
    and ``apks`` is the set of ``(sha256,)`` rows of the whole ``exec`` table.
    """
    placeholders = ",".join("?" for _ in tools)
    # `with sqlite3.connect(...)` only manages the transaction, it does not
    # close the connection, so close it explicitly.
    con = sqlite3.connect(db)
    try:
        cur = con.cursor()
        rows = cur.execute(
            (
                "SELECT tool_name, tool_status, COUNT(sha256) "
                "FROM exec "
                f"WHERE tool_name IN ({placeholders}) "
                "GROUP BY tool_name, tool_status;"
            ),
            tools,
        ).fetchall()
        apks = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
    finally:
        con.close()
    counts = {(tool, status): nb for tool, status, nb in rows}
    return counts, apks


def _percent(count, total):
    """Return `count` as a percentage of `total`, or 0 when `total` is 0."""
    return 0 if total == 0 else (100 * count) / total


def plot_compare_status(
    db1: Path,
    db2: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Comparision of Exit Status",
    same_apks: bool = False,
):
    """Plot and compare repartition of status by tools from two experiment.

    For each tool, two adjacent stacked bars are drawn (first `db1`, then
    `db2`), each showing the percentage of apks per status.

    db1 and db2 are the path to two result sqlite databases to compare
    interactive indicate if the figure must be displayed
    image_path is where to save the result
    tools is the list of tools to compare, default will compare all tools
        found in either database. The list is not modified.
    title is the title of the figure
    same_apks indicate if the two databases uses the same apks. If so, the
        percentages are relative to the union of the apks of both databases
        and the missing apks will be displayed in the plot.
    """
    if tools is None:
        tools = set(get_list_tools(db1)) | set(get_list_tools(db2))
    # Work on a sorted copy so the caller's list is never reordered.
    tools = sorted(tools)
    nb_bars = 2 * len(tools)

    counts_1, apk_1 = _load_status_counts(db1, tools)
    counts_2, apk_2 = _load_status_counts(db2, tools)

    values = {
        "Finished": np.zeros(nb_bars),
        "Time Out": np.zeros(nb_bars),
        "Other": np.zeros(nb_bars),
        "Failed": np.zeros(nb_bars),
    }

    if same_apks:
        # Both experiments are measured against the union of the apks; the
        # apks absent from one database are reported as "Missing" for it.
        nb_apk_tot = len(apk_1 | apk_2)
        nb_apk_1 = nb_apk_tot
        nb_apk_2 = nb_apk_tot
        values["Missing"] = np.zeros(nb_bars)
        # Guarded like the other percentages: two empty databases must not
        # raise ZeroDivisionError.
        values["Missing"][0::2] = _percent(len(apk_2 - apk_1), nb_apk_1)
        values["Missing"][1::2] = _percent(len(apk_1 - apk_2), nb_apk_2)
    else:
        nb_apk_1 = len(apk_1)
        nb_apk_2 = len(apk_2)

    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find better color
        "Failed": "#D55E00",
        "Missing": "#555555",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
        "Missing": "-",
    }

    # Plot label -> status value stored in the `tool_status` column.
    db_status = {"Finished": "FINISHED", "Time Out": "TIMEOUT", "Failed": "FAILED"}
    experiments = (
        (counts_1, len(apk_1), nb_apk_1),
        (counts_2, len(apk_2), nb_apk_2),
    )
    for i, tool in enumerate(tools):
        for offset, (counts, nb_apk_db, nb_apk_ref) in enumerate(experiments):
            column = 2 * i + offset
            accounted = 0
            for label, status in db_status.items():
                occurence = counts.get((tool, status), 0)
                accounted += occurence
                values[label][column] = _percent(occurence, nb_apk_ref)
            # Everything in the database that is neither finished, timed out
            # nor failed for this tool is reported as "Other".
            values["Other"][column] = _percent(nb_apk_db - accounted, nb_apk_ref)

    # Bars of the same tool are `lstep` apart, two tools are `bstep` apart.
    x_axis = np.zeros(nb_bars)
    x_width = 3
    x_0 = x_width / 2
    lstep = 1
    bstep = 5
    for i in range(len(tools)):
        x_0 += bstep + x_width
        x_axis[2 * i] = x_0
        x_0 += lstep + x_width
        x_axis[2 * i + 1] = x_0

    # Only the first bar of each pair carries the tool name.
    tick_legend = []
    for tool in tools:
        tick_legend.append(f"{tool}")
        tick_legend.append("")

    plt.figure(figsize=(20, 9), dpi=80)
    stats = ["Finished", "Time Out", "Other", "Failed"]
    if same_apks:
        stats.append("Missing")
    bottom = np.zeros(nb_bars)
    for stat in stats:
        plt.bar(
            x_axis,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path, format="svg")

View file

@ -112,7 +112,11 @@ def radar_chart(
def render(
title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
title: str,
interactive: bool,
image_path: Path | None,
tight_layout: bool = True,
format: str = "pdf",
):
"""Render the figure. If `interactive`, display if, if `image_path`, save it."""
# plt.title(title)
@ -121,7 +125,7 @@ def render(
if image_path is not None:
if not image_path.exists():
image_path.mkdir(parents=True, exist_ok=True)
plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
plt.savefig(image_path / (slugify(title) + "." + format), format=format)
if interactive:
plt.show()
plt.close()