Compare commits

...

8 commits

Author SHA1 Message Date
8eaceb6ba7 Update README.md 2025-10-01 15:28:36 +02:00
Jean-Marie Mineau
e56f181da2
Merge branch 'main' of github.com:histausse/rasta 2025-09-07 17:09:22 +02:00
Jean-Marie Mineau
0d8ad49c94
add function to compare result of two experiment 2025-09-07 17:08:42 +02:00
Jean-Marie Mineau
1309d7ea24
allow to use local apks 2025-09-05 10:33:16 +02:00
582f440560
Update README.md 2025-06-17 17:26:40 +02:00
704d1fb26f
Update README.md 2025-06-17 16:55:02 +02:00
630d232628
Update README.md
nix sqlite package changed name
2025-06-17 16:28:17 +02:00
43a35b726b
update container name in cmd 2024-06-13 05:36:26 +02:00
7 changed files with 294 additions and 28 deletions

1
.gitignore vendored
View file

@ -1 +1,2 @@
data
*.db

View file

@ -1,6 +1,6 @@
# RASTA
Rasta stands for Reproducibility of Android Static Tools and Analysis.
Rasta stands for Reusability of Android Static Tools and Analysis.
This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024.
@ -32,7 +32,15 @@ To run the Rasta experiment, some tools are required:
- gzip
- sqlite3
One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>).
One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>).
> [!WARNING]
> (One year later, 2025):
>
> Since Ubuntu 23.10, AppArmor prevents the creation of unprivileged namespaces by default. This means Singularity won't work without a specific AppArmor profile (which is not installed by nix-shell).
>
> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools.
There are also some Python dependencies that need to be installed in a virtual env:
@ -252,12 +260,12 @@ cd rasta_exp
cd ..
```
The obtained images are named `rasta-<tool-name>`, and the environment variables associated are in `rasta_exp/envs/<tool-name>_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built.
The obtained images are named `histausse/rasta-<tool-name>:icsr2024`, and the associated environment variables are in `rasta_exp/envs/<tool-name>_docker.env`. The `build_docker_images.sh` script can be edited to choose only one tool to be built.
After building a tool, a container can be entered interactively by doing:
```
docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash
docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash
```
Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it.

View file

@ -1,15 +1,15 @@
[tool.poetry]
name = "rasta_triturage"
version = "0.2.0"
version = "0.2.1"
description = "'Triturage de donnée' for the Rasta Project"
authors = ["anon"]
authors = ["Jean-Marie Mineau <rasta-github@jean-marie.mineau.eu>"]
readme = "README.md"
#homepage = ""
#repository = ""
license = "Proprietary"
homepage = "https://github.com/histausse/rasta/tree/main"
repository = "https://github.com/histausse/rasta/tree/main"
license = "GPLv3"
[tool.poetry.urls]
#"Bug Tracker" = ""
"Bug Tracker" = "https://github.com/histausse/rasta/issues"
[tool.poetry.dependencies]
python = "^3.10"
@ -50,6 +50,7 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
rasta-size-malware = "rasta_triturage.cli:size_malware"
rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
[tool.poetry.group.dev.dependencies]
pytest = "*"

View file

@ -17,6 +17,7 @@ from .status import (
plot_status_by_tool_and_malware,
plot_all_status_by_generic_x,
plot_status_by_generic_x,
plot_compare_status,
)
from .apk import (
plot_apk_info_by_generic_x,
@ -577,9 +578,9 @@ def ic3():
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
ic3_errors(
args.data,
file=args.figures_file / "ic3_err.csv"
if args.figures_file is not None
else None,
file=(
args.figures_file / "ic3_err.csv" if args.figures_file is not None else None
),
)
@ -1127,3 +1128,64 @@ def size_malware():
print(
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
)
def compare_status_by_tool():
    """Compare the repartition of status by tool from two result dbs.

    Command line entry point: parses the arguments from `sys.argv` and
    forwards them to `plot_compare_status`.
    """
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description="Compare the repartition of status by tool from two result dbs",
    )
    parser.add_argument(
        "-d1",
        "--data1",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the first experiment",
    )
    parser.add_argument(
        "-d2",
        "--data2",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the second experiment",
    )
    parser.add_argument(
        "-f",
        "--figures-file",
        type=Path,
        help="The folder in which the figures must be stored",
    )
    parser.add_argument(
        "--display",
        action="store_true",
        help="If the figures must be displayed",
    )
    parser.add_argument(
        "-t",
        "--tools",
        nargs="+",
        default=None,
        help="The tools to analyse",
    )
    parser.add_argument(
        "--title",
        default="Comparision of Exit Status",
        help="The title of the graph",
    )
    parser.add_argument(
        "--same-apks",
        action="store_true",
        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
    )
    args = parser.parse_args()
    plot_compare_status(
        args.data1,
        args.data2,
        interactive=args.display,
        image_path=args.figures_file,
        tools=args.tools,
        # --title was parsed but silently dropped before: forward it so the
        # option actually has an effect.
        title=args.title,
        same_apks=args.same_apks,
    )

View file

@ -444,3 +444,179 @@ def plot_all_status_by_generic_x(
interactive=args.display,
image_path=args.figures_file,
)
def _fetch_status_counts(db, tools):
    """Read from the result database `db` the data needed to compare status.

    Returns a tuple `(status, apks)`: `status` is the list of
    `(tool_name, tool_status, count)` rows of the `exec` table restricted to
    `tools`, and `apks` is the set of `sha256` rows found in the `exec` table
    (for all tools).
    """
    from contextlib import closing

    placeholders = ",".join("?" for _ in tools)
    query = (
        "SELECT tool_name, tool_status, COUNT(sha256) "
        "FROM exec "
        f"WHERE tool_name IN ({placeholders}) "
        "GROUP BY tool_name, tool_status;"
    )
    # The context manager of a sqlite3 connection only handles the
    # transaction, it does not close the connection: close it explicitly.
    with closing(sqlite3.connect(db)) as con:
        cur = con.cursor()
        status = cur.execute(query, tools).fetchall()
        apks = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
    return status, apks


def plot_compare_status(
    db1: Path,
    db2: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Comparision of Exit Status",
    same_apks: bool = False,
):
    """Plot and compare repartition of status by tools from two experiment.

    db1 and db2 are the path to two result sqlite databases to compare
    interactive indicate if the figure must be displayed
    image_path is where to save the result
    tools is the list of tools to compare, default will compare all tools found.
        The list is not modified.
    title is the title of the figure
    same_apks indicate if the two databases uses the same apks. If so, the
        missing apks will be displayed in the plot.
    """
    if tools is None:
        tools = set(get_list_tools(db1)) | set(get_list_tools(db2))
    # Sorted copy: the list given by the caller must not be reordered in place.
    tools = sorted(tools)

    status_1, apk_1 = _fetch_status_counts(db1, tools)
    status_2, apk_2 = _fetch_status_counts(db2, tools)

    occurences = {}
    for db_label, rows in (("db1", status_1), ("db2", status_2)):
        for tool, stat, occurence in rows:
            occurences[(tool, stat, db_label)] = occurence

    # Two bars per tool: index 2 * i is db1, index 2 * i + 1 is db2.
    nb_bars = 2 * len(tools)
    values = {
        "Finished": np.zeros(nb_bars),
        "Time Out": np.zeros(nb_bars),
        "Other": np.zeros(nb_bars),
        "Failed": np.zeros(nb_bars),
    }

    nb_apk_tot = len(apk_1 | apk_2)
    if same_apks:
        # Both experiments are supposed to target the same apks, so the
        # percentages are relative to the union and the apks absent from a
        # database are reported as "Missing".
        nb_apk_1 = nb_apk_tot
        nb_apk_2 = nb_apk_tot
        missing_1 = len(apk_2 - apk_1)
        missing_2 = len(apk_1 - apk_2)
        values["Missing"] = np.zeros(nb_bars)
        # Same guard as for the other status: two empty databases give empty
        # bars instead of a ZeroDivisionError.
        if nb_apk_tot != 0:
            values["Missing"][0::2] = (missing_1 * 100) / nb_apk_tot
            values["Missing"][1::2] = (missing_2 * 100) / nb_apk_tot
    else:
        nb_apk_1 = len(apk_1)
        nb_apk_2 = len(apk_2)

    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find better color
        "Failed": "#D55E00",
        "Missing": "#555555",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
        "Missing": "-",
    }

    status_names = {"Finished": "FINISHED", "Time Out": "TIMEOUT", "Failed": "FAILED"}
    per_db = (
        (0, "db1", len(apk_1), nb_apk_1),
        (1, "db2", len(apk_2), nb_apk_2),
    )
    for i, tool in enumerate(tools):
        for offset, db_label, nb_apk_in_db, nb_apk in per_db:
            counts = {
                name: occurences.get((tool, stat, db_label), 0)
                for name, stat in status_names.items()
            }
            # Every apk of the database without one of the three known status
            # for this tool is counted as "Other".
            counts["Other"] = nb_apk_in_db - sum(counts.values())
            for name, count in counts.items():
                values[name][2 * i + offset] = (
                    0 if nb_apk == 0 else (100 * count) / nb_apk
                )

    bottom = np.zeros(nb_bars)
    x_axis = np.zeros(nb_bars)
    x_width = 3
    x_0 = x_width / 2
    lstep = 1  # gap between the two bars of the same tool
    bstep = 5  # gap between two tools
    for i in range(len(tools)):
        x_0 += bstep + x_width
        x_axis[2 * i] = x_0
        x_0 += lstep + x_width
        x_axis[2 * i + 1] = x_0

    # Only the first bar of each pair is labelled with the tool name.
    tick_legend = []
    for tool in tools:
        tick_legend.append(f"{tool}")
        tick_legend.append("")

    plt.figure(figsize=(20, 9), dpi=80)
    stats = ["Finished", "Time Out", "Other", "Failed"]
    if same_apks:
        stats.append("Missing")
    for stat in stats:
        plt.bar(
            x_axis,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path, format="svg")

View file

@ -112,7 +112,11 @@ def radar_chart(
def render(
title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
title: str,
interactive: bool,
image_path: Path | None,
tight_layout: bool = True,
format: str = "pdf",
):
"""Render the figure. If `interactive`, display if, if `image_path`, save it."""
# plt.title(title)
@ -121,7 +125,7 @@ def render(
if image_path is not None:
if not image_path.exists():
image_path.mkdir(parents=True, exist_ok=True)
plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
plt.savefig(image_path / (slugify(title) + "." + format), format=format)
if interactive:
plt.show()
plt.close()

View file

@ -203,9 +203,17 @@ if __name__ == "__main__":
parser.add_argument(
"--task", help="[debug] Name of the task to perform", type=str, action="store"
)
parser.add_argument(
app_group = parser.add_mutually_exclusive_group()
app_group.add_argument(
"--sha", help="[debug] sha to make the --task on", type=str, action="store"
)
app_group.add_argument(
"--apk-path",
help="[debug] apk to make the --task on",
type=Path,
action="store",
)
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--docker", action="store_true")
group.add_argument("--singularity", action="store_true")
@ -224,7 +232,7 @@ if __name__ == "__main__":
result_dir = args.result_dir
if result_dir is None:
result_dir = base_dir
# base_dir = os.path.join(base_dir, str(uuid.uuid4()))
if not os.path.isdir(base_dir):
os.makedirs(base_dir)
@ -303,20 +311,26 @@ if __name__ == "__main__":
raise Exception("Debug mode must be used with BOTH --task and --sha")
task = args.task
# sha = str(args.sha).upper() # TMP patch
sha = str(args.sha)
if len(sha) != 64:
# sha = str(args.sha)
if args.sha is not None and len(args.sha) != 64:
# raise Exception("invalid --sha value")
print("invalid --sha value, exception disabled for tests")
apk_blob = get_apk_from_androzoo(
sha256=sha,
apikey=androzoo_apikey,
base_url=androzoo_base_url,
reraise=False,
local_cache=androzoo_local_cache,
)
if apk_blob is None:
print(f"Unable to obtain apk for sha={sha}")
if args.sha is not None:
apk_blob = get_apk_from_androzoo(
sha256=args.sha,
apikey=androzoo_apikey,
base_url=androzoo_base_url,
reraise=False,
local_cache=androzoo_local_cache,
)
sha = args.sha
if apk_blob is None:
print(f"Unable to obtain apk for sha={sha}")
else:
with args.apk_path.open("rb") as fp:
apk_blob = fp.read()
sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough
if apk_blob is not None:
# do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False):
res = do_one_job(
sha256=sha,