diff --git a/.gitignore b/.gitignore
index 1269488..c93e84f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
data
+*.db
diff --git a/README.md b/README.md
index e3fcc77..353019f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# RASTA
-Rasta stands for Reproducibility of Android Static Tools and Analysis.
+Rasta stands for Reusability of Android Static Tools and Analysis.
This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024.
@@ -32,7 +32,15 @@ To run the Rasta experiment, some tools are required:
- gzip
- sqlite3
-One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (, ).
+One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (, ).
+
+> [!WARNING]
+> (One year later, 2025):
+>
+> Since Ubuntu 23.10, AppArmor prevents the creation of unprivileged user namespaces by default. This means Singularity won't work without a specific AppArmor profile (which is not installed by nix-shell).
+>
+> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools.
+
They are also some python dependencies that need to be installed in a virtual env:
@@ -252,12 +260,12 @@ cd rasta_exp
cd ..
```
-The obtained images are named `rasta-`, and the environment variables associated are in `rasta_exp/envs/_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built.
+The obtained images are named `histausse/rasta-<tool>:icsr2024`, and the associated environment variables are in `rasta_exp/envs/<tool>_docker.env`. The build_docker_images.sh script can be edited to choose only one tool to be built.
After building a tool, a container can be entered interactively by doing:
```
-docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash
+docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash
```
Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it.
diff --git a/rasta_data_manipulation/pyproject.toml b/rasta_data_manipulation/pyproject.toml
index 3d223a6..a3f03ee 100644
--- a/rasta_data_manipulation/pyproject.toml
+++ b/rasta_data_manipulation/pyproject.toml
@@ -1,15 +1,15 @@
[tool.poetry]
name = "rasta_triturage"
-version = "0.2.0"
+version = "0.2.1"
description = "'Triturage de donnée' for the Rasta Project"
-authors = ["anon"]
+authors = ["Jean-Marie Mineau "]
readme = "README.md"
-#homepage = ""
-#repository = ""
-license = "Proprietary"
+homepage = "https://github.com/histausse/rasta/tree/main"
+repository = "https://github.com/histausse/rasta/tree/main"
+license = "GPLv3"
[tool.poetry.urls]
-#"Bug Tracker" = ""
+"Bug Tracker" = "https://github.com/histausse/rasta/issues"
[tool.poetry.dependencies]
python = "^3.10"
@@ -50,6 +50,7 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
rasta-size-malware = "rasta_triturage.cli:size_malware"
+rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
[tool.poetry.group.dev.dependencies]
pytest = "*"
diff --git a/rasta_data_manipulation/rasta_triturage/cli.py b/rasta_data_manipulation/rasta_triturage/cli.py
index 6298e6d..a42d722 100644
--- a/rasta_data_manipulation/rasta_triturage/cli.py
+++ b/rasta_data_manipulation/rasta_triturage/cli.py
@@ -17,6 +17,7 @@ from .status import (
plot_status_by_tool_and_malware,
plot_all_status_by_generic_x,
plot_status_by_generic_x,
+ plot_compare_status,
)
from .apk import (
plot_apk_info_by_generic_x,
@@ -577,9 +578,9 @@ def ic3():
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
ic3_errors(
args.data,
- file=args.figures_file / "ic3_err.csv"
- if args.figures_file is not None
- else None,
+ file=(
+ args.figures_file / "ic3_err.csv" if args.figures_file is not None else None
+ ),
)
@@ -1127,3 +1128,70 @@ def size_malware():
print(
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
)
+
+
+def compare_status_by_tool():
+    """Compare the repartition of status by tool from two result dbs.
+
+    Entry point of the `rasta-compare-status` command: parse the command line
+    and forward the options to `plot_compare_status`.
+    """
+
+    parser = argparse.ArgumentParser(
+        prog=sys.argv[0],
+        description="Compare the repartition of status by tool from two result dbs",
+    )
+    parser.add_argument(
+        "-d1",
+        "--data1",
+        required=True,
+        type=Path,
+        help="The sqlite3 database that contain the execution report of the first experiment",
+    )
+    parser.add_argument(
+        "-d2",
+        "--data2",
+        required=True,
+        type=Path,
+        help="The sqlite3 database that contain the execution report of the second experiment",
+    )
+    parser.add_argument(
+        "-f",
+        "--figures-file",
+        type=Path,
+        help="The folder in which the figures must be stored",
+    )
+    parser.add_argument(
+        "--display",
+        action="store_true",
+        help="If the figures must be displayed",
+    )
+    parser.add_argument(
+        "-t",
+        "--tools",
+        nargs="+",
+        default=None,
+        help="The tools to analyse",
+    )
+    parser.add_argument(
+        "--title",
+        default="Comparision of Exit Status",
+        help="The title of the graph",
+    )
+    parser.add_argument(
+        "--same-apks",
+        action="store_true",
+        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
+    )
+    args = parser.parse_args()
+
+    plot_compare_status(
+        args.data1,
+        args.data2,
+        interactive=args.display,
+        image_path=args.figures_file,
+        tools=args.tools,
+        # The title was parsed but never forwarded, so `--title` was ignored.
+        title=args.title,
+        same_apks=args.same_apks,
+    )
diff --git a/rasta_data_manipulation/rasta_triturage/status.py b/rasta_data_manipulation/rasta_triturage/status.py
index 819b14d..69cbd93 100644
--- a/rasta_data_manipulation/rasta_triturage/status.py
+++ b/rasta_data_manipulation/rasta_triturage/status.py
@@ -444,3 +444,150 @@ def plot_all_status_by_generic_x(
interactive=args.display,
image_path=args.figures_file,
)
+
+
+def _load_status_counts(
+    db: Path, tools: list[str]
+) -> tuple[dict[tuple[str, str], int], set[tuple[str]]]:
+    """Read the status counts and the set of apks of a result database.
+
+    Return `(counts, apks)`: `counts` maps `(tool_name, tool_status)` to the
+    number of rows of the `exec` table for the tools listed in `tools`, and
+    `apks` is the set of `(sha256,)` rows of the whole `exec` table.
+    """
+    placeholders = ",".join("?" for _ in tools)
+    # `with sqlite3.connect(...)` only wraps a transaction and leaves the
+    # connection open, so close it explicitly.
+    con = sqlite3.connect(db)
+    try:
+        cur = con.cursor()
+        rows = cur.execute(
+            (
+                "SELECT tool_name, tool_status, COUNT(sha256) "
+                "FROM exec "
+                f"WHERE tool_name IN ({placeholders}) "
+                "GROUP BY tool_name, tool_status;"
+            ),
+            tools,
+        ).fetchall()
+        apks = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
+    finally:
+        con.close()
+    return {(tool, status): count for tool, status, count in rows}, apks
+
+
+def plot_compare_status(
+    db1: Path,
+    db2: Path,
+    interactive: bool = True,
+    image_path: Path | None = None,
+    tools: list[str] | None = None,
+    title: str = "Comparision of Exit Status",
+    same_apks: bool = False,
+):
+    """Plot and compare the repartition of status by tool from two experiments.
+
+    db1 and db2 are the paths to the two result sqlite databases to compare
+    interactive indicates if the figure must be displayed
+    image_path is the folder where to save the result
+    tools is the list of tools to compare, default will compare all tools found.
+    title is the title of the figure, it is passed to `render`
+    same_apks indicates if the two databases use the same apks. If so, the
+    missing apks will be displayed in the plot.
+    """
+    if tools is None:
+        tools = set(get_list_tools(db1)) | set(get_list_tools(db2))
+    # Work on a sorted copy so the list given by the caller is not reordered.
+    tools = sorted(tools)
+
+    counts_1, apk_1 = _load_status_counts(db1, tools)
+    counts_2, apk_2 = _load_status_counts(db2, tools)
+
+    if same_apks:
+        # Use the union of the apks as reference for both dbs, so that the apks
+        # absent from one db are counted as "Missing" for this db.
+        nb_apk_1 = nb_apk_2 = len(apk_1 | apk_2)
+    else:
+        nb_apk_1, nb_apk_2 = len(apk_1), len(apk_2)
+
+    def percent(count: int, total: int) -> float:
+        # An empty reference set gives an empty bar, not a division by zero.
+        return 0 if total == 0 else (100 * count) / total
+
+    nb_bars = 2 * len(tools)
+    # The insertion order is the stacking order of the bars (bottom to top).
+    values = {
+        "Finished": np.zeros(nb_bars),
+        "Time Out": np.zeros(nb_bars),
+        "Other": np.zeros(nb_bars),
+        "Failed": np.zeros(nb_bars),
+    }
+    if same_apks:
+        values["Missing"] = np.zeros(nb_bars)
+    status_of = {"Finished": "FINISHED", "Time Out": "TIMEOUT", "Failed": "FAILED"}
+    colors = {
+        "Finished": "#009E73",
+        "Time Out": "#56B4E9",
+        "Other": "#555555",  # TODO: find better color
+        "Failed": "#D55E00",
+        "Missing": "#555555",
+    }
+    hatch = {
+        "Finished": "/",
+        "Time Out": "x",
+        "Other": ".",
+        "Failed": "\\",
+        "Missing": "-",
+    }
+
+    # Bars are interleaved: 2 * i is `tool` in db1, 2 * i + 1 is `tool` in db2.
+    experiments = (
+        (counts_1, apk_1, apk_2, nb_apk_1),
+        (counts_2, apk_2, apk_1, nb_apk_2),
+    )
+    for i, tool in enumerate(tools):
+        for offset, (counts, apks, other_apks, nb_apk) in enumerate(experiments):
+            bar = 2 * i + offset
+            nb_known = 0
+            for label, status in status_of.items():
+                count = counts.get((tool, status), 0)
+                nb_known += count
+                values[label][bar] = percent(count, nb_apk)
+            # Whatever remains of the apks of this db once the statuses above are counted.
+            values["Other"][bar] = percent(len(apks) - nb_known, nb_apk)
+            if same_apks:
+                values["Missing"][bar] = percent(len(other_apks - apks), nb_apk)
+
+    x_width = 3
+    lstep = 1  # gap between the two bars of a tool
+    bstep = 5  # gap between the bars of two different tools
+    x_axis = np.zeros(nb_bars)
+    x_0 = x_width / 2
+    for i in range(len(tools)):
+        x_0 += bstep + x_width
+        x_axis[2 * i] = x_0
+        x_0 += lstep + x_width
+        x_axis[2 * i + 1] = x_0
+    # Only the first bar of each pair carries the name of the tool.
+    tick_legend = []
+    for tool in tools:
+        tick_legend.extend((f"{tool}", ""))
+
+    plt.figure(figsize=(20, 9), dpi=80)
+    bottom = np.zeros(nb_bars)
+    for stat, heights in values.items():
+        plt.bar(
+            x_axis,
+            heights,
+            label=stat,
+            color=colors[stat],
+            hatch=hatch[stat],
+            bottom=bottom,
+            width=x_width,
+            edgecolor="black",
+        )
+        bottom += heights
+    plt.xticks(x_axis, tick_legend, rotation=80)
+    plt.legend()
+    plt.ylabel("% of analysed apk")
+    render(title, interactive, image_path, format="svg")
diff --git a/rasta_data_manipulation/rasta_triturage/utils.py b/rasta_data_manipulation/rasta_triturage/utils.py
index 91cf08a..ee60c53 100644
--- a/rasta_data_manipulation/rasta_triturage/utils.py
+++ b/rasta_data_manipulation/rasta_triturage/utils.py
@@ -112,7 +112,11 @@ def radar_chart(
def render(
- title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
+ title: str,
+ interactive: bool,
+ image_path: Path | None,
+ tight_layout: bool = True,
+ format: str = "pdf",
):
"""Render the figure. If `interactive`, display if, if `image_path`, save it."""
# plt.title(title)
@@ -121,7 +125,7 @@ def render(
if image_path is not None:
if not image_path.exists():
image_path.mkdir(parents=True, exist_ok=True)
- plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
+ plt.savefig(image_path / (slugify(title) + "." + format), format=format)
if interactive:
plt.show()
plt.close()
diff --git a/rasta_exp/grunt-worker.py b/rasta_exp/grunt-worker.py
index 82e7b6a..4d3542d 100755
--- a/rasta_exp/grunt-worker.py
+++ b/rasta_exp/grunt-worker.py
@@ -203,9 +203,17 @@ if __name__ == "__main__":
parser.add_argument(
"--task", help="[debug] Name of the task to perform", type=str, action="store"
)
- parser.add_argument(
+ app_group = parser.add_mutually_exclusive_group()
+ app_group.add_argument(
"--sha", help="[debug] sha to make the --task on", type=str, action="store"
)
+ app_group.add_argument(
+ "--apk-path",
+ help="[debug] apk to make the --task on",
+ type=Path,
+ action="store",
+ )
+
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--docker", action="store_true")
group.add_argument("--singularity", action="store_true")
@@ -224,7 +232,7 @@ if __name__ == "__main__":
result_dir = args.result_dir
if result_dir is None:
result_dir = base_dir
-
+
# base_dir = os.path.join(base_dir, str(uuid.uuid4()))
if not os.path.isdir(base_dir):
os.makedirs(base_dir)
@@ -303,20 +311,26 @@ if __name__ == "__main__":
raise Exception("Debug mode must be used with BOTH --task and --sha")
task = args.task
# sha = str(args.sha).upper() # TMP patch
- sha = str(args.sha)
- if len(sha) != 64:
+ # sha = str(args.sha)
+ if args.sha is not None and len(args.sha) != 64:
# raise Exception("invalid --sha value")
print("invalid --sha value, exception disabled for tests")
- apk_blob = get_apk_from_androzoo(
- sha256=sha,
- apikey=androzoo_apikey,
- base_url=androzoo_base_url,
- reraise=False,
- local_cache=androzoo_local_cache,
- )
- if apk_blob is None:
- print(f"Unable to obtain apk for sha={sha}")
+ if args.sha is not None:
+ apk_blob = get_apk_from_androzoo(
+ sha256=args.sha,
+ apikey=androzoo_apikey,
+ base_url=androzoo_base_url,
+ reraise=False,
+ local_cache=androzoo_local_cache,
+ )
+ sha = args.sha
+ if apk_blob is None:
+ print(f"Unable to obtain apk for sha={sha}")
else:
+ with args.apk_path.open("rb") as fp:
+ apk_blob = fp.read()
+ sha = args.apk_path.name.removesuffix(".apk") # not a sha, but good enough
+ if apk_blob is not None:
# do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False):
res = do_one_job(
sha256=sha,