diff --git a/.gitignore b/.gitignore
index c93e84f..1269488 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1 @@
data
-*.db
diff --git a/README.md b/README.md
index 353019f..e3fcc77 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# RASTA
-Rasta stands for Reusability of Android Static Tools and Analysis.
+Rasta stands for Reproducibility of Android Static Tools and Analysis.
This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024.
@@ -32,15 +32,7 @@ To run the Rasta experiment, some tools are required:
- gzip
- sqlite3
-One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (, ).
-
-> [!WARNING]
-> (One year later, 2025):
->
-> Since Ubuntu 23.10, apparmor prevents the creation of unprivileged namespace by default. This means singularity won't work without a specific apparmor profile (which is not installed by nix-shell).
->
-> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools.
-
+One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (, ).
They are also some python dependencies that need to be installed in a virtual env:
@@ -260,12 +252,12 @@ cd rasta_exp
cd ..
```
-The obtained images are named `histausse/rasta-:icsr2024`, and the environment variables associated are in `rasta_exp/envs/_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built.
+The obtained images are named `rasta-<tool>`, and the associated environment variables are in `rasta_exp/envs/<tool>_docker.env`. The `build_docker_images.sh` script can be edited to choose only one tool to be built.
After building a tool, a container can be entered interactively by doing:
```
-docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash
+docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash
```
Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it.
diff --git a/rasta_data_manipulation/pyproject.toml b/rasta_data_manipulation/pyproject.toml
index a3f03ee..3d223a6 100644
--- a/rasta_data_manipulation/pyproject.toml
+++ b/rasta_data_manipulation/pyproject.toml
@@ -1,15 +1,15 @@
[tool.poetry]
name = "rasta_triturage"
-version = "0.2.1"
+version = "0.2.0"
description = "'Triturage de donnée' for the Rasta Project"
-authors = ["Jean-Marie Mineau "]
+authors = ["anon"]
readme = "README.md"
-homepage = "https://github.com/histausse/rasta/tree/main"
-repository = "https://github.com/histausse/rasta/tree/main"
-license = "GPLv3"
+#homepage = ""
+#repository = ""
+license = "Proprietary"
[tool.poetry.urls]
-"Bug Tracker" = "https://github.com/histausse/rasta/issues"
+#"Bug Tracker" = ""
[tool.poetry.dependencies]
python = "^3.10"
@@ -50,7 +50,6 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
rasta-size-malware = "rasta_triturage.cli:size_malware"
-rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
[tool.poetry.group.dev.dependencies]
pytest = "*"
diff --git a/rasta_data_manipulation/rasta_triturage/cli.py b/rasta_data_manipulation/rasta_triturage/cli.py
index a42d722..6298e6d 100644
--- a/rasta_data_manipulation/rasta_triturage/cli.py
+++ b/rasta_data_manipulation/rasta_triturage/cli.py
@@ -17,7 +17,6 @@ from .status import (
plot_status_by_tool_and_malware,
plot_all_status_by_generic_x,
plot_status_by_generic_x,
- plot_compare_status,
)
from .apk import (
plot_apk_info_by_generic_x,
@@ -578,9 +577,9 @@ def ic3():
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
ic3_errors(
args.data,
- file=(
- args.figures_file / "ic3_err.csv" if args.figures_file is not None else None
- ),
+ file=args.figures_file / "ic3_err.csv"
+ if args.figures_file is not None
+ else None,
)
@@ -1128,64 +1127,3 @@ def size_malware():
print(
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
)
-
-
-def compare_status_by_tool():
- """Compare the repartition of status by tool from two result dbs"""
-
- parser = argparse.ArgumentParser(
- prog=sys.argv[0],
- description="Compare the repartition of status by tool from two result dbs",
- )
- parser.add_argument(
- "-d1",
- "--data1",
- required=True,
- type=Path,
- help="The sqlite3 database that contain the execution report of the first experiment",
- )
- parser.add_argument(
- "-d2",
- "--data2",
- required=True,
- type=Path,
- help="The sqlite3 database that contain the execution report of the second experiment",
- )
- parser.add_argument(
- "-f",
- "--figures-file",
- type=Path,
- help="The folder in which the figures must be stored",
- )
- parser.add_argument(
- "--display",
- action="store_true",
- help="If the figures must be displayed",
- )
- parser.add_argument(
- "-t",
- "--tools",
- nargs="+",
- default=None,
- help="The tools to analyse",
- )
- parser.add_argument(
- "--title",
- default="Comparision of Exit Status",
- help="The title of the graph",
- )
- parser.add_argument(
- "--same-apks",
- action="store_true",
- help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
- )
- args = parser.parse_args()
-
- plot_compare_status(
- args.data1,
- args.data2,
- interactive=args.display,
- image_path=args.figures_file,
- tools=args.tools,
- same_apks=args.same_apks,
- )
diff --git a/rasta_data_manipulation/rasta_triturage/status.py b/rasta_data_manipulation/rasta_triturage/status.py
index 69cbd93..819b14d 100644
--- a/rasta_data_manipulation/rasta_triturage/status.py
+++ b/rasta_data_manipulation/rasta_triturage/status.py
@@ -444,179 +444,3 @@ def plot_all_status_by_generic_x(
interactive=args.display,
image_path=args.figures_file,
)
-
-
-def plot_compare_status(
- db1: Path,
- db2: Path,
- interactive: bool = True,
- image_path: Path | None = None,
- tools: list[str] | None = None,
- title: str = "Comparision of Exit Status",
- same_apks: bool = False,
-):
- """Plot and compare repartition of status by tools from two experiment.
-
- db1 and db2 are the path to two result sqlite databases to compare
- image_path is where to save the result
- tools is the list of tools to compare, default will compare all tools found.
- title is the title of the figure
- same_apks indicate if the two databases uses the same apks. If so, the missing apks will be displayed in the plot.
- """
- if tools is None:
- tools = list(set(get_list_tools(db1)) | set(get_list_tools(db2)))
- tools_list_format = f"({','.join(['?' for _ in tools])})"
- with sqlite3.connect(db1) as con:
- cur = con.cursor()
- status_1 = cur.execute(
- (
- "SELECT tool_name, tool_status, COUNT(sha256) "
- "FROM exec "
- f"WHERE tool_name IN {tools_list_format} "
- "GROUP BY tool_name, tool_status;"
- ),
- tools,
- ).fetchall()
- apk_1 = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
- with sqlite3.connect(db2) as con:
- cur = con.cursor()
- status_2 = cur.execute(
- (
- "SELECT tool_name, tool_status, COUNT(sha256) "
- "FROM exec "
- f"WHERE tool_name IN {tools_list_format} "
- "GROUP BY tool_name, tool_status;"
- ),
- tools,
- ).fetchall()
- apk_2 = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
-
- occurences = {}
- for tool, stat, occurence in status_1:
- occurences[(tool, stat, "db1")] = occurence
- for tool, stat, occurence in status_2:
- occurences[(tool, stat, "db2")] = occurence
- # tools.sort(
- # key=lambda t: occurences.get((t, "FINISHED", "db1"), 0)
- # + occurences.get((t, "FINISHED", "db2"), 0),
- # reverse=True,
- # )
- tools.sort()
-
- values = {
- "Finished": np.zeros(len(tools) * 2),
- "Time Out": np.zeros(len(tools) * 2),
- "Other": np.zeros(len(tools) * 2),
- "Failed": np.zeros(len(tools) * 2),
- }
- nb_apk_tot = len(apk_1 | apk_2)
- if same_apks:
- nb_apk_1 = nb_apk_tot
- nb_apk_2 = nb_apk_tot
- missing_1 = len(apk_2 - apk_1)
- missing_2 = len(apk_1 - apk_2)
- values["Missing"] = np.zeros(len(tools) * 2)
- for i in range(len(tools)):
- values["Missing"][2 * i] = (missing_1 * 100) / nb_apk_1
- values["Missing"][2 * i + 1] = (missing_2 * 100) / nb_apk_2
- else:
- nb_apk_1 = len(apk_1)
- nb_apk_2 = len(apk_2)
- colors = {
- "Finished": "#009E73",
- "Time Out": "#56B4E9",
- "Other": "#555555", # TODO: find beter color
- "Failed": "#D55E00",
- "Missing": "#555555",
- }
- hatch = {
- "Finished": "/",
- "Time Out": "x",
- "Other": ".",
- "Failed": "\\",
- "Missing": "-",
- }
-
- for i, tool in enumerate(tools):
- i_1 = 2 * i
- i_2 = 2 * i + 1
- values["Finished"][i_1] = occurences.get((tool, "FINISHED", "db1"), 0)
- values["Finished"][i_2] = occurences.get((tool, "FINISHED", "db2"), 0)
- values["Time Out"][i_1] = occurences.get((tool, "TIMEOUT", "db1"), 0)
- values["Time Out"][i_2] = occurences.get((tool, "TIMEOUT", "db2"), 0)
- values["Failed"][i_1] = occurences.get((tool, "FAILED", "db1"), 0)
- values["Failed"][i_2] = occurences.get((tool, "FAILED", "db2"), 0)
- values["Other"][i_1] = (
- len(apk_1)
- - values["Finished"][i_1]
- - values["Time Out"][i_1]
- - values["Failed"][i_1]
- )
- values["Other"][i_2] = (
- len(apk_2)
- - values["Finished"][i_2]
- - values["Time Out"][i_2]
- - values["Failed"][i_2]
- )
- values["Finished"][i_1] = (
- 0 if nb_apk_1 == 0 else (100 * values["Finished"][i_1]) / nb_apk_1
- )
- values["Finished"][i_2] = (
- 0 if nb_apk_2 == 0 else (100 * values["Finished"][i_2]) / nb_apk_2
- )
- values["Time Out"][i_1] = (
- 0 if nb_apk_1 == 0 else (100 * values["Time Out"][i_1]) / nb_apk_1
- )
- values["Time Out"][i_2] = (
- 0 if nb_apk_2 == 0 else (100 * values["Time Out"][i_2]) / nb_apk_2
- )
- values["Failed"][i_1] = (
- 0 if nb_apk_1 == 0 else (100 * values["Failed"][i_1]) / nb_apk_1
- )
- values["Failed"][i_2] = (
- 0 if nb_apk_2 == 0 else (100 * values["Failed"][i_2]) / nb_apk_2
- )
- values["Other"][i_1] = (
- 0 if nb_apk_1 == 0 else (100 * values["Other"][i_1]) / nb_apk_1
- )
- values["Other"][i_2] = (
- 0 if nb_apk_2 == 0 else (100 * values["Other"][i_2]) / nb_apk_2
- )
- bottom = np.zeros(len(tools) * 2)
-
- x_axis = np.zeros(len(tools) * 2)
- x_width = 3
- x_0 = x_width / 2
- lstep = 1
- bstep = 5
- for i in range(len(tools)):
- x_0 += bstep + x_width
- x_axis[2 * i] = x_0
- x_0 += lstep + x_width
- x_axis[2 * i + 1] = x_0
- tick_legend = []
- for tool in tools:
- tick_legend.append(f"{tool}") # (f"{tool} on goodware")
- tick_legend.append("") # (f"{tool} on malware")
-
- plt.figure(figsize=(20, 9), dpi=80)
- if same_apks:
- stats = ["Finished", "Time Out", "Other", "Failed", "Missing"]
- else:
- stats = ["Finished", "Time Out", "Other", "Failed"]
- for stat in stats:
- plt.bar(
- x_axis,
- values[stat],
- label=stat,
- color=colors[stat],
- hatch=hatch[stat],
- bottom=bottom,
- width=x_width,
- edgecolor="black",
- )
- bottom += values[stat]
- plt.xticks(x_axis, tick_legend, rotation=80)
- plt.legend()
- plt.ylabel("% of analysed apk")
- render(title, interactive, image_path, format="svg")
diff --git a/rasta_data_manipulation/rasta_triturage/utils.py b/rasta_data_manipulation/rasta_triturage/utils.py
index ee60c53..91cf08a 100644
--- a/rasta_data_manipulation/rasta_triturage/utils.py
+++ b/rasta_data_manipulation/rasta_triturage/utils.py
@@ -112,11 +112,7 @@ def radar_chart(
def render(
- title: str,
- interactive: bool,
- image_path: Path | None,
- tight_layout: bool = True,
- format: str = "pdf",
+ title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
):
"""Render the figure. If `interactive`, display if, if `image_path`, save it."""
# plt.title(title)
@@ -125,7 +121,7 @@ def render(
if image_path is not None:
if not image_path.exists():
image_path.mkdir(parents=True, exist_ok=True)
- plt.savefig(image_path / (slugify(title) + "." + format), format=format)
+ plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
if interactive:
plt.show()
plt.close()
diff --git a/rasta_exp/grunt-worker.py b/rasta_exp/grunt-worker.py
index 4d3542d..82e7b6a 100755
--- a/rasta_exp/grunt-worker.py
+++ b/rasta_exp/grunt-worker.py
@@ -203,17 +203,9 @@ if __name__ == "__main__":
parser.add_argument(
"--task", help="[debug] Name of the task to perform", type=str, action="store"
)
- app_group = parser.add_mutually_exclusive_group()
- app_group.add_argument(
+ parser.add_argument(
"--sha", help="[debug] sha to make the --task on", type=str, action="store"
)
- app_group.add_argument(
- "--apk-path",
- help="[debug] apk to make the --task on",
- type=Path,
- action="store",
- )
-
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--docker", action="store_true")
group.add_argument("--singularity", action="store_true")
@@ -232,7 +224,7 @@ if __name__ == "__main__":
result_dir = args.result_dir
if result_dir is None:
result_dir = base_dir
-
+
# base_dir = os.path.join(base_dir, str(uuid.uuid4()))
if not os.path.isdir(base_dir):
os.makedirs(base_dir)
@@ -311,26 +303,20 @@ if __name__ == "__main__":
raise Exception("Debug mode must be used with BOTH --task and --sha")
task = args.task
# sha = str(args.sha).upper() # TMP patch
- # sha = str(args.sha)
- if args.sha is not None and len(args.sha) != 64:
+ sha = str(args.sha)
+ if len(sha) != 64:
# raise Exception("invalid --sha value")
print("invalid --sha value, exception disabled for tests")
- if args.sha is not None:
- apk_blob = get_apk_from_androzoo(
- sha256=args.sha,
- apikey=androzoo_apikey,
- base_url=androzoo_base_url,
- reraise=False,
- local_cache=androzoo_local_cache,
- )
- sha = args.sha
- if apk_blob is None:
- print(f"Unable to obtain apk for sha={sha}")
+ apk_blob = get_apk_from_androzoo(
+ sha256=sha,
+ apikey=androzoo_apikey,
+ base_url=androzoo_base_url,
+ reraise=False,
+ local_cache=androzoo_local_cache,
+ )
+ if apk_blob is None:
+ print(f"Unable to obtain apk for sha={sha}")
else:
- with args.apk_path.open("rb") as fp:
- apk_blob = fp.read()
- sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough
- if apk_blob is not None:
# do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False):
res = do_one_job(
sha256=sha,