diff --git a/.gitignore b/.gitignore index 1269488..c93e84f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ data +*.db diff --git a/README.md b/README.md index e3fcc77..353019f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RASTA -Rasta stands for Reproducibility of Android Static Tools and Analysis. +Rasta stands for Reusability of Android Static Tools and Analysis. This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024. @@ -32,7 +32,15 @@ To run the Rasta experiment, some tools are required: - gzip - sqlite3 -One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (, ). +One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (, ). + +> [!WARNING] +> (One year later, 2025): +> +> Since Ubuntu 23.10, apparmor prevents the creation of unprivileged namespace by default. This means singularity won't work without a specific apparmor profile (which is not installed by nix-shell). +> +> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools. + They are also some python dependencies that need to be installed in a virtual env: @@ -252,12 +260,12 @@ cd rasta_exp cd .. ``` -The obtained images are named `rasta-`, and the environment variables associated are in `rasta_exp/envs/_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built. 
+The obtained images are named `histausse/rasta-<tool>:icsr2024`, and the environment variables associated are in `rasta_exp/envs/<tool>_docker.env`. The build_docker_images.sh can be edited to choose only one tool to be built. After building a tool, a container can be entered interactively by doing: ``` -docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash +docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash ``` Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it. diff --git a/rasta_data_manipulation/pyproject.toml b/rasta_data_manipulation/pyproject.toml index 3d223a6..a3f03ee 100644 --- a/rasta_data_manipulation/pyproject.toml +++ b/rasta_data_manipulation/pyproject.toml @@ -1,15 +1,15 @@ [tool.poetry] name = "rasta_triturage" -version = "0.2.0" +version = "0.2.1" description = "'Triturage de donnée' for the Rasta Project" -authors = ["anon"] +authors = ["Jean-Marie Mineau "] readme = "README.md" -#homepage = "" -#repository = "" -license = "Proprietary" +homepage = "https://github.com/histausse/rasta/tree/main" +repository = "https://github.com/histausse/rasta/tree/main" +license = "GPLv3" [tool.poetry.urls] -#"Bug Tracker" = "" +"Bug Tracker" = "https://github.com/histausse/rasta/issues" [tool.poetry.dependencies] python = "^3.10" @@ -50,6 +50,7 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor" rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks" rasta-gen-dataset = "rasta_triturage.cli:generate_dataset" rasta-size-malware = "rasta_triturage.cli:size_malware" +rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool" [tool.poetry.group.dev.dependencies] pytest = "*" diff --git a/rasta_data_manipulation/rasta_triturage/cli.py b/rasta_data_manipulation/rasta_triturage/cli.py index 6298e6d..a42d722 100644 ---
def compare_status_by_tool():
    """Compare the repartition of status by tool from two result dbs.

    Command line entry point (registered as `rasta-compare-status`): parse the
    arguments from `sys.argv` and forward them to `plot_compare_status`.
    """

    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description="Compare the repartition of status by tool from two result dbs",
    )
    parser.add_argument(
        "-d1",
        "--data1",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the first experiment",
    )
    parser.add_argument(
        "-d2",
        "--data2",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the second experiment",
    )
    parser.add_argument(
        "-f",
        "--figures-file",
        type=Path,
        help="The folder in which the figures must be stored",
    )
    parser.add_argument(
        "--display",
        action="store_true",
        help="If the figures must be displayed",
    )
    parser.add_argument(
        "-t",
        "--tools",
        nargs="+",
        default=None,
        help="The tools to analyse",
    )
    parser.add_argument(
        "--title",
        # Same value as the default of `plot_compare_status(title=...)`.
        default="Comparision of Exit Status",
        help="The title of the graph",
    )
    parser.add_argument(
        "--same-apks",
        action="store_true",
        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
    )
    args = parser.parse_args()

    plot_compare_status(
        args.data1,
        args.data2,
        interactive=args.display,
        image_path=args.figures_file,
        tools=args.tools,
        # `--title` used to be parsed and then dropped: forward it so the option
        # actually reaches the plotting function.
        title=args.title,
        same_apks=args.same_apks,
    )
def _fetch_status_counts(
    db: Path, tools: list[str]
) -> tuple[dict[tuple[str, str], int], set[str]]:
    """Return `({(tool, status): nb of rows}, {sha256 in exec})` read from `db`."""
    from contextlib import closing

    placeholders = ",".join("?" for _ in tools)
    # `with sqlite3.connect(...)` alone only scopes a transaction and leaves the
    # connection open, `closing` makes sure it is released.
    with closing(sqlite3.connect(db)) as con:
        cur = con.cursor()
        rows = cur.execute(
            (
                "SELECT tool_name, tool_status, COUNT(sha256) "
                "FROM exec "
                f"WHERE tool_name IN ({placeholders}) "
                "GROUP BY tool_name, tool_status;"
            ),
            tools,
        ).fetchall()
        # Every apk of the table, not only the ones analysed by `tools`.
        apks = {sha for (sha,) in cur.execute("SELECT sha256 FROM exec;")}
    return {(tool, stat): nb for tool, stat, nb in rows}, apks


def _to_percent(counts, total: int):
    """Scale `counts` to percentages of `total`, all zeros when `total` is 0."""
    if total == 0:
        return np.zeros_like(counts)
    return (100 * counts) / total


def plot_compare_status(
    db1: Path,
    db2: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Comparision of Exit Status",
    same_apks: bool = False,
):
    """Plot and compare repartition of status by tools from two experiment.

    For each tool two stacked bars are drawn side by side, the left one for
    `db1` and the right one for `db2`. Each bar is split between "Finished",
    "Time Out", "Other" and "Failed" (plus "Missing" when `same_apks` is set),
    expressed in percent of the number of apks.

    db1 and db2 are the path to two result sqlite databases to compare
    interactive and image_path are forwarded to `render`
    tools is the list of tools to compare, default will compare all tools found.
        The list is not modified, the tools are plotted in sorted order.
    title is the title of the figure
    same_apks indicate if the two databases uses the same apks. If so, the
        percentages are relative to the union of the apks of both databases and
        the apks absent from a database are displayed as "Missing" in its bars.
    """
    if tools is None:
        tools = set(get_list_tools(db1)) | set(get_list_tools(db2))
    # Sorted copy: the list given by the caller must not be reordered in place.
    tools = sorted(tools)

    counts_1, apk_1 = _fetch_status_counts(db1, tools)
    counts_2, apk_2 = _fetch_status_counts(db2, tools)

    # Bars are interleaved: index 2*i is tool i in db1, index 2*i+1 is tool i in db2.
    nb_bars = 2 * len(tools)
    # The insertion order of `values` is the stacking order of the bars.
    values = {
        "Finished": np.zeros(nb_bars),
        "Time Out": np.zeros(nb_bars),
        "Other": np.zeros(nb_bars),
        "Failed": np.zeros(nb_bars),
    }
    status_by_label = {
        "Finished": "FINISHED",
        "Time Out": "TIMEOUT",
        "Failed": "FAILED",
    }
    for offset, counts, apks in ((0, counts_1, apk_1), (1, counts_2, apk_2)):
        for label, status in status_by_label.items():
            values[label][offset::2] = [
                counts.get((tool, status), 0) for tool in tools
            ]
        # Everything that is neither finished, timed out nor failed.
        values["Other"][offset::2] = (
            len(apks)
            - values["Finished"][offset::2]
            - values["Time Out"][offset::2]
            - values["Failed"][offset::2]
        )

    if same_apks:
        nb_apk_tot = len(apk_1 | apk_2)
        totals = (nb_apk_tot, nb_apk_tot)
        values["Missing"] = np.zeros(nb_bars)
        values["Missing"][0::2] = len(apk_2 - apk_1)
        values["Missing"][1::2] = len(apk_1 - apk_2)
    else:
        totals = (len(apk_1), len(apk_2))
    # Counts -> percentages. An empty database gives empty bars instead of a
    # ZeroDivisionError (the "Missing" series used to be unguarded).
    for series in values.values():
        for offset, total in enumerate(totals):
            series[offset::2] = _to_percent(series[offset::2], total)

    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find beter color
        "Failed": "#D55E00",
        "Missing": "#555555",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
        "Missing": "-",
    }

    x_width = 3  # width of a bar
    lstep = 1  # gap between the two bars of a tool
    bstep = 5  # gap between two tools
    pair_pitch = bstep + lstep + 2 * x_width
    first_bar = x_width / 2 + bstep + x_width + pair_pitch * np.arange(len(tools))
    x_axis = np.zeros(nb_bars)
    x_axis[0::2] = first_bar
    x_axis[1::2] = first_bar + lstep + x_width
    # Only the first bar of each pair is labelled with the tool name.
    tick_legend = [label for tool in tools for label in (tool, "")]

    plt.figure(figsize=(20, 9), dpi=80)
    bottom = np.zeros(nb_bars)
    for stat, heights in values.items():
        plt.bar(
            x_axis,
            heights,
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += heights
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path, format="svg")
+ format), format=format) if interactive: plt.show() plt.close() diff --git a/rasta_exp/grunt-worker.py b/rasta_exp/grunt-worker.py index 82e7b6a..4d3542d 100755 --- a/rasta_exp/grunt-worker.py +++ b/rasta_exp/grunt-worker.py @@ -203,9 +203,17 @@ if __name__ == "__main__": parser.add_argument( "--task", help="[debug] Name of the task to perform", type=str, action="store" ) - parser.add_argument( + app_group = parser.add_mutually_exclusive_group() + app_group.add_argument( "--sha", help="[debug] sha to make the --task on", type=str, action="store" ) + app_group.add_argument( + "--apk-path", + help="[debug] apk to make the --task on", + type=Path, + action="store", + ) + group = parser.add_mutually_exclusive_group(required=True) group.add_argument("--docker", action="store_true") group.add_argument("--singularity", action="store_true") @@ -224,7 +232,7 @@ if __name__ == "__main__": result_dir = args.result_dir if result_dir is None: result_dir = base_dir - + # base_dir = os.path.join(base_dir, str(uuid.uuid4())) if not os.path.isdir(base_dir): os.makedirs(base_dir) @@ -303,20 +311,26 @@ if __name__ == "__main__": raise Exception("Debug mode must be used with BOTH --task and --sha") task = args.task # sha = str(args.sha).upper() # TMP patch - sha = str(args.sha) - if len(sha) != 64: + # sha = str(args.sha) + if args.sha is not None and len(args.sha) != 64: # raise Exception("invalid --sha value") print("invalid --sha value, exception disabled for tests") - apk_blob = get_apk_from_androzoo( - sha256=sha, - apikey=androzoo_apikey, - base_url=androzoo_base_url, - reraise=False, - local_cache=androzoo_local_cache, - ) - if apk_blob is None: - print(f"Unable to obtain apk for sha={sha}") + if args.sha is not None: + apk_blob = get_apk_from_androzoo( + sha256=args.sha, + apikey=androzoo_apikey, + base_url=androzoo_base_url, + reraise=False, + local_cache=androzoo_local_cache, + ) + sha = args.sha + if apk_blob is None: + print(f"Unable to obtain apk for sha={sha}") else: + 
with args.apk_path.open("rb") as fp: + apk_blob = fp.read() + sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough + if apk_blob is not None: # do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False): res = do_one_job( sha256=sha,