diff --git a/.gitignore b/.gitignore index c93e84f..1269488 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ data -*.db diff --git a/README.md b/README.md index 353019f..e3fcc77 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # RASTA -Rasta stands for Reusability of Android Static Tools and Analysis. +Rasta stands for Reproducibility of Android Static Tools and Analysis. This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024. @@ -32,15 +32,7 @@ To run the Rasta experiment, some tools are required: - gzip - sqlite3 -One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (, ). - -> [!WARNING] -> (One year later, 2025): -> -> Since Ubuntu 23.10, apparmor prevents the creation of unprivileged namespace by default. This means singularity won't work without a specific apparmor profile (which is not installed by nix-shell). -> -> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools. - +One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (, ). They are also some python dependencies that need to be installed in a virtual env: @@ -260,12 +252,12 @@ cd rasta_exp cd .. ``` -The obtained images are named `histausse/rasta-:icsr2024`, and the environment variables associated are in `rasta_exp/envs/_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built. 
+The obtained images are named `rasta-`, and the environment variables associated are in `rasta_exp/envs/_docker.env`. The build_docker_images.sh can be edited to choose only one tool to be built. After building a tool, a container can be entered interactively by doing: ``` -docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash +docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash ``` Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it. diff --git a/rasta_data_manipulation/pyproject.toml b/rasta_data_manipulation/pyproject.toml index a3f03ee..3d223a6 100644 --- a/rasta_data_manipulation/pyproject.toml +++ b/rasta_data_manipulation/pyproject.toml @@ -1,15 +1,15 @@ [tool.poetry] name = "rasta_triturage" -version = "0.2.1" +version = "0.2.0" description = "'Triturage de donnée' for the Rasta Project" -authors = ["Jean-Marie Mineau "] +authors = ["anon"] readme = "README.md" -homepage = "https://github.com/histausse/rasta/tree/main" -repository = "https://github.com/histausse/rasta/tree/main" -license = "GPLv3" +#homepage = "" +#repository = "" +license = "Proprietary" [tool.poetry.urls] -"Bug Tracker" = "https://github.com/histausse/rasta/issues" +#"Bug Tracker" = "" [tool.poetry.dependencies] python = "^3.10" @@ -50,7 +50,6 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor" rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks" rasta-gen-dataset = "rasta_triturage.cli:generate_dataset" rasta-size-malware = "rasta_triturage.cli:size_malware" -rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool" [tool.poetry.group.dev.dependencies] pytest = "*" diff --git a/rasta_data_manipulation/rasta_triturage/cli.py b/rasta_data_manipulation/rasta_triturage/cli.py index a42d722..6298e6d 100644 --- a/rasta_data_manipulation/rasta_triturage/cli.py +++ 
b/rasta_data_manipulation/rasta_triturage/cli.py @@ -17,7 +17,6 @@ from .status import ( plot_status_by_tool_and_malware, plot_all_status_by_generic_x, plot_status_by_generic_x, - plot_compare_status, ) from .apk import ( plot_apk_info_by_generic_x, @@ -578,9 +577,9 @@ def ic3(): ic3_venn(args.data, interactive=args.display, image_path=args.figures_file) ic3_errors( args.data, - file=( - args.figures_file / "ic3_err.csv" if args.figures_file is not None else None - ), + file=args.figures_file / "ic3_err.csv" + if args.figures_file is not None + else None, ) @@ -1128,64 +1127,3 @@ def size_malware(): print( f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}" ) - - -def compare_status_by_tool(): - """Compare the repartition of status by tool from two result dbs""" - - parser = argparse.ArgumentParser( - prog=sys.argv[0], - description="Compare the repartition of status by tool from two result dbs", - ) - parser.add_argument( - "-d1", - "--data1", - required=True, - type=Path, - help="The sqlite3 database that contain the execution report of the first experiment", - ) - parser.add_argument( - "-d2", - "--data2", - required=True, - type=Path, - help="The sqlite3 database that contain the execution report of the second experiment", - ) - parser.add_argument( - "-f", - "--figures-file", - type=Path, - help="The folder in which the figures must be stored", - ) - parser.add_argument( - "--display", - action="store_true", - help="If the figures must be displayed", - ) - parser.add_argument( - "-t", - "--tools", - nargs="+", - default=None, - help="The tools to analyse", - ) - parser.add_argument( - "--title", - default="Comparision of Exit Status", - help="The title of the graph", - ) - parser.add_argument( - "--same-apks", - action="store_true", - help="If the apks are the same in the 
two databases. If so, the missings applications will be shown.", - ) - args = parser.parse_args() - - plot_compare_status( - args.data1, - args.data2, - interactive=args.display, - image_path=args.figures_file, - tools=args.tools, - same_apks=args.same_apks, - ) diff --git a/rasta_data_manipulation/rasta_triturage/status.py b/rasta_data_manipulation/rasta_triturage/status.py index 69cbd93..819b14d 100644 --- a/rasta_data_manipulation/rasta_triturage/status.py +++ b/rasta_data_manipulation/rasta_triturage/status.py @@ -444,179 +444,3 @@ def plot_all_status_by_generic_x( interactive=args.display, image_path=args.figures_file, ) - - -def plot_compare_status( - db1: Path, - db2: Path, - interactive: bool = True, - image_path: Path | None = None, - tools: list[str] | None = None, - title: str = "Comparision of Exit Status", - same_apks: bool = False, -): - """Plot and compare repartition of status by tools from two experiment. - - db1 and db2 are the path to two result sqlite databases to compare - image_path is where to save the result - tools is the list of tools to compare, default will compare all tools found. - title is the title of the figure - same_apks indicate if the two databases uses the same apks. If so, the missing apks will be displayed in the plot. - """ - if tools is None: - tools = list(set(get_list_tools(db1)) | set(get_list_tools(db2))) - tools_list_format = f"({','.join(['?' 
for _ in tools])})" - with sqlite3.connect(db1) as con: - cur = con.cursor() - status_1 = cur.execute( - ( - "SELECT tool_name, tool_status, COUNT(sha256) " - "FROM exec " - f"WHERE tool_name IN {tools_list_format} " - "GROUP BY tool_name, tool_status;" - ), - tools, - ).fetchall() - apk_1 = set(cur.execute("SELECT sha256 FROM exec;").fetchall()) - with sqlite3.connect(db2) as con: - cur = con.cursor() - status_2 = cur.execute( - ( - "SELECT tool_name, tool_status, COUNT(sha256) " - "FROM exec " - f"WHERE tool_name IN {tools_list_format} " - "GROUP BY tool_name, tool_status;" - ), - tools, - ).fetchall() - apk_2 = set(cur.execute("SELECT sha256 FROM exec;").fetchall()) - - occurences = {} - for tool, stat, occurence in status_1: - occurences[(tool, stat, "db1")] = occurence - for tool, stat, occurence in status_2: - occurences[(tool, stat, "db2")] = occurence - # tools.sort( - # key=lambda t: occurences.get((t, "FINISHED", "db1"), 0) - # + occurences.get((t, "FINISHED", "db2"), 0), - # reverse=True, - # ) - tools.sort() - - values = { - "Finished": np.zeros(len(tools) * 2), - "Time Out": np.zeros(len(tools) * 2), - "Other": np.zeros(len(tools) * 2), - "Failed": np.zeros(len(tools) * 2), - } - nb_apk_tot = len(apk_1 | apk_2) - if same_apks: - nb_apk_1 = nb_apk_tot - nb_apk_2 = nb_apk_tot - missing_1 = len(apk_2 - apk_1) - missing_2 = len(apk_1 - apk_2) - values["Missing"] = np.zeros(len(tools) * 2) - for i in range(len(tools)): - values["Missing"][2 * i] = (missing_1 * 100) / nb_apk_1 - values["Missing"][2 * i + 1] = (missing_2 * 100) / nb_apk_2 - else: - nb_apk_1 = len(apk_1) - nb_apk_2 = len(apk_2) - colors = { - "Finished": "#009E73", - "Time Out": "#56B4E9", - "Other": "#555555", # TODO: find beter color - "Failed": "#D55E00", - "Missing": "#555555", - } - hatch = { - "Finished": "/", - "Time Out": "x", - "Other": ".", - "Failed": "\\", - "Missing": "-", - } - - for i, tool in enumerate(tools): - i_1 = 2 * i - i_2 = 2 * i + 1 - values["Finished"][i_1] = 
occurences.get((tool, "FINISHED", "db1"), 0) - values["Finished"][i_2] = occurences.get((tool, "FINISHED", "db2"), 0) - values["Time Out"][i_1] = occurences.get((tool, "TIMEOUT", "db1"), 0) - values["Time Out"][i_2] = occurences.get((tool, "TIMEOUT", "db2"), 0) - values["Failed"][i_1] = occurences.get((tool, "FAILED", "db1"), 0) - values["Failed"][i_2] = occurences.get((tool, "FAILED", "db2"), 0) - values["Other"][i_1] = ( - len(apk_1) - - values["Finished"][i_1] - - values["Time Out"][i_1] - - values["Failed"][i_1] - ) - values["Other"][i_2] = ( - len(apk_2) - - values["Finished"][i_2] - - values["Time Out"][i_2] - - values["Failed"][i_2] - ) - values["Finished"][i_1] = ( - 0 if nb_apk_1 == 0 else (100 * values["Finished"][i_1]) / nb_apk_1 - ) - values["Finished"][i_2] = ( - 0 if nb_apk_2 == 0 else (100 * values["Finished"][i_2]) / nb_apk_2 - ) - values["Time Out"][i_1] = ( - 0 if nb_apk_1 == 0 else (100 * values["Time Out"][i_1]) / nb_apk_1 - ) - values["Time Out"][i_2] = ( - 0 if nb_apk_2 == 0 else (100 * values["Time Out"][i_2]) / nb_apk_2 - ) - values["Failed"][i_1] = ( - 0 if nb_apk_1 == 0 else (100 * values["Failed"][i_1]) / nb_apk_1 - ) - values["Failed"][i_2] = ( - 0 if nb_apk_2 == 0 else (100 * values["Failed"][i_2]) / nb_apk_2 - ) - values["Other"][i_1] = ( - 0 if nb_apk_1 == 0 else (100 * values["Other"][i_1]) / nb_apk_1 - ) - values["Other"][i_2] = ( - 0 if nb_apk_2 == 0 else (100 * values["Other"][i_2]) / nb_apk_2 - ) - bottom = np.zeros(len(tools) * 2) - - x_axis = np.zeros(len(tools) * 2) - x_width = 3 - x_0 = x_width / 2 - lstep = 1 - bstep = 5 - for i in range(len(tools)): - x_0 += bstep + x_width - x_axis[2 * i] = x_0 - x_0 += lstep + x_width - x_axis[2 * i + 1] = x_0 - tick_legend = [] - for tool in tools: - tick_legend.append(f"{tool}") # (f"{tool} on goodware") - tick_legend.append("") # (f"{tool} on malware") - - plt.figure(figsize=(20, 9), dpi=80) - if same_apks: - stats = ["Finished", "Time Out", "Other", "Failed", "Missing"] - else: - 
stats = ["Finished", "Time Out", "Other", "Failed"] - for stat in stats: - plt.bar( - x_axis, - values[stat], - label=stat, - color=colors[stat], - hatch=hatch[stat], - bottom=bottom, - width=x_width, - edgecolor="black", - ) - bottom += values[stat] - plt.xticks(x_axis, tick_legend, rotation=80) - plt.legend() - plt.ylabel("% of analysed apk") - render(title, interactive, image_path, format="svg") diff --git a/rasta_data_manipulation/rasta_triturage/utils.py b/rasta_data_manipulation/rasta_triturage/utils.py index ee60c53..91cf08a 100644 --- a/rasta_data_manipulation/rasta_triturage/utils.py +++ b/rasta_data_manipulation/rasta_triturage/utils.py @@ -112,11 +112,7 @@ def radar_chart( def render( - title: str, - interactive: bool, - image_path: Path | None, - tight_layout: bool = True, - format: str = "pdf", + title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True ): """Render the figure. If `interactive`, display if, if `image_path`, save it.""" # plt.title(title) @@ -125,7 +121,7 @@ def render( if image_path is not None: if not image_path.exists(): image_path.mkdir(parents=True, exist_ok=True) - plt.savefig(image_path / (slugify(title) + "." 
+ format), format=format) + plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf") if interactive: plt.show() plt.close() diff --git a/rasta_exp/grunt-worker.py b/rasta_exp/grunt-worker.py index 4d3542d..82e7b6a 100755 --- a/rasta_exp/grunt-worker.py +++ b/rasta_exp/grunt-worker.py @@ -203,17 +203,9 @@ if __name__ == "__main__": parser.add_argument( "--task", help="[debug] Name of the task to perform", type=str, action="store" ) - app_group = parser.add_mutually_exclusive_group() - app_group.add_argument( + parser.add_argument( "--sha", help="[debug] sha to make the --task on", type=str, action="store" ) - app_group.add_argument( - "--apk-path", - help="[debug] apk to make the --task on", - type=Path, - action="store", - ) - group = parser.add_mutually_exclusive_group(required=True) group.add_argument("--docker", action="store_true") group.add_argument("--singularity", action="store_true") @@ -232,7 +224,7 @@ if __name__ == "__main__": result_dir = args.result_dir if result_dir is None: result_dir = base_dir - + # base_dir = os.path.join(base_dir, str(uuid.uuid4())) if not os.path.isdir(base_dir): os.makedirs(base_dir) @@ -311,26 +303,20 @@ if __name__ == "__main__": raise Exception("Debug mode must be used with BOTH --task and --sha") task = args.task # sha = str(args.sha).upper() # TMP patch - # sha = str(args.sha) - if args.sha is not None and len(args.sha) != 64: + sha = str(args.sha) + if len(sha) != 64: # raise Exception("invalid --sha value") print("invalid --sha value, exception disabled for tests") - if args.sha is not None: - apk_blob = get_apk_from_androzoo( - sha256=args.sha, - apikey=androzoo_apikey, - base_url=androzoo_base_url, - reraise=False, - local_cache=androzoo_local_cache, - ) - sha = args.sha - if apk_blob is None: - print(f"Unable to obtain apk for sha={sha}") + apk_blob = get_apk_from_androzoo( + sha256=sha, + apikey=androzoo_apikey, + base_url=androzoo_base_url, + reraise=False, + local_cache=androzoo_local_cache, + ) + if 
apk_blob is None: + print(f"Unable to obtain apk for sha={sha}") else: - with args.apk_path.open("rb") as fp: - apk_blob = fp.read() - sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough - if apk_blob is not None: # do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False): res = do_one_job( sha256=sha,