Compare commits

..

No commits in common. "main" and "icsr2024" have entirely different histories.

7 changed files with 28 additions and 294 deletions

1
.gitignore vendored
View file

@ -1,2 +1 @@
data data
*.db

View file

@ -1,6 +1,6 @@
# RASTA # RASTA
Rasta stands for Reusability of Android Static Tools and Analysis. Rasta stands for Reproducibility of Android Static Tools and Analysis.
This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024. This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024.
@ -32,15 +32,7 @@ To run the Rasta experiment, some tools are required:
- gzip - gzip
- sqlite3 - sqlite3
One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>). One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>).
> [!WARNING]
> (One year later, 2025):
>
> Since Ubuntu 23.10, AppArmor prevents the creation of unprivileged namespaces by default. This means Singularity won't work without a specific AppArmor profile (which is not installed by nix-shell).
>
> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools.
There are also some Python dependencies that need to be installed in a virtual env: There are also some Python dependencies that need to be installed in a virtual env:
@ -260,12 +252,12 @@ cd rasta_exp
cd .. cd ..
``` ```
The obtained images are named `histausse/rasta-<tool-name>:icsr2024`, and the associated environment variables are in `rasta_exp/envs/<tool-name>_docker.env`. The build_docker_images.sh script can be edited to choose only one tool to be built. The obtained images are named `rasta-<tool-name>`, and the associated environment variables are in `rasta_exp/envs/<tool-name>_docker.env`. The build_docker_images.sh script can be edited to choose only one tool to be built.
After building a tool, a container can be entered interactively by doing: After building a tool, a container can be entered interactively by doing:
``` ```
docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash
``` ```
Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it. Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it.

View file

@ -1,15 +1,15 @@
[tool.poetry] [tool.poetry]
name = "rasta_triturage" name = "rasta_triturage"
version = "0.2.1" version = "0.2.0"
description = "'Triturage de donnée' for the Rasta Project" description = "'Triturage de donnée' for the Rasta Project"
authors = ["Jean-Marie Mineau <rasta-github@jean-marie.mineau.eu>"] authors = ["anon"]
readme = "README.md" readme = "README.md"
homepage = "https://github.com/histausse/rasta/tree/main" #homepage = ""
repository = "https://github.com/histausse/rasta/tree/main" #repository = ""
license = "GPLv3" license = "Proprietary"
[tool.poetry.urls] [tool.poetry.urls]
"Bug Tracker" = "https://github.com/histausse/rasta/issues" #"Bug Tracker" = ""
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.10" python = "^3.10"
@ -50,7 +50,6 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks" rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset" rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
rasta-size-malware = "rasta_triturage.cli:size_malware" rasta-size-malware = "rasta_triturage.cli:size_malware"
rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
[tool.poetry.group.dev.dependencies] [tool.poetry.group.dev.dependencies]
pytest = "*" pytest = "*"

View file

@ -17,7 +17,6 @@ from .status import (
plot_status_by_tool_and_malware, plot_status_by_tool_and_malware,
plot_all_status_by_generic_x, plot_all_status_by_generic_x,
plot_status_by_generic_x, plot_status_by_generic_x,
plot_compare_status,
) )
from .apk import ( from .apk import (
plot_apk_info_by_generic_x, plot_apk_info_by_generic_x,
@ -578,9 +577,9 @@ def ic3():
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file) ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
ic3_errors( ic3_errors(
args.data, args.data,
file=( file=args.figures_file / "ic3_err.csv"
args.figures_file / "ic3_err.csv" if args.figures_file is not None else None if args.figures_file is not None
), else None,
) )
@ -1128,64 +1127,3 @@ def size_malware():
print( print(
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}" f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
) )
def compare_status_by_tool():
    """Compare the repartition of status by tool from two result dbs.

    Command line entry point: the arguments are read from `sys.argv` and
    forwarded to `plot_compare_status`.

    Options:
        -d1/--data1: sqlite3 database of the first experiment (required).
        -d2/--data2: sqlite3 database of the second experiment (required).
        -f/--figures-file: folder in which the figure is stored.
        --display: display the figure.
        -t/--tools: tools to analyse (`None` when omitted).
        --title: title of the graph.
        --same-apks: the two databases were built from the same apks.
    """
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description="Compare the repartition of status by tool from two result dbs",
    )
    parser.add_argument(
        "-d1",
        "--data1",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the first experiment",
    )
    parser.add_argument(
        "-d2",
        "--data2",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the second experiment",
    )
    parser.add_argument(
        "-f",
        "--figures-file",
        type=Path,
        help="The folder in which the figures must be stored",
    )
    parser.add_argument(
        "--display",
        action="store_true",
        help="If the figures must be displayed",
    )
    parser.add_argument(
        "-t",
        "--tools",
        nargs="+",
        default=None,
        help="The tools to analyse",
    )
    parser.add_argument(
        "--title",
        default="Comparision of Exit Status",
        help="The title of the graph",
    )
    parser.add_argument(
        "--same-apks",
        action="store_true",
        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
    )
    args = parser.parse_args()
    plot_compare_status(
        args.data1,
        args.data2,
        interactive=args.display,
        image_path=args.figures_file,
        tools=args.tools,
        # --title used to be parsed and then silently dropped; forward it so
        # that the option actually has an effect.
        title=args.title,
        same_apks=args.same_apks,
    )

View file

@ -444,179 +444,3 @@ def plot_all_status_by_generic_x(
interactive=args.display, interactive=args.display,
image_path=args.figures_file, image_path=args.figures_file,
) )
def _load_exec_summary(db: Path, tools: list[str]):
    """Return the `(tool, status, count)` rows for `tools` and the set of sha256 rows of `exec`."""
    from contextlib import closing

    placeholders = ",".join("?" for _ in tools)
    query = (
        "SELECT tool_name, tool_status, COUNT(sha256) "
        "FROM exec "
        f"WHERE tool_name IN ({placeholders}) "
        "GROUP BY tool_name, tool_status;"
    )
    # `with sqlite3.connect(...)` only wraps a transaction, it does not close
    # the connection: `closing` makes sure the database handle is released.
    with closing(sqlite3.connect(db)) as con:
        cur = con.cursor()
        status = cur.execute(query, tools).fetchall()
        apks = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
    return status, apks


def _percent(count, total):
    """Return `count` as a percentage of `total`, or 0 when `total` is 0."""
    return 0 if total == 0 else (100 * count) / total


def plot_compare_status(
    db1: Path,
    db2: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Comparision of Exit Status",
    same_apks: bool = False,
):
    """Plot and compare repartition of status by tools from two experiment.

    For each tool, two stacked bars are drawn side by side (first `db1`, then
    `db2`), each giving the percentage of apks per exit status. The tools are
    sorted alphabetically and the caller's `tools` list is left untouched.

    Args:
        db1: path to the first result sqlite database.
        db2: path to the second result sqlite database.
        interactive: forwarded to `render` (whether the figure is displayed).
        image_path: forwarded to `render` (folder where the figure is saved,
            requested in svg format); nothing is saved when `None`.
        tools: the tools to compare, default will compare all tools found in
            either database.
        title: the title of the figure, forwarded to `render`.
        same_apks: indicate if the two databases use the same apks. If so,
            the percentages are relative to the union of the apks of both
            databases and the apks missing from each one are displayed in
            the plot.
    """
    if tools is None:
        tools = set(get_list_tools(db1)) | set(get_list_tools(db2))
    # Sort a copy: the list given by the caller must not be reordered.
    tools = sorted(tools)
    nb_bars = 2 * len(tools)

    status_1, apk_1 = _load_exec_summary(db1, tools)
    status_2, apk_2 = _load_exec_summary(db2, tools)

    values = {
        "Finished": np.zeros(nb_bars),
        "Time Out": np.zeros(nb_bars),
        "Other": np.zeros(nb_bars),
        "Failed": np.zeros(nb_bars),
    }

    if same_apks:
        # Both experiments are measured against the union of the apks.
        nb_apk_1 = nb_apk_2 = len(apk_1 | apk_2)
        values["Missing"] = np.zeros(nb_bars)
        # Even indexes are the db1 bars, odd indexes the db2 bars. `_percent`
        # guards against empty databases like every other percentage below.
        values["Missing"][0::2] = _percent(len(apk_2 - apk_1), nb_apk_1)
        values["Missing"][1::2] = _percent(len(apk_1 - apk_2), nb_apk_2)
        stats = ["Finished", "Time Out", "Other", "Failed", "Missing"]
    else:
        nb_apk_1 = len(apk_1)
        nb_apk_2 = len(apk_2)
        stats = ["Finished", "Time Out", "Other", "Failed"]

    status_labels = {
        "FINISHED": "Finished",
        "TIMEOUT": "Time Out",
        "FAILED": "Failed",
    }
    # One entry per database: (occurrences by (tool, status), number of
    # sha256 rows in the database, denominator of the percentages).
    sides = (
        ({(tool, stat): nb for tool, stat, nb in status_1}, len(apk_1), nb_apk_1),
        ({(tool, stat): nb for tool, stat, nb in status_2}, len(apk_2), nb_apk_2),
    )
    for i, tool in enumerate(tools):
        for offset, (occurences, nb_rows, nb_apk) in enumerate(sides):
            idx = 2 * i + offset
            nb_known = 0
            for status, label in status_labels.items():
                nb = occurences.get((tool, status), 0)
                values[label][idx] = _percent(nb, nb_apk)
                nb_known += nb
            # Everything that is neither finished, timed out nor failed.
            values["Other"][idx] = _percent(nb_rows - nb_known, nb_apk)

    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find better color
        "Failed": "#D55E00",
        "Missing": "#555555",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
        "Missing": "-",
    }

    # Bars of the same tool are `lstep` apart, two tools are `bstep` apart.
    x_width = 3
    lstep = 1
    bstep = 5
    x_axis = np.zeros(nb_bars)
    x_0 = x_width / 2
    for i in range(len(tools)):
        x_0 += bstep + x_width
        x_axis[2 * i] = x_0
        x_0 += lstep + x_width
        x_axis[2 * i + 1] = x_0

    # Only the first bar of each pair is labelled with the tool name.
    tick_legend = []
    for tool in tools:
        tick_legend.append(f"{tool}")
        tick_legend.append("")

    plt.figure(figsize=(20, 9), dpi=80)
    bottom = np.zeros(nb_bars)
    for stat in stats:
        plt.bar(
            x_axis,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path, format="svg")

View file

@ -112,11 +112,7 @@ def radar_chart(
def render( def render(
title: str, title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
interactive: bool,
image_path: Path | None,
tight_layout: bool = True,
format: str = "pdf",
): ):
"""Render the figure. If `interactive`, display if, if `image_path`, save it.""" """Render the figure. If `interactive`, display if, if `image_path`, save it."""
# plt.title(title) # plt.title(title)
@ -125,7 +121,7 @@ def render(
if image_path is not None: if image_path is not None:
if not image_path.exists(): if not image_path.exists():
image_path.mkdir(parents=True, exist_ok=True) image_path.mkdir(parents=True, exist_ok=True)
plt.savefig(image_path / (slugify(title) + "." + format), format=format) plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
if interactive: if interactive:
plt.show() plt.show()
plt.close() plt.close()

View file

@ -203,17 +203,9 @@ if __name__ == "__main__":
parser.add_argument( parser.add_argument(
"--task", help="[debug] Name of the task to perform", type=str, action="store" "--task", help="[debug] Name of the task to perform", type=str, action="store"
) )
app_group = parser.add_mutually_exclusive_group() parser.add_argument(
app_group.add_argument(
"--sha", help="[debug] sha to make the --task on", type=str, action="store" "--sha", help="[debug] sha to make the --task on", type=str, action="store"
) )
app_group.add_argument(
"--apk-path",
help="[debug] apk to make the --task on",
type=Path,
action="store",
)
group = parser.add_mutually_exclusive_group(required=True) group = parser.add_mutually_exclusive_group(required=True)
group.add_argument("--docker", action="store_true") group.add_argument("--docker", action="store_true")
group.add_argument("--singularity", action="store_true") group.add_argument("--singularity", action="store_true")
@ -311,26 +303,20 @@ if __name__ == "__main__":
raise Exception("Debug mode must be used with BOTH --task and --sha") raise Exception("Debug mode must be used with BOTH --task and --sha")
task = args.task task = args.task
# sha = str(args.sha).upper() # TMP patch # sha = str(args.sha).upper() # TMP patch
# sha = str(args.sha) sha = str(args.sha)
if args.sha is not None and len(args.sha) != 64: if len(sha) != 64:
# raise Exception("invalid --sha value") # raise Exception("invalid --sha value")
print("invalid --sha value, exception disabled for tests") print("invalid --sha value, exception disabled for tests")
if args.sha is not None: apk_blob = get_apk_from_androzoo(
apk_blob = get_apk_from_androzoo( sha256=sha,
sha256=args.sha, apikey=androzoo_apikey,
apikey=androzoo_apikey, base_url=androzoo_base_url,
base_url=androzoo_base_url, reraise=False,
reraise=False, local_cache=androzoo_local_cache,
local_cache=androzoo_local_cache, )
) if apk_blob is None:
sha = args.sha print(f"Unable to obtain apk for sha={sha}")
if apk_blob is None:
print(f"Unable to obtain apk for sha={sha}")
else: else:
with args.apk_path.open("rb") as fp:
apk_blob = fp.read()
sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough
if apk_blob is not None:
# do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False): # do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False):
res = do_one_job( res = do_one_job(
sha256=sha, sha256=sha,