Compare commits
8 commits
Author | SHA1 | Date | |
---|---|---|---|
8eaceb6ba7 | |||
|
e56f181da2 | ||
|
0d8ad49c94 | ||
|
1309d7ea24 | ||
582f440560 | |||
704d1fb26f | |||
630d232628 | |||
43a35b726b |
7 changed files with 294 additions and 28 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1 +1,2 @@
|
|||
data
|
||||
*.db
|
||||
|
|
16
README.md
16
README.md
|
@ -1,6 +1,6 @@
|
|||
# RASTA
|
||||
|
||||
Rasta stands for Reproducibility of Android Static Tools and Analysis.
|
||||
Rasta stands for Reusability of Android Static Tools and Analysis.
|
||||
|
||||
This repository contains the source code for reproducing the experiments of the paper "Evaluating the Re-Usability of Android Static Analysis Tools" published in the conference ICSR 2024.
|
||||
|
||||
|
@ -32,7 +32,15 @@ To run the Rasta experiment, some tools are required:
|
|||
- gzip
|
||||
- sqlite3
|
||||
|
||||
One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite3`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>).
|
||||
One way to install those tools is to use Nixpkgs (`nix-shell -p docker singularity python310 python310Packages.numpy python310Packages.matplotlib sqlite`), another way is to follow the instructions of the different tools (<https://docs.sylabs.io/guides/3.11/user-guide/>, <https://docs.docker.com/>).
|
||||
|
||||
> [!WARNING]
|
||||
> (One year later, 2025):
|
||||
>
|
||||
> Since Ubuntu 23.10, AppArmor prevents the creation of unprivileged user namespaces by default. This means singularity won't work without a specific AppArmor profile (which is not installed by nix-shell).
|
||||
>
|
||||
> Fortunately, Ubuntu now has a package for singularity: `singularity-container`. Using your distribution package should be the preferred method for installing the tools.
|
||||
|
||||
|
||||
There are also some Python dependencies that need to be installed in a virtual env:
|
||||
|
||||
|
@ -252,12 +260,12 @@ cd rasta_exp
|
|||
cd ..
|
||||
```
|
||||
|
||||
The obtained images are named `rasta-<tool-name>`, and the environment variables associated are in `rasta_exp/envs/<tool-name>_docker.env`. The build_docker_images.sh can be edited to chose only one tool to be built.
|
||||
The obtained images are named `histausse/rasta-<tool-name>:icsr2024`, and the associated environment variables are in `rasta_exp/envs/<tool-name>_docker.env`. The `build_docker_images.sh` script can be edited to choose only one tool to be built.
|
||||
|
||||
After building a tool, a container can be entered interactively by doing:
|
||||
|
||||
```
|
||||
docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it rasta-mallodroid bash
|
||||
docker run --rm --env-file=rasta_exp/envs/mallodroid_docker.env -v /tmp/mnt:/mnt -it histausse/rasta-mallodroid:icsr2024 bash
|
||||
```
|
||||
|
||||
Here, `/tmp/mnt` is mounted to `/mnt` in the container. Put the `apk` in `/tmp/mnt` to analyze it.
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
[tool.poetry]
|
||||
name = "rasta_triturage"
|
||||
version = "0.2.0"
|
||||
version = "0.2.1"
|
||||
description = "'Triturage de donnée' for the Rasta Project"
|
||||
authors = ["anon"]
|
||||
authors = ["Jean-Marie Mineau <rasta-github@jean-marie.mineau.eu>"]
|
||||
readme = "README.md"
|
||||
#homepage = ""
|
||||
#repository = ""
|
||||
license = "Proprietary"
|
||||
homepage = "https://github.com/histausse/rasta/tree/main"
|
||||
repository = "https://github.com/histausse/rasta/tree/main"
|
||||
license = "GPLv3"
|
||||
|
||||
[tool.poetry.urls]
|
||||
#"Bug Tracker" = ""
|
||||
"Bug Tracker" = "https://github.com/histausse/rasta/issues"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
|
@ -50,6 +50,7 @@ rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
|
|||
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
|
||||
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
|
||||
rasta-size-malware = "rasta_triturage.cli:size_malware"
|
||||
rasta-compare-status = "rasta_triturage.cli:compare_status_by_tool"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "*"
|
||||
|
|
|
@ -17,6 +17,7 @@ from .status import (
|
|||
plot_status_by_tool_and_malware,
|
||||
plot_all_status_by_generic_x,
|
||||
plot_status_by_generic_x,
|
||||
plot_compare_status,
|
||||
)
|
||||
from .apk import (
|
||||
plot_apk_info_by_generic_x,
|
||||
|
@ -577,9 +578,9 @@ def ic3():
|
|||
ic3_venn(args.data, interactive=args.display, image_path=args.figures_file)
|
||||
ic3_errors(
|
||||
args.data,
|
||||
file=args.figures_file / "ic3_err.csv"
|
||||
if args.figures_file is not None
|
||||
else None,
|
||||
file=(
|
||||
args.figures_file / "ic3_err.csv" if args.figures_file is not None else None
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
|
@ -1127,3 +1128,64 @@ def size_malware():
|
|||
print(
|
||||
f"{size}, {size_apk[(size, True)]:.2f}, {size_apk[(size, False)]:.2f}, {finishing_rate_goodware:.2f}, {finishing_rate_malware:.2f}, {size_apk[(size, True)] / size_apk[(size, False)]:.2f}, {finishing_rate_goodware/finishing_rate_malware:.2f}"
|
||||
)
|
||||
|
||||
|
||||
def compare_status_by_tool():
    """Compare the repartition of status by tool from two result dbs.

    Command line entry point: parses the options from ``sys.argv`` and
    forwards them to ``plot_compare_status``.

    Options:
        -d1/--data1: path to the sqlite3 database of the first experiment.
        -d2/--data2: path to the sqlite3 database of the second experiment.
        -f/--figures-file: folder in which the figure is stored (optional).
        --display: display the figure interactively.
        -t/--tools: restrict the comparison to the given tools (optional).
        --title: title of the graph.
        --same-apks: both databases are expected to hold the same apks.
    """
    parser = argparse.ArgumentParser(
        prog=sys.argv[0],
        description="Compare the repartition of status by tool from two result dbs",
    )
    parser.add_argument(
        "-d1",
        "--data1",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the first experiment",
    )
    parser.add_argument(
        "-d2",
        "--data2",
        required=True,
        type=Path,
        help="The sqlite3 database that contain the execution report of the second experiment",
    )
    parser.add_argument(
        "-f",
        "--figures-file",
        type=Path,
        help="The folder in which the figures must be stored",
    )
    parser.add_argument(
        "--display",
        action="store_true",
        help="If the figures must be displayed",
    )
    parser.add_argument(
        "-t",
        "--tools",
        nargs="+",
        default=None,
        help="The tools to analyse",
    )
    parser.add_argument(
        "--title",
        default="Comparision of Exit Status",
        help="The title of the graph",
    )
    parser.add_argument(
        "--same-apks",
        action="store_true",
        help="If the apks are the same in the two databases. If so, the missings applications will be shown.",
    )
    args = parser.parse_args()

    plot_compare_status(
        args.data1,
        args.data2,
        interactive=args.display,
        image_path=args.figures_file,
        tools=args.tools,
        # --title used to be parsed and then silently dropped; forward it so
        # the option actually has an effect.
        title=args.title,
        same_apks=args.same_apks,
    )
|
||||
|
|
|
@ -444,3 +444,179 @@ def plot_all_status_by_generic_x(
|
|||
interactive=args.display,
|
||||
image_path=args.figures_file,
|
||||
)
|
||||
|
||||
|
||||
def _load_status_and_apks(db: Path, tools: list[str]):
    """Read the per-tool status counts and the set of apk rows from `db`.

    Returns a pair `(status_rows, apks)`:
    - `status_rows` is the list of `(tool_name, tool_status, count)` rows of
      the `exec` table, restricted to `tools`,
    - `apks` is the set of `(sha256,)` rows found in the `exec` table.
    """
    from contextlib import closing

    placeholders = "(" + ",".join("?" for _ in tools) + ")"
    # sqlite3's own context manager only handles the transaction, it does not
    # close the connection, hence the explicit closing().
    with closing(sqlite3.connect(db)) as con:
        cur = con.cursor()
        status_rows = cur.execute(
            (
                "SELECT tool_name, tool_status, COUNT(sha256) "
                "FROM exec "
                f"WHERE tool_name IN {placeholders} "
                "GROUP BY tool_name, tool_status;"
            ),
            tools,
        ).fetchall()
        apks = set(cur.execute("SELECT sha256 FROM exec;").fetchall())
    return status_rows, apks


def plot_compare_status(
    db1: Path,
    db2: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Comparision of Exit Status",
    same_apks: bool = False,
):
    """Plot and compare repartition of status by tools from two experiment.

    db1 and db2 are the path to two result sqlite databases to compare
    interactive indicate if the figure must be displayed
    image_path is where to save the result
    tools is the list of tools to compare, default will compare all tools found.
    title is the title of the figure
    same_apks indicate if the two databases uses the same apks. If so, the missing apks will be displayed in the plot.

    Each tool gets two adjacent stacked bars: the first one for db1, the
    second one for db2. Bar segments are percentages of the number of apks.
    """
    if tools is None:
        tools = list(set(get_list_tools(db1)) | set(get_list_tools(db2)))
    # Alphabetical order; sorted() makes a copy so the caller's list is left
    # untouched.
    tools = sorted(tools)
    nb_bars = len(tools) * 2

    status_1, apk_1 = _load_status_and_apks(db1, tools)
    status_2, apk_2 = _load_status_and_apks(db2, tools)

    occurences = {}
    for tool, stat, occurence in status_1:
        occurences[(tool, stat, "db1")] = occurence
    for tool, stat, occurence in status_2:
        occurences[(tool, stat, "db2")] = occurence

    values = {
        "Finished": np.zeros(nb_bars),
        "Time Out": np.zeros(nb_bars),
        "Other": np.zeros(nb_bars),
        "Failed": np.zeros(nb_bars),
    }
    nb_apk_tot = len(apk_1 | apk_2)
    if same_apks:
        # Both experiments are measured against the union of the apks; the
        # apks absent from one database are reported as "Missing" for it.
        nb_apk_1 = nb_apk_tot
        nb_apk_2 = nb_apk_tot
        missing_1 = len(apk_2 - apk_1)
        missing_2 = len(apk_1 - apk_2)
        values["Missing"] = np.zeros(nb_bars)
        # Guard against empty databases, like the other percentages below.
        if nb_apk_tot != 0:
            values["Missing"][0::2] = (missing_1 * 100) / nb_apk_1
            values["Missing"][1::2] = (missing_2 * 100) / nb_apk_2
    else:
        nb_apk_1 = len(apk_1)
        nb_apk_2 = len(apk_2)
    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find better color
        "Failed": "#D55E00",
        "Missing": "#555555",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
        "Missing": "-",
    }

    # Label of the bar segment -> status value stored in the database.
    db_status = {"Finished": "FINISHED", "Time Out": "TIMEOUT", "Failed": "FAILED"}
    per_db = (
        ("db1", len(apk_1), nb_apk_1),
        ("db2", len(apk_2), nb_apk_2),
    )
    for i, tool in enumerate(tools):
        for offset, (db_key, nb_in_db, nb_ref) in enumerate(per_db):
            idx = 2 * i + offset
            counts = {
                label: occurences.get((tool, status, db_key), 0)
                for label, status in db_status.items()
            }
            # Everything in this database not counted as one of the three
            # statuses above.
            counts["Other"] = nb_in_db - sum(counts.values())
            for label, count in counts.items():
                values[label][idx] = 0 if nb_ref == 0 else (100 * count) / nb_ref

    # Bar positions: the two bars of a tool are `lstep` apart, two different
    # tools are separated by `bstep`.
    x_axis = np.zeros(nb_bars)
    x_width = 3
    x_0 = x_width / 2
    lstep = 1
    bstep = 5
    for i in range(len(tools)):
        x_0 += bstep + x_width
        x_axis[2 * i] = x_0
        x_0 += lstep + x_width
        x_axis[2 * i + 1] = x_0

    # Only the first bar of each pair carries the tool name.
    tick_legend = []
    for tool in tools:
        tick_legend.append(f"{tool}")
        tick_legend.append("")

    plt.figure(figsize=(20, 9), dpi=80)
    if same_apks:
        stats = ["Finished", "Time Out", "Other", "Failed", "Missing"]
    else:
        stats = ["Finished", "Time Out", "Other", "Failed"]
    bottom = np.zeros(nb_bars)
    for stat in stats:
        plt.bar(
            x_axis,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path, format="svg")
|
||||
|
|
|
@ -112,7 +112,11 @@ def radar_chart(
|
|||
|
||||
|
||||
def render(
|
||||
title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
|
||||
title: str,
|
||||
interactive: bool,
|
||||
image_path: Path | None,
|
||||
tight_layout: bool = True,
|
||||
format: str = "pdf",
|
||||
):
|
||||
"""Render the figure. If `interactive`, display if, if `image_path`, save it."""
|
||||
# plt.title(title)
|
||||
|
@ -121,7 +125,7 @@ def render(
|
|||
if image_path is not None:
|
||||
if not image_path.exists():
|
||||
image_path.mkdir(parents=True, exist_ok=True)
|
||||
plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
|
||||
plt.savefig(image_path / (slugify(title) + "." + format), format=format)
|
||||
if interactive:
|
||||
plt.show()
|
||||
plt.close()
|
||||
|
|
|
@ -203,9 +203,17 @@ if __name__ == "__main__":
|
|||
parser.add_argument(
|
||||
"--task", help="[debug] Name of the task to perform", type=str, action="store"
|
||||
)
|
||||
parser.add_argument(
|
||||
app_group = parser.add_mutually_exclusive_group()
|
||||
app_group.add_argument(
|
||||
"--sha", help="[debug] sha to make the --task on", type=str, action="store"
|
||||
)
|
||||
app_group.add_argument(
|
||||
"--apk-path",
|
||||
help="[debug] apk to make the --task on",
|
||||
type=Path,
|
||||
action="store",
|
||||
)
|
||||
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument("--docker", action="store_true")
|
||||
group.add_argument("--singularity", action="store_true")
|
||||
|
@ -224,7 +232,7 @@ if __name__ == "__main__":
|
|||
result_dir = args.result_dir
|
||||
if result_dir is None:
|
||||
result_dir = base_dir
|
||||
|
||||
|
||||
# base_dir = os.path.join(base_dir, str(uuid.uuid4()))
|
||||
if not os.path.isdir(base_dir):
|
||||
os.makedirs(base_dir)
|
||||
|
@ -303,20 +311,26 @@ if __name__ == "__main__":
|
|||
raise Exception("Debug mode must be used with BOTH --task and --sha")
|
||||
task = args.task
|
||||
# sha = str(args.sha).upper() # TMP patch
|
||||
sha = str(args.sha)
|
||||
if len(sha) != 64:
|
||||
# sha = str(args.sha)
|
||||
if args.sha is not None and len(args.sha) != 64:
|
||||
# raise Exception("invalid --sha value")
|
||||
print("invalid --sha value, exception disabled for tests")
|
||||
apk_blob = get_apk_from_androzoo(
|
||||
sha256=sha,
|
||||
apikey=androzoo_apikey,
|
||||
base_url=androzoo_base_url,
|
||||
reraise=False,
|
||||
local_cache=androzoo_local_cache,
|
||||
)
|
||||
if apk_blob is None:
|
||||
print(f"Unable to obtain apk for sha={sha}")
|
||||
if args.sha is not None:
|
||||
apk_blob = get_apk_from_androzoo(
|
||||
sha256=args.sha,
|
||||
apikey=androzoo_apikey,
|
||||
base_url=androzoo_base_url,
|
||||
reraise=False,
|
||||
local_cache=androzoo_local_cache,
|
||||
)
|
||||
sha = args.sha
|
||||
if apk_blob is None:
|
||||
print(f"Unable to obtain apk for sha={sha}")
|
||||
else:
|
||||
with args.apk_path.open("rb") as fp:
|
||||
apk_blob = fp.read()
|
||||
sha = args.apk_path.name.removesuffix(".apk") # no a sha, but good enough
|
||||
if apk_blob is not None:
|
||||
# do_one_job(sha256: str, tool_name: str, base_dir: str, apk_blob, container_mode, container_image, keep_tmp_dir=False):
|
||||
res = do_one_job(
|
||||
sha256=sha,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue