first commit
This commit is contained in:
commit
cd1e91bb99
287 changed files with 86425 additions and 0 deletions
1
rasta_data_manipulation/.gitattributes
vendored
Normal file
1
rasta_data_manipulation/.gitattributes
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
data.db filter=lfs diff=lfs merge=lfs -text
|
40
rasta_data_manipulation/.gitignore
vendored
Normal file
40
rasta_data_manipulation/.gitignore
vendored
Normal file
|
@ -0,0 +1,40 @@
|
|||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
env/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
|
||||
# virtualenv
|
||||
.venv/
|
||||
venv/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
*.db
|
||||
year_and_sdk.csv.gz
|
||||
latest_with-added-date.csv.gz
|
||||
figs_drebin/
|
||||
figs_rasta/
|
||||
figs
|
35
rasta_data_manipulation/README.md
Normal file
35
rasta_data_manipulation/README.md
Normal file
|
@ -0,0 +1,35 @@
|
|||
# Rasta Triturage
|
||||
|
||||
Triturage de donnée for the Rasta Project
|
||||
|
||||
## Usage
|
||||
|
||||
This project is managed by poetry (trying new things :-) ). To use it without poetry, you can install it as a python package in a venv:
|
||||
|
||||
```
|
||||
git clone git@gitlab.inria.fr:jmineau/rasta_triturage.git
|
||||
cd rasta_triturage
|
||||
python -m venv venv
|
||||
source venv/bin/activate
|
||||
pip install . -e
|
||||
```
|
||||
|
||||
The reports and information about the apk are in the prepopulated database `data.db` (TODO: add script to populate the DB)
|
||||
|
||||
To generate all the figures in the file `figures`:
|
||||
|
||||
```
|
||||
rasta-triturage -d data.db -f figures
|
||||
```
|
||||
|
||||
To display all the figures:
|
||||
|
||||
```
|
||||
rasta-triturage -d data.db --display
|
||||
```
|
||||
|
||||
The option `-t` allows specifying the tools to compare.
|
||||
|
||||
## Author
|
||||
|
||||
- annon
|
3
rasta_data_manipulation/TODO.md
Normal file
3
rasta_data_manipulation/TODO.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
- Regression en nuage de points: mem by ceiling log feels bad
|
||||
- IC3: Venn diagram
|
||||
- time / mem for specific category
|
25
rasta_data_manipulation/extract_result.sh
Executable file
25
rasta_data_manipulation/extract_result.sh
Executable file
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env bash
# Generate all the figures from the rasta/drebin result databases.
# usage: ./extract_result.sh DATA_DIR
#
# Fix: all variable expansions are now quoted so the script works when
# DATA_DIR contains spaces.

DATA_DIR=$1
if [[ -z "${DATA_DIR}" ]]; then
    echo 'MISSING DATA_DIR parameter'
    echo 'usage: ./extract_result.sh DATA_DIR'
    exit 1
fi
DATA_DIR="$(readlink -f "$DATA_DIR")"

DB="${DATA_DIR}/results/rasta.db"
DB_DREBIN="${DATA_DIR}/results/drebin.db"
FOLDER="figs"

# Exit status overview for both datasets, plus success rate by year.
rasta-status -d "${DB}" -f "${FOLDER}" --title "Exit status for the Rasta dataset"
rasta-status -d "${DB_DREBIN}" -f "${FOLDER}" --title "Exit status for the Drebin dataset"
rasta-success-year -d "${DB}" -f "${FOLDER}/by_year"

# Error analysis figures.
rasta-common-errors -d "${DB}" -f "${FOLDER}/common_err" -s FAILED
rasta-avg-nb-errors -d "${DB}" -f "${FOLDER}/common_err"
rasta-error-repartition -d "${DB}" -f "${FOLDER}"
rasta-avg-ressource -d "${DB}" -f "${FOLDER}"

# Factor decorrelation plots at two decile cut points.
rasta-decorelate-factor -d "${DB}" -f "${FOLDER}/decorelation" --decile 8
rasta-decorelate-factor -d "${DB}" -f "${FOLDER}/decorelation" --decile 6
|
25
rasta_data_manipulation/find_apks_by_tool_error.sh
Executable file
25
rasta_data_manipulation/find_apks_by_tool_error.sh
Executable file
|
@ -0,0 +1,25 @@
|
|||
#!/usr/bin/env bash
# List the report files of the apks for which <tool> failed with <error>.
#
# Fixes:
# - the shebang was `sh` while the script uses the bash-only `[[ ]]` test;
# - `find` read the misspelled, undefined REPPORT_FOLDER variable (the
#   parameter is stored in REPORT_FOLDER), so the search always ran on the
#   current directory tree;
# - the special PWD shell variable is no longer overwritten;
# - expansions are quoted.

TOOL=${1}
ERROR=${2}
DATABASE=${3:-'rasta.db'}
REPORT_FOLDER=${4:-"$(pwd)/../data/reports/rasta"}

USAGE=$(cat <<- EOM
usage: ${0} <tool> <error> [<database> [<report folder>]]
EOM
)

if [[ -z "$TOOL" ]] || [[ -z "$ERROR" ]] || [[ -z "$DATABASE" ]] || [[ -z "$REPORT_FOLDER" ]] ; then
    echo "${USAGE}"
    exit 1
fi

TMP_FILE=$(mktemp)
# Collect the '<sha256>_-_<tool>' identifiers of the failed runs that raised
# the requested error.
sqlite3 "${DATABASE}" "SELECT DISTINCT error.sha256 || '_-_' || error.tool_name FROM error INNER JOIN exec ON error.tool_name = exec.tool_name AND error.sha256 = exec.sha256 WHERE exec.tool_status = 'FAILED' AND error.tool_name = '$TOOL' and error = '$ERROR';" > "${TMP_FILE}"

# Keep only the report files whose name matches one of the identifiers.
find "${REPORT_FOLDER}" | grep -F -f "${TMP_FILE}"
rm "${TMP_FILE}"
|
||||
|
||||
|
35
rasta_data_manipulation/make_db.sh
Executable file
35
rasta_data_manipulation/make_db.sh
Executable file
|
@ -0,0 +1,35 @@
|
|||
#!/usr/bin/env bash
# Build the rasta.db and drebin.db result databases from the raw data in DATA_DIR.
# usage: ./make_db.sh DATA_DIR
#
# Fix: variable expansions are quoted (paths with spaces); the ten
# status_setN folder names are generated with brace expansion instead of a
# hand-written list.

DATA_DIR=$1
if [[ -z "${DATA_DIR}" ]]; then
    echo 'MISSING DATA_DIR parameter'
    echo 'usage: ./make_db.sh DATA_DIR'
    exit 1
fi
DATA_DIR="$(readlink -f "$DATA_DIR")"

# Rasta dataset: merge the per-set apk lists, then populate the apk, tool and
# report tables.
all_rasta_apk=$(mktemp)
cat "${DATA_DIR}"/dataset/set* > "${all_rasta_apk}"
rasta-populate-db-apk -a "${all_rasta_apk}" \
    -d "${DATA_DIR}/results/rasta.db" \
    --year-and-sdk "${DATA_DIR}/androzoo/year_and_sdk.csv.gz" \
    --latest-with-added-date "${DATA_DIR}/androzoo/latest_with-added-date.csv.gz" \
    --fix-dex-file
rasta-populate-db-tool -d "${DATA_DIR}/results/rasta.db"
for folder in status_set{0..9}; do
    rasta-populate-db-report -d "${DATA_DIR}/results/rasta.db" -r "${DATA_DIR}/results/reports/rasta/${folder}"
done
rasta-populate-db-report -d "${DATA_DIR}/results/rasta.db" --estimate-cause

# Drebin dataset: same pipeline, single report folder.
rasta-populate-db-apk -a "${DATA_DIR}/dataset/drebin" \
    -d "${DATA_DIR}/results/drebin.db" \
    --year-and-sdk "${DATA_DIR}/androzoo/year_and_sdk.csv.gz" \
    --latest-with-added-date "${DATA_DIR}/androzoo/latest_with-added-date.csv.gz" \
    --fix-dex-file
rasta-populate-db-tool -d "${DATA_DIR}/results/drebin.db"
rasta-populate-db-report -d "${DATA_DIR}/results/drebin.db" -r "${DATA_DIR}/results/reports/drebin/status_drebin"
rasta-populate-db-report -d "${DATA_DIR}/results/drebin.db" --estimate-cause

rm "${all_rasta_apk}"
|
6
rasta_data_manipulation/means_size.sql
Normal file
6
rasta_data_manipulation/means_size.sql
Normal file
|
@ -0,0 +1,6 @@
|
|||
-- Average dex (bytecode) size and apk size over the apk table:
-- overall, then restricted by VirusTotal detection count
-- (vt_detection = 0: undetected; != 0: flagged by at least one engine).
SELECT AVG(dex_size) FROM apk;
SELECT AVG(dex_size) FROM apk WHERE vt_detection = 0;
SELECT AVG(dex_size) FROM apk WHERE vt_detection != 0;
SELECT AVG(apk_size) FROM apk;
SELECT AVG(apk_size) FROM apk WHERE vt_detection = 0;
SELECT AVG(apk_size) FROM apk WHERE vt_detection != 0;
|
6
rasta_data_manipulation/means_success_by_year.sql
Normal file
6
rasta_data_manipulation/means_success_by_year.sql
Normal file
|
@ -0,0 +1,6 @@
|
|||
-- Percentage of tool executions that ended FINISHED or UNKNOWN, grouped by
-- the apk's first_seen_year.
-- NOTE(review): the hard-coded 20 presumably is the number of tools run per
-- apk (so the denominator is total executions for that year) -- confirm
-- against the exec table contents.
SELECT apk1.first_seen_year, (COUNT(*) * 100) / (SELECT 20 * COUNT(*)
FROM apk AS apk2 WHERE apk2.first_seen_year = apk1.first_seen_year
)
FROM exec JOIN apk AS apk1 ON exec.sha256 = apk1.sha256
WHERE exec.tool_status = 'FINISHED' OR exec.tool_status = 'UNKNOWN'
GROUP BY apk1.first_seen_year ORDER BY apk1.first_seen_year;
|
2
rasta_data_manipulation/mypy.ini
Normal file
2
rasta_data_manipulation/mypy.ini
Normal file
|
@ -0,0 +1,2 @@
|
|||
[mypy]
|
||||
python_executable = .venv/bin/python
|
1419
rasta_data_manipulation/poetry.lock
generated
Normal file
1419
rasta_data_manipulation/poetry.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
64
rasta_data_manipulation/pyproject.toml
Normal file
64
rasta_data_manipulation/pyproject.toml
Normal file
|
@ -0,0 +1,64 @@
|
|||
[tool.poetry]
|
||||
name = "rasta_triturage"
|
||||
version = "0.2.0"
|
||||
description = "'Triturage de donnée' for the Rasta Project"
|
||||
authors = ["anon"]
|
||||
readme = "README.md"
|
||||
#homepage = ""
|
||||
#repository = ""
|
||||
license = "Proprietary"
|
||||
|
||||
[tool.poetry.urls]
|
||||
#"Bug Tracker" = ""
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.10"
|
||||
matplotlib = "^3.7.1"
|
||||
pyqt5 = "^5.15.9"
|
||||
numpy = "^1.24.3"
|
||||
|
||||
seaborn = "^0.12.2"
|
||||
python-slugify = "^8.0.1"
|
||||
androguard = "^3.3.5"
|
||||
requests = "^2.31.0"
|
||||
matplotlib-venn = "^0.11.9"
|
||||
python-dateutil = "^2.8.2"
|
||||
|
||||
[tool.poetry.scripts]
|
||||
rasta-triturage = "rasta_triturage.cli:main"
|
||||
rasta-status = "rasta_triturage.cli:show_status_by_tool"
|
||||
rasta-collect-apk-info = "rasta_triturage.cli:get_apk_info"
|
||||
rasta-success-target-sdk = "rasta_triturage.cli:show_success_rate_by_target_sdk"
|
||||
rasta-success-min-sdk = "rasta_triturage.cli:show_success_rate_by_min_sdk"
|
||||
rasta-success-year = "rasta_triturage.cli:show_success_rate_by_first_seen_year"
|
||||
rasta-success-size = "rasta_triturage.cli:show_success_rate_by_dex_size"
|
||||
rasta-success-apk-size = "rasta_triturage.cli:show_success_rate_by_size_decile"
|
||||
rasta-timeout-target-sdk = "rasta_triturage.cli:show_timeout_rate_by_target_sdk"
|
||||
rasta-timeout-min-sdk = "rasta_triturage.cli:show_timeout_rate_by_min_sdk"
|
||||
rasta-timeout-year = "rasta_triturage.cli:show_timeout_rate_by_estimated_year"
|
||||
rasta-populate-db-apk = "rasta_triturage.cli:populate_db_apk"
|
||||
rasta-populate-db-report = "rasta_triturage.cli:populate_db_exec"
|
||||
rasta-populate-db-tool = "rasta_triturage.cli:populate_db_tool"
|
||||
rasta-common-errors = "rasta_triturage.cli:show_common_errors"
|
||||
rasta-avg-nb-errors = "rasta_triturage.cli:average_nb_errors"
|
||||
rasta-error-causes-radar = "rasta_triturage.cli:show_error_cause_radar"
|
||||
rasta-error-repartition = "rasta_triturage.cli:show_error_type_repartition"
|
||||
rasta-avg-occ-by-exec = "rasta_triturage.cli:show_error_avg_occ_by_exec"
|
||||
rasta-ic3-analysis = "rasta_triturage.cli:ic3"
|
||||
rasta-avg-ressource = "rasta_triturage.cli:get_avg_ressource_consumption"
|
||||
rasta-decorelate-factor = "rasta_triturage.cli:plot_decorelated_factor"
|
||||
rasta-count-error-stacks = "rasta_triturage.cli:count_error_stacks"
|
||||
rasta-gen-dataset = "rasta_triturage.cli:generate_dataset"
|
||||
rasta-size-malware = "rasta_triturage.cli:size_malware"
|
||||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pytest = "*"
|
||||
pytest-cov = "*"
|
||||
types-requests = "^2.31.0.0"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
addopts = "--cov"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
build-backend = "poetry.core.masonry.api"
|
3
rasta_data_manipulation/rasta_triturage/__init__.py
Normal file
3
rasta_data_manipulation/rasta_triturage/__init__.py
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Package metadata for rasta_triturage; the version mirrors pyproject.toml.
__author__ = "annon"
__email__ = "annon"
__version__ = "0.2.0"
|
115
rasta_data_manipulation/rasta_triturage/apk.py
Normal file
115
rasta_data_manipulation/rasta_triturage/apk.py
Normal file
|
@ -0,0 +1,115 @@
|
|||
"""
|
||||
Collect data about apks.
|
||||
"""
|
||||
|
||||
import dateutil.parser as dp # type: ignore
|
||||
import datetime
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt # type: ignore
|
||||
|
||||
from typing import Any, IO, Callable
|
||||
from pathlib import Path
|
||||
|
||||
from .utils import render
|
||||
|
||||
|
||||
def plot_apk_info_by_generic_x(
|
||||
data: list[Any],
|
||||
x: str,
|
||||
title: str,
|
||||
extract_propertie: Callable,
|
||||
y_label: str,
|
||||
x_label: str | None = None,
|
||||
reductions: dict[str, Callable] | None = None,
|
||||
xscale: str = "linear",
|
||||
interactive: bool = True,
|
||||
image_path: Path | None = None,
|
||||
):
|
||||
"""`extract_propertie` is a founction that take a list of element and return
|
||||
a value representing the value of the list, like a median or a mean.
|
||||
"""
|
||||
raise NotImplementedError("TODO: update function to use sqlite3")
|
||||
|
||||
|
||||
# groupped = group_by(x, data, reductions=reductions)
|
||||
# properties = {k: extract_propertie(v) for k, v in groupped.items()}
|
||||
# if x_label is None:
|
||||
# x_label = x
|
||||
# x_values = list(set(filter(lambda x: x is not None, properties.keys())))
|
||||
# x_values.sort()
|
||||
# y_values = [properties[x] for x in x_values]
|
||||
#
|
||||
# plt.figure(figsize=(16, 9), dpi=80)
|
||||
# plt.plot(x_values, y_values)
|
||||
# plt.xscale(xscale)
|
||||
# # plt.ylim([-5, 105])
|
||||
# # plt.legend()
|
||||
# plt.xlabel(x_label)
|
||||
# plt.ylabel(y_label)
|
||||
# render(title, interactive, image_path)
|
||||
#
|
||||
|
||||
|
||||
def plot_apk_size(
    apk_data: list[Any],
    interactive: bool = True,
    image_path: Path | None = None,
):
    """Bar-plot the total bytecode size (MiB) of every apk, sorted ascending.

    Dotted horizontal lines mark 4**7 .. 4**12 bytes (converted to MiB) as
    visual size references.
    """
    mib = 1024 * 1024
    dex_sizes = np.array([entry["total_dex_size"] for entry in apk_data]) / mib
    dex_sizes.sort()
    plt.figure(figsize=(16, 9), dpi=80)
    plt.bar(np.arange(len(dex_sizes)), dex_sizes)
    plt.ylabel("Bytecode size (MiB)")
    # Individual apks are anonymous on this plot: hide the x ticks entirely.
    plt.tick_params(
        axis="x",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
    )
    for exponent in range(7, 13):
        plt.axhline(y=(4**exponent) / mib, color="r", linestyle=":")
    render("Bytecode size of the apks", interactive, image_path)
|
||||
|
||||
|
||||
def plot_apk_size_hl_subset(
    apk_data: list[Any],
    subset_sha: list[str],
    title: str,
    interactive: bool = True,
    image_path: Path | None = None,
):
    """Bar-plot the bytecode size of every apk, highlighting a subset.

    Apks whose sha256 is in `subset_sha` are drawn as a second hatched
    orange series. The two series are mutually exclusive (for every apk one
    of the two bars is 0), so the result reads as one bar chart with some
    bars highlighted. Note: sorts `apk_data` in place.
    """
    mib = 1024 * 1024
    apk_data.sort(key=lambda entry: entry["total_dex_size"])
    highlighted_flags = [entry["sha256"] in subset_sha for entry in apk_data]
    base_sizes = (
        np.array(
            [
                0 if flag else entry["total_dex_size"]
                for flag, entry in zip(highlighted_flags, apk_data)
            ]
        )
        / mib
    )
    highlighted_sizes = (
        np.array(
            [
                entry["total_dex_size"] if flag else 0
                for flag, entry in zip(highlighted_flags, apk_data)
            ]
        )
        / mib
    )
    positions = np.arange(len(base_sizes))
    plt.figure(figsize=(16, 9), dpi=80)
    plt.bar(positions, base_sizes, edgecolor="black")
    plt.bar(
        positions, highlighted_sizes, color="#D55E00", hatch="x", edgecolor="black"
    )
    plt.ylabel("Bytecode size (MiB)")
    # Hide the meaningless per-apk x ticks.
    plt.tick_params(
        axis="x",
        which="both",
        bottom=False,
        top=False,
        labelbottom=False,
    )
    for exponent in range(7, 13):
        plt.axhline(y=(4**exponent) / mib, color="r", linestyle=":")
    render(title, interactive, image_path)
|
1129
rasta_data_manipulation/rasta_triturage/cli.py
Normal file
1129
rasta_data_manipulation/rasta_triturage/cli.py
Normal file
File diff suppressed because it is too large
Load diff
5124
rasta_data_manipulation/rasta_triturage/data_set.py
Normal file
5124
rasta_data_manipulation/rasta_triturage/data_set.py
Normal file
File diff suppressed because it is too large
Load diff
199
rasta_data_manipulation/rasta_triturage/ic3.py
Normal file
199
rasta_data_manipulation/rasta_triturage/ic3.py
Normal file
|
@ -0,0 +1,199 @@
|
|||
import sqlite3
|
||||
import csv
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional, Any
|
||||
from matplotlib_venn import venn2 # type: ignore
|
||||
from .utils import render
|
||||
|
||||
# Columns of the `error` table that characterise an error; they are
# concatenated (NULL-safe, '|'-separated) into one string used for grouping
# and reporting.
ERROR_CARACT = (
    "error_type",
    "error",
    "msg",
    "file",
    "function",
    "level",
    "origin",
    "raised_info",
    "called_info",
)
# SQL expression producing the '|'-joined characterisation string.
ERROR_MSG = " || '|' || ".join(f"COALESCE({column}, '')" for column in ERROR_CARACT)
|
||||
|
||||
|
||||
def ic3_venn(db: Path, interactive: bool = True, image_path: Path | None = None):
    """Draw a Venn diagram of the apks that ic3 and/or its fork failed on.

    Args:
        db: path to the sqlite result database (reads the `exec` table).
        interactive: display the figure interactively when True.
        image_path: optional path where the figure is saved.
    """
    values = {
        ("FAILED", "NOT_FAILED"): 0,
        ("FAILED", "FAILED"): 0,
        ("NOT_FAILED", "FAILED"): 0,
    }
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # BUG FIX: SQLite rejects 'OUTER LEFT JOIN' as an unknown join type;
        # the valid spelling is 'LEFT OUTER JOIN'.
        for ic3_s, ic3_fork_s, n in cur.execute(
            "SELECT ex1.tool_status, ex2.tool_status, COUNT(*) "
            "FROM exec AS ex1 LEFT OUTER JOIN exec AS ex2 ON ex1.sha256 = ex2.sha256 "
            "WHERE ex1.tool_name = 'ic3' AND ex2.tool_name = 'ic3_fork' "
            "GROUP BY ex1.tool_status, ex2.tool_status"
        ):
            # Fold the per-status counts into the three Venn regions.
            if ic3_s == "FAILED" and ic3_fork_s == "FAILED":
                values[("FAILED", "FAILED")] += n
            elif ic3_s == "FAILED":
                values[("FAILED", "NOT_FAILED")] += n
            elif ic3_fork_s == "FAILED":
                values[("NOT_FAILED", "FAILED")] += n
    venn2(
        subsets=(
            values[("FAILED", "NOT_FAILED")],
            values[("NOT_FAILED", "FAILED")],
            values[("FAILED", "FAILED")],
        ),
        set_labels=("IC3 failed", "IC3 fork failed"),
    )
    render(
        "Number of application that IC3 \nand its fork failed to analyse",
        interactive,
        image_path,
    )
|
||||
|
||||
|
||||
def ic3_errors(db: Path, file: Path | None = None):
    """Export a CSV of the most frequent errors for ic3 vs its fork.

    For each combination of (whose errors are inspected: ic3_fork, then ic3)
    x (failure pattern: only ic3 failed, only the fork failed, both failed),
    the 10 most frequent error characterisations are collected and written
    to `file` (stdout when `file` is None).

    BUG FIXES over the original:
    - 'OUTER LEFT JOIN' is not valid SQLite syntax; replaced with
      'LEFT OUTER JOIN' (all six queries);
    - one query was missing a space between the GROUP BY list and
      'ORDER BY', producing invalid SQL -- the queries are now built by a
      single helper so they cannot drift apart;
    - the CSV columns 'msg' and 'occurence' were swapped with respect to the
      header row;
    - the output file is closed even if writing raises.
    """

    def _top_errors_query(error_tool: str, ic3_cmp: str, fork_cmp: str) -> str:
        # Build one "top 10 errors" query. `error_tool` selects whose errors
        # are listed ('ic3' or 'ic3_fork'); `ic3_cmp`/`fork_cmp` ('=' or
        # '!=') select which of the two tools failed.
        return (
            "SELECT ex1.tool_status = 'FAILED', ex2.tool_status = 'FAILED', "
            " error.tool_name, error.error, COUNT(DISTINCT error.sha256) AS cnt, "
            f" {ERROR_MSG} "
            "FROM exec AS ex1 "
            " LEFT OUTER JOIN exec AS ex2 ON ex1.sha256 = ex2.sha256 "
            f" INNER JOIN error ON ex1.sha256 = error.sha256 AND error.tool_name = '{error_tool}' "
            "WHERE ex1.tool_name = 'ic3' AND ex2.tool_name = 'ic3_fork' AND "
            f" ex1.tool_status {ic3_cmp} 'FAILED' AND ex2.tool_status {fork_cmp} 'FAILED' "
            "GROUP BY ex1.tool_status = 'FAILED', ex2.tool_status = 'FAILED', "
            f" error.tool_name, error.error, {ERROR_MSG} "
            "ORDER BY cnt DESC "
            "LIMIT 10;"
        )

    errors = []
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Same ordering as the original: fork errors first, then ic3 errors;
        # within each: only ic3 failed, only the fork failed, both failed.
        for error_tool in ("ic3_fork", "ic3"):
            for ic3_cmp, fork_cmp in (("=", "!="), ("!=", "="), ("=", "=")):
                errors.extend(
                    cur.execute(_top_errors_query(error_tool, ic3_cmp, fork_cmp))
                )

    fieldnames = [
        "ic3 failed",
        "ic3 fork failed",
        "tool",
        "error",
        "occurence",
        "msg",
    ]
    fp = sys.stdout if file is None else file.open("w")
    try:
        writer = csv.DictWriter(fp, fieldnames=fieldnames)
        writer.writeheader()
        for err in map(rewrite_msg, errors):
            # rewrite_msg returns the row fields in `fieldnames` order.
            writer.writerow(dict(zip(fieldnames, err)))
    finally:
        if file is not None:
            fp.close()
|
||||
|
||||
|
||||
def rewrite_msg(
    err: tuple[int, int, str, str, int, str]
) -> tuple[int, int, str, str, int, str]:
    """Expand the packed '|'-separated error characterisation of a row.

    `err` is (ic3_failed, ic3_fork_failed, tool, error, occurence, msg),
    where `msg` is the ERROR_MSG concatenation of the ERROR_CARACT columns.
    Returns the same tuple with `error` and `msg` replaced by the values
    re-extracted from the packed string, ordered for readability.
    """
    ic3_failed, ic3_fork_failed, tool, error, occurence, msg = err
    # Re-split the packed characterisation; every non-empty field gets a
    # trailing space so the concatenation below stays readable.
    (
        error_type,
        error,
        msg,
        file,
        function,
        level,
        origin,
        raised_info,
        called_info,
    ) = ("" if field == "" else field + " " for field in msg.split("|"))
    # BUG FIX: the original inserted `called_info` twice and dropped
    # `raised_info` entirely.
    msg = f"{level}{error}{msg}{raised_info}{called_info}{file}{function}{origin}"
    return (ic3_failed, ic3_fork_failed, tool, error, occurence, msg)
|
246
rasta_data_manipulation/rasta_triturage/populate_db_apk.py
Normal file
246
rasta_data_manipulation/rasta_triturage/populate_db_apk.py
Normal file
|
@ -0,0 +1,246 @@
|
|||
import sqlite3
|
||||
import time
|
||||
import gzip
|
||||
import csv
|
||||
import datetime
|
||||
import requests
|
||||
import getpass
|
||||
import dateutil.parser
|
||||
|
||||
from androguard.core.bytecodes import apk as androguard_apk
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def int_or_none(str_: str) -> int | None:
|
||||
if str_:
|
||||
return int(str_)
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def create_apk_table(db: Path):
    """Create the apk table (and the db file) if it does not exist yet.

    Uses CREATE TABLE IF NOT EXISTS -- consistent with create_tables() in
    populate_db_exec.py -- instead of probing sqlite_master manually.
    """
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        cur.execute(
            (
                "CREATE TABLE IF NOT EXISTS apk("
                " sha256, first_seen_year, apk_size,"
                " vt_detection, min_sdk, max_sdk,"
                " target_sdk, apk_size_decile, dex_date date,"
                " pkg_name, vercode, vt_scan_date date,"
                " dex_size, added date, markets, dex_size_decile, "
                " dex_size_decile_by_year"
                ")"
            )
        )
        con.commit()
|
||||
|
||||
|
||||
def get_sha_set(dataset: Path) -> set[str]:
    """Read a set of sha256 hashes from a file (one hash per line).

    Blank lines are skipped: the original implementation added an empty
    string to the set whenever the file contained a blank line.
    """
    with dataset.open() as f:
        return {line.strip() for line in f if line.strip()}
|
||||
|
||||
|
||||
def populate_from_year_and_sdk(db: Path, year_and_sdk: Path, apks: set[str]):
    """Insert the info from year_and_sdk.csv.gz into the apk table.

    Only the hashes in `apks` are considered. Hashes of `apks` that are
    absent from the CSV are still inserted, with NULL metadata, so that
    every apk of the dataset gets a row.

    Improvements over the original: one database connection is reused for
    all inserts (instead of reconnecting for every row), the INSERT
    statement is defined once, and a duplicated sha256 in the CSV no longer
    raises KeyError (set.discard instead of set.remove).
    """
    insert_stmt = (
        "INSERT INTO apk ("
        " sha256, first_seen_year, vt_detection,"
        " min_sdk, max_sdk, target_sdk, apk_size_decile,"
        " dex_size_decile"
        ") VALUES("
        " :sha256, :first_seen_year, :vt_detection,"
        " :min_sdk, :max_sdk, :target_sdk, :apk_size_decile,"
        " :dex_size_decile"
        ");"
    )
    apks_not_found = apks.copy()
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        with gzip.open(year_and_sdk, "rt", newline="") as f:
            reader = csv.DictReader(f, quotechar='"')
            assert reader.fieldnames is not None
            for row in reader:
                if row["sha256"] not in apks:
                    continue
                cur.execute(
                    insert_stmt,
                    {
                        "sha256": row["sha256"],
                        "first_seen_year": int_or_none(row["first_seen_year"]),
                        "vt_detection": int_or_none(row["vt_detection"]),
                        "min_sdk": int_or_none(row["min_sdk"]),
                        "max_sdk": int_or_none(row["max_sdk"]),
                        "target_sdk": int_or_none(row["target_sdk"]),
                        "apk_size_decile": 0,  # Computed at dataset generation
                        "dex_size_decile": 0,  # Computed by compute_dex_decile
                    },
                )
                apks_not_found.discard(row["sha256"])
        # Apks unknown to the CSV: insert a row with NULL metadata so later
        # UPDATEs still find them.
        for sha256 in apks_not_found:
            cur.execute(
                insert_stmt,
                {
                    "sha256": sha256,
                    "first_seen_year": None,
                    "vt_detection": None,
                    "min_sdk": None,
                    "max_sdk": None,
                    "target_sdk": None,
                    "apk_size_decile": 0,
                    "dex_size_decile": 0,  # Computed by compute_dex_decile
                },
            )
        con.commit()
|
||||
|
||||
|
||||
def populate_from_latest_with_added_date(
    db: Path, latest_with_added_date: Path, apks: set[str]
):
    """Update the apk rows with the info from latest_with-added-date.csv.gz.

    Only the hashes in `apks` are updated. Improvement over the original:
    one database connection is reused for all updates instead of
    reconnecting per CSV row, and the optional ISO-date parsing is factored
    into a helper.
    """

    def _iso_or_none(text: str) -> datetime.datetime | None:
        # Empty CSV cells mean "unknown".
        return datetime.datetime.fromisoformat(text) if text else None

    with sqlite3.connect(db) as con:
        cur = con.cursor()
        with gzip.open(latest_with_added_date, "rt", newline="") as f:
            reader = csv.DictReader(f, quotechar='"')
            assert reader.fieldnames is not None
            for row in reader:
                if row["sha256"] not in apks:
                    continue
                value = {
                    "sha256": row["sha256"],
                    "apk_size": int_or_none(row["apk_size"]),
                    "dex_date": _iso_or_none(row["dex_date"]),
                    "pkg_name": row["pkg_name"],
                    "vercode": int_or_none(row["vercode"]),
                    "vt_scan_date": _iso_or_none(row["vt_scan_date"]),
                    # Not necessarily the right value if multiple dex files
                    # are used, see fix_dex_size().
                    "dex_size": int_or_none(row["dex_size"]),
                    # 'added' is parsed with dateutil, as in the original --
                    # presumably because it may carry a timezone suffix that
                    # fromisoformat does not accept on older Pythons.
                    "added": dateutil.parser.isoparse(row["added"])
                    if row["added"]
                    else None,
                    "markets": row["markets"],
                }
                cur.execute(
                    "UPDATE apk "
                    "SET apk_size = :apk_size,"
                    " dex_date = :dex_date,"
                    " pkg_name = :pkg_name,"
                    " vercode = :vercode,"
                    " vt_scan_date = :vt_scan_date,"
                    " dex_size = :dex_size,"
                    " added = :added,"
                    " markets = :markets "
                    "WHERE"
                    " sha256 = :sha256;",
                    value,
                )
        con.commit()
|
||||
|
||||
|
||||
def download_apk(sha256: str, api_key: bytes) -> bytes:
    """Download an apk from androzoo, retrying until the download succeeds.

    Best-effort endless retry: any non-200 response is printed and the
    request is re-issued after a one second pause.
    """
    params = {
        b"apikey": api_key,
        b"sha256": sha256.encode("utf-8"),
    }
    while True:
        response = requests.get("https://androzoo.uni.lu/api/download", params=params)
        if response.status_code == 200:
            return response.content
        print(response)
        print(response.content)
        time.sleep(1)
|
||||
|
||||
|
||||
def fix_dex_size(db: Path, apks: set[str], androzoo_key: bytes):
    """Recompute the dex_size column from the actual apk contents.

    Downloads every apk in `apks` from androzoo, sums the sizes of all of
    its .dex files, and stores the total in the database (the CSV value is
    not necessarily right when an apk contains several dex files).
    """
    for sha256 in apks:
        raw_apk = download_apk(sha256, androzoo_key)
        parsed = androguard_apk.APK(raw_apk, raw=True, skip_analysis=True)
        total_dex_size = sum(len(dex) for dex in parsed.get_all_dex())
        with sqlite3.connect(db) as con:
            cursor = con.cursor()
            cursor.execute(
                "UPDATE apk SET dex_size = ? WHERE sha256 = ?;",
                (total_dex_size, sha256),
            )
            con.commit()
|
||||
|
||||
|
||||
def populate_db_apk(
    db: Path,
    dataset: Path,
    year_and_sdk: Path,
    latest_with_added_date: Path,
    fix_dsize: bool,
):
    """Populate the database with the apk informations.

    Reads the dataset sha256 list, fills the apk table from the two androzoo
    CSV dumps, optionally recomputes dex_size from the downloaded apks
    (`fix_dsize`, needs an androzoo api key asked interactively up front),
    then computes the dex_size deciles, global and per first_seen_year.
    """
    # Ask for the key before the long-running steps so the run doesn't stall
    # midway waiting for input.
    if fix_dsize:
        androzoo_key = (
            getpass.getpass(prompt="androzoo apikey: ").strip().encode("utf-8")
        )
    create_apk_table(db)
    apks = get_sha_set(dataset)
    populate_from_year_and_sdk(db, year_and_sdk, apks)
    populate_from_latest_with_added_date(db, latest_with_added_date, apks)
    if fix_dsize:
        fix_dex_size(db, apks, androzoo_key)
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Global decile of dex_size.
        cur.execute(
            "UPDATE apk "
            "SET dex_size_decile = compute.decile "
            "FROM ("
            " SELECT NTILE ( 10 ) OVER ( ORDER BY dex_size ) decile, sha256 FROM apk"
            ") AS compute "
            "WHERE apk.sha256 = compute.sha256;"
        )
        # Decile of dex_size within each first_seen_year.
        cur.execute(
            "UPDATE apk "
            "SET dex_size_decile_by_year = compute.decile "
            "FROM ("
            " SELECT NTILE ( 10 ) "
            " OVER ( PARTITION BY first_seen_year ORDER BY dex_size ) decile, sha256 "
            " FROM apk"
            ") AS compute "
            "WHERE apk.sha256 = compute.sha256;"
        )
        con.commit()
|
186
rasta_data_manipulation/rasta_triturage/populate_db_exec.py
Normal file
186
rasta_data_manipulation/rasta_triturage/populate_db_exec.py
Normal file
|
@ -0,0 +1,186 @@
|
|||
import sqlite3
|
||||
import json
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
|
||||
from .query_error import estimate_cause
|
||||
|
||||
|
||||
def create_tables(db: Path):
    """Create the exec and error tables (and the db file) if missing."""
    exec_ddl = (
        "CREATE TABLE IF NOT EXISTS exec ("
        " sha256, id, rev, time, kernel_cpu_time, user_cpu_time, "
        " max_rss_mem, avg_rss_mem, avg_total_mem, page_size, "
        " nb_major_page_fault, nb_minor_page_fault, nb_fs_input, "
        " nb_fs_output, nb_socket_msg_received, nb_socket_msg_sent, "
        " nb_signal_delivered, exit_status, timeout, "
        " tool_status, tool_name, date date"
        ");"
    )
    error_ddl = (
        "CREATE TABLE IF NOT EXISTS error ("
        " tool_name, sha256, error_type, error, msg, "
        " first_line, last_line, logfile_name, file, "
        " line, function, level, origin, raised_info, "
        " called_info, cause"
        ");"
    )
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        for ddl in (exec_ddl, error_ddl):
            cur.execute(ddl)
        con.commit()
|
||||
|
||||
|
||||
def insert_errors(cur, tool, sha256, errors):
    """Normalize the error dicts of one execution and insert them in ``error``.

    Missing fields are set to None, the raised/called locations are rendered
    as human-readable strings, and the bulky stack trace is dropped.
    """
    optional_fields = (
        "error_type",
        "error",
        "msg",
        "first_line",
        "last_line",
        "logfile_name",
        "file",
        "line",
        "function",
        "level",
        "origin",
    )
    for entry in errors:
        entry["tool_name"] = tool
        entry["sha256"] = sha256
        for field in optional_fields:
            entry.setdefault(field, None)
        entry.setdefault("raised_info", None)
        raised = entry["raised_info"]
        if raised is not None:
            entry["raised_info"] = 'Raised at {} in file "{}", line {}'.format(
                raised["function"],
                raised["file"],
                raised["line"],
            )
        entry.setdefault("called_info", None)
        called = entry["called_info"]
        if called is not None:
            entry["called_info"] = 'Called from {} in file "{}", line {}'.format(
                called["function"],
                called["file"],
                called["line"],
            )
        # The stack trace can be quite big without being very useful in
        # queries.
        entry.pop("stack", None)
    cur.executemany(
        (
            "INSERT INTO error VALUES("
            " :tool_name, :sha256, :error_type, :error, :msg, "
            " :first_line, :last_line, :logfile_name, :file, "
            " :line, :function, :level, :origin, :raised_info, "
            " :called_info, ''"
            ");"
        ),
        errors,
    )
|
||||
|
||||
|
||||
def fix_error(db: Path, report_with_correct_error: Path):
    """Replace wrongly-parsed errors with the ones from a corrective re-run.

    Unfortunately there were some errors when parsing the errors during the
    experiment, so another run was made for some tool-apk pairs to get the
    actual error.  That pass was made in a different environment (different
    memory and space constraints), so we only replace the errors (after
    manual inspection they don't seem related to the environment) and keep
    the other values from the original experiment.

    :param db: path to the sqlite database to fix.
    :param report_with_correct_error: folder of JSON execution reports
        produced by the corrective run.
    """
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        for path in report_with_correct_error.iterdir():
            with path.open() as f:
                exec_log = json.load(f)
            sha256 = exec_log["apk"].removesuffix(".apk")
            # Only fix tool-apk pairs that have exactly one recorded
            # execution: anything else is ambiguous and left untouched.
            if (
                len(
                    cur.execute(
                        "SELECT * FROM exec WHERE tool_name = ? AND sha256 = ?",
                        (exec_log["tool-name"], sha256),
                    ).fetchall()
                )
                == 1
            ):
                # Drop the mis-parsed errors before inserting the fixed ones.
                cur.execute(
                    "DELETE FROM error WHERE tool_name = ? AND sha256 = ?",
                    (exec_log["tool-name"], sha256),
                )
                errors = exec_log.pop("errors", [])
                insert_errors(cur, exec_log["tool-name"], sha256, errors)
        con.commit()
|
||||
|
||||
|
||||
def populate_execution_report(db: Path, report_folder: Path):
    """Add to the database the execution reports stored in report_folder.

    Each file in *report_folder* must be a JSON execution log.  The log is
    normalized (keys renamed to valid SQL identifiers, missing columns set
    to NULL, ISO date parsed) then inserted into the ``exec`` table; its
    errors are inserted into the ``error`` table.

    :param db: path of the sqlite database (tables created if needed).
    :param report_folder: folder containing one JSON report per execution.
    """
    # Columns of the `exec` table, in insertion order.
    expected_columns = (
        "sha256",
        "id",
        "rev",
        "time",
        "kernel_cpu_time",
        "user_cpu_time",
        "max_rss_mem",
        "avg_rss_mem",
        "avg_total_mem",
        "page_size",
        "nb_major_page_fault",
        "nb_minor_page_fault",
        "nb_fs_input",
        "nb_fs_output",
        "nb_socket_msg_received",
        "nb_socket_msg_sent",
        "nb_signal_delivered",
        "exit_status",
        "timeout",
        "tool_status",
        "tool_name",
        "date",
    )
    create_tables(db)
    i = 0
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        for path in report_folder.iterdir():
            with path.open() as f:
                exec_log = json.load(f)
            exec_log["sha256"] = exec_log["apk"].removesuffix(".apk")
            exec_log["id"] = exec_log.get("_id", None)
            exec_log["rev"] = exec_log.get("_rev", None)
            errors = exec_log.pop("errors", [])

            exec_log["date"] = (
                datetime.datetime.fromisoformat(exec_log["date"])
                if exec_log.get("date", None)
                else None
            )
            del exec_log["apk"]
            exec_log.pop("_id", None)
            exec_log.pop("_rev", None)
            # JSON keys use '-', SQL columns use '_'.
            new_exec_log = {
                key.replace("-", "_"): value for key, value in exec_log.items()
            }
            for column in expected_columns:
                new_exec_log.setdefault(column, None)
            cur.execute(
                (
                    "INSERT INTO exec VALUES("
                    " :sha256, :id, :rev, :time, :kernel_cpu_time, :user_cpu_time, "
                    " :max_rss_mem, :avg_rss_mem, :avg_total_mem, :page_size, "
                    " :nb_major_page_fault, :nb_minor_page_fault, :nb_fs_input, "
                    " :nb_fs_output, :nb_socket_msg_received, :nb_socket_msg_sent, "
                    " :nb_signal_delivered, :exit_status, :timeout, "
                    " :tool_status, :tool_name, :date"
                    ");"
                ),
                new_exec_log,
            )
            insert_errors(cur, exec_log["tool-name"], exec_log["sha256"], errors)
            i += 1
            # BUG FIX: `if i == 10_000` committed only once, exactly at the
            # 10 000th report.  Commit every 10 000 reports instead, to
            # bound the memory used by the open transaction.
            if i % 10_000 == 0:
                con.commit()
        con.commit()
|
176
rasta_data_manipulation/rasta_triturage/populate_db_tool.py
Normal file
176
rasta_data_manipulation/rasta_triturage/populate_db_tool.py
Normal file
|
@ -0,0 +1,176 @@
|
|||
import sqlite3
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
# Static description of each analyzed tool: the languages it is written in
# and the main frameworks (soot, androguard, apktool, ...) it relies on.
TOOL_INFO = [
    {"tool_name": "adagio", "use_python": True, "use_androguard": True},
    {"tool_name": "amandroid", "use_scala": True, "use_soot": False, "use_apktool": True},
    {
        "tool_name": "anadroid",
        "use_python": True,
        "use_java": True,
        "use_scala": True,
        "use_soot": False,
        "use_apktool": True,
    },
    {"tool_name": "androguard", "use_python": True, "use_androguard": True},  # Duh
    {"tool_name": "androguard_dad", "use_python": True, "use_androguard": True},
    {"tool_name": "apparecium", "use_python": True, "use_androguard": True},
    {"tool_name": "blueseal", "use_java": True, "use_soot": True, "use_apktool": True},
    {"tool_name": "dialdroid", "use_java": True, "use_soot": True},
    {"tool_name": "didfail", "use_python": True, "use_java": True, "use_soot": True},
    {
        "tool_name": "droidsafe",
        "use_python": True,
        "use_java": True,
        "use_soot": True,
        "use_apktool": True,
    },
    {"tool_name": "flowdroid", "use_java": True, "use_soot": True},
    {
        "tool_name": "gator",
        "use_python": True,
        "use_java": True,
        "use_soot": True,
        "use_apktool": True,
    },
    {"tool_name": "ic3", "use_java": True, "use_soot": True},
    {"tool_name": "ic3_fork", "use_java": True, "use_soot": True},
    {"tool_name": "iccta", "use_java": True, "use_soot": True, "use_apktool": True},
    {"tool_name": "mallodroid", "use_python": True, "use_androguard": True},
    {"tool_name": "perfchecker", "use_java": True, "use_soot": True},
    {"tool_name": "redexer", "use_ocaml": True, "use_ruby": True, "use_apktool": True},
    {"tool_name": "saaf", "use_java": True, "use_soot": False, "use_apktool": True},
    {
        "tool_name": "wognsen_et_al",
        "use_python": True,
        "use_prolog": True,
        "use_apktool": True,
    },
]

# Every feature column not set explicitly above defaults to False.
_FEATURE_COLUMNS = (
    "use_python",
    "use_java",
    "use_scala",
    "use_ocaml",
    "use_ruby",
    "use_prolog",
    "use_soot",
    "use_androguard",
    "use_apktool",
)
for _tool in TOOL_INFO:
    for _column in _FEATURE_COLUMNS:
        _tool.setdefault(_column, False)
|
||||
|
||||
|
||||
def create_tool_table(db: Path):
    """Create the ``tool`` table in *db* if it is not already present."""
    with sqlite3.connect(db) as con:
        cursor = con.cursor()
        already_there = cursor.execute(
            "SELECT name FROM sqlite_master WHERE name='tool';"
        ).fetchone()
        if already_there is not None:
            return
        cursor.execute(
            "CREATE TABLE tool ("
            " tool_name, use_python, use_java, use_scala,"
            " use_ocaml, use_ruby, use_prolog, use_soot, "
            " use_androguard, use_apktool"
            ");"
        )
        con.commit()
|
||||
|
||||
|
||||
def populate_tool(
    db: Path,
):
    """Insert the static TOOL_INFO rows into the ``tool`` table of *db*."""
    create_tool_table(db)
    # DROP table if already exist? replace value?
    insert_query = (
        "INSERT INTO tool VALUES("
        " :tool_name, :use_python, :use_java, :use_scala,"
        " :use_ocaml, :use_ruby, :use_prolog, :use_soot, "
        " :use_androguard, :use_apktool"
        ");"
    )
    with sqlite3.connect(db) as con:
        con.cursor().executemany(insert_query, TOOL_INFO)
        con.commit()
|
699
rasta_data_manipulation/rasta_triturage/query_error.py
Normal file
699
rasta_data_manipulation/rasta_triturage/query_error.py
Normal file
|
@ -0,0 +1,699 @@
|
|||
import sqlite3
|
||||
import sys
|
||||
import csv
|
||||
import matplotlib.pyplot as plt # type: ignore
|
||||
from .utils import get_list_tools, radar_chart, render
|
||||
from pathlib import Path
|
||||
from typing import Optional, Any
|
||||
|
||||
# Columns that, together, characterise one error occurrence.
ERROR_CARACT = (
    "error_type",
    "error",
    "msg",
    "file",
    "function",
    "level",
    "origin",
    "raised_info",
    "called_info",
)

# Subquery that removes identical errors occurring multiple times in the
# same execution (same tool / apk pair).
DISTINCT_ERRORS = (
    "("
    f" SELECT DISTINCT tool_name, sha256, {', '.join(ERROR_CARACT)}"
    " FROM error"
    ") AS distinct_error"
)
# Same idea, keeping only the error class (exception name and type).
# NOTE: the original literal had a stray f-prefix with no placeholder.
DISTINCT_ERROR_CLASS = (
    "("
    " SELECT DISTINCT tool_name, sha256, error, error_type"
    " FROM error"
    ") AS distinct_error"
)
# Distinct estimated causes per tool / apk pair.
DISTINCT_CAUSES = (
    "("
    " SELECT DISTINCT tool_name, sha256, cause"
    " FROM error"
    ") AS distinct_cause"
)
|
||||
|
||||
|
||||
def estimate_cause(db: Path):
    """Estimate the cause of an error, to ease grouping.

    Fills the ``cause`` column of the ``error`` table by matching the error
    name/message against known signatures (apktool, memory exhaustion,
    soot, ...).  Errors matching no known signature get the cause 'other'.

    :param db: path to the sqlite database holding the ``error`` table.
    """
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Reset every cause before re-classifying from scratch.
        cur.execute("UPDATE error SET cause = '';")
        con.commit()
        # brut.androlib is package defined in apktool
        # 'Expected: 0x001c0001, got: 0x00000000' errors are always
        # part of an apktool stacktrace:
        # SELECT COUNT(*) FROM error e1
        # WHERE e1.tool_name = '${tool}' AND
        #     e1.msg = 'Expected: 0x001c0001, got: 0x00000000' AND
        #     e1.sha256 NOT IN (
        #         SELECT e2.sha256 FROM error e2
        #         WHERE e2.tool_name = '${tool}' AND
        #             e2.msg LIKE '%Could not decode arsc file%'
        #     )
        # is always 0
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'apktool' "
                "WHERE error = 'brut.androlib.AndrolibException' OR "
                " error LIKE 'brut.androlib.err.%' OR "
                " msg = 'Expected: 0x001c0001, got: 0x00000000' OR "
                " msg LIKE '%brut.androlib.AndrolibException: Could not decode arsc file%' OR "
                " msg LIKE 'bad magic value: %' OR "
                " error = 'brut.androlib.err.UndefinedResObject';"
            )
        )
        # Out-of-memory / stack-exhaustion signatures.
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'memory' "
                "WHERE error = 'java.lang.StackOverflowError' OR "
                " error = 'java.lang.OutOfMemoryError' OR "
                " msg LIKE '%java.lang.OutOfMemoryError%' OR "
                " msg LIKE '%java.lang.StackOverflowError%' OR "
                " msg = 'Stack overflow';"
            )
        )
        # Soot-specific failures (classpath, call graph, IFDS, ...).
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'soot' "
                "WHERE msg LIKE ? OR "
                " msg LIKE '%No call graph present in Scene. Maybe you want Whole Program mode (-w)%' OR "
                " msg LIKE '%There were exceptions during IFDS analysis. Exiting.%' OR "  # More hero than soot?
                " msg = 'Could not find method' OR "
                " msg = 'No sources found, aborting analysis' OR "
                " msg = 'No sources or sinks found, aborting analysis' OR "
                " msg = 'Only phantom classes loaded, skipping analysis...';"
            ),
            (
                "%RefType java.lang.Object not loaded. If you tried to get the RefType of a library class, did you call loadNecessaryClasses()? Otherwise please check Soot's classpath.%",
            ),
        )

        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'index error' "
                "WHERE error = 'IndexError' OR "
                " msg = 'java.lang.ArrayIndexOutOfBoundsException' OR "
                " (error_type = 'Python' AND error = 'KeyError') OR "
                " error = 'java.lang.IndexOutOfBoundsException' OR "
                " error = 'java.lang.ArrayIndexOutOfBoundsException' OR "
                " msg LIKE 'java.lang.ArrayIndexOutOfBoundsException:%';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'arithmetique' "
                "WHERE error = 'java.lang.ArithmeticException';"
            )
        )
        cur.execute("UPDATE error SET cause = 'jasmin' WHERE error = 'jas.jasError';")
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'storage' "
                "WHERE msg = 'No space left on device' OR "
                " msg LIKE 'Error copying file: %' OR "
                " msg = 'java.io.IOException: No space left on device';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'redexe pattern maching failed' "
                "WHERE msg = 'File \"src/ext/logging.ml\", line 712, characters 12-17: Pattern matching failed';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'null pointer' "
                "WHERE error = 'java.lang.NullPointerException' OR "
                " msg LIKE ? OR "
                " msg LIKE 'undefined method % for nil:NilClass (NoMethodError)';"
            ),
            ("'NoneType' object has no attribute %",),
        )
        # Soot ?
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'unknown error in thread' "
                "WHERE msg = 'Worker thread execution failed: null';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'timeout' "
                "WHERE error = 'java.util.concurrent.TimeoutException';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'file name too long' "
                "WHERE msg = 'File name too long';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'encoding' "
                "WHERE error = 'UnicodeEncodeError';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'smali' "
                "WHERE error LIKE 'org.jf.dexlib2.%' OR error LIKE 'org.jf.util.%';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'redexer dex parser' "
                "WHERE msg LIKE 'Dex.Wrong_dex(\"%\")';"
            )
        )
        cur.execute(
            (
                "UPDATE error "
                "SET cause = 'bytecode not found' "
                "WHERE msg LIKE 'No method source set for method %' OR "
                " msg LIKE '% is an system library method.' OR "
                " msg LIKE '% is an unknown method.';"
            )
        )
        con.commit()
        # Default
        # default = " || '|' || ".join(map(lambda s: f"COALESCE({s}, '')", ERROR_CARACT))
        # cur.execute(f"UPDATE error SET cause = {default} WHERE cause = '';")
        # Anything still unclassified falls in the catch-all bucket.
        cur.execute("UPDATE error SET cause = 'other' WHERE cause = '';")
        con.commit()
|
||||
|
||||
|
||||
def radar_cause_estimation(
    db: Path,
    tools: list[str] | None,
    interactive: bool,
    folder: Path | None,
):
    """Print per-tool cause statistics and draw radar charts of the causes.

    Prints the 10 most frequent causes for each tool, then draws one radar
    chart per tool plus a combined chart for all tools.

    :param db: sqlite database whose ``error.cause`` column is filled.
    :param tools: tools to consider; when None, all tools found in the db.
    :param interactive: display the figures interactively.
    :param folder: when given, the folder where the figures are saved.
    """
    # estimate_cause(db)
    if tools is None:
        tools = get_list_tools(db)

    with sqlite3.connect(db, timeout=60) as con:
        cur = con.cursor()
        # All distinct causes define the radar chart axes.
        causes = [
            v for v, in cur.execute("SELECT DISTINCT cause FROM error;").fetchall()
        ]
        for tool in tools:
            print(f"tool: {tool}")
            for cause, count in cur.execute(
                (
                    "SELECT cause, COUNT(*) AS cnt "
                    "FROM error "
                    "WHERE tool_name = ? "
                    "GROUP BY cause "
                    "ORDER BY cnt DESC LIMIT 10;"
                ),
                (tool,),
            ):
                print(f"{count: 6}: {cause}")
            print()

    values = []
    labels = tools
    for tool in tools:
        # One value per cause axis, defaulting to 0 for unseen causes.
        vals = [0 for _ in causes]
        with sqlite3.connect(db) as con:
            cur = con.cursor()
            for cause, cnt in cur.execute(
                (
                    "SELECT distinct_cause.cause, COUNT(*) AS cnt "
                    f"FROM {DISTINCT_CAUSES} "
                    "WHERE distinct_cause.cause != '' AND distinct_cause.tool_name = ? "
                    "GROUP BY distinct_cause.cause;"
                ),
                (tool,),
            ):
                print(f"{tool=}, {cause=}, {cnt=}")
                if cause in causes:
                    vals[causes.index(cause)] = cnt
        print(f"{tool=}, {vals=}")
        radar_chart(
            causes, [vals], [tool], f"Causes of error for {tool}", interactive, folder
        )
        values.append(vals)
    # Combined chart: one polygon per tool over the same cause axes.
    radar_chart(causes, values, labels, "Causes of error", interactive, folder)
|
||||
|
||||
|
||||
def get_common_errors(
    db: Path,
    tool: Optional[str] = None,
    status: Optional[str] = None,
    use_androguard: Optional[bool] = None,
    use_java: Optional[bool] = None,
    use_prolog: Optional[bool] = None,
    use_ruby: Optional[bool] = None,
    use_soot: Optional[bool] = None,
    use_apktool: Optional[bool] = None,
    use_ocaml: Optional[bool] = None,
    use_python: Optional[bool] = None,
    use_scala: Optional[bool] = None,
    folder: Optional[Path] = None,
    limit: int = 10,
):
    """Get the most common errors.

    Writes a CSV (columns ``error``, ``msg``, ``count``) with the *limit*
    most frequent distinct errors, optionally filtered by tool, execution
    status and tool features.  Output goes to stdout when *folder* is None,
    otherwise to a file named after the active filters.
    """
    args: dict[str, Any] = {"limit": limit}
    clauses = []
    if tool is not None:
        clauses.append("(distinct_error.tool_name = :tool)")
        args["tool"] = tool
    if status is not None:
        clauses.append("(exec.tool_status = :tool_status)")
        args["tool_status"] = status

    # Feature filters: restrict to tools that do (or do not) use a given
    # language / framework.  BUG FIX: use_androguard was previously
    # accepted and shown in the output file name but never added to the
    # WHERE clause; it is now filtered like every other feature.
    feature_filters = {
        "use_androguard": use_androguard,
        "use_java": use_java,
        "use_prolog": use_prolog,
        "use_ruby": use_ruby,
        "use_soot": use_soot,
        "use_apktool": use_apktool,
        "use_ocaml": use_ocaml,
        "use_python": use_python,
        "use_scala": use_scala,
    }
    for feature, value in feature_filters.items():
        if value is not None:
            clauses.append(f"(tool.{feature} = :{feature})")
            args[feature] = value
    where_clause = ""
    if clauses:
        where_clause = f"WHERE {' AND '.join(clauses)}"

    if folder is None:
        out = sys.stdout
    else:
        # Build a file name that encodes the active filters.
        tool_str = "" if tool is None else f"_for_{tool}"
        status_str = "" if status is None else f"_when_{status}"
        if all(value is None for value in feature_filters.values()):
            features_str = ""
        else:
            features_str = "_using" + "".join(
                "_" + feature.removeprefix("use_")
                for feature, value in feature_filters.items()
                if value
            )
        name = f"{limit}_most_common_errors{tool_str}{status_str}{features_str}.csv"
        # make sure the folder exists
        folder.mkdir(parents=True, exist_ok=True)
        out = (folder / name).open("w")

    with sqlite3.connect(db) as con:
        cur = con.cursor()
        writer = csv.DictWriter(out, fieldnames=["error", "msg", "count"])
        writer.writeheader()
        for row in cur.execute(
            (
                f"SELECT COUNT(*) AS cnt, {', '.join(ERROR_CARACT)} "
                f"FROM {DISTINCT_ERRORS} "
                "INNER JOIN tool ON distinct_error.tool_name = tool.tool_name "
                "INNER JOIN exec ON "
                " distinct_error.tool_name = exec.tool_name AND "
                " distinct_error.sha256 = exec.sha256 "
                f"{where_clause}"
                f"GROUP BY {', '.join(ERROR_CARACT)} "
                "ORDER BY cnt DESC LIMIT :limit;"
            ),
            args,
        ):
            row_d = {k: v for (k, v) in zip(("cnt", *ERROR_CARACT), row)}
            writer.writerow(reduce_error_row(row_d))
    if folder is not None:
        out.close()
|
||||
|
||||
|
||||
def reduce_error_row(row: dict[str, Any]) -> dict[str, Any]:
    """Reduce an error from an sqlite row to a simpler row for csv output.

    The result has three keys: ``error`` (unchanged), ``msg`` (all the
    descriptive fields concatenated, each non-empty one followed by a
    space) and ``count`` (taken from ``row["cnt"]``).
    """

    def _pad(value: Optional[str]) -> str:
        # None / empty string contributes nothing; otherwise append a
        # separating space.
        return f"{value} " if value else ""

    level = _pad(row["level"])
    error = _pad(row["error"])
    msg = _pad(row["msg"])
    raised_info = _pad(row["raised_info"])
    called_info = _pad(row["called_info"])
    file = _pad(row["file"])
    function = _pad(row["function"])
    origin = _pad(row["origin"])

    new_row: dict[str, Any] = {"error": row["error"]}
    # BUG FIX: the original concatenated called_info twice and never used
    # raised_info (which it had nevertheless computed).
    new_row["msg"] = (
        f"{level}{error}{msg}{raised_info}{called_info}{file}{function}{origin}"
    )
    new_row["count"] = row["cnt"]
    return new_row
|
||||
|
||||
|
||||
def get_common_error_classes(
    db: Path,
    tool: Optional[str] = None,
    status: Optional[str] = None,
    use_androguard: Optional[bool] = None,
    use_java: Optional[bool] = None,
    use_prolog: Optional[bool] = None,
    use_ruby: Optional[bool] = None,
    use_soot: Optional[bool] = None,
    use_apktool: Optional[bool] = None,
    use_ocaml: Optional[bool] = None,
    use_python: Optional[bool] = None,
    use_scala: Optional[bool] = None,
    folder: Optional[Path] = None,
    limit: int = 10,
):
    """Get the most common error classes.

    Like get_common_errors, but grouping only by exception name and error
    type.  Writes a CSV (columns ``type``, ``error``, ``count``) to stdout
    or to a file named after the active filters.
    """
    args: dict[str, Any] = {"limit": limit}
    clauses = []
    if tool is not None:
        clauses.append("(distinct_error.tool_name = :tool)")
        args["tool"] = tool
    if status is not None:
        clauses.append("(exec.tool_status = :tool_status)")
        args["tool_status"] = status

    # Feature filters.  BUG FIX: use_androguard was previously accepted and
    # shown in the output file name but never added to the WHERE clause; it
    # is now filtered like every other feature.
    feature_filters = {
        "use_androguard": use_androguard,
        "use_java": use_java,
        "use_prolog": use_prolog,
        "use_ruby": use_ruby,
        "use_soot": use_soot,
        "use_apktool": use_apktool,
        "use_ocaml": use_ocaml,
        "use_python": use_python,
        "use_scala": use_scala,
    }
    for feature, value in feature_filters.items():
        if value is not None:
            clauses.append(f"(tool.{feature} = :{feature})")
            args[feature] = value
    where_clause = ""
    if clauses:
        where_clause = f"WHERE {' AND '.join(clauses)}"

    if folder is None:
        out = sys.stdout
    else:
        # Build a file name that encodes the active filters.
        tool_str = "" if tool is None else f"_for_{tool}"
        status_str = "" if status is None else f"_when_{status}"
        if all(value is None for value in feature_filters.values()):
            features_str = ""
        else:
            features_str = "_using" + "".join(
                "_" + feature.removeprefix("use_")
                for feature, value in feature_filters.items()
                if value
            )
        name = (
            f"{limit}_most_common_errors_classes"
            f"{tool_str}{status_str}{features_str}.csv"
        )
        # make sure the folder exists
        folder.mkdir(parents=True, exist_ok=True)
        out = (folder / name).open("w")

    with sqlite3.connect(db) as con:
        cur = con.cursor()
        writer = csv.DictWriter(out, fieldnames=["type", "error", "count"])
        writer.writeheader()
        for row in cur.execute(
            (
                "SELECT COUNT(*) AS cnt, distinct_error.error, distinct_error.error_type "
                f"FROM {DISTINCT_ERROR_CLASS} "
                "INNER JOIN tool ON distinct_error.tool_name = tool.tool_name "
                "INNER JOIN exec ON "
                " distinct_error.tool_name = exec.tool_name AND "
                " distinct_error.sha256 = exec.sha256 "
                f"{where_clause} "
                "GROUP BY distinct_error.error, distinct_error.error_type "
                "ORDER BY cnt DESC LIMIT :limit;"
            ),
            args,
        ):
            row_d = {k: v for (k, v) in zip(("count", "error", "type"), row)}
            writer.writerow(row_d)
    if folder is not None:
        out.close()
|
||||
|
||||
|
||||
def get_nb_error(
    db: Path,
    folder: Optional[Path] = None,
):
    """Compute the average number of errors per execution for each tool.

    Writes ``average_number_of_error_by_exec.csv`` (or prints to stdout
    when *folder* is None): one column per tool, with average and standard
    deviation rows per execution status.
    """
    # Subquery: number of errors for every (tool, apk) pair, 0 included,
    # via a CROSS JOIN of all tools with all apks LEFT JOINed on error.
    NB_ERR = (
        "("
        "SELECT "
        " exec_id.tool_name, exec_id.sha256, COUNT(error._rowid_) AS nb_err "
        "FROM ("
        " (SELECT tool_name FROM tool) CROSS JOIN (SELECT sha256 FROM apk)"
        ") AS exec_id LEFT JOIN error "
        "ON exec_id.tool_name=error.tool_name AND exec_id.sha256=error.sha256 "
        "GROUP BY exec_id.tool_name, exec_id.sha256"
        ") AS nb_err"
    )
    data = {}
    tools = set()
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Variance computed as E[X^2] - E[X]^2.
        for tool, status, avg, variance in cur.execute(
            "SELECT nb_err.tool_name, exec.tool_status, AVG(nb_err.nb_err), "
            " AVG(nb_err.nb_err*nb_err.nb_err) - AVG(nb_err.nb_err)*AVG(nb_err.nb_err) "
            f"FROM {NB_ERR} "
            "INNER JOIN exec ON nb_err.tool_name = exec.tool_name AND nb_err.sha256 = exec.sha256 "
            "GROUP BY nb_err.tool_name, exec.tool_status;"
        ):
            tools.add(tool)
            data[(tool, status)] = (avg, variance)
    fieldnames = list(tools)
    fieldnames.sort()
    fieldnames = ["", *fieldnames]
    if folder is None:
        fd = sys.stdout
    else:
        fd = (folder / "average_number_of_error_by_exec.csv").open("w")
    writer = csv.DictWriter(fd, fieldnames=fieldnames)
    writer.writeheader()
    for status in ("FINISHED", "FAILED", "TIMEOUT"):
        row = {"": status}
        for tool in tools:
            row[tool] = round(data.get((tool, status), (0, 0))[0], 2)
        writer.writerow(row)
        # NOTE(review): indentation reconstructed from a mangled extraction;
        # the standard-deviation row is emitted once per status here —
        # confirm against the original layout.
        row = {"": "standard deviation"}
        for tool in tools:
            row[tool] = round(data.get((tool, status), (0, 0))[1] ** (1 / 2), 2)
        writer.writerow(row)
    if folder is not None:
        fd.close()
|
||||
|
||||
|
||||
def error_type_repartition(
    db: Path, interactive: bool = True, folder: Optional[Path] = None
):
    """Draw a heat map of the dominant error types for each tool.

    For every tool, keeps its 3 most frequent error types and plots, for
    each (error type, tool) cell, the percentage of that tool's errors
    having this type.

    :param db: sqlite database holding the ``error`` table.
    :param interactive: display the figure interactively.
    :param folder: when given, the folder where the figure is saved.
    """
    # data[tool][error] = occurrence count; total[tool] = all named errors.
    data: dict[str, dict[str, int]] = {}
    total: dict[str, int] = {}
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        for tool, err, n in cur.execute(
            "SELECT tool_name, error, COUNT(*) FROM error GROUP BY tool_name, error;"
        ):
            if tool not in data:
                data[tool] = {}
                total[tool] = 0
            if err is not None and err != "":
                data[tool][err] = n
        for tool, n in cur.execute(
            "SELECT tool_name, COUNT(*) FROM error WHERE error IS NOT NULL AND error != '' GROUP BY tool_name;"
        ):
            total[tool] = n
    # Keep the N most frequent error types of each tool as heat map rows.
    errors = set()
    N = 3
    for tool in data:
        for err in sorted(
            [err for err in data[tool]], key=lambda err: data[tool][err], reverse=True
        )[:N]:
            # TODO Check of > 10%?
            errors.add(err)
    tools = sorted(data.keys())
    errors_l = sorted(errors)
    # Cell value: share (in %) of the tool's errors having this type.
    values = [
        [
            data[tool].get(err, 0) * 100 / total[tool] if total[tool] != 0 else 0
            for tool in tools
        ]
        for err in errors_l
    ]
    plt.figure(figsize=(22, 20))
    im = plt.imshow(values, cmap="Greys")
    cbar = plt.colorbar(im)
    cbar.ax.set_ylabel(
        "% of the error type among the error raised by the tool",
        rotation=-90,
        va="bottom",
    )

    import numpy as np

    plt.xticks(np.arange(len(tools)), labels=tools, rotation=80)
    plt.yticks(np.arange(len(errors_l)), labels=errors_l)
    # Minor ticks at cell boundaries give the white grid between cells.
    plt.xticks(np.arange(len(tools) + 1) - 0.5, minor=True)
    plt.yticks(np.arange(len(errors_l) + 1) - 0.5, minor=True)
    plt.grid(which="minor", color="w", linestyle="-", linewidth=3)
    plt.tick_params(which="minor", bottom=False, left=False)
    plt.title("Repartition of error types among tools")
    # plt.figure().set_figheight(10)
    render(
        "Repartition of error types among tools",
        interactive,
        folder,
        tight_layout=False,
    )
|
62
rasta_data_manipulation/rasta_triturage/ressources.py
Normal file
62
rasta_data_manipulation/rasta_triturage/ressources.py
Normal file
|
@ -0,0 +1,62 @@
|
|||
import sqlite3
|
||||
import sys
|
||||
import csv
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def get_ressource(
    db: Path,
    folder: Optional[Path] = None,
):
    """Export average run time and memory usage per tool and status as CSV.

    Two tables are produced, `average_time.csv` and `average_mem.csv`:
    one column per tool, one row per status (FINISHED, FAILED, TIMEOUT)
    plus a final "standard deviation" row.

    Args:
        db: path of the sqlite database (needs the `exec` table).
        folder: output directory for the CSV files; if None, both tables
            are written to stdout instead.
    """
    data_time = {}
    data_mem = {}
    tools = set()
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Variance computed directly in SQL as E[X^2] - E[X]^2.
        for tool, status, avg_time, var_time, avg_mem, var_mem in cur.execute(
            "SELECT tool_name, exec.tool_status, "
            " AVG(time), AVG(time*time) - AVG(time)*AVG(time), "
            " AVG(max_rss_mem), AVG(max_rss_mem*max_rss_mem) - AVG(max_rss_mem)*AVG(max_rss_mem) "
            "FROM exec "
            "GROUP BY tool_name, tool_status;"
        ):
            tools.add(tool)
            # AVG() is None when every value in the group is NULL.
            avg_time = avg_time if avg_time is not None else 0
            avg_mem = avg_mem if avg_mem is not None else 0
            # E[X^2] - E[X]^2 can come out slightly negative because of
            # floating-point rounding; clamp it so the square root below
            # never yields a complex number (which would crash round()).
            var_time = max(var_time or 0, 0)
            var_mem = max(var_mem or 0, 0)
            data_time[(tool, status)] = (avg_time, var_time ** (1 / 2))
            data_mem[(tool, status)] = (avg_mem, var_mem ** (1 / 2))
    # First (unnamed) column holds the status label, then one column per tool.
    fieldnames = ["", *sorted(tools)]
    if folder is None:
        fd_time = sys.stdout
        fd_mem = sys.stdout
    else:
        # newline="" is required by the csv module to avoid spurious blank
        # lines on platforms with \r\n line endings.
        fd_time = (folder / "average_time.csv").open("w", newline="")
        fd_mem = (folder / "average_mem.csv").open("w", newline="")
    try:
        writer_time = csv.DictWriter(fd_time, fieldnames=fieldnames)
        writer_mem = csv.DictWriter(fd_mem, fieldnames=fieldnames)
        writer_time.writeheader()
        writer_mem.writeheader()
        for status in ("FINISHED", "FAILED", "TIMEOUT"):
            row_time = {"": status}
            row_mem = {"": status}
            for tool in tools:
                row_time[tool] = round(data_time.get((tool, status), (0, 0))[0], 2)
                row_mem[tool] = round(data_mem.get((tool, status), (0, 0))[0], 2)
            writer_time.writerow(row_time)
            writer_mem.writerow(row_mem)
        # NOTE(review): `status` below is the leftover loop variable
        # ("TIMEOUT"), so this row reports the standard deviation of the
        # TIMEOUT runs only. Behavior preserved as found -- confirm intent.
        row_time = {"": "standard deviation"}
        row_mem = {"": "standard deviation"}
        for tool in tools:
            row_time[tool] = round(data_time.get((tool, status), (0, 0))[1], 2)
            row_mem[tool] = round(data_mem.get((tool, status), (0, 0))[1], 2)
        writer_time.writerow(row_time)
        writer_mem.writerow(row_mem)
    finally:
        # Only close the files we opened ourselves, never stdout.
        if folder is not None:
            fd_time.close()
            fd_mem.close()
|
446
rasta_data_manipulation/rasta_triturage/status.py
Normal file
446
rasta_data_manipulation/rasta_triturage/status.py
Normal file
|
@ -0,0 +1,446 @@
|
|||
"""
|
||||
Plots related to the tool status.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
import sqlite3
|
||||
from pathlib import Path
|
||||
from matplotlib import pyplot as plt # type: ignore
|
||||
from typing import Any, Callable, Optional
|
||||
from .utils import (
|
||||
render,
|
||||
DENSE_DASH,
|
||||
DENSE_DOT,
|
||||
get_list_tools,
|
||||
plot_generic,
|
||||
MARKERS,
|
||||
COLORS,
|
||||
)
|
||||
from .populate_db_tool import TOOL_INFO
|
||||
|
||||
# Line style per tool: dotted for Soot-based tools, dashed for the others,
# so the two tool families are visually distinguishable in line plots.
TOOL_LINE_STYLE = {
    tool_info["tool_name"]: DENSE_DOT if tool_info["use_soot"] else DENSE_DASH
    for tool_info in TOOL_INFO
}
|
||||
|
||||
|
||||
def plot_status_by_tool(
    db: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Exit Status",
):
    """Plot the repartition of status by tools as stacked percentage bars.

    Args:
        db: path of the sqlite database (needs the `apk` and `exec` tables).
        interactive: display the figure on screen.
        image_path: directory where the figure is saved (if not None).
        tools: tools to include; defaults to every tool present in `exec`.
        title: figure title, also used as the saved file name.
    """
    if tools is None:
        tools = get_list_tools(db)
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        # Placeholder list for the IN (...) clause, one '?' per tool.
        tools_list_format = f"({','.join(['?' for _ in tools])})"
        nb_apk = cur.execute("SELECT COUNT(*) FROM apk;").fetchone()[0]
        status = cur.execute(
            (
                "SELECT tool_name, tool_status, COUNT(sha256) "
                "FROM exec "
                f"WHERE tool_name IN {tools_list_format}"
                "GROUP BY tool_name, tool_status;"
            ),
            tools,
        ).fetchall()
    occurences = {}
    for tool, stat, occurence in status:
        occurences[(tool, stat)] = occurence
    # tools.sort(key=lambda t: occurences.get((t, "FINISHED"), 0), reverse=True)
    tools.sort()

    values = {
        "Finished": np.zeros(len(tools)),
        "Time Out": np.zeros(len(tools)),
        "Other": np.zeros(len(tools)),
        "Failed": np.zeros(len(tools)),
    }
    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Failed": "#D55E00",
        "Other": "#555555",  # TODO: better color
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Failed": "\\",
        "Other": ".",
    }
    for i, tool in enumerate(tools):
        values["Finished"][i] = occurences.get((tool, "FINISHED"), 0)
        values["Time Out"][i] = occurences.get((tool, "TIMEOUT"), 0)
        values["Failed"][i] = occurences.get((tool, "FAILED"), 0)
        # Apks with no record (or an unexpected status) for this tool.
        values["Other"][i] = (
            nb_apk - values["Finished"][i] - values["Time Out"][i] - values["Failed"][i]
        )
    # Convert counts to percentages. Guard against an empty apk table
    # (division by zero), consistent with plot_status_by_tool_and_malware.
    if nb_apk != 0:
        for stat in values:
            values[stat] = (100 * values[stat]) / nb_apk
    bottom = np.zeros(len(tools))

    print("Finishing rate:")
    for t, p in zip(tools, values["Finished"]):
        print(f"{t}: {p:.2f}%")

    plt.figure(figsize=(20, 9), dpi=80)
    # Visual guide lines at 15%, 50% and 85%.
    plt.axhline(y=50, linestyle="dotted")
    plt.axhline(y=85, linestyle="dotted")
    plt.axhline(y=15, linestyle="dotted")
    for stat in ["Finished", "Time Out", "Other", "Failed"]:
        plt.bar(
            tools,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=0.6,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(tools, tools, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path)
|
||||
|
||||
|
||||
def plot_status_by_tool_and_malware(
    db: Path,
    interactive: bool = True,
    image_path: Path | None = None,
    tools: list[str] | None = None,
    title: str = "Exit Status Goodware/Malware",
):
    """Plot the repartition of status by tools, split goodware/malware.

    For each tool two stacked percentage bars are drawn side by side: the
    left one for goodware (vt_detection == 0), the right one for malware.

    Args:
        db: path of the sqlite database (needs the `apk` and `exec` tables).
        interactive: display the figure on screen.
        image_path: directory where the figure is saved (if not None).
        tools: tools to include; defaults to every tool present in `exec`.
        title: figure title, also used as the saved file name.
    """

    def pct(count: float, total: int) -> float:
        # Percentage of `count` over `total`, 0 when the population is empty.
        return 0 if total == 0 else (100 * count) / total

    if tools is None:
        tools = get_list_tools(db)
    with sqlite3.connect(db) as con:
        cur = con.cursor()
        tools_list_format = f"({','.join(['?' for _ in tools])})"
        nb_goodware = cur.execute(
            "SELECT COUNT(*) FROM apk WHERE vt_detection == 0;"
        ).fetchone()[0]
        nb_malware = cur.execute(
            "SELECT COUNT(*) FROM apk WHERE vt_detection != 0;"
        ).fetchone()[0]
        status = cur.execute(
            (
                "SELECT tool_name, tool_status, COUNT(exec.sha256), vt_detection != 0 "
                "FROM exec INNER JOIN apk ON exec.sha256 = apk.sha256 "
                f"WHERE tool_name IN {tools_list_format} "
                "GROUP BY tool_name, tool_status, vt_detection != 0;"
            ),
            tools,
        ).fetchall()
    occurences = {}
    for tool, stat, occurence, malware in status:
        occurences[(tool, stat, bool(malware))] = occurence
    tools.sort()

    values = {
        "Finished": np.zeros(len(tools) * 2),
        "Time Out": np.zeros(len(tools) * 2),
        "Other": np.zeros(len(tools) * 2),
        "Failed": np.zeros(len(tools) * 2),
    }
    colors = {
        "Finished": "#009E73",
        "Time Out": "#56B4E9",
        "Other": "#555555",  # TODO: find beter color
        "Failed": "#D55E00",
    }
    hatch = {
        "Finished": "/",
        "Time Out": "x",
        "Other": ".",
        "Failed": "\\",
    }
    for i, tool in enumerate(tools):
        # Even slots hold the goodware bar, odd slots the malware bar.
        i_goodware = 2 * i
        i_malware = 2 * i + 1
        for label, db_status in (
            ("Finished", "FINISHED"),
            ("Time Out", "TIMEOUT"),
            ("Failed", "FAILED"),
        ):
            values[label][i_goodware] = occurences.get((tool, db_status, False), 0)
            values[label][i_malware] = occurences.get((tool, db_status, True), 0)
        # Apks with no record (or an unexpected status) for this tool.
        values["Other"][i_goodware] = (
            nb_goodware
            - values["Finished"][i_goodware]
            - values["Time Out"][i_goodware]
            - values["Failed"][i_goodware]
        )
        values["Other"][i_malware] = (
            nb_malware
            - values["Finished"][i_malware]
            - values["Time Out"][i_malware]
            - values["Failed"][i_malware]
        )
        # Convert counts to percentages of each population.
        for label in ("Finished", "Time Out", "Failed", "Other"):
            values[label][i_goodware] = pct(values[label][i_goodware], nb_goodware)
            values[label][i_malware] = pct(values[label][i_malware], nb_malware)
    bottom = np.zeros(len(tools) * 2)

    # X position of each bar: the two bars of a pair are `lstep` apart,
    # consecutive pairs are `bstep` apart.
    x_axis = np.zeros(len(tools) * 2)
    x_width = 3
    x_0 = x_width / 2
    lstep = 1
    bstep = 5
    for i in range(len(tools)):
        x_0 += bstep + x_width
        x_axis[2 * i] = x_0
        x_0 += lstep + x_width
        x_axis[2 * i + 1] = x_0
    tick_legend = []
    for tool in tools:
        tick_legend.append(f"{tool}")  # (f"{tool} on goodware")
        tick_legend.append("")  # (f"{tool} on malware")

    plt.figure(figsize=(20, 9), dpi=80)
    for stat in ["Finished", "Time Out", "Other", "Failed"]:
        plt.bar(
            x_axis,
            values[stat],
            label=stat,
            color=colors[stat],
            hatch=hatch[stat],
            bottom=bottom,
            width=x_width,
            edgecolor="black",
        )
        bottom += values[stat]
    plt.xticks(x_axis, tick_legend, rotation=80)
    plt.legend()
    plt.ylabel("% of analysed apk")
    render(title, interactive, image_path)
|
||||
|
||||
|
||||
def plot_status_by_generic_x(
    tools: list[str],
    x_col: str,
    x_label: str,
    x_in_title: str,
    args,
    group_by: Optional[str] = None,
):
    # Plot, for each tool, the finishing rate against the apk column `x_col`,
    # split malware/goodware and combined. `args` must provide `data` (the
    # database path), `display` and `figures_file` (render options).
    # `x_col` / `group_by` are interpolated directly into the SQL: callers
    # pass trusted, hard-coded column names only.
    tools.sort()
    # NOTE(review): placed after the first statement, this string is a no-op
    # expression rather than the function docstring; kept as found.
    """group_by default to x_col, x_col must be uniq for a grouped by group_by"""
    if group_by is None:
        group_by = x_col
    with sqlite3.connect(args.data) as con:
        cur = con.cursor()
        # Number of goodware apks (vt_detection == 0) per x group.
        nb_goodware_res = cur.execute(
            f"SELECT {group_by}, COUNT(*) FROM apk WHERE vt_detection == 0 GROUP BY {group_by};",
        ).fetchall()
        nb_goodware = {}
        for x_group, count in nb_goodware_res:
            nb_goodware[x_group] = count
        # Number of malware apks (vt_detection != 0) per x group.
        nb_malware_res = cur.execute(
            f"SELECT {group_by}, COUNT(*) FROM apk WHERE vt_detection != 0 GROUP BY {group_by};",
        ).fetchall()
        nb_malware = {}
        for x_group, count in nb_malware_res:
            nb_malware[x_group] = count
        # FINISHED run count per (tool, x value, x group, malware flag).
        statuses_res = cur.execute(
            (
                f"SELECT tool_name, {x_col}, {group_by}, COUNT(exec.sha256), vt_detection != 0 "
                "FROM exec INNER JOIN apk ON exec.sha256 = apk.sha256 "
                f"WHERE tool_status = 'FINISHED' "
                f"GROUP BY tool_name, tool_status, {group_by}, vt_detection != 0 "
                f"HAVING {x_col} IS NOT NULL;"
            )
        ).fetchall()
    # Total FINISHED count per (tool, x group), malware and goodware together.
    # The stored x_val is the one of the first row seen for that group
    # (x_col is assumed unique within a group_by group, per the note above).
    tots = {}
    for tool_, x_val, x_group, count, is_malware in statuses_res:
        if not (tool_, x_group) in tots:
            tots[(tool_, x_group)] = [x_val, 0]
        tots[(tool_, x_group)][1] += count
    plots = []
    plots_malgood = []
    metas = []
    metas_malgood = []
    for tool in tools:
        # Finishing rate on malware only, one point per x value; groups with
        # no malware are skipped to avoid dividing by zero.
        malware_plot = [
            (x_val, 100 * count / nb_malware[x_group])
            for (tool_, x_val, x_group, count, is_malware) in statuses_res
            if (tool_ == tool) and is_malware and nb_malware.get(x_group, 0) != 0
        ]
        malware_meta = (f"{tool} on malware", DENSE_DOT, MARKERS[tool], COLORS[tool])
        # Same on goodware only.
        goodware_plot = [
            (x_val, 100 * count / nb_goodware[x_group])
            for (tool_, x_val, x_group, count, is_malware) in statuses_res
            if (tool_ == tool) and not is_malware and nb_goodware.get(x_group, 0) != 0
        ]
        goodware_meta = (f"{tool} on goodware", DENSE_DASH, MARKERS[tool], COLORS[tool])
        # Combined finishing rate over the whole population of the group.
        total_plot = [
            (
                x_val,
                100
                * count
                / (nb_malware.get(x_group, 0) + nb_goodware.get(x_group, 0)),
            )
            for ((tool_, x_group), (x_val, count)) in tots.items()
            if (tool_ == tool)
            and (nb_malware.get(x_group, 0) + nb_goodware.get(x_group, 0)) != 0
        ]
        total_meta = (f"{tool}", DENSE_DOT, MARKERS[tool], COLORS[tool])
        plots.append(total_plot)
        plots_malgood.append(malware_plot)
        plots_malgood.append(goodware_plot)
        metas.append(total_meta)
        metas_malgood.append(malware_meta)
        metas_malgood.append(goodware_meta)

        # Per-tool figures: malware vs goodware, then combined.
        plot_generic(
            [goodware_plot, malware_plot],
            [goodware_meta, malware_meta],
            x_label,
            "finishing rate",
            f"Finishing Rate by {x_in_title} for {tool} on malware and goodware",
            ylim=(-5, 105),
            interactive=args.display,
            image_path=args.figures_file,
        )
        plot_generic(
            [total_plot],
            [total_meta],
            x_label,
            "finishing rate",
            f"Finishing Rate by {x_in_title} for {tool}",
            ylim=(-5, 105),
            interactive=args.display,
            image_path=args.figures_file,
        )
    # Aggregate figures over every tool.
    plot_generic(
        plots_malgood,
        metas_malgood,
        x_label,
        "finishing rate",
        f"Finishing Rate by {x_in_title} on malware and goodware",
        ylim=(-5, 105),
        interactive=args.display,
        image_path=args.figures_file,
    )
    plot_generic(
        plots,
        metas,
        x_label,
        "finishing rate",
        f"Finishing Rate by {x_in_title}",
        ylim=(-5, 105),
        interactive=args.display,
        image_path=args.figures_file,
    )
|
||||
|
||||
|
||||
def dbg(arg):
    """Identity pass-through used as a debug hook on SQL query strings.

    Re-enable the print below to trace the queries built by the callers.
    """
    # print(arg)
    return arg
|
||||
|
||||
|
||||
def plot_all_status_by_generic_x(
    tools: list[str],
    x_col: str,
    x_label: str,
    title: str,
    args,
    condition: Optional[str] = None,
    apk_condition: Optional[str] = None,
    group_by: Optional[str] = None,
):
    """Plot every tool's finishing rate against the apk column `x_col`.

    `condition` is AND-ed into the WHERE clause of the exec/apk/tool join;
    `apk_condition` additionally restricts the apk population used as the
    denominator. SQL fragments are interpolated directly (f-strings), so
    callers must pass trusted, hard-coded fragments only. `args` must
    provide `data` (database path), `display` and `figures_file`.
    """
    # Normalize the two optional filters: the joined query always gets both
    # filters ("AND (...)"), the apk count only gets apk_condition ("WHERE (...)").
    if condition is None and apk_condition is None:
        condition = ""
        apk_condition = ""
    elif apk_condition is None:
        condition = f"AND ({condition})"
        apk_condition = ""
    elif condition is None:
        condition = f"AND ({apk_condition})"
        apk_condition = f"WHERE ({apk_condition})"
    else:
        condition = f"AND ({apk_condition}) AND ({condition})"
        apk_condition = f"WHERE ({apk_condition})"
    if group_by is None:
        group_by = x_col
    nb_apk = {}
    tools.sort()
    with sqlite3.connect(args.data) as con:
        cur = con.cursor()
        # Size of the apk population per x group (the denominator).
        for x_group, count in cur.execute(
            f"SELECT {group_by}, COUNT(*) FROM apk {apk_condition} GROUP BY {group_by};",
        ):
            nb_apk[x_group] = count
        # FINISHED run count per (tool, x value, x group); dbg() lets the
        # generated query be traced when debugging.
        statuses_res = cur.execute(
            dbg(
                f"SELECT exec.tool_name, {x_col}, {group_by}, COUNT(exec.sha256) "
                "FROM exec "
                " INNER JOIN apk ON exec.sha256 = apk.sha256 "
                " INNER JOIN tool ON exec.tool_name = tool.tool_name "
                f"WHERE tool_status = 'FINISHED' {condition} "
                f"GROUP BY exec.tool_name, tool_status, {group_by} "
                f"HAVING {x_col} IS NOT NULL;"
            )
        ).fetchall()
    plots = []
    metas = []
    for tool in tools:
        # One (x, finishing-rate %) point per x value; empty groups skipped.
        plot = [
            (x_val, 100 * count / nb_apk[x_group])
            for (tool_, x_val, x_group, count) in statuses_res
            if (tool_ == tool) and nb_apk.get(x_group, 0) != 0
        ]
        if len(plot) == 0:
            continue
        meta = (tool, TOOL_LINE_STYLE[tool], MARKERS[tool], COLORS[tool])
        plots.append(plot)
        metas.append(meta)
    plot_generic(
        plots,
        metas,
        x_label,
        "finishing rate",
        title,
        ylim=(-5, 105),
        interactive=args.display,
        image_path=args.figures_file,
    )
|
185
rasta_data_manipulation/rasta_triturage/utils.py
Normal file
185
rasta_data_manipulation/rasta_triturage/utils.py
Normal file
|
@ -0,0 +1,185 @@
|
|||
"""
|
||||
Utils.
|
||||
"""
|
||||
|
||||
import matplotlib.pyplot as plt # type: ignore
|
||||
import numpy as np
|
||||
from slugify import slugify # type: ignore
|
||||
from typing import Any, Callable, Optional
|
||||
from pathlib import Path
|
||||
import sqlite3
|
||||
|
||||
# Dashed / dotted line styles (matplotlib (offset, on-off-seq) format).
DENSE_DASH = (0, (5, 1))
DENSE_DOT = (0, (1, 3))

# Matplotlib marker assigned to each tool, so curves stay distinguishable
# even in black and white.
MARKERS = {
    "adagio": ".",
    "amandroid": "o",
    "anadroid": "X",
    "androguard": "+",
    "androguard_dad": "v",
    "apparecium": "d",
    "blueseal": "^",
    "dialdroid": "<",
    "didfail": ">",
    "droidsafe": r"$\circ$",
    "flowdroid": r"$\boxplus$",
    "gator": r"$\otimes$",
    "ic3": "1",
    "ic3_fork": "s",
    "iccta": "P",
    "mallodroid": r"$\divideontimes$",
    "perfchecker": "*",
    "redexer": "x",
    "saaf": "D",
    "wognsen_et_al": r"$\rtimes$",
}

# Color assigned to each tool.
# NOTE(review): the original literal listed 8 tools twice; Python keeps the
# last occurrence of a duplicated key, so some tools end up sharing a color
# (e.g. didfail and adagio are both "#1f77b4"). The mapping below is the
# deduplicated, effective one -- runtime-identical to the original literal;
# confirm the palette collisions are intended.
COLORS = {
    "didfail": "#1f77b4",
    "adagio": "#1f77b4",
    "iccta": "#2ca02c",
    "androguard": "#ff7f0e",
    "gator": "#9467bd",
    "mallodroid": "#2ca02c",
    "dialdroid": "#e377c2",
    "androguard_dad": "#d62728",
    "wognsen_et_al": "#9467bd",
    "perfchecker": "#17becf",
    "amandroid": "#8c564b",
    "ic3": "#ff7f0e",
    "apparecium": "#e377c2",
    "blueseal": "#d62728",
    "droidsafe": "#9467bd",
    "redexer": "#7f7f7f",
    "anadroid": "#e377c2",
    "saaf": "#7f7f7f",
    "ic3_fork": "#bcbd22",
    "flowdroid": "#17becf",
}
|
||||
|
||||
|
||||
def get_list_tools(db: Path) -> list[str]:
    """Get the list of distinct tool names found in the database.

    Args:
        db: path of the sqlite database (needs the `exec` table).

    Returns:
        The distinct tool names, in database order.
    """
    # sqlite3's `with` block only manages the transaction, it does NOT close
    # the connection -- close it explicitly to avoid leaking file handles.
    con = sqlite3.connect(db)
    try:
        rows = con.execute("SELECT DISTINCT tool_name FROM exec;").fetchall()
    finally:
        con.close()
    return [row[0] for row in rows]
|
||||
|
||||
|
||||
def radar_chart(
    axes: list[str],
    values: list[list[Any]],
    labels: list[str],
    title: str,
    interactive: bool,
    image_path: Path | None,
):
    """Draw a radar (spider) chart with one closed polygon per label.

    `axes` names the spokes; `values[i]` gives one value per spoke for
    `labels[i]`. The figure is handed to `render` (shown and/or saved).
    """
    plt.rc("grid", linewidth=1, linestyle="-")
    plt.rc("xtick", labelsize=15)
    plt.rc("ytick", labelsize=15)
    # One angle per spoke, plus a copy of the first to close each polygon.
    spoke_angles = np.linspace(0, 2 * np.pi, len(axes), endpoint=False)
    spoke_angles = np.concatenate((spoke_angles, [spoke_angles[0]]))  # type: ignore
    figure = plt.figure(figsize=(8, 8))
    polar_ax = figure.add_subplot(111, polar=True)
    for curve_label, curve_values in zip(labels, values):
        closed_values = curve_values + [curve_values[0]]
        polar_ax.plot(
            spoke_angles,
            closed_values,
            label=curve_label,
            marker=MARKERS.get(curve_label, "."),
        )
        polar_ax.fill(spoke_angles, closed_values, alpha=0.25)
    # Spoke labels (degrees expected by set_thetagrids).
    polar_ax.set_thetagrids(spoke_angles[:-1] * 180 / np.pi, axes)
    polar_ax.set_ylim(bottom=0)
    polar_ax.grid(True)
    # Legend below the chart, at most 5 entries per row.
    ncol = min(5, len(labels))
    polar_ax.legend(
        loc="lower left",
        bbox_to_anchor=(0.0, -0.2, ncol * 1.0 / 5, 0.102),
        ncol=ncol,
        mode="expand",
        borderaxespad=0.0,
        fancybox=True,
        shadow=True,
        fontsize="xx-small",
    )
    render(title, interactive, image_path)
|
||||
|
||||
|
||||
def render(
    title: str, interactive: bool, image_path: Path | None, tight_layout: bool = True
):
    """Render the current matplotlib figure, then close it.

    Args:
        title: figure name; slugified to build the saved file name.
        interactive: if True, display the figure on screen.
        image_path: if not None, directory where the figure is saved as PDF.
        tight_layout: apply plt.tight_layout() before rendering.
    """
    # The title is intentionally not drawn on the figure; it only names the file.
    # plt.title(title)
    if tight_layout:
        plt.tight_layout()
    if image_path is not None:
        # exist_ok=True already tolerates an existing directory, so the racy
        # exists() pre-check was dropped.
        image_path.mkdir(parents=True, exist_ok=True)
        plt.savefig(image_path / (slugify(title) + ".pdf"), format="pdf")
    if interactive:
        plt.show()
    plt.close()
|
||||
|
||||
|
||||
def mean(field: str) -> Callable[[list[Any]], float]:
    """Build a function computing the mean of `field` over a list of rows.

    Rows where `field` is None are ignored (consistent with `median`,
    instead of raising TypeError). An empty input yields 0.0.
    """

    def compute_mean(data: list[Any]) -> float:
        values = [e[field] for e in data if e[field] is not None]
        return 0.0 if not values else sum(values) / len(values)

    return compute_mean
|
||||
|
||||
|
||||
def median(field: str) -> Callable[[list[Any]], float]:
    """Build a function returning the (upper) median of `field` over rows.

    Rows where `field` is None are skipped; an empty input yields 0.0.
    For an even number of values the upper middle element is returned
    (no averaging of the two middle values).
    """

    def compute_median(data: list[Any]) -> float:
        ordered = sorted(e[field] for e in data if e[field] is not None)
        if not ordered:
            return 0.0
        return ordered[len(ordered) // 2]

    return compute_median
|
||||
|
||||
|
||||
def plot_generic(
    data: list[list[tuple[Any, Any]]],
    meta: list[tuple[str, Any, Any, str]],
    x_label: str,
    y_label: str,
    title: str,
    ylim: Optional[tuple[int, int]] = None,
    interactive: bool = True,
    image_path: Path | None = None,
):
    """Plot a list of curves, each represented by a list[(x, y)].

    `meta[i]` is the (label, linestyle, marker, color) tuple for `data[i]`.
    The figure is handed to `render` (shown and/or saved depending on
    `interactive` / `image_path`).

    NOTE: the point lists in `data` are sorted in place (the caller's
    lists are mutated).
    """
    plt.figure(figsize=(16, 9), dpi=80)
    for i, plot in enumerate(data):
        label, linestyle, marker, color = meta[i]
        # Sort points by x so each line is drawn left to right.
        plot.sort(key=lambda p: p[0])
        x_values = np.array([x for (x, _) in plot])
        y_values = np.array([y for (_, y) in plot])
        # Drop points with NaN y values (mask applied to both coordinates).
        plt.plot(
            x_values[~np.isnan(y_values)],
            y_values[~np.isnan(y_values)],
            label=label,
            marker=marker,
            color=color,
            linestyle=linestyle,
        )
    if ylim is not None:
        plt.ylim(ylim)
    # Legend centered below the axes.
    plt.legend(loc="upper center", ncol=4, bbox_to_anchor=(0.5, -0.1))
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    render(title, interactive, image_path)
|
Loading…
Add table
Add a link
Reference in a new issue