From 4ad17d24848b486b1ca6385edb36b9e3e2b0fd30 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Thu, 7 Aug 2025 00:06:29 +0200 Subject: [PATCH] some work on rasta --- 2_background/3_analysis_techniques.typ | 2 +- 2_background/main.typ | 19 ++++++++++++++++++- 3_rasta/0_intro.typ | 9 +++++---- 3_rasta/1_related_work.typ | 17 +++++++++++++---- 3_rasta/main.typ | 22 +++++++++++++++++++--- 5 files changed, 56 insertions(+), 13 deletions(-) diff --git a/2_background/3_analysis_techniques.typ b/2_background/3_analysis_techniques.typ index aae236e..2f25323 100644 --- a/2_background/3_analysis_techniques.typ +++ b/2_background/3_analysis_techniques.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": todo, APK, etal, ART, eg, jm-note +#import "../lib.typ": todo, APK, etal, ART, SDK, eg, jm-note, jfl-note #import "@preview/diagraph:0.3.3": raw-render == Android Reverse Engineering Techniques diff --git a/2_background/main.typ b/2_background/main.typ index 8674d19..76f2e96 100644 --- a/2_background/main.typ +++ b/2_background/main.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": todo, epigraph +#import "../lib.typ": todo, epigraph, jfl-note = Background @@ -24,3 +24,20 @@ * * Analyse dynamique */ + + +#jfl-note[ + Le chapitre background est tres technique et descriptif: il dit "il y a tel ettel outil". + A ce state et avant le chap 3 on aimerait lire: + - Les objectifs globaux de la these + - Ce que fait classiquement un reverser avec une app et quels sont ses pbs + - Puis de l'état de l'art pour dire quels sont les contribs du passé qui ont tenté d'aider ce reverser + + par ex, le reverser a envi de savoir si l'app fait fuiter des donées de géoloc. + Dans ce cas, on peut utiliser taintdroid, pour calculer si c'est le cas statiquement, et parler des limites. + Idem pour les contribs en analyse dyn. + A la fin on aimerait avoir une idée plus claire des limites en ayant illustré avec différentes taches de reverse. 
Limites résumées:
+ - Les outils crashent beaucoup
+ - Le chargement dyn fait chier
+ - Une appli dissequee ne peut pas etre analysé
+][todo]
diff --git a/3_rasta/0_intro.typ b/3_rasta/0_intro.typ
index f243f48..00d30cd 100644
--- a/3_rasta/0_intro.typ
+++ b/3_rasta/0_intro.typ
@@ -1,17 +1,18 @@
-#import "../lib.typ": etal
+#import "../lib.typ": etal, jfl-note, jm-note
#import "X_var.typ": *
== Introduction
In this chapter, we study the reusability of open source static analysis tools that appeared between 2011 and 2017, on a recent Android dataset.
-The scope of our study is *not* to quantify if the output results are accurate for ensuring reproducibility, because all the studied static analysis tools have different goals in the end.
+The scope of our study is *not* to quantify if the output results are accurate to ensure reproducibility, because all the studied static analysis tools have different goals in the end.
On the contrary, we take as hypothesis that the provided tools compute the intended result but may crash or fail to compute a result due to the evolution of the internals of an Android application, raising unexpected bugs during an analysis.
This chapter intends to show that sharing the software artifacts of a paper may not be sufficient to ensure that the provided software would be reusable.
Thus, our contributions are the following.
We carefully retrieved static analysis tools for Android applications that were selected by Li #etal~@Li2017 between 2011 and 2017.
+#jm-note[Many of those tools were presented in @sec:bg-static.][Yes but not really, @sec:bg-static does not present the contributions in detail \ FIX: develop @sec:bg-static]
We contacted the authors, whenever possible, for selecting the best candidate versions and to confirm the good usage of the tools.
-We rebuild the tools in their original environment and we plan to share our Docker images with this paper.
+We rebuild the tools in their original environment and #jm-note[share our Docker images.][ref] We evaluated the reusability of the tools by measuring the number of successful analysis of applications taken in the Drebin dataset~@Arp2014 and in a custom dataset that contains more recent applications (#NBTOTALSTRING in total). The observation of the success or failure of these analysis enables us to answer the following research questions: @@ -33,6 +34,6 @@ The chapter is structured as follows. @sec:rasta-methodology presents the methodology employed to build our evaluation process and @sec:rasta-xp gives the associated experimental results. // @sec:rasta-discussion investigates the reasons behind the observed failures of some of the tools. @sec:rasta-discussion discusses the limitations of this work and gives some takeaways for future contributions. -@sec:rasta-conclusion concludes the paper. +@sec:rasta-conclusion concludes the chapter. diff --git a/3_rasta/1_related_work.typ b/3_rasta/1_related_work.typ index c5aecab..9cd919e 100644 --- a/3_rasta/1_related_work.typ +++ b/3_rasta/1_related_work.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": etal, eg, ie +#import "../lib.typ": etal, eg, ie, jfl-note #import "X_var.typ": * == Related Work @@ -9,16 +9,25 @@ // For example, taint analysis datasets should provide the source and expected sink of a taint. // In some cases, the datasets are provided with additional software for automatizing part of the analysis. // Thus, -We review in this section the past existing contributions related to static analysis tools reusability. +#jfl-note[We review in this section the past existing contributions related to static analysis tools reusability.][lier a chap 2] Several papers have reviewed Android analysis tools produced by researchers. Li #etal~@Li2017 published a systematic literature review for Android static analysis before May 2015. 
They analyzed 92 publications and classified them by goal, method used to solve the problem and underlying technical solution for handling the bytecode when performing the static analysis.
In particular, they listed 27 approaches with an open-source implementation available. Nevertheless, experiments to evaluate the reusability of the pointed out software were not performed.
-We believe that the effort of reviewing the literature for making a comprehensive overview of available approaches should be pushed further: an existing published approach with a software that cannot be used for technical reasons endanger both the reproducibility and reusability of research.
+#jfl-note[We believe that the effort of reviewing the literature for making a comprehensive overview of available approaches should be pushed further: an existing published approach with a software that cannot be used for technical reasons endangers both the reproducibility and reusability of research.][A mettre avant?]
+
+Works that perform benchmarks of tools follow a similar method.
+They select a set of tools with similar goals.
+Usually, those contributions compare existing tools to their own, but some contributions do not introduce a new tool and focus on surveying the state of the art for some technique.
+As we saw in @sec:bg-datasets, the need for a ground truth to compare the results of the tools leads test datasets to often be handcrafted.
+Some studies select a few real-world applications instead, and manually reverse engineer those applications to get the ground truth.
+Once the tools and test dataset are selected, the tools are run on the application dataset, and the results of the tools are compared to the ground truth to determine the accuracy of each tool.
+Several factors are then considered to compare the results of the tools.
+It can be the number of false positives, false negatives, or even the time it took to finish the analysis.
+Occasionally, the number of applications a tool simply failed to analyse is also compared.

-As we saw in @sec:bg-datasets that the need for a ground truth to test analysis tools leads test datasets to often be handcrafted.
The few datasets composed of real-world application confirmed that some tools such as Amandroid~@weiAmandroidPreciseGeneral2014 and Flowdroid~@Arzt2014a are less efficient on real-world applications~@bosuCollusiveDataLeak2017 @luoTaintBenchAutomaticRealworld2022.
Unfortunatly, those real-world applications datasets are rather small, and a larger number of applications would be more suitable for our goal, #ie evaluating the reusability of a variety of static analysis tools.
diff --git a/3_rasta/main.typ b/3_rasta/main.typ
index 0bda79a..dd4f242 100644
--- a/3_rasta/main.typ
+++ b/3_rasta/main.typ
@@ -1,8 +1,24 @@
-#import "../lib.typ": todo
+#import "../lib.typ": todo, epigraph, highlight
+#import "X_var.typ": resultunusable

-= RASTA
+= Evaluating the Reusability of Android Static Analysis Tools
+
+#epigraph("Adira Tal and Sylvia Tilly, Star Trek: Discovery, \"People of Earth\"")[
+  #block[
+    #set align(left)
+    "This science vessel's practically a museum" \
+    "Okay, well, museums are cool, so..." \
+    "That's what someone who lives in a museum would say." \
+  ]
+]
+
+#align(center, highlight(inset: 15pt, width: 75%, block(align(left)[
+  This chapter intends to explore the robustness of past software dedicated to static analysis of Android applications.
+  We pursue the community effort that identified software supporting publications that perform static analysis of mobile applications and we propose a method for evaluating the reliability of this software.
+  We extensively evaluate static analysis tools on a recent dataset of Android applications including goodware and malware, that we designed to measure the influence of parameters such as the date and size of applications.
+ Our results show that #resultunusable of the evaluated tools are no longer usable and that the size of the bytecode and the min SDK version have the greatest influence on the reliability of tested tools. +]))) -#todo[Bring back element from previous version of rasta] #include("0_intro.typ") #include("1_related_work.typ")