From 96b8d24b298e9e0276dffb8208a76780ebed7aa5 Mon Sep 17 00:00:00 2001 From: Jean-Marie 'Histausse' Mineau Date: Sat, 4 Oct 2025 00:29:34 +0200 Subject: [PATCH] midskip --- 2_background/2_1_android.typ | 4 ++-- 2_background/2_2_tools.typ | 4 ++-- 2_background/2_3_static_analysis.typ | 4 ++-- 2_background/4_1_rasta.typ | 4 ++-- 2_background/4_2_classloader.typ | 4 ++-- 2_background/4_3_theseus.typ | 4 ++-- 3_rasta/9_conclusion.typ | 3 +-- 4_class_loader/3_obfuscation.typ | 4 ++-- 4_class_loader/4_in_the_wild.typ | 4 ++-- 5_theseus/3_static_transformation.typ | 4 ++-- 5_theseus/4_dynamic_data_collection.typ | 4 ++-- 5_theseus/5_results.typ | 4 ++-- lib.typ | 3 +++ 13 files changed, 26 insertions(+), 24 deletions(-) diff --git a/2_background/2_1_android.typ b/2_background/2_1_android.typ index 1b9514c..86540f1 100644 --- a/2_background/2_1_android.typ +++ b/2_background/2_1_android.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": eg, num, APK, JAR, AXML, ART, SDK, JNI, NDK, DEX, XML, API, ZIP, paragraph +#import "../lib.typ": eg, num, APK, JAR, AXML, ART, SDK, JNI, NDK, DEX, XML, API, ZIP, paragraph, midskip #import "../lib.typ": todo, jfl-note === Android @@ -165,7 +165,7 @@ Reflection is not limited to bytecode that has been dynamically loaded: it can b Because the `ClassLoader` objects are only used when loading bytecode dynamically or when using reflection, it is often forgotten that the #ART uses class loaders constantly behind the scene, allowing classes from the application and platform classes to cohabit seamlessly. ] -#v(2em) +#midskip In this subsection, we presented the most notable specificities of the Android ecosystem. In the next section, we will continue with the various tools available for an Android reverse engineer. diff --git a/2_background/2_2_tools.typ b/2_background/2_2_tools.typ index 297f894..ba5bcb4 100644 --- a/2_background/2_2_tools.typ +++ b/2_background/2_2_tools.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": APK, IDE, SDK, DEX, ADB, ART, eg, XML, AXML, API, paragraph +#import "../lib.typ": APK, IDE, SDK, DEX, ADB, ART, eg, XML, AXML, API, paragraph, midskip #import "../lib.typ": jfl-note, todo === Reverse Engineering Tools @@ -85,7 +85,7 @@ The main drawback of using Frida is that it is a known tool, easily detected by Malware might implement countermeasures that avoid running malicious payloads if Frida is detected. ] -#v(2em) +#midskip Those tools are quite useful for manual operations. However, considering the complexity of modern Android applications, it might take a lot of work for a reverse engineer to analyse one application. diff --git a/2_background/2_3_static_analysis.typ b/2_background/2_3_static_analysis.typ index ab01b38..5c270a4 100644 --- a/2_background/2_3_static_analysis.typ +++ b/2_background/2_3_static_analysis.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": APK, etal, ART, SDK, DEX, eg, +#import "../lib.typ": APK, etal, ART, SDK, DEX, eg, midskip #import "../lib.typ": todo, jm-note, jfl-note #import "@preview/diagraph:0.3.5": raw-render @@ -143,6 +143,6 @@ However, static analysis tools must overcome many challenges when analysing Andr #todo[Ca serait bien de souligner Dyn Code Load et Reflection] -#v(2em) +#midskip With the bases of Android application analysis in mind, we can now examine our problem statements further. diff --git a/2_background/4_1_rasta.typ b/2_background/4_1_rasta.typ index 4b7b3fc..5bf989f 100644 --- a/2_background/4_1_rasta.typ +++ b/2_background/4_1_rasta.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": APK, etal, ART, SDK, DEX, eg, ie, pb1, pb1-text +#import "../lib.typ": APK, etal, ART, SDK, DEX, eg, ie, pb1, pb1-text, midskip #import "../lib.typ": todo, jm-note, jfl-note #import "@preview/diagraph:0.3.5": raw-render @@ -144,7 +144,7 @@ ReproDroid@pauckAndroidTaintAnalysis2018 DroidBench@Arzt2014a */ -#v(2em) +#midskip To summarise, Li #etal made a systematic literature review of static analysis for Android that listed 27 open-sourced tools. However, they did not test those tools. diff --git a/2_background/4_2_classloader.typ b/2_background/4_2_classloader.typ index abceef9..1b2aecc 100644 --- a/2_background/4_2_classloader.typ +++ b/2_background/4_2_classloader.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": SDK, API, API, DEX, pb2, pb2-text, etal, APIs, ie +#import "../lib.typ": SDK, API, API, DEX, pb2, pb2-text, etal, APIs, ie, midskip #import "../lib.typ": todo === Android Class Loading @@ -53,7 +53,7 @@ More recently, He #etal~@he_systematic_2023 did a systematic study of hidden ser They studied how the hidden #API can be used to bypass Android security restrictions and found that although Google countermeasures are effective, they need to be implemented inside the system services and not the hidden #API due to the lack of in-app privilege isolation: the framework code is in the same process as the user code, meaning any restriction in the framework can be bypassed by the user. Unfortunately, those two contributions do not explore further the consequences of the use of hidden #APIs for a reverse engineer. -#v(2em) +#midskip In conclusion, class loading mechanisms have been studied carefully in the context of the Java language. However, the same cannot be said about Android, whose implementation diverges significantly from classic Java Virtual Machines. diff --git a/2_background/4_3_theseus.typ b/2_background/4_3_theseus.typ index ddf3615..e5f5fb8 100644 --- a/2_background/4_3_theseus.typ +++ b/2_background/4_3_theseus.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": APK, etal, ART, SDK, eg, DEX, eg, pb3, pb3-text +#import "../lib.typ": APK, etal, ART, SDK, eg, DEX, eg, pb3, pb3-text, midskip #import "../lib.typ": todo, jm-note, jfl-note === Allowing Static Analysis Tools to Analyse Obfuscated Application @@ -81,7 +81,7 @@ Those cases are quite common; being able to solve those without resorting to dyn On the other hand, COAL will struggle to solve cases with complex string manipulation and is simply not able to handle cases that rely on external data (#eg downloaded from the internet at runtime). Likewise, this can only access code loaded dynamically if the code was present inside the application without any kind of obfuscation (#eg a #DEX file in the assets of the application can be analysed, but not if it is ciphered). -#v(2em) +#midskip Instrumenting applications to encode the result of an analysis as a unified representation has been explored before. It has been used by tools like AppSpear and DexLego to expose heavily obfuscated bytecode collected dynamically. diff --git a/3_rasta/9_conclusion.typ b/3_rasta/9_conclusion.typ index c731d82..c87ef0a 100644 --- a/3_rasta/9_conclusion.typ +++ b/3_rasta/9_conclusion.typ @@ -1,6 +1,5 @@ -#import "@local/template-thesis-matisse:0.0.1": etal #import "../lib.typ": todo, jfl-note -#import "../lib.typ": pb1, pb1-text, APKs, SDK, highlight-block +#import "../lib.typ": pb1, pb1-text, APKs, SDK, highlight-block, etal #import "X_var.typ": * == Conclusion diff --git a/4_class_loader/3_obfuscation.typ b/4_class_loader/3_obfuscation.typ index baa1610..5c5b70f 100644 --- a/4_class_loader/3_obfuscation.typ +++ b/4_class_loader/3_obfuscation.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": eg, paragraph, DFG, DEX, API, SDK, APK, ART, AOSP +#import "../lib.typ": eg, paragraph, DFG, DEX, API, SDK, APK, ART, AOSP, midskip #import "../lib.typ": todo, jfl-note #import "X_var.typ": * @@ -257,7 +257,7 @@ Flowdroid does have a record of #SDK classes, and gives priority to the actual # Unfortunately, Flowdroid does not have a record of all platform classes, meaning that using #hidec breaks the flow tracking. Solving this issue would require finding the bytecode of all the platform classes of the Android version targeted, and, as we said previously, it requires extracting this information from the emulator or phone. -#v(2em) +#midskip We have seen that tools can be impacted by shadow attacks. In the next section, we will investigate whether these attacks are used in the wild. diff --git a/4_class_loader/4_in_the_wild.typ b/4_class_loader/4_in_the_wild.typ index f8f1b5b..22999dd 100644 --- a/4_class_loader/4_in_the_wild.typ +++ b/4_class_loader/4_in_the_wild.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": num, todo, paragraph, SDK, APK, API, ART, DEX +#import "../lib.typ": num, todo, paragraph, SDK, APK, API, ART, DEX, midskip #import "X_var.typ": * == Shadow Attacks in the Wild @@ -285,7 +285,7 @@ When looking at this new code stored in the field, we found that it does almost Thus, we believe that the developer has upgraded their obfuscation techniques, replacing a native library with inline base64 encoded bytecode. The shadow attack could be unintentional, but it strengthens the masking of the new implementation. -#v(2em) +#midskip As a conclusion, we observed that: - #SDK shadowing is performed by #shadowsdk of applications, but is unintentional: these classes are embedded for retro-compatibility purposes or because the developer added a library already present in Android. diff --git a/5_theseus/3_static_transformation.typ b/5_theseus/3_static_transformation.typ index 70ed379..cc0812e 100644 --- a/5_theseus/3_static_transformation.typ +++ b/5_theseus/3_static_transformation.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": todo, APK, APKs, DEX, JAR, OAT, SDK, eg, ART, jm-note, jfl-note +#import "../lib.typ": todo, APK, APKs, DEX, JAR, OAT, SDK, eg, ART, jm-note, jfl-note, midskip == Code Transformation @@ -245,7 +245,7 @@ In hindsight, we probably should have taken the time to find a way to use smali/ At the time of writing, the feature is still being developed, but in the future, Androguard might also become an option to modify #DEX files. Nevertheless, we published our instrumentation library, Androscalpel, for anyone who wants to use it. #todo[ref to code] -#v(2em) +#midskip Now that we saw the transformations we want to make, we know the runtime information we need to do it. In the next section, we will propose a solution to collect that information. diff --git a/5_theseus/4_dynamic_data_collection.typ b/5_theseus/4_dynamic_data_collection.typ index 4891ed6..a53bb88 100644 --- a/5_theseus/4_dynamic_data_collection.typ +++ b/5_theseus/4_dynamic_data_collection.typ @@ -1,4 +1,4 @@ -#import "../lib.typ": todo, SDK, API, ART, DEX, APK, JAR, ADB, jfl-note, APKs +#import "../lib.typ": todo, SDK, API, ART, DEX, APK, JAR, ADB, jfl-note, APKs, midskip == Collecting Runtime Information @@ -77,7 +77,7 @@ Nonetheless, the benefit of our implementation is that it only requires an #ADB Of course, to analyse a specific application, a reverse engineer could use an actual smartphone and explore the application manually. It would be a lot more stable than our automated batch analysis setup. -#v(2em) +#midskip Now that we saw both the dynamic analysis setup and the transformation we want to perform on the #APKs, we put our proposed approach into practice. In the next section, we will run our dynamic analysis on #APKs and study the data collected, as well as the impact the instrumentation has on applications and different analysis tools. diff --git a/5_theseus/5_results.typ b/5_theseus/5_results.typ index 2664e1a..ecc4783 100644 --- a/5_theseus/5_results.typ +++ b/5_theseus/5_results.typ @@ -1,6 +1,6 @@ #import "@preview/diagraph:0.3.5": render -#import "../lib.typ": SDK, num, mypercent, ART, ie, APKs, API, APIs, etal +#import "../lib.typ": SDK, num, mypercent, ART, ie, APKs, API, APIs, etal, midskip #import "../lib.typ": todo, jfl-note #import "X_var.typ": * #import "../3_rasta/X_var.typ": NBTOTALSTRING @@ -298,7 +298,7 @@ In red on the figure however, we have the calls that were hidded by reflection i caption: [Call Graph of `Main.main()` generated by Androguard after patching], ) -#v(2em) +#midskip To conclude, we showed that our approach indeed improves the results of analysis tools without impacting their finishing rates much. Unfortunately, we also noticed that our dynamic analysis is suboptimal, either due to our experimental setup or due to our solution to explore the applications. diff --git a/lib.typ b/lib.typ index f1cf5d8..e4662af 100644 --- a/lib.typ +++ b/lib.typ @@ -52,3 +52,6 @@ #let pb3 = link()[*Pb3*] #let pb3-text = [_Can we use instrumentation to provide dynamic code loading and reflection data collected dynamically to static analysis tools and improve their results?_] #let pb3-text-fr = [_Peut-on utiliser l'instrumentation pour fournir le code chargé dynamiquement et les informations de réflexion collectées dynamiquement aux outils d'analyse statique pour améliorer leurs résultats?_] + +//#let midskip = align(center, line(length: 80%)) +#let midskip = align(center, sym.therefore)