diff --git a/0_preamble/notations.typ b/0_preamble/notations.typ index 5a95637..cbd2990 100644 --- a/0_preamble/notations.typ +++ b/0_preamble/notations.typ @@ -1,4 +1,7 @@ -#let tldr = link()[TL;DR] +#let APK = link()[APK] +#let DEX = link()[DEX] +#let OAT = link()[OAT] +#let JAR = link()[JAR] #let notation_table = align(center, table( columns: 2, @@ -6,5 +9,8 @@ table.header( [Acronyms], [Meanings], ), - tldr, [Too long; didn't read], + APK, [Android Package, the file format used to install applications on Android. The APK format is an extension of the #JAR format ], + DEX, [Dalvik Executable, the file format for the bytecode used for applications by Android ], + JAR, [Java ARchive file, the file format used to store several Java class files. Sometimes used by Android to store #DEX files instead of Java classes ], + OAT, [Of Ahead Time, an ahead-of-time compiled format for #DEX files ] )) diff --git a/6_theseus/1_static_transformation.typ b/6_theseus/1_static_transformation.typ new file mode 100644 index 0000000..b42fa00 --- /dev/null +++ b/6_theseus/1_static_transformation.typ @@ -0,0 +1,93 @@ +#import "../lib.typ": todo, APK, DEX, JAR, OAT, eg + +== Code Transformation + +#todo[Define code loading and reflection somewhere] +#todo[This is a draft, clean this up] +#todo[Reflectif call? Reflection call?] + +In this section, we will see how we can transform the application code to make dynamic code loading and reflection calls analysable by static analysis tools. + +=== Reflection + +In Android, reflection can be used to do two things: instantiate a class, or call a method. +Either way, reflection starts by retrieving the `Class` object representing the class to use. +This class is usually retrieved using a `ClassLoader` object, but can also be retrieved directly from the classloader of the class defining the calling method. +// elaborate? const-class dalvik instruction / MyClass.class in java? 
+Once the class is retrieved, it can be instantiated using the deprecated method `Class.newInstance()`, as shown in @lst:-th-expl-cl-new-instance, or a specific method can be retrieved. +The current approach to instantiate a class is to retrieve the specific `Constructor` object, then call `Constructor.newInstance(..)` as in @lst:-th-expl-cl-cnstr. +Similarly, to call a method, the `Method` object must be retrieved, then called using `Method.invoke(..)`, as shown in @lst:-th-expl-cl-call. + +Although the process seems to differ between class instantiation and method call from the Java standpoint, the runtime operations are very similar. +When instantiating an object with `Object obj = cst.newInstance("Hello Void")`, the constructor method `<init>(Ljava/lang/String;)V`, represented by the `Constructor` `cst`, is called on the object `obj`. + +#figure( + ```java + ClassLoader cl = MainActivity.class.getClassLoader(); + Class clz = cl.loadClass("com.example.Reflectee"); + Object obj = clz.newInstance(); + ```, + caption: [Instantiating a class using `Class.newInstance()`] +) + +#figure( + ```java + Constructor cst = clz.getDeclaredConstructor(String.class); + Object obj = cst.newInstance("Hello Void"); + ```, + caption: [Instantiating a class using `Constructor.newInstance(..)`] +) + +#figure( + ```java + Method mth = clz.getMethod("myMethod", String.class); + String retData = (String) mth.invoke(obj, "an argument"); + ```, + caption: [Calling a method using reflection] +) + +To allow static analysis tools to analyse an application that uses reflection, we want to replace the reflection call with the bytecode that does the actual calls. + +One of the main reasons to use reflection is to access classes that are not part of the application. +Although reflection allows the use of classes that do not exist in the application bytecode, at runtime, if the classes are not found in the current classloader, the application will crash. 
+Similarly, some analysis tools might have trouble analysing applications calling non-existing classes. +@sec:th-trans-cl deals with the issue of adding dynamically loaded bytecode to the application. + +A notable issue is that a specific reflection call can call different methods. +@lst:th-worst-case-ref illustrates a worst-case scenario where any method can be called at the same reflection call. +In those situations, we cannot guarantee that we know all the methods that can be called (#eg the name of the method called could be retrieved from a remote server). + + +#figure( + ```java + Object myInvoke(Object obj, Method mth, Object[] args) throws .. { + return mth.invoke(obj, args); + } + ```, + caption: [A reflection call that can call any method] +) + +=== Code loading + +#todo[custom class loaders] + +An application can dynamically import code from several formats like #DEX, #APK, #JAR or #OAT, either stored in memory or in a file. +Because it is an internal, platform-dependent format, we elected to ignore the #OAT format. +Practically, #JAR and #APK files are zip files containing #DEX files. +This means that we only need to find a way to integrate #DEX files into the application. + +We elected to simply add the dex files to the application, using the multi-dex feature introduced by SDK 21, now used by all applications. +This gives static analysis tools access to the dynamically loaded code. + +#todo[add drawing of dex insertion] + +We decided to leave untouched the original code that loads the bytecode. +At runtime, although the bytecode is already present in the application, the application will still dynamically load the code. +This ensures that the application keeps working as intended even if the transformations we applied are incomplete. 
+Specifically, to call dynamically loaded code, an application needs to use reflection, and we saw in @sec:th-trans-ref that we need to keep reflection calls, and in order to keep reflection calls, we need the classloader created when loading bytecode. + +=== Class Collisions + +=== Pitfalls + +#todo[interrupting try blocks: catch block might expect temporary registers to still store the saved value] diff --git a/6_theseus/3_results.typ b/6_theseus/3_results.typ new file mode 100644 index 0000000..c43b6fe --- /dev/null +++ b/6_theseus/3_results.typ @@ -0,0 +1,7 @@ +#import "../lib.typ": todo + +== Results #todo[better section name] + +=== Bytecode Loaded by Application + +#todo[Bytecode collected: facebook, google, appsflyer] diff --git a/6_theseus/4_ttv.typ b/6_theseus/4_ttv.typ new file mode 100644 index 0000000..962a44b --- /dev/null +++ b/6_theseus/4_ttv.typ @@ -0,0 +1,9 @@ +#import "../lib.typ": todo + +== Limits and Threats to Validity + +#todo[redaction] + +- Use multidex: min SDK >= 21 (Android 5.0, published in 2014, should be ok) +- No support for OAT (platform-dependent) + diff --git a/6_theseus/main.typ b/6_theseus/main.typ new file mode 100644 index 0000000..c2d7bea --- /dev/null +++ b/6_theseus/main.typ @@ -0,0 +1,7 @@ +#import "../lib.typ": todo + += #todo[theseus chapter title] + +#include("1_static_transformation.typ") +#include("3_results.typ") +#include("4_ttv.typ") diff --git a/lib.typ b/lib.typ index 1ca8f80..f3abf9c 100644 --- a/lib.typ +++ b/lib.typ @@ -1,4 +1,6 @@ #import "@local/template-thesis-matisse:0.0.1": * +// Notations: +#import "0_preamble/notations.typ": * // Format number. // Following https://www.mit.edu/course/21/21.guide/numbers.htm diff --git a/main.typ b/main.typ index b3aedcf..15641e0 100644 --- a/main.typ +++ b/main.typ @@ -74,10 +74,7 @@ #include("3_related_work/main.typ") #include("4_rasta/main.typ") #include("5_class_loader/main.typ") - -= Contribution n - -#lorem(500) +#include("6_theseus/main.typ") = Conclusion