From 55b4ef015bad0b6746ee12b0643d563c74a3c1c6 Mon Sep 17 00:00:00 2001 From: Jean-Marie Mineau Date: Mon, 8 Jul 2024 15:39:36 +0200 Subject: [PATCH] parse dwarf --- androscalpel_serializer/src/core/mod.rs | 1 + androscalpel_serializer/src/debug.rs | 420 +++++++++++++++++++++++- 2 files changed, 420 insertions(+), 1 deletion(-) diff --git a/androscalpel_serializer/src/core/mod.rs b/androscalpel_serializer/src/core/mod.rs index 3250e15..7e1f74a 100644 --- a/androscalpel_serializer/src/core/mod.rs +++ b/androscalpel_serializer/src/core/mod.rs @@ -17,6 +17,7 @@ pub enum Error { DeserializationError(String), InvalidStringEncoding(String), InconsistantStruct(String), + OutOfBound(String), } pub type Result = core::result::Result; diff --git a/androscalpel_serializer/src/debug.rs b/androscalpel_serializer/src/debug.rs index 488cb02..38065fb 100644 --- a/androscalpel_serializer/src/debug.rs +++ b/androscalpel_serializer/src/debug.rs @@ -1,7 +1,9 @@ //! Debug structs use crate as androscalpel_serializer; -use crate::{ReadSeek, Result, Serializable, SerializableUntil, Sleb128, Uleb128, Uleb128p1}; +use crate::{ + Error, ReadSeek, Result, Serializable, SerializableUntil, Sleb128, Uleb128, Uleb128p1, NO_INDEX, +}; use std::io::Write; /// @@ -55,6 +57,173 @@ impl DebugInfoItem { pub fn parameters_size_field(&self) -> Uleb128 { Uleb128(self.parameter_names.len() as u32) } + pub fn from_debug_infos( + line_start: Uleb128, + parameter_names: Vec, + debug_infos: Vec, + ) -> Self { + let mut bytecode = vec![]; + let mut address = 0; + let mut line = line_start.0; + let mut register_states = vec![]; + for dbg_info in debug_infos { + match dbg_info { + DebugInfo::DefLocal { addr, reg, val } => { + while register_states.len() < (reg + 1) as usize { + register_states.push(DebugRegState { + name_idx: None, + type_idx: None, + sig_idx: None, + in_scope: false, + }); + } + if addr != address { + let addr_diff = addr - address; + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address += addr_diff; + } + let mut old_val = register_states[reg as usize]; + let old_val_in_scope = old_val.in_scope; + old_val.in_scope = true; + if old_val_in_scope && old_val == val { + register_states[reg as usize].in_scope = true; + bytecode.push(DbgBytecode::RestartLocal { + register_num: Uleb128(reg), + }); + } else { + register_states[reg as usize] = val; + if val.sig_idx.is_some() { + bytecode.push(DbgBytecode::StartLocalExtended { + register_num: Uleb128(reg), + name_idx: if let Some(name_idx) = val.name_idx { + Uleb128p1(name_idx) + } else { + NO_INDEX + }, + type_idx: if let Some(type_idx) = val.type_idx { + Uleb128p1(type_idx) + } else { + NO_INDEX + }, + sig_idx: if let Some(sig_idx) = val.sig_idx { + Uleb128p1(sig_idx) + } else { + NO_INDEX + }, + }) + } else { + bytecode.push(DbgBytecode::StartLocal { + register_num: Uleb128(reg), + name_idx: if let Some(name_idx) = val.name_idx { + Uleb128p1(name_idx) + } else { + NO_INDEX + }, + type_idx: if let Some(type_idx) = val.type_idx { + Uleb128p1(type_idx) + } else { + NO_INDEX + }, + }) + } + } + } + DebugInfo::EndLocal { addr, reg } => { + while register_states.len() < (reg + 1) as usize { + register_states.push(DebugRegState { + name_idx: None, + type_idx: None, + sig_idx: None, + in_scope: false, + }); + } + if addr != address { + let addr_diff = addr - address; + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address += addr_diff; + } + bytecode.push(DbgBytecode::EndLocal { + register_num: Uleb128(reg), + }); + } + DebugInfo::PrologueEnd { addr } => { + if addr != address { + let addr_diff = addr - address; + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address += addr_diff; + } + bytecode.push(DbgBytecode::SetPrologueEnd); + } + DebugInfo::EpilogueBegin { addr } => { + if addr != address { + let addr_diff = addr - address; + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address += addr_diff; + } + bytecode.push(DbgBytecode::SetEpilogueBegin); + } + DebugInfo::SetSourceFile { + addr, + source_file_idx, + } => { + if addr != address { + let addr_diff = addr - address; + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address += addr_diff; + } + bytecode.push(DbgBytecode::SetFile { + name_idx: if let Some(source_file_idx) = source_file_idx { + Uleb128p1(source_file_idx) + } else { + NO_INDEX + }, + }); + } + DebugInfo::SetLineNumber { addr, line_num } => { + let mut line_diff = line_num as i32 - line as i32; + let mut addr_diff = addr - address; + if addr_diff > (0xff - 0x0a) / 15 { + bytecode.push(DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + }); + address = addr; + addr_diff = 0; + } + if line_diff < -4 || line_diff >= 15 - 4 { + bytecode.push(DbgBytecode::AdvanceLine { + line_diff: Sleb128(line_diff), + }); + line = line_num; + line_diff = 0; + } + let op = 0x0a + addr_diff as u8 * 15 + (line_diff + 4) as u8; + bytecode.push(DbgBytecode::SpecialOpcode(op)); + } + DebugInfo::EndOfData => { + bytecode.push(DbgBytecode::EndSequence); + break; + } + } + } + if bytecode.len() == 0 || bytecode[bytecode.len() - 1] != DbgBytecode::EndSequence { + bytecode.push(DbgBytecode::EndSequence); + } + Self { + line_start, + parameter_names, + bytecode, + } + } } impl Serializable for DebugInfoItem { @@ -93,6 +262,255 @@ impl Serializable for DebugInfoItem { } } +/// The name and type of the variable in a register. +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub struct DebugRegState { + pub type_idx: Option, + pub name_idx: Option, + pub sig_idx: Option, + pub in_scope: bool, +} + +/// A simplified debug information +#[derive(Debug, PartialEq, Eq, Clone)] +pub enum DebugInfo { + DefLocal { + addr: u32, + reg: u32, + val: DebugRegState, + }, + EndLocal { + addr: u32, + reg: u32, + }, + PrologueEnd { + addr: u32, + }, + EpilogueBegin { + addr: u32, + }, + SetSourceFile { + addr: u32, + source_file_idx: Option, + }, + SetLineNumber { + addr: u32, + line_num: u32, + }, + EndOfData, +} + +/// A state machine that interpret a [`DebugInfoItem`]. +#[derive(Debug, PartialEq, Eq, Clone)] +pub struct DebugStateMachine<'a> { + debug_info: &'a DebugInfoItem, + pub pc: usize, + pub address: u32, + pub line: u32, + // Those are registers described in the doc but not necessary in the end + //pub source_file_idx: Option, + //pub prologue_end: bool, + //pub epilogue_begin: bool, + pub register_states: Vec, +} + +impl<'a> DebugStateMachine<'a> { + pub fn new( + debug_info: &'a DebugInfoItem, + //source_file_idx: Option, + //nb_reg: usize + ) -> Self { + Self { + debug_info, + pc: 0, + address: 0, + line: debug_info.line_start.0, + //source_file_idx, + //prologue_end: false, + //epilogue_begin: false, + //register_states: vec![ + // DebugRegState { + // name_idx: None, + // type_idx: None, + // sig_idx: None, + // in_scope: false, + // }; + // nb_reg + //], + register_states: vec![], // In the end, it's easier to grow this on the fly + } + } + + pub fn get_ins(&self) -> Result { + if self.pc >= self.debug_info.bytecode.len() { + return Err(Error::OutOfBound( + "Try to read an instruction out of bound, maybe after the enf of the debug sequence." + .into() + )); + } + Ok(self.debug_info.bytecode[self.pc]) + } + + pub fn tick(&mut self) -> Result> { + let ins = self.get_ins()?; + self.pc += 1; + match ins { + DbgBytecode::EndSequence => { + self.pc = self.debug_info.bytecode.len(); + Ok(Some(DebugInfo::EndOfData)) + } + DbgBytecode::AdvancePC { + addr_diff: Uleb128(addr_diff), + } => { + self.address += addr_diff; + Ok(None) + } + DbgBytecode::AdvanceLine { + line_diff: Sleb128(line_diff), + } => { + self.line = (self.line as i32 + line_diff) as u32; + Ok(None) + } + DbgBytecode::StartLocal { + register_num: Uleb128(register_num), + name_idx, + type_idx, + } => { + while self.register_states.len() < (register_num + 1) as usize { + self.register_states.push(DebugRegState { + name_idx: None, + type_idx: None, + sig_idx: None, + in_scope: false, + }) + } + self.register_states[register_num as usize] = DebugRegState { + name_idx: if name_idx == NO_INDEX { + None + } else { + Some(name_idx.0) + }, + type_idx: if type_idx == NO_INDEX { + None + } else { + Some(type_idx.0) + }, + sig_idx: None, + in_scope: true, + }; + Ok(Some(DebugInfo::DefLocal { + addr: self.address, + reg: register_num, + val: self.register_states[register_num as usize], + })) + } + DbgBytecode::StartLocalExtended { + register_num: Uleb128(register_num), + name_idx, + type_idx, + sig_idx, + } => { + while self.register_states.len() < (register_num + 1) as usize { + self.register_states.push(DebugRegState { + name_idx: None, + type_idx: None, + sig_idx: None, + in_scope: false, + }) + } + self.register_states[register_num as usize] = DebugRegState { + name_idx: if name_idx == NO_INDEX { + None + } else { + Some(name_idx.0) + }, + type_idx: if type_idx == NO_INDEX { + None + } else { + Some(type_idx.0) + }, + sig_idx: if sig_idx == NO_INDEX { + None + } else { + Some(sig_idx.0) + }, + in_scope: true, + }; + Ok(Some(DebugInfo::DefLocal { + addr: self.address, + reg: register_num, + val: self.register_states[register_num as usize], + })) + } + DbgBytecode::EndLocal { + register_num: Uleb128(register_num), + } => { + self.register_states[register_num as usize].in_scope = false; + Ok(Some(DebugInfo::EndLocal { + addr: self.address, + reg: register_num, + })) + } + DbgBytecode::RestartLocal { + register_num: Uleb128(register_num), + } => { + while self.register_states.len() < (register_num + 1) as usize { + self.register_states.push(DebugRegState { + name_idx: None, + type_idx: None, + sig_idx: None, + in_scope: false, + }) + } + self.register_states[register_num as usize].in_scope = true; + Ok(Some(DebugInfo::DefLocal { + addr: self.address, + reg: register_num, + val: self.register_states[register_num as usize], + })) + } + DbgBytecode::SetPrologueEnd => { + //self.prologue_end = true; + Ok(Some(DebugInfo::PrologueEnd { addr: self.address })) + } + DbgBytecode::SetEpilogueBegin => { + //self.epilogue_begin = true; + Ok(Some(DebugInfo::EpilogueBegin { addr: self.address })) + } + DbgBytecode::SetFile { name_idx: NO_INDEX } => { + //self.source_file_idx = None; + Ok(Some(DebugInfo::SetSourceFile { + addr: self.address, + source_file_idx: None, + })) + } + DbgBytecode::SetFile { + name_idx: Uleb128p1(name_idx), + } => { + //self.source_file_idx = Some(name_idx); + Ok(Some(DebugInfo::SetSourceFile { + addr: self.address, + source_file_idx: Some(name_idx), + })) + } + DbgBytecode::SpecialOpcode(op) => { + //if op >= 0x0a { + // self.prologue_end = false; + // self.epilogue_begin = true; + //} + // See + let adjusted_opcode = op as u32 - 0x0a; + self.line = (self.line as i32 + (adjusted_opcode as i32 % 15) - 4) as u32; + self.address += adjusted_opcode / 15; + Ok(Some(DebugInfo::SetLineNumber { + addr: self.address, + line_num: self.line, + })) + } + } + } +} + #[cfg(test)] mod test { use super::DbgBytecode::*;