androscalpel/androscalpel_serializer/src/debug.rs

577 lines
20 KiB
Rust

//! Debug structs
use crate as androscalpel_serializer;
use crate::{
Error, ReadSeek, Result, Serializable, SerializableUntil, Sleb128, Uleb128, Uleb128p1, NO_INDEX,
};
use std::io::Write;
/// In-memory form of a DEX `debug_info_item`.
///
/// See <https://source.android.com/docs/core/runtime/dex-format#debug-info-item>.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugInfoItem {
/// Initial value of the line register of the debug state machine.
pub line_start: Uleb128,
// The `parameters_size` field of the serialized item is not stored: it is
// recomputed from `parameter_names.len()` when needed.
pub parameter_names: Vec<Uleb128p1>,
/// List of opcodes. Notice that the trailing [`DbgBytecode::EndSequence`]
/// is not stored in this vec.
pub bytecode: Vec<DbgBytecode>,
}
/// One instruction of the debug info state machine bytecode.
///
/// Each variant is identified by the one-byte prefix given in its `#[prefix(..)]`
/// attribute; every other byte value falls in [`DbgBytecode::SpecialOpcode`].
/// See <https://source.android.com/docs/core/runtime/dex-format#opcodes>.
#[derive(Serializable, Debug, PartialEq, Eq, Copy, Clone)]
#[prefix_type(u8)]
pub enum DbgBytecode {
/// Terminates the debug sequence.
#[prefix(0x00)]
EndSequence,
/// Adds `addr_diff` to the address register, without emitting any entry.
#[prefix(0x01)]
AdvancePC { addr_diff: Uleb128 },
/// Adds the signed `line_diff` to the line register, without emitting any entry.
#[prefix(0x02)]
AdvanceLine { line_diff: Sleb128 },
/// Introduces a local variable (name and type, no signature) in register
/// `register_num` at the current address.
#[prefix(0x03)]
StartLocal {
register_num: Uleb128,
name_idx: Uleb128p1,
type_idx: Uleb128p1,
},
/// Same as [`DbgBytecode::StartLocal`], with an additional signature index.
#[prefix(0x04)]
StartLocalExtended {
register_num: Uleb128,
name_idx: Uleb128p1,
type_idx: Uleb128p1,
sig_idx: Uleb128p1,
},
/// Marks the variable in register `register_num` as out of scope at the current address.
#[prefix(0x05)]
EndLocal { register_num: Uleb128 },
/// Puts back in scope, at the current address, the variable last recorded for
/// register `register_num`.
#[prefix(0x06)]
RestartLocal { register_num: Uleb128 },
/// Signals the end of the method prologue at the current address.
#[prefix(0x07)]
SetPrologueEnd,
/// Signals the beginning of the method epilogue at the current address.
#[prefix(0x08)]
SetEpilogueBegin,
/// Changes the source file associated with the following entries
/// (`NO_INDEX` when the file is unknown).
#[prefix(0x09)]
SetFile { name_idx: Uleb128p1 },
/// Special opcode: with `adjusted = opcode - 0x0a`, advances the line register by
/// `adjusted % 15 - 4` and the address register by `adjusted / 15`, then emits a
/// line number entry.
#[default_variant]
SpecialOpcode(u8),
}
impl DebugInfoItem {
pub fn parameters_size_field(&self) -> Uleb128 {
Uleb128(self.parameter_names.len() as u32)
}
pub fn from_debug_infos(
line_start: Uleb128,
parameter_names: Vec<Uleb128p1>,
debug_infos: Vec<DebugInfo>,
) -> Self {
let mut bytecode = vec![];
let mut address = 0;
let mut line = line_start.0;
let mut register_states = vec![];
for dbg_info in debug_infos {
match dbg_info {
DebugInfo::DefLocal { addr, reg, val } => {
while register_states.len() < (reg + 1) as usize {
register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
});
}
if addr != address {
let addr_diff = addr - address;
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address += addr_diff;
}
let mut old_val = register_states[reg as usize];
let old_val_in_scope = old_val.in_scope;
old_val.in_scope = true;
if old_val_in_scope && old_val == val {
register_states[reg as usize].in_scope = true;
bytecode.push(DbgBytecode::RestartLocal {
register_num: Uleb128(reg),
});
} else {
register_states[reg as usize] = val;
if val.sig_idx.is_some() {
bytecode.push(DbgBytecode::StartLocalExtended {
register_num: Uleb128(reg),
name_idx: if let Some(name_idx) = val.name_idx {
Uleb128p1(name_idx)
} else {
NO_INDEX
},
type_idx: if let Some(type_idx) = val.type_idx {
Uleb128p1(type_idx)
} else {
NO_INDEX
},
sig_idx: if let Some(sig_idx) = val.sig_idx {
Uleb128p1(sig_idx)
} else {
NO_INDEX
},
})
} else {
bytecode.push(DbgBytecode::StartLocal {
register_num: Uleb128(reg),
name_idx: if let Some(name_idx) = val.name_idx {
Uleb128p1(name_idx)
} else {
NO_INDEX
},
type_idx: if let Some(type_idx) = val.type_idx {
Uleb128p1(type_idx)
} else {
NO_INDEX
},
})
}
}
}
DebugInfo::EndLocal { addr, reg } => {
while register_states.len() < (reg + 1) as usize {
register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
});
}
if addr != address {
let addr_diff = addr - address;
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address += addr_diff;
}
bytecode.push(DbgBytecode::EndLocal {
register_num: Uleb128(reg),
});
}
DebugInfo::PrologueEnd { addr } => {
if addr != address {
let addr_diff = addr - address;
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address += addr_diff;
}
bytecode.push(DbgBytecode::SetPrologueEnd);
}
DebugInfo::EpilogueBegin { addr } => {
if addr != address {
let addr_diff = addr - address;
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address += addr_diff;
}
bytecode.push(DbgBytecode::SetEpilogueBegin);
}
DebugInfo::SetSourceFile {
addr,
source_file_idx,
} => {
if addr != address {
let addr_diff = addr - address;
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address += addr_diff;
}
bytecode.push(DbgBytecode::SetFile {
name_idx: if let Some(source_file_idx) = source_file_idx {
Uleb128p1(source_file_idx)
} else {
NO_INDEX
},
});
}
DebugInfo::SetLineNumber { addr, line_num } => {
let mut line_diff = line_num as i32 - line as i32;
let mut addr_diff = addr - address;
if addr_diff > (0xff - 0x0a) / 15 {
bytecode.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
address = addr;
addr_diff = 0;
}
if !(-4..15 - 4).contains(&line_diff) {
bytecode.push(DbgBytecode::AdvanceLine {
line_diff: Sleb128(line_diff),
});
line = line_num;
line_diff = 0;
}
let op = 0x0a + addr_diff as u8 * 15 + (line_diff + 4) as u8;
bytecode.push(DbgBytecode::SpecialOpcode(op));
}
DebugInfo::EndOfData => {
bytecode.push(DbgBytecode::EndSequence);
break;
}
}
}
if bytecode.is_empty() || bytecode[bytecode.len() - 1] != DbgBytecode::EndSequence {
bytecode.push(DbgBytecode::EndSequence);
}
Self {
line_start,
parameter_names,
bytecode,
}
}
}
impl Serializable for DebugInfoItem {
    /// Write `line_start`, the parameter count, each parameter name, then the
    /// bytecode followed by its [`DbgBytecode::EndSequence`] terminator.
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        self.line_start.serialize(output)?;
        self.parameters_size_field().serialize(output)?;
        self.parameter_names
            .iter()
            .try_for_each(|name| name.serialize(&mut *output))?;
        self.bytecode.serialize(output, DbgBytecode::EndSequence)?;
        Ok(())
    }

    /// Read back an item in the layout produced by `serialize`.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        let line_start = Uleb128::deserialize(input)?;
        let Uleb128(parameters_size) = Uleb128::deserialize(input)?;
        // Stops at the first name that fails to deserialize.
        let parameter_names = (0..parameters_size)
            .map(|_| Uleb128p1::deserialize(&mut *input))
            .collect::<Result<Vec<_>>>()?;
        let bytecode = Vec::<DbgBytecode>::deserialize(input, DbgBytecode::EndSequence)?;
        Ok(Self {
            line_start,
            parameter_names,
            bytecode,
        })
    }

    /// Number of bytes `serialize` writes for this item.
    fn size(&self) -> usize {
        let header_size = self.line_start.size() + self.parameters_size_field().size();
        let names_size: usize = self.parameter_names.iter().map(|name| name.size()).sum();
        let bytecode_size = self.bytecode.size(DbgBytecode::EndSequence);
        header_size + names_size + bytecode_size
    }
}
/// The name and type of the variable in a register.
///
/// An index set to `None` corresponds to `NO_INDEX` in the debug bytecode.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct DebugRegState {
/// Index of the type of the variable, if known.
pub type_idx: Option<u32>,
/// Index of the name of the variable, if known.
pub name_idx: Option<u32>,
/// Index of the type signature of the variable, if any
/// (only set by [`DbgBytecode::StartLocalExtended`]).
pub sig_idx: Option<u32>,
/// Whether the variable is currently live in the register.
pub in_scope: bool,
}
/// A simplified debug information: one event emitted by [`DebugStateMachine`],
/// tagged with the address at which it occurs.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum DebugInfo {
/// Register `reg` holds the variable described by `val` starting at `addr`.
/// Emitted for start-local, start-local-extended and restart-local instructions.
DefLocal {
addr: u32,
reg: u32,
val: DebugRegState,
},
/// The variable in register `reg` goes out of scope at `addr`.
EndLocal {
addr: u32,
reg: u32,
},
/// The method prologue ends at `addr`.
PrologueEnd {
addr: u32,
},
/// The method epilogue begins at `addr`.
EpilogueBegin {
addr: u32,
},
/// The source file changes at `addr` (`None` when the instruction carries `NO_INDEX`).
SetSourceFile {
addr: u32,
source_file_idx: Option<u32>,
},
/// The code at `addr` corresponds to line `line_num` of the source.
SetLineNumber {
addr: u32,
line_num: u32,
},
/// End of the debug sequence; carries no address.
EndOfData,
}
impl DebugInfo {
    /// Address at which the event occurs.
    ///
    /// [`DebugInfo::EndOfData`] carries no address and reports `u32::MAX`.
    pub fn get_addr(&self) -> u32 {
        match *self {
            Self::DefLocal { addr, .. }
            | Self::EndLocal { addr, .. }
            | Self::PrologueEnd { addr }
            | Self::EpilogueBegin { addr }
            | Self::SetSourceFile { addr, .. }
            | Self::SetLineNumber { addr, .. } => addr,
            // TODO should be an Option::None?
            Self::EndOfData => u32::MAX,
        }
    }
}
/// A state machine that interprets a [`DebugInfoItem`].
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugStateMachine<'a> {
/// The item whose bytecode is interpreted.
debug_info: &'a DebugInfoItem,
/// Index, in `debug_info.bytecode`, of the next instruction to execute.
pub pc: usize,
/// Current value of the address register.
pub address: u32,
/// Current value of the line register.
pub line: u32,
// The following registers are described in the DEX documentation but turned out
// not to be necessary here:
//pub source_file_idx: Option<u32>,
//pub prologue_end: bool,
//pub epilogue_begin: bool,
/// Last known state of each register, indexed by register number.
/// The vec is grown on demand.
pub register_states: Vec<DebugRegState>,
}
impl<'a> DebugStateMachine<'a> {
pub fn new(
debug_info: &'a DebugInfoItem,
//source_file_idx: Option<u32>,
//nb_reg: usize
) -> Self {
Self {
debug_info,
pc: 0,
address: 0,
line: debug_info.line_start.0,
//source_file_idx,
//prologue_end: false,
//epilogue_begin: false,
//register_states: vec![
// DebugRegState {
// name_idx: None,
// type_idx: None,
// sig_idx: None,
// in_scope: false,
// };
// nb_reg
//],
register_states: vec![], // In the end, it's easier to grow this on the fly
}
}
pub fn get_ins(&self) -> Result<DbgBytecode> {
if self.pc >= self.debug_info.bytecode.len() {
return Err(Error::OutOfBound(
"Try to read an instruction out of bound, maybe after the end of the debug sequence."
.into()
));
}
Ok(self.debug_info.bytecode[self.pc])
}
pub fn tick(&mut self) -> Option<DebugInfo> {
let ins = if let Ok(ins) = self.get_ins() {
ins
} else {
return Some(DebugInfo::EndOfData);
};
self.pc += 1;
match ins {
DbgBytecode::EndSequence => {
self.pc = self.debug_info.bytecode.len();
Some(DebugInfo::EndOfData)
}
DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
} => {
self.address += addr_diff;
None
}
DbgBytecode::AdvanceLine {
line_diff: Sleb128(line_diff),
} => {
self.line = (self.line as i32 + line_diff) as u32;
None
}
DbgBytecode::StartLocal {
register_num: Uleb128(register_num),
name_idx,
type_idx,
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize] = DebugRegState {
name_idx: if name_idx == NO_INDEX {
None
} else {
Some(name_idx.0)
},
type_idx: if type_idx == NO_INDEX {
None
} else {
Some(type_idx.0)
},
sig_idx: None,
in_scope: true,
};
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::StartLocalExtended {
register_num: Uleb128(register_num),
name_idx,
type_idx,
sig_idx,
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize] = DebugRegState {
name_idx: if name_idx == NO_INDEX {
None
} else {
Some(name_idx.0)
},
type_idx: if type_idx == NO_INDEX {
None
} else {
Some(type_idx.0)
},
sig_idx: if sig_idx == NO_INDEX {
None
} else {
Some(sig_idx.0)
},
in_scope: true,
};
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::EndLocal {
register_num: Uleb128(register_num),
} => {
self.register_states[register_num as usize].in_scope = false;
Some(DebugInfo::EndLocal {
addr: self.address,
reg: register_num,
})
}
DbgBytecode::RestartLocal {
register_num: Uleb128(register_num),
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize].in_scope = true;
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::SetPrologueEnd => {
//self.prologue_end = true;
Some(DebugInfo::PrologueEnd { addr: self.address })
}
DbgBytecode::SetEpilogueBegin => {
//self.epilogue_begin = true;
Some(DebugInfo::EpilogueBegin { addr: self.address })
}
DbgBytecode::SetFile { name_idx: NO_INDEX } => {
//self.source_file_idx = None;
Some(DebugInfo::SetSourceFile {
addr: self.address,
source_file_idx: None,
})
}
DbgBytecode::SetFile {
name_idx: Uleb128p1(name_idx),
} => {
//self.source_file_idx = Some(name_idx);
Some(DebugInfo::SetSourceFile {
addr: self.address,
source_file_idx: Some(name_idx),
})
}
DbgBytecode::SpecialOpcode(op) => {
//if op >= 0x0a {
// self.prologue_end = false;
// self.epilogue_begin = true;
//}
// See <https://source.android.com/docs/core/runtime/dex-format#opcodes>
let adjusted_opcode = op as u32 - 0x0a;
self.line = (self.line as i32 + (adjusted_opcode as i32 % 15) - 4) as u32;
self.address += adjusted_opcode / 15;
Some(DebugInfo::SetLineNumber {
addr: self.address,
line_num: self.line,
})
}
}
}
pub fn next_info(&mut self) -> DebugInfo {
loop {
if let Some(info) = self.tick() {
return info;
}
}
}
}
#[cfg(test)]
mod test {
    use super::DbgBytecode::*;
    use super::*;

    /// A whole [`DebugInfoItem`] must survive a serialize / deserialize round trip.
    #[test]
    fn test_debug_reserialize() {
        let bytecode = vec![
            SpecialOpcode(14),
            AdvanceLine {
                line_diff: Sleb128(-1551),
            },
            AdvancePC {
                addr_diff: Uleb128(51),
            },
            SpecialOpcode(14),
        ];
        let debug = DebugInfoItem {
            line_start: Uleb128(2902),
            parameter_names: vec![],
            bytecode,
        };
        let raw = debug.serialize_to_vec().unwrap();
        let reloaded = DebugInfoItem::deserialize_from_slice(&raw).unwrap();
        assert_eq!(debug, reloaded);
    }

    /// A single instruction with a negative sleb128 operand must round trip too.
    #[test]
    fn test_advance_line_reserialize() {
        let advance_line = AdvanceLine {
            line_diff: Sleb128(-1551),
        };
        let raw = advance_line.serialize_to_vec().unwrap();
        let reloaded = DbgBytecode::deserialize_from_slice(&raw).unwrap();
        assert_eq!(advance_line, reloaded);
    }
}