androscalpel/androscalpel_serializer/src/debug.rs
Jean-Marie Mineau 65176749bb
fix fix fix!
2025-01-17 11:32:44 +01:00

712 lines
25 KiB
Rust

//! Debug structs
use crate as androscalpel_serializer;
use crate::{
Error, ReadSeek, Result, Serializable, SerializableUntil, Sleb128, Uleb128, Uleb128p1, NO_INDEX,
};
use std::io::Write;
/// In-memory form of a DEX `debug_info_item`.
///
/// See <https://source.android.com/docs/core/runtime/dex-format#debug-info-item>
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugInfoItem {
/// Initial value of the line register ([`DebugInfoReader::new`] starts from it).
pub line_start: Uleb128,
// `parameters_size` is not stored: `parameters_size_field()` recomputes it
// from `parameter_names.len()` when the item is serialized.
//pub parameters_size: Uleb128,
/// One entry per parameter, serialized right after the parameter count.
/// Presumably string indices of the parameter names, with `NO_INDEX` meaning
/// "no name" (per the DEX documentation linked above) -- TODO confirm.
pub parameter_names: Vec<Uleb128p1>,
/// List of opcodes. Notice that the trailing [`DbgBytecode::EndSequence`]
/// is not stored in this vec.
pub bytecode: Vec<DbgBytecode>,
}
/// One opcode of the debug-info state machine.
///
/// The per-variant descriptions below state what [`DebugInfoReader::tick`] does
/// when it executes the opcode. The `prefix_type`/`prefix`/`default_variant`
/// attributes are consumed by the `Serializable` derive; presumably they select
/// the one-byte tag identifying each variant -- confirm against the derive macro.
#[derive(Serializable, Debug, PartialEq, Eq, Copy, Clone)]
#[prefix_type(u8)]
pub enum DbgBytecode {
/// Ends the sequence: the reader reports [`DebugInfo::EndOfData`].
#[prefix(0x00)]
EndSequence,
/// Adds `addr_diff` to the address register.
#[prefix(0x01)]
AdvancePC { addr_diff: Uleb128 },
/// Adds the signed `line_diff` to the line register.
#[prefix(0x02)]
AdvanceLine { line_diff: Sleb128 },
/// Declares a local in `register_num` with a name and a type
/// (`NO_INDEX` is read back as `None`); no signature is recorded.
#[prefix(0x03)]
StartLocal {
register_num: Uleb128,
name_idx: Uleb128p1,
type_idx: Uleb128p1,
},
/// Same as [`DbgBytecode::StartLocal`], with an additional signature index.
#[prefix(0x04)]
StartLocalExtended {
register_num: Uleb128,
name_idx: Uleb128p1,
type_idx: Uleb128p1,
sig_idx: Uleb128p1,
},
/// Marks the local in `register_num` as out of scope.
#[prefix(0x05)]
EndLocal { register_num: Uleb128 },
/// Puts the last state recorded for `register_num` back in scope.
#[prefix(0x06)]
RestartLocal { register_num: Uleb128 },
/// Reported by the reader as [`DebugInfo::PrologueEnd`] at the current address.
#[prefix(0x07)]
SetPrologueEnd,
/// Reported by the reader as [`DebugInfo::EpilogueBegin`] at the current address.
#[prefix(0x08)]
SetEpilogueBegin,
/// Changes the source file (`NO_INDEX` is read back as `None`).
#[prefix(0x09)]
SetFile { name_idx: Uleb128p1 },
/// Any other opcode value. With `adjusted = op - 0x0a`, the reader adds
/// `adjusted % 15 - 4` to the line register and `adjusted / 15` to the
/// address register, then reports a [`DebugInfo::SetLineNumber`].
#[default_variant]
SpecialOpcode(u8),
}
impl DebugInfoItem {
    /// Value of the `parameters_size` field of the serialized item: the number
    /// of entries in `parameter_names`, cast to `u32`.
    pub fn parameters_size_field(&self) -> Uleb128 {
        let nb_parameters = self.parameter_names.len();
        Uleb128(nb_parameters as u32)
    }
}
impl Serializable for DebugInfoItem {
    /// Writes `line_start`, the parameter count, every parameter name, then the
    /// bytecode terminated by [`DbgBytecode::EndSequence`].
    fn serialize(&self, output: &mut dyn Write) -> Result<()> {
        self.line_start.serialize(output)?;
        self.parameters_size_field().serialize(output)?;
        for name_idx in self.parameter_names.iter() {
            name_idx.serialize(output)?;
        }
        self.bytecode.serialize(output, DbgBytecode::EndSequence)?;
        Ok(())
    }

    /// Reads the fields back in the order `serialize` writes them. The
    /// bytecode is read up to the [`DbgBytecode::EndSequence`] terminator.
    fn deserialize(input: &mut dyn ReadSeek) -> Result<Self> {
        let line_start = Uleb128::deserialize(input)?;
        let nb_parameters = Uleb128::deserialize(input)?.0;
        let mut parameter_names = Vec::new();
        let mut nb_read = 0;
        while nb_read < nb_parameters {
            let name_idx = Uleb128p1::deserialize(input)?;
            parameter_names.push(name_idx);
            nb_read += 1;
        }
        let bytecode = Vec::<DbgBytecode>::deserialize(input, DbgBytecode::EndSequence)?;
        Ok(Self {
            line_start,
            parameter_names,
            bytecode,
        })
    }

    /// Sum of the sizes of every serialized field, terminator included.
    fn size(&self) -> usize {
        let header_size = self.line_start.size() + self.parameters_size_field().size();
        let names_size: usize = self
            .parameter_names
            .iter()
            .map(|name_idx| name_idx.size())
            .sum();
        let bytecode_size = self.bytecode.size(DbgBytecode::EndSequence);
        header_size + names_size + bytecode_size
    }
}
/// The name and type of the variable in a register.
///
/// The index fields are `None` when the opcode that set them carried `NO_INDEX`.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub struct DebugRegState {
/// Index of the type of the variable.
pub type_idx: Option<u32>,
/// Index of the name of the variable.
pub name_idx: Option<u32>,
/// Index of the signature of the variable. Only
/// [`DbgBytecode::StartLocalExtended`] provides one; the reader leaves it
/// to `None` for [`DbgBytecode::StartLocal`].
pub sig_idx: Option<u32>,
/// In [`DebugInfoReader`]: `true` after a start/restart opcode, `false`
/// after [`DbgBytecode::EndLocal`] or when the register was never defined.
pub in_scope: bool,
}
/// A simplified debug information
///
/// Each value is one event produced by [`DebugInfoReader`] (and accepted by
/// [`DebugInfoBuilder::add_info`]); `addr` is the value of the address
/// register when the event occurs.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum DebugInfo {
/// The register `reg` now holds the variable described by `val`
/// (emitted for start-local, start-local-extended and restart-local opcodes).
DefLocal {
addr: u32,
reg: u32,
val: DebugRegState,
},
/// The variable in register `reg` goes out of scope.
EndLocal {
addr: u32,
reg: u32,
},
/// Emitted for [`DbgBytecode::SetPrologueEnd`].
PrologueEnd {
addr: u32,
},
/// Emitted for [`DbgBytecode::SetEpilogueBegin`].
EpilogueBegin {
addr: u32,
},
/// The source file changes; `None` stands for `NO_INDEX`.
SetSourceFile {
addr: u32,
source_file_idx: Option<u32>,
},
/// A position entry: the code at `addr` belongs to line `line_num`.
SetLineNumber {
addr: u32,
line_num: u32,
},
/// End of the debug sequence: no more information will follow.
EndOfData,
}
impl DebugInfo {
    /// Address at which this information applies.
    ///
    /// [`DebugInfo::EndOfData`] carries no address: `u32::MAX` is returned for it.
    pub fn get_addr(&self) -> u32 {
        match *self {
            Self::EndOfData => u32::MAX, // TODO should be an Option::None?
            Self::DefLocal { addr, .. }
            | Self::EndLocal { addr, .. }
            | Self::PrologueEnd { addr }
            | Self::EpilogueBegin { addr }
            | Self::SetSourceFile { addr, .. }
            | Self::SetLineNumber { addr, .. } => addr,
        }
    }
}
/// A state machine that interprets a [`DebugInfoItem`].
///
/// Each call to `tick` executes one opcode of the item and updates the
/// registers below.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugInfoReader {
// The item being interpreted.
debug_info: DebugInfoItem,
/// Index, in the bytecode of the item, of the next opcode to execute.
pub pc: usize,
/// The address register, starts at 0.
pub address: u32,
/// The line register, starts at the `line_start` of the item.
pub line: u32,
// Those are registers described in the doc but not necessary in the end
//pub source_file_idx: Option<u32>,
//pub prologue_end: bool,
//pub epilogue_begin: bool,
/// State of each register, indexed by register number. Starts empty and is
/// grown on the fly when an opcode refers to a register not yet tracked.
pub register_states: Vec<DebugRegState>,
}
impl DebugInfoReader {
pub fn new(
debug_info: DebugInfoItem,
//source_file_idx: Option<u32>,
//nb_reg: usize
) -> Self {
let line = debug_info.line_start.0;
Self {
debug_info,
pc: 0,
address: 0,
line,
//source_file_idx,
//prologue_end: false,
//epilogue_begin: false,
//register_states: vec![
// DebugRegState {
// name_idx: None,
// type_idx: None,
// sig_idx: None,
// in_scope: false,
// };
// nb_reg
//],
register_states: vec![], // In the end, it's easier to grow this on the fly
}
}
pub fn get_ins(&self) -> Result<DbgBytecode> {
if self.pc >= self.debug_info.bytecode.len() {
return Err(Error::OutOfBound(
"Try to read an instruction out of bound, maybe after the end of the debug sequence."
.into()
));
}
Ok(self.debug_info.bytecode[self.pc])
}
pub fn tick(&mut self) -> Option<DebugInfo> {
let ins = if let Ok(ins) = self.get_ins() {
ins
} else {
return Some(DebugInfo::EndOfData);
};
self.pc += 1;
match ins {
DbgBytecode::EndSequence => {
self.pc = self.debug_info.bytecode.len();
Some(DebugInfo::EndOfData)
}
DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
} => {
self.address += addr_diff;
None
}
DbgBytecode::AdvanceLine {
line_diff: Sleb128(line_diff),
} => {
self.line = (self.line as i32 + line_diff) as u32;
None
}
DbgBytecode::StartLocal {
register_num: Uleb128(register_num),
name_idx,
type_idx,
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize] = DebugRegState {
name_idx: if name_idx == NO_INDEX {
None
} else {
Some(name_idx.0)
},
type_idx: if type_idx == NO_INDEX {
None
} else {
Some(type_idx.0)
},
sig_idx: None,
in_scope: true,
};
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::StartLocalExtended {
register_num: Uleb128(register_num),
name_idx,
type_idx,
sig_idx,
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize] = DebugRegState {
name_idx: if name_idx == NO_INDEX {
None
} else {
Some(name_idx.0)
},
type_idx: if type_idx == NO_INDEX {
None
} else {
Some(type_idx.0)
},
sig_idx: if sig_idx == NO_INDEX {
None
} else {
Some(sig_idx.0)
},
in_scope: true,
};
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::EndLocal {
register_num: Uleb128(register_num),
} => {
// Yes this can happen
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize].in_scope = false;
Some(DebugInfo::EndLocal {
addr: self.address,
reg: register_num,
})
}
DbgBytecode::RestartLocal {
register_num: Uleb128(register_num),
} => {
while self.register_states.len() < (register_num + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
})
}
self.register_states[register_num as usize].in_scope = true;
Some(DebugInfo::DefLocal {
addr: self.address,
reg: register_num,
val: self.register_states[register_num as usize],
})
}
DbgBytecode::SetPrologueEnd => {
//self.prologue_end = true;
Some(DebugInfo::PrologueEnd { addr: self.address })
}
DbgBytecode::SetEpilogueBegin => {
//self.epilogue_begin = true;
Some(DebugInfo::EpilogueBegin { addr: self.address })
}
DbgBytecode::SetFile { name_idx: NO_INDEX } => {
//self.source_file_idx = None;
Some(DebugInfo::SetSourceFile {
addr: self.address,
source_file_idx: None,
})
}
DbgBytecode::SetFile {
name_idx: Uleb128p1(name_idx),
} => {
//self.source_file_idx = Some(name_idx);
Some(DebugInfo::SetSourceFile {
addr: self.address,
source_file_idx: Some(name_idx),
})
}
DbgBytecode::SpecialOpcode(op) => {
//if op >= 0x0a {
// self.prologue_end = false;
// self.epilogue_begin = true;
//}
// See <https://source.android.com/docs/core/runtime/dex-format#opcodes>
let adjusted_opcode = op as u32 - 0x0a;
self.line = (self.line as i32 + (adjusted_opcode as i32 % 15) - 4) as u32;
self.address += adjusted_opcode / 15;
Some(DebugInfo::SetLineNumber {
addr: self.address,
line_num: self.line,
})
}
}
}
pub fn next_info(&mut self) -> DebugInfo {
loop {
if let Some(info) = self.tick() {
return info;
}
}
}
}
/// A state machine that generates a [`DebugInfoItem`].
///
/// Information is fed with `add_info`, the item is then produced by `build`.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct DebugInfoBuilder {
// Opcodes emitted so far (without trailing `EndSequence`).
debug_infos: Vec<DbgBytecode>,
// Line of the first `SetLineNumber` received, `None` until then.
line_start: Option<u32>,
// Copied as is in the generated item.
parameter_names: Vec<Uleb128p1>,
//pub pc: usize,
// Value the address register has after executing `debug_infos`.
address: u32,
// Value the line register has after executing `debug_infos`.
line: u32,
// Those are registers described in the doc but not necessary in the end
//pub source_file_idx: Option<u32>,
//pub prologue_end: bool,
//pub epilogue_begin: bool,
// Last value defined for each register, grown on the fly. `add_info` does
// not reset `in_scope` on `EndLocal`, the value is used to decide
// between emitting a `RestartLocal` or a `StartLocal`.
register_states: Vec<DebugRegState>,
// Set when `EndOfData` is received: no more information is accepted.
finished: bool,
}
impl DebugInfoBuilder {
pub fn new(parameter_names: Vec<Uleb128p1>) -> Self {
Self {
debug_infos: vec![],
line_start: None,
parameter_names,
//pc: 0,
address: 0,
line: 0,
//source_file_idx,
//prologue_end: false,
//epilogue_begin: false,
//register_states: vec![
// DebugRegState {
// name_idx: None,
// type_idx: None,
// sig_idx: None,
// in_scope: false,
// };
// nb_reg
//],
register_states: vec![], // In the end, it's easier to grow this on the fly
finished: false,
}
}
pub fn add_info(&mut self, info: &DebugInfo) -> Result<()> {
if self.finished {
return Err(Error::SerializationError(
"Cannot add more information: EndSequence has already been send".into(),
));
}
match info {
DebugInfo::DefLocal { addr, reg, val } => {
if *addr < self.address {
return Err(Error::SerializationError(format!(
"The address register can only increase, \
found 0x{addr:02x} while register is already \
0x{:02x}",
self.address
)));
}
while self.register_states.len() < (reg + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
});
}
if *addr != self.address {
let addr_diff = *addr - self.address;
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address += addr_diff;
}
let mut old_val = self.register_states[*reg as usize];
let old_val_in_scope = old_val.in_scope;
old_val.in_scope = true;
if old_val_in_scope && old_val == *val {
self.register_states[*reg as usize].in_scope = true;
self.debug_infos.push(DbgBytecode::RestartLocal {
register_num: Uleb128(*reg),
});
} else {
self.register_states[*reg as usize] = *val;
if val.sig_idx.is_some() {
self.debug_infos.push(DbgBytecode::StartLocalExtended {
register_num: Uleb128(*reg),
name_idx: if let Some(name_idx) = val.name_idx {
Uleb128p1(name_idx)
} else {
NO_INDEX
},
type_idx: if let Some(type_idx) = val.type_idx {
Uleb128p1(type_idx)
} else {
NO_INDEX
},
sig_idx: if let Some(sig_idx) = val.sig_idx {
Uleb128p1(sig_idx)
} else {
NO_INDEX
},
})
} else {
self.debug_infos.push(DbgBytecode::StartLocal {
register_num: Uleb128(*reg),
name_idx: if let Some(name_idx) = val.name_idx {
Uleb128p1(name_idx)
} else {
NO_INDEX
},
type_idx: if let Some(type_idx) = val.type_idx {
Uleb128p1(type_idx)
} else {
NO_INDEX
},
})
}
}
Ok(())
}
DebugInfo::EndLocal { addr, reg } => {
if *addr < self.address {
return Err(Error::SerializationError(format!(
"The address register can only increase, \
found 0x{addr:02x} while register is already \
0x{:02x}",
self.address
)));
}
while self.register_states.len() < (reg + 1) as usize {
self.register_states.push(DebugRegState {
name_idx: None,
type_idx: None,
sig_idx: None,
in_scope: false,
});
}
if *addr != self.address {
let addr_diff = *addr - self.address;
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address += addr_diff;
}
self.debug_infos.push(DbgBytecode::EndLocal {
register_num: Uleb128(*reg),
});
Ok(())
}
DebugInfo::PrologueEnd { addr } => {
if *addr < self.address {
return Err(Error::SerializationError(format!(
"The address register can only increase, \
found 0x{addr:02x} while register is already \
0x{:02x}",
self.address
)));
}
if *addr != self.address {
let addr_diff = *addr - self.address;
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address += addr_diff;
}
self.debug_infos.push(DbgBytecode::SetPrologueEnd);
Ok(())
}
DebugInfo::EpilogueBegin { addr } => {
if *addr < self.address {
return Err(Error::SerializationError(format!(
"The address register can only increase, \
found 0x{addr:02x} while register is already \
0x{:02x}",
self.address
)));
}
if *addr != self.address {
let addr_diff = *addr - self.address;
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address += addr_diff;
}
self.debug_infos.push(DbgBytecode::SetEpilogueBegin);
Ok(())
}
DebugInfo::SetLineNumber { addr, line_num } => {
if *addr < self.address {
return Err(Error::SerializationError(format!(
"The address register can only increase, \
found 0x{addr:02x} while register is already \
0x{:02x}",
self.address
)));
}
if self.line_start.is_none() {
self.line_start = Some(*line_num);
self.line = *line_num;
}
let mut line_diff = *line_num as i32 - self.line as i32;
let mut addr_diff = addr - self.address;
if !(-4..15 - 4).contains(&line_diff) {
self.debug_infos.push(DbgBytecode::AdvanceLine {
line_diff: Sleb128(line_diff),
});
self.line = *line_num;
line_diff = 0;
}
if addr_diff as i32 * 15 + 0x0a + line_diff + 4 > 0xff {
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address = *addr;
addr_diff = 0;
}
let op = 0x0a + addr_diff as u8 * 15 + (line_diff + 4) as u8;
self.debug_infos.push(DbgBytecode::SpecialOpcode(op));
self.address += addr_diff;
self.line = (self.line as i32 + line_diff) as u32;
Ok(())
}
DebugInfo::SetSourceFile {
addr,
source_file_idx,
} => {
if *addr != self.address {
let addr_diff = *addr - self.address;
self.debug_infos.push(DbgBytecode::AdvancePC {
addr_diff: Uleb128(addr_diff),
});
self.address += addr_diff;
}
self.debug_infos.push(DbgBytecode::SetFile {
name_idx: if let Some(source_file_idx) = source_file_idx {
Uleb128p1(*source_file_idx)
} else {
NO_INDEX
},
});
Ok(())
}
DebugInfo::EndOfData => {
self.finished = true;
Ok(())
}
}
}
/// If they are no debug information, return None, else compute and return the [`DebugInfoItem`].
pub fn build(self) -> Option<DebugInfoItem> {
if self.debug_infos.is_empty() && self.parameter_names.iter().all(|&idx| idx == NO_INDEX) {
None
} else {
Some(DebugInfoItem {
line_start: Uleb128(self.line_start.unwrap_or(0)),
parameter_names: self.parameter_names,
bytecode: self.debug_infos,
})
}
}
}
#[cfg(test)]
mod test {
    use super::DbgBytecode::*;
    use super::*;

    /// A [`DebugInfoItem`] must survive a serialization round trip.
    #[test]
    fn test_debug_reserialize() {
        let bytecode = vec![
            SpecialOpcode(14),
            AdvanceLine {
                line_diff: Sleb128(-1551),
            },
            AdvancePC {
                addr_diff: Uleb128(51),
            },
            SpecialOpcode(14),
            // Ends a local that was never started
            EndLocal {
                register_num: Uleb128(41),
            },
        ];
        let debug = DebugInfoItem {
            line_start: Uleb128(2902),
            parameter_names: vec![],
            bytecode,
        };
        let raw = debug.serialize_to_vec().unwrap();
        let reparsed = DebugInfoItem::deserialize_from_slice(&raw).unwrap();
        assert_eq!(debug, reparsed);
    }

    /// An `AdvanceLine` with a negative difference must survive a
    /// serialization round trip.
    #[test]
    fn test_advance_line_reserialize() {
        let advance_line = AdvanceLine {
            line_diff: Sleb128(-1551),
        };
        let raw = advance_line.serialize_to_vec().unwrap();
        let reparsed = DbgBytecode::deserialize_from_slice(&raw).unwrap();
        assert_eq!(advance_line, reparsed);
    }

    /// Reading an item with [`DebugInfoReader`] then rebuilding it with
    /// [`DebugInfoBuilder`] must give back the same bytes.
    #[test]
    fn test_get_expl_debug() {
        const RAW_DEBUG: [u8; 10] = [23, 0, 14, 135, 3, 0, 16, 2, 150, 0];
        let debug = DebugInfoItem::deserialize_from_slice(&RAW_DEBUG).unwrap();
        let mut reader = DebugInfoReader::new(debug.clone());
        let mut list_info = Vec::new();
        loop {
            let info = reader.next_info();
            let is_last = info == DebugInfo::EndOfData;
            list_info.push(info);
            if is_last {
                break;
            }
        }
        let mut builder = DebugInfoBuilder::new(debug.parameter_names.clone());
        for info in list_info.iter() {
            builder.add_info(info).unwrap();
        }
        let rebuilt = builder.build().unwrap().serialize_to_vec().unwrap();
        assert_eq!(&RAW_DEBUG[..], rebuilt.as_slice());
    }
}