diff --git a/androscalpel/src/instructions.rs b/androscalpel/src/instructions.rs
index 8534fc0..7ba537f 100644
--- a/androscalpel/src/instructions.rs
+++ b/androscalpel/src/instructions.rs
@@ -12,7 +12,8 @@
 use pyo3::exceptions::PyTypeError;
 use pyo3::prelude::*;
 use std::collections::HashMap;
-
+const I8_MIN_AS_I16: i16 = i8::MIN as i16;
+const I8_MAX_AS_I16: i16 = i8::MAX as i16;
 const I8_MIN_AS_I32: i32 = i8::MIN as i32;
 const I8_MAX_AS_I32: i32 = i8::MAX as i32;
 const I16_MIN_AS_I32: i32 = i16::MIN as i32;
@@ -4688,7 +4689,7 @@ impl IfLtZ {
     }
 }
 
-impl IfLtz {
+impl IfLtZ {
     /// Return the raw instruction ([`InsFormat`]).
     ///
     /// `branch_offset` is the offset to the address to jump to if
@@ -16255,7 +16256,7 @@ impl AddIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16279,7 +16280,7 @@ impl AddIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(AddInt2Addr({}, {}, {}))",
+            "Instruction(AddIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16317,6 +16318,47 @@ impl AddIntLit {
     }
 }
 
+impl AddIntLit {
+    /// Return the raw instruction ([`InsFormat`]): add-int/lit16 (22s) or add-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "add-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (add-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (add-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd0,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xd8,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put lit - b in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16347,7 +16389,7 @@ impl RsubIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16371,7 +16413,7 @@ impl RsubIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(RsubInt2Addr({}, {}, {}))",
+            "Instruction(RsubIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16409,6 +16451,47 @@ impl RsubIntLit {
     }
 }
 
+impl RsubIntLit {
+    /// Return the raw instruction ([`InsFormat`]): rsub-int (22s) or rsub-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "rsub-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (rsub-int), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (rsub-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd1,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xd9,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b * lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16439,7 +16522,7 @@ impl MulIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16463,7 +16546,7 @@ impl MulIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(MulInt2Addr({}, {}, {}))",
+            "Instruction(MulIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16501,6 +16584,47 @@ impl MulIntLit {
     }
 }
 
+impl MulIntLit {
+    /// Return the raw instruction ([`InsFormat`]): mul-int/lit16 (22s) or mul-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "mul-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (mul-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (mul-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd2,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xda,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b / lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16531,7 +16655,7 @@ impl DivIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16555,7 +16679,7 @@ impl DivIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(DivInt2Addr({}, {}, {}))",
+            "Instruction(DivIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16593,6 +16717,47 @@ impl DivIntLit {
     }
 }
 
+impl DivIntLit {
+    /// Return the raw instruction ([`InsFormat`]): div-int/lit16 (22s) or div-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "div-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (div-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (div-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd3,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xdb,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b % lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16623,7 +16788,7 @@ impl RemIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16647,7 +16812,7 @@ impl RemIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(RemInt2Addr({}, {}, {}))",
+            "Instruction(RemIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16685,6 +16850,47 @@ impl RemIntLit {
     }
 }
 
+impl RemIntLit {
+    /// Return the raw instruction ([`InsFormat`]): rem-int/lit16 (22s) or rem-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "rem-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (rem-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (rem-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd4,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xdc,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b & lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16715,7 +16921,7 @@ impl AndIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16739,7 +16945,7 @@ impl AndIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(AndInt2Addr({}, {}, {}))",
+            "Instruction(AndIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16777,6 +16983,47 @@ impl AndIntLit {
     }
 }
 
+impl AndIntLit {
+    /// Return the raw instruction ([`InsFormat`]): and-int/lit16 (22s) or and-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "and-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (and-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (and-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd5,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xdd,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b | lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16807,7 +17054,7 @@ impl OrIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16831,7 +17078,7 @@ impl OrIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(OrInt2Addr({}, {}, {}))",
+            "Instruction(OrIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16869,6 +17116,47 @@ impl OrIntLit {
     }
 }
 
+impl OrIntLit {
+    /// Return the raw instruction ([`InsFormat`]): or-int/lit16 (22s) or or-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "or-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (or-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (or-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd6,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xde,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b ^ lit in dest.
 ///
 /// Either `dest` and `b` are registers indexed on 4 bits and lit is encoded in 16 bits
@@ -16899,7 +17187,7 @@ impl XorIntLit {
         if self.b & 0b1111_0000 != 0 {
             reg_on_4_bit = false;
         }
-        if self.lit < -128 || self.lit > 127 {
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
             lit_on_8_bits = false;
         }
         if !reg_on_4_bit && !lit_on_8_bits {
@@ -16923,7 +17211,7 @@ impl XorIntLit {
 
     pub fn __repr__(&self) -> String {
         format!(
-            "Instruction(XorInt2Addr({}, {}, {}))",
+            "Instruction(XorIntLit({}, {}, {}))",
             self.dest, self.b, self.lit
         )
     }
@@ -16961,6 +17249,47 @@ impl XorIntLit {
     }
 }
 
+impl XorIntLit {
+    /// Return the raw instruction ([`InsFormat`]): xor-int/lit16 (22s) or xor-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        let mut reg_on_4_bit = true;
+        let mut lit_on_8_bits = true;
+        if self.dest & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.b & 0b1111_0000 != 0 {
+            reg_on_4_bit = false;
+        }
+        if self.lit < I8_MIN_AS_I16 || self.lit > I8_MAX_AS_I16 {
+            lit_on_8_bits = false;
+        }
+        if !reg_on_4_bit && !lit_on_8_bits {
+            // Should not happen on a sanitized op
+            panic!(
+                "xor-int/lit uses either registers indexed on 4 bits, and a literal \
+                 encoded on 16 bits (xor-int/lit16), or registers indexed on 8 bits and \
+                 a literal encoded on 8 bits (xor-int/lit8). Found reg {} and {}, and lit \
+                 {}",
+                self.dest, self.b, self.lit
+            )
+        } else if reg_on_4_bit {
+            InsFormat::Format22S {
+                op: 0xd7,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit,
+            }
+        } else {
+            InsFormat::Format22B {
+                op: 0xdf,
+                va: self.dest,
+                vb: self.b,
+                c: self.lit as i8,
+            }
+        }
+    }
+}
+
 /// Put b << lit in dest.
 #[pyclass]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -17021,6 +17350,18 @@ impl ShlIntLit {
     }
 }
 
+impl ShlIntLit {
+    /// Return the raw instruction ([`InsFormat`]): shl-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        InsFormat::Format22B {
+            op: 0xe0,
+            va: self.dest,
+            vb: self.b,
+            c: self.lit,
+        }
+    }
+}
+
 /// Put b >> lit (signed) in dest.
 #[pyclass]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -17081,6 +17422,18 @@ impl ShrIntLit {
     }
 }
 
+impl ShrIntLit {
+    /// Return the raw instruction ([`InsFormat`]): shr-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        InsFormat::Format22B {
+            op: 0xe1,
+            va: self.dest,
+            vb: self.b,
+            c: self.lit,
+        }
+    }
+}
+
 /// Put b >> lit in dest.
 #[pyclass]
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -17141,6 +17494,18 @@ impl UshrIntLit {
     }
 }
 
+impl UshrIntLit {
+    /// Return the raw instruction ([`InsFormat`]): ushr-int/lit8 (22b).
+    pub fn get_raw_ins(&self) -> InsFormat {
+        InsFormat::Format22B {
+            op: 0xe2,
+            va: self.dest,
+            vb: self.b,
+            c: self.lit,
+        }
+    }
+}
+
 /// Call a polymorphic method.
 #[pyclass]
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -17257,7 +17622,76 @@ impl InvokePolymorphic {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        8
+    }
+}
+
+impl InvokePolymorphic {
+    /// Return the raw instruction ([`InsFormat`]).
+    ///
+    /// - `meth_idx` is the index of the referred method.
+    /// - `proto_idx` is the index of the prototype used.
+    pub fn get_raw_ins(&self, meth_idx: usize, proto_idx: usize) -> InsFormat {
+        let mut last = None;
+        let mut first = None;
+        let mut consec = true;
+        let mut four_bits = true;
+        let len = self.args.len();
+        for r in self.args.iter().copied() {
+            if first.is_none() {
+                first = Some(r);
+            }
+            if let Some(last) = last {
+                if r != last + 1 {
+                    consec = false;
+                }
+            }
+            if r > 0b0000_1111 {
+                four_bits = false;
+            }
+            last = Some(r);
+        }
+        if four_bits && len <= 5 {
+            let mut regs = vec![];
+            for reg in self.args.iter().copied() {
+                regs.push(reg);
+            }
+            while regs.len() != 5 {
+                regs.push(0);
+            }
+            let [vc, vd, ve, vf, vg]: [u8; 5] = regs
+                .into_iter()
+                .map(|r| r as u8)
+                .collect::<Vec<_>>()
+                .try_into()
+                .ok()
+                .unwrap();
+            let a = self.args.len() as u8;
+            InsFormat::Format45CC {
+                op: 0xfa,
+                a,
+                vc,
+                vd,
+                ve,
+                vf,
+                vg,
+                b: meth_idx as u16,
+                h: proto_idx as u16,
+            }
+        } else if consec && len <= 255 {
+            let a = self.args.len() as u8;
+            let vc = if let Some(vc) = first { vc } else { 0 };
+            InsFormat::Format4RCC {
+                op: 0xfb,
+                a,
+                vc,
+                b: meth_idx as u16,
+                h: proto_idx as u16,
+            }
+        } else {
+            // Not supposed to happen with a sanitized invoke
+            panic!("Invalid Invoke instruction {self:?}")
+        }
     }
 }
 
@@ -17366,7 +17800,73 @@ impl InvokeCustom {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        6
+    }
+}
+
+impl InvokeCustom {
+    /// Return the raw instruction ([`InsFormat`]).
+    ///
+    /// - `call_site_idx` is the index of the call site.
+    pub fn get_raw_ins(&self, call_site_idx: usize) -> InsFormat {
+        let mut last = None;
+        let mut first = None;
+        let mut consec = true;
+        let mut four_bits = true;
+        let len = self.args.len();
+        for r in self.args.iter().copied() {
+            if first.is_none() {
+                first = Some(r);
+            }
+            if let Some(last) = last {
+                if r != last + 1 {
+                    consec = false;
+                }
+            }
+            if r > 0b0000_1111 {
+                four_bits = false;
+            }
+            last = Some(r);
+        }
+        if four_bits && len <= 5 {
+            let mut regs = vec![];
+            for reg in self.args.iter().copied() {
+                regs.push(reg);
+            }
+            while regs.len() != 5 {
+                regs.push(0);
+            }
+            let [vc, vd, ve, vf, vg]: [u8; 5] = regs
+                .into_iter()
+                .map(|r| r as u8)
+                .collect::<Vec<_>>()
+                .try_into()
+                .ok()
+                .unwrap();
+            let a = self.args.len() as u8;
+            InsFormat::Format35C {
+                op: 0xfc,
+                a,
+                vc,
+                vd,
+                ve,
+                vf,
+                vg,
+                b: call_site_idx as u16,
+            }
+        } else if consec && len <= 255 {
+            let a = self.args.len() as u8;
+            let vc = if let Some(vc) = first { vc } else { 0 };
+            InsFormat::Format3RC {
+                op: 0xfd,
+                a,
+                vc,
+                b: call_site_idx as u16,
+            }
+        } else {
+            // Not supposed to happen with a sanitized invoke
+            panic!("Invalid Invoke instruction {self:?}")
+        }
     }
 }
 
@@ -17426,7 +17926,20 @@ impl ConstMethodHandle {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        4
+    }
+}
+
+impl ConstMethodHandle {
+    /// Return the raw instruction ([`InsFormat`]).
+    ///
+    /// - `method_handle_idx` is the index of the method handle.
+    pub fn get_raw_ins(&self, method_handle_idx: usize) -> InsFormat {
+        InsFormat::Format21C {
+            op: 0xfe,
+            va: self.to,
+            b: method_handle_idx as u16,
+        }
     }
 }
 
@@ -17486,7 +17999,20 @@ impl ConstMethodType {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        4
+    }
+}
+
+impl ConstMethodType {
+    /// Return the raw instruction ([`InsFormat`]).
+    ///
+    /// - `proto_idx` is the index of the method prototype.
+    pub fn get_raw_ins(&self, proto_idx: usize) -> InsFormat {
+        InsFormat::Format21C {
+            op: 0xff,
+            va: self.to,
+            b: proto_idx as u16,
+        }
     }
 }
 
@@ -17581,7 +18107,7 @@ impl Try {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        0
     }
 }
 
@@ -17637,6 +18163,6 @@ impl Label {
     /// by the `Serializable::size()` method), but instructions in the bytecode
     /// count addresses by unit of `u16`.
     pub fn get_ins_size(&self) -> usize {
-        self.get_raw_ins().size()
+        0
     }
 }