Skip to content

Commit

Permalink
Refactor - Dynamic Stack (#638)
Browse files Browse the repository at this point in the history
* Removes STACK_PTR_REG.

* Adds imm alignment check.

* Makes modifications of R10 visible immediately.

* Removes EbpfVm::stack_pointer.

* Uses memory indirect operands for env.call_depth.

* Removes `add64 r10, imm` special handling in execution.

* Updates the ISA spec.
  • Loading branch information
Lichtso authored Nov 27, 2024
1 parent c16589f commit 4ad935b
Show file tree
Hide file tree
Showing 9 changed files with 103 additions and 98 deletions.
7 changes: 3 additions & 4 deletions benches/vm_execution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,17 +251,16 @@ fn bench_jit_vs_interpreter_call_depth_dynamic(bencher: &mut Bencher) {
jlt r6, 1024, -4
exit
function_foo:
add r11, -4
stw [r10-4], 0x11223344
add r10, -64
stw [r10+4], 0x11223344
mov r6, r1
jeq r6, 0, +3
mov r1, r6
add r1, -1
call function_foo
add r11, 4
exit",
Config::default(),
176130,
156674,
&mut [],
);
}
7 changes: 2 additions & 5 deletions doc/bytecode.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ All of them are 64 bit wide.
| `r8` | all | GPR | Call-preserved
| `r9` | all | GPR | Call-preserved
| `r10` | all | Frame pointer | System register
| `r11` | from v1 | Stack pointer | System register
| `pc` | all | Program counter | Hidden register


Expand Down Expand Up @@ -258,7 +257,6 @@ Except that the target location of `callx` is the src register, thus runtime dyn
Call instructions (`call` and `callx` but not `syscall`) do:
- Save the registers `r6`, `r7`, `r8`, `r9`, the frame pointer `r10` and the `pc` (pointing at the next instruction)
- If < v1: Add one stack frame size to the frame pointer `r10`
- If ≥ v1: Move the stack pointer `r11` into the frame pointer `r10`

The `exit` (a.k.a. return) instruction does:
- Restore the registers `r6`, `r7`, `r8`, `r9`, the frame pointer `r10` and the `pc`
Expand Down Expand Up @@ -324,13 +322,12 @@ Verification
- For all instructions the source register must be `r0` ≤ src ≤ `r10`
- For all instructions (except for memory writes) the destination register must be `r0` ≤ dst ≤ `r9`
- For all instructions the opcode must be valid
- Memory write instructions can use `r10` as destination register

### until v1
- No instruction can use `r11` as destination register
- Only memory write instructions can use `r10` as destination register

### from v1
- `add64 reg, imm` can use `r11` as destination register
- `add64 reg, imm` can also use `r10` as destination register

### until v2
- Opcodes from the product / quotient / remainder instruction class are forbidden
Expand Down
2 changes: 0 additions & 2 deletions src/ebpf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,6 @@ pub const PROG_MAX_INSNS: usize = 65_536;
pub const INSN_SIZE: usize = 8;
/// Frame pointer register
pub const FRAME_PTR_REG: usize = 10;
/// Stack pointer register
pub const STACK_PTR_REG: usize = 11;
/// First scratch register
pub const FIRST_SCRATCH_REG: usize = 6;
/// Number of scratch registers
Expand Down
20 changes: 2 additions & 18 deletions src/interpreter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
//! Interpreter for eBPF programs.
use crate::{
ebpf::{self, STACK_PTR_REG},
ebpf,
elf::Executable,
error::{EbpfError, ProgramResult},
program::BuiltinFunction,
Expand Down Expand Up @@ -146,9 +146,8 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> {
// With fixed frames we start the new frame at the next fixed offset
let stack_frame_size =
config.stack_frame_size * if config.enable_stack_frame_gaps { 2 } else { 1 };
self.vm.stack_pointer += stack_frame_size as u64;
self.reg[ebpf::FRAME_PTR_REG] += stack_frame_size as u64;
}
self.reg[ebpf::FRAME_PTR_REG] = self.vm.stack_pointer;

true
}
Expand Down Expand Up @@ -189,16 +188,6 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> {
}

match insn.opc {
ebpf::ADD64_IMM if dst == STACK_PTR_REG && self.executable.get_sbpf_version().dynamic_stack_frames() => {
// Let the stack overflow. For legitimate programs, this is a nearly
// impossible condition to hit since programs are metered and we already
// enforce a maximum call depth. For programs that intentionally mess
// around with the stack pointer, MemoryRegion::map will return
// InvalidVirtualAddress(stack_ptr) once an invalid stack address is
// accessed.
self.vm.stack_pointer = self.vm.stack_pointer.overflowing_add(insn.imm as u64).0;
}

ebpf::LD_DW_IMM if !self.executable.get_sbpf_version().disable_lddw() => {
ebpf::augment_lddw_unchecked(self.program, &mut insn);
self.reg[dst] = insn.imm as u64;
Expand Down Expand Up @@ -584,11 +573,6 @@ impl<'a, 'b, C: ContextObject> Interpreter<'a, 'b, C> {
self.reg[ebpf::FIRST_SCRATCH_REG
..ebpf::FIRST_SCRATCH_REG + ebpf::SCRATCH_REGS]
.copy_from_slice(&frame.caller_saved_registers);
if !self.executable.get_sbpf_version().dynamic_stack_frames() {
let stack_frame_size =
config.stack_frame_size * if config.enable_stack_frame_gaps { 2 } else { 1 };
self.vm.stack_pointer -= stack_frame_size as u64;
}
check_pc!(self, next_pc, frame.target_pc);
}
_ => throw_error!(self, EbpfError::UnsupportedInstruction),
Expand Down
59 changes: 19 additions & 40 deletions src/jit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use rand::{
use std::{fmt::Debug, mem, ptr};

use crate::{
ebpf::{self, FIRST_SCRATCH_REG, FRAME_PTR_REG, INSN_SIZE, SCRATCH_REGS, STACK_PTR_REG},
ebpf::{self, FIRST_SCRATCH_REG, FRAME_PTR_REG, INSN_SIZE, SCRATCH_REGS},
elf::Executable,
error::{EbpfError, ProgramResult},
memory_management::{
Expand Down Expand Up @@ -255,15 +255,14 @@ struct Jump {
enum RuntimeEnvironmentSlot {
HostStackPointer = 0,
CallDepth = 1,
StackPointer = 2,
ContextObjectPointer = 3,
PreviousInstructionMeter = 4,
DueInsnCount = 5,
StopwatchNumerator = 6,
StopwatchDenominator = 7,
Registers = 8,
ProgramResult = 20,
MemoryMapping = 28,
ContextObjectPointer = 2,
PreviousInstructionMeter = 3,
DueInsnCount = 4,
StopwatchNumerator = 5,
StopwatchDenominator = 6,
Registers = 7,
ProgramResult = 19,
MemoryMapping = 27,
}

/* Explanation of the Instruction Meter
Expand Down Expand Up @@ -418,16 +417,11 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_SCRATCH, 0));
}

let dst = if insn.dst == STACK_PTR_REG as u8 { u8::MAX } else { REGISTER_MAP[insn.dst as usize] };
let dst = if insn.dst == FRAME_PTR_REG as u8 { u8::MAX } else { REGISTER_MAP[insn.dst as usize] };
let src = REGISTER_MAP[insn.src as usize];
let target_pc = (self.pc as isize + insn.off as isize + 1) as usize;

match insn.opc {
ebpf::ADD64_IMM if insn.dst == STACK_PTR_REG as u8 && self.executable.get_sbpf_version().dynamic_stack_frames() => {
let stack_ptr_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, insn.imm, Some(stack_ptr_access)));
}

ebpf::LD_DW_IMM if !self.executable.get_sbpf_version().disable_lddw() => {
self.emit_validate_and_profile_instruction_count(false, Some(self.pc + 2));
self.pc += 1;
Expand Down Expand Up @@ -754,25 +748,16 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {
self.emit_validate_instruction_count(Some(self.pc));

let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth));
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], call_depth_access));

// If CallDepth == 0, we've reached the exit instruction of the entry point
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_MAP[FRAME_PTR_REG], 0, None));
// If env.call_depth == 0, we've reached the exit instruction of the entry point
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, 0, Some(call_depth_access)));
if self.config.enable_instruction_meter {
self.emit_ins(X86Instruction::load_immediate(OperandSize::S64, REGISTER_SCRATCH, self.pc as i64));
}
// we're done
self.emit_ins(X86Instruction::conditional_jump_immediate(0x84, self.relative_to_anchor(ANCHOR_EXIT, 6)));

// else decrement and update CallDepth
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_MAP[FRAME_PTR_REG], 1, None));
self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], REGISTER_PTR_TO_VM, call_depth_access));

if !self.executable.get_sbpf_version().dynamic_stack_frames() {
let stack_pointer_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer));
let stack_frame_size = self.config.stack_frame_size as i64 * if self.config.enable_stack_frame_gaps { 2 } else { 1 };
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_PTR_TO_VM, stack_frame_size, Some(stack_pointer_access))); // env.stack_pointer -= stack_frame_size;
}
// else decrement and update env.call_depth
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 5, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth -= 1;

// and return
self.emit_profile_instruction_count(false, Some(0));
Expand Down Expand Up @@ -1540,23 +1525,18 @@ impl<'a, C: ContextObject> JitCompiler<'a, C> {
// Push the caller's frame pointer. The code to restore it is emitted at the end of emit_internal_call().
self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], RSP, X86IndirectAccess::OffsetIndexShift(8, RSP, 0)));
self.emit_ins(X86Instruction::xchg(OperandSize::S64, REGISTER_SCRATCH, RSP, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0)))); // Push return address and restore original REGISTER_SCRATCH

// Increase CallDepth
// Increase env.call_depth
let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, 1, Some(call_depth_access)));
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], call_depth_access));
// If CallDepth == self.config.max_call_depth, stop and return CallDepthExceeded
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_MAP[FRAME_PTR_REG], self.config.max_call_depth as i64, None));
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth += 1;
// If env.call_depth == self.config.max_call_depth, throw CallDepthExceeded
self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, self.config.max_call_depth as i64, Some(call_depth_access)));
self.emit_ins(X86Instruction::conditional_jump_immediate(0x83, self.relative_to_anchor(ANCHOR_CALL_DEPTH_EXCEEDED, 6)));

// Setup the frame pointer for the new frame. What we do depends on whether we're using dynamic or fixed frames.
let stack_pointer_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StackPointer));
if !self.executable.get_sbpf_version().dynamic_stack_frames() {
// With fixed frames we start the new frame at the next fixed offset
let stack_frame_size = self.config.stack_frame_size as i64 * if self.config.enable_stack_frame_gaps { 2 } else { 1 };
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, stack_frame_size, Some(stack_pointer_access))); // env.stack_pointer += stack_frame_size;
self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x81, 0, REGISTER_MAP[FRAME_PTR_REG], stack_frame_size, None)); // REGISTER_MAP[FRAME_PTR_REG] += stack_frame_size;
}
self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[FRAME_PTR_REG], stack_pointer_access)); // reg[ebpf::FRAME_PTR_REG] = env.stack_pointer;
self.emit_ins(X86Instruction::return_near());

// Routine for emit_internal_call(Value::Register())
Expand Down Expand Up @@ -1757,7 +1737,6 @@ mod tests {

check_slot!(env, host_stack_pointer, HostStackPointer);
check_slot!(env, call_depth, CallDepth);
check_slot!(env, stack_pointer, StackPointer);
check_slot!(env, context_object_pointer, ContextObjectPointer);
check_slot!(env, previous_instruction_meter, PreviousInstructionMeter);
check_slot!(env, due_insn_count, DueInsnCount);
Expand Down
20 changes: 19 additions & 1 deletion src/verifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ pub enum VerifierError {
/// Invalid syscall
#[error("Invalid syscall code {0}")]
InvalidSyscall(u32),
/// Unaligned immediate
#[error("Unaligned immediate (insn #{0})")]
UnalignedImmediate(usize),
}

/// eBPF Verifier
Expand Down Expand Up @@ -121,6 +124,18 @@ fn check_imm_endian(insn: &ebpf::Insn, insn_ptr: usize) -> Result<(), VerifierEr
}
}

/// Verifies that the immediate of `insn` has none of the bits selected by
/// `alignment - 1` set.
///
/// For a power-of-two `alignment` this is equivalent to requiring that
/// `insn.imm` is a multiple of `alignment` (this also holds for negative
/// immediates in two's complement). For any other `alignment` the mask does
/// not express divisibility, so callers are expected to pass a power of two.
///
/// # Errors
///
/// Returns `VerifierError::UnalignedImmediate(insn_ptr)` if any of the masked
/// bits of the immediate are set.
fn check_imm_aligned(
    insn: &ebpf::Insn,
    insn_ptr: usize,
    alignment: i64,
) -> Result<(), VerifierError> {
    // Low bits that must all be zero for the immediate to count as aligned.
    let misaligned_bits = insn.imm & (alignment - 1);
    if misaligned_bits != 0 {
        return Err(VerifierError::UnalignedImmediate(insn_ptr));
    }
    Ok(())
}

fn check_load_dw(prog: &[u8], insn_ptr: usize) -> Result<(), VerifierError> {
if (insn_ptr + 1) * ebpf::INSN_SIZE >= prog.len() {
// Last instruction cannot be LD_DW because there would be no 2nd DW
Expand Down Expand Up @@ -184,7 +199,7 @@ fn check_registers(

match (insn.dst, store) {
(0..=9, _) | (10, true) => Ok(()),
(11, _) if sbpf_version.dynamic_stack_frames() && insn.opc == ebpf::ADD64_IMM => Ok(()),
(10, false) if sbpf_version.dynamic_stack_frames() && insn.opc == ebpf::ADD64_IMM => Ok(()),
(10, false) => Err(VerifierError::CannotWriteR10(insn_ptr)),
(_, _) => Err(VerifierError::InvalidDestinationRegister(insn_ptr)),
}
Expand Down Expand Up @@ -305,6 +320,9 @@ impl Verifier for RequisiteVerifier {
ebpf::BE => { check_imm_endian(&insn, insn_ptr)?; },

// BPF_ALU64_STORE class
ebpf::ADD64_IMM if insn.dst == ebpf::FRAME_PTR_REG as u8 && sbpf_version.dynamic_stack_frames() => {
check_imm_aligned(&insn, insn_ptr, 64)?;
},
ebpf::ADD64_IMM => {},
ebpf::ADD64_REG => {},
ebpf::SUB64_IMM => {},
Expand Down
15 changes: 3 additions & 12 deletions src/vm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,13 +287,6 @@ pub struct EbpfVm<'a, C: ContextObject> {
/// Incremented on calls and decremented on exits. It's used to enforce
/// config.max_call_depth and to know when to terminate execution.
pub call_depth: u64,
/// Guest stack pointer (r11).
///
/// The stack pointer isn't exposed as an actual register. Only sub and add
/// instructions (typically generated by the LLVM backend) are allowed to
/// access it when sbpf_version.dynamic_stack_frames()=true. Its value is only
/// stored here and therefore the register is not tracked in REGISTER_MAP.
pub stack_pointer: u64,
/// Pointer to ContextObject
pub context_object_pointer: &'a mut C,
/// Last return value of instruction_meter.get_remaining()
Expand Down Expand Up @@ -329,7 +322,8 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> {
stack_len: usize,
) -> Self {
let config = loader.get_config();
let stack_pointer =
let mut registers = [0u64; 12];
registers[ebpf::FRAME_PTR_REG] =
ebpf::MM_STACK_START.saturating_add(if sbpf_version.dynamic_stack_frames() {
// the stack is fully descending, frames start as empty and change size anytime r10 is modified
stack_len
Expand All @@ -343,13 +337,12 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> {
EbpfVm {
host_stack_pointer: std::ptr::null_mut(),
call_depth: 0,
stack_pointer,
context_object_pointer: context_object,
previous_instruction_meter: 0,
due_insn_count: 0,
stopwatch_numerator: 0,
stopwatch_denominator: 0,
registers: [0u64; 12],
registers,
program_result: ProgramResult::Ok(0),
memory_mapping,
call_frames: vec![CallFrame::default(); config.max_call_depth],
Expand All @@ -368,9 +361,7 @@ impl<'a, C: ContextObject> EbpfVm<'a, C> {
interpreted: bool,
) -> (u64, ProgramResult) {
debug_assert!(Arc::ptr_eq(&self.loader, executable.get_loader()));
// R1 points to beginning of input memory, R10 to the stack of the first frame, R11 is the pc (hidden)
self.registers[1] = ebpf::MM_INPUT_START;
self.registers[ebpf::FRAME_PTR_REG] = self.stack_pointer;
self.registers[11] = executable.get_entrypoint_instruction_offset() as u64;
let config = executable.get_config();
let initial_insn_count = if config.enable_instruction_meter {
Expand Down
Loading

0 comments on commit 4ad935b

Please sign in to comment.