From 9e9a328bee45aefce154f1c92f66f804bf48c6d8 Mon Sep 17 00:00:00 2001 From: Aaron Hill Date: Thu, 4 Apr 2019 00:50:25 -0400 Subject: [PATCH] Add Reloc::Data, which supports an addend Fixes #71 This commit creates a new variant 'Data' of the 'Reloc' enum. This can be used to define a relocation to a data section entry with a custom addend. With this change, it's now possible to create a relocation representing an index into an array or some other structure, where the addend represents the offset from the base. I've only implemented this for ELF. Unfortunately, I'm completely unfamiliar with Mach-O, and don't have an OS X instance available to test with. Additionally, I've moved the magic '-4' into a constant, and added some documentation explaining where it comes from (though I may have a few details wrong). I've also tweaked the binary created by src/bin/main.rs to demonstrate using Reloc::Data --- src/artifact.rs | 5 ++++ src/bin/main.rs | 11 ++++++-- src/elf.rs | 75 +++++++++++++++++++++++++++++++++++++++++++++++-- src/mach.rs | 4 +++ 4 files changed, 90 insertions(+), 5 deletions(-) diff --git a/src/artifact.rs b/src/artifact.rs index 9e026af..7815127 100644 --- a/src/artifact.rs +++ b/src/artifact.rs @@ -37,6 +37,11 @@ pub enum Reloc { /// Addend for the relocation addend: i32, }, + /// A relocation in the data section. 
+ Data { + /// Addend for the relocation + addend: i32, + }, } type StringID = usize; diff --git a/src/bin/main.rs b/src/bin/main.rs index 4979c7c..86dbdb6 100644 --- a/src/bin/main.rs +++ b/src/bin/main.rs @@ -88,6 +88,7 @@ fn run (args: Args) -> Result<(), Error> { ("DEADBEEF", Decl::data_import().into()), ("STATIC", Decl::data().global().writable().into()), ("STATIC_REF", Decl::data().global().writable().into()), + ("GLOBAL_ARR", Decl::data().global().into()), ("printf", Decl::function_import().into()), ]; obj.declarations(declarations.into_iter())?; @@ -123,6 +124,7 @@ fn run (args: Args) -> Result<(), Error> { // 48 8d 3d 00 00 00 00 lea 0x0(%rip),%rdi # 0x1d will be: "deadbeef: 0x%x - %d\n" // 48 8b 0d 00 00 00 00 mov 0x0(%rip),%rcx # 0x24 // 8b 11 mov (%rcx),%edx + // 48 8b 0d 00 00 00 00 mov 0x0(%rip),%rcx # global_arr // 89 c6 mov %eax,%esi // b0 00 mov $0x0,%al // e8 00 00 00 00 callq 0x2f # printf @@ -143,6 +145,7 @@ fn run (args: Args) -> Result<(), Error> { 0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x8b, 0x11, + 0x48, 0x8b, 0x0d, 0x00, 0x00, 0x00, 0x00, 0x89, 0xc6, 0xb0, 0x00, 0xe8, 0x00, 0x00, 0x00, 0x00, @@ -154,8 +157,9 @@ fn run (args: Args) -> Result<(), Error> { 0xc3, ])?; // define static data - obj.define("str.1", b"deadbeef: 0x%x - 0x%x\n\0".to_vec())?; + obj.define("str.1", b"deadbeef: 0x%x - 0x%x - %d\n\0".to_vec())?; obj.define("STATIC", [0xbe, 0xba, 0xfe, 0xca].to_vec())?; + obj.define("GLOBAL_ARR", [41, 00, 00, 00, 42, 00, 00, 00].to_vec())?; // .data static references need to be zero'd out explicitly for now. obj.define("STATIC_REF", vec![0; 8])?; @@ -165,7 +169,10 @@ fn run (args: Args) -> Result<(), Error> { obj.link(Link { from: "main", to: "deadbeef", at: 0x15 })?; obj.link(Link { from: "main", to: "str.1", at: 0x1c })?; obj.link(Link { from: "main", to: "STATIC_REF", at: 0x23 })?; - obj.link(Link { from: "main", to: "printf", at: 0x2e })?; + // GLOBAL_ARR is an array of (4-byte) integers. 
We create a relocation + // pointing to the second entry in the array via an addend of 4 bytes + obj.link_with(Link { from: "main", to: "GLOBAL_ARR", at: 0x2c }, Reloc::Data { addend: 4 } )?; + obj.link(Link { from: "main", to: "printf", at: 0x35 })?; // -- deadbeef relocations -- obj.link(Link { from: "deadbeef", to: "DEADBEEF", at: 0x7 })?; diff --git a/src/elf.rs b/src/elf.rs index 7f5c1db..8735afe 100644 --- a/src/elf.rs +++ b/src/elf.rs @@ -37,6 +37,29 @@ type Relocation = goblin::elf::reloc::Reloc; type Symbol = goblin::elf::sym::Sym; type Section = SectionHeader; +/// When we have a link from a function on X86, +/// we create a relocation entry that modifies +/// the PC-relative 32-bit immediate value of an instruction. (e.g. 'call') +/// This value is relative to the address of the *next* instruction - i.e. +/// 4 bytes past the start of the immediate. +/// For example, if we have instruction 'call 0x0', encoded as: +/// 'e8 00 00 00 00' +/// 'WW XX YY ZZ' // some other instruction +/// +/// the offset will be computed relative to the address of 'WW' +/// However, when the dynamic linker uses our relocation entry, +/// it will interpret it relative to the location we're modifying - +/// i.e. the address of the '00' byte following the 'e8' byte. +/// This will cause the final computed value to be 4 bytes greater +/// than it should be. To fix this, we use an addend of '-4' to +/// account for the extra 4 bytes between the location we're relocating +/// (the immediate value of the instruction) and the address of the instruction +/// immediately following it. 
+/// +/// If a consumer of 'faerie' provides their own addend via +/// Reloc::Data, we simply add this to '-4' +const X64_IMM_OFFSET: i32 = -4; + struct MachineTag(u16); impl From for MachineTag { @@ -637,9 +660,15 @@ impl<'a> Elf<'a> { // NB: this now forces _all_ function references, whether local or not, through the PLT // although we're not in the worst company here: https://github.com/ocaml/ocaml/pull/1330 Decl::Defined(DefinedDecl::Function { .. }) - | Decl::Import(ImportKind::Function) => (reloc::R_X86_64_PLT32, -4), - Decl::Defined(DefinedDecl::Data { .. }) => (reloc::R_X86_64_PC32, -4), - Decl::Import(ImportKind::Data) => (reloc::R_X86_64_GOTPCREL, -4), + | Decl::Import(ImportKind::Function) => { + (reloc::R_X86_64_PLT32, X64_IMM_OFFSET) + } + Decl::Defined(DefinedDecl::Data { .. }) => { + (reloc::R_X86_64_PC32, X64_IMM_OFFSET) + } + Decl::Import(ImportKind::Data) => { + (reloc::R_X86_64_GOTPCREL, X64_IMM_OFFSET) + } _ => panic!("unsupported relocation {:?}", l), } } @@ -660,6 +689,46 @@ impl<'a> Elf<'a> { 8 => (reloc::R_X86_64_64, addend), _ => panic!("unsupported relocation {:?}", l), }, + Reloc::Data { addend } => { + match *l.to.decl { + Decl::Defined(DefinedDecl::Data { .. }) + | Decl::Import(ImportKind::Data { .. }) => {} + _ => panic!("unsupported relocation {:?}", l.to.decl), + }; + + match *l.from.decl { + Decl::Defined(DefinedDecl::Function { .. }) => match *l.to.decl { + Decl::Defined(DefinedDecl::Data { .. }) => { + // We're referencing a symbol that we've directly + // defined, so we're not going through the GOT. + // This means that the addend will be applied + // directly to the address of the target symbol, + // which can be used to reference a specific location + // within the target (e.g. a particular element of an array) + (reloc::R_X86_64_PC32, X64_IMM_OFFSET + addend) + } + Decl::Import(ImportKind::Data) => { + // If we're linking to an imported symbol, + // we're creating a GOT-relative relocation. 
+ // Having a user-supplied addend doesn't make sense here, + // as it would cause us to move within the GOT itself - + // not relative to the address stored in the GOT entry. + assert!(addend == 0, "Addend must be 0 for reloc {:?}", l); + (reloc::R_X86_64_GOTPCREL, X64_IMM_OFFSET + addend) + } + _ => panic!("unsupported relocation {:?}", l), + }, + Decl::Defined(DefinedDecl::Data { .. }) => { + if self.ctx.is_big() { + // Select an absolute relocation that is the size of a pointer. + (reloc::R_X86_64_64, addend) + } else { + (reloc::R_X86_64_32, addend) + } + } + _ => panic!("unsupported relocation {:?}", l), + } + } }; let addend = i64::from(addend); diff --git a/src/mach.rs b/src/mach.rs index fb25223..61fafd5 100644 --- a/src/mach.rs +++ b/src/mach.rs @@ -840,6 +840,10 @@ fn build_relocations(segment: &mut SegmentBuilder, artifact: &Artifact, symtab: } continue; } + Reloc::Data { .. } => { + // TODO: Implement this + unimplemented!() + } }; match (symtab.offset(link.from.name), symtab.index(link.to.name)) { (Some(base_offset), Some(to_symbol_index)) => {