From aefc9ea3d8667cf9072a0acd14262f3fc088b336 Mon Sep 17 00:00:00 2001 From: TheCPP Date: Fri, 12 Jul 2024 16:43:26 +0200 Subject: [PATCH] [OBJ] starting object builder --- Cargo.lock | 199 ++++++++++++++++++++++++++++++ Cargo.toml | 1 + examples/obj.rs | 27 +++++ output.o | Bin 0 -> 257 bytes src/Obj/mod.rs | 6 + src/Obj/wrapper.rs | 281 +++++++++++++++++++++++++++++++++++++++++++ src/Target/triple.rs | 1 + src/lib.rs | 6 + test.c | 10 ++ 9 files changed, 531 insertions(+) create mode 100644 examples/obj.rs create mode 100644 output.o create mode 100644 src/Obj/mod.rs create mode 100644 src/Obj/wrapper.rs create mode 100644 test.c diff --git a/Cargo.lock b/Cargo.lock index 5fa3bc94..4d79b924 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,6 +5,185 @@ version = 3 [[package]] name = "Ygen" version = "0.1.0" +dependencies = [ + "object", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "crc32fast" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "flate2" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", +] + +[[package]] +name = "indexmap" +version = "2.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "object" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "081b846d1d56ddfc18fdf1a922e4f6e07a11768ea1b92dec44e42b72712ccfce" +dependencies = [ + "crc32fast", + "flate2", + "hashbrown", + "indexmap", + "memchr", + "ruzstd", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "ruzstd" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5022b253619b1ba797f243056276bed8ed1a73b0f5a7ce7225d524067644bf8f" +dependencies = [ + "byteorder", + "twox-hash", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "syn" +version = "2.0.70" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f0209b68b3613b093e0ec905354eccaedcfe83b8cb37cbdeae64026c3064c16" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "ygen-mc" @@ -12,3 +191,23 @@ version = "0.1.0" dependencies = [ "Ygen", ] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 19160099..ae4b9ce2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,3 +12,4 @@ version = "0.1.0" edition = "2021" [dependencies] +object = { version = "0.36.1", features = ["write"] } diff --git a/examples/obj.rs b/examples/obj.rs new file mode 100644 index 00000000..2161054f --- /dev/null +++ b/examples/obj.rs @@ -0,0 +1,27 @@ +use std::{error::Error, fs::OpenOptions}; + +use Ygen::{Obj::*, Target::Triple}; + +fn main() -> Result<(), Box> { + let mut obj = ObjectBuilder::new( + Triple::parse("x86_64-pc-windows")? + ); + + obj.decls(vec![ + ("test", Decl::Function, Linkage::External), + ("test_data", Decl::Constant, Linkage::Extern), + ]); + + obj.define("test", vec![ + 0xB8, 0x00, 0x00, 0x00, 0x00, // mov eax, 5 + 0xC3, + ]); + + obj.link(Link { from: "test".into(), to: "test_data".into(), at: 1 }); + + obj.emit( + OpenOptions::new().create(true).write(true).open("output.o")? + )?; + + Ok(()) +} \ No newline at end of file diff --git a/output.o b/output.o new file mode 100644 index 0000000000000000000000000000000000000000..9a2578620c7ede061fd5dea723ea1fa1cb5a23bb GIT binary patch literal 257 zcmYdkV_^V+BS6dqW-&07q*jzbnP7?yNNzzE0J0Sr0uoXZOA^t9F}My40tbrn39AIE zbI>=z&<+;Z1r=rl(h3X$2|Gai!yq0AvjE*&40kUhPzA&v{2*0Oz{DU4Wa)uCrUF(C JBI3cW0ssm`7li-- literal 0 HcmV?d00001 diff --git a/src/Obj/mod.rs b/src/Obj/mod.rs new file mode 100644 index 00000000..86770643 --- /dev/null +++ b/src/Obj/mod.rs @@ -0,0 +1,6 @@ +mod wrapper; + +pub use wrapper::{ + ObjectBuilder, + Decl, Link, Linkage, +}; \ No newline at end of file diff --git a/src/Obj/wrapper.rs b/src/Obj/wrapper.rs new file mode 100644 index 00000000..731d908e --- /dev/null +++ b/src/Obj/wrapper.rs @@ -0,0 +1,281 @@ +use object::write::{Relocation, SectionId, StandardSection, Symbol, SymbolId, SymbolSection}; +use object::{Architecture, BinaryFormat, Endianness, RelocationEncoding, RelocationFlags, RelocationKind, SectionKind, SymbolFlags, SymbolKind, SymbolScope}; + +use crate::prelude::Triple; +use crate::Target::{self, Arch}; +use std::collections::HashMap; +use std::fs::File; +use std::error::Error; + +#[derive(Debug, Clone, PartialEq, Eq)] +enum ObjectError { + UnsupportedArch(Arch), + DefWithoutDecl(String), +} + +impl std::fmt::Display for ObjectError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + ObjectError::UnsupportedArch(arch) => format!("unsupported architecture for writing to object files: {:?}", arch), + ObjectError::DefWithoutDecl(name) => format!("definition without an corresponding decleration: {}", name), + }) + } +} + +impl std::error::Error for ObjectError {} + +/// A decl to say what's the label/func +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Decl { + /// A function + Function, + /// A mutable data (E.g: a global variable) + Data, + /// A constant data + Constant, +} + +/// Links from one symbol to another +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Link { + /// The link source + pub from: String, + /// The link destination + pub to: String, + /// The binary offset of the start of the function + pub at: usize, +} + +/// The linkage of the target symbol +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Linkage { + /// Can be seen outside and inside of object file + External, + /// From another object file + Extern, + /// Only aviable in the object file + Internal, +} + +/// Builds object files. +/// It also supports debugging information +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ObjectBuilder { + defines: HashMap>, + links: Vec, + + decls: Vec<(String, Decl, Linkage)>, + + triple: Triple +} + +impl ObjectBuilder { + /// Creates an new object builder + pub fn new(triple: Triple) -> Self { + Self { + defines: HashMap::new(), + + links: vec![], + + decls: vec![], + + triple: triple + } + } + + /// Sets the decls of the function + pub fn decls(&mut self, decls: Vec<(&str, Decl, Linkage)>) { + for decl in decls { + self.decl(decl); + } + } + + /// Adds one decl to the function + pub fn decl(&mut self, decl: (&str, Decl, Linkage)) { + self.decls.push((decl.0.to_string(), decl.1, decl.2)); + } + + /// Defines a symbol + pub fn define(&mut self, name: &str, data: Vec) { + self.defines.insert(name.to_string(), data); + } + + /// Links from one symbol to another + pub fn link(&mut self, link: Link) { + self.links.push(link); + } + + /// Writes the object file into the the specified file + pub fn emit(&self, file: File) -> Result<(), Box> { + + let mut obj = object::write::Object::new({ + match self.triple.bin { + Target::ObjFormat::Unknown => BinaryFormat::native_object(), + Target::ObjFormat::Coff => BinaryFormat::Coff, + Target::ObjFormat::Elf => BinaryFormat::Elf, + Target::ObjFormat::MachO => BinaryFormat::MachO, + Target::ObjFormat::Wasm => BinaryFormat::Wasm, + Target::ObjFormat::XCoff => BinaryFormat::Xcoff, + Target::ObjFormat::Default => BinaryFormat::native_object(), + } + }, { + match self.triple.arch { + Target::Arch::Arm => Architecture::Arm, + Target::Arch::Aarch64 => Architecture::Aarch64, + Target::Arch::Avr => Architecture::Avr, + Target::Arch::Bpfel => Architecture::Bpf, + Target::Arch::Bpfeb => Architecture::Bpf, + Target::Arch::Hexagon => Architecture::Hexagon, + Target::Arch::Mips => Architecture::Mips, + Target::Arch::Mips64 => Architecture::Mips64, + Target::Arch::Msp420 => Architecture::Msp430, + Target::Arch::Ppc => Architecture::PowerPc, + Target::Arch::Ppc64 => Architecture::PowerPc64, + Target::Arch::Riscv32 => Architecture::Riscv32, + Target::Arch::Riscv64 => Architecture::Riscv64, + Target::Arch::Sparc => Architecture::Sparc, + Target::Arch::X86 => Architecture::X86_64_X32, + Target::Arch::X86_64 => Architecture::X86_64, + Target::Arch::Wasm32 => Architecture::Wasm32, + Target::Arch::Wasm64 => Architecture::Wasm64, + other => Err( ObjectError::UnsupportedArch(other) )?, + } + }, { + match self.triple.arch { + Target::Arch::Arm => Endianness::Little, + Target::Arch::Aarch64 => Endianness::Little, + Target::Arch::Avr => Endianness::Little, + Target::Arch::Bpfel => Endianness::Little, + Target::Arch::Bpfeb => Endianness::Big, + Target::Arch::Hexagon => Endianness::Little, + Target::Arch::Mips => Endianness::Big, + Target::Arch::Mips64 => Endianness::Big, + Target::Arch::Msp420 => Endianness::Little, + Target::Arch::Ppc => Endianness::Big, + Target::Arch::Ppc64 => Endianness::Big, + Target::Arch::Riscv32 => Endianness::Little, + Target::Arch::Riscv64 => Endianness::Little, + Target::Arch::Sparc => Endianness::Big, + Target::Arch::X86 => Endianness::Little, + Target::Arch::X86_64 => Endianness::Little, + Target::Arch::Wasm32 => Endianness::Little, + Target::Arch::Wasm64 => Endianness::Little, + _ => unreachable!(), // cannot panic cuz the archs are filtered out by the prefius call + } + }); + + let secText = obj.add_section(vec![], "text".as_bytes().to_vec(), SectionKind::Text); + let secData = obj.add_section(vec![], "data".as_bytes().to_vec(), SectionKind::Data); + let secConsts = obj.add_section(vec![], "rodata".as_bytes().to_vec(), SectionKind::ReadOnlyData); + + let mut syms: HashMap, Option, SymbolId)> = HashMap::new(); + + for (name, data) in &self.defines { + let name = name.to_owned(); + let data = data.to_owned(); + + let mut decl = None; + let mut link = None; + + for (declName, declDecl, declLink) in &self.decls { + if *declName == name { + decl = Some(declDecl); + link = Some(declLink); + } + } + + if decl == None { + Err( ObjectError::DefWithoutDecl(name.clone()) )? + } + let decl = decl.unwrap(); + let link = link.unwrap(); + + let sym = obj.add_symbol(Symbol { + name: name.clone().as_bytes().to_vec(), + value: 0, + size: (data.len() - 1) as u64, + kind: { + match decl { + Decl::Function => SymbolKind::Text, + Decl::Data => SymbolKind::Data, + Decl::Constant => SymbolKind::Label, + } + }, + scope: { + match link { + Linkage::External => SymbolScope::Linkage, + Linkage::Extern => SymbolScope::Linkage, + Linkage::Internal => SymbolScope::Compilation, + } + }, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + + let def_section = match decl { + Decl::Function => obj.add_subsection(StandardSection::Text, name.as_bytes()), + Decl::Data => obj.add_subsection(StandardSection::Data, name.as_bytes()), + Decl::Constant => obj.add_subsection(StandardSection::ReadOnlyData, name.as_bytes()), + }; + + let def_offset = match decl { + Decl::Function => obj.add_symbol_data(sym, secText, &data, 16), + Decl::Data => obj.add_symbol_data(sym, secData, &data, 16), + Decl::Constant => obj.add_symbol_data(sym, secConsts, &data, 16), + }; + + syms.insert(name.clone(), (Some(def_section), Some(def_offset), sym)); + } + + for (name, decl, linkage) in &self.decls { // for extern symbols + if *linkage == Linkage::Extern { + let sym = obj.add_symbol(Symbol { + name: name.as_bytes().to_vec(), + value: 0, + size: 0, + kind: { + match decl { + Decl::Function => SymbolKind::Text, + Decl::Data => SymbolKind::Data, + Decl::Constant => SymbolKind::Data, + } + }, + scope: SymbolScope::Unknown, + weak: false, + section: SymbolSection::Undefined, + flags: SymbolFlags::None, + }); + syms.insert(name.clone(), (None, None, sym)); + } + } + + for link in &self.links { + let (from_sec, from_off, _) = syms.get(&link.from).unwrap(); + let (_, to_off, to_sym) = syms.get(&link.to).unwrap(); + + obj.add_relocation(from_sec.unwrap().to_owned(), Relocation { + offset: from_off.unwrap() + link.at as u64, + symbol: to_sym.to_owned(), + addend: {if let Some(off) = *to_off {off as i64} else {0}} + -4, + flags: RelocationFlags::Generic { + kind: RelocationKind::PltRelative, + encoding: { + match &self.triple.arch { + Target::Arch::Aarch64 => RelocationEncoding::AArch64Call, + Target::Arch::Aarch64BE => RelocationEncoding::AArch64Call, + Target::Arch::X86 => RelocationEncoding::X86Branch, + Target::Arch::X86_64 => RelocationEncoding::X86Branch, + _ => RelocationEncoding::Generic, + } + }, + size: 32, + }, + })?; + } + + obj.write_stream(file)?; + + Ok(()) + } +} \ No newline at end of file diff --git a/src/Target/triple.rs b/src/Target/triple.rs index 13f65976..f658385d 100644 --- a/src/Target/triple.rs +++ b/src/Target/triple.rs @@ -158,6 +158,7 @@ impl Triple { "openBSD" => OS::OpenBSD, "solaris" => OS::Solaris, "win32" => OS::Win32, + "windows" => OS::Win32, "haiku" => OS::Haiku, "minix" => OS::Minix, "rtems" => OS::Rtems, diff --git a/src/lib.rs b/src/lib.rs index 2b7ed790..d4d0a481 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,17 +10,23 @@ /// * The target triple /// * TargetRegistry pub mod Target; + /// The ir module: functions for building function ir pub mod IR; + /// The pass manager module: /// * Includes all passes and their definition /// * The PassManager pub mod PassManager; + /// Other utilites like: /// * Cli args /// * String coloring and padding pub mod Support; +/// Writing/Reading object files +pub mod Obj; + /// Most common used functions, classes, enums of this Libary pub mod prelude { pub use crate::IR::*; diff --git a/test.c b/test.c new file mode 100644 index 00000000..ed928f7d --- /dev/null +++ b/test.c @@ -0,0 +1,10 @@ +#include + +int test_data = 5; + +extern int test(); + +int main() { + printf("func() => %d", test()); + return 0; +} \ No newline at end of file