From 6d8e5e62ab891f17dcf52534149d423b628a47f4 Mon Sep 17 00:00:00 2001 From: TheCPP Date: Fri, 30 Aug 2024 11:32:25 +0200 Subject: [PATCH] [CODE GEN] implementing whitelist --- src/CodeGen/instr.rs | 6 +-- src/CodeGen/reg.rs | 2 +- src/Optimizations/mod.rs | 7 ++- src/Target/mod.rs | 6 ++- src/Target/registry.rs | 4 +- src/Target/target_descr.rs | 13 +++-- src/Target/whitelist.rs | 81 +++++++++++++++++++++++++++++++ src/Target/x64/asm/instr.rs | 2 +- src/Target/x64/asm/mod.rs | 2 +- src/Target/x64/asm/optimizer.rs | 8 +-- src/Target/x64/mod.rs | 25 +++++++++- tests/x64_instruction_encoding.rs | 2 +- 12 files changed, 133 insertions(+), 25 deletions(-) create mode 100644 src/Target/whitelist.rs diff --git a/src/CodeGen/instr.rs b/src/CodeGen/instr.rs index 0437c81e..4dfad5e2 100644 --- a/src/CodeGen/instr.rs +++ b/src/CodeGen/instr.rs @@ -6,7 +6,7 @@ use crate::Obj::Link; use super::reg::Reg; /// a low level instruction which is portable over platforms -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct MachineInstr { pub(crate) operands: Vec, pub(crate) out: Option, @@ -58,7 +58,7 @@ impl Display for MachineInstr { } /// a low level operand which is portable over platforms -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum MachineOperand { /// a number Imm(i64), @@ -77,7 +77,7 @@ impl Display for MachineOperand { /// The mnemonic to use #[allow(missing_docs)] -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum MachineMnemonic { Move, diff --git a/src/CodeGen/reg.rs b/src/CodeGen/reg.rs index 3fc91ac7..4f5be1cf 100644 --- a/src/CodeGen/reg.rs +++ b/src/CodeGen/reg.rs @@ -1,7 +1,7 @@ use crate::Target::{x64Reg, Arch}; /// A shared enum for registers -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum Reg { /// a register of the x64 platform x64(x64Reg), diff --git a/src/Optimizations/mod.rs b/src/Optimizations/mod.rs index 70fa95a8..97d661d1 100644 --- a/src/Optimizations/mod.rs +++ b/src/Optimizations/mod.rs @@ -5,8 +5,11 @@ pub mod Passes; pub use mngr::PassManager; pub use template::Pass; - -use crate::Target::Optimize; +/// used for optimizing +pub trait Optimize { + /// optimizes self + fn optimize(&mut self) -> Self; +} /// Automaticlly optimizes the input till it doesn't change pub fn auto_max_optimize(target: &mut T) where T: Optimize + PartialEq + Clone { diff --git a/src/Target/mod.rs b/src/Target/mod.rs index 1c8c89cb..f33e6764 100644 --- a/src/Target/mod.rs +++ b/src/Target/mod.rs @@ -1,8 +1,10 @@ mod triple; mod target_descr; -mod x64; +pub mod x64; mod registry; -pub use x64::*; +mod whitelist; +pub use x64::{x64Reg, initializeX64Target, Token}; +pub use whitelist::*; pub use triple::Triple; pub use target_descr::TargetBackendDescr; pub use registry::TargetRegistry; diff --git a/src/Target/registry.rs b/src/Target/registry.rs index 4d12378a..9ae0cb76 100644 --- a/src/Target/registry.rs +++ b/src/Target/registry.rs @@ -63,7 +63,7 @@ impl TargetRegistry { if let Some(org) = self.targets.get_mut(&triple.arch) { org.block = Some(block.clone()); let instrs = org.build_instrs(&funct, &triple); - let instrs = org.lower(instrs); + let instrs = org.lower(instrs)?; let mut asm = vec![]; @@ -90,7 +90,7 @@ impl TargetRegistry { org.block = Some(block.clone()); let instrs = org.build_instrs(&funct, &triple); - let instrs = org.lower(instrs); + let instrs = org.lower(instrs)?; let mut res = vec![]; let mut links = vec![]; diff --git a/src/Target/target_descr.rs b/src/Target/target_descr.rs index 159a7afc..f9b9e30f 100644 --- a/src/Target/target_descr.rs +++ b/src/Target/target_descr.rs @@ -1,9 +1,11 @@ +use std::error::Error; + use crate::prelude::{ir::*, Block, Var}; use crate::CodeGen::MCInstr; use crate::CodeGen::{compilation::CompilationHelper, MachineInstr}; use crate::IR::{Const, Function, Type, TypeMetadata}; -use super::Triple; +use super::{Triple, WhiteList}; use super::{CallConv, Compiler, Lexer}; /// The TargetBackendDescr is used to store all the functions/information to compile ir nodes into assembly @@ -20,6 +22,8 @@ pub struct TargetBackendDescr { pub(crate) call: CallConv, pub(crate) sink: Vec, + + pub(crate) whitelist: WhiteList, } macro_rules! compile_func { @@ -52,6 +56,7 @@ impl TargetBackendDescr { block: None, call: CallConv::SystemV, helper: None, + whitelist: WhiteList::new(), sink: vec![], } } @@ -94,10 +99,12 @@ impl TargetBackendDescr { } /// Used for lowering machine instructions into dyn MCInstr - pub fn lower(&self, instrs: Vec) -> Vec> { + pub fn lower(&self, instrs: Vec) -> Result>, Box> { if let Some(helper) = &self.helper { + self.whitelist.check_for_forbidden_mnemonics(&instrs)?; + if let Some(lower) = helper.lower { - lower(instrs) + Ok(lower(instrs)) } else { todo!("the target architecture {:?} doesn't support instruction lowering", helper.arch) } diff --git a/src/Target/whitelist.rs b/src/Target/whitelist.rs new file mode 100644 index 00000000..a0b8ccd4 --- /dev/null +++ b/src/Target/whitelist.rs @@ -0,0 +1,81 @@ +use std::{collections::HashMap, error::Error, fmt::Display}; + +use crate::CodeGen::{MachineInstr, MachineMnemonic}; + +/// Stores allowed instructions +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct WhiteList { + instrs: HashMap, +} + +impl WhiteList { + /// Creates a new instruction whitelist + pub fn new() -> Self { + Self { + instrs: HashMap::new() + } + } + + /// Allowes a specifc mnemonic + pub fn allow(&mut self, mnemonic: MachineMnemonic) { + if let Some(option) = self.instrs.get_mut(&mnemonic) { + *option = AllowmentOption::Allowed; + } else { + self.instrs.insert(mnemonic, AllowmentOption::Allowed); + } + } + + /// Forbids a specfic mnemonic + pub fn forbid(&mut self, mnemonic: MachineMnemonic) { + if let Some(option) = self.instrs.get_mut(&mnemonic) { + *option = AllowmentOption::NotAllowed; + } else { + self.instrs.insert(mnemonic, AllowmentOption::NotAllowed); + } + } + + /// Checks if the mnemonic is allowed + pub fn is_allowed(&self, mnemonic: MachineMnemonic) -> AllowmentOption { + if let Some(option) = self.instrs.get(&mnemonic) { + *option + } else { + AllowmentOption::Unknown + } + } + + /// Checks for forbidden mnemonics + pub fn check_for_forbidden_mnemonics(&self, vec: &Vec) -> Result<(), WhiteListError> { + for instr in vec { + if self.is_allowed(instr.mnemonic.clone()) == AllowmentOption::NotAllowed { + Err(WhiteListError::NotAllowed(instr.mnemonic.clone()))? + } + } + + Ok(()) + } +} + +/// how strong allowed the object is +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum AllowmentOption { + Allowed, + NotAllowed, + Unknown, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum WhiteListError { + NotAllowed(MachineMnemonic) +} + +impl Display for WhiteListError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + WhiteListError::NotAllowed(mne) => format!("the instruction {} is not allowed but was suppyled", mne), + }) + } +} + +impl Error for WhiteListError {} \ No newline at end of file diff --git a/src/Target/x64/asm/instr.rs b/src/Target/x64/asm/instr.rs index 0bb23db5..c6da8620 100644 --- a/src/Target/x64/asm/instr.rs +++ b/src/Target/x64/asm/instr.rs @@ -1,6 +1,6 @@ use std::{fmt::Display, ops::{Add, Sub}, str::FromStr}; -use crate::{CodeGen::MCInstr, Obj::Link, Support::{ColorClass, ColorProfile}, Target::{isa::{buildOpcode, MandatoryPrefix, RexPrefix}, x64Reg}}; +use crate::{CodeGen::MCInstr, Obj::Link, Support::{ColorClass, ColorProfile}, Target::{x64::isa::{buildOpcode, MandatoryPrefix, RexPrefix}, x64Reg}}; use super::isa::ModRm; diff --git a/src/Target/x64/asm/mod.rs b/src/Target/x64/asm/mod.rs index 57fa0d77..1f410b80 100644 --- a/src/Target/x64/asm/mod.rs +++ b/src/Target/x64/asm/mod.rs @@ -8,4 +8,4 @@ mod optimizer; pub use lexer::*; pub use parser::*; -pub use optimizer::*; \ No newline at end of file +//pub(crate) use optimizer::*; \ No newline at end of file diff --git a/src/Target/x64/asm/optimizer.rs b/src/Target/x64/asm/optimizer.rs index 65fe2410..3184713d 100644 --- a/src/Target/x64/asm/optimizer.rs +++ b/src/Target/x64/asm/optimizer.rs @@ -1,13 +1,7 @@ -use crate::Target::x64Reg; +use crate::{Optimizations::Optimize, Target::x64Reg}; use super::instr::{X64MCInstr, Mnemonic, Operand}; -/// used for optimizing -pub trait Optimize { - /// optimizes self - fn optimize(&mut self) -> Self; -} - impl Optimize for Vec { fn optimize(&mut self) -> Vec { let mut out: Vec = vec![]; diff --git a/src/Target/x64/mod.rs b/src/Target/x64/mod.rs index f8455de1..8587a6cf 100644 --- a/src/Target/x64/mod.rs +++ b/src/Target/x64/mod.rs @@ -5,7 +5,7 @@ use std::collections::VecDeque; mod compilation; //use compilation::*; -use super::{CallConv, Lexer, TargetBackendDescr}; +use super::{CallConv, Lexer, TargetBackendDescr, WhiteList}; mod reg; use compilation::construct_compilation_helper; pub use reg::*; @@ -16,7 +16,7 @@ mod lower; pub use asm::*; -use crate::Target::Compiler; +use crate::{CodeGen::MachineMnemonic, Target::Compiler}; /// Initializes the x86-64 target pub fn initializeX64Target(call_conv: CallConv) -> TargetBackendDescr { @@ -27,10 +27,31 @@ pub fn initializeX64Target(call_conv: CallConv) -> TargetBackendDescr { target.lexer = Some(x64Lexer {}.boxed()); target.compile = Some(x64Parser { tokens: VecDeque::new(), out: None }.boxed()); + target.whitelist = construct_whitelist(); + target.helper = Some(construct_compilation_helper(call_conv)); target.call = call_conv; target +} + +fn construct_whitelist() -> WhiteList { + let mut whitelist = WhiteList::new(); + + whitelist.allow(MachineMnemonic::Move); + whitelist.allow(MachineMnemonic::Add); + whitelist.allow(MachineMnemonic::And); + whitelist.allow(MachineMnemonic::Div); + whitelist.allow(MachineMnemonic::Mul); + whitelist.allow(MachineMnemonic::Or); + whitelist.allow(MachineMnemonic::Sub); + whitelist.allow(MachineMnemonic::Xor); + whitelist.allow(MachineMnemonic::Return); + + //whitelist.allow(MachineMnemonic::Zext); todo!() + //whitelist.allow(MachineMnemonic::Downcast); todo!() + + whitelist } \ No newline at end of file diff --git a/tests/x64_instruction_encoding.rs b/tests/x64_instruction_encoding.rs index 74550359..80697aa2 100644 --- a/tests/x64_instruction_encoding.rs +++ b/tests/x64_instruction_encoding.rs @@ -1,4 +1,4 @@ -use ygen::{Optimizations::auto_max_optimize, Target::{instr::*, x64Reg}}; +use ygen::{Optimizations::auto_max_optimize, Target::{x64::instr::*, x64Reg}}; #[test] pub fn test_mov() {