From 62871d97dd63ac934f81ca5ab55feecd23ce124b Mon Sep 17 00:00:00 2001 From: Cr0a3 Date: Sat, 19 Oct 2024 21:06:54 +0200 Subject: [PATCH] [WASM] starting wasm instruction encoding --- src/Target/compiler.rs | 5 +- src/Target/lexer.rs | 6 +- src/Target/wasm/asm/instr.rs | 197 ++++++++++++++++++++++++++++++++-- src/Target/wasm/asm/lexer.rs | 82 ++++++++++++++ src/Target/wasm/asm/mod.rs | 2 + src/Target/wasm/asm/parser.rs | 130 ++++++++++++++++++++++ src/Target/wasm/mod.rs | 3 + src/Target/x64/asm/lexer.rs | 8 +- src/Target/x64/asm/parser.rs | 14 ++- 9 files changed, 426 insertions(+), 21 deletions(-) create mode 100644 src/Target/wasm/asm/lexer.rs create mode 100644 src/Target/wasm/asm/parser.rs diff --git a/src/Target/compiler.rs b/src/Target/compiler.rs index fcc5034e..c8ead8b6 100644 --- a/src/Target/compiler.rs +++ b/src/Target/compiler.rs @@ -1,13 +1,12 @@ -use std::error::Error; +use std::{any::Any, error::Error}; use crate::Support::ColorProfile; -use super::Token; /// An wrapper trait for assembly compilers pub trait Compiler { /// Creates an new assembly compiler - fn new(&self, tokens: Vec) -> Box; + fn new(&self, tokens: Vec>) -> Box; /// compiles an assembly string into machine code fn parse(&mut self) -> Result<(), Box>; /// Returns the output machine code diff --git a/src/Target/lexer.rs b/src/Target/lexer.rs index 8ccb5d32..98515ff5 100644 --- a/src/Target/lexer.rs +++ b/src/Target/lexer.rs @@ -1,11 +1,9 @@ -use std::error::Error; - -use super::Token; +use std::{any::Any, error::Error}; /// The lexer trait pub trait Lexer { /// lexes the string - fn lex(&self, string: String) -> Result, Box>; + fn lex(&self, string: String) -> Result>, Box>; /// Returns self into a boxed lexer trait fn boxed(&self) -> Box; diff --git a/src/Target/wasm/asm/instr.rs b/src/Target/wasm/asm/instr.rs index 96c68b96..34d79789 100644 --- a/src/Target/wasm/asm/instr.rs +++ b/src/Target/wasm/asm/instr.rs @@ -6,31 +6,114 @@ use crate::CodeGen::MCInstr; #[derive(Debug, Clone, PartialEq, Eq)] pub struct WasmMCInstr { pub(crate) mnemonic: WasmMnemonic, + pub(crate) prefix: Option, pub(crate) op1: Option, } impl WasmMCInstr { /// Creates an wasm instruction without any operands - pub fn with0(mne: WasmMnemonic) -> Self { + pub fn with0(prefix: Option, mne: WasmMnemonic) -> Self { Self { + prefix: prefix, mnemonic: mne, op1: None, } } /// Creates an wasm instruction with 1 operand - pub fn with1(mne: WasmMnemonic, op1: WasmOperand) -> Self { + pub fn with1(prefix: Option, mne: WasmMnemonic, op1: WasmOperand) -> Self { Self { + prefix: prefix, mnemonic: mne, op1: Some(op1), } } pub(crate) fn encode(&self) -> Result<(Vec, Option), Box> { - let mut encoded: Vec = Vec::new(); + let mut encoded; match self.mnemonic { - + WasmMnemonic::Get => { + let op1 = self.op1.expect("...get expects localidx"); + let op1 = match op1 { + WasmOperand::Var(var) => var as u8, + WasmOperand::Const(_) => panic!("...set expects localidx"), + }; + + let op = match self.prefix.expect("...get expects an prefix") { + WasmPrefix::Local => 0x20, + WasmPrefix::Global => 0x23, + _ => panic!("...get expects either local or global as its prefix") + }; + + encoded = vec![op, op1] + }, + WasmMnemonic::Set => { + let op1 = self.op1.expect("...set expects localidx"); + let op1 = match op1 { + WasmOperand::Var(var) => var as u8, + WasmOperand::Const(_) => panic!("...set expects localidx"), + }; + + let op = match self.prefix.expect("...set expects an prefix") { + WasmPrefix::Local => 0x21, + WasmPrefix::Global => 0x24, + _ => panic!("...set expects either local or global as its prefix") + }; + + encoded = vec![op, op1] + }, + WasmMnemonic::Const => { + let op1 = self.op1.expect("...const expects a imm op"); + let op1 = match op1 { + WasmOperand::Const(imm) => imm, + _ => panic!("...const expects a imm op"), + }; + + let op = match self.prefix.expect("...const expects an prefix") { + WasmPrefix::i32 => 0x41, + WasmPrefix::i64 => 0x42, + WasmPrefix::f32 => 0x43, + WasmPrefix::f64 => 0x44, + _ => panic!("...const must only have either i32, i64, f32 or f64 as its prefix") + }; + + let mut bytes = match self.prefix.expect("...const expects an prefix") { + WasmPrefix::i32 => (op1 as i32).to_le_bytes().to_vec(), + WasmPrefix::i64 => (op1 as i64).to_le_bytes().to_vec(), + WasmPrefix::f32 => (op1 as f32).to_bits().to_le_bytes().to_vec(), + WasmPrefix::f64 => (op1 as f64).to_bits().to_le_bytes().to_vec(), + _ => unreachable!(), + }; + + while let Some(&last) = bytes.last() { + if last == 0 { + bytes.pop(); + } else { + break; + } + } + + encoded = vec![op]; + + encoded.extend_from_slice(&bytes); + + if bytes.len() == 0 { + encoded.push(0); + } + + match bytes.len() { + 1 => encoded.push(0x01), + 2 => encoded.push(0x03), + 3 => encoded.push(0x07), + 4 => encoded.push(0x0f), + 5 => encoded.push(0x1f), + 6 => encoded.push(0x3f), + 7 => encoded.push(0x00), + 8 => encoded.push(0x0f), + _ => {}, + } + } } Ok((encoded, None)) @@ -38,21 +121,121 @@ impl WasmMCInstr { } } -/// A webassembly mnemonic +/// A webassembly mnemonic (prefix.mnemonic) #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(missing_docs)] pub enum WasmMnemonic { + Get, + Set, + Const, +} +impl From for WasmMnemonic { + fn from(value: String) -> Self { + match value.as_str() { + "get" => WasmMnemonic::Get, + "set" => WasmMnemonic::Set, + "const" => WasmMnemonic::Const, + _ => panic!("unkown wasm mnemonic: {value}"), + } + } } -/// A webassembly operand +impl Display for WasmMnemonic { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + WasmMnemonic::Get => "get", + WasmMnemonic::Set => "set", + WasmMnemonic::Const => "const", + }) + } +} + +/// A webassembly prefix (prefix.mnemonic) #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[allow(missing_docs)] +pub enum WasmPrefix { + Local, + Global, + + i32, + i64, + f32, + f64 +} + +impl From for WasmPrefix { + fn from(value: String) -> Self { + match value.as_str() { + "local" => WasmPrefix::Local, + "global" => WasmPrefix::Global, + "i32" => WasmPrefix::i32, + "i64" => WasmPrefix::i64, + "f32" => WasmPrefix::f32, + "f64" => WasmPrefix::f64, + + _ => panic!("unkown wasm prefix: {value}"), + } + } +} + +impl Display for WasmPrefix { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + WasmPrefix::Local => "local", + WasmPrefix::Global => "global", + WasmPrefix::i32 => "i32", + WasmPrefix::i64 => "i64", + WasmPrefix::f32 => "f32", + WasmPrefix::f64 => "f64", + }) + } +} + +/// A webassembly operand +#[derive(Debug, Clone, Copy)] +#[allow(missing_docs)] pub enum WasmOperand { + Var(i32), + Const(f64), +} + +impl PartialEq for WasmOperand { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Var(l0), Self::Var(r0)) => l0 == r0, + (Self::Const(l0), Self::Const(r0)) => l0 == r0, + _ => false, + } + } +} +impl Eq for WasmOperand {} + +impl Display for WasmOperand { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + WasmOperand::Var(var) => format!("{}", var), + WasmOperand::Const(imm) => format!("{:.5}", imm), + }) + } } impl Display for WasmMCInstr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let fmt = String::new(); + let mut fmt = String::new(); + + if let Some(prefix) = self.prefix { + fmt.push_str(&prefix.to_string()); + fmt.push('.'); + } + + fmt.push_str(&self.mnemonic.to_string()); + + if let Some(op1) = self.op1 { + fmt.push(' '); + fmt.push_str(&op1.to_string()); + } write!(f, "{}", fmt) } diff --git a/src/Target/wasm/asm/lexer.rs b/src/Target/wasm/asm/lexer.rs new file mode 100644 index 00000000..63703a78 --- /dev/null +++ b/src/Target/wasm/asm/lexer.rs @@ -0,0 +1,82 @@ +use std::{any::Any, error::Error, fmt::Display, num::ParseIntError}; + +use logos::Logos; + +use crate::Target::Lexer; + +/// An error which can occure during lexing +#[derive(Default, Debug, Clone, PartialEq)] +pub(crate) enum LexingError { + /// An invalid intenger + InvalidInteger(String), + #[default] + /// A not supported character + NonAsciiCharacter, +} + +impl Display for LexingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "error") + } +} +impl From> for LexingError { + fn from(_err: Box) -> Self { + LexingError::InvalidInteger(format!("error")) + } +} + +impl std::error::Error for LexingError {} + +impl From for LexingError { + fn from(err: ParseIntError) -> Self { + LexingError::InvalidInteger(format!("{:?}", err.kind())) + } +} + +/// An assembly token +#[derive(Logos, Debug, Clone)] +#[logos(skip r"[ \t\n\f]+")] +#[logos(error = LexingError)] +#[doc(hidden)] +pub(crate) enum Token { + #[regex("[a-zA-Z][a-zA-Z0-9_]*", priority = 5, callback = |lex| lex.slice().to_string())] + Ident(String), + + #[regex(r"-?[0-9]+(\.[0-9]+)?", |lex| lex.slice().parse::().unwrap())] + Num(f64), + + #[token(".")] + Dot, +} + +impl PartialEq for Token { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Self::Ident(l0), Self::Ident(r0)) => l0 == r0, + (Self::Num(l0), Self::Num(r0)) => l0 == r0, + _ => core::mem::discriminant(self) == core::mem::discriminant(other), + } + } +} + +impl Eq for Token {} + +/// A temporary structure which implements the Lexer trait +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct wasmLexer {} + +impl Lexer for wasmLexer { + fn lex(&self, string: String) -> Result>, Box> { + let mut tokens: Vec> = vec![]; + + for tok in Token::lexer(&string) { + tokens.push( Box::new(tok?) ); + } + + Ok(tokens) + } + + fn boxed(&self) -> Box { + Box::from( self.clone() ) + } +} \ No newline at end of file diff --git a/src/Target/wasm/asm/mod.rs b/src/Target/wasm/asm/mod.rs index 8a779326..56e9737a 100644 --- a/src/Target/wasm/asm/mod.rs +++ b/src/Target/wasm/asm/mod.rs @@ -1,3 +1,5 @@ mod instr; mod opt; +pub(crate) mod lexer; +pub(crate) mod parser; pub use instr::*; \ No newline at end of file diff --git a/src/Target/wasm/asm/parser.rs b/src/Target/wasm/asm/parser.rs new file mode 100644 index 00000000..8be1f8ed --- /dev/null +++ b/src/Target/wasm/asm/parser.rs @@ -0,0 +1,130 @@ +use std::{any::Any, collections::VecDeque, error::Error, fmt::Display}; + +use crate::{Support::ColorProfile, Target::Compiler}; + +use super::{instr::*, lexer::Token}; + +/// The parser for parsing wasn assembly instructions +#[derive(Debug, Clone, PartialEq, Eq)] +pub(crate) struct wasmParser { + pub(crate) tokens: VecDeque, + /// The output instruction + pub out: Option, +} + +impl wasmParser { + /// Creates an new x64 assembly parser + pub(crate) fn new(tokens: Vec) -> Self { + Self { + tokens: tokens.into(), + out: None, + } + } + + /// parses the tokens (output will be saved in `self.out`) + pub(crate) fn parse(&mut self) -> Result<(), Box> { + println!("{:?}", self.tokens); + + let mut instr_string = String::new(); + let prefix_string = if let Some(Token::Ident(prefix)) = self.tokens.front() { + instr_string = prefix.to_owned(); + Some(prefix.clone()) + } else { + Err(ParsingError::FirstTokenNeedsToBeIdent)? + }; + + let mut prefix = None; + + self.tokens.pop_front(); + + if let Some(Token::Dot) = self.tokens.front() { + prefix = Some(prefix_string.unwrap().into()); + + self.tokens.pop_front(); + + instr_string = if let Some(Token::Ident(instr)) = self.tokens.front() { + instr.to_owned() + } else { + Err(ParsingError::MnemonicNeedsToBeIdent)? + }; + + self.tokens.pop_front(); + } + + let instr = instr_string.into(); + + if let Some(Token::Num(num)) = self.tokens.front() { + let num = *num; + self.tokens.pop_front(); + + let mut op = WasmOperand::Var(num as i32); + + // the op is not a var in some special cases for sepific mnemonics + // for those we now check here + if instr == WasmMnemonic::Const { + // and turn it into a imm + op = WasmOperand::Const(num); + } + + self.out = Some(WasmMCInstr::with1(prefix, instr, op)); + } else { + self.out = Some(WasmMCInstr::with0(prefix, instr)); + } + + Ok(()) + } +} + +impl Compiler for wasmParser { + fn new(&self, tokens: Vec>) -> Box { + let mut casted = Vec::new(); + + for token in tokens { + casted.push( + *token.downcast::().expect("the x64 parser expects that the input tokens are also x64 tokens") + ); + } + + Box::from( wasmParser::new(casted) ) + } + + fn parse(&mut self) -> Result<(), Box> { + self.parse() + } + + fn out(&self) -> Result, Box> { + Ok(self.out.as_ref().unwrap().encode()?.0) + } + + fn boxed(&self) -> Box { + Box::from(self.clone()) + } + + fn coloredOut(&self, _: ColorProfile) -> String { + // not yet supported + self.out.as_ref().unwrap().to_string() + } + + fn printOut(&self) -> String { + self.out.as_ref().unwrap().to_string() + } +} + +/// An error which can occure during parsing +#[derive(Debug, Clone, PartialEq, Eq)] +#[allow(missing_docs)] +pub(crate) enum ParsingError { + FirstTokenNeedsToBeIdent, + MnemonicNeedsToBeIdent +} + +impl Display for ParsingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", match self { + ParsingError::FirstTokenNeedsToBeIdent => "wasm string should start with an ident", + ParsingError::MnemonicNeedsToBeIdent => "ygen expects that wasm instructions have their mnemonic as idents", + }) + } +} + +impl Error for ParsingError {} \ No newline at end of file diff --git a/src/Target/wasm/mod.rs b/src/Target/wasm/mod.rs index 6f4779dc..f86a7899 100644 --- a/src/Target/wasm/mod.rs +++ b/src/Target/wasm/mod.rs @@ -13,6 +13,9 @@ pub fn initializeWasmTarget(_: CallConv) -> TargetBackendDescr { target.call = CallConv::WasmBasicCAbi; target.init = Some(initializeWasmTarget); + + target.lexer = Some(Box::new( asm::lexer::wasmLexer {} )); + target.compile = Some(Box::new( asm::parser::wasmParser::new(Vec::new()))); let mut compiler = CompilationHelper::new( Arch::Wasm64, diff --git a/src/Target/x64/asm/lexer.rs b/src/Target/x64/asm/lexer.rs index aec4faca..e7f0efb4 100644 --- a/src/Target/x64/asm/lexer.rs +++ b/src/Target/x64/asm/lexer.rs @@ -1,4 +1,4 @@ -use std::{error::Error, fmt::Display, num::ParseIntError}; +use std::{any::Any, error::Error, fmt::Display, num::ParseIntError}; use logos::Logos; @@ -80,11 +80,11 @@ pub enum Token { pub struct x64Lexer {} impl Lexer for x64Lexer { - fn lex(&self, string: String) -> Result, Box> { - let mut tokens = vec![]; + fn lex(&self, string: String) -> Result>, Box> { + let mut tokens: Vec> = vec![]; for tok in Token::lexer(&string) { - tokens.push( tok? ); + tokens.push( Box::new(tok?) ); } Ok(tokens) diff --git a/src/Target/x64/asm/parser.rs b/src/Target/x64/asm/parser.rs index 1dab6d69..bd48a3ca 100644 --- a/src/Target/x64/asm/parser.rs +++ b/src/Target/x64/asm/parser.rs @@ -1,4 +1,4 @@ -use std::{collections::VecDeque, error::Error, fmt::Display, str::FromStr}; +use std::{any::Any, collections::VecDeque, error::Error, fmt::Display, str::FromStr}; use crate::{Support::ColorProfile, Target::{x64Reg, Compiler}}; @@ -154,8 +154,16 @@ impl x64Parser { } impl Compiler for x64Parser { - fn new(&self, tokens: Vec) -> Box { - Box::from( x64Parser::new(tokens) ) + fn new(&self, tokens: Vec>) -> Box { + let mut casted = Vec::new(); + + for token in tokens { + casted.push( + *token.downcast::().expect("the x64 parser expects that the input tokens are also x64 tokens") + ); + } + + Box::from( x64Parser::new(casted) ) } fn parse(&mut self) -> Result<(), Box> {