From 0e4445fa5e2e1801ccda98cd7d09a1ac80cbcb6a Mon Sep 17 00:00:00 2001 From: Jake Hughes Date: Tue, 21 Nov 2023 14:21:43 +0000 Subject: [PATCH] Remove use of SmartString The SmartString library uses pointer obfuscation tricks internally which hide GC pointers from alloy. We must therefore remove it as it is incompatible with conservative pointer scanning. --- src/lib/compiler/ast_to_instrs.rs | 52 ++++++++++++++----------------- src/lib/vm/core.rs | 28 ++++++++--------- src/lib/vm/error.rs | 8 ++--- src/lib/vm/objects/class.rs | 9 +++--- src/lib/vm/objects/double.rs | 7 ++--- src/lib/vm/objects/string_.rs | 11 +++---- src/lib/vm/val.rs | 9 +++--- src/main.rs | 5 ++- 8 files changed, 58 insertions(+), 71 deletions(-) diff --git a/src/lib/compiler/ast_to_instrs.rs b/src/lib/compiler/ast_to_instrs.rs index 60ff93ac..b462c587 100644 --- a/src/lib/compiler/ast_to_instrs.rs +++ b/src/lib/compiler/ast_to_instrs.rs @@ -8,7 +8,7 @@ use itertools::Itertools; use lrlex::{DefaultLexerTypes, LRNonStreamingLexer}; use lrpar::{NonStreamingLexer, Span}; use num_bigint::BigInt; -use smartstring::alias::String as SmartString; + use std::gc::{Gc, NonFinalizable}; use crate::{ @@ -31,10 +31,10 @@ const CLOSURE_RETURN_VAR: &str = "$$closurereturn$$"; pub struct Compiler<'a, 'input> { lexer: &'a LRNonStreamingLexer<'a, 'input, DefaultLexerTypes>, path: &'a Path, - upvars_stack: Vec>, + upvars_stack: Vec>, /// The stack of local variables at the current point of evaluation. Maps variable names to /// `(captured, var idx)`. - locals_stack: Vec>, + locals_stack: Vec>, blocks_stack: Vec>>, /// Since SOM's "^" operator returns from the enclosed method, we need to track whether we are /// in a closure -- and, if so, how many nested closures we are inside at the current point of @@ -58,7 +58,7 @@ impl<'a, 'input> Compiler<'a, 'input> { lexer: &LRNonStreamingLexer>, path: &Path, astcls: &ast::Class, - ) -> Result<(SmartString, Val), String> { + ) -> Result<(String, Val), String> { let mut compiler = Compiler { lexer, path, @@ -68,7 +68,7 @@ impl<'a, 'input> Compiler<'a, 'input> { closure_depth: 0, }; - let name = SmartString::from(lexer.span_str(astcls.name)); + let name = String::from(lexer.span_str(astcls.name)); let (supercls_val, supercls_meta_val) = if name.as_str() != "Object" { let supercls_val = if let Some(n) = astcls.supername.map(|x| lexer.span_str(x)) { match vm.load_class(n) { @@ -159,7 +159,7 @@ impl<'a, 'input> Compiler<'a, 'input> { &mut self, vm: &mut VM, lexer: &'a LRNonStreamingLexer>, - name: SmartString, + name: String, supercls_val: Val, ast_inst_vars: &[Span], ast_methods: &[ast::Method], @@ -190,7 +190,7 @@ impl<'a, 'input> Compiler<'a, 'input> { }; for var in ast_inst_vars { let vars_len = inst_vars.len(); - let n = SmartString::from(lexer.span_str(*var)); + let n = String::from(lexer.span_str(*var)); match inst_vars.entry(n) { hash_map::Entry::Occupied(_) => { return Err(vec![( @@ -233,7 +233,7 @@ impl<'a, 'input> Compiler<'a, 'input> { let inst_vars_map = self.locals_stack[0] .iter() .map(|(k, v)| (k.clone(), v.1)) - .collect::>(); + .collect::>(); self.locals_stack.pop(); if !errs.is_empty() { return Err(errs); @@ -257,27 +257,21 @@ impl<'a, 'input> Compiler<'a, 'input> { Ok(cls_val) } - fn c_method( - &mut self, - vm: &mut VM, - astmeth: &ast::Method, - ) -> CompileResult<(SmartString, Val)> { + fn c_method(&mut self, vm: &mut VM, astmeth: &ast::Method) -> CompileResult<(String, Val)> { let (name, args) = match astmeth.name { ast::MethodName::BinaryOp(op, arg) => { let arg_v = match arg { Some(l) => vec![l], None => vec![], }; - ((op, SmartString::from(self.lexer.span_str(op))), arg_v) - } - ast::MethodName::Id(span) => { - ((span, SmartString::from(self.lexer.span_str(span))), vec![]) + ((op, String::from(self.lexer.span_str(op))), arg_v) } + ast::MethodName::Id(span) => ((span, String::from(self.lexer.span_str(span))), vec![]), ast::MethodName::Keywords(ref pairs) => { let name = pairs .iter() .map(|x| self.lexer.span_str(x.0)) - .collect::(); + .collect::(); let args = pairs.iter().map(|x| x.1).collect::>(); ((pairs[0].0, name), args) } @@ -444,10 +438,10 @@ impl<'a, 'input> Compiler<'a, 'input> { ) -> CompileResult<(usize, Vec>, Vec)> { self.upvars_stack.push(Vec::new()); let mut vars = HashMap::new(); - vars.insert(SmartString::from("self"), (false, 0)); + vars.insert(String::from("self"), (false, 0)); let mut process_var = |var_sp: Span| { let vars_len = vars.len(); - let var_str = SmartString::from(self.lexer.span_str(var_sp)); + let var_str = String::from(self.lexer.span_str(var_sp)); match vars.entry(var_str.clone()) { hash_map::Entry::Occupied(_) => Err(vec![( var_sp, @@ -542,7 +536,7 @@ impl<'a, 'input> Compiler<'a, 'input> { ast::Expr::BinaryMsg { span, lhs, op, rhs } => { let mut stack_size = self.c_expr(vm, lhs)?; stack_size = max(stack_size, 1 + self.c_expr(vm, rhs)?); - let send_off = vm.add_send((SmartString::from(self.lexer.span_str(*op)), 1)); + let send_off = vm.add_send((String::from(self.lexer.span_str(*op)), 1)); let instr = match lhs { box ast::Expr::UnaryMsg { receiver: box ast::Expr::VarLookup(span2), @@ -640,7 +634,7 @@ impl<'a, 'input> Compiler<'a, 'input> { msglist, } => { let mut max_stack = self.c_expr(vm, receiver)?; - let mut mn = SmartString::new(); + let mut mn = String::new(); for (i, (kw, expr)) in msglist.iter().enumerate() { mn.push_str(self.lexer.span_str(*kw)); let expr_stack = self.c_expr(vm, expr)?; @@ -668,7 +662,7 @@ impl<'a, 'input> Compiler<'a, 'input> { } => { let max_stack = self.c_expr(vm, receiver)?; for (i, id) in ids.iter().enumerate() { - let send_off = vm.add_send((SmartString::from(self.lexer.span_str(*id)), 0)); + let send_off = vm.add_send((String::from(self.lexer.span_str(*id)), 0)); let instr = match receiver { box ast::Expr::VarLookup(span) if i == 0 && self.lexer.span_str(*span) == "super" => @@ -697,7 +691,7 @@ impl<'a, 'input> Compiler<'a, 'input> { // "self" as an upvalue. let locals_stack_len = self.locals_stack[1].len(); self.locals_stack[1] - .entry(SmartString::from(CLOSURE_RETURN_VAR)) + .entry(String::from(CLOSURE_RETURN_VAR)) .or_insert((false, locals_stack_len)); match self.find_var_name(CLOSURE_RETURN_VAR) { VarLookup::UpVar(upidx) => { @@ -711,7 +705,7 @@ impl<'a, 'input> Compiler<'a, 'input> { } ast::Expr::String(span) => { let s_orig = self.lexer.span_str(*span); - let mut new_s = SmartString::new(); + let mut new_s = String::new(); // Start by ignoring the beginning quote. let mut i = '\"'.len_utf8(); // End by ignoring the beginning quote. @@ -752,13 +746,13 @@ impl<'a, 'input> Compiler<'a, 'input> { // XXX are there string escaping rules we need to take account of? let s_orig = self.lexer.span_str(*span); // Strip off the beginning/end quotes. - let s = SmartString::from(&s_orig[1..s_orig.len() - 1]); + let s = String::from(&s_orig[1..s_orig.len() - 1]); let instr = Instr::Symbol(vm.add_symbol(s)); vm.instrs_push(instr, *span); Ok(1) } ast::Expr::Symbol(span) => { - let s = SmartString::from(self.lexer.span_str(*span)); + let s = String::from(self.lexer.span_str(*span)); let instr = Instr::Symbol(vm.add_symbol(s)); vm.instrs_push(instr, *span); Ok(1) @@ -814,7 +808,7 @@ impl<'a, 'input> Compiler<'a, 'input> { e.1 }; self.upvars_stack[depth + 1].push(( - SmartString::from(name), + String::from(name), UpVarDef { capture_local: true, upidx, @@ -828,7 +822,7 @@ impl<'a, 'input> Compiler<'a, 'input> { let mut upidx = i; for j in depth + 1..self.upvars_stack.len() { self.upvars_stack[j].push(( - SmartString::from(name), + String::from(name), UpVarDef { capture_local: false, upidx, diff --git a/src/lib/vm/core.rs b/src/lib/vm/core.rs index 54c532e1..c2c406eb 100644 --- a/src/lib/vm/core.rs +++ b/src/lib/vm/core.rs @@ -13,7 +13,7 @@ use std::{ use lrpar::Span; use num_bigint::BigInt; -use smartstring::alias::String as SmartString; + use static_assertions::const_assert; use std::gc::Gc; @@ -78,24 +78,24 @@ pub struct VM { /// The current known set of globals including those not yet assigned to: in other words, it is /// expected that some entries of this `Vec` are illegal (i.e. created by `Val::illegal`). globals: Vec, - reverse_globals: HashMap, + reverse_globals: HashMap, inline_caches: Vec)>>, /// `instrs` and `instr_span`s are always the same length: they are separated only because we /// rarely access `instr_spans`. instrs: Vec, instr_spans: Vec, - sends: Vec<(Gc, usize)>, + sends: Vec<(Gc, usize)>, /// reverse_sends is an optimisation allowing us to reuse sends: it maps a send `(String, /// usize)` to a `usize` where the latter represents the index of the send in `sends`. - reverse_sends: HashMap<(Gc, usize), usize>, + reverse_sends: HashMap<(Gc, usize), usize>, stack: Gc, arbints: Vec, strings: Vec, /// reverse_strings is an optimisation allowing us to reuse strings: it maps a `String to a /// `usize` where the latter represents the index of the string in `strings`. - reverse_strings: HashMap, + reverse_strings: HashMap, symbols: Vec, - reverse_symbols: HashMap, + reverse_symbols: HashMap, time_at_start: Instant, open_upvars: Option>, } @@ -850,7 +850,7 @@ impl VM { Err(_) => { return SendReturn::Err(VMError::new( self, - VMErrorKind::InvalidInteger(SmartString::from(s)), + VMErrorKind::InvalidInteger(String::from(s)), )) } }, @@ -861,7 +861,7 @@ impl VM { Err(_) => { return SendReturn::Err(VMError::new( self, - VMErrorKind::InvalidDouble(SmartString::from(s)), + VMErrorKind::InvalidDouble(String::from(s)), )) } } @@ -1061,7 +1061,7 @@ impl VM { let path = PathBuf::from(stry!(path_val.downcast::(self)).as_str()); match read_to_string(path) { Ok(s) => { - let v = String_::new_str(self, SmartString::from(s)); + let v = String_::new_str(self, String::from(s)); self.stack.push(v); } Err(_) => self.stack.push(self.nil), @@ -1408,7 +1408,7 @@ impl VM { /// Add the send `send` to the VM, returning its index. Note that sends are reused, so indexes /// are also reused. - pub fn add_send(&mut self, send: (SmartString, usize)) -> usize { + pub fn add_send(&mut self, send: (String, usize)) -> usize { // We want to avoid `clone`ing `send` in the (hopefully common) case of a cache hit, hence // this slightly laborious dance and double-lookup. let send = (Gc::new(send.0), send.1); @@ -1432,7 +1432,7 @@ impl VM { /// Add the string `s` to the VM, returning its index. Note that strings are reused, so indexes /// are also reused. - pub fn add_string(&mut self, s: SmartString) -> usize { + pub fn add_string(&mut self, s: String) -> usize { // We want to avoid `clone`ing `s` in the (hopefully common) case of a cache hit, hence // this slightly laborious dance and double-lookup. if let Some(i) = self.reverse_strings.get(&s) { @@ -1448,7 +1448,7 @@ impl VM { /// Add the symbol `s` to the VM, returning its index. Note that symbols are reused, so indexes /// are also reused. - pub fn add_symbol(&mut self, s: SmartString) -> usize { + pub fn add_symbol(&mut self, s: String) -> usize { // We want to avoid `clone`ing `s` in the (hopefully common) case of a cache hit, hence // this slightly laborious dance and double-lookup. if let Some(i) = self.reverse_symbols.get(&s) { @@ -1469,7 +1469,7 @@ impl VM { *i } else { let len = self.globals.len(); - self.reverse_globals.insert(SmartString::from(s), len); + self.reverse_globals.insert(String::from(s), len); self.globals.push(Val::illegal()); len } @@ -1516,7 +1516,7 @@ impl VM { self.globals[*i] = v; } else { self.reverse_globals - .insert(SmartString::from(name), self.globals.len()); + .insert(String::from(name), self.globals.len()); self.globals.push(v); } } diff --git a/src/lib/vm/error.rs b/src/lib/vm/error.rs index df78825e..78b26caf 100644 --- a/src/lib/vm/error.rs +++ b/src/lib/vm/error.rs @@ -1,7 +1,7 @@ use std::{fs::read_to_string, io::stderr}; use lrpar::Span; -use smartstring::alias::String as SmartString; + use std::gc::Gc; use termion::{is_tty, style}; @@ -163,9 +163,9 @@ pub enum VMErrorKind { got_cls: Val, }, /// Tried to convert an invalid string to a Double. - InvalidDouble(SmartString), + InvalidDouble(String), /// Tried to convert an invalid string to an Integer. - InvalidInteger(SmartString), + InvalidInteger(String), /// Tried to access a global before it being initialised. InvalidSymbol, /// Tried to do a shl or shr with a value below zero. @@ -182,7 +182,7 @@ pub enum VMErrorKind { /// Tried to do a shl that would overflow memory and/or not fit in the required integer size. ShiftTooBig, /// An unknown global. - UnknownGlobal(SmartString), + UnknownGlobal(String), /// An unknown method. UnknownMethod, /// Tried calling a method with the wrong number of arguments. diff --git a/src/lib/vm/objects/class.rs b/src/lib/vm/objects/class.rs index e3ce03c3..f44e74f8 100644 --- a/src/lib/vm/objects/class.rs +++ b/src/lib/vm/objects/class.rs @@ -8,7 +8,6 @@ use std::{ str, }; -use smartstring::alias::String as SmartString; use std::gc::{Gc, NonFinalizable}; use crate::vm::{ @@ -26,14 +25,14 @@ pub struct Class { /// Offset to this class's instructions in VM::instrs. pub instrs_off: usize, supercls: Cell, - pub inst_vars_map: NonFinalizable>, + pub inst_vars_map: NonFinalizable>, /// A SOM Array of methods (though note that it is *not* guaranteed that these definitely point /// to SOM `Method` instances -- anything can be stored in this array!). methods: Val, /// A map from method names to indexes into the methods SOM Array. Note that indexes are stored /// with SOM indexing (starting from 1). We guarantee that the indexes are valid indexes for /// the `methods` array. - methods_map: NonFinalizable>, + methods_map: NonFinalizable>, inst_vars: UnsafeCell>, } @@ -94,9 +93,9 @@ impl Class { path: PathBuf, instrs_off: usize, supercls: Val, - inst_vars_map: NonFinalizable>, + inst_vars_map: NonFinalizable>, methods: Val, - methods_map: NonFinalizable>, + methods_map: NonFinalizable>, ) -> Val { #[cfg(debug_assertions)] { diff --git a/src/lib/vm/objects/double.rs b/src/lib/vm/objects/double.rs index 75b24d0d..0ff06ff9 100644 --- a/src/lib/vm/objects/double.rs +++ b/src/lib/vm/objects/double.rs @@ -4,7 +4,7 @@ use std::{collections::hash_map::DefaultHasher, hash::Hasher}; use num_bigint::BigInt; use num_traits::{FromPrimitive, ToPrimitive, Zero}; -use smartstring::alias::String as SmartString; + use std::gc::Gc; use crate::vm::{ @@ -33,10 +33,7 @@ impl Obj for Double { fn to_strval(self: Gc, vm: &mut VM) -> Result> { let mut buf = ryu::Buffer::new(); - Ok(String_::new_str( - vm, - SmartString::from(buf.format(self.val)), - )) + Ok(String_::new_str(vm, String::from(buf.format(self.val)))) } fn hashcode(self: Gc) -> u64 { diff --git a/src/lib/vm/objects/string_.rs b/src/lib/vm/objects/string_.rs index e39b2d80..a2d58f24 100644 --- a/src/lib/vm/objects/string_.rs +++ b/src/lib/vm/objects/string_.rs @@ -7,7 +7,6 @@ use std::{ str, }; -use smartstring::alias::String as SmartString; use std::gc::Gc; use crate::vm::{ @@ -26,7 +25,7 @@ pub struct String_ { /// that if anyone ever manages to make a string of usize::MAX characters, we won't cache its /// length correctly, but will recalculate it each time. chars_len: Cell, - s: SmartString, + s: String, } // This is safe because there is no non-GC'd shared ownership inside `String_`. @@ -99,13 +98,13 @@ impl StaticObjType for String_ { } impl String_ { - pub fn new_str(vm: &mut VM, s: SmartString) -> Val { + pub fn new_str(vm: &mut VM, s: String) -> Val { String_::new_str_chars_len(vm, s, usize::MAX) } /// Create a new `String_` whose number of Unicode characters is already known. Note that it is /// safe to pass `usize::MAX` for `chars_len`. - fn new_str_chars_len(vm: &mut VM, s: SmartString, chars_len: usize) -> Val { + fn new_str_chars_len(vm: &mut VM, s: String, chars_len: usize) -> Val { Val::from_obj(String_ { cls: Cell::new(vm.str_cls), chars_len: Cell::new(chars_len), @@ -113,7 +112,7 @@ impl String_ { }) } - pub fn new_sym(vm: &mut VM, s: SmartString) -> Val { + pub fn new_sym(vm: &mut VM, s: String) -> Val { Val::from_obj(String_ { cls: Cell::new(vm.sym_cls), chars_len: Cell::new(usize::MAX), @@ -170,7 +169,7 @@ impl String_ { todo!(); } if end < start { - return Ok(String_::new_str(vm, SmartString::new())); + return Ok(String_::new_str(vm, String::new())); } if start > self.s.len() || end > self.s.len() { todo!(); diff --git a/src/lib/vm/val.rs b/src/lib/vm/val.rs index 20ad472f..f933456f 100644 --- a/src/lib/vm/val.rs +++ b/src/lib/vm/val.rs @@ -662,7 +662,6 @@ mod tests { }; use serial_test::serial; - use smartstring::alias::String as SmartString; #[test] #[serial] @@ -733,7 +732,7 @@ mod tests { assert_eq!(v.as_usize(&mut vm).unwrap(), 1 << (BITSIZE - 2)); assert_eq!(v.as_isize(&mut vm).unwrap(), 1 << (BITSIZE - 2)); - let v = String_::new_str(&mut vm, SmartString::new()); + let v = String_::new_str(&mut vm, String::new()); assert!(v.as_usize(&mut vm).is_err()); } @@ -743,7 +742,7 @@ mod tests { let mut vm = VM::new_no_bootstrap(); let v = { - let v = String_::new_str(&mut vm, SmartString::from("s")); + let v = String_::new_str(&mut vm, String::from("s")); let v_tobj = v.tobj(&mut vm).unwrap(); let v_str: Gc = v_tobj.downcast().unwrap(); let v_recovered = Val::recover(v_str); @@ -759,7 +758,7 @@ mod tests { #[serial] fn test_cast() { let mut vm = VM::new_no_bootstrap(); - let v = String_::new_str(&mut vm, SmartString::from("s")); + let v = String_::new_str(&mut vm, String::from("s")); assert!(v.downcast::(&mut vm).is_ok()); assert_eq!( v.downcast::(&mut vm).unwrap_err().kind, @@ -774,7 +773,7 @@ mod tests { #[serial] fn test_downcast() { let mut vm = VM::new_no_bootstrap(); - let v = String_::new_str(&mut vm, SmartString::from("s")); + let v = String_::new_str(&mut vm, String::from("s")); assert!(v.downcast::(&mut vm).is_ok()); assert!(v.downcast::(&mut vm).is_err()); assert!(v.try_downcast::(&mut vm).is_some()); diff --git a/src/main.rs b/src/main.rs index 1987cf03..ca8345af 100644 --- a/src/main.rs +++ b/src/main.rs @@ -12,7 +12,6 @@ use std::{ }; use getopts::Options; -use smartstring::alias::String as SmartString; use yksom::vm::{ objects::{NormalArray, String_}, @@ -99,14 +98,14 @@ fn main() { }; let mut src_fname = PathBuf::from(src_fname); src_fname.set_extension(""); - let src_fname_val = String_::new_str(&mut vm, SmartString::from(src_fname.to_str().unwrap())); + let src_fname_val = String_::new_str(&mut vm, String::from(src_fname.to_str().unwrap())); let mut args_vec = vec![src_fname_val]; args_vec.extend( matches .free .iter() .skip(1) - .map(|x| String_::new_str(&mut vm, SmartString::from(x))), + .map(|x| String_::new_str(&mut vm, String::from(x))), ); let args = NormalArray::from_vec(args_vec); match vm.top_level_send(system, "initialize:", vec![args]) {