From ee0d914124ff6609169a4ae276095a3ac9365dab Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Thu, 26 Sep 2024 15:17:20 +0200 Subject: [PATCH 1/3] Detect infinite recursions in `nickel doc` This commit fixes an issue where legit recursive configurations cause infinite recursion when trying to extract their documentation. Indeed, `nickel doc` calls to `eval_record_spine`, which is unable to detect loops that span multiple evaluation step (an evaluation step is evaluating one field to a weak head normal form). This commit changes `eval_record_spine` and related methods to lock (in practice just flip a bit in the thunk's state) the active thunks, that is the thunks of parents of the field that we are currently evaluating. If we ever come across a thunk that is already locked, the field is left unevaluated, avoiding infinite recursion. --- cli/tests/snapshot/inputs/docs/recursive.ncl | 11 ++ .../inputs/docs/recursive_false_positive.ncl | 12 +++ .../snapshot__doc_stdout_recursive.ncl.snap | 13 +++ ...c_stdout_recursive_false_positive.ncl.snap | 19 ++++ core/src/eval/cache/lazy.rs | 89 ++++++++++++--- core/src/program.rs | 101 ++++++++++++++---- core/src/term/mod.rs | 9 ++ 7 files changed, 216 insertions(+), 38 deletions(-) create mode 100644 cli/tests/snapshot/inputs/docs/recursive.ncl create mode 100644 cli/tests/snapshot/inputs/docs/recursive_false_positive.ncl create mode 100644 cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive.ncl.snap create mode 100644 cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive_false_positive.ncl.snap diff --git a/cli/tests/snapshot/inputs/docs/recursive.ncl b/cli/tests/snapshot/inputs/docs/recursive.ncl new file mode 100644 index 0000000000..7050b8e3d2 --- /dev/null +++ b/cli/tests/snapshot/inputs/docs/recursive.ncl @@ -0,0 +1,11 @@ +# capture = 'stdout' +# command = ['doc', '--stdout'] + +# Regression test for https://github.com/tweag/nickel/issues/1967 (`nickel doc` +# shouldn't overflow the stack on recursive data) +{ + Recursive = { + foo | Number, + bar | Recursive | optional + } +} diff --git a/cli/tests/snapshot/inputs/docs/recursive_false_positive.ncl b/cli/tests/snapshot/inputs/docs/recursive_false_positive.ncl new file mode 100644 index 0000000000..7d102aa21f --- /dev/null +++ b/cli/tests/snapshot/inputs/docs/recursive_false_positive.ncl @@ -0,0 +1,12 @@ +# capture = 'stdout' +# command = ['doc', '--stdout'] + +# Companion test for the `recursive` regression test +# Check that the infinite recursion detection of `nickel doc` doesn't have +# obvious false positive +{ + outer = { + mid = { inner | doc "this is inner" }, + z | doc "this is z" = outer.mid, + } +} diff --git a/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive.ncl.snap new file mode 100644 index 0000000000..bb1875be5d --- /dev/null +++ b/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive.ncl.snap @@ -0,0 +1,13 @@ +--- +source: cli/tests/snapshot/main.rs +expression: out +--- +# `Recursive` + +## `bar` + +- `bar | Recursive` + +## `foo` + +- `foo | Number` diff --git a/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive_false_positive.ncl.snap b/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive_false_positive.ncl.snap new file mode 100644 index 0000000000..b0e3ec12fc --- /dev/null +++ b/cli/tests/snapshot/snapshots/snapshot__doc_stdout_recursive_false_positive.ncl.snap @@ -0,0 +1,19 @@ +--- +source: cli/tests/snapshot/main.rs +expression: out +--- +# `outer` + +## `mid` + +### `inner` + +this is inner + +## `z` + +this is z + +### `inner` + +this is inner diff --git a/core/src/eval/cache/lazy.rs b/core/src/eval/cache/lazy.rs index d70a8f3128..111bdb953f 100644 --- a/core/src/eval/cache/lazy.rs +++ b/core/src/eval/cache/lazy.rs @@ -19,9 +19,10 @@ use std::rc::{Rc, Weak}; /// overflowing the stack or looping forever. Finally, once the content of a thunk has been /// evaluated, the thunk is updated with the new value and flagged as evaluated, so that future /// accesses won't even push an update frame on the stack. -#[derive(Copy, Clone, Eq, PartialEq, Debug)] +#[derive(Copy, Clone, Eq, PartialEq, Debug, Default)] pub enum ThunkState { Blackholed, + #[default] Suspended, Evaluated, } @@ -31,6 +32,8 @@ pub enum ThunkState { pub struct ThunkData { inner: InnerThunkData, state: ThunkState, + /// A flag indicating whether the thunk is locked. See [Thunk::lock]. + locked: bool, } /// The part of [ThunkData] responsible for storing the closure itself. It can either be: @@ -122,6 +125,7 @@ impl ThunkData { ThunkData { inner: InnerThunkData::Standard(closure), state: ThunkState::Suspended, + locked: false, } } @@ -134,6 +138,7 @@ impl ThunkData { deps, }, state: ThunkState::Suspended, + locked: false, } } @@ -307,6 +312,8 @@ impl ThunkData { /// /// For revertible thunk data, the result is independent from the original one: any update to /// one of the thunks doesn't affect the other. + /// + /// The new thunk is unlocked, whatever the locking status of the original thunk. pub fn revert(thunk: &Rc>) -> Rc> { match thunk.borrow().inner { InnerThunkData::Standard(_) => Rc::clone(thunk), @@ -319,6 +326,7 @@ impl ThunkData { deps: deps.clone(), }, state: ThunkState::Suspended, + locked: false, })), } } @@ -326,6 +334,8 @@ impl ThunkData { /// Map a function over the content of the thunk to create a new independent thunk. If the /// thunk is revertible, the mapping function is applied on both the original expression and /// the cached expression. + /// + /// The new thunk is unlocked, whatever the locking status of the original thunk. pub fn map(&self, mut f: F) -> Self where F: FnMut(&Closure) -> Closure, @@ -334,6 +344,7 @@ impl ThunkData { InnerThunkData::Standard(ref c) => ThunkData { inner: InnerThunkData::Standard(f(c)), state: self.state, + locked: false, }, InnerThunkData::Revertible { ref orig, @@ -346,6 +357,7 @@ impl ThunkData { deps: deps.clone(), }, state: self.state, + locked: false, }, } } @@ -405,18 +417,49 @@ impl Thunk { } /// Set the state to evaluated. - pub fn set_evaluated(&mut self) { + pub fn set_evaluated(&self) { self.data.borrow_mut().state = ThunkState::Evaluated; } + /// Lock a thunk. This flips a dedicated bit in the thunk's state. Returns `true` if the + /// locking succeeded and the thunk wasn't previous locked, or `false` if the thunk was already + /// in a locked state. + /// + /// Locking is used to prevent infinite loops for some step-by-step variants of deep evaluation + /// such as `%deep_seq%`, `%force%` or [crate::program::Program::eval_record_spine], where the + /// normal thunk update workflow isn't adapted. + pub fn lock(&self) -> bool { + let mut data_ref = self.data.borrow_mut(); + + if data_ref.locked { + return false; + } + + data_ref.locked = true; + true + } + + /// Unlock a thunk previously locked (see [Self::lock]). If the thunk wasn't locked, this is a + /// no-op. Returns `true` if the thunk was previously locked, `false` otherwise. + pub fn unlock(&self) -> bool { + let mut data_ref = self.data.borrow_mut(); + + let was_locked = data_ref.locked; + data_ref.locked = false; + + was_locked + } + /// Generate an update frame from this thunk and set the state to `Blackholed`. Return an /// error if the thunk was already black-holed. pub fn mk_update_frame(&mut self) -> Result { - if self.data.borrow().state == ThunkState::Blackholed { + let mut data_ref = self.data.borrow_mut(); + + if data_ref.state == ThunkState::Blackholed { return Err(BlackholedError); } - self.data.borrow_mut().state = ThunkState::Blackholed; + data_ref.state = ThunkState::Blackholed; Ok(ThunkUpdateFrame { data: Rc::downgrade(&self.data), @@ -479,7 +522,7 @@ impl Thunk { } } - pub fn build_cached(&mut self, rec_env: &[(Ident, Thunk)]) { + pub fn build_cached(&self, rec_env: &[(Ident, Thunk)]) { self.data.borrow_mut().init_cached(rec_env) } @@ -585,9 +628,20 @@ impl Thunk { self.data.borrow().deps() } + /// Check for physical equality between two thunks. This method is used for fast equality + /// checking, as if two thunks are physically equal, they must be equal as Nickel values. pub fn ptr_eq(this: &Thunk, that: &Thunk) -> bool { Rc::ptr_eq(&this.data, &that.data) } + + /// Return a unique identifier for this thunk used for fast equality checking or other + /// optimizations. If two thunks have the same UID, they store the same expression. The + /// converse isn't true: two thunks can be equal as expressions but have different UID. + /// + /// In practice, the UID is currently the underlying `Rc` pointer value. + pub fn uid(&self) -> usize { + self.data.as_ptr() as usize + } } impl std::fmt::Pointer for Thunk { @@ -647,18 +701,21 @@ impl Cache for CBNCache { &mut self, idx: &mut CacheIndex, ) -> Result, BlackholedError> { - if idx.state() != ThunkState::Evaluated { - if idx.should_update() { - idx.mk_update_frame().map(Some) - } - // If the thunk isn't to be updated, directly set the evaluated flag. - else { - idx.set_evaluated(); - Ok(None) - } - } else { - Ok(None) + // If the thunk is already evaluated, we don't return an update frame. + if idx.state() == ThunkState::Evaluated { + return Ok(None); } + + // If the thunk isn't worth updating, we set its state to evaluated and we don't create an + // update frame either. There's one exception: if the thunk is blackholed, we still want to + // raise an infinite recursion error. This will be handled by `mk_update_frame()` below. + if !idx.should_update() && idx.state() != ThunkState::Blackholed { + idx.set_evaluated(); + return Ok(None); + } + + // Otherwise, we return an update frame. + Some(idx.mk_update_frame()).transpose() } fn add(&mut self, clos: Closure, bty: BindingType) -> CacheIndex { diff --git a/core/src/program.rs b/core/src/program.rs index eab255f736..6a8039ade9 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -41,12 +41,12 @@ use crate::{ use clap::ColorChoice; use codespan::FileId; use codespan_reporting::term::termcolor::{Ansi, NoColor, WriteColor}; -use std::path::PathBuf; use std::{ ffi::OsString, fmt, io::{self, Read, Write}, + path::PathBuf, result::Result, }; @@ -674,12 +674,34 @@ impl Program { } fn maybe_closurized_eval_record_spine(&mut self, closurize: bool) -> Result { - use crate::eval::Environment; - use crate::match_sharedterm; - use crate::term::{record::RecordData, RuntimeContract}; + use crate::{ + eval::Environment, + match_sharedterm, + term::{record::RecordData, RuntimeContract}, + }; let prepared = self.prepare_eval()?; + // Naively evaluating some legit recursive structures might lead to an infinite loop. Take + // for example this simple contract definition: + // + // ```nickel + // { + // Tree = { + // data | Number, + // left | Tree | optional, + // right | Tree | optional, + // } + // } + // ``` + // + // Here, we don't want to unfold the occurrences of `Tree` appearing in `left` and `right`, + // or we will just go on indefinitely (until a stack overflow in practice). To avoid this, + // we store the cache index (thunk) corresponding to the content of the `Tree` field before + // evaluating it. After we have successfully evaluated it to a record, we mark it (lock), + // and if we come across the same thunk while evaluating one of its children, here `left` + // for example, we don't evaluate it further. + // Eval pending contracts as well, in order to extract more information from potential // record contract fields. fn eval_contracts( @@ -692,23 +714,42 @@ impl Program { for ctr in pending_contracts.iter_mut() { let rt = ctr.contract.clone(); - ctr.contract = do_eval(vm, rt, current_env.clone(), closurize)?; + // Note that contracts can't be referred to recursively, as they aren't binding + // anything. Only fields are. This is why we pass `None` for `self_idx`: there is + // no locking required here. + ctr.contract = eval_guarded(vm, rt, current_env.clone(), closurize)?; } Ok(pending_contracts) } - fn do_eval( + // Handles thunk locking (and unlocking upon errors) to detect infinite recursion, but + // hands over the meat of the work to `do_eval`. + fn eval_guarded( vm: &mut VirtualMachine, term: RichTerm, env: Environment, closurize: bool, ) -> Result { vm.reset(); - let result = vm.eval_closure(Closure { - body: term.clone(), - env, - }); + + let curr_thunk = term.as_ref().try_as_closure(); + + if let Some(thunk) = curr_thunk.as_ref() { + // If the thunk is already locked, it's the thunk of some parent field, and we stop + // here to avoid infinite recursion. + if !thunk.lock() { + return Ok(term); + } + } + + let result = do_eval(vm, term.clone(), env, closurize); + + // Once we're done evaluating all the children, or if there was an error, we unlock the + // current thunk + if let Some(thunk) = curr_thunk.as_ref() { + thunk.unlock(); + } // We expect to hit `MissingFieldDef` errors. When a configuration // contains undefined record fields they most likely will be used @@ -718,13 +759,27 @@ impl Program { // be able to extract dcoumentation from their values anyways. All // other evaluation errors should however be reported to the user // instead of resulting in documentation being silently skipped. + if matches!( + result, + Err(Error::EvalError(EvalError::MissingFieldDef { .. })) + ) { + return Ok(term); + } - let result = match result { - Err(EvalError::MissingFieldDef { .. }) => return Ok(term), - _ => result, - }?; + result + } - match_sharedterm!(match (result.body.term) { + // Evaluates the closure, and if it's a record, recursively evaluate its fields and their + // contrats. + fn do_eval( + vm: &mut VirtualMachine, + term: RichTerm, + env: Environment, + closurize: bool, + ) -> Result { + let evaled = vm.eval_closure(Closure { body: term, env })?; + + let result = match_sharedterm!(match (evaled.body.term) { Term::Record(data) => { let fields = data .fields @@ -736,13 +791,13 @@ impl Program { value: field .value .map(|value| { - do_eval(vm, value, result.env.clone(), closurize) + eval_guarded(vm, value, evaled.env.clone(), closurize) }) .transpose()?, pending_contracts: eval_contracts( vm, field.pending_contracts, - result.env.clone(), + evaled.env.clone(), closurize, )?, ..field @@ -753,19 +808,21 @@ impl Program { Ok(RichTerm::new( Term::Record(RecordData { fields, ..data }), - result.body.pos, + evaled.body.pos, )) } _ => if closurize { - Ok(result.body.closurize(&mut vm.cache, result.env)) + Ok(evaled.body.closurize(&mut vm.cache, evaled.env)) } else { - Ok(result.body) + Ok(evaled.body) }, - }) + }); + + result } - do_eval(&mut self.vm, prepared.body, prepared.env, closurize) + eval_guarded(&mut self.vm, prepared.body, prepared.env, closurize) } /// Extract documentation from the program diff --git a/core/src/term/mod.rs b/core/src/term/mod.rs index 7c92c8f838..7a7b69e711 100644 --- a/core/src/term/mod.rs +++ b/core/src/term/mod.rs @@ -1198,6 +1198,15 @@ impl Term { _ => None, } } + + /// Extract the cache index (thunk) from a closure. If `self` isn't a closure, `None` is + /// returned. + pub fn try_as_closure(&self) -> Option { + match self { + Term::Closure(idx) => Some(idx.clone()), + _ => None, + } + } } #[derive(Debug, Clone, PartialEq)] From 4e9463b0ad0bad1d7abfe85eabc5a64dadc21521 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 2 Oct 2024 15:06:41 +0200 Subject: [PATCH 2/3] Fix clippy warning --- core/src/program.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core/src/program.rs b/core/src/program.rs index 6a8039ade9..a70a376cb1 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -779,7 +779,7 @@ impl Program { ) -> Result { let evaled = vm.eval_closure(Closure { body: term, env })?; - let result = match_sharedterm!(match (evaled.body.term) { + match_sharedterm!(match (evaled.body.term) { Term::Record(data) => { let fields = data .fields @@ -817,9 +817,7 @@ impl Program { } else { Ok(evaled.body) }, - }); - - result + }) } eval_guarded(&mut self.vm, prepared.body, prepared.env, closurize) From 055312edc5294b264ec73af1ed5834a382cdcb74 Mon Sep 17 00:00:00 2001 From: Yann Hamdaoui Date: Wed, 2 Oct 2024 15:07:45 +0200 Subject: [PATCH 3/3] Fix typo in code comment --- core/src/eval/operation.rs | 2 +- core/src/program.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/eval/operation.rs b/core/src/eval/operation.rs index d6a0fa1168..2efc7ead82 100644 --- a/core/src/eval/operation.rs +++ b/core/src/eval/operation.rs @@ -3767,7 +3767,7 @@ fn eq( // does, just do `eqs.rev()` // We should apply all contracts here, otherwise we risk having wrong values, think - // record contrats with default values, wrapped terms, etc. + // record contracts with default values, wrapped terms, etc. let mut eqs = l1 .into_iter() diff --git a/core/src/program.rs b/core/src/program.rs index a70a376cb1..92c643fe6a 100644 --- a/core/src/program.rs +++ b/core/src/program.rs @@ -770,7 +770,7 @@ impl Program { } // Evaluates the closure, and if it's a record, recursively evaluate its fields and their - // contrats. + // contracts. fn do_eval( vm: &mut VirtualMachine, term: RichTerm,