diff --git a/quol/red/include/Powerset.h b/quol/red/include/Powerset.h index 1f978e3..16dc0c7 100644 --- a/quol/red/include/Powerset.h +++ b/quol/red/include/Powerset.h @@ -45,17 +45,26 @@ namespace zezax::red { +// This is a mapping from a distinct multi-char to a set of NFA state IDs. +// All the distinct multi-chars are put in an array at the start. The +// index into that array becomes the index into this sparse array. typedef SparseVec IdxToNfaIdSet; -// sometimes called translation or transition table... +// This is sometimes called the translation or transition table. It maps +// sets of NFA states to the mapping described above. typedef std::unordered_map NfaStatesToTransitions; +// Here we keep track of how many times each accepting state occurs in +// the translation table. This is used as a tie-breaker when multiple +// accepting results are possible; the lowest number wins. typedef std::unordered_map NfaIdToCount; +// NfaStatesToId keeps the mapping from sets of NFA states to the +// corresponding DFA state, by ID. Final conversion uses this. 
typedef std::unordered_map NfaStatesToId; -// converts nfa to dfa via rabin-scott +// Main class that converts NFA to DFA via Rabin-Scott class PowersetConverter { public: explicit PowersetConverter(const NfaObj &input, diff --git a/quol/red/lib/Powerset.cpp b/quol/red/lib/Powerset.cpp index 105f405..c8f7cd9 100644 --- a/quol/red/lib/Powerset.cpp +++ b/quol/red/lib/Powerset.cpp @@ -39,6 +39,7 @@ using std::vector; namespace { +// Picks the best from a set of NFA states, or -1 if no acceptances Result getResult(const NfaIdSet &nis, const NfaIdToCount &counts, const NfaObj &nfa) { @@ -63,6 +64,7 @@ Result getResult(const NfaIdSet &nis, /////////////////////////////////////////////////////////////////////////////// +// The main driver function for the NFA to DFA conversion DfaObj PowersetConverter::convert() { if (stats_) stats_->preDfa_ = std::chrono::steady_clock::now(); @@ -116,6 +118,7 @@ DfaObj PowersetConverter::convert() { } +// Doorway function to recursive transcription to final DFA DfaId PowersetConverter::dfaFromNfa(const std::vector &multiChars, const NfaStatesToTransitions &table, const NfaIdToCount &counts, @@ -172,7 +175,9 @@ MultiCharSet basisMultiChars(const MultiCharSet &mcs) { } -// this is a performace-critical function +// Make the translation table that the entire conversion process depends +// on. Map sets of NFA states to maps from multi-chars to NFA state sets. +// This is a performance-critical function. NfaStatesToTransitions makeTable(NfaId initial, const NfaObj &nfa, const vector &allMultiChars) { @@ -212,6 +217,7 @@ NfaStatesToTransitions makeTable(NfaId initial, } +// Sum all the occurrences of each accepting state in the translation table NfaIdToCount countAcceptingStates(const NfaStatesToTransitions &table, const NfaObj &nfa) { NfaIdToCount rv; @@ -225,6 +231,7 @@ NfaIdToCount countAcceptingStates(const NfaStatesToTransitions &table, } +// The real work of building the DFA from the translation table, counts, etc. 
DfaId dfaFromNfaRecurse(const vector &multiChars, const NfaStatesToTransitions &table, const NfaIdToCount &counts,